#!/bin/bash
#
# Per-rank launcher for xhpcg.
#
# Reads the node-local MPI rank from OMPI_COMM_WORLD_LOCAL_RANK (set by
# Open MPI's mpirun for each process it starts), pins that rank to one GPU
# via CUDA_VISIBLE_DEVICES and to one NUMA node via numactl, then runs the
# benchmark.
#
# Supported local ranks: 0-3 (ranks 0,1 -> NUMA node 0; ranks 2,3 -> NUMA
# node 1). Any other value, or a missing rank variable, is reported on
# stderr and the script exits with status 1 instead of silently doing
# nothing.

# Number of CPU cores used per GPU; exported as the OpenMP thread count.
CPU_CORES_PER_GPU=3

export OMP_NUM_THREADS=$CPU_CORES_PER_GPU

# Local rank of this process on the node. Empty when the script is not
# started by an MPI launcher that sets the variable.
lrank=${OMPI_COMM_WORLD_LOCAL_RANK:-}

if [[ -z "$lrank" ]]; then
  printf '%s: OMPI_COMM_WORLD_LOCAL_RANK is not set; start this script via mpirun\n' \
    "${0##*/}" >&2
  exit 1
fi

# Benchmark command line, kept as an array so arguments are passed through
# without relying on word-splitting.
APP=(./xhpcg)
#APP=(./xhpcg -b -s 1 -i 50 -x 128 -y 128 -z 128 -t 10)

# Map the local rank to its GPU index and NUMA node.
# Edit this table to match the node topology.
case "$lrank" in
  0) gpu=0 numa_node=0 ;;
  1) gpu=1 numa_node=0 ;;
  2) gpu=2 numa_node=1 ;;
  3) gpu=3 numa_node=1 ;;
  *)
    # Previously an unmapped rank fell through the case and the script
    # exited 0 without launching anything.
    printf '%s: no GPU/NUMA mapping for local rank %s (supported: 0-3)\n' \
      "${0##*/}" "$lrank" >&2
    exit 1
    ;;
esac

# GPU affinity: expose only the selected device to this rank.
export CUDA_VISIBLE_DEVICES=$gpu

# CPU affinity: restrict the benchmark to the CPUs of the selected NUMA node.
# exec replaces this shell so the benchmark's exit status and any signals
# from the launcher apply to it directly.
exec numactl --cpunodebind="$numa_node" "${APP[@]}"
