Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
#!/bin/bash
# Slurm batch script: Horovod training run (train.py) on 2 V100 GPUs,
# one GPU per node, one task per GPU.
#SBATCH --job-name=hvd_tf
#SBATCH --time=01:00:00
#SBATCH --gpus=2
#SBATCH --gpus-per-node=1
#SBATCH --constraint=v100
#SBATCH --ntasks=2
#SBATCH --cpus-per-task=4
#SBATCH --mem=64G


module load dl
module load intelpython3
# or tensorflow
module load tensorflow/2.2
module load horovod/0.20.3
# Record the resolved software environment in the job log.
module list

# Silence Open MPI's warning about missing openib device parameters.
export OMPI_MCA_btl_openib_warn_no_device_params_found=0
export UCX_MEMTYPE_CACHE=n
# Restrict UCX to the TCP transport.
export UCX_TLS=tcp

# Launch one process per allocated task; the sizes come from the allocation
# so the command stays in sync with the #SBATCH directives above.
srun -u \
  -n "${SLURM_NTASKS}" \
  -N "${SLURM_NNODES}" \
  -c "${SLURM_CPUS_PER_TASK}" \
  --cpu-bind=cores \
  python train.py

...

Code Block
#!/bin/bash
# Slurm batch script: Horovod synthetic benchmark on 8 V100 GPUs,
# all 8 GPUs on a single node, one task per GPU.
#SBATCH --job-name=hvd_tf
#SBATCH --time=01:00:00
#SBATCH --gpus=8
#SBATCH --gpus-per-node=8
#SBATCH --constraint=v100
#SBATCH --ntasks=8
#SBATCH --cpus-per-task=4
#SBATCH --mem=64G

module use /sw/csgv
module load dl
module load intelpython3
# or tensorflow
# NOTE(review): the benchmark below is the PyTorch one, yet tensorflow/2.2 is
# loaded here -- confirm which framework module this script should load.
module load tensorflow/2.2
module load horovod/0.20.3
# Record the resolved software environment in the job log.
module list

# Silence Open MPI's warning about missing openib device parameters.
export OMPI_MCA_btl_openib_warn_no_device_params_found=0
export UCX_MEMTYPE_CACHE=n
# Restrict UCX to the TCP transport.
export UCX_TLS=tcp

# Launch one process per allocated task; the sizes come from the allocation
# so the command stays in sync with the #SBATCH directives above.
srun -u \
  -n "${SLURM_NTASKS}" \
  -N "${SLURM_NNODES}" \
  -c "${SLURM_CPUS_PER_TASK}" \
  --cpu-bind=cores \
  python pytorch_synthetic_benchmark.py

...

Code Block
#!/bin/bash
# Slurm batch script: Horovod synthetic benchmark on 8 V100 GPUs,
# 4 GPUs per node (i.e. spread over 2 nodes), one task per GPU.
#SBATCH --job-name=hvd_tf
#SBATCH --time=01:00:00
#SBATCH --gpus=8
#SBATCH --gpus-per-node=4
#SBATCH --constraint=v100
#SBATCH --ntasks=8
#SBATCH --cpus-per-task=4
#SBATCH --mem=64G

module use /sw/csgv
module load dl
module load intelpython3
# or tensorflow
# NOTE(review): the benchmark below is the PyTorch one, yet tensorflow/2.2 is
# loaded here -- confirm which framework module this script should load.
module load tensorflow/2.2
module load horovod/0.20.3
# Record the resolved software environment in the job log.
module list

# Silence Open MPI's warning about missing openib device parameters.
export OMPI_MCA_btl_openib_warn_no_device_params_found=0
export UCX_MEMTYPE_CACHE=n
# Restrict UCX to the TCP transport.
export UCX_TLS=tcp

# Launch one process per allocated task; the sizes come from the allocation
# so the command stays in sync with the #SBATCH directives above.
srun -u \
  -n "${SLURM_NTASKS}" \
  -N "${SLURM_NNODES}" \
  -c "${SLURM_CPUS_PER_TASK}" \
  --cpu-bind=cores \
  python pytorch_synthetic_benchmark.py

...

Code Block
#!/bin/bash
# Slurm batch script: Horovod synthetic benchmarks (PyTorch, then TensorFlow 2)
# inside a Singularity container on a single V100 GPU.
# NOTE(review): --cpus-per-task=64 is kept exactly as found; confirm it fits
# the target node type.
#SBATCH --gpus=1
#SBATCH --gpus-per-node=1
#SBATCH --constraint=v100
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=64
#SBATCH --mem=64G
#SBATCH --time=00:30:00

module load openmpi/4.0.3-cuda10.1
module load singularity

# Container image that provides Horovod and both frameworks.
export IMAGE=/ibex/scratch/shaima0d/scratch/singularity_mpi_testing/images/horovod_gpu_0192.sif

# --nv exposes the host NVIDIA driver and GPUs inside the container.
# Output is appended (>>), so repeated runs accumulate in the same log file.
echo "PyTorch with Horovod"
mpirun -np 1 singularity exec --nv "$IMAGE" \
  python ./pytorch_synthetic_benchmark.py --model resnet50 \
  --batch-size 128 --num-warmup-batches 10 \
  --num-batches-per-iter 10 --num-iters 10 >> pytorch_1GPU.log

echo "Tensorflow2 with Horovod"
mpirun -np 1 singularity exec --nv "$IMAGE" \
  python ./tensorflow2_synthetic_benchmark.py --model ResNet50 \
  --batch-size 128 --num-warmup-batches 10 \
  --num-batches-per-iter 10 --num-iters 10 >> TF2_1GPU.log

...

Code Block
#!/bin/bash
# Slurm batch script: Horovod synthetic benchmarks (PyTorch, then TensorFlow 2)
# inside a Singularity container on 8 V100 GPUs of a single node.
# NOTE(review): --cpus-per-task=64 with 8 tasks on one node is kept exactly as
# found; confirm the intended per-task CPU count for the target node type.
#SBATCH --gpus=8
#SBATCH --gpus-per-node=8
#SBATCH --constraint=v100
#SBATCH --ntasks=8
#SBATCH --cpus-per-task=64
#SBATCH --mem=64G
#SBATCH --time=00:30:00

module load openmpi/4.0.3-cuda10.1
module load singularity

# Container image that provides Horovod and both frameworks.
export IMAGE=/ibex/scratch/shaima0d/scratch/singularity_mpi_testing/images/horovod_gpu_0192.sif

# One MPI rank per GPU (-np 8 matches --ntasks / --gpus above).
# --nv exposes the host NVIDIA driver and GPUs inside the container.
# Output is appended (>>), so repeated runs accumulate in the same log file.
echo "PyTorch with Horovod"
mpirun -np 8 singularity exec --nv "$IMAGE" \
  python ./pytorch_synthetic_benchmark.py --model resnet50 \
  --batch-size 128 --num-warmup-batches 10 \
  --num-batches-per-iter 10 --num-iters 10 >> pytorch_1node.log

echo "Tensorflow2 with Horovod"
mpirun -np 8 singularity exec --nv "$IMAGE" \
  python ./tensorflow2_synthetic_benchmark.py --model ResNet50 \
  --batch-size 128 --num-warmup-batches 10 \
  --num-batches-per-iter 10 --num-iters 10 >> TF2_1node.log

...

Code Block
#!/bin/bash
# Slurm batch script: Horovod synthetic benchmarks (PyTorch, then TensorFlow 2)
# inside a Singularity container on 8 V100 GPUs, 4 GPUs per node.
# NOTE(review): --cpus-per-task=64 with 4 tasks per node is kept exactly as
# found; confirm the intended per-task CPU count for the target node type.
#SBATCH --gpus=8
#SBATCH --gpus-per-node=4
#SBATCH --constraint=v100
#SBATCH --ntasks=8
#SBATCH --cpus-per-task=64
#SBATCH --mem=64G
#SBATCH --time=00:30:00

module load openmpi/4.0.3-cuda10.1
module load singularity

# Container image that provides Horovod and both frameworks.
export IMAGE=/ibex/scratch/shaima0d/scratch/singularity_mpi_testing/images/horovod_gpu_0192.sif

# 8 MPI ranks in total, 4 per node (-N 4 matches --gpus-per-node above).
# --nv exposes the host NVIDIA driver and GPUs inside the container.
# Output is appended (>>), so repeated runs accumulate in the same log file.
echo "PyTorch with Horovod"
mpirun -np 8 -N 4 singularity exec --nv "$IMAGE" \
  python ./pytorch_synthetic_benchmark.py --model resnet50 \
  --batch-size 128 --num-warmup-batches 10 \
  --num-batches-per-iter 10 --num-iters 10 >> pytorch_multiGPU.log

echo "Tensorflow2 with Horovod"
mpirun -np 8 -N 4 singularity exec --nv "$IMAGE" \
  python ./tensorflow2_synthetic_benchmark.py --model ResNet50 \
  --batch-size 128 --num-warmup-batches 10 \
  --num-batches-per-iter 10 --num-iters 10 >> TF2_multiGPU.log