...
Code Block |
---|
#!/bin/bash
# Slurm batch job: runs train.py with Horovod on 2 V100 GPUs
# (1 GPU per node, one task per GPU), launched through srun.
#
# Resource request (each directive must sit on its own line to be honoured):
#SBATCH --job-name=hvd_tf
#SBATCH --time=01:00:00
#SBATCH --gpus=2
#SBATCH --gpus-per-node=1
#SBATCH --constraint=v100
#SBATCH --ntasks=2
#SBATCH --cpus-per-task=4
#SBATCH --mem=64G

# Software environment.
module load dl
module load intelpython3
# or tensorflow
module load tensorflow/2.2
module load horovod/0.20.3
# Record the loaded modules in the job output.
module list

# Open MPI / UCX runtime settings used by this job.
export OMPI_MCA_btl_openib_warn_no_device_params_found=0
export UCX_MEMTYPE_CACHE=n
export UCX_TLS=tcp

# Launch one process per allocated task; task/node/CPU counts are taken from
# the values Slurm exports for this allocation. -u keeps output unbuffered.
srun -u \
  -n "${SLURM_NTASKS}" \
  -N "${SLURM_NNODES}" \
  -c "${SLURM_CPUS_PER_TASK}" \
  --cpu-bind=cores \
  python train.py
...
Code Block |
---|
#!/bin/bash
# Slurm batch job: runs pytorch_synthetic_benchmark.py with Horovod on
# 8 V100 GPUs located on a single node (8 tasks, one per GPU), via srun.
#
# Resource request (each directive must sit on its own line to be honoured):
#SBATCH --job-name=hvd_tf
#SBATCH --time=01:00:00
#SBATCH --gpus=8
#SBATCH --gpus-per-node=8
#SBATCH --constraint=v100
#SBATCH --ntasks=8
#SBATCH --cpus-per-task=4
#SBATCH --mem=64G

# Software environment; /sw/csgv is added to the module search path first.
module use /sw/csgv
module load dl
module load intelpython3
# or tensorflow
module load tensorflow/2.2
module load horovod/0.20.3
# Record the loaded modules in the job output.
module list

# Open MPI / UCX runtime settings used by this job.
export OMPI_MCA_btl_openib_warn_no_device_params_found=0
export UCX_MEMTYPE_CACHE=n
export UCX_TLS=tcp

# Launch one process per allocated task; task/node/CPU counts are taken from
# the values Slurm exports for this allocation. -u keeps output unbuffered.
srun -u \
  -n "${SLURM_NTASKS}" \
  -N "${SLURM_NNODES}" \
  -c "${SLURM_CPUS_PER_TASK}" \
  --cpu-bind=cores \
  python pytorch_synthetic_benchmark.py
...
Code Block |
---|
#!/bin/bash
# Slurm batch job: runs pytorch_synthetic_benchmark.py with Horovod on
# 8 V100 GPUs spread 4 per node (8 tasks, one per GPU), via srun.
#
# Resource request (each directive must sit on its own line to be honoured):
#SBATCH --job-name=hvd_tf
#SBATCH --time=01:00:00
#SBATCH --gpus=8
#SBATCH --gpus-per-node=4
#SBATCH --constraint=v100
#SBATCH --ntasks=8
#SBATCH --cpus-per-task=4
#SBATCH --mem=64G

# Software environment; /sw/csgv is added to the module search path first.
module use /sw/csgv
module load dl
module load intelpython3
# or tensorflow
module load tensorflow/2.2
module load horovod/0.20.3
# Record the loaded modules in the job output.
module list

# Open MPI / UCX runtime settings used by this job.
export OMPI_MCA_btl_openib_warn_no_device_params_found=0
export UCX_MEMTYPE_CACHE=n
export UCX_TLS=tcp

# Launch one process per allocated task; task/node/CPU counts are taken from
# the values Slurm exports for this allocation. -u keeps output unbuffered.
srun -u \
  -n "${SLURM_NTASKS}" \
  -N "${SLURM_NNODES}" \
  -c "${SLURM_CPUS_PER_TASK}" \
  --cpu-bind=cores \
  python pytorch_synthetic_benchmark.py
...
Code Block |
---|
#!/bin/bash
# Slurm batch job: runs the Horovod PyTorch and TensorFlow 2 synthetic
# benchmarks on a single V100 GPU, inside a Singularity container started
# through mpirun. Benchmark output is appended to per-framework log files.
#
# Resource request (each directive must sit on its own line to be honoured):
#SBATCH --gpus=1
#SBATCH --gpus-per-node=1
#SBATCH --constraint=v100
#SBATCH --ntasks=1
# NOTE(review): 64 CPUs per task is kept as found; it may be a merge artifact
# of an edit from 6 to 4 - confirm against the node's core count.
#SBATCH --cpus-per-task=64
#SBATCH --mem=64G
#SBATCH --time=00:30:00

# Host-side MPI and the container runtime.
module load openmpi/4.0.3-cuda10.1
module load singularity

# Container image that provides Python and Horovod for both benchmarks.
export IMAGE=/ibex/scratch/shaima0d/scratch/singularity_mpi_testing/images/horovod_gpu_0192.sif

echo "PyTorch with Horovod"
# --nv exposes the host NVIDIA GPUs inside the container.
mpirun -np 1 \
  singularity exec --nv "$IMAGE" \
  python ./pytorch_synthetic_benchmark.py \
  --model resnet50 \
  --batch-size 128 \
  --num-warmup-batches 10 \
  --num-batches-per-iter 10 \
  --num-iters 10 >>pytorch_1GPU.log

echo "Tensorflow2 with Horovod"
mpirun -np 1 \
  singularity exec --nv "$IMAGE" \
  python ./tensorflow2_synthetic_benchmark.py \
  --model ResNet50 \
  --batch-size 128 \
  --num-warmup-batches 10 \
  --num-batches-per-iter 10 \
  --num-iters 10 >>TF2_1GPU.log
...
Code Block |
---|
#!/bin/bash
# Slurm batch job: runs the Horovod PyTorch and TensorFlow 2 synthetic
# benchmarks on 8 V100 GPUs of a single node (8 MPI ranks), inside a
# Singularity container started through mpirun. Benchmark output is appended
# to per-framework log files.
#
# Resource request (each directive must sit on its own line to be honoured):
#SBATCH --gpus=8
#SBATCH --gpus-per-node=8
#SBATCH --constraint=v100
#SBATCH --ntasks=8
# NOTE(review): 64 CPUs per task is kept as found; it may be a merge artifact
# of an edit from 6 to 4 - confirm against the node's core count.
#SBATCH --cpus-per-task=64
#SBATCH --mem=64G
#SBATCH --time=00:30:00

# Host-side MPI and the container runtime.
module load openmpi/4.0.3-cuda10.1
module load singularity

# Container image that provides Python and Horovod for both benchmarks.
export IMAGE=/ibex/scratch/shaima0d/scratch/singularity_mpi_testing/images/horovod_gpu_0192.sif

echo "PyTorch with Horovod"
# -np matches --ntasks above; --nv exposes the host NVIDIA GPUs in the container.
mpirun -np 8 \
  singularity exec --nv "$IMAGE" \
  python ./pytorch_synthetic_benchmark.py \
  --model resnet50 \
  --batch-size 128 \
  --num-warmup-batches 10 \
  --num-batches-per-iter 10 \
  --num-iters 10 >>pytorch_1node.log

echo "Tensorflow2 with Horovod"
mpirun -np 8 \
  singularity exec --nv "$IMAGE" \
  python ./tensorflow2_synthetic_benchmark.py \
  --model ResNet50 \
  --batch-size 128 \
  --num-warmup-batches 10 \
  --num-batches-per-iter 10 \
  --num-iters 10 >>TF2_1node.log
...
Code Block |
---|
#!/bin/bash
# Slurm batch job: runs the Horovod PyTorch and TensorFlow 2 synthetic
# benchmarks on 8 V100 GPUs spread 4 per node (8 MPI ranks, 4 per node),
# inside a Singularity container started through mpirun. Benchmark output is
# appended to per-framework log files.
#
# Resource request (each directive must sit on its own line to be honoured):
#SBATCH --gpus=8
#SBATCH --gpus-per-node=4
#SBATCH --constraint=v100
#SBATCH --ntasks=8
# NOTE(review): 64 CPUs per task is kept as found; it may be a merge artifact
# of an edit from 6 to 4 - confirm against the node's core count.
#SBATCH --cpus-per-task=64
#SBATCH --mem=64G
#SBATCH --time=00:30:00

# Host-side MPI and the container runtime.
module load openmpi/4.0.3-cuda10.1
module load singularity

# Container image that provides Python and Horovod for both benchmarks.
export IMAGE=/ibex/scratch/shaima0d/scratch/singularity_mpi_testing/images/horovod_gpu_0192.sif

echo "PyTorch with Horovod"
# -np matches --ntasks and -N matches --gpus-per-node above;
# --nv exposes the host NVIDIA GPUs in the container.
mpirun -np 8 -N 4 \
  singularity exec --nv "$IMAGE" \
  python ./pytorch_synthetic_benchmark.py \
  --model resnet50 \
  --batch-size 128 \
  --num-warmup-batches 10 \
  --num-batches-per-iter 10 \
  --num-iters 10 >>pytorch_multiGPU.log

echo "Tensorflow2 with Horovod"
mpirun -np 8 -N 4 \
  singularity exec --nv "$IMAGE" \
  python ./tensorflow2_synthetic_benchmark.py \
  --model ResNet50 \
  --batch-size 128 \
  --num-warmup-batches 10 \
  --num-batches-per-iter 10 \
  --num-iters 10 >>TF2_multiGPU.log