@@ -175,6 +175,104 @@ jobs:
175175 mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
176176 workingDirectory : ' $(System.DefaultWorkingDirectory)'
177177
# Clones and builds upstream NCCL inside the mscclpp-test container on every
# host listed in the CI hostfile.
- task: Bash@3
  name: InstallNccl
  displayName: Install NCCL
  inputs:
    targetType: 'inline'
    script: |
      set -e
      HOSTFILE="$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci"
      SSH_OPTION="StrictHostKeyChecking=no"
      KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
      # Quoting layers: the single-quoted argument is sent verbatim to the
      # remote login shell, which parses the double-quoted string handed to
      # bash -c in the container. Any double quote inside that string must be
      # written as \" or it terminates the bash -c script early.
      # set -e makes a failed clone/cd abort instead of running make in the
      # wrong directory; the directory test keeps the step re-runnable when
      # the checkout already exists.
      parallel-ssh -i -t 0 -h "${HOSTFILE}" -x "-i ${KeyFilePath}" \
        -O "$SSH_OPTION" 'sudo docker exec -t mscclpp-test bash -c "\
        set -e; \
        cd; \
        [ -d nccl ] || git clone https://github.com/NVIDIA/nccl.git; \
        cd nccl; \
        make -j src.build NVCC_GENCODE=\"-gencode=arch=compute_80,code=sm_80\""'
    workingDirectory: '$(System.DefaultWorkingDirectory)'
195+
# Runs nccl-tests all_gather_perf twice through the MSCCL++ NCCL shim with
# NCCL fallback enabled: once forcing fallback for allgather, once forcing it
# for allreduce.
# NOTE(review): the second run presumably covers the case where all_gather is
# not routed to NCCL; confirm against the MSCCL++ fallback documentation.
# NOTE(review): "Fallbaclk" in the step name is a typo for "Fallback"; it is
# kept because the step name may be referenced elsewhere in the pipeline.
- task: Bash@3
  name: RunNcclAllGatherFallbaclkToNcclTest
  displayName: Run NCCL AllGather Test with or without Fallback to NCCL operation
  inputs:
    targetType: 'inline'
    script: |
      set -e
      HOSTFILE="$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci"
      SSH_OPTION="StrictHostKeyChecking=no"
      KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
      # Quoting layers: the single-quoted argument is sent verbatim to the
      # remote login shell, which parses the double-quoted string handed to
      # bash -c in the container. The echo strings therefore use \" so they
      # do not terminate the bash -c script; with a bare double quote the
      # container only ran "cd /root/mscclpp; echo mpirun" and no test.
      # set -e inside the container stops at the first failing mpirun so its
      # exit status is not hidden by the run that follows.
      parallel-ssh -i -t 0 -h "${HOSTFILE}" -x "-i ${KeyFilePath}" \
        -O "$SSH_OPTION" 'sudo docker exec -t mscclpp-test bash -c "\
        set -e; \
        cd /root/mscclpp; \
        echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=allgather /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
        mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=allgather /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
        echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=allreduce /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
        mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=allreduce /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
    workingDirectory: '$(System.DefaultWorkingDirectory)'
215+
# Runs nccl-tests all_reduce_perf twice through the MSCCL++ NCCL shim with
# NCCL fallback enabled: once forcing fallback for allreduce, once forcing it
# for allgather.
# NOTE(review): the second run presumably covers the case where all_reduce is
# not routed to NCCL; confirm against the MSCCL++ fallback documentation.
# NOTE(review): "Fallbaclk" in the step name is a typo for "Fallback"; it is
# kept because the step name may be referenced elsewhere in the pipeline.
- task: Bash@3
  name: RunNcclAllReduceFallbaclkToNcclTest
  displayName: Run NCCL AllReduce Test with or without Fallback to NCCL operation
  inputs:
    targetType: 'inline'
    script: |
      set -e
      HOSTFILE="$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci"
      SSH_OPTION="StrictHostKeyChecking=no"
      KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
      # Quoting layers: the single-quoted argument is sent verbatim to the
      # remote login shell, which parses the double-quoted string handed to
      # bash -c in the container. The echo strings therefore use \" so they
      # do not terminate the bash -c script; with a bare double quote the
      # container only ran "cd /root/mscclpp; echo mpirun" and no test.
      # set -e inside the container stops at the first failing mpirun so its
      # exit status is not hidden by the run that follows.
      parallel-ssh -i -t 0 -h "${HOSTFILE}" -x "-i ${KeyFilePath}" \
        -O "$SSH_OPTION" 'sudo docker exec -t mscclpp-test bash -c "\
        set -e; \
        cd /root/mscclpp; \
        echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=allreduce /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
        mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=allreduce /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
        echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=allgather /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
        mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=allgather /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
    workingDirectory: '$(System.DefaultWorkingDirectory)'
235+
# Runs nccl-tests broadcast_perf twice through the MSCCL++ NCCL shim with
# NCCL fallback enabled: once forcing fallback for broadcast, once forcing it
# for allreduce.
# NOTE(review): the second run presumably covers the case where broadcast is
# not routed to NCCL; confirm against the MSCCL++ fallback documentation.
# NOTE(review): "Fallbaclk" in the step name is a typo for "Fallback"; it is
# kept because the step name may be referenced elsewhere in the pipeline.
- task: Bash@3
  name: RunNcclBroadcastFallbaclkToNcclTest
  displayName: Run NCCL Broadcast Test with or without Fallback to NCCL operation
  inputs:
    targetType: 'inline'
    script: |
      set -e
      HOSTFILE="$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci"
      SSH_OPTION="StrictHostKeyChecking=no"
      KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
      # Quoting layers: the single-quoted argument is sent verbatim to the
      # remote login shell, which parses the double-quoted string handed to
      # bash -c in the container. The echo strings therefore use \" so they
      # do not terminate the bash -c script; with a bare double quote the
      # container only ran "cd /root/mscclpp; echo mpirun" and no test.
      # set -e inside the container stops at the first failing mpirun so its
      # exit status is not hidden by the run that follows.
      parallel-ssh -i -t 0 -h "${HOSTFILE}" -x "-i ${KeyFilePath}" \
        -O "$SSH_OPTION" 'sudo docker exec -t mscclpp-test bash -c "\
        set -e; \
        cd /root/mscclpp; \
        echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=broadcast /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
        mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=broadcast /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
        echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=allreduce /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
        mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=allreduce /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
    workingDirectory: '$(System.DefaultWorkingDirectory)'
255+
# Runs nccl-tests reduce_scatter_perf twice through the MSCCL++ NCCL shim with
# NCCL fallback enabled: once forcing fallback for reducescatter, once forcing
# it for broadcast while also pointing MSCCLPP_EXECUTION_PLAN_DIR at the
# msccl-users execution files.
# NOTE(review): the second run presumably covers the case where
# reduce_scatter is not routed to NCCL and uses an execution plan instead;
# confirm against the MSCCL++ fallback documentation.
# NOTE(review): "Fallbaclk" in the step name is a typo for "Fallback"; it is
# kept because the step name may be referenced elsewhere in the pipeline.
- task: Bash@3
  name: RunNcclReduceScatterFallbaclkToNcclTest
  displayName: Run NCCL ReduceScatter Test with or without Fallback to NCCL operation
  inputs:
    targetType: 'inline'
    script: |
      set -e
      HOSTFILE="$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci"
      SSH_OPTION="StrictHostKeyChecking=no"
      KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
      # Quoting layers: the single-quoted argument is sent verbatim to the
      # remote login shell, which parses the double-quoted string handed to
      # bash -c in the container. The echo strings therefore use \" so they
      # do not terminate the bash -c script; with a bare double quote the
      # container only ran "cd /root/mscclpp; echo mpirun" and no test.
      # set -e inside the container stops at the first failing mpirun so its
      # exit status is not hidden by the run that follows.
      parallel-ssh -i -t 0 -h "${HOSTFILE}" -x "-i ${KeyFilePath}" \
        -O "$SSH_OPTION" 'sudo docker exec -t mscclpp-test bash -c "\
        set -e; \
        cd /root/mscclpp; \
        echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=reducescatter /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
        mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=reducescatter /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
        echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=broadcast -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
        mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=broadcast -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
    workingDirectory: '$(System.DefaultWorkingDirectory)'
275+
178276 - task : AzureCLI@2
179277 name : StopVMSS
180278 displayName : Deallocate VMSS
0 commit comments