@@ -79,24 +79,24 @@ steps:
79
79
parallel-scp -t 0 -r -h ${HOSTFILE} -x "-i ${KeyFilePath}" -O $SSH_OPTION ${ROOT_DIR} ${DST_DIR}
80
80
workingDirectory : ' $(System.DefaultWorkingDirectory)'
81
81
82
- - task : Bash@3
83
- name : GenerateExecutionFile
84
- displayName : Generate execution file
85
- inputs :
86
- targetType : ' inline'
87
- script : |
88
- set -e
89
- HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
90
- ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
91
- SSH_OPTION="StrictHostKeyChecking=no"
92
- KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
93
- parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
94
- -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
95
- cd /root/mscclpp/msccl-users; \
96
- mkdir -p execution-files; \
97
- cd /root/mscclpp/msccl-users; \
98
- bash algos/mscclpp_a100/generate_execution_plan.sh"'
99
- workingDirectory : ' $(System.DefaultWorkingDirectory)'
82
+ # - task: Bash@3
83
+ # name: GenerateExecutionFile
84
+ # displayName: Generate execution file
85
+ # inputs:
86
+ # targetType: 'inline'
87
+ # script: |
88
+ # set -e
89
+ # HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
90
+ # ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
91
+ # SSH_OPTION="StrictHostKeyChecking=no"
92
+ # KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
93
+ # parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
94
+ # -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
95
+ # cd /root/mscclpp/msccl-users; \
96
+ # mkdir -p execution-files; \
97
+ # cd /root/mscclpp/msccl-users; \
98
+ # bash algos/mscclpp_a100/generate_execution_plan.sh"'
99
+ # workingDirectory: '$(System.DefaultWorkingDirectory)'
100
100
101
101
- task : Bash@3
102
102
name : InstallNcclTests
@@ -116,56 +116,56 @@ steps:
116
116
MPI=1 MPI_HOME=/usr/local/mpi make -j"'
117
117
workingDirectory : ' $(System.DefaultWorkingDirectory)'
118
118
119
- - task : Bash@3
120
- name : RunNcclAllReduceTest
121
- displayName : Run NCCL AllReduce Test
122
- inputs :
123
- targetType : inline
124
- script : |
125
- set -e
126
- HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
127
- ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
128
- SSH_OPTION="StrictHostKeyChecking=no"
129
- KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
130
- parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
131
- -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
132
- cd /root/mscclpp; \
133
- mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
134
- workingDirectory : ' $(System.DefaultWorkingDirectory)'
119
+ # - task: Bash@3
120
+ # name: RunNcclAllReduceTest
121
+ # displayName: Run NCCL AllReduce Test
122
+ # inputs:
123
+ # targetType: inline
124
+ # script: |
125
+ # set -e
126
+ # HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
127
+ # ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
128
+ # SSH_OPTION="StrictHostKeyChecking=no"
129
+ # KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
130
+ # parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
131
+ # -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
132
+ # cd /root/mscclpp; \
133
+ # mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_reduce_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
134
+ # workingDirectory: '$(System.DefaultWorkingDirectory)'
135
135
136
- - task : Bash@3
137
- name : RunNcclAllGatherTest
138
- displayName : Run NCCL AllGather Test
139
- inputs :
140
- targetType : inline
141
- script : |
142
- set -e
143
- HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
144
- ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
145
- SSH_OPTION="StrictHostKeyChecking=no"
146
- KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
147
- parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
148
- -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
149
- cd /root/mscclpp; \
150
- mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
151
- workingDirectory : ' $(System.DefaultWorkingDirectory)'
136
+ # - task: Bash@3
137
+ # name: RunNcclAllGatherTest
138
+ # displayName: Run NCCL AllGather Test
139
+ # inputs:
140
+ # targetType: inline
141
+ # script: |
142
+ # set -e
143
+ # HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
144
+ # ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
145
+ # SSH_OPTION="StrictHostKeyChecking=no"
146
+ # KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
147
+ # parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
148
+ # -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
149
+ # cd /root/mscclpp; \
150
+ # mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/all_gather_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
151
+ # workingDirectory: '$(System.DefaultWorkingDirectory)'
152
152
153
- - task : Bash@3
154
- name : RunNcclReduceScatterTest
155
- displayName : Run NCCL Reduce Scatter Test
156
- inputs :
157
- targetType : inline
158
- script : |
159
- set -e
160
- HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
161
- ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
162
- SSH_OPTION="StrictHostKeyChecking=no"
163
- KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
164
- parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
165
- -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
166
- cd /root/mscclpp; \
167
- mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
168
- workingDirectory : ' $(System.DefaultWorkingDirectory)'
153
+ # - task: Bash@3
154
+ # name: RunNcclReduceScatterTest
155
+ # displayName: Run NCCL Reduce Scatter Test
156
+ # inputs:
157
+ # targetType: inline
158
+ # script: |
159
+ # set -e
160
+ # HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
161
+ # ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
162
+ # SSH_OPTION="StrictHostKeyChecking=no"
163
+ # KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
164
+ # parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
165
+ # -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
166
+ # cd /root/mscclpp; \
167
+ # mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
168
+ # workingDirectory: '$(System.DefaultWorkingDirectory)'
169
169
170
170
- task : Bash@3
171
171
name : InstallNccl
@@ -245,25 +245,25 @@ steps:
245
245
mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="allreduce" /root/nccl-tests/build/broadcast_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
246
246
workingDirectory : ' $(System.DefaultWorkingDirectory)'
247
247
248
- - task : Bash@3
249
- name : RunNcclReduceScatterFallbaclkToNcclTest
250
- displayName : Run NCCL ReduceScatter Test with or without Fallback to NCCL operation
251
- inputs :
252
- targetType : ' inline'
253
- script : |
254
- set -e
255
- HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
256
- ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
257
- SSH_OPTION="StrictHostKeyChecking=no"
258
- KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
259
- parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
260
- -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
261
- cd /root/mscclpp; \
262
- echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"reducescatter\" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
263
- mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="reducescatter" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
264
- echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"broadcast\" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
265
- mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="broadcast" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
266
- workingDirectory : ' $(System.DefaultWorkingDirectory)'
248
+ # - task: Bash@3
249
+ # name: RunNcclReduceScatterFallbaclkToNcclTest
250
+ # displayName: Run NCCL ReduceScatter Test with or without Fallback to NCCL operation
251
+ # inputs:
252
+ # targetType: 'inline'
253
+ # script: |
254
+ # set -e
255
+ # HOSTFILE=$(System.DefaultWorkingDirectory)/mscclpp/test/deploy/hostfile_ci
256
+ # ROOT_DIR=$(System.DefaultWorkingDirectory)/mscclpp
257
+ # SSH_OPTION="StrictHostKeyChecking=no"
258
+ # KeyFilePath=${SSHKEYFILE_SECUREFILEPATH}
259
+ # parallel-ssh -i -t 0 -h ${HOSTFILE} -x "-i ${KeyFilePath}" \
260
+ # -O $SSH_OPTION 'sudo docker exec -t mscclpp-test bash -c "\
261
+ # cd /root/mscclpp; \
262
+ # echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"reducescatter\" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
263
+ # mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="reducescatter" /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20; \
264
+ # echo \"mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION=\"broadcast\" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20\"; \
265
+ # mpirun -np 8 --bind-to numa --allow-run-as-root -x LD_PRELOAD=/root/mscclpp/build/apps/nccl/libmscclpp_nccl.so -x NCCL_DEBUG=WARN -x MSCCLPP_ENABLE_NCCL_FALLBACK=TRUE -x MSCCLPP_NCCL_LIB_PATH=/root/nccl/build/lib/libnccl.so -x MSCCLPP_FORCE_NCCL_FALLBACK_OPERATION="broadcast" -x MSCCLPP_EXECUTION_PLAN_DIR=/root/mscclpp/msccl-users/execution-files /root/nccl-tests/build/reduce_scatter_perf -b 1K -e 1G -f 2 -d half -G 20 -w 10 -n 20"'
266
+ # workingDirectory: '$(System.DefaultWorkingDirectory)'
267
267
268
268
- task : AzureCLI@2
269
269
name : StopVMSS
0 commit comments