1 file changed: +6 -6 lines changed
Original file line number | Diff line number | Diff line change
@@ -51,19 +51,19 @@ spec:
5151 -x IB_RX_QUEUE_LEN=8192 \
5252 -x NCCL_SOCKET_IFNAME=eth0 \
5353 -x NCCL_IGNORE_CPU_AFFINITY=1 \
54- /workspace/nccl-tests/build/alltoall_perf -b 8 -f 2 -g 1 -e 4G -c 1
54+ /workspace/nccl-tests/build/all_reduce_perf -b 8 -f 2 -g 1 -e 4G -c 1
5555 while :; do { [[ $exit ]] && break; }; sleep 1; done
5656 ports :
5757 - { name: mpijob-port, containerPort: 2222, protocol: TCP }
5858 image : ord.ocir.io/hpc_limited_availability/nccl-tests:pytorch-24.02-nccl-2.20.5-1
5959 name : mpimaster
6060 resources :
6161 limits :
62- ephemeral-storage : 32Gi
62+ ephemeral-storage : 16Gi
6363 requests :
64- cpu : 128
65- ephemeral-storage : 32Gi
66- memory : 512Gi
64+ cpu : 4
65+ ephemeral-storage : 16Gi
66+ memory : 1Gi
6767 securityContext :
6868 privileged : true
6969 capabilities :
@@ -121,4 +121,4 @@ spec:
121121 - { key: nvidia.com/gpu, operator: Exists }
122122 volumes :
123123 - { name: devinf, hostPath: { path: /dev/infiniband }}
124- - { name: shm, emptyDir: { medium: Memory, sizeLimit: 32Gi }}
124+ - { name: shm, emptyDir: { medium: Memory, sizeLimit: 32Gi }}
You can’t perform that action at this time.
0 commit comments