Skip to content

Commit 60473d1

Browse files
Update BM.GPU.H100.8-nccl-test.yaml
1 parent 5b790c6 commit 60473d1

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

manifests/BM.GPU.H100.8-nccl-test.yaml

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -51,19 +51,19 @@ spec:
51 51
-x IB_RX_QUEUE_LEN=8192 \
52 52
-x NCCL_SOCKET_IFNAME=eth0 \
53 53
-x NCCL_IGNORE_CPU_AFFINITY=1 \
54-
/workspace/nccl-tests/build/alltoall_perf -b 8 -f 2 -g 1 -e 4G -c 1
54+
/workspace/nccl-tests/build/all_reduce_perf -b 8 -f 2 -g 1 -e 4G -c 1
55 55
while :; do { [[ $exit ]] && break; }; sleep 1; done
56 56
ports:
57 57
- { name: mpijob-port, containerPort: 2222, protocol: TCP }
58 58
image: ord.ocir.io/hpc_limited_availability/nccl-tests:pytorch-24.02-nccl-2.20.5-1
59 59
name: mpimaster
60 60
resources:
61 61
limits:
62-
ephemeral-storage: 32Gi
62+
ephemeral-storage: 16Gi
63 63
requests:
64-
cpu: 128
65-
ephemeral-storage: 32Gi
66-
memory: 512Gi
64+
cpu: 4
65+
ephemeral-storage: 16Gi
66+
memory: 1Gi
67 67
securityContext:
68 68
privileged: true
69 69
capabilities:
@@ -121,4 +121,4 @@ spec:
121 121
- { key: nvidia.com/gpu, operator: Exists }
122 122
volumes:
123 123
- { name: devinf, hostPath: { path: /dev/infiniband }}
124-
- { name: shm, emptyDir: { medium: Memory, sizeLimit: 32Gi }}
124+
- { name: shm, emptyDir: { medium: Memory, sizeLimit: 32Gi }}

0 commit comments

Comments
 (0)