File tree Expand file tree Collapse file tree 4 files changed +4
-4
lines changed
microbenchmarks/trillium/collectives Expand file tree Collapse file tree 4 files changed +4
-4
lines changed Original file line number Diff line number Diff line change @@ -16,7 +16,7 @@ python3 ~/xpk/xpk.py workload create \
1616 --device-type=v6e-256 \
1717 --command="git clone https://github.com/AI-Hypercomputer/accelerator-microbenchmarks.git && cd accelerator-microbenchmarks && git checkout trillium-collectives && pip install -r requirements.txt && echo '4096 41943040 314572800' > /proc/sys/net/ipv4/tcp_rmem && export LIBTPU_INIT_ARGS='--megascale_grpc_premap_memory_bytes=17179869184 --xla_tpu_enable_sunk_dcn_allreduce_done_with_host_reduction=true' && python src/run_benchmark.py --config=configs/1x_v6e_256.yaml" \
1818 --num-slices=1 \
19- --docker-image=us-docker.pkg.dev/cloud-tpu-images/jax-stable-stack/tpu:jax0.4.37-rev1 \
19+ --docker-image=us-docker.pkg.dev/cloud-tpu-images/jax-stable-stack/tpu:jax0.5.2-rev1 \
2020 --workload=${WORKLOAD_NAME}
2121```
2222
Original file line number Diff line number Diff line change @@ -5,5 +5,5 @@ python3 ~/xpk/xpk.py workload create \
55 --device-type=v6e-256 \
66 --command=" git clone https://github.com/AI-Hypercomputer/accelerator-microbenchmarks.git && cd accelerator-microbenchmarks && git checkout trillium-collectives && pip install -r requirements.txt && echo '4096 41943040 314572800' > /proc/sys/net/ipv4/tcp_rmem && export LIBTPU_INIT_ARGS='--megascale_grpc_premap_memory_bytes=17179869184 --xla_tpu_enable_sunk_dcn_allreduce_done_with_host_reduction=true' && python src/run_benchmark.py --config=configs/1x_v6e_256.yaml" \
77 --num-slices=1 \
8- --docker-image=us-docker.pkg.dev/cloud-tpu-images/jax-stable-stack/tpu:jax0.4.37-rev1 \
8+ --docker-image=us-docker.pkg.dev/cloud-tpu-images/jax-stable-stack/tpu:jax0.5.2-rev1 \
99 --workload=${WORKLOAD_NAME}
Original file line number Diff line number Diff line change @@ -5,5 +5,5 @@ python3 ~/xpk/xpk.py workload create \
55 --device-type=v6e-256 \
66 --command=" git clone https://github.com/AI-Hypercomputer/accelerator-microbenchmarks.git && cd accelerator-microbenchmarks && git checkout trillium-collectives && pip install -r requirements.txt && echo '4096 41943040 314572800' > /proc/sys/net/ipv4/tcp_rmem && export LIBTPU_INIT_ARGS='--megascale_grpc_premap_memory_bytes=17179869184 --xla_tpu_enable_sunk_dcn_allreduce_done_with_host_reduction=true' && python src/run_benchmark.py --config=configs/2x_v6e_256.yaml" \
77 --num-slices=2 \
8- --docker-image=us-docker.pkg.dev/cloud-tpu-images/jax-stable-stack/tpu:jax0.4.37-rev1 \
8+ --docker-image=us-docker.pkg.dev/cloud-tpu-images/jax-stable-stack/tpu:jax0.5.2-rev1 \
99 --workload=${WORKLOAD_NAME}
Original file line number Diff line number Diff line change @@ -5,5 +5,5 @@ python3 ~/dev/xpk/xpk.py workload create \
55 --device-type=v6e-256 \
66 --command=" git clone https://github.com/AI-Hypercomputer/accelerator-microbenchmarks.git && cd accelerator-microbenchmarks && git checkout trillium-collectives && pip install -r requirements.txt && echo '4096 41943040 314572800' > /proc/sys/net/ipv4/tcp_rmem && export LIBTPU_INIT_ARGS='--megascale_grpc_premap_memory_bytes=17179869184 --xla_tpu_enable_sunk_dcn_allreduce_done_with_host_reduction=true' && python src/run_benchmark.py --config=configs/4x_v6e_256.yaml" \
77 --num-slices=4 \
8- --docker-image=us-docker.pkg.dev/cloud-tpu-images/jax-stable-stack/tpu:jax0.4.37-rev1 \
8+ --docker-image=us-docker.pkg.dev/cloud-tpu-images/jax-stable-stack/tpu:jax0.5.2-rev1 \
99 --workload=${WORKLOAD_NAME}
You can’t perform that action at this time.
0 commit comments