Commit 5092e25

Update recipes to use new MaxText release and command
1 parent af2a7cd commit 5092e25

File tree

16 files changed: +23 -23 lines changed

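Taken together, the diffs below make two recipe-facing changes: the MaxText checkout moves from the tpu-recipes-v0.1.0 tag to tpu-recipes-v0.1.1, and the benchmark runner is now launched as a Python module instead of by script path. A minimal sketch of the updated flow, run from the MaxText root directory and assuming $PROJECT and $ZONE are already exported as each recipe's README describes (the remaining flags are recipe-specific and omitted here; the single-host Llama3-8B recipe uses v6e-8 instead of v6e-256):

```
# Switch the MaxText checkout to the release tag these recipes now pin
git checkout tpu-recipes-v0.1.1

# Launch the benchmark runner in module form (previously: python3 benchmarks/benchmark_runner.py xpk);
# each recipe appends its own flags after --device_type, as shown in the diffs below.
python3 -m benchmarks.benchmark_runner xpk \
  --project=$PROJECT \
  --zone=$ZONE \
  --device_type=v6e-256
```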

training/trillium/GPT3-175B-MaxText/bf16/README.md

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@ Please follow this [link](https://github.com/AI-Hypercomputer/tpu-recipes/blob/m
 
 In step 1, use the MaxText [tpu-recipes-v0.1.0](https://github.com/AI-Hypercomputer/maxtext/releases/tag/tpu-recipes-v0.1.0) tag to run this recipe:
 ```
-git checkout tpu-recipes-v0.1.0
+git checkout tpu-recipes-v0.1.1
 ```
 
 In step 2, use the jax-stable-stack image containing JAX 0.5.2:
@@ -25,7 +25,7 @@ bash docker_build_dependency_image.sh DEVICE=tpu MODE=stable_stack BASEIMAGE=${B
 
 From the MaxText root directory, start your GPT3-175B workload
 ```
-python3 benchmarks/benchmark_runner.py xpk \
+python3 -m benchmarks.benchmark_runner xpk \
 --project=$PROJECT \
 --zone=$ZONE \
 --device_type=v6e-256 \

training/trillium/GPT3-175B-MaxText/bf16/gpt3-175b-v6e-256.sh

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 # Run this command from the MaxText root directory using the setup described in the README.
-python3 benchmarks/benchmark_runner.py xpk \
+python3 -m benchmarks.benchmark_runner xpk \
 --project=$PROJECT \
 --zone=$ZONE \
 --device_type=v6e-256 \

training/trillium/Llama2-70B-MaxText/README.md

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@ Please follow this [link](https://github.com/AI-Hypercomputer/tpu-recipes/blob/m
 
 In step 1, use the MaxText [tpu-recipes-v0.1.0](https://github.com/AI-Hypercomputer/maxtext/releases/tag/tpu-recipes-v0.1.0) tag to run this recipe:
 ```
-git checkout tpu-recipes-v0.1.0
+git checkout tpu-recipes-v0.1.1
 ```
 
 In step 2, use the jax-stable-stack image containing JAX 0.5.2:
@@ -25,7 +25,7 @@ bash docker_build_dependency_image.sh DEVICE=tpu MODE=stable_stack BASEIMAGE=${B
 
 From the MaxText root directory, start your Llama2-70B workload
 ```
-python3 benchmarks/benchmark_runner.py xpk \
+python3 -m benchmarks.benchmark_runner xpk \
 --project=$PROJECT \
 --zone=$ZONE \
 --device_type=v6e-256 \

training/trillium/Llama2-70B-MaxText/llama2-70b-v6e-256.sh

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 # Run this command from the MaxText root directory using the setup described in the README.
-python3 benchmarks/benchmark_runner.py xpk \
+python3 -m benchmarks.benchmark_runner xpk \
 --project=$PROJECT \
 --zone=$ZONE \
 --device_type=v6e-256 \

training/trillium/Llama3-8B-MaxText/v6e-8/README.md

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@ Please follow this [link](https://github.com/AI-Hypercomputer/tpu-recipes/blob/m
 
 In step 1, use the MaxText [tpu-recipes-v0.1.0](https://github.com/AI-Hypercomputer/maxtext/releases/tag/tpu-recipes-v0.1.0) tag to run this recipe:
 ```
-git checkout tpu-recipes-v0.1.0
+git checkout tpu-recipes-v0.1.1
 ```
 
 In step 2, use the jax-stable-stack image containing JAX 0.5.2:
@@ -25,7 +25,7 @@ bash docker_build_dependency_image.sh DEVICE=tpu MODE=stable_stack BASEIMAGE=${B
 
 From the MaxText root directory, start your Llama3.1-8B workload. Note: this benchmark uses a different model name than the equivalent v6e-256 recipe.
 ```
-python3 benchmarks/benchmark_runner.py xpk \
+python3 -m benchmarks.benchmark_runner xpk \
 --project=$PROJECT \
 --zone=$ZONE \
 --device_type=v6e-8 \

training/trillium/Llama3-8B-MaxText/v6e-8/llama3-8B-1xv6e-8.sh

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 # Run this command from the MaxText root directory using the setup described in the README.
-python3 benchmarks/benchmark_runner.py xpk \
+python3 -m benchmarks.benchmark_runner xpk \
 --project=$PROJECT \
 --zone=$ZONE \
 --device_type=v6e-8 \

training/trillium/Llama3.1-405B-MaxText/README.md

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@ Please follow this [link](https://github.com/AI-Hypercomputer/tpu-recipes/blob/m
 
 In step 1, use the MaxText [tpu-recipes-v0.1.0](https://github.com/AI-Hypercomputer/maxtext/releases/tag/tpu-recipes-v0.1.0) tag to run this recipe:
 ```
-git checkout tpu-recipes-v0.1.0
+git checkout tpu-recipes-v0.1.1
 ```
 
 In step 2, use the jax-stable-stack image containing JAX 0.5.2:
@@ -25,7 +25,7 @@ bash docker_build_dependency_image.sh DEVICE=tpu MODE=stable_stack BASEIMAGE=${B
 
 From the MaxText root directory, start your Llama3.1-405B workload.
 ```
-python3 benchmarks/benchmark_runner.py xpk \
+python3 -m benchmarks.benchmark_runner xpk \
 --project=$PROJECT \
 --zone=$ZONE \
 --device_type=v6e-256 \

training/trillium/Llama3.1-405B-MaxText/llama3-1-405b-2xv6e-256.sh

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 # Run this command from the MaxText root directory using the setup described in the README.
-python3 benchmarks/benchmark_runner.py xpk \
+python3 -m benchmarks.benchmark_runner xpk \
 --project=$PROJECT \
 --zone=$ZONE \
 --device_type=v6e-256 \

training/trillium/Llama3.1-70B-MaxText/README.md

Lines changed: 2 additions & 2 deletions
@@ -10,7 +10,7 @@ Please follow this [link](https://github.com/AI-Hypercomputer/tpu-recipes/blob/m
 
 In step 1, use the MaxText [tpu-recipes-v0.1.0](https://github.com/AI-Hypercomputer/maxtext/releases/tag/tpu-recipes-v0.1.0) tag to run this recipe:
 ```
-git checkout tpu-recipes-v0.1.0
+git checkout tpu-recipes-v0.1.1
 ```
 
 In step 2, use the jax-stable-stack image containing JAX 0.5.2:
@@ -25,7 +25,7 @@ bash docker_build_dependency_image.sh DEVICE=tpu MODE=stable_stack BASEIMAGE=${B
 
 From the MaxText root directory, start your Llama3.1-70B workload
 ```
-python3 benchmarks/benchmark_runner.py xpk \
+python3 -m benchmarks.benchmark_runner xpk \
 --project=$PROJECT \
 --zone=$ZONE \
 --device_type=v6e-256 \

Lines changed: 1 addition & 1 deletion
@@ -1,2 +1,2 @@
-python3 benchmarks/benchmark_runner.py --project=$PROJECT --zone=$ZONE --device_type=v6e-256 --num_slices=1 --cluster_name=${CLUSTER_NAME} --base_output_directory=${OUTPUT_DIR} \
+python3 -m benchmarks.benchmark_runner xpk --project=$PROJECT --zone=$ZONE --device_type=v6e-256 --num_slices=1 --cluster_name=${CLUSTER_NAME} --base_output_directory=${OUTPUT_DIR} \
 --model_name="llama3_1_70b_8192" --base_docker_image maxtext_base_image
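For readability, the updated one-line invocation above is equivalent to the wrapped form below. The flags are taken verbatim from the diff; $PROJECT, $ZONE, ${CLUSTER_NAME}, and ${OUTPUT_DIR} are assumed to be exported by the recipe environment:

```
python3 -m benchmarks.benchmark_runner xpk \
  --project=$PROJECT \
  --zone=$ZONE \
  --device_type=v6e-256 \
  --num_slices=1 \
  --cluster_name=${CLUSTER_NAME} \
  --base_output_directory=${OUTPUT_DIR} \
  --model_name="llama3_1_70b_8192" \
  --base_docker_image maxtext_base_image
```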
