make e2e training benchmark support mx (#2776)

vkuzo · web-flow · commit 49cb18a66551 · 2025-08-15T14:39:54.000-04:00
Update

[ghstack-poisoned]
diff --git a/benchmarks/float8/training/llama3.sh b/benchmarks/float8/training/llama3.sh
@@ -17,22 +17,30 @@ LOG_FILE="/tmp/float8_training_log.txt"
 # validate user has specified torchtitan root directory
 if [ -z "${TORCHTITAN_ROOT}" ]; then
   echo "Error: TORCHTITAN environment variable is not set. Please set it before running this script."
-  echo "Usage: TORCHTITAN_ROOT=<directory> ./float8_training_benchmark.sh"
+  echo "Usage: TORCHTITAN_ROOT=<directory> ./llama3.sh"
   echo "Optional parameters configurable via environment variables:"
   echo " * FLOAT8_RECIPE_WITH_BEST_SETTINGS: "rowwise" or "tensorwise". if set, use float8 training in torchtitan with the specified recipe, including the additional settings which are optimal for that recipe. otherwise, use bf16 mixed precision training."
+  echo " * MX_RECIPE: any valid MX recipe name. Note: only one of FLOAT8_RECIPE_WITH_BEST_SETTINGS and MX_RECIPE can be set."
   echo " * LOCAL_BATCH_SIZE: defaults to 1."
   echo " * STEPS: defaults to 100."
   echo " * EXTRA_ARGS: additional arguments to pass to the torchtitan training script."
   exit 1
 fi
 
 # validate recipe name
-if [ -n "${FLOAT8_RECIPE_WITH_BEST_SETTINGS}" ]; then
+if [ -n "${FLOAT8_RECIPE_WITH_BEST_SETTINGS}" ] && [ -n "${MX_RECIPE}" ]; then
+    echo "Error: both FLOAT8_RECIPE_WITH_BEST_SETTINGS and MX_RECIPE are set, please only set one of them." >&2
+    exit 1
+elif [ -n "${FLOAT8_RECIPE_WITH_BEST_SETTINGS}" ]; then
   if [ "${FLOAT8_RECIPE_WITH_BEST_SETTINGS}" == "tensorwise" ]; then
     FLOAT8_ARGS="--model.converters="float8" --float8.enable_fsdp_float8_all_gather --float8.precompute_float8_dynamic_scale_for_fsdp"
   else
     FLOAT8_ARGS="--model.converters="float8" --float8.recipe_name=${FLOAT8_RECIPE_WITH_BEST_SETTINGS}"
   fi
+elif [ -n "${MX_RECIPE}" ]; then
+    FLOAT8_ARGS="--model.converters="mx" --mx.recipe_name=${MX_RECIPE}"
+else
+    FLOAT8_ARGS=""
 fi
 
 
@@ -51,7 +59,7 @@ CONFIG_FILE="./torchtitan/models/llama3/train_configs/llama3_8b.toml" ${TORCHTIT
 cd $original_dir
 
 # parse logs to calculate top line metrics
-python parse_torchtitan_logs.py --log-file ${LOG_FILE}
+python benchmarks/float8/training/parse_torchtitan_logs.py --log-file ${LOG_FILE}
 
 # clean up logs
 rm ${LOG_FILE}
diff --git a/torchao/float8/README.md b/torchao/float8/README.md
@@ -53,10 +53,10 @@ To reproduce these benchmarks, you can follow these steps:
 1. On a machine with compatible GPUs, clone torchtitan and follow local installation [steps](https://github.com/pytorch/torchtitan?tab=readme-ov-file#installation),
 including [downloading a tokenizer](https://github.com/pytorch/torchtitan?tab=readme-ov-file#downloading-a-tokenizer).
 2. Install torchao following these [steps](https://github.com/pytorch/ao/tree/main?tab=readme-ov-file#installation).
-3. From the `torchao/benchmarks/float8/training/` directory, you can run the following commands to reproduce the benchmarks above:
-   - bf16 + compile: `TORCHTITAN_ROOT=<path> ./torchtitan_benchmark.sh`
-   - float8 tensorwise with float8 all-gather + compile: `TORCHTITAN_ROOT=<path> FLOAT8_RECIPE_WITH_BEST_SETTINGS="tensorwise" ./torchtitan_benchmark.sh`
-   - float8 rowwise with bf16 all-gather + compile: `TORCHTITAN_ROOT=<path> FLOAT8_RECIPE_WITH_BEST_SETTINGS="rowwise" ./torchtitan_benchmark.sh`
+3. From the root of the torchao repository checkout (the directory that contains `benchmarks/`), you can run the following commands to reproduce the benchmarks above:
+   - bf16 + compile: `TORCHTITAN_ROOT=<path> ./benchmarks/float8/training/llama3.sh`
+   - float8 tensorwise with float8 all-gather + compile: `TORCHTITAN_ROOT=<path> FLOAT8_RECIPE_WITH_BEST_SETTINGS="tensorwise" ./benchmarks/float8/training/llama3.sh`
+   - float8 rowwise with bf16 all-gather + compile: `TORCHTITAN_ROOT=<path> FLOAT8_RECIPE_WITH_BEST_SETTINGS="rowwise" ./benchmarks/float8/training/llama3.sh`
 
 See the float8 training benchmarking [guide](.torchao/benchmarks/float8/training/README.md) for more details.