Update torchtitan for proper bf16 & new quant APIs (#281)

joecummings · web-flow · commit 3186797797a8 · 2025-10-01T16:25:24.000-04:00
diff --git a/apps/grpo/qwen3_8b.yaml b/apps/grpo/qwen3_8b.yaml
@@ -4,8 +4,8 @@
 # Global configuration
 group_size: 8
 batch_size: 16
-max_req_tokens: 468
-max_res_tokens: 468
+max_req_tokens: 512
+max_res_tokens: 512
 model: "Qwen/Qwen3-8B"
 off_by_n: 1 # Off by one by default
 
diff --git a/assets/wheels/torchtitan-0.1.0-py3-none-any.whl b/assets/wheels/torchtitan-0.1.0-py3-none-any.whl
diff --git a/scripts/build_wheels.sh b/scripts/build_wheels.sh
@@ -18,7 +18,7 @@ NC='\033[0m'
 PYTORCH_VERSION="2.9.0.dev20250905"
 VLLM_BRANCH="v0.10.0"
 MONARCH_COMMIT="9c41b5c16edadeab7cfb8521ba7efe68a1e2bc87"
-TORCHTITAN_COMMIT="a3104201ba3a0fa19e9c3cc5ba748b0398551410"
+TORCHTITAN_COMMIT="9f3fe08635356b829e6bf41883760679a8207697"
 TORCHSTORE_COMMIT="0052f6d8b686b9cff0cf4ce203a836c4b5d5ac94"
 BUILD_DIR="$HOME/forge-build"
 WHEEL_DIR="$(pwd)/assets/wheels"
diff --git a/src/forge/actors/trainer.py b/src/forge/actors/trainer.py
@@ -27,7 +27,7 @@
     Checkpoint,
     Comm,
     Compile,
-    Float8Dense,
+    Float8Linear,
     LRScheduler,
     Model,
     Optimizer,
@@ -104,7 +104,7 @@ class RLTrainer(ForgeActor):
     )
     use_vllm_builtin_load: bool = True
     compile: Compile = field(default_factory=Compile)
-    float8: Float8Dense = field(default_factory=Float8Dense)
+    float8: Float8Linear = field(default_factory=Float8Linear)
     comm: Comm = field(default_factory=Comm)
     loss: Callable = lambda logits, **targets: logits
     state_dict_key: str = "model_state_dict"