Commit d9b6e1f

Author: Allen Wang
Commit message: Merge branch 'main' into uv
Parents: 8407cdd + 91fcfaf

15 files changed: +26 additions, −18 deletions

.meta/mast/qwen3_14b_mast.yaml

Lines changed: 2 additions & 1 deletion

@@ -61,7 +61,7 @@ trainer:
     warmup_steps: 1
   training:
     local_batch_size: ${local_batch_size}
-    seq_len: 2048
+    seq_len: ${sum:${max_req_tokens},${max_res_tokens}} # seq_len >= max_req_tokens + max_res_tokens
     max_norm: 1.0
     steps: 1000000
     dtype: bfloat16
@@ -106,6 +106,7 @@ ref_model:
     flavor: 14B
     hf_assets_path: /mnt/wsfuse/teamforge/hf/qwen3_14b
   training:
+    seq_len: ${trainer.training.seq_len}
     dtype: bfloat16
     gc_freq: 1
   compile:
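Note on the new value: `${sum:${max_req_tokens},${max_res_tokens}}` is a config interpolation, not literal YAML, so `seq_len` is now derived from the two token budgets instead of being hardcoded, and `ref_model` reuses the trainer's value via `${trainer.training.seq_len}`. A minimal sketch of how such an expression could resolve, assuming an OmegaConf-style loader with a registered `sum` resolver (the loader and the registration call are assumptions, not shown in this commit):

```python
from omegaconf import OmegaConf

# Assumption: somewhere in the config stack a "sum" resolver is registered,
# so ${sum:${max_req_tokens},${max_res_tokens}} adds the two token budgets.
OmegaConf.register_new_resolver("sum", lambda *args: sum(args), replace=True)

cfg = OmegaConf.create(
    """
    max_req_tokens: 1024
    max_res_tokens: 1024
    trainer:
      training:
        seq_len: ${sum:${max_req_tokens},${max_res_tokens}}
    ref_model:
      training:
        # The ref model points at the trainer's value, so the two stay in sync.
        seq_len: ${trainer.training.seq_len}
    """
)

print(cfg.trainer.training.seq_len)    # 2048
print(cfg.ref_model.training.seq_len)  # 2048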
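```

Deriving `seq_len` this way keeps the trainer and reference model consistent with the sampling budgets, satisfying the `seq_len >= max_req_tokens + max_res_tokens` invariant from the inline comment by construction.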

.meta/mast/qwen3_1_7b_mast.yaml

Lines changed: 2 additions & 1 deletion

@@ -62,7 +62,7 @@ trainer:
     warmup_steps: 1
   training:
     local_batch_size: ${local_batch_size}
-    seq_len: 2048
+    seq_len: ${sum:${max_req_tokens},${max_res_tokens}} # seq_len >= max_req_tokens + max_res_tokens
     max_norm: 1.0
     steps: 1000000
     dtype: bfloat16
@@ -108,6 +108,7 @@ ref_model:
     hf_assets_path: /mnt/wsfuse/teamforge/hf/qwen3_1.7b
     # hf_assets_path: hf://${model}
   training:
+    seq_len: ${trainer.training.seq_len}
     dtype: bfloat16
     gc_freq: 1
   compile:

.meta/mast/qwen3_32b_mast.yaml

Lines changed: 2 additions & 1 deletion

@@ -61,7 +61,7 @@ trainer:
     warmup_steps: 1
   training:
     local_batch_size: ${local_batch_size}
-    seq_len: 2048
+    seq_len: ${sum:${max_req_tokens},${max_res_tokens}} # seq_len >= max_req_tokens + max_res_tokens
     max_norm: 1.0
     steps: 1000000
     dtype: bfloat16
@@ -106,6 +106,7 @@ ref_model:
     flavor: 32B
     hf_assets_path: /mnt/wsfuse/teamforge/hf/qwen3_32b
   training:
+    seq_len: ${trainer.training.seq_len}
     dtype: bfloat16
     gc_freq: 1
   compile:

.meta/mast/qwen3_4b_mast.yaml

Lines changed: 2 additions & 1 deletion

@@ -62,7 +62,7 @@ trainer:
     warmup_steps: 1
   training:
     local_batch_size: ${local_batch_size}
-    seq_len: 2048
+    seq_len: ${sum:${max_req_tokens},${max_res_tokens}} # seq_len >= max_req_tokens + max_res_tokens
     max_norm: 1.0
     steps: 1000000
     dtype: bfloat16
@@ -108,6 +108,7 @@ ref_model:
     hf_assets_path: /mnt/wsfuse/teamforge/hf/qwen3_4b
     # hf_assets_path: hf://${model}
   training:
+    seq_len: ${trainer.training.seq_len}
     dtype: bfloat16
     gc_freq: 1
   compile:

.meta/mast/qwen3_8b_mast.yaml

Lines changed: 2 additions & 1 deletion

@@ -61,7 +61,7 @@ trainer:
     warmup_steps: 1
   training:
     local_batch_size: ${local_batch_size}
-    seq_len: 2048
+    seq_len: ${sum:${max_req_tokens},${max_res_tokens}} # seq_len >= max_req_tokens + max_res_tokens
     max_norm: 1.0
     steps: 1000000
     dtype: bfloat16
@@ -106,6 +106,7 @@ ref_model:
     flavor: 8B
     hf_assets_path: /mnt/wsfuse/teamforge/hf/qwen3_8b
   training:
+    seq_len: ${trainer.training.seq_len}
     dtype: bfloat16
     gc_freq: 1
   compile:

README.md

Lines changed: 1 addition & 1 deletion

@@ -22,7 +22,7 @@ Key features:

 ## 📖 Documentation (Coming Soon)

-View torchforge's hosted documentation (coming soon)
+View torchforge's hosted documentation: https://meta-pytorch.org/torchforge.

 ## Tutorials

apps/grpo/qwen3_1_7b.yaml

Lines changed: 3 additions & 3 deletions

@@ -4,8 +4,8 @@
 # Global configuration
 group_size: 8
 local_batch_size: 16 # per-device batch size
-max_req_tokens: 512
-max_res_tokens: 512
+max_req_tokens: 1024
+max_res_tokens: 1024
 model: "Qwen/Qwen3-1.7B"
 off_by_n: 1 # Off by one by default

@@ -57,7 +57,7 @@ trainer:
     warmup_steps: 1
   training:
     local_batch_size: ${local_batch_size}
-    seq_len: 2048
+    seq_len: ${sum:${max_req_tokens},${max_res_tokens}} # seq_len >= max_req_tokens + max_res_tokens
     max_norm: 1.0
     steps: 1000000
     dtype: bfloat16
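A side note on the budget bump above: 1024 + 1024 = 2048, so the derived `seq_len` in this file resolves to exactly the value the old hardcoded setting used; the difference is that it now moves automatically if either budget changes. A trivial check, plain Python just illustrating the arithmetic and the invariant from the config comment:

```python
# Values from apps/grpo/qwen3_1_7b.yaml after this change.
max_req_tokens = 1024
max_res_tokens = 1024

# seq_len is derived rather than hardcoded; the config comment encodes
# the invariant seq_len >= max_req_tokens + max_res_tokens.
seq_len = max_req_tokens + max_res_tokens
assert seq_len >= max_req_tokens + max_res_tokens
print(seq_len)  # 2048 -- same value the old hardcoded setting produced
```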

apps/grpo/qwen3_32b.yaml

Lines changed: 1 addition & 1 deletion

@@ -60,7 +60,7 @@ trainer:
     warmup_steps: 1
   training:
     local_batch_size: ${local_batch_size}
-    seq_len: 2048
+    seq_len: ${sum:${max_req_tokens},${max_res_tokens}} # seq_len >= max_req_tokens + max_res_tokens
     max_norm: 1.0
     steps: 1000000
     dtype: bfloat16

apps/grpo/qwen3_8b.yaml

Lines changed: 3 additions & 3 deletions

@@ -4,8 +4,8 @@
 # Global configuration
 group_size: 8
 local_batch_size: 16 # per-device batch size
-max_req_tokens: 512
-max_res_tokens: 512
+max_req_tokens: 1024
+max_res_tokens: 1024
 model: "Qwen/Qwen3-8B"
 off_by_n: 1 # Off by one by default

@@ -53,7 +53,7 @@ trainer:
     warmup_steps: 1
   training:
     local_batch_size: ${local_batch_size}
-    seq_len: 2048
+    seq_len: ${sum:${max_req_tokens},${max_res_tokens}} # seq_len >= max_req_tokens + max_res_tokens
     max_norm: 1.0
     steps: 1000000
     dtype: bfloat16

docs/source/tutorial_sources/zero-to-forge/1_RL_and_Forge_Fundamentals.md

Lines changed: 1 addition & 1 deletion

@@ -155,7 +155,7 @@ async def conceptual_forge_rl_step(services, step):

 **Key difference**: Same RL logic, but each component is now a distributed, fault-tolerant, auto-scaling service.

-Did you realise-we are not worrying about any Infra code here! TorchForge Automagically handles the details behind the scenes and you can focus on writing your RL Algorthms!
+Did you realise-we are not worrying about any Infra code here! TorchForge Automagically handles the details behind the scenes and you can focus on writing your RL Algorithms!


 ## Why This Matters: Traditional ML Infrastructure Fails
