Merge branch 'meta-pytorch:main' into openenv

wukaixingxp · web-flow · commit 9eec90dfc2f0 · 2025-11-11T14:25:35.000-08:00
diff --git a/.meta/mast/launch.sh b/.meta/mast/launch.sh
@@ -34,10 +34,10 @@ fi
 
 CONFIG_FILE="$1"
 
-# Generate a unique job name
-USER=$(whoami)
+# Generate a unique job name based on the config file name
+BASENAME=$(basename "$CONFIG_FILE" .yaml)
 RANDOM_SUFFIX=$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 6 | head -n 1)
-JOB_NAME="${USER}-forge-${RANDOM_SUFFIX}"
+JOB_NAME="${BASENAME}-${RANDOM_SUFFIX}"
 log_info "Generated job name: $JOB_NAME"
 
 # Get the directory where this script is located
diff --git a/.meta/mast/main.py b/.meta/mast/main.py
@@ -63,8 +63,6 @@ async def main(cfg: DictConfig, mode: str = "detached", extra_args: list = None)
             extra_args=extra_args or [],
         )
         await launcher.launch_mast_job()
-        print(f"MAST job {launcher.job_name} launched successfully with client role.")
-        print("The client is running inside MAST and will execute the training.")
     else:
         # In remote mode, we're already running inside MAST, so mount directory, init provisioner and run training
         mount_mnt_directory("/mnt/wsfuse")
@@ -97,7 +95,6 @@ def _main(cfg):
         # Override job name from CLI
         if args.job_name:
             cfg[JOB_NAME_KEY] = args.job_name
-            print(f"Using job name: {args.job_name}")
         asyncio.run(main(cfg, mode=args.mode, extra_args=remaining))
 
     _main()  # @parse grabs the cfg from CLI
diff --git a/apps/grpo/main.py b/apps/grpo/main.py
@@ -23,7 +23,7 @@
 from forge.actors.generator import Generator
 from forge.actors.reference_model import ReferenceModel
 from forge.actors.replay_buffer import ReplayBuffer
-from forge.actors.trainer import RLTrainer
+from forge.actors.trainer import TitanTrainer
 from forge.controller.actor import ForgeActor
 from forge.controller.provisioner import init_provisioner, shutdown
 from forge.data.rewards import MathReward, ThinkingReward
@@ -210,7 +210,7 @@ class DatasetActor(ForgeActor):
     model: str = "Qwen/Qwen3-1.7B"
 
     @endpoint
-    def setup(self):
+    async def setup(self):
         self._tokenizer = get_tokenizer(self.model)
         self._epoch = 0
 
@@ -266,7 +266,12 @@ async def sample(self) -> dict[str, str] | None:
 
     @endpoint
     async def pad_token(self):
-        return self._tokenizer.pad_token_id
+        # Use pad_token_id if available, otherwise use eos_token_id
+        # Llama models don't have a pad token by default
+        if self._tokenizer.pad_token_id is not None:
+            return self._tokenizer.pad_token_id
+        else:
+            return self._tokenizer.eos_token_id
 
 
 async def drop_weights(version: int):
@@ -318,7 +323,7 @@ async def main(cfg: DictConfig):
     ) = await asyncio.gather(
         DatasetActor.options(**cfg.actors.dataset).as_actor(**cfg.dataset),
         Policy.options(**cfg.services.policy).as_service(**cfg.policy),
-        RLTrainer.options(**cfg.actors.trainer).as_actor(
+        TitanTrainer.options(**cfg.actors.trainer).as_actor(
             **cfg.trainer, loss=simple_grpo_loss
         ),
         ReplayBuffer.options(**cfg.actors.replay_buffer).as_actor(
diff --git a/docs/source/api_trainer.md b/docs/source/api_trainer.md
@@ -7,17 +7,17 @@
 The Trainer manages model training in TorchForge, built on top of TorchTitan.
 It handles forward/backward passes, weight updates, and checkpoint management for reinforcement learning workflows.
 
-## RLTrainer
+## TitanTrainer
 
 ```{eval-rst}
-.. autoclass:: RLTrainer
+.. autoclass:: TitanTrainer
    :members: train_step, push_weights, cleanup
    :exclude-members: __init__
 ```
 
 ## Configuration
 
-The RLTrainer uses TorchTitan's configuration system with the following components:
+The TitanTrainer uses TorchTitan's configuration system with the following components:
 
 ### Job Configuration
 
diff --git a/docs/source/tutorial_sources/zero-to-forge/1_RL_and_Forge_Fundamentals.md b/docs/source/tutorial_sources/zero-to-forge/1_RL_and_Forge_Fundamentals.md
@@ -96,7 +96,7 @@ graph LR
         S3["RewardActor"]
         S4["ReferenceModel"]
         S5["ReplayBuffer"]
-        S6["RLTrainer"]
+        S6["TitanTrainer"]
     end
 
     C1 --> S1
@@ -306,7 +306,7 @@ TorchForge handles behind the scenes:
 from forge.actors.generator import Generator as Policy
 from forge.actors.replay_buffer import ReplayBuffer
 from forge.actors.reference_model import ReferenceModel
-from forge.actors.trainer import RLTrainer
+from forge.actors.trainer import TitanTrainer
 from apps.grpo.main import DatasetActor, RewardActor, ComputeAdvantages
 from forge.data.rewards import MathReward, ThinkingReward
 import asyncio
@@ -348,7 +348,7 @@ group_size = 1
             }
         ),
         # Trainer actor with GPU
-        RLTrainer.options(procs=1, with_gpus=True).as_actor(
+        TitanTrainer.options(procs=1, with_gpus=True).as_actor(
             # Trainer config would come from YAML in real usage
             model={"name": "qwen3", "flavor": "1.7B", "hf_assets_path": f"hf://{model}"},
             optimizer={"name": "AdamW", "lr": 1e-5},
@@ -378,12 +378,12 @@ group_size = 1
 
 TorchForge has two types of distributed components:
 - **Services**: Multiple replicas with automatic load balancing (like Policy, RewardActor)
-- **Actors**: Single instances that handle their own internal distribution (like RLTrainer, ReplayBuffer)
+- **Actors**: Single instances that handle their own internal distribution (like TitanTrainer, ReplayBuffer)
 
 We cover this distinction in detail in Part 2, but for now this explains the scaling patterns:
 - Policy service: num_replicas=8 for high inference demand
 - RewardActor service: num_replicas=16 for parallel evaluation
-- RLTrainer actor: Single instance with internal distributed training
+- TitanTrainer actor: Single instance with internal distributed training
 
 
 ### Fault Tolerance
diff --git a/docs/source/tutorial_sources/zero-to-forge/2_Forge_Internals.md b/docs/source/tutorial_sources/zero-to-forge/2_Forge_Internals.md
@@ -470,7 +470,7 @@ async def simple_rl_step():
     if batch is not None:
         print("Training on batch...")
         inputs, targets = batch  # GRPO returns (inputs, targets) tuple
-        loss = await trainer.train_step.call(inputs, targets)  # RLTrainer is an actor
+        loss = await trainer.train_step.call(inputs, targets)  # TitanTrainer is an actor
         print(f"Training loss: {loss}")
         return loss
     else:
@@ -507,7 +507,7 @@ reward_actor = await RewardActor.options(
 )
 
 # Training needs fewer but more powerful replicas
-trainer = await RLTrainer.options(
+trainer = await TitanTrainer.options(
     procs=1, with_gpus=True  # Fewer but GPU-heavy
 ).as_actor(  # Trainer typically uses .as_actor() not .as_service()
     model={"name": "qwen3", "flavor": "1.7B"},
@@ -580,7 +580,7 @@ import torch
 from forge.actors.generator import Generator as Policy
 from forge.actors.reference_model import ReferenceModel
 from forge.actors.replay_buffer import ReplayBuffer
-from forge.actors.trainer import RLTrainer
+from forge.actors.trainer import TitanTrainer
 from apps.grpo.main import DatasetActor, RewardActor, ComputeAdvantages
 from forge.data.rewards import MathReward, ThinkingReward
 
@@ -603,7 +603,7 @@ print("Initializing all services...")
         engine_config={"model": "Qwen/Qwen3-1.7B", "tensor_parallel_size": 1},
         sampling_config={"n": 1, "max_tokens": 512}
     ),
-    RLTrainer.options(procs=1, with_gpus=True).as_actor(
+    TitanTrainer.options(procs=1, with_gpus=True).as_actor(
         model={"name": "qwen3", "flavor": "1.7B", "hf_assets_path": "hf://Qwen/Qwen3-1.7B"},
         optimizer={"name": "AdamW", "lr": 1e-5},
         training={"local_batch_size": 2, "seq_len": 2048}
@@ -667,7 +667,7 @@ print("Shutting down services...")
 await asyncio.gather(
     DatasetActor.shutdown(dataloader),
     policy.shutdown(),
-    RLTrainer.shutdown(trainer),
+    TitanTrainer.shutdown(trainer),
     ReplayBuffer.shutdown(replay_buffer),
     ComputeAdvantages.shutdown(compute_advantages),
     ReferenceModel.shutdown(ref_model),
diff --git a/pyproject.toml b/pyproject.toml
@@ -31,9 +31,9 @@ dependencies = [
 dynamic = ["version"]
 
 [project.urls]
-GitHub = "https://github.com/pytorch-labs/forge"
-Documentation = "https://github.com/pytorch-labs/forge/tree/main/docs"
-Issues = "https://github.com/pytorch-labs/forge/issues"
+GitHub = "https://github.com/meta-pytorch/torchforge"
+Documentation = "https://meta-pytorch.org/torchforge"
+Issues = "https://github.com/meta-pytorch/torchforge/issues"
 
 [project.optional-dependencies]
 dev = [
diff --git a/src/forge/actors/__init__.py b/src/forge/actors/__init__.py
@@ -4,9 +4,12 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import warnings
+
 __all__ = [
     "Generator",
-    "RLTrainer",
+    "TitanTrainer",
+    "RLTrainer",  # Deprecated, use TitanTrainer
     "ReplayBuffer",
     "ReferenceModel",
     "SandboxedPythonCoder",
@@ -18,7 +21,19 @@ def __getattr__(name):
         from .generator import Generator
 
         return Generator
+    elif name == "TitanTrainer":
+        from .trainer import TitanTrainer
+
+        return TitanTrainer
     elif name == "RLTrainer":
-        from .trainer import RLTrainer
+        warnings.warn(
+            "RLTrainer is deprecated and will be removed in a future version. "
+            "Please use TitanTrainer instead.",
+            FutureWarning,
+            stacklevel=2,
+        )
+        # Resolve the alias here: importing RLTrainer from .trainer would go
+        # through that package's own __getattr__ and warn a second time.
+        from .trainer import TitanTrainer
 
-        return RLTrainer
+        return TitanTrainer
diff --git a/src/forge/actors/trainer/__init__.py b/src/forge/actors/trainer/__init__.py
@@ -0,0 +1,29 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+import warnings
+
+from .titan import TitanTrainer
+
+__all__ = ["TitanTrainer", "RLTrainer"]
+
+
+def __getattr__(name):
+    """Resolve the deprecated ``RLTrainer`` alias lazily (PEP 562).
+
+    Returns ``TitanTrainer`` for ``RLTrainer`` after emitting a
+    ``FutureWarning``; raises ``AttributeError`` for any other name.
+    """
+    if name == "RLTrainer":
+        warnings.warn(
+            "RLTrainer is deprecated and will be removed in a future version. "
+            "Please use TitanTrainer instead.",
+            FutureWarning,
+            stacklevel=2,
+        )
+        return TitanTrainer
+    # Quote both names, matching the interpreter's own missing-attribute error.
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
diff --git a/src/forge/actors/trainer/titan.py b/src/forge/actors/trainer/titan.py
@@ -53,8 +53,8 @@
 
 
 @dataclass
-class RLTrainer(ForgeActor):
-    """A reinforcement learning trainer actor for policy optimization training.
+class TitanTrainer(ForgeActor):
+    """A generic trainer actor implementation built on top of TorchTitan.
 
     Built on top of TorchTitan's training engine, this actor provides a complete training
     loop for reinforcement learning. It performs forward and backward passes with gradient
diff --git a/src/forge/controller/launcher.py b/src/forge/controller/launcher.py
@@ -17,19 +17,18 @@
 import monarch
 
 import torchx.specs as specs
+
+from forge.types import Launcher, LauncherConfig
 from monarch._rust_bindings.monarch_hyperactor.alloc import AllocConstraints
 from monarch._rust_bindings.monarch_hyperactor.channel import ChannelTransport
 
 from monarch._rust_bindings.monarch_hyperactor.config import configure
 from monarch._src.actor.allocator import RemoteAllocator, TorchXRemoteAllocInitializer
 from monarch.actor import Actor, endpoint, ProcMesh
 from monarch.tools import commands
-from monarch.tools.commands import info
-from monarch.tools.components import hyperactor
+from monarch.tools.commands import create, info
 from monarch.tools.config import Config, Workspace
 
-from forge.types import Launcher, LauncherConfig
-
 _MAST_AVAILABLE = False
 
 try:
@@ -259,8 +258,12 @@ async def launch_mast_job(self):
             ),
         )
 
-        await commands.get_or_create(self.job_name, config)
-        return server_spec
+        job_handle = create(config, name=self.job_name)
+        print(
+            f"MAST job launched successfully:\n"
+            f"\033[92mhttps://www.internalfb.com/mlhub/pipelines/runs/mast/{self.job_name}\033[0m"
+        )
+        return job_handle
 
     def add_additional_packages(self, packages: "Packages") -> "Packages":
         packages.add_package("oil.oilfs:stable")
diff --git a/tests/integration_tests/test_policy_update.py b/tests/integration_tests/test_policy_update.py
@@ -16,7 +16,7 @@
 import torchstore as ts
 from forge.actors.generator import Generator
 
-from forge.actors.trainer import RLTrainer
+from forge.actors.trainer import TitanTrainer
 from forge.controller.provisioner import init_provisioner
 
 from forge.controller.service.service import uuid
@@ -50,7 +50,7 @@
 TEST_DCP_DIR = "test_dcp_tmp"
 
 
-class MockRLTrainer(RLTrainer):
+class MockTitanTrainer(TitanTrainer):
     @endpoint
     async def zero_out_model_states(self):
         """This simply sets all model weights to zero."""
@@ -59,7 +59,7 @@ async def zero_out_model_states(self):
             for k in sd.keys():
                 if not torch.is_floating_point(sd[k]):
                     logger.info(
-                        f"[MockRLTrainer] zero_out_model_states(): skipping non-float param {k}"
+                        f"[MockTitanTrainer] zero_out_model_states(): skipping non-float param {k}"
                     )
                     continue
                 sd[k] *= 0.0
@@ -199,22 +199,22 @@ async def _setup_and_teardown(request):
         )
     await ts.initialize(strategy=ts.ControllerStorageVolumes())
 
-    policy, rl_trainer = await asyncio.gather(
+    policy, titan_trainer = await asyncio.gather(
         *[
             Generator.options(**services_policy_cfg).as_service(**cfg.policy),
-            MockRLTrainer.options(**cfg.actors.trainer).as_actor(**trainer_cfg),
+            MockTitanTrainer.options(**cfg.actors.trainer).as_actor(**trainer_cfg),
         ]
     )
 
-    yield policy, rl_trainer
+    yield policy, titan_trainer
 
     # ---- teardown ---- #
     logger.info("Shutting down services and cleaning up DCP directory..")
 
     await asyncio.gather(
         policy.shutdown(),
         ts.shutdown(),
-        RLTrainer.shutdown(rl_trainer),
+        TitanTrainer.shutdown(titan_trainer),
     )
 
     # Cleanup DCP directory
@@ -235,7 +235,7 @@ class TestWeightSync:
     @requires_cuda
     async def test_sanity_check(self, _setup_and_teardown):
         """
-        Sanity check for weight sync sharding between RLTrainer and Policy for a given model config.
+        Sanity check for weight sync sharding between TitanTrainer and Policy for a given model config.
 
         The check performs the following steps:
         - Initialize trainer and push weights v0 (original huggingface ckpt)
@@ -245,15 +245,15 @@ async def test_sanity_check(self, _setup_and_teardown):
 
         """
 
-        policy, rl_trainer = _setup_and_teardown
+        policy, titan_trainer = _setup_and_teardown
 
         v0 = uuid.uuid4().int
         v1 = v0 + 1
 
-        await rl_trainer.push_weights.call(policy_version=v0)
+        await titan_trainer.push_weights.call(policy_version=v0)
         # Setting everything to zero
-        await rl_trainer.zero_out_model_states.call()
-        await rl_trainer.push_weights.call(policy_version=v1)
+        await titan_trainer.zero_out_model_states.call()
+        await titan_trainer.push_weights.call(policy_version=v1)
         await policy.save_model_params.fanout()
 
         # Sanity check that before update all the tests pass
diff --git a/tests/sandbox/rl_trainer/main.py b/tests/sandbox/rl_trainer/main.py
@@ -10,7 +10,7 @@
 
 import torch
 import torchstore as ts
-from forge.actors.trainer import RLTrainer
+from forge.actors.trainer import TitanTrainer
 from forge.controller.launcher import JOB_NAME_KEY, LAUNCHER_KEY
 from forge.controller.provisioner import init_provisioner, shutdown
 from forge.observability.metric_actors import get_or_create_metric_logger
@@ -182,7 +182,7 @@ async def main(cfg: DictConfig):
     await ts.initialize(strategy=ts.ControllerStorageVolumes())
     # Initialize trainer only
     print("Initializing trainer...")
-    trainer = await RLTrainer.options(**cfg.actors.trainer).as_actor(
+    trainer = await TitanTrainer.options(**cfg.actors.trainer).as_actor(
         **cfg.trainer, loss=simple_grpo_loss
     )
     print("Trainer initialized successfully with following configs!")