Commit 9740ba7

Merge remote-tracking branch 'upstream/main' into on-policy-distillation
2 parents: e28f84b + 791af13

File tree

24 files changed: +530 additions, -461 deletions

.meta/mast/env_setup.sh

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@
 # setup_forge_env.sh - Setup conda environment and install forge with mounting
 
 # Configuration
-CONDA_ENV_NAME="forge:41468b33a03eaf2bf5b44517f418028a"
+CONDA_ENV_NAME="forge:314c3548ae691f4aa2e49f1b1fad06b3"
 
 # Colors for output
 RED='\033[0;31m'

.meta/mast/launch.sh

Lines changed: 3 additions & 3 deletions
@@ -34,10 +34,10 @@ fi
 
 CONFIG_FILE="$1"
 
-# Generate a unique job name
-USER=$(whoami)
+# Generate a unique job name based on the config file name
+BASENAME=$(basename "$CONFIG_FILE" .yaml)
 RANDOM_SUFFIX=$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 6 | head -n 1)
-JOB_NAME="${USER}-forge-${RANDOM_SUFFIX}"
+JOB_NAME="${BASENAME}-${RANDOM_SUFFIX}"
 log_info "Generated job name: $JOB_NAME"
 
 # Get the directory where this script is located
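
For illustration only, here is the new naming scheme expressed as a small Python sketch (a hypothetical helper, not part of the repo): the job name is now derived from the config file's basename plus a six-character random suffix, rather than from the username.

```python
# Hypothetical sketch of the job-name scheme introduced above; not repo code.
import secrets
import string
from pathlib import Path


def job_name_for(config_file: str) -> str:
    # Basename of the config without its .yaml extension, e.g. "qwen3_1_7b".
    basename = Path(config_file).name.removesuffix(".yaml")
    # Six random lowercase alphanumeric characters, mirroring the /dev/urandom pipe.
    suffix = "".join(secrets.choice(string.ascii_lowercase + string.digits) for _ in range(6))
    return f"{basename}-{suffix}"


print(job_name_for("apps/grpo/qwen3_1_7b.yaml"))  # e.g. "qwen3_1_7b-k3x9a2"
```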

.meta/mast/main.py

Lines changed: 0 additions & 3 deletions
@@ -63,8 +63,6 @@ async def main(cfg: DictConfig, mode: str = "detached", extra_args: list = None)
             extra_args=extra_args or [],
         )
         await launcher.launch_mast_job()
-        print(f"MAST job {launcher.job_name} launched successfully with client role.")
-        print("The client is running inside MAST and will execute the training.")
     else:
         # In remote mode, we're already running inside MAST, so mount directory, init provisioner and run training
         mount_mnt_directory("/mnt/wsfuse")
@@ -97,7 +95,6 @@ def _main(cfg):
     # Override job name from CLI
     if args.job_name:
         cfg[JOB_NAME_KEY] = args.job_name
-        print(f"Using job name: {args.job_name}")
     asyncio.run(main(cfg, mode=args.mode, extra_args=remaining))
 
 _main() # @parse grabs the cfg from CLI

README.md

Lines changed: 5 additions & 22 deletions
@@ -30,40 +30,23 @@ You can also find our notebook tutorials (coming soon)
 
 ## Installation
 
-### Basic
-
 torchforge requires PyTorch 2.9.0 with [Monarch](https://github.com/meta-pytorch/monarch), [vLLM](https://github.com/vllm-project/vllm), and [torchtitan](https://github.com/pytorch/torchtitan).
 
-You can install Forge with:
-```
-$ conda create -n forge python=3.10
-$ conda activate forge
-$ uv pip install .
-```
-
-(conda-less uv install is a wip)
-
-For your reference, we also include a basic install script that installs other system dependencies
-along with torchforge:
-(note that this basic install script
-uses [DNF](https://docs.fedoraproject.org/en-US/quick-docs/dnf/), but could be easily extended to other Linux OS.)
+Install torchforge with:
 
 ```bash
 conda create -n forge python=3.12
 conda activate forge
 ./scripts/install.sh
 ```
 
-Optional: By default, the packages installation uses conda. If user wants to install system packages on the target machine instead of conda, they can pass the `--use-sudo` to the installation script: `./script/install.sh --use-sudo`.
+The install script installs system dependencies along with torchforge. Note that this install script uses [DNF](https://docs.fedoraproject.org/en-US/quick-docs/dnf/), but could be easily extended to other Linux OS.
 
-After install, you can run the following command and should see output confirming GRPO training is running (you need a minimum 3 GPU devices):
+Optional: By default, the packages installation uses conda. If you want to install system packages on the target machine instead of conda, you can pass the `--use-sudo` flag to the installation script: `./scripts/install.sh --use-sudo`.
 
+> **Note:** We are actively working on enabling pure `uv` installation. Currently, Conda is the recommended approach. `uv` support is not fully working at the moment but is being tracked in [issue #494](https://github.com/meta-pytorch/torchforge/issues/494).
 
-```
-uv run apps/grpo/main.py --config apps/grpo/qwen3_1_7b.yaml
-```
-
-or if not using uv:
+After install, you can run the following command and should see output confirming GRPO training is running (you need a minimum 3 GPU devices):
 
 ```
 python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml

apps/grpo/main.py

Lines changed: 9 additions & 4 deletions
@@ -23,7 +23,7 @@
 from forge.actors.generator import Generator
 from forge.actors.reference_model import ReferenceModel
 from forge.actors.replay_buffer import ReplayBuffer
-from forge.actors.trainer import RLTrainer
+from forge.actors.trainer import TitanTrainer
 from forge.controller.actor import ForgeActor
 from forge.controller.provisioner import init_provisioner, shutdown
 from forge.data.rewards import MathReward, ThinkingReward
@@ -210,7 +210,7 @@ class DatasetActor(ForgeActor):
     model: str = "Qwen/Qwen3-1.7B"
 
     @endpoint
-    def setup(self):
+    async def setup(self):
         self._tokenizer = get_tokenizer(self.model)
         self._epoch = 0
 
@@ -266,7 +266,12 @@ async def sample(self) -> dict[str, str] | None:
 
     @endpoint
     async def pad_token(self):
-        return self._tokenizer.pad_token_id
+        # Use pad_token_id if available, otherwise use eos_token_id
+        # Llama models don't have a pad token by default
+        if self._tokenizer.pad_token_id is not None:
+            return self._tokenizer.pad_token_id
+        else:
+            return self._tokenizer.eos_token_id
 
 
 async def drop_weights(version: int):
@@ -318,7 +323,7 @@ async def main(cfg: DictConfig):
     ) = await asyncio.gather(
         DatasetActor.options(**cfg.actors.dataset).as_actor(**cfg.dataset),
         Policy.options(**cfg.services.policy).as_service(**cfg.policy),
-        RLTrainer.options(**cfg.actors.trainer).as_actor(
+        TitanTrainer.options(**cfg.actors.trainer).as_actor(
            **cfg.trainer, loss=simple_grpo_loss
        ),
        ReplayBuffer.options(**cfg.actors.replay_buffer).as_actor(
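
For reference, the same pad-token fallback can be sketched outside the actor using Hugging Face `transformers` directly instead of the repo's `get_tokenizer` helper; the helper function and model name below are illustrative assumptions.

```python
# Standalone sketch of the pad-token fallback added in this hunk; not repo code.
from transformers import AutoTokenizer


def resolve_pad_token_id(model_name: str) -> int:
    """Prefer the tokenizer's pad token; fall back to eos for models
    (e.g. base Llama checkpoints) that ship without a pad token."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token_id is not None:
        return tokenizer.pad_token_id
    # No dedicated pad token configured: reuse the end-of-sequence token.
    return tokenizer.eos_token_id


if __name__ == "__main__":
    print(resolve_pad_token_id("Qwen/Qwen3-1.7B"))
```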

docs/source/api_trainer.md

Lines changed: 3 additions & 3 deletions
@@ -7,17 +7,17 @@
 The Trainer manages model training in TorchForge, built on top of TorchTitan.
 It handles forward/backward passes, weight updates, and checkpoint management for reinforcement learning workflows.
 
-## RLTrainer
+## TitanTrainer
 
 ```{eval-rst}
-.. autoclass:: RLTrainer
+.. autoclass:: TitanTrainer
    :members: train_step, push_weights, cleanup
   :exclude-members: __init__
 ```
 
 ## Configuration
 
-The RLTrainer uses TorchTitan's configuration system with the following components:
+The TitanTrainer uses TorchTitan's configuration system with the following components:
 
 ### Job Configuration

docs/source/tutorial_sources/zero-to-forge/1_RL_and_Forge_Fundamentals.md

Lines changed: 5 additions & 5 deletions
@@ -96,7 +96,7 @@ graph LR
         S3["RewardActor"]
         S4["ReferenceModel"]
         S5["ReplayBuffer"]
-        S6["RLTrainer"]
+        S6["TitanTrainer"]
     end
 
     C1 --> S1
@@ -306,7 +306,7 @@ TorchForge handles behind the scenes:
 from forge.actors.generator import Generator as Policy
 from forge.actors.replay_buffer import ReplayBuffer
 from forge.actors.reference_model import ReferenceModel
-from forge.actors.trainer import RLTrainer
+from forge.actors.trainer import TitanTrainer
 from apps.grpo.main import DatasetActor, RewardActor, ComputeAdvantages
 from forge.data.rewards import MathReward, ThinkingReward
 import asyncio
@@ -348,7 +348,7 @@ group_size = 1
         }
     ),
     # Trainer actor with GPU
-    RLTrainer.options(procs=1, with_gpus=True).as_actor(
+    TitanTrainer.options(procs=1, with_gpus=True).as_actor(
        # Trainer config would come from YAML in real usage
        model={"name": "qwen3", "flavor": "1.7B", "hf_assets_path": f"hf://{model}"},
        optimizer={"name": "AdamW", "lr": 1e-5},
@@ -378,12 +378,12 @@ group_size = 1
 
 TorchForge has two types of distributed components:
 - **Services**: Multiple replicas with automatic load balancing (like Policy, RewardActor)
-- **Actors**: Single instances that handle their own internal distribution (like RLTrainer, ReplayBuffer)
+- **Actors**: Single instances that handle their own internal distribution (like TitanTrainer, ReplayBuffer)
 
 We cover this distinction in detail in Part 2, but for now this explains the scaling patterns:
 - Policy service: num_replicas=8 for high inference demand
 - RewardActor service: num_replicas=16 for parallel evaluation
-- RLTrainer actor: Single instance with internal distributed training
+- TitanTrainer actor: Single instance with internal distributed training
 
 
 ### Fault Tolerance
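
To make the renamed pattern concrete, here is a minimal sketch contrasting the two spawn styles, assembled from the tutorial snippets in these diffs; the specific `procs`, `num_replicas`, and config values are illustrative assumptions, not a tested configuration.

```python
# Sketch only: contrasts the service vs. actor spawn patterns named above.
# Config values (procs, num_replicas, engine/training settings) are assumptions.
import asyncio

from forge.actors.generator import Generator as Policy
from forge.actors.trainer import TitanTrainer


async def spawn_components():
    # Service: replicated and load-balanced, suited to high inference demand.
    policy = await Policy.options(procs=1, num_replicas=8, with_gpus=True).as_service(
        engine_config={"model": "Qwen/Qwen3-1.7B", "tensor_parallel_size": 1},
        sampling_config={"n": 1, "max_tokens": 512},
    )
    # Actor: a single instance that manages its own internal distribution.
    trainer = await TitanTrainer.options(procs=1, with_gpus=True).as_actor(
        model={"name": "qwen3", "flavor": "1.7B", "hf_assets_path": "hf://Qwen/Qwen3-1.7B"},
        optimizer={"name": "AdamW", "lr": 1e-5},
        training={"local_batch_size": 2, "seq_len": 2048},
    )
    return policy, trainer


if __name__ == "__main__":
    # Requires a provisioned multi-GPU environment, as described in the tutorials.
    asyncio.run(spawn_components())
```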

docs/source/tutorial_sources/zero-to-forge/2_Forge_Internals.md

Lines changed: 5 additions & 5 deletions
@@ -470,7 +470,7 @@ async def simple_rl_step():
     if batch is not None:
         print("Training on batch...")
         inputs, targets = batch # GRPO returns (inputs, targets) tuple
-        loss = await trainer.train_step.call(inputs, targets) # RLTrainer is an actor
+        loss = await trainer.train_step.call(inputs, targets) # TitanTrainer is an actor
         print(f"Training loss: {loss}")
         return loss
     else:
@@ -507,7 +507,7 @@ reward_actor = await RewardActor.options(
 )
 
 # Training needs fewer but more powerful replicas
-trainer = await RLTrainer.options(
+trainer = await TitanTrainer.options(
     procs=1, with_gpus=True # Fewer but GPU-heavy
 ).as_actor( # Trainer typically uses .as_actor() not .as_service()
     model={"name": "qwen3", "flavor": "1.7B"},
@@ -580,7 +580,7 @@ import torch
 from forge.actors.generator import Generator as Policy
 from forge.actors.reference_model import ReferenceModel
 from forge.actors.replay_buffer import ReplayBuffer
-from forge.actors.trainer import RLTrainer
+from forge.actors.trainer import TitanTrainer
 from apps.grpo.main import DatasetActor, RewardActor, ComputeAdvantages
 from forge.data.rewards import MathReward, ThinkingReward
 
@@ -603,7 +603,7 @@ print("Initializing all services...")
         engine_config={"model": "Qwen/Qwen3-1.7B", "tensor_parallel_size": 1},
         sampling_config={"n": 1, "max_tokens": 512}
     ),
-    RLTrainer.options(procs=1, with_gpus=True).as_actor(
+    TitanTrainer.options(procs=1, with_gpus=True).as_actor(
        model={"name": "qwen3", "flavor": "1.7B", "hf_assets_path": "hf://Qwen/Qwen3-1.7B"},
        optimizer={"name": "AdamW", "lr": 1e-5},
        training={"local_batch_size": 2, "seq_len": 2048}
@@ -667,7 +667,7 @@ print("Shutting down services...")
 await asyncio.gather(
     DatasetActor.shutdown(dataloader),
     policy.shutdown(),
-    RLTrainer.shutdown(trainer),
+    TitanTrainer.shutdown(trainer),
     ReplayBuffer.shutdown(replay_buffer),
     ComputeAdvantages.shutdown(compute_advantages),
     ReferenceModel.shutdown(ref_model),

pyproject.toml

Lines changed: 3 additions & 3 deletions
@@ -31,9 +31,9 @@ dependencies = [
 dynamic = ["version"]
 
 [project.urls]
-GitHub = "https://github.com/pytorch-labs/forge"
-Documentation = "https://github.com/pytorch-labs/forge/tree/main/docs"
-Issues = "https://github.com/pytorch-labs/forge/issues"
+GitHub = "https://github.com/meta-pytorch/torchforge"
+Documentation = "https://meta-pytorch.org/torchforge"
+Issues = "https://github.com/meta-pytorch/torchforge/issues"
 
 [project.optional-dependencies]
 dev = [

src/forge/actors/__init__.py

Lines changed: 14 additions & 1 deletion
@@ -4,9 +4,12 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import warnings
+
 __all__ = [
     "Generator",
-    "RLTrainer",
+    "TitanTrainer",
+    "RLTrainer", # Deprecated, use TitanTrainer
     "ReplayBuffer",
     "ReferenceModel",
     "SandboxedPythonCoder",
@@ -18,7 +21,17 @@ def __getattr__(name):
         from .generator import Generator
 
         return Generator
+    elif name == "TitanTrainer":
+        from .trainer import TitanTrainer
+
+        return TitanTrainer
     elif name == "RLTrainer":
+        warnings.warn(
+            "RLTrainer is deprecated and will be removed in a future version. "
+            "Please use TitanTrainer instead.",
+            FutureWarning,
+            stacklevel=2,
+        )
         from .trainer import RLTrainer
 
         return RLTrainer
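
As a quick illustration of how this shim behaves, the following snippet (an editor's sketch, not part of the commit) accesses the old name through the package namespace and checks that a `FutureWarning` is emitted while the class still resolves:

```python
# Sketch: exercising the deprecation shim above; not repo code.
import warnings

import forge.actors as actors

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    trainer_cls = actors.RLTrainer  # resolved lazily via __getattr__, emits the warning
    assert any(issubclass(w.category, FutureWarning) for w in caught)

print(f"Deprecated alias still resolves to: {trainer_cls.__name__}")
```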
