Commit 9740ba7

Merge remote-tracking branch 'upstream/main' into on-policy-distillation
2 parents: e28f84b + 791af13

File tree

24 files changed: +530 additions, -461 deletions

.meta/mast/env_setup.sh

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@
 # setup_forge_env.sh - Setup conda environment and install forge with mounting
 
 # Configuration
-CONDA_ENV_NAME="forge:41468b33a03eaf2bf5b44517f418028a"
+CONDA_ENV_NAME="forge:314c3548ae691f4aa2e49f1b1fad06b3"
 
 # Colors for output
 RED='\033[0;31m'

.meta/mast/launch.sh

Lines changed: 3 additions & 3 deletions
@@ -34,10 +34,10 @@ fi
 
 CONFIG_FILE="$1"
 
-# Generate a unique job name
-USER=$(whoami)
+# Generate a unique job name based on the config file name
+BASENAME=$(basename "$CONFIG_FILE" .yaml)
 RANDOM_SUFFIX=$(cat /dev/urandom | tr -dc 'a-z0-9' | fold -w 6 | head -n 1)
-JOB_NAME="${USER}-forge-${RANDOM_SUFFIX}"
+JOB_NAME="${BASENAME}-${RANDOM_SUFFIX}"
 log_info "Generated job name: $JOB_NAME"
 
 # Get the directory where this script is located
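
For illustration only, here is the new naming scheme expressed as a small Python sketch (a hypothetical helper, not part of the repo): the job name is now derived from the config file's basename plus a six-character random suffix, rather than from the username.

```python
# Hypothetical sketch of the job-name scheme introduced above; not repo code.
import secrets
import string
from pathlib import Path


def job_name_for(config_file: str) -> str:
    # Basename of the config without its .yaml extension, e.g. "qwen3_1_7b".
    basename = Path(config_file).name.removesuffix(".yaml")
    # Six random lowercase alphanumeric characters, mirroring the /dev/urandom pipe.
    suffix = "".join(secrets.choice(string.ascii_lowercase + string.digits) for _ in range(6))
    return f"{basename}-{suffix}"


print(job_name_for("apps/grpo/qwen3_1_7b.yaml"))  # e.g. "qwen3_1_7b-k3x9a2"
```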

.meta/mast/main.py

Lines changed: 0 additions & 3 deletions
@@ -63,8 +63,6 @@ async def main(cfg: DictConfig, mode: str = "detached", extra_args: list = None)
             extra_args=extra_args or [],
         )
         await launcher.launch_mast_job()
-        print(f"MAST job {launcher.job_name} launched successfully with client role.")
-        print("The client is running inside MAST and will execute the training.")
     else:
         # In remote mode, we're already running inside MAST, so mount directory, init provisioner and run training
         mount_mnt_directory("/mnt/wsfuse")
@@ -97,7 +95,6 @@ def _main(cfg):
     # Override job name from CLI
     if args.job_name:
         cfg[JOB_NAME_KEY] = args.job_name
-        print(f"Using job name: {args.job_name}")
     asyncio.run(main(cfg, mode=args.mode, extra_args=remaining))
 
 _main() # @parse grabs the cfg from CLI

README.md

Lines changed: 5 additions & 22 deletions
@@ -30,40 +30,23 @@ You can also find our notebook tutorials (coming soon)
 
 ## Installation
 
-### Basic
-
 torchforge requires PyTorch 2.9.0 with [Monarch](https://github.com/meta-pytorch/monarch), [vLLM](https://github.com/vllm-project/vllm), and [torchtitan](https://github.com/pytorch/torchtitan).
 
-You can install Forge with:
-```
-$ conda create -n forge python=3.10
-$ conda activate forge
-$ uv pip install .
-```
-
-(conda-less uv install is a wip)
-
-For your reference, we also include a basic install script that installs other system dependencies
-along with torchforge:
-(note that this basic install script
-uses [DNF](https://docs.fedoraproject.org/en-US/quick-docs/dnf/), but could be easily extended to other Linux OS.)
+Install torchforge with:
 
 ```bash
 conda create -n forge python=3.12
 conda activate forge
 ./scripts/install.sh
 ```
 
-Optional: By default, the packages installation uses conda. If user wants to install system packages on the target machine instead of conda, they can pass the `--use-sudo` to the installation script: `./script/install.sh --use-sudo`.
+The install script installs system dependencies along with torchforge. Note that this install script uses [DNF](https://docs.fedoraproject.org/en-US/quick-docs/dnf/), but could be easily extended to other Linux OS.
 
-After install, you can run the following command and should see output confirming GRPO training is running (you need a minimum 3 GPU devices):
+Optional: By default, the packages installation uses conda. If you want to install system packages on the target machine instead of conda, you can pass the `--use-sudo` flag to the installation script: `./scripts/install.sh --use-sudo`.
 
+> **Note:** We are actively working on enabling pure `uv` installation. Currently, Conda is the recommended approach. `uv` support is not fully working at the moment but is being tracked in [issue #494](https://github.com/meta-pytorch/torchforge/issues/494).
 
-```
-uv run apps/grpo/main.py --config apps/grpo/qwen3_1_7b.yaml
-```
-
-or if not using uv:
+After install, you can run the following command and should see output confirming GRPO training is running (you need a minimum 3 GPU devices):
 
 ```
 python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml

apps/grpo/main.py

Lines changed: 9 additions & 4 deletions
@@ -23,7 +23,7 @@
 from forge.actors.generator import Generator
 from forge.actors.reference_model import ReferenceModel
 from forge.actors.replay_buffer import ReplayBuffer
-from forge.actors.trainer import RLTrainer
+from forge.actors.trainer import TitanTrainer
 from forge.controller.actor import ForgeActor
 from forge.controller.provisioner import init_provisioner, shutdown
 from forge.data.rewards import MathReward, ThinkingReward
@@ -210,7 +210,7 @@ class DatasetActor(ForgeActor):
     model: str = "Qwen/Qwen3-1.7B"
 
     @endpoint
-    def setup(self):
+    async def setup(self):
         self._tokenizer = get_tokenizer(self.model)
         self._epoch = 0
 
@@ -266,7 +266,12 @@ async def sample(self) -> dict[str, str] | None:
 
     @endpoint
     async def pad_token(self):
-        return self._tokenizer.pad_token_id
+        # Use pad_token_id if available, otherwise use eos_token_id
+        # Llama models don't have a pad token by default
+        if self._tokenizer.pad_token_id is not None:
+            return self._tokenizer.pad_token_id
+        else:
+            return self._tokenizer.eos_token_id
 
 
 async def drop_weights(version: int):
@@ -318,7 +323,7 @@ async def main(cfg: DictConfig):
     ) = await asyncio.gather(
         DatasetActor.options(**cfg.actors.dataset).as_actor(**cfg.dataset),
         Policy.options(**cfg.services.policy).as_service(**cfg.policy),
-        RLTrainer.options(**cfg.actors.trainer).as_actor(
+        TitanTrainer.options(**cfg.actors.trainer).as_actor(
            **cfg.trainer, loss=simple_grpo_loss
        ),
        ReplayBuffer.options(**cfg.actors.replay_buffer).as_actor(
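
For reference, the same pad-token fallback can be sketched outside the actor using Hugging Face `transformers` directly instead of the repo's `get_tokenizer` helper; the helper function and model name below are illustrative assumptions.

```python
# Standalone sketch of the pad-token fallback added in this hunk; not repo code.
from transformers import AutoTokenizer


def resolve_pad_token_id(model_name: str) -> int:
    """Prefer the tokenizer's pad token; fall back to eos for models
    (e.g. base Llama checkpoints) that ship without a pad token."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token_id is not None:
        return tokenizer.pad_token_id
    # No dedicated pad token configured: reuse the end-of-sequence token.
    return tokenizer.eos_token_id


if __name__ == "__main__":
    print(resolve_pad_token_id("Qwen/Qwen3-1.7B"))
```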

docs/source/api_trainer.md

Lines changed: 3 additions & 3 deletions
@@ -7,17 +7,17 @@
 The Trainer manages model training in TorchForge, built on top of TorchTitan.
 It handles forward/backward passes, weight updates, and checkpoint management for reinforcement learning workflows.
 
-## RLTrainer
+## TitanTrainer
 
 ```{eval-rst}
-.. autoclass:: RLTrainer
+.. autoclass:: TitanTrainer
    :members: train_step, push_weights, cleanup
   :exclude-members: __init__
 ```
 
 ## Configuration
 
-The RLTrainer uses TorchTitan's configuration system with the following components:
+The TitanTrainer uses TorchTitan's configuration system with the following components:
 
 ### Job Configuration

docs/source/tutorial_sources/zero-to-forge/1_RL_and_Forge_Fundamentals.md

Lines changed: 5 additions & 5 deletions
@@ -96,7 +96,7 @@ graph LR
         S3["RewardActor"]
         S4["ReferenceModel"]
         S5["ReplayBuffer"]
-        S6["RLTrainer"]
+        S6["TitanTrainer"]
     end
 
     C1 --> S1
@@ -306,7 +306,7 @@ TorchForge handles behind the scenes:
 from forge.actors.generator import Generator as Policy
 from forge.actors.replay_buffer import ReplayBuffer
 from forge.actors.reference_model import ReferenceModel
-from forge.actors.trainer import RLTrainer
+from forge.actors.trainer import TitanTrainer
 from apps.grpo.main import DatasetActor, RewardActor, ComputeAdvantages
 from forge.data.rewards import MathReward, ThinkingReward
 import asyncio
@@ -348,7 +348,7 @@ group_size = 1
         }
     ),
     # Trainer actor with GPU
-    RLTrainer.options(procs=1, with_gpus=True).as_actor(
+    TitanTrainer.options(procs=1, with_gpus=True).as_actor(
        # Trainer config would come from YAML in real usage
        model={"name": "qwen3", "flavor": "1.7B", "hf_assets_path": f"hf://{model}"},
        optimizer={"name": "AdamW", "lr": 1e-5},
@@ -378,12 +378,12 @@ group_size = 1
 
 TorchForge has two types of distributed components:
 - **Services**: Multiple replicas with automatic load balancing (like Policy, RewardActor)
-- **Actors**: Single instances that handle their own internal distribution (like RLTrainer, ReplayBuffer)
+- **Actors**: Single instances that handle their own internal distribution (like TitanTrainer, ReplayBuffer)
 
 We cover this distinction in detail in Part 2, but for now this explains the scaling patterns:
 - Policy service: num_replicas=8 for high inference demand
 - RewardActor service: num_replicas=16 for parallel evaluation
-- RLTrainer actor: Single instance with internal distributed training
+- TitanTrainer actor: Single instance with internal distributed training
 
 
 ### Fault Tolerance
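
To make the renamed pattern concrete, here is a minimal sketch contrasting the two spawn styles, assembled from the tutorial snippets in these diffs; the specific `procs`, `num_replicas`, and config values are illustrative assumptions, not a tested configuration.

```python
# Sketch only: contrasts the service vs. actor spawn patterns named above.
# Config values (procs, num_replicas, engine/training settings) are assumptions.
import asyncio

from forge.actors.generator import Generator as Policy
from forge.actors.trainer import TitanTrainer


async def spawn_components():
    # Service: replicated and load-balanced, suited to high inference demand.
    policy = await Policy.options(procs=1, num_replicas=8, with_gpus=True).as_service(
        engine_config={"model": "Qwen/Qwen3-1.7B", "tensor_parallel_size": 1},
        sampling_config={"n": 1, "max_tokens": 512},
    )
    # Actor: a single instance that manages its own internal distribution.
    trainer = await TitanTrainer.options(procs=1, with_gpus=True).as_actor(
        model={"name": "qwen3", "flavor": "1.7B", "hf_assets_path": "hf://Qwen/Qwen3-1.7B"},
        optimizer={"name": "AdamW", "lr": 1e-5},
        training={"local_batch_size": 2, "seq_len": 2048},
    )
    return policy, trainer


if __name__ == "__main__":
    # Requires a provisioned multi-GPU environment, as described in the tutorials.
    asyncio.run(spawn_components())
```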

docs/source/tutorial_sources/zero-to-forge/2_Forge_Internals.md

Lines changed: 5 additions & 5 deletions
@@ -470,7 +470,7 @@ async def simple_rl_step():
     if batch is not None:
         print("Training on batch...")
         inputs, targets = batch # GRPO returns (inputs, targets) tuple
-        loss = await trainer.train_step.call(inputs, targets) # RLTrainer is an actor
+        loss = await trainer.train_step.call(inputs, targets) # TitanTrainer is an actor
         print(f"Training loss: {loss}")
         return loss
     else:
@@ -507,7 +507,7 @@ reward_actor = await RewardActor.options(
 )
 
 # Training needs fewer but more powerful replicas
-trainer = await RLTrainer.options(
+trainer = await TitanTrainer.options(
     procs=1, with_gpus=True # Fewer but GPU-heavy
 ).as_actor( # Trainer typically uses .as_actor() not .as_service()
     model={"name": "qwen3", "flavor": "1.7B"},
@@ -580,7 +580,7 @@ import torch
 from forge.actors.generator import Generator as Policy
 from forge.actors.reference_model import ReferenceModel
 from forge.actors.replay_buffer import ReplayBuffer
-from forge.actors.trainer import RLTrainer
+from forge.actors.trainer import TitanTrainer
 from apps.grpo.main import DatasetActor, RewardActor, ComputeAdvantages
 from forge.data.rewards import MathReward, ThinkingReward
 
@@ -603,7 +603,7 @@ print("Initializing all services...")
         engine_config={"model": "Qwen/Qwen3-1.7B", "tensor_parallel_size": 1},
         sampling_config={"n": 1, "max_tokens": 512}
     ),
-    RLTrainer.options(procs=1, with_gpus=True).as_actor(
+    TitanTrainer.options(procs=1, with_gpus=True).as_actor(
        model={"name": "qwen3", "flavor": "1.7B", "hf_assets_path": "hf://Qwen/Qwen3-1.7B"},
        optimizer={"name": "AdamW", "lr": 1e-5},
        training={"local_batch_size": 2, "seq_len": 2048}
@@ -667,7 +667,7 @@ print("Shutting down services...")
 await asyncio.gather(
     DatasetActor.shutdown(dataloader),
     policy.shutdown(),
-    RLTrainer.shutdown(trainer),
+    TitanTrainer.shutdown(trainer),
     ReplayBuffer.shutdown(replay_buffer),
     ComputeAdvantages.shutdown(compute_advantages),
     ReferenceModel.shutdown(ref_model),

pyproject.toml

Lines changed: 3 additions & 3 deletions
@@ -31,9 +31,9 @@ dependencies = [
 dynamic = ["version"]
 
 [project.urls]
-GitHub = "https://github.com/pytorch-labs/forge"
-Documentation = "https://github.com/pytorch-labs/forge/tree/main/docs"
-Issues = "https://github.com/pytorch-labs/forge/issues"
+GitHub = "https://github.com/meta-pytorch/torchforge"
+Documentation = "https://meta-pytorch.org/torchforge"
+Issues = "https://github.com/meta-pytorch/torchforge/issues"
 
 [project.optional-dependencies]
 dev = [

src/forge/actors/__init__.py

Lines changed: 14 additions & 1 deletion
@@ -4,9 +4,12 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
+import warnings
+
 __all__ = [
     "Generator",
-    "RLTrainer",
+    "TitanTrainer",
+    "RLTrainer", # Deprecated, use TitanTrainer
     "ReplayBuffer",
     "ReferenceModel",
     "SandboxedPythonCoder",
@@ -18,7 +21,17 @@ def __getattr__(name):
         from .generator import Generator
 
         return Generator
+    elif name == "TitanTrainer":
+        from .trainer import TitanTrainer
+
+        return TitanTrainer
     elif name == "RLTrainer":
+        warnings.warn(
+            "RLTrainer is deprecated and will be removed in a future version. "
+            "Please use TitanTrainer instead.",
+            FutureWarning,
+            stacklevel=2,
+        )
         from .trainer import RLTrainer
 
         return RLTrainer
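
As a quick illustration of how this shim behaves, the following snippet (an editor's sketch, not part of the commit) accesses the old name through the package namespace and checks that a `FutureWarning` is emitted while the class still resolves:

```python
# Sketch: exercising the deprecation shim above; not repo code.
import warnings

import forge.actors as actors

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    trainer_cls = actors.RLTrainer  # resolved lazily via __getattr__, emits the warning
    assert any(issubclass(w.category, FutureWarning) for w in caught)

print(f"Deprecated alias still resolves to: {trainer_cls.__name__}")
```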
