Skip to content

Commit acf72b8

Browse files
committed
Merge branch 'main' into yhu/shared-tensor-mp
2 parents 2d14c40 + 633b219 commit acf72b8

File tree

31 files changed

+590
-417
lines changed

31 files changed

+590
-417
lines changed

.github/workflows/gpu_test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
name: GPU tests
1+
name: GPU Tests
22

33
on:
44
schedule:

.github/workflows/unit_test.yaml

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
name: Unit Test
1+
name: Unit Tests
22

33
on:
44
pull_request:
5-
5+
push:
6+
branches: [ main ]
7+
workflow_dispatch:
68

79
jobs:
810
unit_tests:

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
# <img width="35" height="35" alt="image" src="https://github.com/user-attachments/assets/2700a971-e5d6-4036-b03f-2f89c9791609" /> Forge
22

3-
43
#### A PyTorch-native agentic RL library that lets you focus on algorithms—not infra.
4+
[![Unit Tests](https://github.com/meta-pytorch/forge/actions/workflows/unit_test.yaml/badge.svg?branch=main)](https://github.com/meta-pytorch/forge/actions/workflows/unit_test.yaml?query=branch%3Amain)
5+
[![GPU Tests](https://github.com/meta-pytorch/forge/actions/workflows/gpu_test.yaml/badge.svg?branch=main)](https://github.com/meta-pytorch/forge/actions/workflows/gpu_test.yaml?query=branch%3Amain)
56

67
## Overview
78
The primary purpose of the Forge ecosystem is to delineate infra concerns from model concerns thereby making RL experimentation easier. Forge delivers this by providing clear RL abstractions and one scalable implementation of these abstractions. When you need fine-grained control over placement, fault handling/redirecting training loads during a run, or communication patterns, the primitives are there. When you don’t, you can focus purely on your RL algorithm.

apps/grpo/main.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -496,22 +496,6 @@ async def continuous_training():
496496

497497
training_task.cancel()
498498

499-
# give mlogger time to shutdown backends, otherwise they can stay running.
500-
# TODO (felipemello) find more elegant solution
501-
await mlogger.shutdown.call_one()
502-
await asyncio.sleep(2)
503-
504-
await asyncio.gather(
505-
DatasetActor.shutdown(dataloader),
506-
policy.shutdown(),
507-
RLTrainer.shutdown(trainer),
508-
ReplayBuffer.shutdown(replay_buffer),
509-
ComputeAdvantages.shutdown(compute_advantages),
510-
ref_model.shutdown(),
511-
reward_actor.shutdown(),
512-
)
513-
# TODO - add a global shutdown that implicitly shuts down all services
514-
# and remote allocations
515499
await shutdown()
516500

517501

docs/Tutorials/ReadMe.MD

Lines changed: 0 additions & 19 deletions
This file was deleted.

docs/source/_static/logo-icon.svg

Lines changed: 12 additions & 0 deletions
Loading

docs/source/conf.py

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ def get_version_path():
6565
"sphinx_gallery.gen_gallery",
6666
]
6767

68+
html_favicon = "_static/logo-icon.svg"
69+
6870
html_baseurl = (
6971
f"https://meta-pytorch.org/forge/{version_path}" # needed for sphinx-sitemap
7072
)
@@ -82,8 +84,14 @@ def get_version_path():
8284
"_templates",
8385
os.path.join(os.path.dirname(pytorch_sphinx_theme2.__file__), "templates"),
8486
]
85-
exclude_patterns = ["tutorials/index.rst", "tutorials/template_tutorial.rst"]
8687

88+
exclude_patterns = [
89+
"tutorials/index.rst",
90+
"tutorials/template_tutorial.rst",
91+
"tutorials/**/index.rst",
92+
"tutorial_sources/**/*.md", # Exclude all markdown files from tutorial_sources
93+
"tutorial_sources/**/*.MD", # Also exclude uppercase .MD files
94+
]
8795
html_static_path = ["_static"]
8896
html_css_files = ["custom.css"]
8997
html_js_files = ["custom.js"]
@@ -167,6 +175,9 @@ def get_version_path():
167175
"html_image",
168176
]
169177

178+
# Configure MyST parser to treat mermaid code blocks as mermaid directives
179+
myst_fence_as_directive = ["mermaid"]
180+
170181
autodoc_default_options = {
171182
"members": True,
172183
"undoc-members": True,
@@ -204,14 +215,15 @@ def get_version_path():
204215
sphinx_gallery_conf = {
205216
"examples_dirs": "tutorial_sources", # Path to examples directory
206217
"gallery_dirs": "tutorials", # Path to generate gallery
207-
"filename_pattern": ".*", # Include all files
218+
"filename_pattern": r".*\.py$", # Only process .py files, not .md files
208219
"download_all_examples": False,
209220
"first_notebook_cell": "%matplotlib inline",
210221
"plot_gallery": "True",
211222
"promote_jupyter_magic": True,
212223
"backreferences_dir": None,
213224
"show_signature": False,
214225
"write_computation_times": False,
226+
"ignore_pattern": r".*\.md$|.*\.MD$", # Explicitly ignore markdown files
215227
}
216228

217229

@@ -222,5 +234,42 @@ def clean_docstring_indentation(app, what, name, obj, options, lines):
222234
lines.append("")
223235

224236

237+
def copy_markdown_tutorials(app):
    """Copy markdown tutorials from ``tutorial_sources`` into ``tutorials``.

    Mirrors every markdown file found under ``<srcdir>/tutorial_sources``
    into ``<srcdir>/tutorials``, preserving the relative directory layout,
    so the markdown pages sit next to the generated gallery output.

    Intended to be connected to Sphinx's ``builder-inited`` event so the
    copy happens before source files are read.
    NOTE(review): ordering relative to sphinx-gallery's own
    ``builder-inited`` handler is not established here -- confirm.

    Args:
        app: The Sphinx application object; only ``app.srcdir`` is read.
    """
    import shutil
    from pathlib import Path

    source_dir = Path(app.srcdir) / "tutorial_sources"
    target_dir = Path(app.srcdir) / "tutorials"

    # Ensure target directory exists (even when there is nothing to copy)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Nothing to mirror if the sources directory is absent.
    if not source_dir.is_dir():
        return

    # Walk everything and filter ourselves instead of rglob("*.md"):
    #  * the suffix comparison is case-insensitive, matching the ".md"/".MD"
    #    handling in exclude_patterns and sphinx_gallery_conf;
    #  * directories whose name happens to end in ".md" are skipped, since
    #    shutil.copy2 would raise on them.
    # Sorting keeps the copy order (and the log output) deterministic.
    for md_file in sorted(source_dir.rglob("*")):
        if md_file.suffix.lower() != ".md" or not md_file.is_file():
            continue

        # Skip README files
        if md_file.name.lower() == "readme.md":
            continue

        # Re-root the file under the tutorials directory
        rel_path = md_file.relative_to(source_dir)
        target_path = target_dir / rel_path
        target_path.parent.mkdir(parents=True, exist_ok=True)

        # copy2 also preserves file metadata such as modification time
        shutil.copy2(md_file, target_path)
        print(
            f"[Forge Docs] Copied {md_file.name} to {target_path.relative_to(app.srcdir)}"
        )
271+
225272
def setup(app):
226273
app.connect("autodoc-process-docstring", clean_docstring_indentation)
274+
# Use builder-inited to ensure it runs before source files are read
275+
app.connect("builder-inited", copy_markdown_tutorials)

docs/source/getting_started.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ Welcome to TorchForge! This guide will help you get up and running with TorchFor
55
TorchForge specializes in post-training techniques for large language models, including:
66

77
- **Supervised Fine-Tuning (SFT)**: Adapt pre-trained models to specific tasks using labeled data
8-
- **Generalized Reward Policy Optimization (GRPO)**: Advanced reinforcement learning for model alignment
8+
- **Group Relative Policy Optimization (GRPO)**: Advanced reinforcement learning for model alignment
99
- **Multi-GPU Distributed Training**: Efficient scaling across multiple GPUs and nodes

docs/source/index.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Key Features
77
------------
88

99
* **Post-Training Focus**: Specializes in techniques
10-
like Supervised Fine-Tuning (SFT) and Generalized Reward Policy Optimization (GRPO)
10+
like Supervised Fine-Tuning (SFT) and Group Relative Policy Optimization (GRPO)
1111
* **PyTorch Integration**: Built natively on PyTorch with
1212
dependencies on [PyTorch nightly](https://pytorch.org/get-started/locally/),
1313
[Monarch](https://meta-pytorch.org/monarch), [vLLM](https://docs.vllm.ai/en/latest/),

docs/Tutorials/1_RL_and_Forge_Fundamentals.MD renamed to docs/source/tutorial_sources/zero-to-forge/1_RL_and_Forge_Fundamentals.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ Here's the key insight: **Each RL component becomes a Forge service**. The toy e
7676
```mermaid
7777
graph LR
7878
subgraph Concepts["RL Concepts"]
79+
direction TB
7980
C1["Dataset"]
8081
C2["Policy"]
8182
C3["Reward Model"]
@@ -85,6 +86,7 @@ graph LR
8586
end
8687
8788
subgraph Services["Forge Services (Real Classes)"]
89+
direction TB
8890
S1["DatasetActor"]
8991
S2["Policy"]
9092
S3["RewardActor"]
@@ -392,4 +394,4 @@ score = await reward_actor.evaluate_response.route(
392394

393395
This is fundamentally different from monolithic RL implementations where any component failure stops everything!
394396

395-
In the next Section, we will go a layer deeper and learn how ForgeServices work. Continue to [Part 2 here](./2_Forge_Internals.MD)
397+
In the next section, we will go a layer deeper and learn how ForgeServices work. Continue to [Part 2 here](./2_Forge_Internals.md).

0 commit comments

Comments
 (0)