
Commit 74b9b17

docs: Refactor Home Page and New About Section (#1338)
Signed-off-by: jgerh <[email protected]>
Signed-off-by: Lawrence Lane <[email protected]>
Signed-off-by: Ashwath Aithal <[email protected]>
Signed-off-by: L.B. <[email protected]>
Signed-off-by: Terry Kong <[email protected]>
Co-authored-by: Ashwath Aithal <[email protected]>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Co-authored-by: Lawrence Lane <[email protected]>
Co-authored-by: Wenwen Gao <[email protected]>
Co-authored-by: Terry Kong <[email protected]>
1 parent 45f5ce6 commit 74b9b17

File tree

27 files changed: +2180 -287 lines


.pre-commit-config.yaml

Lines changed: 6 additions & 0 deletions
@@ -16,6 +16,12 @@ repos:
        args: ["check", "--select", "I", "--fix"]
      - id: ruff-format

+  - repo: https://github.com/ComPWA/mirrors-taplo
+    rev: v0.9.3
+    hooks:
+      - id: taplo-format
+        name: Format TOML files with taplo
+
  - repo: local
    hooks:
      - id: no-underscore-md
Lines changed: 1 addition & 5 deletions
@@ -1,8 +1,5 @@
[build-system]
-requires = [
-    "setuptools>=61.0",
-    "wheel",
-]
+requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
@@ -11,4 +8,3 @@ dynamic = ["dependencies", "version"]
authors = [{ name = "NVIDIA", email = "[email protected]" }]
description = "Standalone packaging for the Megatron Bridge sub-module."
requires-python = ">=3.10"
-
Lines changed: 2 additions & 5 deletions
@@ -1,15 +1,12 @@
# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.

[build-system]
-requires = [
-    "setuptools",
-    "pybind11",
-]
+requires = ["setuptools", "pybind11"]
build-backend = "setuptools.build_meta"

[project]
name = "megatron-core"
dynamic = ["dependencies", "version"]
description = "Megatron Core - a library for efficient and scalable training of transformer based models"
authors = [{ name = "NVIDIA", email = "[email protected]" }]
-maintainers = [{ name = "NVIDIA", email = "[email protected]" }]
+maintainers = [{ name = "NVIDIA", email = "[email protected]" }]

3rdparty/Penguin-workspace/pyproject.toml

Lines changed: 1 addition & 4 deletions
@@ -1,8 +1,5 @@
[build-system]
-requires = [
-    "setuptools>=61.0",
-    "wheel",
-]
+requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]

docs/Makefile

Lines changed: 83 additions & 0 deletions
# Makefile for building documentation with isolated UV environment

.DEFAULT_GOAL := help

# Cross-platform venv paths
ifeq ($(OS),Windows_NT)
    VENV_DIR = .venv/Scripts
    PYTHON = $(VENV_DIR)/python.exe
    ACTIVATE_CMD = .venv\Scripts\activate
    RM = if exist _build rmdir /s /q _build
else
    VENV_DIR = .venv/bin
    PYTHON = $(VENV_DIR)/python
    ACTIVATE_CMD = source .venv/bin/activate
    RM = rm -rf _build
endif

# ------------------------------
# Help
# ------------------------------
help:
	@echo ""
	@echo "📚 Documentation Build System"
	@echo "=============================="
	@echo ""
	@echo "Available targets:"
	@echo "  make docs-html      Build HTML documentation"
	@echo "  make docs-live      Start live-reload server"
	@echo "  make docs-publish   Build docs (fail on warnings)"
	@echo "  make docs-clean     Clean built documentation"
	@echo ""
	@echo "The environment is automatically set up on first run."
	@echo "To manually activate the docs environment, run:"
	@echo "  $(ACTIVATE_CMD)"
	@echo ""

# ------------------------------
# Ensure UV and isolated docs environment
# ------------------------------
ensure-docs-env:
	@command -v uv >/dev/null 2>&1 || ( \
		echo ""; \
		echo "❌ uv is not installed. See https://docs.astral.sh/uv/getting-started/installation/"; \
		exit 1 \
	)
	@if [ ! -x "$(PYTHON)" ]; then \
		echo "📦 Creating isolated docs environment..."; \
		uv venv .venv; \
		uv sync --no-config; \
		echo "✅ Docs environment ready."; \
		echo "📝 To activate it: $(ACTIVATE_CMD)"; \
	fi

# ------------------------------
# Build HTML docs
# ------------------------------
docs-html: ensure-docs-env
	@echo "Building HTML documentation..."
	$(PYTHON) -m sphinx -b html . _build/html

# ------------------------------
# Build docs for publication (fail on warnings)
# ------------------------------
docs-publish: ensure-docs-env
	@echo "Building HTML documentation (fail on warnings)..."
	$(PYTHON) -m sphinx --fail-on-warning -b html . _build/html

# ------------------------------
# Start live-reload server
# ------------------------------
docs-live: ensure-docs-env
	@echo "Starting live-reload server..."
	$(PYTHON) -m sphinx_autobuild . _build/html --port 8001
	@echo ""
	@echo "📝 To manually activate the docs environment in a shell:"
	@echo "  $(ACTIVATE_CMD)"

# ------------------------------
# Clean built docs
# ------------------------------
docs-clean:
	@echo "Cleaning built documentation..."
	$(RM)

docs/about/algorithms/dapo.md

Lines changed: 84 additions & 0 deletions
# DAPO

[Dual-Clip Asymmetric Policy Optimization (DAPO)](https://arxiv.org/pdf/2503.14476) extends GRPO by allowing asymmetric clipping with distinct minimum and maximum clip parameters. This provides more fine-grained control over policy updates.

DAPO is implemented through the same `ClippedPGLossFn` as GRPO, but with the ability to set different values for `ratio_clip_min` and `ratio_clip_max`. For standard GRPO/PPO, these parameters are set to the same value.

## Key Differences from GRPO

- **Asymmetric Clipping**: DAPO allows `ratio_clip_min` ≠ `ratio_clip_max`, providing asymmetric bounds on the probability ratio
- **Same Infrastructure**: Uses the same training infrastructure and configurations as GRPO
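To make the asymmetric clipping concrete, the following is a minimal sketch of a dual-clip policy-gradient loss with separate lower and upper clip parameters. It is illustrative only, not the `ClippedPGLossFn` implementation from this repository; the function name, tensor shapes, and mean reduction are assumptions made for the example.

```python
import torch

def asymmetric_clipped_pg_loss(logprobs, old_logprobs, advantages,
                               ratio_clip_min=0.15, ratio_clip_max=0.25):
    """Sketch of a DAPO-style clipped policy-gradient loss.

    With ratio_clip_min == ratio_clip_max this reduces to the standard
    symmetric PPO/GRPO clipped objective.
    """
    # Probability ratio between the current policy and the old (behavior) policy
    ratio = torch.exp(logprobs - old_logprobs)
    # Asymmetric clipping bounds: [1 - ratio_clip_min, 1 + ratio_clip_max]
    clipped_ratio = torch.clamp(ratio, 1.0 - ratio_clip_min, 1.0 + ratio_clip_max)
    # Pessimistic (elementwise minimum) objective, negated to form a loss
    loss = -torch.minimum(ratio * advantages, clipped_ratio * advantages)
    return loss.mean()
```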
## DAPO Single Node

To run DAPO on a single GPU, use the GRPO script with asymmetric clip parameters:

```sh
# Run DAPO with asymmetric clipping
uv run python examples/run_grpo_math.py \
    policy.model_name="Qwen/Qwen2.5-1.5B" \
    grpo.ratio_clip_min=0.15 \
    grpo.ratio_clip_max=0.25 \
    checkpointing.checkpoint_dir="results/dapo_math" \
    logger.wandb_enabled=True \
    logger.wandb.name="dapo-math"
```

For multi-GPU setups:

```sh
uv run python examples/run_grpo_math.py \
    cluster.gpus_per_node=8 \
    grpo.ratio_clip_min=0.15 \
    grpo.ratio_clip_max=0.25 \
    checkpointing.checkpoint_dir="results/dapo_8gpu" \
    logger.wandb_enabled=True \
    logger.wandb.name="dapo-8gpu"
```

## DAPO Multi-node

DAPO can be run on multiple nodes using the same approach as GRPO:

```sh
# Run from the root of NeMo RL repo
NUM_ACTOR_NODES=2

COMMAND="uv run ./examples/run_grpo_math.py \
    --config examples/configs/grpo_math_8B.yaml \
    cluster.num_nodes=2 \
    grpo.ratio_clip_min=0.15 \
    grpo.ratio_clip_max=0.25 \
    checkpointing.checkpoint_dir='results/dapo_2nodes' \
    logger.wandb_enabled=True \
    logger.wandb.name='dapo-multinode'" \
CONTAINER=YOUR_CONTAINER \
MOUNTS="$PWD:$PWD" \
sbatch \
    --nodes=${NUM_ACTOR_NODES} \
    --account=YOUR_ACCOUNT \
    --job-name=YOUR_JOBNAME \
    --partition=YOUR_PARTITION \
    --time=4:0:0 \
    --gres=gpu:8 \
    ray.sub
```

## Configuration

DAPO uses the same configuration structure as GRPO. The key parameters are:

```yaml
grpo:
  ratio_clip_min: 0.15  # Minimum clip value (can be different from max)
  ratio_clip_max: 0.25  # Maximum clip value (can be different from min)
  # ... other GRPO parameters ...
```

For more details on other configuration options, refer to the [GRPO documentation](grpo.md).

## Additional Resources

- [DAPO Paper](https://arxiv.org/pdf/2503.14476)
- [GRPO Documentation](grpo.md)
- [Training Backends](../../design-docs/training-backends.md)

docs/about/algorithms/dpo.md

Lines changed: 58 additions & 0 deletions
# DPO

We provide a sample DPO experiment that uses the [HelpSteer3 dataset](https://huggingface.co/datasets/nvidia/HelpSteer3) for preference-based training.

## DPO Single Node

The default DPO experiment is configured to run on a single GPU. To launch the experiment:

```sh
uv run python examples/run_dpo.py
```

This trains `Llama3.2-1B-Instruct` on 1 GPU.

If you have access to more GPUs, you can update the experiment accordingly. To run on 8 GPUs, we update the cluster configuration and switch to an 8B Llama3.1 Instruct model:

```sh
uv run python examples/run_dpo.py \
    policy.model_name="meta-llama/Llama-3.1-8B-Instruct" \
    policy.train_global_batch_size=256 \
    cluster.gpus_per_node=8
```

Any of the DPO parameters can be customized from the command line. For example:

```sh
uv run python examples/run_dpo.py \
    dpo.sft_loss_weight=0.1 \
    dpo.preference_average_log_probs=True \
    checkpointing.checkpoint_dir="results/llama_dpo_sft" \
    logger.wandb_enabled=True \
    logger.wandb.name="llama-dpo-sft"
```

Refer to `examples/configs/dpo.yaml` for a full list of parameters that can be overridden. For an in-depth explanation of how to add your own DPO dataset, refer to the [DPO documentation](../../guides/dpo.md).
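For readers new to DPO, the sketch below shows the shape of the objective that the options above control: a preference loss on chosen/rejected log-probabilities plus an optional auxiliary SFT term weighted by a coefficient analogous to `sft_loss_weight`. It is a simplified illustration under assumed inputs (per-example, sequence-level log-probabilities), not the code used by `run_dpo.py`.

```python
import torch.nn.functional as F

def dpo_loss(policy_chosen_logps, policy_rejected_logps,
             ref_chosen_logps, ref_rejected_logps,
             beta=0.1, sft_loss_weight=0.0):
    """Sketch of a DPO objective with an optional auxiliary SFT term.

    Inputs are log-probabilities of the chosen and rejected responses under
    the policy and the frozen reference model (summed or averaged over
    tokens, cf. the preference_average_log_probs option).
    """
    # Implicit rewards: scaled log-ratios of policy vs. reference
    chosen_rewards = beta * (policy_chosen_logps - ref_chosen_logps)
    rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)
    # Preference loss: chosen responses should out-score rejected ones
    preference_loss = -F.logsigmoid(chosen_rewards - rejected_rewards).mean()
    # Optional SFT regularizer: maximize likelihood of the chosen responses
    sft_loss = -policy_chosen_logps.mean()
    return preference_loss + sft_loss_weight * sft_loss
```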
## DPO Multi-node

For distributed DPO training across multiple nodes, modify the following script for your use case:

```sh
# Run from the root of NeMo RL repo
## number of nodes to use for your job
NUM_ACTOR_NODES=2

COMMAND="uv run ./examples/run_dpo.py --config examples/configs/dpo.yaml cluster.num_nodes=2 cluster.gpus_per_node=8 dpo.val_global_batch_size=32 checkpointing.checkpoint_dir='results/dpo_llama81_2nodes' logger.wandb_enabled=True logger.wandb.name='dpo-llama1b'" \
CONTAINER=YOUR_CONTAINER \
MOUNTS="$PWD:$PWD" \
sbatch \
    --nodes=${NUM_ACTOR_NODES} \
    --account=YOUR_ACCOUNT \
    --job-name=YOUR_JOBNAME \
    --partition=YOUR_PARTITION \
    --time=4:0:0 \
    --gres=gpu:8 \
    ray.sub
```

docs/about/algorithms/grpo.md

Lines changed: 102 additions & 0 deletions
# GRPO

We provide a reference GRPO configuration for math benchmarks using the [OpenMathInstruct-2](https://huggingface.co/datasets/nvidia/OpenMathInstruct-2) dataset.

You can read about the details of the GRPO implementation [here](../../guides/grpo.md).
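As a quick orientation before the run recipes, here is a minimal sketch of the group-relative advantage estimate that gives GRPO its name: rewards for responses sampled from the same prompt are normalized against each other. This is illustrative only; the tensor layout and epsilon are assumptions, and the implementation guide linked above describes the actual code.

```python
import torch

def group_relative_advantages(rewards: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
    """Sketch of GRPO-style advantages.

    rewards has shape (num_prompts, group_size): one scalar reward per
    sampled response, grouped by the prompt it was generated for.
    """
    mean = rewards.mean(dim=-1, keepdim=True)
    std = rewards.std(dim=-1, keepdim=True)
    # Each response is scored relative to the other responses for the same prompt
    return (rewards - mean) / (std + eps)
```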
## GRPO Single Node

To run GRPO on a single GPU for `Qwen/Qwen2.5-1.5B`:

```sh
# Run the GRPO math example using a 1B parameter model
uv run python examples/run_grpo_math.py
```

By default, this uses the configuration in `examples/configs/grpo_math_1B.yaml`. You can customize parameters with command-line overrides. For example, to run on 8 GPUs:

```sh
# Run the GRPO math example using a 1B parameter model using 8 GPUs
uv run python examples/run_grpo_math.py \
    cluster.gpus_per_node=8
```

You can override any of the parameters listed in the YAML configuration file. For example:

```sh
uv run python examples/run_grpo_math.py \
    policy.model_name="meta-llama/Llama-3.2-1B-Instruct" \
    checkpointing.checkpoint_dir="results/llama1b_math" \
    logger.wandb_enabled=True \
    logger.wandb.name="grpo-llama1b_math" \
    logger.num_val_samples_to_print=10
```

The default configuration uses the DTensor training backend. We also provide a config, `examples/configs/grpo_math_1B_megatron.yaml`, which is set up to use the Megatron backend out of the box.

To train using this config on a single GPU:

```sh
# Run a GRPO math example on 1 GPU using the Megatron backend
uv run python examples/run_grpo_math.py \
    --config examples/configs/grpo_math_1B_megatron.yaml
```

For additional details on supported backends and how to configure the training backend to suit your setup, refer to the [Training Backends documentation](../../design-docs/training-backends.md).

## GRPO Multi-node

```sh
# Run from the root of NeMo RL repo
NUM_ACTOR_NODES=2

# grpo_math_8b uses Llama-3.1-8B-Instruct model
COMMAND="uv run ./examples/run_grpo_math.py --config examples/configs/grpo_math_8B.yaml cluster.num_nodes=2 checkpointing.checkpoint_dir='results/llama8b_2nodes' logger.wandb_enabled=True logger.wandb.name='grpo-llama8b_math'" \
CONTAINER=YOUR_CONTAINER \
MOUNTS="$PWD:$PWD" \
sbatch \
    --nodes=${NUM_ACTOR_NODES} \
    --account=YOUR_ACCOUNT \
    --job-name=YOUR_JOBNAME \
    --partition=YOUR_PARTITION \
    --time=4:0:0 \
    --gres=gpu:8 \
    ray.sub
```

The required `CONTAINER` can be built by following the instructions in the [Docker documentation](../../docker.md).

## GRPO Qwen2.5-32B

This section outlines how to run GRPO for Qwen2.5-32B with a 16k sequence length.

```sh
# Run from the root of NeMo RL repo
NUM_ACTOR_NODES=32

# Download Qwen before the job starts to avoid spending time downloading during the training loop
HF_HOME=/path/to/hf_home huggingface-cli download Qwen/Qwen2.5-32B

# Ensure HF_HOME is included in your MOUNTS
HF_HOME=/path/to/hf_home \
COMMAND="uv run ./examples/run_grpo_math.py --config examples/configs/grpo_math_8B.yaml policy.model_name='Qwen/Qwen2.5-32B' policy.generation.vllm_cfg.tensor_parallel_size=4 policy.max_total_sequence_length=16384 cluster.num_nodes=${NUM_ACTOR_NODES} policy.dtensor_cfg.enabled=True policy.dtensor_cfg.tensor_parallel_size=8 policy.dtensor_cfg.sequence_parallel=True policy.dtensor_cfg.activation_checkpointing=True checkpointing.checkpoint_dir='results/qwen2.5-32b' logger.wandb_enabled=True logger.wandb.name='qwen2.5-32b'" \
CONTAINER=YOUR_CONTAINER \
MOUNTS="$PWD:$PWD" \
sbatch \
    --nodes=${NUM_ACTOR_NODES} \
    --account=YOUR_ACCOUNT \
    --job-name=YOUR_JOBNAME \
    --partition=YOUR_PARTITION \
    --time=4:0:0 \
    --gres=gpu:8 \
    ray.sub
```

## GRPO Multi-Turn

We also support multi-turn generation and training (tool use, games, etc.). Reference example for training to play a Sliding Puzzle Game:

```sh
uv run python examples/run_grpo_sliding_puzzle.py
```
