Skip to content

Commit 20af897

Browse files
authored
feat: lots of fixes (#17)
- flatten hyperparams for tb no longer errors for lists (was an issue for schedulers) - the submission script now overlaps the head on the first worker (no longer needs extra node just for head) - fixes the CI to handle weird permissions issues - added sphinx build and doctest to CI - added functional tests to CI - nuked an old example - added docs for functional tests - --no-container-mount-home - fix a unit tests that expected cuda to skip - allow running unit tests on slurm head node with no gpu - add a hermetic script to run functional tests Signed-off-by: Terry Kong <terryk@nvidia.com>
1 parent 8816926 commit 20af897

File tree

20 files changed

+313
-831
lines changed

20 files changed

+313
-831
lines changed

.github/workflows/_run_test.yml

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -64,15 +64,34 @@ jobs:
6464
- name: Docker pull image
6565
run: |
6666
docker pull nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }}
67+
68+
# NOTE: under certain circumstances, the checkout action cannot clean up the workspace properly, so
69+
# this workaround is needed to ensure that the workspace is clean by removing all files created by root.
70+
#
71+
# The error observed looked like this from the checkout action:
72+
# Run actions/checkout@v4
73+
# ...
74+
# Deleting the contents of '/home/azureuser/actions-runner/_work/reinforcer/reinforcer'
75+
# Error: File was unable to be removed Error: EACCES: permission denied, rmdir '/home/azureuser/actions-runner/_work/reinforcer/reinforcer/docs/_build/doctest'
76+
- name: Forcefully clean up the repository
77+
run: |
78+
docker run --rm -u root \
79+
-v /home/azureuser/actions-runner/_work/reinforcer/reinforcer:/home/azureuser/actions-runner/_work/reinforcer/reinforcer \
80+
nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }} \
81+
bash -x -c "ls -lah /home/azureuser/actions-runner/_work/reinforcer/reinforcer && shopt -s dotglob && rm -rf /home/azureuser/actions-runner/_work/reinforcer/reinforcer/*"
82+
83+
- name: Checkout repository
84+
uses: actions/checkout@v4
6785

6886
- name: Start container
6987
run: |
7088
docker run --rm -d --name nemo_container_${{ github.run_id }} --runtime=nvidia --gpus all --shm-size=64g \
7189
--env TRANSFORMERS_OFFLINE=0 \
7290
--env HYDRA_FULL_ERROR=1 \
7391
--env HF_HOME=/home/TestData/reinforcer/hf_home \
74-
--env REINFORCER_CI_DIR=/home/TestData/reinforcer \
75-
--env REINFORCER_REPO_DIR=/opt/NeMo-Reinforcer \
92+
--env REINFORCER_REPO_DIR=/opt/reinforcer \
93+
--volume $PWD:/opt/reinforcer \
94+
--volume /mnt/datadrive/TestData/reinforcer/datasets:/opt/reinforcer/datasets:ro \
7695
--volume /mnt/datadrive/TestData/reinforcer/checkpoints:/home/TestData/reinforcer/checkpoints:ro \
7796
--volume /mnt/datadrive/TestData/reinforcer/hf_home/hub:/home/TestData/reinforcer/hf_home/hub \
7897
nemoci.azurecr.io/nemo_reinforcer_container:${{ github.run_id }} \
@@ -94,6 +113,9 @@ jobs:
94113
set -e
95114
96115
cmd=$(cat <<"RUN_TEST_EOF"
116+
# This is needed since we create virtualenvs in the workspace, so this allows it to be cleaned up if necessary
117+
umask 000
118+
97119
nvidia-smi
98120
99121
# In case git commands need to be run inside Reinforcer

.github/workflows/cicd-main.yml

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,20 @@ jobs:
102102
pre-commit install
103103
pre-commit run --all-files --show-diff-on-failure --color=always
104104
105+
sphinx-build:
106+
name: Sphinx build
107+
needs: [pre-flight]
108+
runs-on: ubuntu-latest
109+
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
110+
steps:
111+
- name: Checkout repository
112+
uses: actions/checkout@v4
113+
- name: build docs
114+
run: |
115+
pip install uv
116+
cd docs/
117+
uv run --extra docs sphinx-build . _build/html
118+
105119
build-container:
106120
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
107121
needs: [pre-flight]
@@ -115,6 +129,20 @@ jobs:
115129
MAX_JOBS=32
116130
REINFORCER_COMMIT=${{ github.sha }}
117131
132+
sphinx-doctest:
133+
name: Sphinx doctest
134+
needs: [build-container, pre-flight]
135+
uses: ./.github/workflows/_run_test.yml
136+
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
137+
with:
138+
RUNNER: self-hosted-azure
139+
TIMEOUT: 10
140+
SCRIPT: |
141+
cd ${REINFORCER_REPO_DIR}/docs
142+
uv run --extra docs sphinx-build -b doctest . _build/doctest
143+
secrets:
144+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
145+
118146
unit-tests:
119147
name: Unit tests
120148
needs: [build-container, pre-flight]
@@ -125,6 +153,27 @@ jobs:
125153
TIMEOUT: 10
126154
SCRIPT: |
127155
cd ${REINFORCER_REPO_DIR}
128-
uv run bash -x ./tests/run_unit.sh
156+
uv run --extra test bash -x ./tests/run_unit.sh
129157
secrets:
130158
HF_TOKEN: ${{ secrets.HF_TOKEN }}
159+
160+
functional-tests:
161+
name: ${{ matrix.test_case }}
162+
needs: [build-container, pre-flight]
163+
uses: ./.github/workflows/_run_test.yml
164+
if: ${{ needs.pre-flight.outputs.run_ci == 'true' }}
165+
strategy:
166+
matrix:
167+
test_case:
168+
- sft.sh
169+
- grpo.sh
170+
with:
171+
# TODO: For now, allow these to fail since the checks are not robust.
172+
OPTIONAL: true
173+
RUNNER: self-hosted-azure
174+
TIMEOUT: 8
175+
SCRIPT: |
176+
cd ${REINFORCER_REPO_DIR}
177+
uv run bash ./tests/functional/${{ matrix.test_case }}
178+
secrets:
179+
HF_TOKEN: ${{ secrets.HF_TOKEN }}

docker/Dockerfile

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,41 @@
11
ARG BASE_IMAGE=anyscale/ray:2.43.0-py312-cu125
2-
FROM ${BASE_IMAGE}
2+
FROM ${BASE_IMAGE} AS base
3+
# base is just ray + uv with minimal installs so it is a very lightweight container
34

4-
WORKDIR /opt/NeMo-Reinforcer
5+
# It is more convenient for users to run as root
6+
USER root
57

6-
RUN sudo apt-get update && sudo apt-get install -y jq
8+
RUN apt-get update && sudo apt-get install -y jq
79

10+
RUN pip install uv
811
RUN echo "unset RAY_RUNTIME_ENV_HOOK" >> /home/ray/.bashrc
912

10-
COPY pyproject.toml .
13+
FROM base AS hermetic
14+
# hermetic creates a virtual environment with the default dependencies pre-installed for convenience
1115

12-
RUN pip install uv && \
13-
uv venv -p python3.12 && \
14-
uv pip install -r pyproject.toml --extra dev --extra test
16+
COPY --chown=ray --chmod=755 pyproject.toml /opt/reinforcer/pyproject.toml
17+
RUN chmod 755 /home/ray/.cache
18+
WORKDIR /opt/reinforcer
19+
RUN uv venv .venv
20+
# uv sync has a more reliable resolver than simple uv pip install which can fail
21+
RUN uv sync --extra test --extra dev --extra docs --no-install-project
1522

16-
COPY . .
23+
ENV VIRTUAL_ENV=/opt/reinforcer/.venv
24+
ENV PATH="/opt/reinforcer/.venv/bin:$PATH"
25+
# The ray images automatically activate the anaconda venv. We will
26+
# comment this out of the .bashrc to give the same UX between docker
27+
# and other clusters like slurm.
28+
RUN <<"EOF"
29+
cp ~/.bashrc ~/.bashrc.backup # backup existing .bashrc
1730

18-
RUN uv pip install -e .
31+
# Comment out the conda initialize block
32+
sed -i '/# >>> conda initialize >>>/,/# <<< conda initialize <<</ { /^[^#]/ s/^/# / }' ~/.bashrc
33+
34+
# Comment out any line that explicitly exports the anaconda3 PATH
35+
sed -i '/export PATH=\$HOME\/anaconda3\/bin:\$PATH/ s/^/# /' ~/.bashrc
36+
EOF
37+
38+
COPY --chown=ray --chmod=755 . /opt/reinforcer
39+
RUN uv pip install --no-deps --editable /opt/reinforcer
40+
41+
FROM base

docs/cluster.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ export UV_CACHE_DIR=/path/that/all/workers/can/access/uv_cache
2121

2222
```sh
2323
# Run from the root of NeMo-Reinforcer repo
24-
NUM_ACTOR_NODES=1 # Total nodes requested are $NUM_ACTOR_NODES + 1 (+1 for head node)
24+
NUM_ACTOR_NODES=1 # Total nodes requested (head is colocated on ray-worker-0)
2525

2626
COMMAND="bash -c 'uv pip install -e .; uv run ./examples/run_grpo.py'" \
2727
RAY_DEDUP_LOGS=0 \
@@ -55,7 +55,7 @@ tail -f 1980204-logs/ray-driver.log
5555
To run interactively, launch the same command as the [Batched Job Submission](#batched-job-submission) except omit the `COMMAND` line:
5656
```sh
5757
# Run from the root of NeMo-Reinforcer repo
58-
NUM_ACTOR_NODES=1 # Total nodes requested are $NUM_ACTOR_NODES + 1 (+1 for head node)
58+
NUM_ACTOR_NODES=1 # Total nodes requested (head is colocated on ray-worker-0)
5959

6060
RAY_DEDUP_LOGS=0 \
6161
UV_CACHE_DIR=YOUR_UV_CACHE_DIR \

docs/docker.md

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,22 @@
1-
# Building Docker Image
1+
# Building Docker Images
22

3-
## Docker Build
3+
### Base Image
4+
If you only need the base image with ray + uv, you can build it like so:
45
```sh
56
cd docker/
6-
docker buildx build -t nemo-reinforcer -f Dockerfile .
7+
docker buildx build -t reinforcer -f Dockerfile ..
78
```
9+
10+
This is **our recommendation** as it is a small image and allows you to specify your python dependencies at runtime.
11+
12+
### Hermetic Image
13+
We also provide a way to build the docker image with all of default dependencies to get started.
14+
```sh
15+
cd docker/
16+
docker buildx build --target hermetic -t reinforcer -f Dockerfile ..
17+
```
18+
19+
This image sets up the python environment for you, so you do not have to use `uv` if you don't need
20+
any other packages.
21+
22+
This image is useful in situations where you may not have network connectivity to re-download packages.

docs/testing.md

Lines changed: 53 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,70 @@
1-
# Testing NeMo-Reinforcer
1+
# Testing Reinforcer
22

33
## Unit Tests
44

5+
:::{important}
6+
Unit tests require 2 GPUs to test the full suite.
7+
:::
8+
59
```sh
10+
# Install the project and the test dependencies
611
uv pip install -e '.[test]'
12+
13+
# Run the unit tests using local GPUs
714
uv run bash tests/run_unit.sh
815
```
916

10-
### Run Unit Tests Hermetic
17+
:::{note}
18+
Tests can also be run on SLURM with `ray.sub`, but note that some tests will be skipped
19+
due to no GPUs being located on the head node. To run the full suite of tests, please
20+
launch on a regular GPU allocation.
21+
:::
1122

12-
If your local environment does not have all the necessary dependencies (e.g., `gcc`, `nvcc`)
13-
or there is concern that something in your environment may be misconfigured, you can also run
14-
the tests in docker with this script:
23+
### Running Unit Tests in a Hermetic Environment
24+
25+
For environments lacking necessary dependencies (e.g., `gcc`, `nvcc`)
26+
or where environmental configuration may be problematic, tests can be run
27+
in docker with this script:
1528

1629
```sh
1730
CONTAINER=... bash tests/run_unit_in_docker.sh
1831
```
1932

20-
The `CONTAINER` can be built by following the instructions [here](docker.md).
33+
The required `CONTAINER` can be built by following the instructions in the [docker documentation](docker.md).
2134

2235
## Functional tests
2336

24-
TBD
37+
:::{important}
38+
Functional tests may require multiple GPUs to run. See each script to understand the requirements.
39+
:::
40+
41+
Functional tests are located under `tests/functional/`.
42+
43+
```sh
44+
# Install the project and the test dependencies
45+
uv pip install -e '.[test]'
46+
# Run the functional test for sft
47+
uv run bash tests/functional/sft.sh
48+
```
49+
50+
At the end of each functional test, the metric checks will be printed as well as
51+
whether they pass or fail. Here is an example:
52+
53+
```text
54+
Metric Checks
55+
┏━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┓
56+
┃ Status ┃ Check ┃ Value ┃ Message ┃
57+
┡━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━┩
58+
│ PASS │ data["train/loss"]["9"] < 1500 │ 817.4517822265625 │ │
59+
└────────┴────────────────────────────────┴───────────────────┴─────────┘
60+
```
61+
62+
### Running Functional Tests in a Hermetic Environment
63+
64+
For environments lacking necessary dependencies (e.g., `gcc`, `nvcc`)
65+
or where environmental configuration may be problematic, tests can be run
66+
in docker with this script:
67+
68+
```sh
69+
CONTAINER=... bash run_functional_in_docker.sh functional/sft.sh
70+
```

examples/run_grpo_math.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,9 @@ def main():
170170
args, overrides = parse_args()
171171

172172
if not args.config:
173-
args.config = os.path.join(os.path.dirname(__file__), "configs", "grpo_math_1B.yaml")
173+
args.config = os.path.join(
174+
os.path.dirname(__file__), "configs", "grpo_math_1B.yaml"
175+
)
174176

175177
config = load_config(args.config)
176178
print(f"Loaded configuration from: {args.config}")

nemo_reinforcer/algorithms/grpo.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -445,7 +445,9 @@ def grpo_train(
445445

446446
# Run grpo training (single-turn)
447447
for batch in dataloader:
448-
print(f"\n{'=' * 25} Step {step + 1}/{min(len(dataloader), master_config['grpo']['max_num_steps'])} {'=' * 25}")
448+
print(
449+
f"\n{'=' * 25} Step {step + 1}/{min(len(dataloader), master_config['grpo']['max_num_steps'])} {'=' * 25}"
450+
)
449451

450452
with timer.time("total_step_time"):
451453
# Prepare batch

nemo_reinforcer/utils/logger.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,30 @@ def log_hyperparams(self, params: Dict[str, Any]) -> None:
189189

190190

191191
def flatten_dict(d: Dict[str, Any], sep: str = ".") -> Dict[str, Any]:
192-
"""Flatten a nested dictionary."""
192+
"""Flatten a nested dictionary.
193+
194+
Handles nested dictionaries and lists by creating keys with separators.
195+
For lists, the index is used as part of the key.
196+
197+
Args:
198+
d: Dictionary to flatten
199+
sep: Separator to use between nested keys
200+
201+
Returns:
202+
Flattened dictionary with compound keys
203+
204+
Examples:
205+
```{doctest}
206+
>>> flatten_dict({"a": 1, "b": {"c": 2}})
207+
{'a': 1, 'b.c': 2}
208+
209+
>>> flatten_dict({"a": [1, 2], "b": {"c": [3, 4]}})
210+
{'a.0': 1, 'a.1': 2, 'b.c.0': 3, 'b.c.1': 4}
211+
212+
>>> flatten_dict({"a": [{"b": 1}, {"c": 2}]})
213+
{'a.0.b': 1, 'a.1.c': 2}
214+
```
215+
"""
193216
result = {}
194217

195218
def _flatten(d, parent_key=""):
@@ -198,6 +221,13 @@ def _flatten(d, parent_key=""):
198221

199222
if isinstance(value, dict):
200223
_flatten(value, new_key)
224+
elif isinstance(value, list):
225+
for i, item in enumerate(value):
226+
list_key = f"{new_key}{sep}{i}"
227+
if isinstance(item, dict):
228+
_flatten(item, list_key)
229+
else:
230+
result[list_key] = item
201231
else:
202232
result[new_key] = value
203233

0 commit comments

Comments
 (0)