Skip to content

Commit 94a77bc

Browse files
authored
Merge branch 'main' into gemma3
2 parents b4e296c + 50a2b8f commit 94a77bc

File tree

562 files changed

+34794
-7062
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

562 files changed

+34794
-7062
lines changed

.github/workflows/export.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ jobs:
3030
python-version: ['3.9', '3.10', '3.11']
3131
steps:
3232
- name: Check out repo
33-
uses: actions/checkout@v3
33+
uses: actions/checkout@v4
3434
- name: Setup conda env
3535
uses: conda-incubator/setup-miniconda@v2
3636
with:

.github/workflows/gpu_test.yaml

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ defaults:
2424
jobs:
2525
gpu_test:
2626
if: github.repository_owner == 'pytorch'
27-
runs-on: linux.8xlarge.nvidia.gpu
27+
runs-on: linux.g5.12xlarge.nvidia.gpu
2828
strategy:
2929
matrix:
3030
python-version: ['3.9', '3.10', '3.11']
@@ -34,7 +34,7 @@ jobs:
3434
- torch-version: ${{ github.event_name == 'pull_request' && 'nightly' }}
3535
steps:
3636
- name: Check out repo
37-
uses: actions/checkout@v3
37+
uses: actions/checkout@v4
3838
- name: Setup conda env
3939
uses: conda-incubator/setup-miniconda@v2
4040
with:
@@ -44,17 +44,17 @@ jobs:
4444
python-version: ${{ matrix.python-version }}
4545
- name: Update pip
4646
run: python -m pip install --upgrade pip
47-
- name: Install torch nightly
47+
- name: Install nightly versions of PyTorch packages (if applicable)
4848
if: ${{ matrix.torch-version == 'nightly' }}
49-
run: python -m pip install --pre torch==2.7.0.dev20250201 torchvision==0.22.0.dev20250201 torchao==0.9.0.dev20250201 --index-url https://download.pytorch.org/whl/nightly/cu126
50-
- name: Install torch stable
49+
run: python -m pip install --pre torch torchvision torchao --index-url https://download.pytorch.org/whl/nightly/cu126
50+
- name: Install torch stable (if applicable)
5151
if: ${{ matrix.torch-version == 'stable' }}
5252
run: python -m pip install torch torchvision torchao
53-
- name: Install remaining dependencies
54-
run: |
55-
python -m pip install -e ".[dev]"
56-
python -m pip install lm-eval>=0.4.5
53+
- name: Install recipe-specific dependencies
54+
run: python -m pip install lm-eval==0.4.8
55+
- name: Install the torchtune library with dev options
56+
run: python -m pip install -e ".[dev]"
5757
- name: Run recipe and unit tests with coverage
5858
run: pytest tests --ignore tests/torchtune/modules/_export --with-integration --cov=. --cov-report=xml --durations=20 -vv
59-
- name: Upload Coverage to Codecov
59+
- name: Upload coverage to Codecov
6060
uses: codecov/codecov-action@v3

.github/workflows/lint.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ jobs:
2222
python-version: ['3.10']
2323
steps:
2424
- name: Check out repo
25-
uses: actions/checkout@v3
25+
uses: actions/checkout@v4
2626
- name: Setup python
2727
uses: actions/setup-python@v4
2828
with:
@@ -35,6 +35,6 @@ jobs:
3535
pre-commit install-hooks
3636
- name: Get changed files
3737
id: changed-files
38-
uses: tj-actions/changed-files@v41.0.0
38+
uses: tj-actions/changed-files@d6e91a2266cdb9d62096cebf1e8546899c6aa18f # v45.0.6
3939
- name: Lint modified files
4040
run: pre-commit run --files ${{ steps.changed-files.outputs.all_changed_files }}

.github/workflows/recipe_test.yaml

Lines changed: 0 additions & 49 deletions
This file was deleted.

.github/workflows/regression_test.yaml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,9 @@ jobs:
2626
python-version: ['3.11']
2727
torch-version: ["stable", "nightly"]
2828
fail-fast: false
29-
env:
30-
PYTORCH_CUDA_ALLOC_CONF: expandable_segments:True
3129
steps:
3230
- name: Check out repo
33-
uses: actions/checkout@v3
31+
uses: actions/checkout@v4
3432
- name: Setup conda env
3533
uses: conda-incubator/setup-miniconda@v2
3634
with:
@@ -57,8 +55,8 @@ jobs:
5755
run: python -m pip install torch torchvision torchao
5856
- name: Install remaining dependencies
5957
run: |
58+
python -m pip install lm-eval==0.4.8
6059
python -m pip install -e ".[dev]"
61-
python -m pip install lm-eval>=0.4.5
6260
- name: Run regression tests with coverage
6361
run: pytest tests -m slow_integration_test --silence-s3-logs --cov=. --cov-report=xml --durations=20 -vv
6462
- name: Upload Coverage to Codecov

.github/workflows/rl_test.yaml

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
name: RL tests
2+
3+
on:
4+
push:
5+
paths:
6+
- 'torchtune/recipes/dev/**grpo**'
7+
- 'torchtune/recipes/configs/dev/**grpo**'
8+
- 'torchtune/dev/rl/**'
9+
- 'torchtune/dev/grpo/**'
10+
pull_request:
11+
paths:
12+
- 'torchtune/recipes/dev/**grpo**'
13+
- 'torchtune/recipes/configs/dev/**grpo**'
14+
- 'torchtune/dev/rl/**'
15+
- 'torchtune/dev/grpo/**'
16+
schedule:
17+
# Runs at midnight UTC every day
18+
- cron: '0 0 * * *'
19+
20+
concurrency:
21+
group: gpu-test-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
22+
cancel-in-progress: true
23+
24+
permissions:
25+
id-token: write
26+
contents: read
27+
28+
defaults:
29+
run:
30+
shell: bash -l -eo pipefail {0}
31+
32+
jobs:
33+
gpu_test:
34+
if: github.repository_owner == 'pytorch'
35+
runs-on: linux.g5.12xlarge.nvidia.gpu
36+
strategy:
37+
matrix:
38+
python-version: ['3.9', '3.10', '3.11']
39+
torch-version: ["stable", "nightly"]
40+
# Do not run against nightlies on PR
41+
exclude:
42+
- torch-version: ${{ github.event_name == 'pull_request' && 'nightly' }}
43+
steps:
44+
- name: Check out repo
45+
uses: actions/checkout@v4
46+
- name: Setup conda env
47+
uses: conda-incubator/setup-miniconda@v2
48+
with:
49+
auto-update-conda: true
50+
miniconda-version: "latest"
51+
activate-environment: test
52+
python-version: ${{ matrix.python-version }}
53+
- name: Update pip
54+
run: python -m pip install --upgrade pip
55+
- name: Install nightly versions of PyTorch packages (if applicable)
56+
if: ${{ matrix.torch-version == 'nightly' }}
57+
run: python -m pip install --pre torch torchvision torchao --index-url https://download.pytorch.org/whl/nightly/cu126
58+
- name: Install torch stable (if applicable)
59+
if: ${{ matrix.torch-version == 'stable' }}
60+
run: python -m pip install torch torchvision torchao
61+
- name: Install recipe-specific dependencies
62+
run: python -m pip install lm-eval==0.4.8
63+
- name: Install the torchtune library with dev options
64+
run: python -m pip install -e ".[dev]"
65+
- name: Install the torchtune library with async_rl options
66+
if: ${{ matrix.python-version != '3.9' }}
67+
run: python -m pip install -e ".[async_rl]"
68+
- name: Run recipe and unit tests with coverage
69+
run: pytest tests/torchtune/dev/rl tests/recipes/dev --run-rl-tests --with-integration --cov=. --cov-report=xml --durations=20 -vv
70+
- name: Upload coverage to Codecov
71+
uses: codecov/codecov-action@v3

.github/workflows/unit_test.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ jobs:
2222
python-version: ['3.9', '3.10', '3.11']
2323
steps:
2424
- name: Check out repo
25-
uses: actions/checkout@v3
25+
uses: actions/checkout@v4
2626
- name: Setup conda env
2727
uses: conda-incubator/setup-miniconda@v2
2828
with:

.pre-commit-config.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,9 @@ repos:
4949
hooks:
5050
- id: pydoclint
5151
args: [--config=pyproject.toml]
52+
53+
- repo: https://github.com/astral-sh/ruff-pre-commit
54+
rev: v0.11.10
55+
hooks:
56+
- id: ruff-check
57+
args: [ --fix ]

CONTRIBUTING.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,9 @@ Whenever running tests in torchtune, favor using the command line flags as much
6060

6161
Note that the above flags can be combined with other pytest flags, so e.g. `pytest tests -m integration_test -k 'test_loss'` will run only recipe tests matching the substring `test_loss`.
6262

63+
> [!NOTE]
64+
> torchtune now contains a [prototype for an asynchronous implementation of GRPO](recipes/dev/async_grpo.md), complete with corresponding tests. Since this prototype brings with it additional dependencies, we do not run any of our async RL tests by default. To run an async RL test, you should append `--run-rl-tests` to your pytest command. E.g. `pytest --with-integration --run-rl-tests tests/recipes/dev/test_async_grpo_full_finetune_distributed.py`.
65+
6366
> [!NOTE]
6467
> Expected reference values for many of our tests have been calculated on Intel-based CPUs, or CUDA-based GPUs. Precision differences when testing on other hardware may cause certain tests to fail due to calculated values falling outside the configured tolerance limits. These tests may be skipped based on detected hardware (e.g. for [MPS](https://github.com/pytorch/torchtune/blob/ca95345b732d41bab7261d208cd5c860a2f76a5a/tests/test_utils.py#L345)).
6568

0 commit comments

Comments
 (0)