
Commit 9c5bbdf

Merge branch 'main' into jingyux/megatron-lora
2 parents 8c59aca + 615f3c0

118 files changed: +4207 -2430 lines


.github/CODEOWNERS

Lines changed: 0 additions & 1 deletion
@@ -30,7 +30,6 @@ modelopt/torch/trace @NVIDIA/modelopt-torch-nas-prune-codeowners
 modelopt/torch/utils @NVIDIA/modelopt-torch-utils-codeowners
 
 # Examples
-/docker @NVIDIA/modelopt-docker-codeowners
 /README.md @NVIDIA/modelopt-examples-codeowners
 /examples @NVIDIA/modelopt-examples-codeowners
 /examples/chained_optimizations @NVIDIA/modelopt-torch-nas-prune-codeowners

.github/workflows/code_quality.yml

Lines changed: 1 addition & 1 deletion
@@ -15,7 +15,7 @@ concurrency:
 jobs:
   code-quality:
     runs-on: ubuntu-latest
-    timeout-minutes: 15
+    timeout-minutes: 30
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
.github/workflows/example_tests.yml

Lines changed: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
+# NOTE: Make sure this file is consistent with .gitlab/tests.yml
+name: E2E Example tests
+
+on:
+  push:
+    branches: ["pull-request/[0-9]+"]
+    # NOTE: paths cannot be used since push happens to copied PR and only latest commit to PR is used
+  schedule:
+    - cron: "0 0 * * *" # Nightly
+  workflow_dispatch: # On-demand
+
+# Cancel previous runs if new commit is pushed to the same PR
+concurrency:
+  group: ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  check-file-changes:
+    if: startsWith(github.ref, 'refs/heads/pull-request/')
+    runs-on: ubuntu-latest
+    outputs:
+      any_changed: ${{ steps.changed-tests.outputs.any_changed }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - id: get-pr-info
+        uses: nv-gha-runners/get-pr-info@main
+      # Get commit from main branch that is present in the PR to use as base for changed files
+      - id: calculate-merge-base
+        env:
+          PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
+          BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
+        run: |
+          (echo -n "merge-base="; git merge-base "$BASE_SHA" "$PR_SHA") | tee --append "${GITHUB_OUTPUT}"
+      - name: Check for changes in test-relevant directories
+        id: changed-tests
+        uses: step-security/[email protected]
+        with:
+          base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }}
+          sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
+          files: |
+            .github/workflows/example_tests.yml
+            examples/llm_ptq/**
+            modelopt/torch/**
+            tests/examples/llm_ptq/**
+            setup.py
+          fail_on_initial_diff_error: true
+  wait-checks:
+    needs: [check-file-changes]
+    if: needs.check-file-changes.outputs.any_changed == 'true'
+    uses: ./.github/workflows/_wait_for_checks.yml
+    permissions:
+      checks: read
+    secrets: inherit
+    with:
+      match_pattern: '^DCO$|^linux$' # Wait for DCO and Unit tests / linux to pass
+      delay: 300s
+  example-tests-pr:
+    needs: [check-file-changes, wait-checks]
+    if: needs.check-file-changes.outputs.any_changed == 'true'
+    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
+    runs-on: linux-amd64-gpu-h100-latest-1
+    timeout-minutes: 90
+    strategy:
+      matrix:
+        EXAMPLE: [llm_ptq]
+    container: &example_container
+      image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
+      env:
+        PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
+    steps: &example_steps
+      - uses: actions/checkout@v4
+      - uses: nv-gha-runners/setup-proxy-cache@main
+      - name: Setup environment variables
+        run: |
+          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib" >> $GITHUB_ENV
+          echo "PATH=${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin" >> $GITHUB_ENV
+      - name: Run example tests
+        run: |
+          pip install ".[hf,dev-test]"
+          find examples/${{ matrix.EXAMPLE }} -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
+          pytest -s tests/examples/${{ matrix.EXAMPLE }}
+  example-tests-non-pr:
+    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
+    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
+    runs-on: linux-amd64-gpu-h100-latest-1
+    timeout-minutes: 90
+    strategy:
+      matrix:
+        EXAMPLE: [llm_ptq]
+    container: *example_container
+    steps: *example_steps
+  example-pr-required-check:
+    # Run even if example-tests-pr is skipped
+    if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
+    needs: [check-file-changes, example-tests-pr]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Required GPU tests did not succeed
+        if: ${{ needs.check-file-changes.result != 'success' || (needs.check-file-changes.outputs.any_changed == 'true' && needs.example-tests-pr.result != 'success') }}
+        run: exit 1
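Two details of this new workflow are worth spelling out. Because the PR jobs run against a copied `pull-request/N` branch, `paths:` filtering is unavailable, so the workflow computes the merge base itself and feeds it to `changed-files`. Below is a rough local equivalent of that gate, using only standard git and the same path list as the workflow's `files:` filter; the `BASE_SHA`/`PR_SHA` defaults are assumptions for local use, not part of the workflow:

```bash
#!/usr/bin/env bash
# Sketch: reproduce the workflow's changed-file gate locally.
set -euo pipefail

BASE_SHA="${BASE_SHA:-origin/main}"
PR_SHA="${PR_SHA:-HEAD}"

# Same idea as the calculate-merge-base step: find the commit on main that
# the PR branched from, so unrelated newer commits on main do not show up
# as "changes" in the PR.
merge_base="$(git merge-base "$BASE_SHA" "$PR_SHA")"

# Paths mirrored from the workflow's `files:` filter.
changed="$(git diff --name-only "$merge_base" "$PR_SHA" -- \
  .github/workflows/example_tests.yml \
  examples/llm_ptq \
  modelopt/torch \
  tests/examples/llm_ptq \
  setup.py)"

if [ -n "$changed" ]; then
  echo "any_changed=true; GPU example tests would run for:"
  echo "$changed"
else
  echo "any_changed=false; example tests would be skipped."
fi
```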

.github/workflows/gpu_tests.yml

Lines changed: 4 additions & 2 deletions
@@ -44,7 +44,6 @@ jobs:
             modelopt/**
             tests/gpu/**
             tox.ini
-            pyproject.toml
             setup.py
           fail_on_initial_diff_error: true
   wait-checks:
@@ -67,11 +66,14 @@ jobs:
       image: nvcr.io/nvidia/pytorch:25.06-py3
       env:
         GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
-        LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
         PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
     steps: &gpu_steps
       - uses: actions/checkout@v4
       - uses: nv-gha-runners/setup-proxy-cache@main
+      - name: Setup environment variables
+        run: |
+          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib" >> $GITHUB_ENV
+          echo "PATH=${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin" >> $GITHUB_ENV
       - name: Run gpu tests
         run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env
   gpu-tests-non-pr:
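Note that `LD_LIBRARY_PATH` moves from the container's static `env:` block into a step that appends to `$GITHUB_ENV`, presumably so that `${LD_LIBRARY_PATH}` is expanded by the shell inside the running container (preserving the image's own value) rather than passed through literally from the YAML. A minimal sketch of the `$GITHUB_ENV` mechanism itself; the `mktemp` stand-in and the `source` step are local approximations of what the runner does between steps:

```bash
#!/usr/bin/env bash
# Sketch: how the "Setup environment variables" step propagates values.
# On GitHub Actions, $GITHUB_ENV names a file; KEY=VALUE lines appended to
# it become environment variables for all *subsequent* steps in the job.
GITHUB_ENV="$(mktemp)"   # stand-in for the runner-provided file

# ${LD_LIBRARY_PATH} and ${PATH} expand at run time, inside the container,
# so the image's existing values are kept.
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib" >> "$GITHUB_ENV"
echo "PATH=${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin" >> "$GITHUB_ENV"

# Roughly what the runner does before the next step: re-export each line.
set -a; source "$GITHUB_ENV"; set +a
echo "$LD_LIBRARY_PATH"
```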

.github/workflows/pages.yml

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ permissions:
 jobs:
   build-docs:
     runs-on: ubuntu-latest
-    timeout-minutes: 15
+    timeout-minutes: 30
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5

.github/workflows/unit_tests.yml

Lines changed: 0 additions & 1 deletion
@@ -10,7 +10,6 @@ on:
       - ".github/workflows/unit_tests.yml"
       - "modelopt/**"
       - "tests/unit/**"
-      - "pyproject.toml"
       - "setup.py"
       - "tox.ini"
   schedule:

.gitlab/tests.yml

Lines changed: 53 additions & 27 deletions
@@ -1,11 +1,12 @@
-# NOTE: Make sure this file is consistent with .github/workflows/{unit,gpu}_tests.yml
+# NOTE: Make sure this file is consistent with .github/workflows/{unit,gpu,example}_tests.yml
 .tests-default:
+  variables:
+    PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
   stage: tests
   rules:
     - if: $CI_PIPELINE_SOURCE == "schedule"
-      when: always
-    - if: $CI_PIPELINE_SOURCE != "schedule"
-      when: manual
+    - if: $CI_COMMIT_TAG =~ /^\d+\.\d+\.\d+$/
+    - when: manual
 
 ##### Unit Tests #####
 unit:
@@ -24,49 +25,74 @@ unit:
     - tox -e py3$PYTHON-torch$TORCH-tf_$TRANSFORMERS-unit
 
 ##### GPU Tests #####
-gpu:
+.multi-gpu-tests-default:
   extends: .tests-default
-  timeout: 60m
+  timeout: 90m
   image: nvcr.io/nvidia/pytorch:25.06-py3
   variables:
     GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
-    LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
-    PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
   tags: [docker, linux, 2-gpu]
+  before_script:
+    # Add libcudnn*.so and libnv*.so to path
+    - export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib"
+    # Add trtexec to path
+    - export PATH="${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin"
+    # Install git-lfs for Daring-Anteater dataset
+    - apt-get update && apt-get install -y git-lfs
+    - git lfs install --system
+
+multi-gpu:
+  extends: .multi-gpu-tests-default
   script:
     # Use pre-installed packages without a new venv with tox-current-env
     - pip install tox-current-env
     - tox -e py312-cuda12-gpu --current-env
 
 ##### Example Tests #####
-example:
-  extends: .tests-default
-  stage: tests
-  timeout: 45m
-  image: gitlab-master.nvidia.com:5005/omniml/modelopt/modelopt_examples:latest
-  variables:
-    TEST_TYPE: pytest
-  tags: [docker, linux, 2-gpu, sm<89]
+example-torch:
+  extends: .multi-gpu-tests-default
+  timeout: 30m
   parallel:
     matrix:
-      - TEST: [diffusers, llm_distill, llm_qat, llm_sparsity, onnx_ptq, speculative_decoding]
-  allow_failure: true # Allow to continue next stages even if job is canceled (e.g. during release)
-  before_script:
-    - pip install ".[all]" -U
+      - EXAMPLE: [llm_distill, llm_sparsity, speculative_decoding]
   script:
-    # Uninstall apex since T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391
-    - if [ "$TEST" = "diffusers" ]; then pip uninstall -y apex; fi
-    - if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$TEST; else bash tests/examples/test_$TEST.sh; fi
+    - pip install ".[hf,dev-test]"
+    - find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
+    - pytest -s tests/examples/$EXAMPLE
 
-example-ada:
-  extends: example
+# TODO: Fix llm_qat test hang in GitLab CI
+example-failing:
+  extends: example-torch
+  allow_failure: true
+  parallel:
+    matrix:
+      - EXAMPLE: [llm_qat]
+
+example-trtllm:
+  extends: example-torch
   timeout: 60m
+  image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
+  tags: [docker, linux, 2-gpu, sm>=89]
+  parallel:
+    matrix:
+      - EXAMPLE: [llm_autodeploy, llm_eval, llm_ptq, vlm_ptq]
+
+example-onnx:
+  extends: example-torch
+  image: nvcr.io/nvidia/tensorrt:25.08-py3
   tags: [docker, linux, 2-gpu, sm>=89]
   parallel:
     matrix:
-      - TEST: [llm_eval, llm_ptq, vlm_ptq, llm_autodeploy]
-      - TEST: [onnx_ptq]
+      - EXAMPLE: [diffusers, onnx_ptq]
+        TEST_TYPE: pytest
+      - EXAMPLE: [onnx_ptq]
         TEST_TYPE: bash
+  script:
+    # Uninstall apex since T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391
+    - if [ "$EXAMPLE" = "diffusers" ]; then pip uninstall -y apex; fi
+    - pip install ".[all,dev-test]"
+    - find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
+    - if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$EXAMPLE; else bash tests/examples/test_$EXAMPLE.sh; fi
 
 ##### Megatron / NeMo Integration Tests #####
 megatron-nemo-integration:
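The `find ... | while read ...` line that the example jobs share deserves a note: because the loop is the last stage of a pipeline, it runs in a subshell, so the `|| exit 1` ends that subshell with status 1, the pipeline as a whole returns 1, and the CI runner fails the job instead of silently skipping a bad requirements file. A standalone sketch of the same idiom; the `EXAMPLE` default is an assumption for local use:

```bash
#!/usr/bin/env bash
# Sketch: the requirements-install idiom used by the example jobs above.
set -e

EXAMPLE="${EXAMPLE:-llm_ptq}"   # matrix variable in the real job

# The while loop is a pipeline stage, hence a subshell: `exit 1` ends the
# subshell with status 1, which becomes the pipeline's exit status and
# aborts the script under `set -e`.
find "examples/$EXAMPLE" -name "requirements.txt" | while read -r req_file; do
  pip install -r "$req_file" || exit 1
done
echo "all requirements for $EXAMPLE installed"
```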

CHANGELOG.rst

Lines changed: 13 additions & 1 deletion
@@ -1,13 +1,23 @@
 Model Optimizer Changelog (Linux)
 =================================
 
+0.39 (2025-10-xx)
+^^^^^^^^^^^^^^^^^
+
+**Deprecations**
+
+**New Features**
+
+- Add flag ``op_types_to_exclude_fp16`` in ONNX quantization to exclude ops from being converted to FP16/BF16. Alternatively, for custom TensorRT ops, this can also be done by indicating ``'fp32'`` precision in ``trt_plugins_precision``.
+
 0.37 (2025-09-xx)
 ^^^^^^^^^^^^^^^^^
 
 **Deprecations**
 
+- Deprecated ModelOpt's custom docker images. Please use the PyTorch, TensorRT-LLM or TensorRT docker image directly or refer to the `installation guide <https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html>`_ for more details.
 - Deprecated ``quantize_mode`` argument in ``examples/onnx_ptq/evaluate.py`` to support strongly typing. Use ``engine_precision`` instead.
-- Deprecated TRT-LLM's TRT backend in ``examples/llm_ptq`` and ``examples/vlm_ptq``. Tasks ``build`` and ``benchmark`` support are removed and replaced with ``quant``. For performance evaluation, please use ``trtllm-bench`` directly.
+- Deprecated TRT-LLM's TRT backend in ``examples/llm_ptq`` and ``examples/vlm_ptq``. Tasks ``build`` and ``benchmark`` support are removed and replaced with ``quant``. ``engine_dir`` is replaced with ``checkpoint_dir`` in ``examples/llm_ptq`` and ``examples/vlm_ptq``. For performance evaluation, please use ``trtllm-bench`` directly.
 - ``--export_fmt`` flag in ``examples/llm_ptq`` is removed. By default we export to the unified Hugging Face checkpoint format.
 - Deprecated ``examples/vlm_eval`` as it depends on the deprecated TRT-LLM's TRT backend.
 
@@ -16,6 +26,8 @@ Model Optimizer Changelog (Linux)
 - ``high_precision_dtype`` default to fp16 in ONNX quantization, i.e. quantized output model weights are now FP16 by default.
 - Upgrade TensorRT-LLM dependency to 1.1.0rc2.
 - Support Phi-4-multimodal and Qwen2.5-VL quantized HF checkpoint export in ``examples/vlm_ptq``.
+- Support storing and restoring Minitron pruning activations and scores for re-pruning without running the forward loop again.
+- Add Minitron pruning example for Megatron-LM framework. See ``examples/megatron-lm`` for more details.
 
 0.35 (2025-09-04)
 ^^^^^^^^^^^^^^^^^
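For the new `op_types_to_exclude_fp16` flag in the 0.39 notes, a minimal usage sketch. This assumes the flag is exposed on the `modelopt.onnx.quantization` command line under the same name as the changelog entry, and the op types shown (`Resize`, `Softmax`) are placeholders; check `--help` on your installed version before relying on it:

```bash
# Sketch: quantize an ONNX model while keeping selected op types out of the
# FP16 conversion that `high_precision_dtype fp16` otherwise applies.
python -m modelopt.onnx.quantization \
  --onnx_path model.onnx \
  --quantize_mode int8 \
  --high_precision_dtype fp16 \
  --op_types_to_exclude_fp16 Resize Softmax \
  --output_path model.quant.onnx
```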

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ pip install -e ".[dev]"
 ```
 
 If you are working on features that require dependencies like TensorRT-LLM or Megatron-Core, consider using a docker container to simplify the setup process.
-See [docker README](./README.md#installation--docker) for more details.
+Visit our [installation docs](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more information.
 
 ## 🧹 Code linting and formatting
 
README.md

Lines changed: 8 additions & 4 deletions
@@ -61,20 +61,24 @@ Model Optimizer is also integrated with [NVIDIA NeMo](https://github.com/NVIDIA-
 To install stable release packages for Model Optimizer with `pip` from [PyPI](https://pypi.org/project/nvidia-modelopt/):
 
 ```bash
-pip install nvidia-modelopt[all]
+pip install -U nvidia-modelopt[all]
 ```
 
-To install from source in editable mode with all development dependencies or to test the latest changes, run:
+To install from source in editable mode with all development dependencies or to use the latest features, run:
 
 ```bash
 # Clone the Model Optimizer repository
-git clone https://github.com/NVIDIA/TensorRT-Model-Optimizer.git
+git clone git@github.com:NVIDIA/TensorRT-Model-Optimizer.git
 cd TensorRT-Model-Optimizer
 
 pip install -e .[dev]
 ```
 
-Visit our [installation guide](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more fine-grained control on installed dependencies or view our pre-made [dockerfiles](docker/README.md) for more information.
+You can also directly use the [TensorRT-LLM docker images](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/release/tags)
+(e.g., `nvcr.io/nvidia/tensorrt-llm/release:<version>`), which have Model Optimizer pre-installed.
+Make sure to upgrade Model Optimizer to the latest version using ``pip`` as described above.
+Visit our [installation guide](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for
+more fine-grained control on installed dependencies or for alternative docker images and environment variables to setup.
 
 ## Techniques
 
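A minimal sketch of the docker-based setup the README changes describe. The image tag stays a placeholder as in the README, and the `docker run` flags (`--rm -it --gpus all`) are assumptions about local interactive use, not taken from the README:

```bash
# Start a TensorRT-LLM release container, which ships with Model Optimizer
# pre-installed. Replace <version> with a real tag from the NGC catalog.
docker run --rm -it --gpus all \
  nvcr.io/nvidia/tensorrt-llm/release:<version> bash

# Inside the container, upgrade the pre-installed Model Optimizer to the
# latest release, as the README recommends:
pip install -U nvidia-modelopt[all]
```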