2 changes: 1 addition & 1 deletion .github/workflows/code_quality.yml
@@ -15,7 +15,7 @@ concurrency:
jobs:
code-quality:
runs-on: ubuntu-latest
timeout-minutes: 15
timeout-minutes: 30
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
100 changes: 100 additions & 0 deletions .github/workflows/example_tests.yml
@@ -0,0 +1,100 @@
# NOTE: Make sure this file is consistent with .gitlab/tests.yml
name: E2E Example tests

on:
push:
branches: ["pull-request/[0-9]+"]
# NOTE: `paths` filters cannot be used since the push happens to a copied PR branch and only the latest commit of the PR is used
schedule:
- cron: "0 0 * * *" # Nightly
workflow_dispatch: # On-demand

# Cancel previous runs if a new commit is pushed to the same PR
concurrency:
group: ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref || github.sha }}
cancel-in-progress: true

jobs:
check-file-changes:
if: startsWith(github.ref, 'refs/heads/pull-request/')
runs-on: ubuntu-latest
outputs:
any_changed: ${{ steps.changed-tests.outputs.any_changed }}
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- id: get-pr-info
uses: nv-gha-runners/get-pr-info@main
# Get the commit on the main branch that the PR is based on, to use as the base for changed-files detection
- id: calculate-merge-base
env:
PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
run: |
(echo -n "merge-base="; git merge-base "$BASE_SHA" "$PR_SHA") | tee --append "${GITHUB_OUTPUT}"
- name: Check for changes in test-relevant directories
id: changed-tests
uses: step-security/[email protected]
with:
base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }}
sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
files: |
.github/workflows/example_tests.yml
examples/llm_ptq/**
modelopt/torch/**
tests/examples/llm_ptq/**
setup.py
fail_on_initial_diff_error: true
wait-checks:
needs: [check-file-changes]
if: needs.check-file-changes.outputs.any_changed == 'true'
uses: ./.github/workflows/_wait_for_checks.yml
permissions:
checks: read
secrets: inherit
with:
match_pattern: '^DCO$|^linux$' # Wait for DCO and Unit tests / linux to pass
delay: 300s
example-tests-pr:
needs: [check-file-changes, wait-checks]
if: needs.check-file-changes.outputs.any_changed == 'true'
# Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
runs-on: linux-amd64-gpu-h100-latest-1
timeout-minutes: 90
strategy:
matrix:
EXAMPLE: [llm_ptq]
container: &example_container
image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
env:
LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}"
# PATH: "/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}"
PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
steps: &example_steps
- uses: actions/checkout@v4
- uses: nv-gha-runners/setup-proxy-cache@main
- name: Run example tests
run: |
pip install ".[all,dev-test]"
find examples/${{ matrix.EXAMPLE }} -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
pytest -s tests/examples/${{ matrix.EXAMPLE }}
example-tests-non-pr:
if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
# Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
runs-on: linux-amd64-gpu-h100-latest-1
timeout-minutes: 90
strategy:
matrix:
EXAMPLE: [llm_ptq]
container: *example_container
steps: *example_steps
example-pr-required-check:
# Run even if example-tests-pr is skipped
if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
needs: [check-file-changes, example-tests-pr]
runs-on: ubuntu-latest
steps:
- name: Required GPU tests did not succeed
if: ${{ needs.check-file-changes.result != 'success' || (needs.check-file-changes.outputs.any_changed == 'true' && needs.example-tests-pr.result != 'success') }}
run: exit 1
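For reference, the GPU job's test step can be reproduced outside of CI. The sketch below is an illustration, not part of the workflow: the `docker run` invocation is an assumption (it presumes a CUDA-capable host with Docker and the NVIDIA Container Toolkit), while the pip/pytest commands are taken from the workflow with the `EXAMPLE` matrix entry fixed to `llm_ptq`.

```bash
# Sketch only: mirror the "Run example tests" step above on a local GPU host.
# The docker invocation is an assumption; the image tag and the commands inside
# the container are taken from the workflow, with EXAMPLE fixed to llm_ptq.
docker run --gpus all --rm -it -v "$PWD":/workspace/modelopt -w /workspace/modelopt \
  nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2 bash -c '
    export PIP_CONSTRAINT=""   # same override as the workflow container env
    pip install ".[all,dev-test]"
    find examples/llm_ptq -name "requirements.txt" | while read req_file; do
      pip install -r "$req_file" || exit 1
    done
    pytest -s tests/examples/llm_ptq
  '
```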
1 change: 0 additions & 1 deletion .github/workflows/gpu_tests.yml
@@ -44,7 +44,6 @@ jobs:
modelopt/**
tests/gpu/**
tox.ini
pyproject.toml
setup.py
fail_on_initial_diff_error: true
wait-checks:
2 changes: 1 addition & 1 deletion .github/workflows/pages.yml
@@ -23,7 +23,7 @@ permissions:
jobs:
build-docs:
runs-on: ubuntu-latest
timeout-minutes: 15
timeout-minutes: 30
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
1 change: 0 additions & 1 deletion .github/workflows/unit_tests.yml
@@ -10,7 +10,6 @@ on:
- ".github/workflows/unit_tests.yml"
- "modelopt/**"
- "tests/unit/**"
- "pyproject.toml"
- "setup.py"
- "tox.ini"
schedule:
13 changes: 7 additions & 6 deletions .gitlab/tests.yml
@@ -49,23 +49,24 @@ example:
tags: [docker, linux, 2-gpu, sm<89]
parallel:
matrix:
- TEST: [diffusers, llm_distill, llm_qat, llm_sparsity, onnx_ptq, speculative_decoding]
- EXAMPLE: [diffusers, llm_distill, llm_qat, llm_sparsity, onnx_ptq, speculative_decoding]
allow_failure: true # Allow later stages to continue even if this job is canceled (e.g. during a release)
before_script:
- pip install ".[all]" -U
- pip install ".[all,dev-test]"
script:
# Uninstall apex since T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391
- if [ "$TEST" = "diffusers" ]; then pip uninstall -y apex; fi
- if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$TEST; else bash tests/examples/test_$TEST.sh; fi
- if [ "$EXAMPLE" = "diffusers" ]; then pip uninstall -y apex; fi
- find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
- if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$EXAMPLE; else bash tests/examples/test_$EXAMPLE.sh; fi

example-ada:
extends: example
timeout: 60m
tags: [docker, linux, 2-gpu, sm>=89]
parallel:
matrix:
- TEST: [llm_eval, llm_ptq, vlm_ptq, llm_autodeploy]
- TEST: [onnx_ptq]
- EXAMPLE: [llm_eval, llm_ptq, vlm_ptq, llm_autodeploy]
- EXAMPLE: [onnx_ptq]
TEST_TYPE: bash

##### Megatron / NeMo Integration Tests #####
2 changes: 1 addition & 1 deletion README.md
@@ -68,7 +68,7 @@ To install from source in editable mode with all development dependencies or to

```bash
# Clone the Model Optimizer repository
git clone https://github.com/NVIDIA/TensorRT-Model-Optimizer.git
git clone git@github.com:NVIDIA/TensorRT-Model-Optimizer.git
cd TensorRT-Model-Optimizer

pip install -e .[dev]
17 changes: 5 additions & 12 deletions docker/Dockerfile
@@ -1,10 +1,9 @@
FROM nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2

ARG PIP_EXTRA_INDEX_URL="https://pypi.nvidia.com"
ENV PIP_EXTRA_INDEX_URL=$PIP_EXTRA_INDEX_URL \
ENV PIP_EXTRA_INDEX_URL="https://pypi.nvidia.com" \
PIP_NO_CACHE_DIR=off \
PIP_CONSTRAINT= \
TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0 10.0+PTX"
TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0 10.0 12.0+PTX"

RUN apt-get update && \
apt-get install -y libgl1 && \
@@ -18,17 +17,11 @@ RUN ln -s /app/tensorrt_llm /workspace/tensorrt_llm
ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}" \
PATH="/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}"

# Install modelopt with all optional dependencies and pre-compile CUDA extensions otherwise they take several minutes on every docker run
RUN pip install -U "nvidia-modelopt[all,dev-test]"
RUN python -c "import modelopt.torch.quantization.extensions as ext; ext.precompile()"

# Find and install requirements.txt files for all examples excluding windows
# Install modelopt from source with all optional dependencies and pre-compile CUDA extensions, which otherwise take several minutes on every docker run
COPY . TensorRT-Model-Optimizer
RUN pip install -e "./TensorRT-Model-Optimizer[all]"
RUN rm -rf TensorRT-Model-Optimizer/.git
RUN find TensorRT-Model-Optimizer/examples -name "requirements.txt" | grep -v "windows" | while read req_file; do \
echo "Installing from $req_file"; \
pip install -r "$req_file" || exit 1; \
done
RUN python -c "import modelopt.torch.quantization.extensions as ext; ext.precompile()"

# Allow users to run without root
RUN chmod -R 777 /workspace
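Since the Dockerfile now installs ModelOpt from the copied source tree and pre-compiles the CUDA extensions at build time, a typical way to exercise it is to build from the repository root. The explicit `docker build`/`docker run` commands below are assumptions; the tag matches the `modelopt_examples:latest` name referenced in the updated installation docs.

```bash
# Sketch only: build the image from the repository root and open a GPU shell.
# The build/run invocations are assumptions, not commands from this PR.
docker build -f docker/Dockerfile -t modelopt_examples:latest .
docker run --gpus all --rm -it modelopt_examples:latest bash
```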
11 changes: 5 additions & 6 deletions docs/source/getting_started/_installation_for_Linux.rst
@@ -34,16 +34,16 @@ Environment setup

To use Model Optimizer with full dependencies (e.g. TensorRT/TensorRT-LLM deployment), we recommend using our provided docker image
which is based on the `TensorRT-LLM <https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/release/tags>`_
docker image with additional example-specific dependencies installed.
docker image with additional dependencies installed.

After installing the `NVIDIA Container Toolkit <https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html>`_,
please run the following commands to build the Model Optimizer docker container which has all the necessary
dependencies pre-installed for running the examples.
please run the following commands to build the Model Optimizer docker container which has all the base
dependencies pre-installed. You may need to install additional dependencies from the example's `requirements.txt` file.

.. code-block:: shell

# Clone the ModelOpt repository
git clone https://github.com/NVIDIA/TensorRT-Model-Optimizer.git
git clone git@github.com:NVIDIA/TensorRT-Model-Optimizer.git
cd TensorRT-Model-Optimizer

# Build the docker (will be tagged `docker.io/library/modelopt_examples:latest`)
@@ -60,8 +60,7 @@ Environment setup

For PyTorch, you can also use `NVIDIA NGC PyTorch container <https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags>`_
and for NVIDIA NeMo framework, you can use the `NeMo container <https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo/tags>`_.
Both of these containers come with Model Optimizer pre-installed. NeMo container also comes with the HuggingFace and TensorRT-LLM
dependencies. Make sure to update the Model Optimizer to the latest version if not already.
Both of these containers come with Model Optimizer pre-installed. Make sure to update the Model Optimizer to the latest version if not already.

For ONNX PTQ, you can use the optimized docker image from [onnx_ptq Dockerfile](https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/examples/onnx_ptq/docker).

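As a concrete illustration of the updated installation docs above, installing one example's extra dependencies inside the container could look like the following; onnx_ptq is used purely as an illustration (its `requirements.txt` is touched elsewhere in this PR).

```bash
# Sketch: install an example's extra dependencies inside the container.
pip install -r examples/onnx_ptq/requirements.txt
```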
1 change: 1 addition & 0 deletions examples/cnn_qat/requirements.txt
@@ -0,0 +1 @@
torchvision
1 change: 1 addition & 0 deletions examples/llm_eval/requirements.txt
@@ -4,3 +4,4 @@ openai>=0.28.1
peft>=0.5.0
rwkv>=0.7.3
tiktoken
torchvision
12 changes: 6 additions & 6 deletions examples/onnx_ptq/docker/Dockerfile
@@ -19,16 +19,16 @@ ENV LD_LIBRARY_PATH="${CUDNN_LIB_DIR}:${TRT_PATH}/lib:/usr/include:${LD_LIBRARY_
ENV PATH="${TRT_PATH}/bin:${PATH}"

# Copy application code and install requirements
COPY modelopt modelopt/modelopt
COPY examples/onnx_ptq modelopt/examples/onnx_ptq
COPY setup.py modelopt/setup.py
COPY pyproject.toml modelopt/pyproject.toml
COPY modelopt TensorRT-Model-Optimizer/modelopt
COPY examples/onnx_ptq TensorRT-Model-Optimizer/examples/onnx_ptq
COPY setup.py TensorRT-Model-Optimizer/setup.py
COPY pyproject.toml TensorRT-Model-Optimizer/pyproject.toml

# Install onnx_ptq requirements
RUN pip install -r modelopt/examples/onnx_ptq/requirements.txt
RUN pip install -r TensorRT-Model-Optimizer/examples/onnx_ptq/requirements.txt

# Install modelopt
RUN pip install -e "./modelopt[hf,onnx]"
RUN pip install -e "./TensorRT-Model-Optimizer[hf,onnx]"

# Allow users to run without root
RUN chmod -R 777 /workspace
1 change: 1 addition & 0 deletions examples/onnx_ptq/requirements.txt
@@ -2,3 +2,4 @@ datasets>=2.14.4
optimum
sentencepiece
timm
torchvision
9 changes: 9 additions & 0 deletions examples/pruning/cifar_resnet.ipynb
@@ -22,6 +22,15 @@
"Let's first install `Model Optimizer` following the [installation steps](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"! pip install nvidia-modelopt torchvision"
]
},
{
"cell_type": "code",
"execution_count": 2,
2 changes: 1 addition & 1 deletion setup.py
@@ -38,7 +38,6 @@
"safetensors",
"torch>=2.6",
"torchprofile>=0.0.4",
"torchvision",
]

optional_deps = {
@@ -79,6 +78,7 @@
"pytest-cov",
"pytest-timeout",
"timm",
"torchvision",
"tox>4.18",
"tox-current-env>=0.0.12",
],
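With torchvision moved from the core install requirements into the dev-test extra (and into the individual example `requirements.txt` files added elsewhere in this PR), the install paths that still pull it in are roughly the following; the commands are illustrative, not prescriptive.

```bash
# Illustration of where torchvision comes from after this change:
pip install nvidia-modelopt                        # core install: no torchvision
pip install -e ".[dev-test]"                       # dev-test extra: includes torchvision
pip install -r examples/onnx_ptq/requirements.txt  # per-example requirements: includes torchvision
```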
11 changes: 0 additions & 11 deletions tests/examples/speculative_decoding/test_medusa.py
@@ -13,21 +13,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import subprocess

import pytest
from _test_utils.examples.run_command import run_example_command


# TODO: Medusa QAT FSDP test hangs if transformers>=4.50
@pytest.fixture(scope="session", autouse=True)
def install_transformers_lt_4_50():
subprocess.run(
["pip", "install", "transformers<4.50"],
check=True,
)


# fmt: off
def _run_hf_ptq(model_path, output_dir, qformat):
run_example_command(