1 change: 0 additions & 1 deletion .github/CODEOWNERS
@@ -30,7 +30,6 @@ modelopt/torch/trace @NVIDIA/modelopt-torch-nas-prune-codeowners
modelopt/torch/utils @NVIDIA/modelopt-torch-utils-codeowners

# Examples
/docker @NVIDIA/modelopt-docker-codeowners
/README.md @NVIDIA/modelopt-examples-codeowners
/examples @NVIDIA/modelopt-examples-codeowners
/examples/chained_optimizations @NVIDIA/modelopt-torch-nas-prune-codeowners
8 changes: 5 additions & 3 deletions .github/workflows/example_tests.yml
@@ -68,15 +68,17 @@ jobs:
container: &example_container
image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
env:
LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}"
# PATH: "/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}"
PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
steps: &example_steps
- uses: actions/checkout@v4
- uses: nv-gha-runners/setup-proxy-cache@main
- name: Setup environment variables
run: |
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib" >> $GITHUB_ENV
echo "PATH=${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin" >> $GITHUB_ENV
- name: Run example tests
run: |
pip install ".[all,dev-test]"
pip install ".[hf,dev-test]"
find examples/${{ matrix.EXAMPLE }} -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
pytest -s tests/examples/${{ matrix.EXAMPLE }}
example-tests-non-pr:
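A minimal sketch of what the reworked example-test steps above boil down to when run by hand inside the same TensorRT-LLM container (the `EXAMPLE` value below is a placeholder for any entry in the job matrix):

```bash
# Sketch only — mirrors the updated "Setup environment variables" and "Run example tests" steps.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib"
export PATH="${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin"
export PIP_CONSTRAINT=""   # allow upgrading packages pinned by the container

EXAMPLE=llm_ptq            # placeholder; substitute the matrix entry under test
pip install ".[hf,dev-test]"
find "examples/${EXAMPLE}" -name "requirements.txt" | while read req_file; do
  pip install -r "$req_file" || exit 1
done
pytest -s "tests/examples/${EXAMPLE}"
```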
5 changes: 4 additions & 1 deletion .github/workflows/gpu_tests.yml
@@ -66,11 +66,14 @@ jobs:
image: nvcr.io/nvidia/pytorch:25.06-py3
env:
GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
steps: &gpu_steps
- uses: actions/checkout@v4
- uses: nv-gha-runners/setup-proxy-cache@main
- name: Setup environment variables
run: |
echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib" >> $GITHUB_ENV
echo "PATH=${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin" >> $GITHUB_ENV
- name: Run gpu tests
run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env
gpu-tests-non-pr:
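The GPU job follows the same pattern; run locally inside the same NGC PyTorch container, the steps above reduce to roughly this sketch:

```bash
# Sketch only — the GPU suite reuses the container's pre-installed packages via tox-current-env.
export PIP_CONSTRAINT=""
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib"
export PATH="${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin"

pip install tox-current-env
tox -e py312-cuda12-gpu --current-env   # no new virtualenv is created
```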
75 changes: 50 additions & 25 deletions .gitlab/tests.yml
@@ -1,11 +1,12 @@
# NOTE: Make sure this file is consistent with .github/workflows/{unit,gpu}_tests.yml
# NOTE: Make sure this file is consistent with .github/workflows/{unit,gpu,example}_tests.yml
.tests-default:
variables:
PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
stage: tests
rules:
- if: $CI_PIPELINE_SOURCE == "schedule"
when: always
- if: $CI_PIPELINE_SOURCE != "schedule"
when: manual
- if: $CI_COMMIT_TAG =~ /^\d+\.\d+\.\d+$/
- when: manual

##### Unit Tests #####
unit:
@@ -24,50 +25,74 @@ unit:
- tox -e py3$PYTHON-torch$TORCH-tf_$TRANSFORMERS-unit

##### GPU Tests #####
gpu:
.multi-gpu-tests-default:
extends: .tests-default
timeout: 60m
image: nvcr.io/nvidia/pytorch:25.06-py3
variables:
GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
tags: [docker, linux, 2-gpu]
before_script:
# Add libcudnn*.so and libnv*.so to path
- export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib"
# Add trtexec to path
- export PATH="${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin"
# Install git-lfs for Daring-Anteater dataset
- apt-get update && apt-get install -y git-lfs
- git lfs install --system

multi-gpu:
extends: .multi-gpu-tests-default
script:
# Use pre-installed packages without a new venv with tox-current-env
- pip install tox-current-env
- tox -e py312-cuda12-gpu --current-env

##### Example Tests #####
example:
extends: .tests-default
stage: tests
timeout: 45m
image: gitlab-master.nvidia.com:5005/omniml/modelopt/modelopt_examples:latest
variables:
TEST_TYPE: pytest
tags: [docker, linux, 2-gpu, sm<89]
example-torch:
extends: .multi-gpu-tests-default
timeout: 30m
parallel:
matrix:
- EXAMPLE: [diffusers, llm_distill, llm_qat, llm_sparsity, onnx_ptq, speculative_decoding]
allow_failure: true # Allow to continue next stages even if job is canceled (e.g. during release)
before_script:
- pip install ".[all,dev-test]"
- EXAMPLE: [llm_distill, llm_sparsity, speculative_decoding]
script:
# Uninstall apex since T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391
- if [ "$EXAMPLE" = "diffusers" ]; then pip uninstall -y apex; fi
- pip install ".[hf,dev-test]"
- find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
- if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$EXAMPLE; else bash tests/examples/test_$EXAMPLE.sh; fi
- pytest -s tests/examples/$EXAMPLE

example-ada:
extends: example
# TODO: Fix llm_qat test hang in GitLab CI
example-failing:
extends: example-torch
allow_failure: true
parallel:
matrix:
- EXAMPLE: [llm_qat]

example-trtllm:
extends: example-torch
timeout: 60m
image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
tags: [docker, linux, 2-gpu, sm>=89]
parallel:
matrix:
- EXAMPLE: [llm_autodeploy, llm_eval, llm_ptq, vlm_ptq]

example-onnx:
extends: example-torch
image: nvcr.io/nvidia/tensorrt:25.08-py3
tags: [docker, linux, 2-gpu, sm>=89]
parallel:
matrix:
- EXAMPLE: [llm_eval, llm_ptq, vlm_ptq, llm_autodeploy]
- EXAMPLE: [diffusers, onnx_ptq]
TEST_TYPE: pytest
- EXAMPLE: [onnx_ptq]
TEST_TYPE: bash
script:
# Uninstall apex since T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391
- if [ "$EXAMPLE" = "diffusers" ]; then pip uninstall -y apex; fi
- pip install ".[all,dev-test]"
- find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
- if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$EXAMPLE; else bash tests/examples/test_$EXAMPLE.sh; fi

##### Megatron / NeMo Integration Tests #####
megatron-nemo-integration:
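For reference, the `example-onnx` job above is the only one that keeps the `TEST_TYPE` switch; its script amounts to roughly the following sketch (`$EXAMPLE` and `$TEST_TYPE` come from the parallel matrix):

```bash
# Sketch of the example-onnx job script; EXAMPLE and TEST_TYPE come from the GitLab matrix.
if [ "$EXAMPLE" = "diffusers" ]; then
  pip uninstall -y apex   # T5 Int8 (PixArt) + Apex is not supported (huggingface/transformers#21391)
fi
pip install ".[all,dev-test]"
find "examples/$EXAMPLE" -name "requirements.txt" | while read req_file; do
  pip install -r "$req_file" || exit 1
done
if [ "$TEST_TYPE" = "pytest" ]; then
  pytest -s "tests/examples/$EXAMPLE"
else
  bash "tests/examples/test_$EXAMPLE.sh"
fi
```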
1 change: 1 addition & 0 deletions CHANGELOG.rst
@@ -6,6 +6,7 @@ Model Optimizer Changelog (Linux)

**Deprecations**

- Deprecated ModelOpt's custom docker images. Please use the PyTorch, TensorRT-LLM or TensorRT docker image directly or refer to the `installation guide <https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html>`_ for more details.
- Deprecated ``quantize_mode`` argument in ``examples/onnx_ptq/evaluate.py`` to support strongly typing. Use ``engine_precision`` instead.
- Deprecated TRT-LLM's TRT backend in ``examples/llm_ptq`` and ``examples/vlm_ptq``. Tasks ``build`` and ``benchmark`` support are removed and replaced with ``quant``. For performance evaluation, please use ``trtllm-bench`` directly.
- ``--export_fmt`` flag in ``examples/llm_ptq`` is removed. By default we export to the unified Hugging Face checkpoint format.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -11,7 +11,7 @@ pip install -e ".[dev]"
```

If you are working on features that require dependencies like TensorRT-LLM or Megatron-Core, consider using a docker container to simplify the setup process.
See [docker README](./README.md#installation--docker) for more details.
Visit our [installation docs](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more information.

## 🧹 Code linting and formatting

10 changes: 7 additions & 3 deletions README.md
@@ -61,10 +61,10 @@ Model Optimizer is also integrated with [NVIDIA NeMo](https://github.com/NVIDIA-
To install stable release packages for Model Optimizer with `pip` from [PyPI](https://pypi.org/project/nvidia-modelopt/):

```bash
pip install nvidia-modelopt[all]
pip install -U nvidia-modelopt[all]
```

To install from source in editable mode with all development dependencies or to test the latest changes, run:
To install from source in editable mode with all development dependencies or to use the latest features, run:

```bash
# Clone the Model Optimizer repository
@@ -74,7 +74,11 @@ cd TensorRT-Model-Optimizer
pip install -e .[dev]
```

Visit our [installation guide](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more fine-grained control on installed dependencies or view our pre-made [dockerfiles](docker/README.md) for more information.
You can also directly use the [TensorRT-LLM docker images](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/release/tags)
(e.g., `nvcr.io/nvidia/tensorrt-llm/release:<version>`), which have Model Optimizer pre-installed.
Make sure to upgrade Model Optimizer to the latest version using ``pip`` as described above.
Visit our [installation guide](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for
more fine-grained control over installed dependencies, or for alternative docker images and the environment variables to set up.

## Techniques

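A hedged end-to-end example of the docker-based flow the README now recommends (the image tag is illustrative — pick a current one from NGC; the `--shm-size` value mirrors the old docker instructions):

```bash
# Illustrative only: start a TensorRT-LLM release image, then refresh Model Optimizer inside it.
docker run --gpus all -it --rm --shm-size 20g nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2 bash

# Inside the container:
pip install -U nvidia-modelopt[all]
python -c "import modelopt; print(modelopt.__version__)"
```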
27 changes: 0 additions & 27 deletions docker/Dockerfile

This file was deleted.

16 changes: 0 additions & 16 deletions docker/README.md

This file was deleted.

19 changes: 0 additions & 19 deletions docker/build.sh

This file was deleted.

38 changes: 14 additions & 24 deletions docs/source/getting_started/_installation_for_Linux.rst
@@ -30,39 +30,30 @@ Environment setup

.. tab:: Docker image (Recommended)

**Using ModelOpt's docker image**
To use Model Optimizer with full dependencies (e.g. TensorRT/TensorRT-LLM deployment), we recommend using the
`TensorRT-LLM docker image <https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/release/tags>`_,
e.g., ``nvcr.io/nvidia/tensorrt-llm/release:<version>``.

To use Model Optimizer with full dependencies (e.g. TensorRT/TensorRT-LLM deployment), we recommend using our provided docker image
which is based on the `TensorRT-LLM <https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/release/tags>`_
docker image with additional dependencies installed.
Make sure to upgrade Model Optimizer to the latest version using ``pip`` as described in the next section.

After installing the `NVIDIA Container Toolkit <https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html>`_,
please run the following commands to build the Model Optimizer docker container which has all the base
dependencies pre-installed. You may need to install additional dependencies from the examples's `requirements.txt` file.
You would also need to setup appropriate environment variables for the TensorRT binaries as follows:

.. code-block:: shell

# Clone the ModelOpt repository
git clone [email protected]:NVIDIA/TensorRT-Model-Optimizer.git
cd TensorRT-Model-Optimizer
export PIP_CONSTRAINT=""
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib"
export PATH="${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin"

# Build the docker (will be tagged `docker.io/library/modelopt_examples:latest`)
# You may customize `docker/Dockerfile` to include or exclude certain dependencies you may or may not need.
bash docker/build.sh
You may need to install additional dependencies from the respective example's `requirements.txt` file.

# Run the docker image
docker run --gpus all -it --shm-size 20g --rm docker.io/library/modelopt_examples:latest bash

# Check installation (inside the docker container)
python -c "import modelopt; print(modelopt.__version__)"

**Using alternative NVIDIA docker images**
**Alternative NVIDIA docker images**

For PyTorch, you can also use `NVIDIA NGC PyTorch container <https://catalog.ngc.nvidia.com/orgs/nvidia/containers/pytorch/tags>`_
and for NVIDIA NeMo framework, you can use the `NeMo container <https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo/tags>`_.
Both of these containers come with Model Optimizer pre-installed. Make sure to update the Model Optimizer to the latest version if not already.

For ONNX PTQ, you can use the optimized docker image from [onnx_ptq Dockerfile](https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/examples/onnx_ptq/docker).
For ONNX / TensorRT use cases, you can also use the `TensorRT container <https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorrt/tags>`_
which provides superior performance to the PyTorch container.

.. tab:: Local environment (PIP / Conda)

@@ -86,9 +77,8 @@ Environment setup

If you wish to use ModelOpt in conjunction with other NVIDIA libraries (e.g. TensorRT, TensorRT-LLM, NeMo, Triton, etc.),
please make sure to check the ease of installation of these libraries in a local environment. If you face any
issues, we recommend using a docker image for a seamless experience. For example, `TensorRT-LLM documentation <https://nvidia.github.io/TensorRT-LLM/>`_.
requires installing in a docker image. You may still choose to use other ModelOpt's features locally for example,
quantizing a HuggingFace model and then use a docker image for deployment.
issues, we recommend using a docker image for a seamless experience. You may still choose to use ModelOpt's other
features locally, for example, quantizing a HuggingFace model and then using a docker image for deployment.

Install Model Optimizer
=======================
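After exporting the variables shown above inside the container, a quick sanity check (a sketch mirroring the check the old instructions used) might look like:

```bash
# Sketch only: verify the installation and that the TensorRT binaries are on PATH.
python -c "import modelopt; print(modelopt.__version__)"
which trtexec   # expected to resolve under /usr/local/tensorrt/targets/x86_64-linux-gnu/bin
```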
10 changes: 9 additions & 1 deletion examples/diffusers/README.md
@@ -27,6 +27,14 @@ Cache Diffusion is a technique that reuses cached outputs from previous diffusio

## Pre-Requisites

### Docker

Please use the TensorRT docker image (e.g., `nvcr.io/nvidia/tensorrt:25.08-py3`) or visit our [installation docs](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more information.

Also follow the installation steps below to upgrade to the latest version of Model Optimizer and install example-specific dependencies.

### Local Installation

Install Model Optimizer with `onnx` and `hf` dependencies using `pip` from [PyPI](https://pypi.org/project/nvidia-modelopt/):

```bash
@@ -37,7 +45,7 @@ Each subsection (cache_diffusion, quantization, etc.) have their own `requiremen

You can find the latest TensorRT [here](https://developer.nvidia.com/tensorrt/download).

Visit our [installation guide](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) or view our pre-made [dockerfiles](../../docker/Dockerfile) for more information.
Visit our [installation docs](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more information.

## Getting Started

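A minimal sketch of starting from the TensorRT image mentioned above before following the installation steps (the tag mirrors the one used in CI; the volume mount is illustrative):

```bash
# Illustrative only: launch the TensorRT container and mount a local checkout of the repo.
docker run --gpus all -it --rm --shm-size 20g \
  -v "$(pwd)":/workspace/TensorRT-Model-Optimizer \
  nvcr.io/nvidia/tensorrt:25.08-py3 bash
```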
2 changes: 1 addition & 1 deletion examples/diffusers/cache_diffusion/requirements.txt
@@ -1,4 +1,4 @@
cuda-python
cuda-python<13
opencv-python>=4.8.1.78,<4.12.0.88
peft>=0.10.0
polygraphy==0.49.9
2 changes: 1 addition & 1 deletion examples/diffusers/quantization/requirements.txt
@@ -1,4 +1,4 @@
cuda-python
cuda-python<13
diffusers<=0.34.0
nvtx
onnx_graphsurgeon
2 changes: 1 addition & 1 deletion examples/llm_autodeploy/README.md
@@ -8,7 +8,7 @@ This guide demonstrates how to deploy mixed-precision models using ModelOpt's Au

## Prerequisites

AutoDeploy is currently available on the main branch of TRT-LLM. Follow the [docker setup instructions](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/installation/build-from-source-linux.md#option-1-build-tensorrt-llm-in-one-step) to get started.
AutoDeploy is available in TensorRT-LLM docker images. Please refer to our [Installation Guide](../../README.md#installation) for more details.

### 1. Quantize and Deploy Model
