
Commit 7832b15

Deprecate onnx ptq docker image as well
Signed-off-by: Keval Morabia <[email protected]>
Parent: 07c158f

8 files changed: +36 −203 lines

.github/workflows/example_tests.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -74,7 +74,7 @@ jobs:
       - uses: nv-gha-runners/setup-proxy-cache@main
       - name: Setup environment variables
         run: |
-          echo "LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
+          echo "LD_LIBRARY_PATH=/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}" >> $GITHUB_ENV
           echo "PATH=/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}" >> $GITHUB_ENV
       - name: Run example tests
         run: |
```
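As a quick sanity check that the updated environment resolves inside the CI container (a sketch; the paths are taken from the diff above and the commands are illustrative, not part of the commit):

```bash
# Print the library search path configured above
echo "$LD_LIBRARY_PATH"
# TensorRT libraries should be present at the directory added to LD_LIBRARY_PATH
ls /usr/local/tensorrt/targets/x86_64-linux-gnu/lib | head
# trtexec should now be discoverable via the updated PATH
which trtexec
```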

.gitlab/tests.yml

Lines changed: 27 additions & 16 deletions

```diff
@@ -25,7 +25,7 @@ unit:
     - tox -e py3$PYTHON-torch$TORCH-tf_$TRANSFORMERS-unit

 ##### GPU Tests #####
-.gpu-tests-default:
+.multi-gpu-tests-default:
   extends: .tests-default
   timeout: 60m
   image: nvcr.io/nvidia/pytorch:25.06-py3
@@ -34,54 +34,65 @@ unit:
   tags: [docker, linux, 2-gpu]
   before_script:
     # Add libcudnn*.so and libnv*.so to path
-    - export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}"
+    - export LD_LIBRARY_PATH="/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}"
     # Add trtexec to path
-    - export PATH="/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:$PATH"
+    - export PATH="/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}"
     # Install git-lfs for Daring-Anteater dataset
     - apt-get update && apt-get install -y git-lfs
     - git lfs install --system

 multi-gpu:
-  extends: .gpu-tests-default
+  extends: .multi-gpu-tests-default
   script:
     # Use pre-installed packages without a new venv with tox-current-env
     - pip install tox-current-env
     - tox -e py312-cuda12-gpu --current-env

 ##### Example Tests #####
-example:
-  extends: .gpu-tests-default
+example-torch:
+  extends: .multi-gpu-tests-default
   timeout: 30m
-  variables:
-    TEST_TYPE: pytest
   parallel:
     matrix:
-      - EXAMPLE: [diffusers, llm_distill, llm_sparsity, onnx_ptq, speculative_decoding]
+      - EXAMPLE: [llm_distill, llm_sparsity, speculative_decoding]
   script:
     - pip install ".[all,dev-test]"
-    # Uninstall apex since T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391
-    - if [ "$EXAMPLE" = "diffusers" ]; then pip uninstall -y apex; fi
     - find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
-    - if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$EXAMPLE; else bash tests/examples/test_$EXAMPLE.sh; fi
+    - pytest -s tests/examples/$EXAMPLE

 # TODO: Fix llm_qat test hang in GitLab CI
 example-failing:
-  extends: example
+  extends: example-torch
   allow_failure: true
   parallel:
     matrix:
       - EXAMPLE: [llm_qat]

-example-ada:
-  extends: example
+example-trtllm:
+  extends: example-torch
   timeout: 60m
   image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
   tags: [docker, linux, 2-gpu, sm>=89]
   parallel:
     matrix:
-      - EXAMPLE: [llm_eval, llm_ptq, vlm_ptq, llm_autodeploy]
+      - EXAMPLE: [llm_autodeploy, llm_eval, llm_ptq, vlm_ptq]
+
+example-onnx:
+  extends: example-torch
+  image: nvcr.io/nvidia/tensorrt:25.08-py3
+  tags: [docker, linux, 2-gpu, sm>=89]
+  parallel:
+    matrix:
+      - EXAMPLE: [diffusers, onnx_ptq]
+        TEST_TYPE: pytest
       - EXAMPLE: [onnx_ptq]
         TEST_TYPE: bash
+  script:
+    # Uninstall apex since T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391
+    - if [ "$EXAMPLE" = "diffusers" ]; then pip uninstall -y apex; fi
+    - pip install ".[all,dev-test]"
+    - find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
+    - if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$EXAMPLE; else bash tests/examples/test_$EXAMPLE.sh; fi

 ##### Megatron / NeMo Integration Tests #####
 megatron-nemo-integration:
```
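The new `example-onnx` job can be approximated locally; a minimal sketch, assuming you are at the repo root inside a suitable container and pick one matrix entry (the `EXAMPLE` and `TEST_TYPE` values below are illustrative):

```bash
#!/usr/bin/env bash
set -e

# Mirror the CI script for a single matrix entry
EXAMPLE=onnx_ptq
TEST_TYPE=pytest

pip install ".[all,dev-test]"
# Install the example's extra requirements, as the CI job does
find "examples/$EXAMPLE" -name "requirements.txt" | while read -r req_file; do
  pip install -r "$req_file" || exit 1
done

if [ "$TEST_TYPE" = "pytest" ]; then
  pytest -s "tests/examples/$EXAMPLE"
else
  bash "tests/examples/test_$EXAMPLE.sh"
fi
```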

CHANGELOG.rst

Lines changed: 1 addition & 1 deletion

```diff
@@ -6,7 +6,7 @@ Model Optimizer Changelog (Linux)

 **Deprecations**

-- Deprecated ModelOpt's custom docker image. Please use the TensorRT-LLM docker image directly or refer to the `installation guide <https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html>`_ for more details.
+- Deprecated ModelOpt's custom docker images. Please use the PyTorch, TensorRT-LLM or TensorRT docker image directly or refer to the `installation guide <https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html>`_ for more details.
 - Deprecated ``quantize_mode`` argument in ``examples/onnx_ptq/evaluate.py`` to support strongly typing. Use ``engine_precision`` instead.
 - Deprecated TRT-LLM's TRT backend in ``examples/llm_ptq`` and ``examples/vlm_ptq``. Tasks ``build`` and ``benchmark`` support are removed and replaced with ``quant``. For performance evaluation, please use ``trtllm-bench`` directly.
 - ``--export_fmt`` flag in ``examples/llm_ptq`` is removed. By default we export to the unified Hugging Face checkpoint format.
```

docs/source/getting_started/_installation_for_Linux.rst

Lines changed: 5 additions & 6 deletions

```diff
@@ -40,7 +40,7 @@ Environment setup

 .. code-block:: shell

-    export LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}"
+    export LD_LIBRARY_PATH="/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}"
     export PATH="/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}"

 You may need to install additional dependencies from the respective examples's `requirements.txt` file.
@@ -51,8 +51,8 @@ Environment setup
 and for NVIDIA NeMo framework, you can use the `NeMo container <https://catalog.ngc.nvidia.com/orgs/nvidia/containers/nemo/tags>`_.
 Both of these containers come with Model Optimizer pre-installed. Make sure to update the Model Optimizer to the latest version if not already.

-For ONNX PTQ, you can use the docker image from `onnx_ptq Dockerfile <https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/examples/onnx_ptq/docker>`_
-which includes the latest publicly available TensorRT version, providing access to cutting-edge features and superior performance.
+For ONNX / TensorRT use cases, you can also use the `TensorRT container <https://catalog.ngc.nvidia.com/orgs/nvidia/containers/tensorrt/tags>`_
+which provides superior performance to the PyTorch container.

 .. tab:: Local environment (PIP / Conda)

@@ -76,9 +76,8 @@ Environment setup

 If you wish to use ModelOpt in conjunction with other NVIDIA libraries (e.g. TensorRT, TensorRT-LLM, NeMo, Triton, etc.),
 please make sure to check the ease of installation of these libraries in a local environment. If you face any
-issues, we recommend using a docker image for a seamless experience. For example, `TensorRT-LLM documentation <https://nvidia.github.io/TensorRT-LLM/>`_.
-requires installing in a docker image. You may still choose to use other ModelOpt's features locally for example,
-quantizing a HuggingFace model and then use a docker image for deployment.
+issues, we recommend using a docker image for a seamless experience. You may still choose to use other ModelOpt's
+features locally for example, quantizing a HuggingFace model and then use a docker image for deployment.

 Install Model Optimizer
 =======================
```
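Since the installation guide now points users at the NGC TensorRT container, here is a minimal launch sketch (the `25.08-py3` tag is borrowed from the CI config in this commit; any recent tag from the NGC catalog should work):

```bash
# Pull and start the NGC TensorRT container with GPU access
docker pull nvcr.io/nvidia/tensorrt:25.08-py3
docker run --gpus all -it --shm-size=2g \
    -v "$(pwd)":/workspace/TensorRT-Model-Optimizer \
    nvcr.io/nvidia/tensorrt:25.08-py3
```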

examples/llm_autodeploy/README.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -8,7 +8,7 @@ This guide demonstrates how to deploy mixed-precision models using ModelOpt's Au

 ## Prerequisites

-AutoDeploy is currently available on the main branch of TRT-LLM. Follow the [docker setup instructions](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/installation/build-from-source-linux.md#option-1-build-tensorrt-llm-in-one-step) to get started.
+AutoDeploy is available in TensorRT-LLM docker images. Please refer to our [Installation Guide](../../README.md#installation) for more details.

 ### 1. Quantize and Deploy Model
```
examples/onnx_ptq/README.md

Lines changed: 1 addition & 13 deletions

````diff
@@ -24,19 +24,7 @@ Model Optimizer enables highly performant quantization formats including NVFP4,

 ### Docker

-Build from this [Dockerfile](./docker/Dockerfile) which includes the latest publicly available TensorRT version, providing access to cutting-edge features and superior performance.
-
-Build the Docker image (will be tagged `docker.io/library/onnx_ptq_examples:latest`)
-
-```bash
-./docker/build.sh
-```
-
-Run the docker image
-
-```bash
-docker run --user 0:0 -it --gpus all --shm-size=2g -v /path/to/ImageNet/dataset:/workspace/imagenet docker.io/library/onnx_ptq_examples:latest
-```
+Please refer to our [Installation Guide](../../README.md#installation) for recommended docker images.

 ### Local Installation
````
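For readers who previously used the removed `docker run` invocation, a rough equivalent on the NGC TensorRT container (flags carried over from the deleted command; the image tag and ImageNet path are placeholders):

```bash
# Approximate replacement for the deleted onnx_ptq_examples image
docker run --user 0:0 -it --gpus all --shm-size=2g \
    -v /path/to/ImageNet/dataset:/workspace/imagenet \
    nvcr.io/nvidia/tensorrt:25.08-py3
```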

examples/onnx_ptq/docker/Dockerfile

Lines changed: 0 additions & 34 deletions
This file was deleted.

examples/onnx_ptq/docker/build.sh

Lines changed: 0 additions & 131 deletions
This file was deleted.
