diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 924861c1..61a416ee 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -30,7 +30,6 @@ modelopt/torch/trace @NVIDIA/modelopt-torch-nas-prune-codeowners
 modelopt/torch/utils @NVIDIA/modelopt-torch-utils-codeowners

 # Examples
-/docker @NVIDIA/modelopt-docker-codeowners
 /README.md @NVIDIA/modelopt-examples-codeowners
 /examples @NVIDIA/modelopt-examples-codeowners
 /examples/chained_optimizations @NVIDIA/modelopt-torch-nas-prune-codeowners
diff --git a/.github/workflows/example_tests.yml b/.github/workflows/example_tests.yml
index 57b9f04c..272b84bf 100644
--- a/.github/workflows/example_tests.yml
+++ b/.github/workflows/example_tests.yml
@@ -68,15 +68,17 @@ jobs:
     container: &example_container
       image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
       env:
-        LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}"
-        # PATH: "/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}"
         PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
     steps: &example_steps
       - uses: actions/checkout@v4
       - uses: nv-gha-runners/setup-proxy-cache@main
+      - name: Setup environment variables
+        run: |
+          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib" >> $GITHUB_ENV
+          echo "PATH=${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin" >> $GITHUB_ENV
      - name: Run example tests
        run: |
-          pip install ".[all,dev-test]"
+          pip install ".[hf,dev-test]"
          find examples/${{ matrix.EXAMPLE }} -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
          pytest -s tests/examples/${{ matrix.EXAMPLE }}
   example-tests-non-pr:
diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml
index 693c99b1..402191dc 100644
--- a/.github/workflows/gpu_tests.yml
+++ b/.github/workflows/gpu_tests.yml
@@ -66,11 +66,14 @@ jobs:
       image: nvcr.io/nvidia/pytorch:25.06-py3
       env:
         GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
-        LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
         PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
     steps: &gpu_steps
       - uses: actions/checkout@v4
       - uses: nv-gha-runners/setup-proxy-cache@main
+      - name: Setup environment variables
+        run: |
+          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib" >> $GITHUB_ENV
+          echo "PATH=${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin" >> $GITHUB_ENV
      - name: Run gpu tests
        run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env
   gpu-tests-non-pr:
diff --git a/.gitlab/tests.yml b/.gitlab/tests.yml
index 91640b11..e4b5c90e 100644
--- a/.gitlab/tests.yml
+++ b/.gitlab/tests.yml
@@ -1,11 +1,12 @@
-# NOTE: Make sure this file is consistent with .github/workflows/{unit,gpu}_tests.yml
+# NOTE: Make sure this file is consistent with .github/workflows/{unit,gpu,example}_tests.yml
 .tests-default:
+  variables:
+    PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
   stage: tests
   rules:
     - if: $CI_PIPELINE_SOURCE == "schedule"
-      when: always
-    - if: $CI_PIPELINE_SOURCE != "schedule"
-      when: manual
+    - if: $CI_COMMIT_TAG =~ /^\d+\.\d+\.\d+$/
+    - when: manual

 ##### Unit Tests #####
 unit:
@@ -24,50 +25,74 @@ unit:
     - tox -e py3$PYTHON-torch$TORCH-tf_$TRANSFORMERS-unit

 ##### GPU Tests #####
-gpu:
+.multi-gpu-tests-default:
   extends: .tests-default
   timeout: 60m
   image: nvcr.io/nvidia/pytorch:25.06-py3
   variables:
     GIT_DEPTH: 1000 # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
-    LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
-    PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
   tags: [docker, linux, 2-gpu]
+  before_script:
+    # Add libcudnn*.so and libnv*.so to path
+    - export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib"
+    # Add trtexec to path
+    - export PATH="${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin"
+    # Install git-lfs for Daring-Anteater dataset
+    - apt-get update && apt-get install -y git-lfs
+    - git lfs install --system
+
+multi-gpu:
+  extends: .multi-gpu-tests-default
   script:
     # Use pre-installed packages without a new venv with tox-current-env
     - pip install tox-current-env
     - tox -e py312-cuda12-gpu --current-env

 ##### Example Tests #####
-example:
-  extends: .tests-default
-  stage: tests
-  timeout: 45m
-  image: gitlab-master.nvidia.com:5005/omniml/modelopt/modelopt_examples:latest
-  variables:
-    TEST_TYPE: pytest
-  tags: [docker, linux, 2-gpu, sm<89]
+example-torch:
+  extends: .multi-gpu-tests-default
+  timeout: 30m
   parallel:
     matrix:
-      - EXAMPLE: [diffusers, llm_distill, llm_qat, llm_sparsity, onnx_ptq, speculative_decoding]
-  allow_failure: true # Allow to continue next stages even if job is canceled (e.g. during release)
-  before_script:
-    - pip install ".[all,dev-test]"
+      - EXAMPLE: [llm_distill, llm_sparsity, speculative_decoding]
   script:
-    # Uninstall apex since T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391
-    - if [ "$EXAMPLE" = "diffusers" ]; then pip uninstall -y apex; fi
+    - pip install ".[hf,dev-test]"
     - find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
-    - if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$EXAMPLE; else bash tests/examples/test_$EXAMPLE.sh; fi
+    - pytest -s tests/examples/$EXAMPLE

-example-ada:
-  extends: example
+# TODO: Fix llm_qat test hang in GitLab CI
+example-failing:
+  extends: example-torch
+  allow_failure: true
+  parallel:
+    matrix:
+      - EXAMPLE: [llm_qat]
+
+example-trtllm:
+  extends: example-torch
   timeout: 60m
+  image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
+  tags: [docker, linux, 2-gpu, sm>=89]
+  parallel:
+    matrix:
+      - EXAMPLE: [llm_autodeploy, llm_eval, llm_ptq, vlm_ptq]
+
+example-onnx:
+  extends: example-torch
+  image: nvcr.io/nvidia/tensorrt:25.08-py3
   tags: [docker, linux, 2-gpu, sm>=89]
   parallel:
     matrix:
-      - EXAMPLE: [llm_eval, llm_ptq, vlm_ptq, llm_autodeploy]
+      - EXAMPLE: [diffusers, onnx_ptq]
+        TEST_TYPE: pytest
       - EXAMPLE: [onnx_ptq]
         TEST_TYPE: bash
+  script:
+    # Uninstall apex since T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391
+    - if [ "$EXAMPLE" = "diffusers" ]; then pip uninstall -y apex; fi
+    - pip install ".[all,dev-test]"
+    - find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
+    - if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$EXAMPLE; else bash tests/examples/test_$EXAMPLE.sh; fi

 ##### Megatron / NeMo Integration Tests #####
 megatron-nemo-integration:
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 8dc315c4..38d2a52b 100755
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -6,6 +6,7 @@ Model Optimizer Changelog (Linux)

 **Deprecations**

+- Deprecated ModelOpt's custom docker images. Please use the PyTorch, TensorRT-LLM or TensorRT docker image directly or refer to the `installation guide <https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html>`_ for more details.
 - Deprecated ``quantize_mode`` argument in ``examples/onnx_ptq/evaluate.py`` to support strongly typing. Use ``engine_precision`` instead.
 - Deprecated TRT-LLM's TRT backend in ``examples/llm_ptq`` and ``examples/vlm_ptq``. Tasks ``build`` and ``benchmark`` support are removed and replaced with ``quant``. For performance evaluation, please use ``trtllm-bench`` directly.
 - ``--export_fmt`` flag in ``examples/llm_ptq`` is removed. By default we export to the unified Hugging Face checkpoint format.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 27182ace..52568976 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -11,7 +11,7 @@ pip install -e ".[dev]"
 ```

 If you are working on features that require dependencies like TensorRT-LLM or Megatron-Core, consider using a docker container to simplify the setup process.
-See [docker README](./README.md#installation--docker) for more details.
+Visit our [installation docs](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more information.

 ## 🧹 Code linting and formatting

diff --git a/README.md b/README.md
index a6c88c78..c19bfdde 100644
--- a/README.md
+++ b/README.md
@@ -61,10 +61,10 @@ Model Optimizer is also integrated with [NVIDIA NeMo](https://github.com/NVIDIA-
 To install stable release packages for Model Optimizer with `pip` from [PyPI](https://pypi.org/project/nvidia-modelopt/):

 ```bash
-pip install nvidia-modelopt[all]
+pip install -U nvidia-modelopt[all]
 ```

-To install from source in editable mode with all development dependencies or to test the latest changes, run:
+To install from source in editable mode with all development dependencies or to use the latest features, run:

 ```bash
 # Clone the Model Optimizer repository
@@ -74,7 +74,11 @@ cd TensorRT-Model-Optimizer
 pip install -e .[dev]
 ```

-Visit our [installation guide](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more fine-grained control on installed dependencies or view our pre-made [dockerfiles](docker/README.md) for more information.
+You can also directly use the [TensorRT-LLM docker images](https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/release/tags)
+(e.g., `nvcr.io/nvidia/tensorrt-llm/release:<version>`), which have Model Optimizer pre-installed.
+Make sure to upgrade Model Optimizer to the latest version using ``pip`` as described above.
+Visit our [installation guide](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for
+more fine-grained control over installed dependencies or for alternative docker images and environment variables to set up.

 ## Techniques

diff --git a/docker/Dockerfile b/docker/Dockerfile
deleted file mode 100644
index 8a736d25..00000000
--- a/docker/Dockerfile
+++ /dev/null
@@ -1,27 +0,0 @@
-FROM nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
-
-ENV PIP_EXTRA_INDEX_URL="https://pypi.nvidia.com" \
-    PIP_NO_CACHE_DIR=off \
-    PIP_CONSTRAINT= \
-    TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0 10.0 12.0+PTX"
-
-RUN apt-get update && \
-    apt-get install -y libgl1 && \
-    rm -rf /var/lib/apt/lists/*
-
-WORKDIR /workspace
-
-RUN ln -s /app/tensorrt_llm /workspace/tensorrt_llm
-
-# Update PATH and LD_LIBRARY_PATH variables for the TensorRT binaries
-ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}" \
-    PATH="/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}"
-
-# Install modelopt from source with all optional dependencies and pre-compile CUDA extensions otherwise they take several minutes on every docker run
-COPY . TensorRT-Model-Optimizer
-RUN pip install -e "./TensorRT-Model-Optimizer[all]"
-RUN rm -rf TensorRT-Model-Optimizer/.git
-RUN python -c "import modelopt.torch.quantization.extensions as ext; ext.precompile()"
-
-# Allow users to run without root
-RUN chmod -R 777 /workspace
diff --git a/docker/README.md b/docker/README.md
deleted file mode 100644
index 1b6984b8..00000000
--- a/docker/README.md
+++ /dev/null
@@ -1,16 +0,0 @@
-# ModelOpt Docker
-
-This folder contains the Dockerfile for the ModelOpt docker image.
-
-## Building the Docker Image
-
-To build the docker image, run the following command from the root of the repository:
-
-```bash
-bash docker/build.sh
-```
-
-The docker image will be built and tagged as `docker.io/library/modelopt_examples:latest`.
-
-> [!NOTE]
-> For ONNX PTQ, use the optimized docker image from [onnx_ptq Dockerfile](../examples/onnx_ptq/docker/) instead of this one.
diff --git a/docker/build.sh b/docker/build.sh
deleted file mode 100755
index 7addcdd5..00000000
--- a/docker/build.sh
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/bash
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -e
-
-docker build --network=host --progress=plain . -f docker/Dockerfile -t modelopt_examples:latest "$@"
diff --git a/docs/source/getting_started/_installation_for_Linux.rst b/docs/source/getting_started/_installation_for_Linux.rst
index 16afac64..7c214fd3 100644
--- a/docs/source/getting_started/_installation_for_Linux.rst
+++ b/docs/source/getting_started/_installation_for_Linux.rst
@@ -30,39 +30,30 @@ Environment setup

     .. tab:: Docker image (Recommended)

-        **Using ModelOpt's docker image**
+        To use Model Optimizer with full dependencies (e.g. TensorRT/TensorRT-LLM deployment), we recommend using the
+        `TensorRT-LLM docker image <https://catalog.ngc.nvidia.com/orgs/nvidia/teams/tensorrt-llm/containers/release/tags>`_,
+        e.g., ``nvcr.io/nvidia/tensorrt-llm/release:<version>``.

-        To use Model Optimizer with full dependencies (e.g. TensorRT/TensorRT-LLM deployment), we recommend using our provided docker image
-        which is based on the `TensorRT-LLM `_
-        docker image with additional dependencies installed.
+        Make sure to upgrade Model Optimizer to the latest version using ``pip`` as described in the next section.

-        After installing the `NVIDIA Container Toolkit `_,
-        please run the following commands to build the Model Optimizer docker container which has all the base
-        dependencies pre-installed. You may need to install additional dependencies from the examples's `requirements.txt` file.
+        You would also need to set up appropriate environment variables for the TensorRT binaries as follows:

         .. code-block:: shell

-            # Clone the ModelOpt repository
-            git clone git@github.com:NVIDIA/TensorRT-Model-Optimizer.git
-            cd TensorRT-Model-Optimizer
+            export PIP_CONSTRAINT=""
+            export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib"
+            export PATH="${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin"

-            # Build the docker (will be tagged `docker.io/library/modelopt_examples:latest`)
-            # You may customize `docker/Dockerfile` to include or exclude certain dependencies you may or may not need.
-            bash docker/build.sh
+        You may need to install additional dependencies from the respective example's `requirements.txt` file.

-            # Run the docker image
-            docker run --gpus all -it --shm-size 20g --rm docker.io/library/modelopt_examples:latest bash
-
-            # Check installation (inside the docker container)
-            python -c "import modelopt; print(modelopt.__version__)"
-
-        **Using alternative NVIDIA docker images**
+        **Alternative NVIDIA docker images**

         For PyTorch, you can also use `NVIDIA NGC PyTorch container `_
         and for NVIDIA NeMo framework, you can use the `NeMo container `_.
         Both of these containers come with Model Optimizer pre-installed. Make sure to update the Model Optimizer to the latest version if not already.

-        For ONNX PTQ, you can use the optimized docker image from [onnx_ptq Dockerfile](https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/examples/onnx_ptq/docker).
+        For ONNX / TensorRT use cases, you can also use the `TensorRT container `_
+        which provides superior performance to the PyTorch container.

     .. tab:: Local environment (PIP / Conda)

@@ -86,9 +77,8 @@ Environment setup

         If you wish to use ModelOpt in conjunction with other NVIDIA libraries (e.g. TensorRT, TensorRT-LLM, NeMo, Triton,
         etc.), please make sure to check the ease of installation of these libraries in a local environment. If you face any
-        issues, we recommend using a docker image for a seamless experience. For example, `TensorRT-LLM documentation `_.
-        requires installing in a docker image. You may still choose to use other ModelOpt's features locally for example,
-        quantizing a HuggingFace model and then use a docker image for deployment.
+        issues, we recommend using a docker image for a seamless experience. You may still choose to use ModelOpt's other
+        features locally, for example quantizing a HuggingFace model and then use a docker image for deployment.

 Install Model Optimizer
 =======================
diff --git a/examples/diffusers/README.md b/examples/diffusers/README.md
index d957db75..cb8ec08a 100644
--- a/examples/diffusers/README.md
+++ b/examples/diffusers/README.md
@@ -27,6 +27,14 @@ Cache Diffusion is a technique that reuses cached outputs from previous diffusio

 ## Pre-Requisites

+### Docker
+
+Please use the TensorRT docker image (e.g., `nvcr.io/nvidia/tensorrt:25.08-py3`) or visit our [installation docs](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more information.
+
+Also follow the installation steps below to upgrade to the latest version of Model Optimizer and install example-specific dependencies.
+
+### Local Installation
+
 Install Model Optimizer with `onnx` and `hf` dependencies using `pip` from [PyPI](https://pypi.org/project/nvidia-modelopt/):

 ```bash
@@ -37,7 +45,7 @@ Each subsection (cache_diffusion, quantization, etc.) have their own `requiremen

 You can find the latest TensorRT [here](https://developer.nvidia.com/tensorrt/download).

-Visit our [installation guide](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) or view our pre-made [dockerfiles](../../docker/Dockerfile) for more information.
+Visit our [installation docs](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more information.

 ## Getting Started

diff --git a/examples/diffusers/cache_diffusion/requirements.txt b/examples/diffusers/cache_diffusion/requirements.txt
index 38c65678..e7c2b3c1 100644
--- a/examples/diffusers/cache_diffusion/requirements.txt
+++ b/examples/diffusers/cache_diffusion/requirements.txt
@@ -1,4 +1,4 @@
-cuda-python
+cuda-python<13
 opencv-python>=4.8.1.78,<4.12.0.88
 peft>=0.10.0
 polygraphy==0.49.9
diff --git a/examples/diffusers/quantization/requirements.txt b/examples/diffusers/quantization/requirements.txt
index 52921fe7..67d9ab99 100644
--- a/examples/diffusers/quantization/requirements.txt
+++ b/examples/diffusers/quantization/requirements.txt
@@ -1,4 +1,4 @@
-cuda-python
+cuda-python<13
 diffusers<=0.34.0
 nvtx
 onnx_graphsurgeon
diff --git a/examples/llm_autodeploy/README.md b/examples/llm_autodeploy/README.md
index f14ecb8b..cd910a28 100644
--- a/examples/llm_autodeploy/README.md
+++ b/examples/llm_autodeploy/README.md
@@ -8,7 +8,7 @@ This guide demonstrates how to deploy mixed-precision models using ModelOpt's Au

 ## Prerequisites

-AutoDeploy is currently available on the main branch of TRT-LLM. Follow the [docker setup instructions](https://github.com/NVIDIA/TensorRT-LLM/blob/main/docs/source/installation/build-from-source-linux.md#option-1-build-tensorrt-llm-in-one-step) to get started.
+AutoDeploy is available in TensorRT-LLM docker images. Please refer to our [Installation Guide](../../README.md#installation) for more details.

 ### 1. Quantize and Deploy Model

diff --git a/examples/llm_distill/README.md b/examples/llm_distill/README.md
index 6b97e8f7..3b44b4f7 100644
--- a/examples/llm_distill/README.md
+++ b/examples/llm_distill/README.md
@@ -21,15 +21,23 @@ This section focuses on demonstrating how to apply Model Optimizer to perform kn

 ## Pre-Requisites

+### Docker
+
+For Hugging Face models, please use the PyTorch docker image (e.g., `nvcr.io/nvidia/pytorch:25.06-py3`).
+For NeMo models, use the NeMo container (e.g., `nvcr.io/nvidia/nemo:25.07`) which has all the dependencies installed.
+Visit our [installation docs](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more information.
+
+Also follow the installation steps below to upgrade to the latest version of Model Optimizer and install example-specific dependencies.
+
+### Local Installation
+
 For Hugging Face models, install Model Optimizer with `hf` dependencies using `pip` from [PyPI](https://pypi.org/project/nvidia-modelopt/) and install the requirements for the example:

 ```bash
-pip install nvidia-modelopt[hf]
+pip install -U nvidia-modelopt[hf]
 pip install -r requirements.txt
 ```

-For NeMo models, use the NeMo container `nvcr.io/nvidia/nemo:25.07` or later which has all the dependencies installed.
-
 ## Getting Started

 ### Set up your base models
diff --git a/examples/llm_ptq/README.md b/examples/llm_ptq/README.md
index 2c95b831..46780b36 100755
--- a/examples/llm_ptq/README.md
+++ b/examples/llm_ptq/README.md
@@ -25,16 +25,25 @@ This section focuses on Post-training quantization, a technique that reduces mod

 ## Pre-Requisites

+### Docker
+
+For Hugging Face models, please use the TensorRT-LLM docker image (e.g., `nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2`).
+For NeMo models, use the NeMo container (e.g., `nvcr.io/nvidia/nemo:25.07`).
+Visit our [installation docs](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more information.
+
+Also follow the installation steps below to upgrade to the latest version of Model Optimizer and install example-specific dependencies.
+
+### Local Installation
+
 For Hugging Face models, install Model Optimizer with `hf` dependencies using `pip` from [PyPI](https://pypi.org/project/nvidia-modelopt/) and install the requirements for the example:

 ```bash
-pip install nvidia-modelopt[hf]
+pip install -U nvidia-modelopt[hf]
 pip install -r requirements.txt
 ```

-If you want to deploy the quantized model on TRT-LLM, you will also need to install the TRT-LLM dependencies as per the [TRT-LLM documentation](https://nvidia.github.io/TensorRT-LLM/quick-start-guide.html#installation). Alternatively you can use the ModelOpt docker image built from the [ModelOpt docker build step](../../docker/README.md) which has all the dependencies including TRT-LLM installed.
-
-For NeMo models, use the NeMo container `nvcr.io/nvidia/nemo:25.04` or later which has all the dependencies including TRT-LLM installed.
+For TensorRT-LLM deployment, please use the TensorRT-LLM docker image or follow their [installation docs](https://nvidia.github.io/TensorRT-LLM/installation/index.html).
+Similarly, for vLLM or SGLang deployment, please follow their installation docs.

 ## Getting Started

diff --git a/examples/llm_qat/README.md b/examples/llm_qat/README.md
index 3d895694..44e3ceb6 100644
--- a/examples/llm_qat/README.md
+++ b/examples/llm_qat/README.md
@@ -22,16 +22,7 @@ Quantization Aware Training (QAT) helps to improve the model accuracy beyond pos

 ## Pre-Requisites

-For Hugging Face models, install Model Optimizer with `hf` dependencies using `pip` from [PyPI](https://pypi.org/project/nvidia-modelopt/) and install the requirements for the example:
-
-```bash
-pip install nvidia-modelopt[hf]
-pip install -r requirements.txt
-```
-
-If you want to deploy the quantized model on TRT-LLM, you will also need to install the TRT-LLM dependencies as per the [TRT-LLM documentation](https://nvidia.github.io/TensorRT-LLM/quick-start-guide.html#installation). Alternatively you can use the ModelOpt docker image built from the [ModelOpt docker build step](../../docker/Dockerfile) which has all the dependencies including TRT-LLM installed.
-
-For NeMo models, use the NeMo container `nvcr.io/nvidia/nemo:25.04` or later which has all the dependencies including TRT-LLM installed.
+Please refer to the [llm_ptq/README.md](../llm_ptq/README.md#pre-requisites) for the pre-requisites.

 ## Getting Started

diff --git a/examples/llm_sparsity/data_prep.py b/examples/llm_sparsity/data_prep.py
index d47ff118..b37212f6 100644
--- a/examples/llm_sparsity/data_prep.py
+++ b/examples/llm_sparsity/data_prep.py
@@ -57,9 +57,9 @@ def main():

     os.makedirs(args.save_path, exist_ok=True)
     with open(os.path.join(args.save_path, "cnn_train.json"), "w") as write_f:
-        json.dump(tokenized_dataset["train"]["text"], write_f, indent=4, ensure_ascii=False)
+        json.dump(list(tokenized_dataset["train"]["text"]), write_f, indent=4, ensure_ascii=False)
     with open(os.path.join(args.save_path, "cnn_eval.json"), "w") as write_f:
-        json.dump(tokenized_dataset["test"]["text"], write_f, indent=4, ensure_ascii=False)
+        json.dump(list(tokenized_dataset["test"]["text"]), write_f, indent=4, ensure_ascii=False)


 if __name__ == "__main__":
diff --git a/examples/onnx_ptq/README.md b/examples/onnx_ptq/README.md
index 93be1f3e..47058d36 100644
--- a/examples/onnx_ptq/README.md
+++ b/examples/onnx_ptq/README.md
@@ -24,26 +24,16 @@ Model Optimizer enables highly performant quantization formats including NVFP4,

 ### Docker

-Build from this [Dockerfile](./docker/Dockerfile) which includes the latest publicly available TensorRT version, providing access to cutting-edge features and superior performance compared to the `modelopt_examples` [Docker image](https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/docker/Dockerfile).
+Please use the TensorRT docker image (e.g., `nvcr.io/nvidia/tensorrt:25.08-py3`) or visit our [installation docs](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more information.

-Build the Docker image (will be tagged `docker.io/library/onnx_ptq_examples:latest`)
-
-```bash
-./docker/build.sh
-```
-
-Run the docker image
-
-```bash
-docker run --user 0:0 -it --gpus all --shm-size=2g -v /path/to/ImageNet/dataset:/workspace/imagenet docker.io/library/onnx_ptq_examples:latest
-```
+Also follow the installation steps below to upgrade to the latest version of Model Optimizer and install example-specific dependencies.

 ### Local Installation

 Install Model Optimizer with `onnx` dependencies using `pip` from [PyPI](https://pypi.org/project/nvidia-modelopt/) and install the requirements for the example:

 ```bash
-pip install nvidia-modelopt[onnx]
+pip install -U nvidia-modelopt[onnx]
 pip install -r requirements.txt
 ```

diff --git a/examples/onnx_ptq/docker/Dockerfile b/examples/onnx_ptq/docker/Dockerfile
deleted file mode 100644
index 0b770b74..00000000
--- a/examples/onnx_ptq/docker/Dockerfile
+++ /dev/null
@@ -1,34 +0,0 @@
-FROM nvcr.io/nvidia/tensorrt:25.08-py3
-
-ARG CMAKE_VERSION=3.28.0
-
-ENV PIP_EXTRA_INDEX_URL="https://pypi.nvidia.com" \
-    PIP_NO_CACHE_DIR=off
-
-RUN python -m pip install --upgrade pip \
-    && pip install cmake==${CMAKE_VERSION} \
-    && mkdir -p -m 0600 ~/.ssh \
-    && ssh-keyscan github.com >> ~/.ssh/known_hosts
-
-WORKDIR /workspace
-
-RUN pip install tensorrt==10.13.2.6
-ENV TRT_PATH=/usr/local/lib/python3.12/dist-packages/tensorrt
-ENV CUDNN_LIB_DIR=/usr/local/lib/python3.12/dist-packages/nvidia/cudnn/lib
-ENV LD_LIBRARY_PATH="${CUDNN_LIB_DIR}:${TRT_PATH}/lib:/usr/include:${LD_LIBRARY_PATH}"
-ENV PATH="${TRT_PATH}/bin:${PATH}"
-
-# Copy application code and install requirements
-COPY modelopt TensorRT-Model-Optimizer/modelopt
-COPY examples/onnx_ptq TensorRT-Model-Optimizer/examples/onnx_ptq
-COPY setup.py TensorRT-Model-Optimizer/setup.py
-COPY pyproject.toml TensorRT-Model-Optimizer/pyproject.toml
-
-# Install onnx_ptq requirements
-RUN pip install -r TensorRT-Model-Optimizer/examples/onnx_ptq/requirements.txt
-
-# Install modelopt
-RUN pip install -e "./TensorRT-Model-Optimizer[hf,onnx]"
-
-# Allow users to run without root
-RUN chmod -R 777 /workspace
diff --git a/examples/onnx_ptq/docker/build.sh b/examples/onnx_ptq/docker/build.sh
deleted file mode 100755
index f1ac572e..00000000
--- a/examples/onnx_ptq/docker/build.sh
+++ /dev/null
@@ -1,131 +0,0 @@
-#!/bin/bash
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-set -euo pipefail # Exit on error, undefined vars, pipe failures
-
-# Default values
-IMAGE_NAME="modelopt_onnx_examples:latest"
-DOCKERFILE_PATH="examples/onnx_ptq/docker/Dockerfile"
-
-# Function to show usage
-usage() {
-    cat << EOF
-Usage: $0 [OPTIONS]
-
-Options:
-    -t, --tag IMAGE_NAME    Docker image name (default: $IMAGE_NAME)
-    -h, --help              Show this help message
-
-This script automatically detects whether you're running from:
-  β€’ modelopt/ root directory
-  β€’ modelopt/examples/onnx_ptq/ directory
-
-and builds the Docker image accordingly.
-EOF
-    exit 1
-}
-
-# Parse arguments
-while [[ $# -gt 0 ]]; do
-    case $1 in
-        -t|--tag)
-            [[ -n "${2:-}" ]] || { echo "Error: --tag requires a value"; exit 1; }
-            IMAGE_NAME="$2"
-            shift 2
-            ;;
-        -h|--help)
-            usage
-            ;;
-        *)
-            echo "Error: Unknown option '$1'"
-            usage
-            ;;
-    esac
-done
-
-# Function to find modelopt root directory
-find_modelopt_root() {
-    local current_dir="$PWD"
-
-    # Check current directory first
-    if [[ -f "setup.py" && -f "pyproject.toml" && -d "modelopt" ]]; then
-        echo "$current_dir"
-        return 0
-    fi
-
-    # Check parent directories (up to 3 levels)
-    for i in {1..3}; do
-        local parent_dir
-        parent_dir=$(dirname "$current_dir")
-        [[ "$parent_dir" == "$current_dir" ]] && break # Reached filesystem root
-
-        if [[ -f "$parent_dir/setup.py" && -f "$parent_dir/pyproject.toml" && -d "$parent_dir/modelopt" ]]; then
-            echo "$parent_dir"
-            return 0
-        fi
-        current_dir="$parent_dir"
-    done
-
-    return 1
-}
-
-# Find modelopt root directory
-echo "πŸ” Locating modelopt root directory..."
-if ROOT_DIR=$(find_modelopt_root); then
-    echo "βœ… Found modelopt root: $ROOT_DIR"
-    cd "$ROOT_DIR"
-else
-    cat << EOF
-❌ Error: Cannot locate modelopt root directory.
-
-Expected structure:
-  modelopt/
-  β”œβ”€β”€ setup.py
-  β”œβ”€β”€ pyproject.toml
-  β”œβ”€β”€ modelopt/
-  └── examples/onnx_ptq/docker/
-
-Please run this script from within the modelopt repository.
-EOF
-    exit 1
-fi
-
-# Validate that Dockerfile exists
-if [[ ! -f "$DOCKERFILE_PATH" ]]; then
-    echo "❌ Error: Dockerfile not found at $DOCKERFILE_PATH"
-    exit 1
-fi
-
-# Build Docker image
-echo "🐳 Building Docker image..."
-echo "  β€’ Image name: $IMAGE_NAME"
-echo "  β€’ Build context: $(pwd)"
-echo "  β€’ Dockerfile: $DOCKERFILE_PATH"
-echo
-
-docker build \
-    --file "$DOCKERFILE_PATH" \
-    --tag "$IMAGE_NAME" \
-    . \
-    "$@"
-
-echo
-echo "βœ… Docker image built successfully: $IMAGE_NAME"
-echo
-echo "πŸš€ To run the container:"
-echo "   docker run --user 0:0 -it --gpus all --shm-size=2g \\"
-echo "     -v /path/to/ImageNet/dataset:/workspace/imagenet \\"
-echo "     $IMAGE_NAME"
diff --git a/examples/pruning/README.md b/examples/pruning/README.md
index 6d0123e3..34ed302b 100644
--- a/examples/pruning/README.md
+++ b/examples/pruning/README.md
@@ -23,7 +23,7 @@ This section focuses on applying Model Optimizer's state-of-the-art complementar

 ## Pre-Requisites

-For Minitron pruning for Megatron-LM / NeMo models, use the NeMo container `nvcr.io/nvidia/nemo:25.07` or later which has all the dependencies installed.
+For Minitron pruning for Megatron-LM / NeMo models, use the NeMo container (e.g., `nvcr.io/nvidia/nemo:25.07`) which has all the dependencies installed.

 For FastNAS pruning for PyTorch Computer Vision models, no additional dependencies are required.

diff --git a/examples/speculative_decoding/README.md b/examples/speculative_decoding/README.md
index 2e8966d2..503cf303 100644
--- a/examples/speculative_decoding/README.md
+++ b/examples/speculative_decoding/README.md
@@ -25,16 +25,26 @@ This example focuses on training with Hugging Face. To train with Megatron‑LM,

 ## Pre-Requisites

+### Docker
+
+Please use the PyTorch docker image (e.g., `nvcr.io/nvidia/pytorch:25.06-py3`) or visit our [installation docs](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html) for more information.
+
+Also follow the installation steps below to upgrade to the latest version of Model Optimizer and install dataset and example-specific dependencies.
+
+### Local Installation
+
 Install Modelopt with `hf` dependencies and other requirements for this example:

 ```bash
-pip install -e ...
+pip install -U nvidia-modelopt[hf]
 pip install -r requirements.txt
 ```

 We use [Daring-Anteater](https://huggingface.co/datasets/nvidia/Daring-Anteater) dataset in this example. Download by:

 ```bash
+apt-get update && apt-get install -y git-lfs
+git lfs install --system
 git clone https://huggingface.co/datasets/nvidia/Daring-Anteater
 ```

diff --git a/tests/examples/README.md b/tests/examples/README.md
index 869d8311..ed9a32f2 100644
--- a/tests/examples/README.md
+++ b/tests/examples/README.md
@@ -9,22 +9,17 @@ Make sure to use as small models and less data as possible to keep the tests fas

 ## Running the tests

-To run a test, use the [ModelOpt docker image](../../README.md#installation--docker) so all required dependencies are available.
-and mount your local modelopt directory to `/workspace/TensorRT-Model-Optimizer` and run this from the root of the repository.
+To run a test, start from the docker image recommended in our [installation docs](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html).
+Then mount your local modelopt directory to `/workspace/TensorRT-Model-Optimizer` and run this from the root of the repository.

 ```bash
+cd /workspace/TensorRT-Model-Optimizer
+pip install -e ".[all,dev-test]"
 pytest tests/examples/$TEST
 ```

-NOTE: Some tests (e.g. `llm_ptq`) have an option to disable using a smaller proxy model, and instead use the original model by setting the `MODELOPT_FAST_TESTS` environment variable to `false`. This is useful in nightly tests to ensure the original model is used.
-
-```bash
-MODELOPT_FAST_TESTS=false ROOT_SAVE_PATH=/tmp/test_llm_ptq/ pytest tests/examples/llm_ptq/
-```
-
 ## Environment variables

 The following environment variables can be set to control the behavior of the tests:

-- `MODELOPT_FAST_TESTS`: If set to `false`, the tests will use the original model instead of a smaller proxy model. Default is `true`.
 - `MODELOPT_LOCAL_MODEL_ROOT`: If set, the tests will use the local model directory instead of downloading the model from the internet. Default is not set, which means the model will be downloaded.
diff --git a/tests/examples/llm_sparsity/test_llama_sparsify.py b/tests/examples/llm_sparsity/test_llama_sparsify.py
index 1fc94292..6c815a45 100644
--- a/tests/examples/llm_sparsity/test_llama_sparsify.py
+++ b/tests/examples/llm_sparsity/test_llama_sparsify.py
@@ -39,7 +39,6 @@ def data_path(tmp_path_factory):
 @pytest.mark.parametrize(
     ("sparsity_fmt", "dtype"),
     [
-        ("sparsegpt", "bf16"),
         ("sparse_magnitude", "bf16"),
     ],
 )