diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index fe900000..d8f9eaa5 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -15,7 +15,7 @@ concurrency:
 jobs:
   code-quality:
     runs-on: ubuntu-latest
-    timeout-minutes: 15
+    timeout-minutes: 30
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
diff --git a/.github/workflows/example_tests.yml b/.github/workflows/example_tests.yml
new file mode 100644
index 00000000..57b9f04c
--- /dev/null
+++ b/.github/workflows/example_tests.yml
@@ -0,0 +1,100 @@
+# NOTE: Make sure this file is consistent with .gitlab/tests.yml
+name: E2E Example tests
+
+on:
+  push:
+    branches: ["pull-request/[0-9]+"]
+    # NOTE: paths cannot be used since push happens to copied PR and only latest commit to PR is used
+  schedule:
+    - cron: "0 0 * * *" # Nightly
+  workflow_dispatch: # On-demand
+
+# Cancel previous runs if new commit is pushed to the same PR
+concurrency:
+  group: ${{ github.workflow }}-${{ startsWith(github.ref, 'refs/heads/pull-request/') && github.ref || github.sha }}
+  cancel-in-progress: true
+
+jobs:
+  check-file-changes:
+    if: startsWith(github.ref, 'refs/heads/pull-request/')
+    runs-on: ubuntu-latest
+    outputs:
+      any_changed: ${{ steps.changed-tests.outputs.any_changed }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - id: get-pr-info
+        uses: nv-gha-runners/get-pr-info@main
+      # Get commit from main branch that is present in the PR to use as base for changed files
+      - id: calculate-merge-base
+        env:
+          PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
+          BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
+        run: |
+          (echo -n "merge-base="; git merge-base "$BASE_SHA" "$PR_SHA") | tee --append "${GITHUB_OUTPUT}"
+      - name: Check for changes in test-relevant directories
+        id: changed-tests
+        uses: step-security/changed-files@v46.0.5
+        with:
+          base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }}
+          sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
+          files: |
+            .github/workflows/example_tests.yml
+            examples/llm_ptq/**
+            modelopt/torch/**
+            tests/examples/llm_ptq/**
+            setup.py
+          fail_on_initial_diff_error: true
+  wait-checks:
+    needs: [check-file-changes]
+    if: needs.check-file-changes.outputs.any_changed == 'true'
+    uses: ./.github/workflows/_wait_for_checks.yml
+    permissions:
+      checks: read
+    secrets: inherit
+    with:
+      match_pattern: '^DCO$|^linux$' # Wait for DCO and Unit tests / linux to pass
+      delay: 300s
+  example-tests-pr:
+    needs: [check-file-changes, wait-checks]
+    if: needs.check-file-changes.outputs.any_changed == 'true'
+    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
+    runs-on: linux-amd64-gpu-h100-latest-1
+    timeout-minutes: 90
+    strategy:
+      matrix:
+        EXAMPLE: [llm_ptq]
+    container: &example_container
+      image: nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
+      env:
+        LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}"
+        # PATH: "/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}"
+        PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
+    steps: &example_steps
+      - uses: actions/checkout@v4
+      - uses: nv-gha-runners/setup-proxy-cache@main
+      - name: Run example tests
+        run: |
+          pip install ".[all,dev-test]"
+          find examples/${{ matrix.EXAMPLE }} -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
+          pytest -s tests/examples/${{ matrix.EXAMPLE }}
+  example-tests-non-pr:
+    if: ${{ !startsWith(github.ref, 'refs/heads/pull-request/') }}
+    # Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
+    runs-on: linux-amd64-gpu-h100-latest-1
+    timeout-minutes: 90
+    strategy:
+      matrix:
+        EXAMPLE: [llm_ptq]
+    container: *example_container
+    steps: *example_steps
+  example-pr-required-check:
+    # Run even if example-tests-pr is skipped
+    if: ${{ startsWith(github.ref, 'refs/heads/pull-request/') && always() }}
+    needs: [check-file-changes, example-tests-pr]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Required GPU tests did not succeed
+        if: ${{ needs.check-file-changes.result != 'success' || (needs.check-file-changes.outputs.any_changed == 'true' && needs.example-tests-pr.result != 'success') }}
+        run: exit 1
diff --git a/.github/workflows/gpu_tests.yml b/.github/workflows/gpu_tests.yml
index e3a8e77a..693c99b1 100644
--- a/.github/workflows/gpu_tests.yml
+++ b/.github/workflows/gpu_tests.yml
@@ -44,7 +44,6 @@ jobs:
             modelopt/**
             tests/gpu/**
             tox.ini
-            pyproject.toml
             setup.py
           fail_on_initial_diff_error: true
   wait-checks:
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
index 279e3834..38bc6eb8 100644
--- a/.github/workflows/pages.yml
+++ b/.github/workflows/pages.yml
@@ -23,7 +23,7 @@ permissions:
 jobs:
   build-docs:
     runs-on: ubuntu-latest
-    timeout-minutes: 15
+    timeout-minutes: 30
     steps:
       - uses: actions/checkout@v4
       - uses: actions/setup-python@v5
diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml
index dc304807..aca510b5 100644
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@@ -10,7 +10,6 @@ on:
       - ".github/workflows/unit_tests.yml"
      - "modelopt/**"
      - "tests/unit/**"
-      - "pyproject.toml"
      - "setup.py"
      - "tox.ini"
   schedule:
diff --git a/.gitlab/tests.yml b/.gitlab/tests.yml
index 7c42856d..91640b11 100644
--- a/.gitlab/tests.yml
+++ b/.gitlab/tests.yml
@@ -49,14 +49,15 @@ example:
   tags: [docker, linux, 2-gpu, sm<89]
   parallel:
     matrix:
-      - TEST: [diffusers, llm_distill, llm_qat, llm_sparsity, onnx_ptq, speculative_decoding]
+      - EXAMPLE: [diffusers, llm_distill, llm_qat, llm_sparsity, onnx_ptq, speculative_decoding]
   allow_failure: true # Allow to continue next stages even if job is canceled (e.g. during release)
   before_script:
-    - pip install ".[all]" -U
+    - pip install ".[all,dev-test]"
   script:
     # Uninstall apex since T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391
-    - if [ "$TEST" = "diffusers" ]; then pip uninstall -y apex; fi
-    - if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$TEST; else bash tests/examples/test_$TEST.sh; fi
+    - if [ "$EXAMPLE" = "diffusers" ]; then pip uninstall -y apex; fi
+    - find examples/$EXAMPLE -name "requirements.txt" | while read req_file; do pip install -r "$req_file" || exit 1; done
+    - if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$EXAMPLE; else bash tests/examples/test_$EXAMPLE.sh; fi
 
 example-ada:
   extends: example
@@ -64,8 +65,8 @@ example-ada:
   tags: [docker, linux, 2-gpu, sm>=89]
   parallel:
     matrix:
-      - TEST: [llm_eval, llm_ptq, vlm_ptq, llm_autodeploy]
-      - TEST: [onnx_ptq]
+      - EXAMPLE: [llm_eval, llm_ptq, vlm_ptq, llm_autodeploy]
+      - EXAMPLE: [onnx_ptq]
        TEST_TYPE: bash
 
 ##### Megatron / NeMo Integration Tests #####
diff --git a/README.md b/README.md
index b16196b0..a6c88c78 100644
--- a/README.md
+++ b/README.md
@@ -68,7 +68,7 @@ To install from source in editable mode with all development dependencies or to
 
 ```bash
 # Clone the Model Optimizer repository
-git clone https://github.com/NVIDIA/TensorRT-Model-Optimizer.git
+git clone git@github.com:NVIDIA/TensorRT-Model-Optimizer.git
 cd TensorRT-Model-Optimizer
 
 pip install -e .[dev]
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 3fd96a09..8a736d25 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,10 +1,9 @@
 FROM nvcr.io/nvidia/tensorrt-llm/release:1.1.0rc2.post2
 
-ARG PIP_EXTRA_INDEX_URL="https://pypi.nvidia.com"
-ENV PIP_EXTRA_INDEX_URL=$PIP_EXTRA_INDEX_URL \
+ENV PIP_EXTRA_INDEX_URL="https://pypi.nvidia.com" \
     PIP_NO_CACHE_DIR=off \
     PIP_CONSTRAINT= \
-    TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0 10.0+PTX"
+    TORCH_CUDA_ARCH_LIST="8.0 8.6 8.7 8.9 9.0 10.0 12.0+PTX"
 
 RUN apt-get update && \
     apt-get install -y libgl1 && \
@@ -18,17 +17,11 @@ RUN ln -s /app/tensorrt_llm /workspace/tensorrt_llm
 ENV LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib:${LD_LIBRARY_PATH}" \
     PATH="/usr/local/tensorrt/targets/x86_64-linux-gnu/bin:${PATH}"
 
-# Install modelopt with all optional dependencies and pre-compile CUDA extensions otherwise they take several minutes on every docker run
-RUN pip install -U "nvidia-modelopt[all,dev-test]"
-RUN python -c "import modelopt.torch.quantization.extensions as ext; ext.precompile()"
-
-# Find and install requirements.txt files for all examples excluding windows
+# Install modelopt from source with all optional dependencies and pre-compile CUDA extensions otherwise they take several minutes on every docker run
 COPY . TensorRT-Model-Optimizer
+RUN pip install -e "./TensorRT-Model-Optimizer[all]"
 RUN rm -rf TensorRT-Model-Optimizer/.git
-RUN find TensorRT-Model-Optimizer/examples -name "requirements.txt" | grep -v "windows" | while read req_file; do \
-    echo "Installing from $req_file"; \
-    pip install -r "$req_file" || exit 1; \
-    done
+RUN python -c "import modelopt.torch.quantization.extensions as ext; ext.precompile()"
 
 # Allow users to run without root
 RUN chmod -R 777 /workspace
diff --git a/docs/source/getting_started/_installation_for_Linux.rst b/docs/source/getting_started/_installation_for_Linux.rst
index 42892966..16afac64 100644
--- a/docs/source/getting_started/_installation_for_Linux.rst
+++ b/docs/source/getting_started/_installation_for_Linux.rst
@@ -34,16 +34,16 @@ Environment setup
 
 To use Model Optimizer with full dependencies (e.g. TensorRT/TensorRT-LLM deployment), we recommend using
 our provided docker image which is based on the `TensorRT-LLM `_
-docker image with additional example-specific dependencies installed.
+docker image with additional dependencies installed.
 
 After installing the `NVIDIA Container Toolkit `_,
-please run the following commands to build the Model Optimizer docker container which has all the necessary
-dependencies pre-installed for running the examples.
+please run the following commands to build the Model Optimizer docker container which has all the base
+dependencies pre-installed. You may need to install additional dependencies from the example's `requirements.txt` file.
 
 .. code-block:: shell
 
     # Clone the ModelOpt repository
-    git clone https://github.com/NVIDIA/TensorRT-Model-Optimizer.git
+    git clone git@github.com:NVIDIA/TensorRT-Model-Optimizer.git
     cd TensorRT-Model-Optimizer
 
     # Build the docker (will be tagged `docker.io/library/modelopt_examples:latest`)
@@ -60,8 +60,7 @@ Environment setup
 
 For PyTorch, you can also use `NVIDIA NGC PyTorch container `_
 and for NVIDIA NeMo framework, you can use the `NeMo container `_.
-Both of these containers come with Model Optimizer pre-installed. NeMo container also comes with the HuggingFace and TensorRT-LLM
-dependencies. Make sure to update the Model Optimizer to the latest version if not already.
+Both of these containers come with Model Optimizer pre-installed. Make sure to update the Model Optimizer to the latest version if not already.
 
 For ONNX PTQ, you can use the optimized docker image from [onnx_ptq Dockerfile](https://github.com/NVIDIA/TensorRT-Model-Optimizer/tree/main/examples/onnx_ptq/docker).
 
diff --git a/examples/cnn_qat/requirements.txt b/examples/cnn_qat/requirements.txt
new file mode 100644
index 00000000..e35531e5
--- /dev/null
+++ b/examples/cnn_qat/requirements.txt
@@ -0,0 +1 @@
+torchvision
diff --git a/examples/llm_eval/requirements.txt b/examples/llm_eval/requirements.txt
index d73e9fc7..7daa0c80 100644
--- a/examples/llm_eval/requirements.txt
+++ b/examples/llm_eval/requirements.txt
@@ -4,3 +4,4 @@ openai>=0.28.1
 peft>=0.5.0
 rwkv>=0.7.3
 tiktoken
+torchvision
diff --git a/examples/onnx_ptq/docker/Dockerfile b/examples/onnx_ptq/docker/Dockerfile
index 71c38f0e..0b770b74 100644
--- a/examples/onnx_ptq/docker/Dockerfile
+++ b/examples/onnx_ptq/docker/Dockerfile
@@ -19,16 +19,16 @@ ENV LD_LIBRARY_PATH="${CUDNN_LIB_DIR}:${TRT_PATH}/lib:/usr/include:${LD_LIBRARY_
 ENV PATH="${TRT_PATH}/bin:${PATH}"
 
 # Copy application code and install requirements
-COPY modelopt modelopt/modelopt
-COPY examples/onnx_ptq modelopt/examples/onnx_ptq
-COPY setup.py modelopt/setup.py
-COPY pyproject.toml modelopt/pyproject.toml
+COPY modelopt TensorRT-Model-Optimizer/modelopt
+COPY examples/onnx_ptq TensorRT-Model-Optimizer/examples/onnx_ptq
+COPY setup.py TensorRT-Model-Optimizer/setup.py
+COPY pyproject.toml TensorRT-Model-Optimizer/pyproject.toml
 
 # Install onnx_ptq requirements
-RUN pip install -r modelopt/examples/onnx_ptq/requirements.txt
+RUN pip install -r TensorRT-Model-Optimizer/examples/onnx_ptq/requirements.txt
 
 # Install modelopt
-RUN pip install -e "./modelopt[hf,onnx]"
+RUN pip install -e "./TensorRT-Model-Optimizer[hf,onnx]"
 
 # Allow users to run without root
 RUN chmod -R 777 /workspace
diff --git a/examples/onnx_ptq/requirements.txt b/examples/onnx_ptq/requirements.txt
index 22b1e83c..01f7f6dd 100644
--- a/examples/onnx_ptq/requirements.txt
+++ b/examples/onnx_ptq/requirements.txt
@@ -2,3 +2,4 @@ datasets>=2.14.4
 optimum
 sentencepiece
 timm
+torchvision
diff --git a/examples/pruning/cifar_resnet.ipynb b/examples/pruning/cifar_resnet.ipynb
index 6a5fed4f..62d297ba 100644
--- a/examples/pruning/cifar_resnet.ipynb
+++ b/examples/pruning/cifar_resnet.ipynb
@@ -22,6 +22,15 @@
     "Let's first install `Model Optimizer` following the [installation steps](https://nvidia.github.io/TensorRT-Model-Optimizer/getting_started/2_installation.html)."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! pip install nvidia-modelopt torchvision"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 2,
diff --git a/setup.py b/setup.py
index 46abccac..67bf114a 100644
--- a/setup.py
+++ b/setup.py
@@ -38,7 +38,6 @@
     "safetensors",
     "torch>=2.6",
     "torchprofile>=0.0.4",
-    "torchvision",
 ]
 
 optional_deps = {
@@ -79,6 +78,7 @@
         "pytest-cov",
         "pytest-timeout",
         "timm",
+        "torchvision",
         "tox>4.18",
         "tox-current-env>=0.0.12",
     ],
diff --git a/tests/examples/speculative_decoding/test_medusa.py b/tests/examples/speculative_decoding/test_medusa.py
index 27f74eda..c11a2e70 100644
--- a/tests/examples/speculative_decoding/test_medusa.py
+++ b/tests/examples/speculative_decoding/test_medusa.py
@@ -13,21 +13,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import subprocess
-import pytest
 from _test_utils.examples.run_command import run_example_command
 
-# TODO: Medusa QAT FSDP test hangs if transformers>=4.50
-@pytest.fixture(scope="session", autouse=True)
-def install_transformers_lt_4_50():
-    subprocess.run(
-        ["pip", "install", "transformers<4.50"],
-        check=True,
-    )
-
-
 # fmt: off
 def _run_hf_ptq(model_path, output_dir, qformat):
     run_example_command(