Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/workflows/gpu_tests.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Make sure this file is consistent with .gitlab/tests.yml
name: GPU tests

on:
Expand Down Expand Up @@ -46,7 +47,7 @@ jobs:
if: needs.check-file-changes.outputs.any_changed == 'true'
# Runner list at https://github.com/nv-gha-runners/enterprise-runner-configuration/blob/main/docs/runner-groups.md
runs-on: linux-amd64-gpu-h100-latest-1
timeout-minutes: 60
timeout-minutes: 90
container:
image: nvcr.io/nvidia/pytorch:25.06-py3
env:
Expand Down
3 changes: 2 additions & 1 deletion .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# NOTE: Make sure this file is consistent with .gitlab/tests.yml
name: Unit tests

on:
Expand Down Expand Up @@ -84,7 +85,7 @@ jobs:
timeout-minutes: 30
strategy:
matrix:
torch: [25, 26, 27]
torch: [26, 27]
steps:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
Expand Down
15 changes: 15 additions & 0 deletions .gitlab/.gitlab-ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Top-level GitLab CI pipeline configuration.
workflow:
  auto_cancel:
    # Cancel redundant pipelines for the same ref, but only jobs marked interruptible.
    on_new_commit: interruptible

default:
  image: python:3.12
  tags: [type/docker, os/linux, cpu] # Use a runner with these tags

stages: # List of stages for jobs, and their order of execution
  - tests
  - release

include:
  - .gitlab/tests.yml
  - .gitlab/release.yml
53 changes: 53 additions & 0 deletions .gitlab/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Upload to PyPI. For external releases with KitMaker, we need to check compliance and use jfrog cli
build-and-upload-wheels:
  variables:
    GIT_DEPTH: "1000" # For correct version naming (e.g. 0.1.dev20) of nightly builds
  stage: release
  timeout: 15m
  tags: [type/docker, os/linux] # Use a runner with these tags
  rules:
    # Skip entirely when running JET-only pipelines.
    - if: $JET_ONLY != null
      when: never
    # External release: triggered by a semver tag (e.g. 1.2.3) -> upload to Artifactory.
    - if: $CI_COMMIT_TAG =~ /^\d+\.\d+\.\d+$/
      variables:
        RELEASE: "true"
        TWINE_USERNAME: svc-dl-algo-ammo
        TWINE_PASSWORD: $ARTIFACTORY_TOKEN # Configured in GitLab > Settings > CI/CD
        REPO_URL: https://urm.nvidia.com/artifactory/api/pypi/sw-dl-algo-ammo-pypi-local
    # Nightly build: scheduled pipeline -> upload to the GitLab package registry.
    - if: $CI_PIPELINE_SOURCE == "schedule"
      variables:
        RELEASE: "false"
        TWINE_USERNAME: gitlab-ci-token
        TWINE_PASSWORD: $CI_JOB_TOKEN
        REPO_URL: $CI_API_V4_URL/projects/$CI_PROJECT_ID/packages/pypi
  script:
    - pip install tox
    - tox -e build-wheel
    # KitMaker compliance checker: https://gitlab-master.nvidia.com/dl/pypi/Wheel-CI-CD/
    # - |
    #   if [[ $RELEASE == "true" ]]; then
    #     curl -fsSL https://get.docker.com | sh
    #     docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
    #     docker run --pull=always --rm --network=host \
    #       -e IGNORE_FAILED_PIP_INSTALL="1" \
    #       -e EXPECTED_PKG_LICENSE="Apache 2.0" \
    #       -e SKIPPED_SECURITY_RULES="" \
    #       -e ALLOWED_NOSEC_COUNT="0" \
    #       -v dist:/workspace/ \
    #       gitlab-master.nvidia.com:5005/dl/pypi/wheel-ci-cd:wheeltamer
    #   fi
    - |
      set -ex
      if [[ $RELEASE == "true" ]]; then
        curl -fL https://install-cli.jfrog.io | sh
        jf rt upload "dist/*.whl" sw-dl-algo-ammo-pypi-local/nvidia-modelopt/release/$CI_COMMIT_TAG/ \
          --url=https://urm.nvidia.com/artifactory --user=$TWINE_USERNAME --password=$TWINE_PASSWORD \
          --target-props="component_name=nvidia-modelopt;os=any;arch=any;version=$CI_COMMIT_TAG;branch=release;release_approver=kmorabia;release_status=ready" \
          --flat --detailed-summary
      else
        pip install twine
        twine upload --repository-url $REPO_URL dist/*.whl
      fi
  artifacts:
    paths:
      - dist/
90 changes: 90 additions & 0 deletions .gitlab/tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# NOTE: Make sure this file is consistent with .github/workflows/{unit,gpu}_tests.yml
# Hidden base job: shared stage, default interpreter/torch versions, and run rules.
.tests-default:
  stage: tests
  variables:
    # Quoted so "12"/"28" stay strings when interpolated into tox env names (py3$PYTHON).
    PYTHON: "12"
    TORCH: "28"
  rules:
    - if: $JET_ONLY != null
      when: never
    - if: $CI_COMMIT_TAG =~ /^\d+\.\d+\.\d+$/
    - if: $CI_PIPELINE_SOURCE == "web" || $CI_PIPELINE_SOURCE == "schedule"

##### Unit Tests #####
unit:
  extends: .tests-default
  timeout: 30m
  image: python:3.$PYTHON
  before_script:
    # Install cmake to build onnxsim from sdists for Python 3.12 until https://github.com/daquexian/onnx-simplifier/pull/353
    - if [ "$PYTHON" = "12" ]; then apt-get update && apt-get install -y cmake; fi
    - pip install tox
  script:
    - tox -e py3$PYTHON-torch$TORCH-unit

multi-py-unit:
  extends: unit
  parallel:
    matrix:
      - PYTHON: ["10", "11"]

multi-torch-unit:
  extends: unit
  parallel:
    matrix:
      - TORCH: ["26", "27"]

##### GPU Tests #####
gpu:
  extends: .tests-default
  timeout: 60m
  image: nvcr.io/nvidia/pytorch:25.06-py3
  variables:
    GIT_DEPTH: "1000" # For correct version for tests/gpu/torch/quantization/plugins/test_megatron.py
    LD_LIBRARY_PATH: "/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}" # Add libcudnn*.so and libnv*.so to path.
    PIP_CONSTRAINT: "" # Disable pip constraint for upgrading packages
  tags: [docker, linux, 2-gpu]
  script:
    # Use pre-installed packages without a new venv with tox-current-env
    - pip install tox-current-env
    - tox -e py312-cuda12-gpu --current-env

##### Example Tests #####
example:
  extends: .tests-default # stage: tests is inherited from the base job
  timeout: 45m
  image: gitlab-master.nvidia.com:5005/omniml/modelopt/modelopt_examples:latest
  variables:
    TEST_TYPE: pytest
  # Quoted: "<" in a plain scalar is legal but easy to misread; keep it explicit.
  tags: [docker, linux, 2-gpu, "sm<89"]
  parallel:
    matrix:
      - TEST: [diffusers, llm_distill, llm_qat, llm_sparsity, onnx_ptq, speculative_decoding]
  allow_failure: true # Allow to continue next stages even if job is canceled (e.g. during release)
  before_script:
    - pip install ".[all]" -U
  script:
    # Uninstall apex since T5 Int8 (PixArt) + Apex is not supported as per https://github.com/huggingface/transformers/issues/21391
    - if [ "$TEST" = "diffusers" ]; then pip uninstall -y apex; fi
    - if [ "$TEST_TYPE" = "pytest" ]; then pytest -s tests/examples/$TEST; else bash tests/examples/test_$TEST.sh; fi

example-ada:
  extends: example
  timeout: 60m
  tags: [docker, linux, 2-gpu, "sm>=89"]
  parallel:
    matrix:
      - TEST: [llm_eval, llm_ptq, vlm_ptq, llm_autodeploy]
      - TEST: [onnx_ptq]
        TEST_TYPE: bash

##### Megatron / NeMo Integration Tests #####
megatron-nemo-integration:
  extends: .tests-default
  variables:
    UPSTREAM_REF: $CI_COMMIT_REF_NAME
  trigger:
    project: omniml/integration/nmm-sandbox
    branch: main
    strategy: depend # Make sure the upstream task is waiting for the downstream task
3 changes: 1 addition & 2 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@ toxworkdir = /tmp/{env:USER}-modelopt-tox
############################
# CPU Unit test environments
############################
[testenv:{py310,py311,py312}-torch{25,26,27,28}-unit]
[testenv:{py310,py311,py312}-torch{26,27,28}-unit]
deps =
# Build onnxsim from sdists for Python 3.12 until http://github.com/daquexian/onnx-simplifier/pull/353
py312: onnxsim

# torch version auto-selected based on torchvision version
torch25: torchvision~=0.20.0
torch26: torchvision~=0.21.0
torch27: torchvision~=0.22.0
torch28: torchvision~=0.23.0
Expand Down
Loading