Commit e6e0d2c

Fix failing CICD nightly tests (NVIDIA#445)
Signed-off-by: Keval Morabia <[email protected]>
1 parent ae78b9f commit e6e0d2c

File tree

11 files changed: +25 -31 lines changed


.github/workflows/gpu_tests.yml

Lines changed: 1 addition & 2 deletions
@@ -73,8 +73,7 @@ jobs:
       - uses: nv-gha-runners/setup-proxy-cache@main
       - name: Setup environment variables
        run: |
-          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib" >> $GITHUB_ENV
-          echo "PATH=${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin" >> $GITHUB_ENV
+          echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu" >> $GITHUB_ENV
      - name: Run gpu tests
        run: pip install tox-current-env && tox -e py312-cuda12-gpu --current-env
  gpu-tests-non-pr:

.gitlab/tests.yml

Lines changed: 1 addition & 3 deletions
@@ -35,9 +35,7 @@ unit:
   tags: [docker, linux, 2-gpu]
   before_script:
     # Add libcudnn*.so and libnv*.so to path
-    - export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib"
-    # Add trtexec to path
-    - export PATH="${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin"
+    - export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu"
     # Install git-lfs for Daring-Anteater dataset
     - apt-get update && apt-get install -y git-lfs
     - git lfs install --system

CHANGELOG.rst

Lines changed: 2 additions & 4 deletions
@@ -1,17 +1,15 @@
 Model Optimizer Changelog (Linux)
 =================================

-0.39 (2025-11-xx)
+0.39 (2025-11-07)
 ^^^^^^^^^^^^^^^^^

-**Deprecations**
-
 **New Features**

 - Add flag ``op_types_to_exclude_fp16`` in ONNX quantization to exclude ops from being converted to FP16/BF16. Alternatively, for custom TensorRT ops, this can also be done by indicating ``'fp32'`` precision in ``trt_plugins_precision``.
 - Add LoRA mode support for MCore in a new peft submodule: ``modelopt.torch.peft.update_model(model, LORA_CFG)``.
 - Support PTQ and fakequant in vLLM for fast evaluation of arbitrary quantization formats. See ``examples/vllm_serve`` for more details.
-- Add support for ``nemotron-post-training-dataset-v2`` and ``nemotron-post-training-dataset-v1`` in ``examples/llm_ptq``. Default to a mix of ``cnn_dailymail`` and ``nemotron-post-training-dataset-v2`` if no dataset is specified.
+- Add support for ``nemotron-post-training-dataset-v2`` and ``nemotron-post-training-dataset-v1`` in ``examples/llm_ptq``. Default to a mix of ``cnn_dailymail`` and ``nemotron-post-training-dataset-v2`` (gated dataset accessed using ``HF_TOKEN`` environment variable) if no dataset is specified.
 - Allow specifying ``calib_seq`` in ``examples/llm_ptq`` to set the maximum sequence length for calibration.

 **Documentation**

docs/source/getting_started/_installation_for_Linux.rst

Lines changed: 1 addition & 2 deletions
@@ -41,8 +41,7 @@ Environment setup
 .. code-block:: shell

     export PIP_CONSTRAINT=""
-    export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu:/usr/local/tensorrt/targets/x86_64-linux-gnu/lib"
-    export PATH="${PATH}:/usr/local/tensorrt/targets/x86_64-linux-gnu/bin"
+    export LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/include:/usr/lib/x86_64-linux-gnu"

 You may need to install additional dependencies from the respective examples's `requirements.txt` file.

examples/diffusers/quantization/requirements.txt

Lines changed: 3 additions & 0 deletions
@@ -4,3 +4,6 @@ nvtx
 onnx_graphsurgeon
 opencv-python>=4.8.1.78,<4.12.0.88
 sentencepiece
+# TODO: Fix for torch 2.9
+torch<2.9
+torchvision<0.24.0

examples/llm_sparsity/launch_finetune.sh

Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ CMD="accelerate launch --multi_gpu --mixed_precision bf16 finetune.py \
     --warmup_ratio 0.0 \
     --lr_scheduler_type cosine \
     --logging_steps 1 \
-    --fsdp full_shard auto_wrap \
+    --fsdp 'full_shard auto_wrap' \
     --fsdp_transformer_layer_cls_to_wrap LlamaDecoderLayer \
     --tf32 True \
     --modelopt_restore_path $MODELOPT_RESTORE_PATH \
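
Note on the launch_finetune.sh change: the launched script only receives "full_shard auto_wrap" as a single value when it is quoted; unquoted, the shell splits it into two argv tokens and "auto_wrap" is left dangling. A minimal sketch of that behavior with plain argparse (not the actual finetune.py parser; the --fsdp flag is reused here purely for illustration):

# Quoted vs. unquoted --fsdp value, as the launched process receives it.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--fsdp", type=str, default="")

# Quoted in the shell: one argv token, parsed as intended.
ok, _ = parser.parse_known_args(["--fsdp", "full_shard auto_wrap"])
assert ok.fsdp == "full_shard auto_wrap"

# Unquoted: the shell passes two tokens; "auto_wrap" is left over as an unrecognized argument.
bad, leftover = parser.parse_known_args(["--fsdp", "full_shard", "auto_wrap"])
assert bad.fsdp == "full_shard" and leftover == ["auto_wrap"]
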
Lines changed: 0 additions & 1 deletion
@@ -1,4 +1,3 @@
 flash-attn
 sentencepiece>=0.2.0
 tensorboardX
-transformers>=4.57.0

tests/_test_utils/torch_quantization/onnx_export.py

Lines changed: 1 addition & 0 deletions
@@ -65,6 +65,7 @@ def forward_loop(model):
         input_names=input_names,
         output_names=output_names,
         do_constant_folding=constant_folding,
+        dynamo=False,
         **kwargs,
     )
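The added dynamo=False pins the test export helper to the legacy TorchScript-based ONNX exporter; newer torch releases steer torch.onnx.export toward the dynamo-based exporter, which these quantization tests were not written against. A minimal standalone sketch (assumes torch >= 2.5, where the dynamo keyword is available; the tiny model is hypothetical):

# Export through the legacy TorchScript path by passing dynamo=False explicitly.
import io

import torch


class TinyModel(torch.nn.Module):  # stand-in for the quantized test model
    def forward(self, x):
        return torch.relu(x)


buffer = io.BytesIO()
torch.onnx.export(
    TinyModel(),
    (torch.randn(1, 4),),
    buffer,
    input_names=["input"],
    output_names=["output"],
    do_constant_folding=True,
    dynamo=False,  # keep the TorchScript exporter instead of the dynamo-based one
)
print(f"exported {len(buffer.getvalue())} bytes of ONNX")
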
tests/gpu/onnx/test_plugin.py

Lines changed: 7 additions & 10 deletions
@@ -19,6 +19,13 @@
 import onnx
 import onnx_graphsurgeon as gs
 from _test_utils.import_helper import skip_if_no_libcudnn, skip_if_no_tensorrt
+from _test_utils.onnx_autocast.utils import _assert_tensors_are_fp16
+from _test_utils.onnx_quantization.utils import _assert_nodes_are_quantized
+
+from modelopt.onnx.autocast import convert_to_mixed_precision
+from modelopt.onnx.autocast.graphsanitizer import GraphSanitizer
+from modelopt.onnx.quantization.quantize import quantize
+from modelopt.onnx.trt_utils import load_onnx_model

 skip_if_no_libcudnn()
 skip_if_no_tensorrt()

@@ -95,11 +102,6 @@ def _create_test_model_trt():


 def test_trt_plugin_quantization(tmp_path):
-    from _test_utils.onnx_quantization.utils import _assert_nodes_are_quantized
-
-    from modelopt.onnx.quantization.quantize import quantize
-    from modelopt.onnx.trt_utils import load_onnx_model
-
     model = _create_test_model_trt()
     with open(os.path.join(tmp_path, "model_with_trt_plugin.onnx"), "w") as f:
         onnx.save_model(model, f.name)

@@ -126,11 +128,6 @@ def test_trt_plugin_quantization(tmp_path):


 def test_trt_plugin_autocast(tmp_path):
-    from _test_utils.onnx_autocast.utils import _assert_tensors_are_fp16
-
-    from modelopt.onnx.autocast import convert_to_mixed_precision
-    from modelopt.onnx.autocast.graphsanitizer import GraphSanitizer
-
     model = _create_test_model_trt()
     with open(os.path.join(tmp_path, "model_with_trt_plugin_autocast.onnx"), "w") as f:
         onnx.save_model(model, f.name)
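
For reference, an abridged sketch of the module layout this change produces (not the complete test file; assumes the ModelOpt test environment where _test_utils and modelopt are importable): the shared imports now sit at module scope, ahead of the cuDNN/TensorRT skip guards, instead of being repeated inside each test.

# Abridged layout of tests/gpu/onnx/test_plugin.py after this change.
from _test_utils.import_helper import skip_if_no_libcudnn, skip_if_no_tensorrt
from _test_utils.onnx_quantization.utils import _assert_nodes_are_quantized

from modelopt.onnx.quantization.quantize import quantize
from modelopt.onnx.trt_utils import load_onnx_model

skip_if_no_libcudnn()
skip_if_no_tensorrt()


def test_trt_plugin_quantization(tmp_path):
    ...  # uses quantize() and load_onnx_model() from the module-level imports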

tests/gpu/onnx/test_quantize_onnx_torch_int4_awq.py

Lines changed: 2 additions & 3 deletions
@@ -111,12 +111,11 @@ def _forward_loop(model, dataloader):

     wq_onnx_awq_clip = dq_tensor(wq_onnx_awq_clip, scale_awq_clip, block_size)

-    assert np.allclose(wq_torch_awq_lite.detach(), wq_onnx_awq_lite.T, atol=1e-3)
-    assert np.allclose(wq_torch_awq_clip.detach(), wq_onnx_awq_clip.T, atol=1e-3)
+    assert np.allclose(wq_torch_awq_lite.detach().cpu(), wq_onnx_awq_lite.T, atol=1e-3)
+    assert np.allclose(wq_torch_awq_clip.detach().cpu(), wq_onnx_awq_clip.T, atol=1e-3)


 def test_int4_awq_cuda(tmp_path):
-    skip_if_onnx_version_above_1_18()
     skip_if_no_libcudnn()
     block_size = 128

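The added .cpu() calls matter because np.allclose converts its inputs to NumPy arrays, and NumPy cannot convert a CUDA tensor directly; moving the torch result to host memory first makes the comparison valid on GPU runners. A minimal sketch of the pattern (tensor names here are illustrative, not the test's actual values):

# np.allclose on a CUDA tensor raises, so the tensor must be detached and moved to CPU first.
import numpy as np
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
wq_torch = torch.randn(8, 8, device=device)   # stand-in for the torch-quantized weights
wq_onnx = wq_torch.T.detach().cpu().numpy()   # stand-in for the ONNX-side dequantized weights

# Comparing after .detach().cpu() works on both CPU and GPU runners.
assert np.allclose(wq_torch.detach().cpu(), wq_onnx.T, atol=1e-3)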