diff --git a/examples/models/contrib/dit/vae_decoder_trt.py b/examples/models/contrib/dit/vae_decoder_trt.py index 1374dbcbfa6..31803a6690d 100644 --- a/examples/models/contrib/dit/vae_decoder_trt.py +++ b/examples/models/contrib/dit/vae_decoder_trt.py @@ -34,15 +34,18 @@ def export_onnx(self, onnxFile): *self.latent_shape).cuda() self.pytorch_model.cuda().eval() with torch.inference_mode(): - torch.onnx.export(self.pytorch_model, - latent, - onnxFile, - opset_version=17, - input_names=['input'], - output_names=['output'], - dynamic_axes={'input': { - 0: 'batch' - }}) + torch.onnx.export( + self.pytorch_model, + latent, + onnxFile, + opset_version=17, + input_names=['input'], + output_names=['output'], + dynamic_axes={'input': { + 0: 'batch' + }}, + # Required for pytorch>=2.9.0 as dynamo becomes the default and introduces bugs as it does not support opset_version=17 natively + dynamo=False) def generate_trt_engine(self, onnxFile, planFile): print(f"Start exporting TRT model to {planFile}!") diff --git a/examples/models/core/qwenvl/vit_onnx_trt.py b/examples/models/core/qwenvl/vit_onnx_trt.py index 4b2f197db37..a993c87b47d 100644 --- a/examples/models/core/qwenvl/vit_onnx_trt.py +++ b/examples/models/core/qwenvl/vit_onnx_trt.py @@ -89,7 +89,8 @@ def export_onnx(self, onnx_file_path, pretrained_model_path, image_url): dynamic_axes={"input": { 0: "batch" }}, - ) + # Required for pytorch>=2.9.0 as dynamo becomes the default and introduces bugs as it does not support opset_version=17 natively + dynamo=False) release_gc() # Further release memory print( f"Export to ONNX file successfully! The ONNX file stays in {onnx_file_path}" diff --git a/tensorrt_llm/tools/multimodal_builder.py b/tensorrt_llm/tools/multimodal_builder.py index de3943c5634..54cf819d1f4 100644 --- a/tensorrt_llm/tools/multimodal_builder.py +++ b/tensorrt_llm/tools/multimodal_builder.py @@ -163,13 +163,16 @@ def export_onnx(model, logger.log(trt.Logger.INFO, f"Exporting onnx to {onnx_dir}/{onnx_name}") os.makedirs(onnx_dir, exist_ok=True) - torch.onnx.export(model, - input, - f'{onnx_dir}/{onnx_name}', - opset_version=17, - input_names=input_names, - output_names=output_names, - dynamic_axes=dynamic_axes) + torch.onnx.export( + model, + input, + f'{onnx_dir}/{onnx_name}', + opset_version=17, + input_names=input_names, + output_names=output_names, + dynamic_axes=dynamic_axes, + # Required for pytorch>=2.9.0 as dynamo becomes the default and introduces bugs as it does not support opset_version=17 natively + dynamo=False) def build_trt_engine(model_type, diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index 1e994f72469..14048b642df 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -343,14 +343,6 @@ full:H20-3e/accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype full:H20-3e/accuracy/test_llm_api_pytorch.py::TestKimiK2::test_fp8_blockscale[latency] SKIP (slow I/O) full:H20-3e/test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[DeepSeek-V3-671B-FP8-DeepSeek-V3-0324-8] SKIP (slow I/O) disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_spec_dec_batch_slot_limit[False-False-EAGLE3-LLaMA3.1-Instruct-8B-Llama-3.1-8B-Instruct] SKIP (https://nvbugs/5608743) -triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-guaranteed_no_evict---1-1-1-False-ensemble] SKIP (https://nvbugs/5606136) -triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-max_utilization---1-1-1-False-ensemble] SKIP (https://nvbugs/5606136) -triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-guaranteed_no_evict---1-1-1-False-ensemble] SKIP (https://nvbugs/5606136) -triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-max_utilization---1-1-1-False-ensemble] SKIP (https://nvbugs/5606136) -triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-guaranteed_no_evict---1-1-1-False-tensorrt_llm_bls] SKIP (https://nvbugs/5606136) -triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-max_utilization---1-1-1-False-tensorrt_llm_bls] SKIP (https://nvbugs/5606136) -triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-guaranteed_no_evict---1-1-1-False-tensorrt_llm_bls] SKIP (https://nvbugs/5606136) -triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-max_utilization---1-1-1-False-tensorrt_llm_bls] SKIP (https://nvbugs/5606136) accuracy/test_cli_flow.py::TestMinitron4BBase::test_fp8 SKIP (https://nvbugs/5606233) examples/test_gpt.py::test_llm_minitron_fp8_with_pseudo_loras[4b] SKIP (https://nvbugs/5606233) disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-False-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5626197)