NVIDIA · SimengLiu-nv · Nov 4, 2025 · Oct 31, 2025 · Nov 4, 2025
@@ -34,15 +34,18 @@ def export_onnx(self, onnxFile):
                              *self.latent_shape).cuda()
         self.pytorch_model.cuda().eval()
         with torch.inference_mode():
-            torch.onnx.export(self.pytorch_model,
-                              latent,
-                              onnxFile,
-                              opset_version=17,
-                              input_names=['input'],
-                              output_names=['output'],
-                              dynamic_axes={'input': {
-                                  0: 'batch'
-                              }})
+            torch.onnx.export(
+                self.pytorch_model,
+                latent,
+                onnxFile,
+                opset_version=17,
+                input_names=['input'],
+                output_names=['output'],
+                dynamic_axes={'input': {
+                    0: 'batch'
+                }},
+                # Required for PyTorch >= 2.9.0: the dynamo-based exporter became the default there, and it introduces bugs because it does not natively support opset_version=17.
+                dynamo=False)
 
     def generate_trt_engine(self, onnxFile, planFile):
         print(f"Start exporting TRT model to {planFile}!")

@@ -89,7 +89,8 @@ def export_onnx(self, onnx_file_path, pretrained_model_path, image_url):
             dynamic_axes={"input": {
                 0: "batch"
             }},
-        )
+            # Required for PyTorch >= 2.9.0: the dynamo-based exporter became the default there, and it introduces bugs because it does not natively support opset_version=17.
+            dynamo=False)
         release_gc()  # Further release memory
         print(
             f"Export to ONNX file successfully! The ONNX file stays in {onnx_file_path}"

@@ -163,13 +163,16 @@ def export_onnx(model,
     logger.log(trt.Logger.INFO, f"Exporting onnx to {onnx_dir}/{onnx_name}")
     os.makedirs(onnx_dir, exist_ok=True)
 
-    torch.onnx.export(model,
-                      input,
-                      f'{onnx_dir}/{onnx_name}',
-                      opset_version=17,
-                      input_names=input_names,
-                      output_names=output_names,
-                      dynamic_axes=dynamic_axes)
+    torch.onnx.export(
+        model,
+        input,
+        f'{onnx_dir}/{onnx_name}',
+        opset_version=17,
+        input_names=input_names,
+        output_names=output_names,
+        dynamic_axes=dynamic_axes,
+        # Required for PyTorch >= 2.9.0: the dynamo-based exporter became the default there, and it introduces bugs because it does not natively support opset_version=17.
+        dynamo=False)
 
 
 def build_trt_engine(model_type,

@@ -343,14 +343,6 @@ full:H20-3e/accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype
 full:H20-3e/accuracy/test_llm_api_pytorch.py::TestKimiK2::test_fp8_blockscale[latency] SKIP (slow I/O)
 full:H20-3e/test_e2e.py::test_ptp_quickstart_advanced_multi_gpus[DeepSeek-V3-671B-FP8-DeepSeek-V3-0324-8] SKIP (slow I/O)
 disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_spec_dec_batch_slot_limit[False-False-EAGLE3-LLaMA3.1-Instruct-8B-Llama-3.1-8B-Instruct] SKIP (https://nvbugs/5608743)
-triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-guaranteed_no_evict---1-1-1-False-ensemble] SKIP (https://nvbugs/5606136)
-triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-max_utilization---1-1-1-False-ensemble] SKIP (https://nvbugs/5606136)
-triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-guaranteed_no_evict---1-1-1-False-ensemble] SKIP (https://nvbugs/5606136)
-triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-max_utilization---1-1-1-False-ensemble] SKIP (https://nvbugs/5606136)
-triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-guaranteed_no_evict---1-1-1-False-tensorrt_llm_bls] SKIP (https://nvbugs/5606136)
-triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-disableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-max_utilization---1-1-1-False-tensorrt_llm_bls] SKIP (https://nvbugs/5606136)
-triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-guaranteed_no_evict---1-1-1-False-tensorrt_llm_bls] SKIP (https://nvbugs/5606136)
-triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-max_utilization---1-1-1-False-tensorrt_llm_bls] SKIP (https://nvbugs/5606136)
 accuracy/test_cli_flow.py::TestMinitron4BBase::test_fp8 SKIP (https://nvbugs/5606233)
 examples/test_gpt.py::test_llm_minitron_fp8_with_pseudo_loras[4b] SKIP (https://nvbugs/5606233)
 disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-False-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5626197)