11 changes: 10 additions & 1 deletion examples/onnx_ptq/evaluate.py
@@ -35,6 +35,12 @@ def main():
help="""Path to the image classification ONNX model with input shape of
[batch_size,3,224,224] and output shape of [1,1000]""",
)
parser.add_argument(
"--engine_path",
type=str,
required=True,
help="Path to save the built TensorRT engine",
)
parser.add_argument(
"--imagenet_path", type=str, default=None, help="Path to the imagenet dataset"
)
@@ -73,7 +79,10 @@ def main():
client = RuntimeRegistry.get(deployment)

# Compile the ONNX model to TRT engine and create the device model
compiled_model = client.ir_to_compiled(onnx_bytes)
compilation_args = {
"engine_path": args.engine_path,
}
compiled_model = client.ir_to_compiled(onnx_bytes, compilation_args)
device_model = DeviceModel(client, compiled_model, metadata={})

top1_accuracy, top5_accuracy = 0.0, 0.0
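For reference, a hypothetical invocation of the updated evaluate.py with the new required --engine_path flag. All paths are placeholders, and only flags that appear in this diff are used:

```python
# Hypothetical invocation of the updated evaluate.py; every path below is a
# placeholder and only flags visible in this diff are used.
import subprocess

subprocess.run(
    [
        "python", "evaluate.py",
        "--onnx_path", "build/resnet50/fp16/model.onnx",
        "--engine_path", "build/resnet50/fp16/model.engine",  # new required flag
        "--imagenet_path", "/data/imagenet/",
        "--eval_data_size", "64",
        "--batch_size", "1",
    ],
    check=True,
)
```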
12 changes: 10 additions & 2 deletions modelopt/torch/_deploy/_runtime/tensorrt/engine_builder.py
@@ -122,6 +122,7 @@ def _update_dynamic_shapes(dynamic_shapes: dict, cmd: list[str]) -> None:
def build_engine(
onnx_bytes: OnnxBytes,
trt_mode: str = TRTMode.FLOAT32,
engine_path: Path | None = None,
calib_cache: str | None = None,
dynamic_shapes: dict | None = None,
plugin_config: dict | None = None,
@@ -133,6 +134,7 @@

Args:
onnx_bytes: Data of the ONNX model stored as an OnnxBytes object.
engine_path: Path to save the TensorRT engine.
trt_mode: The precision with which the TensorRT engine will be built. Supported modes are:
- TRTMode.FLOAT32
- TRTMode.FLOAT16
@@ -202,22 +204,28 @@ def _build_command(

def _setup_files_and_paths(
tmp_dir_path: Path,
engine_path: Path | None,
) -> tuple[Path, Path, Path | None, Path | None, Path]:
tmp_onnx_dir = tmp_dir_path / "onnx"
onnx_bytes.write_to_disk(str(tmp_onnx_dir))
onnx_path = tmp_onnx_dir / f"{onnx_bytes.model_name}.onnx"

final_output_dir = Path(output_dir or Path(gettempdir()) / DEFAULT_ARTIFACT_DIR)
final_output_dir.mkdir(parents=True, exist_ok=True)
engine_path = final_output_dir / f"{onnx_bytes.model_name}.engine"
engine_path = (
Path(engine_path)
if engine_path
else final_output_dir / f"{onnx_bytes.model_name}.engine"
)
engine_path.parent.mkdir(parents=True, exist_ok=True)
calib_cache_path = final_output_dir / "calib_cache" if calib_cache else None
timing_cache_path = final_output_dir / "timing.cache"

return onnx_path, engine_path, calib_cache_path, timing_cache_path, final_output_dir

with TemporaryDirectory() as tmp_dir:
onnx_path, engine_path, calib_cache_path, timing_cache_path, final_output_dir = (
_setup_files_and_paths(Path(tmp_dir))
_setup_files_and_paths(Path(tmp_dir), engine_path)
)
cmd = _build_command(onnx_path, engine_path, calib_cache_path, timing_cache_path)

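The core of this file's change is the path resolution in _setup_files_and_paths: an explicit engine_path now overrides the default artifact directory, and its parent directory is created on demand. A standalone sketch of just that logic; DEFAULT_ARTIFACT_DIR is a stand-in for the module's constant, everything else mirrors the diff:

```python
# Standalone sketch of the engine-path resolution added above.
from pathlib import Path
from tempfile import gettempdir

DEFAULT_ARTIFACT_DIR = "modelopt_artifacts"  # placeholder for the module's constant


def resolve_engine_path(
    model_name: str, output_dir: str | None, engine_path: str | None
) -> Path:
    final_output_dir = Path(output_dir or Path(gettempdir()) / DEFAULT_ARTIFACT_DIR)
    final_output_dir.mkdir(parents=True, exist_ok=True)
    # An explicit engine_path wins; otherwise fall back to the artifact dir.
    resolved = Path(engine_path) if engine_path else final_output_dir / f"{model_name}.engine"
    resolved.parent.mkdir(parents=True, exist_ok=True)  # new in this PR
    return resolved


print(resolve_engine_path("resnet50", None, "build/fp16/model.engine"))
# build/fp16/model.engine
print(resolve_engine_path("resnet50", None, None))
# <tmpdir>/modelopt_artifacts/resnet50.engine
```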
4 changes: 3 additions & 1 deletion modelopt/torch/_deploy/_runtime/trt_client.py
@@ -73,7 +73,8 @@ def _ir_to_compiled(

Args:
ir_bytes: The ONNX model bytes.
compilation_args: A dictionary of compilation arguments. Supported args: dynamic_shapes, plugin_config.
compilation_args: A dictionary of compilation arguments.
The following arguments are supported: dynamic_shapes, plugin_config, engine_path.

Returns:
The compiled TRT engine bytes.
@@ -85,6 +86,7 @@ def _ir_to_compiled(
onnx_bytes,
dynamic_shapes=compilation_args.get("dynamic_shapes"), # type: ignore[union-attr]
plugin_config=compilation_args.get("plugin_config"), # type: ignore[union-attr]
engine_path=compilation_args.get("engine_path"), # type: ignore[union-attr]
trt_mode=self.deployment["precision"],
verbose=(self.deployment.get("verbose", "false").lower() == "true"),
)
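Because compilation_args is a plain dict, a missing key falls through as None and build_engine keeps its default. A minimal sketch of that unpacking, with a stub standing in for the real builder:

```python
# Minimal sketch of how the client forwards compilation_args; build_engine_stub
# is a stand-in for the real builder and just echoes the non-default arguments.
from typing import Any


def build_engine_stub(**kwargs: Any) -> None:
    print({k: v for k, v in kwargs.items() if v is not None})


def ir_to_compiled_sketch(compilation_args: dict[str, Any] | None = None) -> None:
    compilation_args = compilation_args or {}
    build_engine_stub(
        dynamic_shapes=compilation_args.get("dynamic_shapes"),
        plugin_config=compilation_args.get("plugin_config"),
        engine_path=compilation_args.get("engine_path"),  # new in this PR
    )


ir_to_compiled_sketch({"engine_path": "build/fp16/model.engine"})
# {'engine_path': 'build/fp16/model.engine'}
```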
48 changes: 43 additions & 5 deletions tests/examples/test_onnx_ptq.sh
@@ -21,7 +21,7 @@
# It is recommended to execute this script inside the Model Optimization Toolkit TensorRT Docker container.
# Please ensure that the ImageNet dataset is available in the container at the specified path.

# Usage: ./test_onnx_ptq.sh /path/to/imagenet /path/to/models
# Usage: ./test_onnx_ptq.sh [--no-clean] [/path/to/imagenet] [/path/to/models]

set -exo pipefail

@@ -34,12 +34,35 @@ cuda_capability=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | hea


pushd $public_example_dir

# Parse arguments
clean_mode=true
imagenet_path=""
models_folder=""

for arg in "$@"; do
case $arg in
--no-clean)
clean_mode=false
shift
;;
*)
if [ -z "$imagenet_path" ]; then
imagenet_path="$arg"
elif [ -z "$models_folder" ]; then
models_folder="$arg"
fi
shift
;;
esac
done

export TQDM_DISABLE=1


# Setting image and model paths (contains 8 models)
imagenet_path=${1:-/data/imagenet/}
models_folder=${2:-/models/onnx}
imagenet_path=${imagenet_path:-/data/imagenet/}
models_folder=${models_folder:-/models/onnx}
calib_size=64
batch_size=1

@@ -88,9 +111,9 @@ declare -A timm_model_name=(
latency_models=("efficientnet_b0" "efficientnet_b3" "efficientnet-lite4-11" "faster_vit_timm_opset13_simplified" "faster_vit_timm_opset17_simplified" "inception-v1-12" "inception-v2-9")

# Create build directory to store all the results
rm -rf build
mkdir -p build


# Iterate over each model path to create directories for all modes for each model
for model_path in "${model_paths[@]}"; do
model_name=$(basename "$model_path" .onnx)
@@ -129,7 +152,8 @@ for model_path in "${model_paths[@]}"; do
--onnx_path=$model_dir/fp16/model.onnx \
--quantize_mode=$quant_mode \
--calibration_data=$calib_data_path \
--output_path=$model_dir/$quant_mode/model.quant.onnx &
--output_path=$model_dir/$quant_mode/model.quant.onnx \
--calibration_eps=cuda:0 &
pids+=($!)
done

@@ -163,25 +187,30 @@

if [ "$quant_mode" == "fp16" ]; then
eval_model_path=$model_dir/fp16/model.onnx
engine_path=$model_dir/fp16/model.engine
precision="fp16"
elif [ "$quant_mode" == "int8_iq" ]; then
eval_model_path=$model_dir/fp16/model.onnx
engine_path=$model_dir/int8_iq/model.engine
precision="best"
else
eval_model_path=$model_dir/$quant_mode/model.quant.onnx
engine_path=$model_dir/$quant_mode/model.quant.engine
precision="stronglyTyped"
fi

echo "Starting evaluation of $model_name for mode: $quant_mode on GPU $gpu_id"
if [[ " ${latency_models[@]} " =~ " $model_name " ]]; then
CUDA_VISIBLE_DEVICES=$gpu_id python evaluate.py \
--onnx_path=$eval_model_path \
--engine_path=$engine_path \
--model_name="${timm_model_name[$model_name]}" \
--engine_precision=$precision \
--results_path=$model_dir/$quant_mode/${model_name}_${quant_mode}.csv &
else
CUDA_VISIBLE_DEVICES=$gpu_id python evaluate.py \
--onnx_path=$eval_model_path \
--engine_path=$engine_path \
--imagenet_path=$imagenet_path \
--eval_data_size=$calib_size \
--batch_size $batch_size \
@@ -209,6 +238,15 @@ for model_path in "${model_paths[@]}"; do
done

python $test_utils_dir/aggregate_results.py --results_dir=build

if [ "$clean_mode" = true ]; then
echo "Cleaning build artifacts..."
rm -rf build/
echo "Build artifacts cleaned successfully."
popd
exit 0
fi

popd


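The per-mode branching in the evaluation loop above maps each quantization mode to an ONNX file, an engine path, and an engine precision string. The same mapping rendered as a small Python helper for clarity; the function name is illustrative, the path layout matches the script:

```python
# Python rendering of the per-mode selection in the bash loop above; the
# function name is illustrative, the path layout matches the script.
def select_eval_config(model_dir: str, quant_mode: str) -> tuple[str, str, str]:
    """Return (onnx_path, engine_path, precision) for one quantization mode."""
    if quant_mode == "fp16":
        return (
            f"{model_dir}/fp16/model.onnx",
            f"{model_dir}/fp16/model.engine",
            "fp16",
        )
    if quant_mode == "int8_iq":
        # int8 implicit quantization evaluates the fp16 ONNX at "best" precision
        return (
            f"{model_dir}/fp16/model.onnx",
            f"{model_dir}/int8_iq/model.engine",
            "best",
        )
    return (
        f"{model_dir}/{quant_mode}/model.quant.onnx",
        f"{model_dir}/{quant_mode}/model.quant.engine",
        "stronglyTyped",
    )
```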