diff --git a/examples/onnx_ptq/evaluate.py b/examples/onnx_ptq/evaluate.py
index bddc5121..cc527151 100644
--- a/examples/onnx_ptq/evaluate.py
+++ b/examples/onnx_ptq/evaluate.py
@@ -35,6 +35,12 @@ def main():
         help="""Path to the image classification ONNX model with input shape of
             [batch_size,3,224,224] and output shape of [1,1000]""",
     )
+    parser.add_argument(
+        "--engine_path",
+        type=str,
+        required=True,
+        help="Path to save the compiled TensorRT engine",
+    )
     parser.add_argument(
         "--imagenet_path", type=str, default=None, help="Path to the imagenet dataset"
     )
@@ -73,7 +79,10 @@ def main():
     client = RuntimeRegistry.get(deployment)

     # Compile the ONNX model to TRT engine and create the device model
-    compiled_model = client.ir_to_compiled(onnx_bytes)
+    compilation_args = {
+        "engine_path": args.engine_path,
+    }
+    compiled_model = client.ir_to_compiled(onnx_bytes, compilation_args)
     device_model = DeviceModel(client, compiled_model, metadata={})

     top1_accuracy, top5_accuracy = 0.0, 0.0
diff --git a/modelopt/torch/_deploy/_runtime/tensorrt/engine_builder.py b/modelopt/torch/_deploy/_runtime/tensorrt/engine_builder.py
index feb8d5d6..cb9f67ae 100644
--- a/modelopt/torch/_deploy/_runtime/tensorrt/engine_builder.py
+++ b/modelopt/torch/_deploy/_runtime/tensorrt/engine_builder.py
@@ -122,6 +122,7 @@ def _update_dynamic_shapes(dynamic_shapes: dict, cmd: list[str]) -> None:
 def build_engine(
     onnx_bytes: OnnxBytes,
     trt_mode: str = TRTMode.FLOAT32,
+    engine_path: Path | None = None,
     calib_cache: str | None = None,
     dynamic_shapes: dict | None = None,
     plugin_config: dict | None = None,
@@ -133,6 +134,7 @@

     Args:
         onnx_bytes: Data of the ONNX model stored as an OnnxBytes object.
+        engine_path: Path to save the TensorRT engine.
         trt_mode: The precision with which the TensorRT engine will be built. Supported modes are:
             - TRTMode.FLOAT32
             - TRTMode.FLOAT16
@@ -202,6 +204,7 @@ def _build_command(

     def _setup_files_and_paths(
         tmp_dir_path: Path,
+        engine_path: Path | None,
     ) -> tuple[Path, Path, Path | None, Path | None, Path]:
         tmp_onnx_dir = tmp_dir_path / "onnx"
         onnx_bytes.write_to_disk(str(tmp_onnx_dir))
@@ -209,7 +212,12 @@
         final_output_dir = Path(output_dir or Path(gettempdir()) / DEFAULT_ARTIFACT_DIR)
         final_output_dir.mkdir(parents=True, exist_ok=True)

-        engine_path = final_output_dir / f"{onnx_bytes.model_name}.engine"
+        engine_path = (
+            Path(engine_path)
+            if engine_path
+            else final_output_dir / f"{onnx_bytes.model_name}.engine"
+        )
+        engine_path.parent.mkdir(parents=True, exist_ok=True)
         calib_cache_path = final_output_dir / "calib_cache" if calib_cache else None
         timing_cache_path = final_output_dir / "timing.cache"
@@ -217,7 +225,7 @@

     with TemporaryDirectory() as tmp_dir:
         onnx_path, engine_path, calib_cache_path, timing_cache_path, final_output_dir = (
-            _setup_files_and_paths(Path(tmp_dir))
+            _setup_files_and_paths(Path(tmp_dir), engine_path)
         )
         cmd = _build_command(onnx_path, engine_path, calib_cache_path, timing_cache_path)
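Note on the `_setup_files_and_paths` change above: the new engine-path handling is a two-step fallback (honor the caller's path, otherwise derive one from the model name under the default artifact directory), plus an eager `mkdir` so trtexec can write to a location that does not exist yet. A minimal standalone sketch of that logic follows; `DEFAULT_ARTIFACT_DIR` and `model_name` are given stand-in values here, whereas the real code takes them from the module constant and the `OnnxBytes` object:

```python
from pathlib import Path
from tempfile import gettempdir

DEFAULT_ARTIFACT_DIR = "modelopt_artifacts"  # assumption: stand-in for the real constant
model_name = "resnet50"                      # assumption: onnx_bytes.model_name in the real code


def resolve_engine_path(engine_path: str | None, output_dir: str | None = None) -> Path:
    """Honor a caller-supplied engine path; otherwise fall back to the default artifact dir."""
    final_output_dir = Path(output_dir or Path(gettempdir()) / DEFAULT_ARTIFACT_DIR)
    final_output_dir.mkdir(parents=True, exist_ok=True)

    resolved = Path(engine_path) if engine_path else final_output_dir / f"{model_name}.engine"
    # Create parent dirs up front so the engine can be written to an arbitrary location.
    resolved.parent.mkdir(parents=True, exist_ok=True)
    return resolved


print(resolve_engine_path(None))                       # <tmpdir>/modelopt_artifacts/resnet50.engine
print(resolve_engine_path("build/fp16/model.engine"))  # caller-chosen location
```

Resolving and creating the parent directory before invoking trtexec keeps the failure mode early and obvious: a bad `--engine_path` surfaces as a filesystem error instead of a cryptic builder failure.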
diff --git a/modelopt/torch/_deploy/_runtime/trt_client.py b/modelopt/torch/_deploy/_runtime/trt_client.py
index 0f491b85..8d9185ee 100644
--- a/modelopt/torch/_deploy/_runtime/trt_client.py
+++ b/modelopt/torch/_deploy/_runtime/trt_client.py
@@ -73,7 +73,8 @@ def _ir_to_compiled(

         Args:
             ir_bytes: The ONNX model bytes.
-            compilation_args: A dictionary of compilation arguments. Supported args: dynamic_shapes, plugin_config.
+            compilation_args: A dictionary of compilation arguments.
+                The following arguments are supported: dynamic_shapes, plugin_config, engine_path.

         Returns:
             The compiled TRT engine bytes.
@@ -85,6 +86,7 @@ def _ir_to_compiled(
             onnx_bytes,
             dynamic_shapes=compilation_args.get("dynamic_shapes"),  # type: ignore[union-attr]
             plugin_config=compilation_args.get("plugin_config"),  # type: ignore[union-attr]
+            engine_path=compilation_args.get("engine_path"),  # type: ignore[union-attr]
             trt_mode=self.deployment["precision"],
             verbose=(self.deployment.get("verbose", "false").lower() == "true"),
         )
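To make the data flow concrete for reviewers, here is a caller-side sketch of the new `engine_path` plumbing. It mirrors `examples/onnx_ptq/evaluate.py` after this change; the deployment dict and the path are illustrative assumptions, and the ONNX byte preparation is elided because this PR does not touch it:

```python
# Assumes the imports and ONNX loading of examples/onnx_ptq/evaluate.py;
# onnx_bytes below stands for the serialized model prepared there.
deployment = {"runtime": "TRT", "precision": "stronglyTyped"}  # assumption: example values

client = RuntimeRegistry.get(deployment)

# engine_path travels through compilation_args -> _ir_to_compiled ->
# build_engine(engine_path=...), so the serialized engine lands at this exact location.
compilation_args = {"engine_path": "build/resnet50/fp16/model.engine"}  # assumption: example path
compiled_model = client.ir_to_compiled(onnx_bytes, compilation_args)
device_model = DeviceModel(client, compiled_model, metadata={})
```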
diff --git a/tests/examples/test_onnx_ptq.sh b/tests/examples/test_onnx_ptq.sh
index 543089ce..9b4647a6 100755
--- a/tests/examples/test_onnx_ptq.sh
+++ b/tests/examples/test_onnx_ptq.sh
@@ -21,7 +21,7 @@
 # It is recommended to execute this script inside the Model Optimization Toolkit TensorRT Docker container.
 # Please ensure that the ImageNet dataset is available in the container at the specified path.

-# Usage: ./test_onnx_ptq.sh /path/to/imagenet /path/to/models
+# Usage: ./test_onnx_ptq.sh [--no-clean] [/path/to/imagenet] [/path/to/models]

 set -exo pipefail

@@ -34,12 +34,33 @@ cuda_capability=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | hea
 pushd $public_example_dir

+
+# Parse arguments
+clean_mode=true
+imagenet_path=""
+models_folder=""
+
+for arg in "$@"; do
+    case $arg in
+        --no-clean)
+            clean_mode=false
+            ;;
+        *)
+            if [ -z "$imagenet_path" ]; then
+                imagenet_path="$arg"
+            elif [ -z "$models_folder" ]; then
+                models_folder="$arg"
+            fi
+            ;;
+    esac
+done
+
 export TQDM_DISABLE=1

 # Setting image and model paths (contains 8 models)
-imagenet_path=${1:-/data/imagenet/}
-models_folder=${2:-/models/onnx}
+imagenet_path=${imagenet_path:-/data/imagenet/}
+models_folder=${models_folder:-/models/onnx}

 calib_size=64
 batch_size=1
@@ -88,9 +111,9 @@ declare -A timm_model_name=(
 latency_models=("efficientnet_b0" "efficientnet_b3" "efficientnet-lite4-11" "faster_vit_timm_opset13_simplified" "faster_vit_timm_opset17_simplified" "inception-v1-12" "inception-v2-9")

 # Create build directory to store all the results
+rm -rf build
 mkdir -p build
-
 # Iterate over each model path to create directories for all modes for each model
 for model_path in "${model_paths[@]}"; do
     model_name=$(basename "$model_path" .onnx)
@@ -129,7 +152,8 @@ for model_path in "${model_paths[@]}"; do
                 --onnx_path=$model_dir/fp16/model.onnx \
                 --quantize_mode=$quant_mode \
                 --calibration_data=$calib_data_path \
-                --output_path=$model_dir/$quant_mode/model.quant.onnx &
+                --output_path=$model_dir/$quant_mode/model.quant.onnx \
+                --calibration_eps=cuda:0 &
             pids+=($!)
         done
@@ -163,12 +187,15 @@ for model_path in "${model_paths[@]}"; do

         if [ "$quant_mode" == "fp16" ]; then
             eval_model_path=$model_dir/fp16/model.onnx
+            engine_path=$model_dir/fp16/model.engine
             precision="fp16"
         elif [ "$quant_mode" == "int8_iq" ]; then
             eval_model_path=$model_dir/fp16/model.onnx
+            engine_path=$model_dir/int8_iq/model.engine
             precision="best"
         else
             eval_model_path=$model_dir/$quant_mode/model.quant.onnx
+            engine_path=$model_dir/$quant_mode/model.quant.engine
             precision="stronglyTyped"
         fi
@@ -176,12 +203,14 @@ for model_path in "${model_paths[@]}"; do
         if [[ " ${latency_models[@]} " =~ " $model_name " ]]; then
             CUDA_VISIBLE_DEVICES=$gpu_id python evaluate.py \
                 --onnx_path=$eval_model_path \
+                --engine_path=$engine_path \
                 --model_name="${timm_model_name[$model_name]}" \
                 --engine_precision=$precision \
                 --results_path=$model_dir/$quant_mode/${model_name}_${quant_mode}.csv &
         else
             CUDA_VISIBLE_DEVICES=$gpu_id python evaluate.py \
                 --onnx_path=$eval_model_path \
+                --engine_path=$engine_path \
                 --imagenet_path=$imagenet_path \
                 --eval_data_size=$calib_size \
                 --batch_size $batch_size \
@@ -209,6 +238,15 @@ for model_path in "${model_paths[@]}"; do
 done

 python $test_utils_dir/aggregate_results.py --results_dir=build
+
+if [ "$clean_mode" = true ]; then
+    echo "Cleaning build artifacts..."
+    rm -rf build/
+    echo "Build artifacts cleaned successfully."
+    popd
+    exit 0
+fi
+
 popd
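With the revised argument handling, both invocation styles work; the positional paths stay optional, and `--no-clean` preserves the `build/` tree (engines, quantized ONNX files, per-model CSVs) for inspection. The paths below are illustrative:

```bash
# Default run: ImageNet at /data/imagenet/, models at /models/onnx; build/ is removed at the end.
./test_onnx_ptq.sh

# Keep build artifacts and point at explicit dataset/model locations.
./test_onnx_ptq.sh --no-clean /path/to/imagenet /path/to/models
```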