11 changes: 10 additions & 1 deletion examples/onnx_ptq/evaluate.py
@@ -35,6 +35,12 @@ def main():
help="""Path to the image classification ONNX model with input shape of
[batch_size,3,224,224] and output shape of [1,1000]""",
)
parser.add_argument(
"--engine_path",
type=str,
required=True,
help="Path to save the built TensorRT engine",
)
parser.add_argument(
"--imagenet_path", type=str, default=None, help="Path to the imagenet dataset"
)
@@ -73,7 +79,10 @@ def main():
client = RuntimeRegistry.get(deployment)

# Compile the ONNX model to TRT engine and create the device model
compiled_model = client.ir_to_compiled(onnx_bytes)
compilation_args = {
"engine_path": args.engine_path,
}
compiled_model = client.ir_to_compiled(onnx_bytes, compilation_args)
device_model = DeviceModel(client, compiled_model, metadata={})

top1_accuracy, top5_accuracy = 0.0, 0.0
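For reference, a hypothetical invocation of the updated evaluate.py with the new required --engine_path flag. All paths are placeholders, and only flags that appear in this diff are used:

```python
# Hypothetical invocation of the updated evaluate.py; every path below is a
# placeholder and only flags visible in this diff are used.
import subprocess

subprocess.run(
    [
        "python", "evaluate.py",
        "--onnx_path", "build/resnet50/fp16/model.onnx",
        "--engine_path", "build/resnet50/fp16/model.engine",  # new required flag
        "--imagenet_path", "/data/imagenet/",
        "--eval_data_size", "64",
        "--batch_size", "1",
    ],
    check=True,
)
```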
12 changes: 10 additions & 2 deletions modelopt/torch/_deploy/_runtime/tensorrt/engine_builder.py
@@ -122,6 +122,7 @@ def _update_dynamic_shapes(dynamic_shapes: dict, cmd: list[str]) -> None:
def build_engine(
onnx_bytes: OnnxBytes,
trt_mode: str = TRTMode.FLOAT32,
engine_path: Path | None = None,
calib_cache: str | None = None,
dynamic_shapes: dict | None = None,
plugin_config: dict | None = None,
@@ -133,6 +134,7 @@

Args:
onnx_bytes: Data of the ONNX model stored as an OnnxBytes object.
engine_path: Path to save the TensorRT engine.
trt_mode: The precision with which the TensorRT engine will be built. Supported modes are:
- TRTMode.FLOAT32
- TRTMode.FLOAT16
@@ -202,22 +204,28 @@ def _build_command(

def _setup_files_and_paths(
tmp_dir_path: Path,
engine_path: Path | None,
) -> tuple[Path, Path, Path | None, Path | None, Path]:
tmp_onnx_dir = tmp_dir_path / "onnx"
onnx_bytes.write_to_disk(str(tmp_onnx_dir))
onnx_path = tmp_onnx_dir / f"{onnx_bytes.model_name}.onnx"

final_output_dir = Path(output_dir or Path(gettempdir()) / DEFAULT_ARTIFACT_DIR)
final_output_dir.mkdir(parents=True, exist_ok=True)
engine_path = final_output_dir / f"{onnx_bytes.model_name}.engine"
engine_path = (
Path(engine_path)
if engine_path
else final_output_dir / f"{onnx_bytes.model_name}.engine"
)
engine_path.parent.mkdir(parents=True, exist_ok=True)
calib_cache_path = final_output_dir / "calib_cache" if calib_cache else None
timing_cache_path = final_output_dir / "timing.cache"

return onnx_path, engine_path, calib_cache_path, timing_cache_path, final_output_dir

with TemporaryDirectory() as tmp_dir:
onnx_path, engine_path, calib_cache_path, timing_cache_path, final_output_dir = (
_setup_files_and_paths(Path(tmp_dir))
_setup_files_and_paths(Path(tmp_dir), engine_path)
)
cmd = _build_command(onnx_path, engine_path, calib_cache_path, timing_cache_path)

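The core of this file's change is the path resolution in _setup_files_and_paths: an explicit engine_path now overrides the default artifact directory, and its parent directory is created on demand. A standalone sketch of just that logic; DEFAULT_ARTIFACT_DIR is a stand-in for the module's constant, everything else mirrors the diff:

```python
# Standalone sketch of the engine-path resolution added above.
from pathlib import Path
from tempfile import gettempdir

DEFAULT_ARTIFACT_DIR = "modelopt_artifacts"  # placeholder for the module's constant


def resolve_engine_path(
    model_name: str, output_dir: str | None, engine_path: str | None
) -> Path:
    final_output_dir = Path(output_dir or Path(gettempdir()) / DEFAULT_ARTIFACT_DIR)
    final_output_dir.mkdir(parents=True, exist_ok=True)
    # An explicit engine_path wins; otherwise fall back to the artifact dir.
    resolved = Path(engine_path) if engine_path else final_output_dir / f"{model_name}.engine"
    resolved.parent.mkdir(parents=True, exist_ok=True)  # new in this PR
    return resolved


print(resolve_engine_path("resnet50", None, "build/fp16/model.engine"))
# build/fp16/model.engine
print(resolve_engine_path("resnet50", None, None))
# <tmpdir>/modelopt_artifacts/resnet50.engine
```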
4 changes: 3 additions & 1 deletion modelopt/torch/_deploy/_runtime/trt_client.py
@@ -73,7 +73,8 @@ def _ir_to_compiled(

Args:
ir_bytes: The ONNX model bytes.
compilation_args: A dictionary of compilation arguments. Supported args: dynamic_shapes, plugin_config.
compilation_args: A dictionary of compilation arguments.
The following arguments are supported: dynamic_shapes, plugin_config, engine_path.

Returns:
The compiled TRT engine bytes.
@@ -85,6 +86,7 @@ def _ir_to_compiled(
onnx_bytes,
dynamic_shapes=compilation_args.get("dynamic_shapes"), # type: ignore[union-attr]
plugin_config=compilation_args.get("plugin_config"), # type: ignore[union-attr]
engine_path=compilation_args.get("engine_path"), # type: ignore[union-attr]
trt_mode=self.deployment["precision"],
verbose=(self.deployment.get("verbose", "false").lower() == "true"),
)
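Because compilation_args is a plain dict, a missing key falls through as None and build_engine keeps its default. A minimal sketch of that unpacking, with a stub standing in for the real builder:

```python
# Minimal sketch of how the client forwards compilation_args; build_engine_stub
# is a stand-in for the real builder and just echoes the non-default arguments.
from typing import Any


def build_engine_stub(**kwargs: Any) -> None:
    print({k: v for k, v in kwargs.items() if v is not None})


def ir_to_compiled_sketch(compilation_args: dict[str, Any] | None = None) -> None:
    compilation_args = compilation_args or {}
    build_engine_stub(
        dynamic_shapes=compilation_args.get("dynamic_shapes"),
        plugin_config=compilation_args.get("plugin_config"),
        engine_path=compilation_args.get("engine_path"),  # new in this PR
    )


ir_to_compiled_sketch({"engine_path": "build/fp16/model.engine"})
# {'engine_path': 'build/fp16/model.engine'}
```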
48 changes: 43 additions & 5 deletions tests/examples/test_onnx_ptq.sh
@@ -21,7 +21,7 @@
# It is recommended to execute this script inside the Model Optimization Toolkit TensorRT Docker container.
# Please ensure that the ImageNet dataset is available in the container at the specified path.

# Usage: ./test_onnx_ptq.sh /path/to/imagenet /path/to/models
# Usage: ./test_onnx_ptq.sh [--no-clean] [/path/to/imagenet] [/path/to/models]

set -exo pipefail

@@ -34,12 +34,35 @@ cuda_capability=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | hea


pushd $public_example_dir

# Parse arguments
clean_mode=true
imagenet_path=""
models_folder=""

for arg in "$@"; do
case $arg in
--no-clean)
clean_mode=false
shift
;;
*)
if [ -z "$imagenet_path" ]; then
imagenet_path="$arg"
elif [ -z "$models_folder" ]; then
models_folder="$arg"
fi
shift
;;
esac
done

export TQDM_DISABLE=1


# Setting image and model paths (contains 8 models)
imagenet_path=${1:-/data/imagenet/}
models_folder=${2:-/models/onnx}
imagenet_path=${imagenet_path:-/data/imagenet/}
models_folder=${models_folder:-/models/onnx}
calib_size=64
batch_size=1

@@ -88,9 +111,9 @@ declare -A timm_model_name=(
latency_models=("efficientnet_b0" "efficientnet_b3" "efficientnet-lite4-11" "faster_vit_timm_opset13_simplified" "faster_vit_timm_opset17_simplified" "inception-v1-12" "inception-v2-9")

# Create build directory to store all the results
rm -rf build
mkdir -p build


# Iterate over each model path to create directories for all modes for each model
for model_path in "${model_paths[@]}"; do
model_name=$(basename "$model_path" .onnx)
@@ -129,7 +152,8 @@ for model_path in "${model_paths[@]}"; do
--onnx_path=$model_dir/fp16/model.onnx \
--quantize_mode=$quant_mode \
--calibration_data=$calib_data_path \
--output_path=$model_dir/$quant_mode/model.quant.onnx &
--output_path=$model_dir/$quant_mode/model.quant.onnx \
--calibration_eps=cuda:0 &
pids+=($!)
done

@@ -163,25 +187,30 @@

if [ "$quant_mode" == "fp16" ]; then
eval_model_path=$model_dir/fp16/model.onnx
engine_path=$model_dir/fp16/model.engine
precision="fp16"
elif [ "$quant_mode" == "int8_iq" ]; then
eval_model_path=$model_dir/fp16/model.onnx
engine_path=$model_dir/int8_iq/model.engine
precision="best"
else
eval_model_path=$model_dir/$quant_mode/model.quant.onnx
engine_path=$model_dir/$quant_mode/model.quant.engine
precision="stronglyTyped"
fi

echo "Starting evaluation of $model_name for mode: $quant_mode on GPU $gpu_id"
if [[ " ${latency_models[@]} " =~ " $model_name " ]]; then
CUDA_VISIBLE_DEVICES=$gpu_id python evaluate.py \
--onnx_path=$eval_model_path \
--engine_path=$engine_path \
--model_name="${timm_model_name[$model_name]}" \
--engine_precision=$precision \
--results_path=$model_dir/$quant_mode/${model_name}_${quant_mode}.csv &
else
CUDA_VISIBLE_DEVICES=$gpu_id python evaluate.py \
--onnx_path=$eval_model_path \
--engine_path=$engine_path \
--imagenet_path=$imagenet_path \
--eval_data_size=$calib_size \
--batch_size $batch_size \
@@ -209,6 +238,15 @@ for model_path in "${model_paths[@]}"; do
done

python $test_utils_dir/aggregate_results.py --results_dir=build

if [ "$clean_mode" = true ]; then
echo "Cleaning build artifacts..."
rm -rf build/
echo "Build artifacts cleaned successfully."
popd
exit 0
fi

popd


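The per-mode branching in the evaluation loop above maps each quantization mode to an ONNX file, an engine path, and an engine precision string. The same mapping rendered as a small Python helper for clarity; the function name is illustrative, the path layout matches the script:

```python
# Python rendering of the per-mode selection in the bash loop above; the
# function name is illustrative, the path layout matches the script.
def select_eval_config(model_dir: str, quant_mode: str) -> tuple[str, str, str]:
    """Return (onnx_path, engine_path, precision) for one quantization mode."""
    if quant_mode == "fp16":
        return (
            f"{model_dir}/fp16/model.onnx",
            f"{model_dir}/fp16/model.engine",
            "fp16",
        )
    if quant_mode == "int8_iq":
        # int8 implicit quantization evaluates the fp16 ONNX at "best" precision
        return (
            f"{model_dir}/fp16/model.onnx",
            f"{model_dir}/int8_iq/model.engine",
            "best",
        )
    return (
        f"{model_dir}/{quant_mode}/model.quant.onnx",
        f"{model_dir}/{quant_mode}/model.quant.engine",
        "stronglyTyped",
    )
```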