Skip to content

Commit 89818be

Browse files
committed
Specify engine path for TRT evaluation
Signed-off-by: ajrasane <[email protected]>
1 parent 0d24c1b commit 89818be

File tree

4 files changed

+63
-9
lines changed

4 files changed

+63
-9
lines changed

examples/onnx_ptq/evaluate.py

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,6 +35,12 @@ def main():
3535
help="""Path to the image classification ONNX model with input shape of
3636
[batch_size,3,224,224] and output shape of [1,1000]""",
3737
)
38+
parser.add_argument(
39+
"--engine_path",
40+
type=str,
41+
required=True,
42+
help="Path to the TensorRT engine",
43+
)
3844
parser.add_argument(
3945
"--imagenet_path", type=str, default=None, help="Path to the imagenet dataset"
4046
)
@@ -73,7 +79,10 @@ def main():
7379
client = RuntimeRegistry.get(deployment)
7480

7581
# Compile the ONNX model to TRT engine and create the device model
76-
compiled_model = client.ir_to_compiled(onnx_bytes)
82+
compilation_args = {
83+
"engine_path": args.engine_path,
84+
}
85+
compiled_model = client.ir_to_compiled(onnx_bytes, compilation_args)
7786
device_model = DeviceModel(client, compiled_model, metadata={})
7887

7988
top1_accuracy, top5_accuracy = 0.0, 0.0

modelopt/torch/_deploy/_runtime/tensorrt/engine_builder.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -122,6 +122,7 @@ def _update_dynamic_shapes(dynamic_shapes: dict, cmd: list[str]) -> None:
122122
def build_engine(
123123
onnx_bytes: OnnxBytes,
124124
trt_mode: str = TRTMode.FLOAT32,
125+
engine_path: Path | None = None,
125126
calib_cache: str | None = None,
126127
dynamic_shapes: dict | None = None,
127128
plugin_config: dict | None = None,
@@ -133,6 +134,7 @@ def build_engine(
133134
134135
Args:
135136
onnx_bytes: Data of the ONNX model stored as an OnnxBytes object.
137+
engine_path: Path to save the TensorRT engine.
136138
trt_mode: The precision with which the TensorRT engine will be built. Supported modes are:
137139
- TRTMode.FLOAT32
138140
- TRTMode.FLOAT16
@@ -202,22 +204,25 @@ def _build_command(
202204

203205
def _setup_files_and_paths(
204206
tmp_dir_path: Path,
207+
engine_path: Path | None,
205208
) -> tuple[Path, Path, Path | None, Path | None, Path]:
206209
tmp_onnx_dir = tmp_dir_path / "onnx"
207210
onnx_bytes.write_to_disk(str(tmp_onnx_dir))
208211
onnx_path = tmp_onnx_dir / f"{onnx_bytes.model_name}.onnx"
209212

210213
final_output_dir = Path(output_dir or Path(gettempdir()) / DEFAULT_ARTIFACT_DIR)
211214
final_output_dir.mkdir(parents=True, exist_ok=True)
212-
engine_path = final_output_dir / f"{onnx_bytes.model_name}.engine"
215+
engine_path = (
216+
engine_path if engine_path else final_output_dir / f"{onnx_bytes.model_name}.engine"
217+
)
213218
calib_cache_path = final_output_dir / "calib_cache" if calib_cache else None
214219
timing_cache_path = final_output_dir / "timing.cache"
215220

216221
return onnx_path, engine_path, calib_cache_path, timing_cache_path, final_output_dir
217222

218223
with TemporaryDirectory() as tmp_dir:
219224
onnx_path, engine_path, calib_cache_path, timing_cache_path, final_output_dir = (
220-
_setup_files_and_paths(Path(tmp_dir))
225+
_setup_files_and_paths(Path(tmp_dir), engine_path)
221226
)
222227
cmd = _build_command(onnx_path, engine_path, calib_cache_path, timing_cache_path)
223228

modelopt/torch/_deploy/_runtime/trt_client.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,8 @@ def _ir_to_compiled(
7373
7474
Args:
7575
ir_bytes: The ONNX model bytes.
76-
compilation_args: A dictionary of compilation arguments. Supported args: dynamic_shapes, plugin_config.
76+
compilation_args: A dictionary of compilation arguments.
77+
The following arguments are supported: dynamic_shapes, plugin_config, engine_path.
7778
7879
Returns:
7980
The compiled TRT engine bytes.
@@ -85,6 +86,7 @@ def _ir_to_compiled(
8586
onnx_bytes,
8687
dynamic_shapes=compilation_args.get("dynamic_shapes"), # type: ignore[union-attr]
8788
plugin_config=compilation_args.get("plugin_config"), # type: ignore[union-attr]
89+
engine_path=compilation_args.get("engine_path"), # type: ignore[union-attr]
8890
trt_mode=self.deployment["precision"],
8991
verbose=(self.deployment.get("verbose", "false").lower() == "true"),
9092
)

tests/examples/test_onnx_ptq.sh

Lines changed: 43 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,7 @@
2121
# It is recommended to execute this script inside the Model Optimization Toolkit TensorRT Docker container.
2222
# Please ensure that the ImageNet dataset is available in the container at the specified path.
2323

24-
# Usage: ./test_onnx_ptq.sh /path/to/imagenet /path/to/models
24+
# Usage: ./test_onnx_ptq.sh [--no-clean] [/path/to/imagenet] [/path/to/models]
2525

2626
set -exo pipefail
2727

@@ -34,12 +34,35 @@ cuda_capability=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader | hea
3434

3535

3636
pushd $public_example_dir
37+
38+
# Parse arguments
39+
clean_mode=true
40+
imagenet_path=""
41+
models_folder=""
42+
43+
for arg in "$@"; do
44+
case $arg in
45+
--no-clean)
46+
clean_mode=false
47+
shift
48+
;;
49+
*)
50+
if [ -z "$imagenet_path" ]; then
51+
imagenet_path="$arg"
52+
elif [ -z "$models_folder" ]; then
53+
models_folder="$arg"
54+
fi
55+
shift
56+
;;
57+
esac
58+
done
59+
3760
export TQDM_DISABLE=1
3861

3962

4063
# Setting image and model paths (contains 8 models)
41-
imagenet_path=${1:-/data/imagenet/}
42-
models_folder=${2:-/models/onnx}
64+
imagenet_path=${imagenet_path:-/data/imagenet/}
65+
models_folder=${models_folder:-/models/onnx}
4366
calib_size=64
4467
batch_size=1
4568

@@ -88,9 +111,9 @@ declare -A timm_model_name=(
88111
latency_models=("efficientnet_b0" "efficientnet_b3" "efficientnet-lite4-11" "faster_vit_timm_opset13_simplified" "faster_vit_timm_opset17_simplified" "inception-v1-12" "inception-v2-9")
89112

90113
# Create build directory to store all the results
114+
rm -rf build
91115
mkdir -p build
92116

93-
94117
# Iterate over each model path to create directories for all modes for each model
95118
for model_path in "${model_paths[@]}"; do
96119
model_name=$(basename "$model_path" .onnx)
@@ -129,7 +152,8 @@ for model_path in "${model_paths[@]}"; do
129152
--onnx_path=$model_dir/fp16/model.onnx \
130153
--quantize_mode=$quant_mode \
131154
--calibration_data=$calib_data_path \
132-
--output_path=$model_dir/$quant_mode/model.quant.onnx &
155+
--output_path=$model_dir/$quant_mode/model.quant.onnx \
156+
--calibration_eps=cuda:0 &
133157
pids+=($!)
134158
done
135159

@@ -163,25 +187,30 @@ for model_path in "${model_paths[@]}"; do
163187

164188
if [ "$quant_mode" == "fp16" ]; then
165189
eval_model_path=$model_dir/fp16/model.onnx
190+
engine_path=$model_dir/fp16/model.engine
166191
precision="fp16"
167192
elif [ "$quant_mode" == "int8_iq" ]; then
168193
eval_model_path=$model_dir/fp16/model.onnx
194+
engine_path=$model_dir/int8_iq/model.engine
169195
precision="best"
170196
else
171197
eval_model_path=$model_dir/$quant_mode/model.quant.onnx
198+
engine_path=$model_dir/$quant_mode/model.quant.engine
172199
precision="stronglyTyped"
173200
fi
174201

175202
echo "Starting evaluation of $model_name for mode: $quant_mode on GPU $gpu_id"
176203
if [[ " ${latency_models[@]} " =~ " $model_name " ]]; then
177204
CUDA_VISIBLE_DEVICES=$gpu_id python evaluate.py \
178205
--onnx_path=$eval_model_path \
206+
--engine_path=$engine_path \
179207
--model_name="${timm_model_name[$model_name]}" \
180208
--engine_precision=$precision \
181209
--results_path=$model_dir/$quant_mode/${model_name}_${quant_mode}.csv &
182210
else
183211
CUDA_VISIBLE_DEVICES=$gpu_id python evaluate.py \
184212
--onnx_path=$eval_model_path \
213+
--engine_path=$engine_path \
185214
--imagenet_path=$imagenet_path \
186215
--eval_data_size=$calib_size \
187216
--batch_size $batch_size \
@@ -209,6 +238,15 @@ for model_path in "${model_paths[@]}"; do
209238
done
210239

211240
python $test_utils_dir/aggregate_results.py --results_dir=build
241+
242+
if [ "$clean_mode" = true ]; then
243+
echo "Cleaning build artifacts..."
244+
rm -rf build/
245+
echo "Build artifacts cleaned successfully."
246+
popd
247+
exit 0
248+
fi
249+
212250
popd
213251

214252

0 commit comments

Comments
 (0)