Skip to content

Commit 37c6250

Browse files
Commit message: Specify engine path for TRT evaluation
Signed-off-by: ajrasane <[email protected]>
Parent: 669ae05 · Commit: 37c6250

File tree

4 files changed

+30
-6
lines changed

4 files changed

+30
-6
lines changed

examples/onnx_ptq/evaluate.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,12 @@ def main():
3535
help="""Path to the image classification ONNX model with input shape of
3636
[batch_size,3,224,224] and output shape of [1,1000]""",
3737
)
38+
parser.add_argument(
39+
"--engine_path",
40+
type=str,
41+
required=True,
42+
help="Path to the TensorRT engine",
43+
)
3844
parser.add_argument(
3945
"--imagenet_path", type=str, default=None, help="Path to the imagenet dataset"
4046
)
@@ -80,7 +86,10 @@ def main():
8086
client = RuntimeRegistry.get(deployment)
8187

8288
# Compile the ONNX model to TRT engine and create the device model
83-
compiled_model = client.ir_to_compiled(onnx_bytes)
89+
compilation_args = {
90+
"engine_path": args.engine_path,
91+
}
92+
compiled_model = client.ir_to_compiled(onnx_bytes, compilation_args)
8493
device_model = DeviceModel(client, compiled_model, metadata={})
8594

8695
top1_accuracy, top5_accuracy = 0.0, 0.0

modelopt/torch/_deploy/_runtime/tensorrt/engine_builder.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ def _update_dynamic_shapes(dynamic_shapes: dict, cmd: list[str]) -> None:
122122
def build_engine(
123123
onnx_bytes: OnnxBytes,
124124
trt_mode: str = TRTMode.FLOAT32,
125+
engine_path: Path | None = None,
125126
calib_cache: str | None = None,
126127
dynamic_shapes: dict | None = None,
127128
plugin_config: dict | None = None,
@@ -133,6 +134,7 @@ def build_engine(
133134
134135
Args:
135136
onnx_bytes: Data of the ONNX model stored as an OnnxBytes object.
137+
engine_path: Path to save the TensorRT engine.
136138
trt_mode: The precision with which the TensorRT engine will be built. Supported modes are:
137139
- TRTMode.FLOAT32
138140
- TRTMode.FLOAT16
@@ -202,22 +204,25 @@ def _build_command(
202204

203205
def _setup_files_and_paths(
204206
tmp_dir_path: Path,
207+
engine_path: Path | None,
205208
) -> tuple[Path, Path, Path | None, Path | None, Path]:
206209
tmp_onnx_dir = tmp_dir_path / "onnx"
207210
onnx_bytes.write_to_disk(str(tmp_onnx_dir))
208211
onnx_path = tmp_onnx_dir / f"{onnx_bytes.model_name}.onnx"
209212

210213
final_output_dir = Path(output_dir or Path(gettempdir()) / DEFAULT_ARTIFACT_DIR)
211214
final_output_dir.mkdir(parents=True, exist_ok=True)
212-
engine_path = final_output_dir / f"{onnx_bytes.model_name}.engine"
215+
engine_path = (
216+
engine_path if engine_path else final_output_dir / f"{onnx_bytes.model_name}.engine"
217+
)
213218
calib_cache_path = final_output_dir / "calib_cache" if calib_cache else None
214219
timing_cache_path = final_output_dir / "timing.cache"
215220

216221
return onnx_path, engine_path, calib_cache_path, timing_cache_path, final_output_dir
217222

218223
with TemporaryDirectory() as tmp_dir:
219224
onnx_path, engine_path, calib_cache_path, timing_cache_path, final_output_dir = (
220-
_setup_files_and_paths(Path(tmp_dir))
225+
_setup_files_and_paths(Path(tmp_dir), engine_path)
221226
)
222227
cmd = _build_command(onnx_path, engine_path, calib_cache_path, timing_cache_path)
223228

modelopt/torch/_deploy/_runtime/trt_client.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,8 @@ def _ir_to_compiled(
7474
7575
Args:
7676
ir_bytes: The ONNX model bytes.
77-
compilation_args: A dictionary of compilation arguments. Supported args: dynamic_shapes, plugin_config.
77+
compilation_args: A dictionary of compilation arguments.
78+
The following arguments are supported: dynamic_shapes, plugin_config, engine_path.
7879
7980
Returns:
8081
The compiled TRT engine bytes.
@@ -86,6 +87,7 @@ def _ir_to_compiled(
8687
onnx_bytes,
8788
dynamic_shapes=compilation_args.get("dynamic_shapes"), # type: ignore[union-attr]
8889
plugin_config=compilation_args.get("plugin_config"), # type: ignore[union-attr]
90+
engine_path=compilation_args.get("engine_path"), # type: ignore[union-attr]
8991
trt_mode=self.deployment["precision"],
9092
verbose=(self.deployment.get("verbose", "false").lower() == "true"),
9193
)

tests/examples/test_onnx_ptq.sh

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ for model_path in "${model_paths[@]}"; do
129129
--onnx_path=$model_dir/fp16/model.onnx \
130130
--quantize_mode=$quant_mode \
131131
--calibration_data=$calib_data_path \
132-
--output_path=$model_dir/$quant_mode/model.quant.onnx &
132+
--output_path=$model_dir/$quant_mode/model.quant.onnx \
133+
--calibration_eps=cuda:0 &
133134
pids+=($!)
134135
done
135136

@@ -161,22 +162,29 @@ for model_path in "${model_paths[@]}"; do
161162
quant_mode="${all_modes[$i]}"
162163
gpu_id=$((i % nvidia_gpu_count))
163164

164-
if [ "$quant_mode" == "fp16" ] || [ "$quant_mode" == "int8_iq" ]; then
165+
if [ "$quant_mode" == "fp16" ]; then
165166
eval_model_path=$model_dir/fp16/model.onnx
167+
engine_path=$model_dir/fp16/model.engine
168+
elif [ "$quant_mode" == "int8_iq" ]; then
169+
eval_model_path=$model_dir/fp16/model.onnx
170+
engine_path=$model_dir/int8_iq/model.engine
166171
else
167172
eval_model_path=$model_dir/$quant_mode/model.quant.onnx
173+
engine_path=$model_dir/$quant_mode/model.quant.engine
168174
fi
169175

170176
echo "Starting evaluation of $model_name for mode: $quant_mode on GPU $gpu_id"
171177
if [[ " ${latency_models[@]} " =~ " $model_name " ]]; then
172178
CUDA_VISIBLE_DEVICES=$gpu_id python evaluate.py \
173179
--onnx_path=$eval_model_path \
180+
--engine_path=$engine_path \
174181
--model_name="${timm_model_name[$model_name]}" \
175182
--quantize_mode=$quant_mode \
176183
--results_path=$model_dir/$quant_mode/${model_name}_${quant_mode}.csv &
177184
else
178185
CUDA_VISIBLE_DEVICES=$gpu_id python evaluate.py \
179186
--onnx_path=$eval_model_path \
187+
--engine_path=$engine_path \
180188
--imagenet_path=$imagenet_path \
181189
--eval_data_size=$calib_size \
182190
--batch_size $batch_size \

0 commit comments

Comments (0)