We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e27a2b7, commit 505f715 (Copy full SHA for 505f715)
modelopt/onnx/trt_utils.py
@@ -416,9 +416,10 @@ def interpret_trt_plugins_precision_flag(
416
# Will add Q/DQ nodes in the requested I/O indices
417
inp_precision_quant = [i for i, p in enumerate(inp_precision) if p in ["int8", "fp8"]]
418
out_precision_quant = [i for i, p in enumerate(out_precision) if p in ["int8", "fp8"]]
419
- custom_ops_to_quantize[op_type] = {
420
- "inp": inp_precision_quant,
421
- "out": out_precision_quant,
422
- }
+ if inp_precision_quant or out_precision_quant:
+ custom_ops_to_quantize[op_type] = {
+ "inp": inp_precision_quant,
+ "out": out_precision_quant,
423
+ }
424
425
return custom_ops_to_cast, custom_ops_to_quantize
0 commit comments