Description
I have an ONNX segmentation model. Using the `onnx2trt` function below, I built both an FP16 engine and an INT8 engine. The INT8 engine was calibrated with over 1000 samples, yet its F1 score is 10 points lower than the FP16 engine's.
```python
import tensorrt as trt


def onnx2trt(
    model,
    log_level='ERROR',
    max_batch_size=1,
    min_input_shapes=None,
    max_input_shapes=None,
    max_workspace_size=4,
    fp16_mode=True,
    strict_type_constraints=False,
    int8_mode=False,
    int8_calibrator=None,
):
    logger = trt.Logger(getattr(trt.Logger, log_level))
    builder = trt.Builder(logger)
    network = builder.create_network(
        1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
    parser = trt.OnnxParser(network, logger)
    if isinstance(model, str):
        with open(model, 'rb') as f:
            flag = parser.parse(f.read())
    else:
        flag = parser.parse(model.read())
    if not flag:
        for error in range(parser.num_errors):
            print(parser.get_error(error))

    # re-order output tensors: wrap each output in an identity layer
    output_tensors = [network.get_output(i)
                      for i in range(network.num_outputs)]
    for tensor in output_tensors:
        network.unmark_output(tensor)
    for tensor in output_tensors:
        identity_out_tensor = network.add_identity(tensor).get_output(0)
        identity_out_tensor.name = 'identity_{}'.format(tensor.name)
        network.mark_output(tensor=identity_out_tensor)

    builder.max_batch_size = max_batch_size
    config = builder.create_builder_config()
    config.max_workspace_size = max_workspace_size * (1 << 30)  # GiB
    if fp16_mode:
        config.set_flag(trt.BuilderFlag.FP16)
    if strict_type_constraints:
        config.set_flag(trt.BuilderFlag.STRICT_TYPES)
    if int8_mode:
        config.set_flag(trt.BuilderFlag.INT8)
        if int8_calibrator is None:
            # gen_ones / EntropyCalibrator2 / CustomDataset are helpers
            # from my own codebase (dummy all-ones calibration data)
            shapes = [(1,) + network.get_input(i).shape[1:]
                      for i in range(network.num_inputs)]
            dummy_data = gen_ones(shapes)
            int8_calibrator = EntropyCalibrator2(CustomDataset(dummy_data))
        config.int8_calibrator = int8_calibrator

    # set dynamic shape profile
    assert not (bool(min_input_shapes) ^ bool(max_input_shapes))
    profile = builder.create_optimization_profile()
    input_shapes = [network.get_input(i).shape[1:]
                    for i in range(network.num_inputs)]
    if not min_input_shapes:
        min_input_shapes = input_shapes
    if not max_input_shapes:
        max_input_shapes = input_shapes
    assert len(min_input_shapes) == len(max_input_shapes) == len(input_shapes)
    for i in range(network.num_inputs):
        tensor = network.get_input(i)
        name = tensor.name
        min_shape = (1,) + min_input_shapes[i]
        max_shape = (max_batch_size,) + max_input_shapes[i]
        opt_shape = [(min_ + max_) // 2
                     for min_, max_ in zip(min_shape, max_shape)]
        profile.set_shape(name, min_shape, opt_shape, max_shape)
    config.add_optimization_profile(profile)

    engine = builder.build_engine(network, config)
    return engine
```
I want to know:

- Is there any tool to compare the errors of each layer between the two engines, find the layers with larger errors, and set those layers to FP32 precision? (The first snippet below sketches the kind of per-layer override I have in mind.)
- I tried using `polygraphy` to dump per-layer outputs, but how should I parse the resulting JSON files? (My parsing attempt is the second snippet below.)

  ```
  polygraphy run 20251023_174037_fp16.engine --trt --trt-outputs mark all --save-outputs fp16_all_outputs.json
  polygraphy run 20251023_174037_int8.engine --trt --trt-outputs mark all --save-outputs int8_all_outputs.json
  ```

- I noticed that `trtexec` is used for calibration in other issues, but how can I obtain the calibration cache file using my own data? (The third snippet below shows how I currently generate the cache.)
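For reference, this is the kind of per-layer override I have in mind (an untested sketch reusing the `network` and `config` objects from `onnx2trt` above; the names in `suspect_layers` are made up):

```python
# Hypothetical names of layers found to have large INT8 error.
suspect_layers = {'Conv_42', 'Sigmoid_57'}

for i in range(network.num_layers):
    layer = network.get_layer(i)
    if layer.name in suspect_layers:
        # Pin the layer's compute and output precision to FP32.
        layer.precision = trt.float32
        for j in range(layer.num_outputs):
            layer.set_output_type(j, trt.float32)

# Without a strictness flag TensorRT treats per-layer precisions as
# hints only; STRICT_TYPES (OBEY_PRECISION_CONSTRAINTS on newer
# TensorRT releases) makes them binding.
config.set_flag(trt.BuilderFlag.STRICT_TYPES)
```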
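And this is roughly how I tried to read the saved outputs back, based on my understanding that `--save-outputs` serializes a `RunResults` object (an untested sketch; the two engines may fuse layers differently, so I skip tensors that only exist in one file or whose shapes differ):

```python
import numpy as np
from polygraphy.comparator import RunResults

fp16_results = RunResults.load('fp16_all_outputs.json')
int8_results = RunResults.load('int8_all_outputs.json')

# RunResults behaves like a list of (runner_name, [IterationResult])
# pairs; each IterationResult maps tensor names to numpy arrays.
_, fp16_iters = list(fp16_results)[0]
_, int8_iters = list(int8_results)[0]
fp16_out, int8_out = fp16_iters[0], int8_iters[0]

# Rank tensors by error between the two engines.
stats = []
for name in fp16_out.keys():
    if name not in int8_out:
        continue
    a = np.asarray(fp16_out[name], dtype=np.float32)
    b = np.asarray(int8_out[name], dtype=np.float32)
    if a.shape != b.shape:
        continue
    abs_err = np.abs(a - b)
    rel_err = abs_err / (np.abs(a) + 1e-6)
    stats.append((name, float(abs_err.max()), float(rel_err.mean())))

# Print the 20 tensors with the largest mean relative error.
for name, max_abs, mean_rel in sorted(stats, key=lambda s: -s[2])[:20]:
    print(f'{name}: max_abs={max_abs:.4g}, mean_rel={mean_rel:.4g}')
```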
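For the third question, this is how I currently produce a calibration cache in Python; my understanding is that once `write_calibration_cache` has written the file, `trtexec` can reuse it via `--calib=<file>` (a sketch assuming pycuda; `load_my_batches` is a placeholder for my own preprocessing pipeline):

```python
import os

import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context
import pycuda.driver as cuda
import tensorrt as trt


class MyEntropyCalibrator(trt.IInt8EntropyCalibrator2):
    def __init__(self, batches, cache_file='calib.cache'):
        super().__init__()  # required by the TensorRT bindings
        self.batches = batches  # list of float32 NCHW arrays, same shape
        self.cache_file = cache_file
        self.index = 0
        self.device_mem = cuda.mem_alloc(batches[0].nbytes)

    def get_batch_size(self):
        return self.batches[0].shape[0]

    def get_batch(self, names):
        if self.index >= len(self.batches):
            return None  # tells TensorRT the calibration data is exhausted
        cuda.memcpy_htod(self.device_mem,
                         np.ascontiguousarray(self.batches[self.index]))
        self.index += 1
        return [int(self.device_mem)]

    def read_calibration_cache(self):
        if os.path.exists(self.cache_file):
            with open(self.cache_file, 'rb') as f:
                return f.read()
        return None

    def write_calibration_cache(self, cache):
        with open(self.cache_file, 'wb') as f:
            f.write(cache)


# load_my_batches() stands in for my own data loader. Building once
# with int8_calibrator=MyEntropyCalibrator(load_my_batches()) writes
# calib.cache, which trtexec can then reuse:
#   trtexec --onnx=model.onnx --int8 --calib=calib.cache
```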