-
Notifications
You must be signed in to change notification settings - Fork 5
Open
Description
Env
- NVIDIA GeForce RTX 4060 Ti
- cuda 12.6
- tensorrt 10.13.0.35
- torch 2.7.0+cu126
- transformers 5.0.0rc1
Description
I found noticeable discrepancies between the inference results of the ONNX model and those of the TensorRT engine built from it.
To locate where they start, I modified onnxexport.py so that it also exports the outputs of several intermediate layers:
import torch
from pathlib import Path
from transformers.models.sam3 import Sam3Processor, Sam3Model
from transformers.models.sam3.modeling_sam3 import (
inverse_sigmoid,
box_cxcywh_to_xyxy,
Sam3ImageSegmentationOutput,
)
from PIL import Image
import requests
device = "cpu"  # for onnx export we use CPU for maximum compatibility

# 1. Load model & processor
model = Sam3Model.from_pretrained("/home/zy/weights/sam3").to(device)
processor = Sam3Processor.from_pretrained("/home/zy/weights/sam3")
model.eval()
prompt = "dog"

# 2. Build a sample batch (same as your example)
image_url = "http://images.cocodataset.org/val2017/000000077595.jpg"
# A timeout keeps the script from hanging forever on a stalled connection, and
# raise_for_status() reports HTTP errors directly instead of letting PIL fail
# later while trying to decode an error page.
with requests.get(image_url, stream=True, timeout=30) as response:
    response.raise_for_status()
    # convert() decodes the whole image, so the stream may be closed afterwards.
    image = Image.open(response.raw).convert("RGB")
inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)
# These three tensors are reused below when building and exporting the wrapper.
pixel_values = inputs["pixel_values"]
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]
print(input_ids.shape)
# 3. Wrap Sam3Model so the ONNX graph has clean inputs/outputs
# class Sam3ONNXWrapper(torch.nn.Module):
# def __init__(self, sam3, input_ids, attention_mask):
# super().__init__()
# self.sam3 = sam3
# self.register_buffer("const_input_ids", input_ids.to(torch.int64).cpu())
# self.register_buffer("const_attention_mask", attention_mask.to(torch.int64).cpu())
# def forward(self, pixel_values):
# outputs = self.sam3(
# pixel_values=pixel_values,
# input_ids=self.const_input_ids,
# attention_mask=self.const_attention_mask,
# )
# print(outputs.keys())
# # Typical useful outputs
# pred_boxes = outputs.pred_boxes
# pred_logits = outputs.pred_logits
# pred_masks = outputs.pred_masks
# presence_score = outputs.presence_logits
# return (pred_boxes, pred_logits, pred_masks, presence_score)
class Sam3ONNXWrapper(torch.nn.Module):
    """Debug wrapper that exposes intermediate SAM3 tensors as graph outputs.

    The tokenized text prompt is stored in buffers so that the exported graph
    takes ``pixel_values`` as its only input. ``forward`` calls the SAM3
    sub-modules one by one and returns selected intermediate tensors together
    with the final predictions, so each stage can be compared across runtimes.
    """

    def __init__(self, sam3, input_ids, attention_mask):
        """Store the model and freeze the prompt tensors as int64 CPU buffers.

        Args:
            sam3: Model providing ``vision_encoder``, ``get_text_features``,
                ``detr_encoder``, ``detr_decoder``, ``dot_product_scoring``
                and ``mask_decoder``.
            input_ids: Token ids of the prompt.
            attention_mask: Attention mask matching ``input_ids``.
        """
        super().__init__()
        self.sam3 = sam3
        self.register_buffer("const_input_ids", input_ids.to(torch.int64).cpu())
        self.register_buffer(
            "const_attention_mask", attention_mask.to(torch.int64).cpu()
        )

    def forward(self, pixel_values):
        """Run the pipeline stage by stage and return intermediate tensors.

        Args:
            pixel_values: Preprocessed image batch passed to the vision encoder.

        Returns:
            An 11-tuple: the first three FPN feature maps, the text features,
            the DETR encoder's last hidden state, the last decoder layer's
            hidden states, predicted boxes (converted to xyxy), predicted
            logits, presence logits, predicted masks and the semantic
            segmentation output.
        """
        vision_outputs = self.sam3.vision_encoder(pixel_values)
        # The last entry of each FPN list is dropped before further use.
        fpn_hidden_states = vision_outputs.fpn_hidden_states[:-1]
        fpn_position_encoding = vision_outputs.fpn_position_encoding[:-1]

        text_features = self.sam3.get_text_features(
            input_ids=self.const_input_ids,
            attention_mask=self.const_attention_mask,
        )
        # Use the registered buffer rather than a module-level global, so the
        # mask follows the module (e.g. on .to(device)) and the class does not
        # depend on variables of the surrounding script.
        text_mask = self.const_attention_mask.bool()

        # Only the text prompt is used here, so it is the whole prompt.
        combined_prompt_features = text_features
        combined_prompt_mask = text_mask

        # The DETR encoder receives only the last retained FPN level.
        encoder_outputs = self.sam3.detr_encoder(
            vision_features=[fpn_hidden_states[-1]],
            text_features=combined_prompt_features,
            vision_pos_embeds=[fpn_position_encoding[-1]],
            text_mask=combined_prompt_mask,
        )

        decoder_outputs = self.sam3.detr_decoder(
            vision_features=encoder_outputs.last_hidden_state,
            text_features=encoder_outputs.text_features,
            vision_pos_encoding=encoder_outputs.pos_embeds_flattened,
            text_mask=combined_prompt_mask,
            spatial_shapes=encoder_outputs.spatial_shapes,
        )

        # Box refinement: add the predicted offsets to the reference boxes in
        # inverse-sigmoid space, squash back with sigmoid, then convert format.
        all_box_offsets = self.sam3.detr_decoder.box_head(
            decoder_outputs.intermediate_hidden_states
        )
        reference_boxes_inv_sig = inverse_sigmoid(decoder_outputs.reference_boxes)
        all_pred_boxes_cxcywh = (reference_boxes_inv_sig + all_box_offsets).sigmoid()
        all_pred_boxes = box_cxcywh_to_xyxy(all_pred_boxes_cxcywh)

        all_pred_logits = self.sam3.dot_product_scoring(
            decoder_hidden_states=decoder_outputs.intermediate_hidden_states,
            text_features=encoder_outputs.text_features,
            text_mask=combined_prompt_mask,
        ).squeeze(-1)

        # Keep only the last entry along the leading axis of each stacked output.
        pred_logits = all_pred_logits[-1]
        pred_boxes = all_pred_boxes[-1]
        decoder_hidden_states = decoder_outputs.intermediate_hidden_states[-1]
        presence_logits = decoder_outputs.presence_logits[-1]

        mask_outputs = self.sam3.mask_decoder(
            decoder_queries=decoder_hidden_states,
            backbone_features=list(fpn_hidden_states),
            encoder_hidden_states=encoder_outputs.last_hidden_state,
            prompt_features=combined_prompt_features,
            prompt_mask=combined_prompt_mask,
        )
        pred_masks = mask_outputs.pred_masks
        semantic_seg = mask_outputs.semantic_seg

        vision_feature_0 = fpn_hidden_states[0]
        vision_feature_1 = fpn_hidden_states[1]
        vision_feature_2 = fpn_hidden_states[2]

        # Order must match the ``output_names`` list passed to the exporter.
        return (
            vision_feature_0,
            vision_feature_1,
            vision_feature_2,
            text_features,
            encoder_outputs.last_hidden_state,
            decoder_hidden_states,
            pred_boxes,
            pred_logits,
            presence_logits,
            pred_masks,
            semantic_seg,
        )
wrapper = Sam3ONNXWrapper(model, input_ids, attention_mask).to(device).eval()
# Optional eager sanity check before exporting: wrapper(pixel_values)

# 5. Export to ONNX
output_dir = Path("onnx_weights_dbg")
output_dir.mkdir(exist_ok=True)
onnx_path = str(output_dir / "sam3_dbg.onnx")

# Previous (final predictions only) output list, kept for reference:
# output_names = ["pred_boxes", "pred_logits", "pred_masks", "presence_score"]
# The names below must stay in the same order as the tuple returned by
# Sam3ONNXWrapper.forward.
output_names = [
    "vision_feature_0",
    "vision_feature_1",
    "vision_feature_2",
    "text_features",
    "encoder_hidden_state",
    "decoder_hidden_states",
    "pred_boxes",
    "pred_logits",
    "presence_logits",
    "pred_masks",
    "semantic_seg",
]

torch.onnx.export(
    wrapper,
    (pixel_values,),  # explicit one-element tuple of positional example inputs
    onnx_path,
    input_names=["pixel_values"],
    output_names=output_names,
    dynamo=False,
    opset_version=17,
)
print(f"Exported to {onnx_path}")
I then used Polygraphy to compare the TensorRT engine against ONNX Runtime on the exported model. Here is the result:
[I] TF32 is disabled by default. Turn on TF32 for better performance with minor accuracy differences.
[I] trt-runner-N0-01/05/26-15:07:18 | Activating and starting inference
[I] Configuring with profiles:[
Profile 0:
{pixel_values [min=[1, 3, 1008, 1008], opt=[1, 3, 1008, 1008], max=[1, 3, 1008, 1008]]}
]
[W] profileSharing0806 is on by default in TensorRT 10.0. This flag is deprecated and has no effect.
[I] Building engine with configuration:
Flags | []
Engine Capability | EngineCapability.STANDARD
Memory Pools | [WORKSPACE: 15936.19 MiB, TACTIC_DRAM: 15936.19 MiB, TACTIC_SHARED_MEMORY: 1024.00 MiB]
Tactic Sources | [EDGE_MASK_CONVOLUTIONS, JIT_CONVOLUTIONS]
Profiling Verbosity | ProfilingVerbosity.DETAILED
Preview Features | [PROFILE_SHARING_0806]
[I] Finished engine building in 58.691 seconds
[I] trt-runner-N0-01/05/26-15:07:18
---- Inference Input(s) ----
{pixel_values [dtype=float32, shape=(1, 3, 1008, 1008)]}
[I] trt-runner-N0-01/05/26-15:07:18
---- Inference Output(s) ----
{vision_feature_0 [dtype=float32, shape=(1, 256, 288, 288)],
vision_feature_1 [dtype=float32, shape=(1, 256, 144, 144)],
vision_feature_2 [dtype=float32, shape=(1, 256, 72, 72)],
text_features [dtype=float32, shape=(1, 32, 256)],
encoder_hidden_state [dtype=float32, shape=(1, 5184, 256)],
decoder_hidden_states [dtype=float32, shape=(1, 200, 256)],
pred_boxes [dtype=float32, shape=(1, 200, 4)],
pred_logits [dtype=float32, shape=(1, 200)],
presence_logits [dtype=float32, shape=(1, 1)],
pred_masks [dtype=float32, shape=(1, 200, 288, 288)],
semantic_seg [dtype=float32, shape=(1, 1, 288, 288)]}
[I] trt-runner-N0-01/05/26-15:07:18 | Completed 1 iteration(s) in 1306 ms | Average inference time: 1306 ms.
[I] onnxrt-runner-N0-01/05/26-15:07:18 | Activating and starting inference
[I] Creating ONNX-Runtime Inference Session with providers: ['CPUExecutionProvider']
2026-01-05 15:08:22.030170384 [W:onnxruntime:, graph.cc:120 MergeShapeInfo] Error merging shape info for output. '/vision_encoder/backbone/embeddings/Concat_output_0' source:{4} target:{5}. Falling back to lenient merge.
[I] onnxrt-runner-N0-01/05/26-15:07:18
---- Inference Input(s) ----
{pixel_values [dtype=float32, shape=(1, 3, 1008, 1008)]}
[I] onnxrt-runner-N0-01/05/26-15:07:18
---- Inference Output(s) ----
{vision_feature_0 [dtype=float32, shape=(1, 256, 288, 288)],
vision_feature_1 [dtype=float32, shape=(1, 256, 144, 144)],
vision_feature_2 [dtype=float32, shape=(1, 256, 72, 72)],
text_features [dtype=float32, shape=(1, 32, 256)],
encoder_hidden_state [dtype=float32, shape=(1, 5184, 256)],
decoder_hidden_states [dtype=float32, shape=(1, 200, 256)],
pred_boxes [dtype=float32, shape=(1, 200, 4)],
pred_logits [dtype=float32, shape=(1, 200)],
presence_logits [dtype=float32, shape=(1, 1)],
pred_masks [dtype=float32, shape=(1, 200, 288, 288)],
semantic_seg [dtype=float32, shape=(1, 1, 288, 288)]}
[I] onnxrt-runner-N0-01/05/26-15:07:18 | Completed 1 iteration(s) in 8450 ms | Average inference time: 8450 ms.
[I] Accuracy Comparison | trt-runner-N0-01/05/26-15:07:18 vs. onnxrt-runner-N0-01/05/26-15:07:18
[I] Comparing Output: 'vision_feature_0' (dtype=float32, shape=(1, 256, 288, 288)) with 'vision_feature_0' (dtype=float32, shape=(1, 256, 288, 288))
[I] Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[W] onnxrt-runner-N0-01/05/26-15:07:18 | Output: vision_feature_0: Some values are 0. Will add a small epsilon quantity to these when computing relative difference. Note that this may cause some relative differences to be extremely high.
[I] trt-runner-N0-01/05/26-15:07:18: vision_feature_0 | Stats: mean=-0.01514, std-dev=0.016879, var=0.00028488, median=-0.01227, min=-0.14353 at (0, 17, 285, 11), max=0.07911 at (0, 111, 281, 214), avg-magnitude=0.017047, p90=0.0032417, p95=0.0072221, p99=0.015082
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-0.144 , -0.121 ) | 47 |
(-0.121 , -0.099 ) | 5634 |
(-0.099 , -0.0767 ) | 119782 |
(-0.0767 , -0.0545 ) | 408355 | #
(-0.0545 , -0.0322 ) | 2606431 | ###########
(-0.0322 , -0.00995) | 8920261 | ########################################
(-0.00995, 0.0123 ) | 8790913 | #######################################
(0.0123 , 0.0346 ) | 378221 | #
(0.0346 , 0.0568 ) | 3978 |
(0.0568 , 0.0791 ) | 42 |
[I] onnxrt-runner-N0-01/05/26-15:07:18: vision_feature_0 | Stats: mean=-0.01514, std-dev=0.016879, var=0.00028488, median=-0.01227, min=-0.14353 at (0, 17, 285, 11), max=0.07911 at (0, 111, 281, 214), avg-magnitude=0.017047, p90=0.0032417, p95=0.007222, p99=0.015082
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-0.144 , -0.121 ) | 47 |
(-0.121 , -0.099 ) | 5634 |
(-0.099 , -0.0767 ) | 119783 |
(-0.0767 , -0.0545 ) | 408353 | #
(-0.0545 , -0.0322 ) | 2606436 | ###########
(-0.0322 , -0.00995) | 8920265 | ########################################
(-0.00995, 0.0123 ) | 8790908 | #######################################
(0.0123 , 0.0346 ) | 378218 | #
(0.0346 , 0.0568 ) | 3978 |
(0.0568 , 0.0791 ) | 42 |
[I] Error Metrics: vision_feature_0
[I] Minimum Required Tolerance: elemwise error | [abs=6.754e-06] OR [rel=1.0737e+09] (requirements may be lower if both abs/rel tolerances are set)
[I] Absolute Difference | Stats: mean=5.7006e-08, std-dev=6.4222e-08, var=4.1245e-15, median=4.2375e-08, min=0 at (0, 0, 0, 8), max=6.754e-06 at (0, 211, 257, 30), avg-magnitude=5.7006e-08, p90=1.1921e-07, p95=1.546e-07, p99=2.645e-07
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0 , 6.75e-07) | 21216916 | ########################################
(6.75e-07, 1.35e-06) | 13286 |
(1.35e-06, 2.03e-06) | 2264 |
(2.03e-06, 2.7e-06 ) | 702 |
(2.7e-06 , 3.38e-06) | 299 |
(3.38e-06, 4.05e-06) | 107 |
(4.05e-06, 4.73e-06) | 60 |
(4.73e-06, 5.4e-06 ) | 22 |
(5.4e-06 , 6.08e-06) | 6 |
(6.08e-06, 6.75e-06) | 2 |
[I] Relative Difference | Stats: mean=54.914, std-dev=2.3388e+05, var=5.4698e+10, median=3.2489e-06, min=0 at (0, 0, 0, 8), max=1.0737e+09 at (0, 251, 32, 46), avg-magnitude=54.914, p90=2.2915e-05, p95=4.6669e-05, p99=0.00023475
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0 , 1.07e+08) | 21233663 | ########################################
(1.07e+08, 2.15e+08) | 0 |
(2.15e+08, 3.22e+08) | 0 |
(3.22e+08, 4.29e+08) | 0 |
(4.29e+08, 5.37e+08) | 0 |
(5.37e+08, 6.44e+08) | 0 |
(6.44e+08, 7.52e+08) | 0 |
(7.52e+08, 8.59e+08) | 0 |
(8.59e+08, 9.66e+08) | 0 |
(9.66e+08, 1.07e+09) | 1 |
[E] FAILED | Output: 'vision_feature_0' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I] Comparing Output: 'vision_feature_1' (dtype=float32, shape=(1, 256, 144, 144)) with 'vision_feature_1' (dtype=float32, shape=(1, 256, 144, 144))
[I] Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I] trt-runner-N0-01/05/26-15:07:18: vision_feature_1 | Stats: mean=0.0019535, std-dev=0.99889, var=0.99777, median=-0.002932, min=-5.9977 at (0, 139, 51, 57), max=6.2735 at (0, 44, 20, 101), avg-magnitude=0.7847, p90=1.2633, p95=1.6437, p99=2.3871
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-6 , -4.77) | 148 |
(-4.77, -3.54) | 4602 |
(-3.54, -2.32) | 60173 | #
(-2.32, -1.09) | 623058 | ##########
(-1.09, 0.138) | 2276178 | ########################################
(0.138, 1.37 ) | 1898847 | #################################
(1.37 , 2.59 ) | 412856 | #######
(2.59 , 3.82 ) | 30715 |
(3.82 , 5.05 ) | 1750 |
(5.05 , 6.27 ) | 89 |
[I] onnxrt-runner-N0-01/05/26-15:07:18: vision_feature_1 | Stats: mean=0.0019535, std-dev=0.99889, var=0.99778, median=-0.0029335, min=-5.9977 at (0, 139, 51, 57), max=6.2735 at (0, 44, 20, 101), avg-magnitude=0.7847, p90=1.2633, p95=1.6437, p99=2.3871
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-6 , -4.77) | 148 |
(-4.77, -3.54) | 4602 |
(-3.54, -2.32) | 60174 | #
(-2.32, -1.09) | 623058 | ##########
(-1.09, 0.138) | 2276180 | ########################################
(0.138, 1.37 ) | 1898843 | #################################
(1.37 , 2.59 ) | 412857 | #######
(2.59 , 3.82 ) | 30715 |
(3.82 , 5.05 ) | 1750 |
(5.05 , 6.27 ) | 89 |
[I] Error Metrics: vision_feature_1
[I] Minimum Required Tolerance: elemwise error | [abs=0.00055704] OR [rel=78.247] (requirements may be lower if both abs/rel tolerances are set)
[I] Absolute Difference | Stats: mean=5.5588e-06, std-dev=5.9869e-06, var=3.5843e-11, median=4.1723e-06, min=0 at (0, 0, 0, 78), max=0.00055704 at (0, 191, 114, 120), avg-magnitude=5.5588e-06, p90=1.1563e-05, p95=1.4782e-05, p99=2.4557e-05
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0 , 5.57e-05) | 5302571 | ########################################
(5.57e-05, 0.000111) | 4649 |
(0.000111, 0.000167) | 811 |
(0.000167, 0.000223) | 260 |
(0.000223, 0.000279) | 92 |
(0.000279, 0.000334) | 23 |
(0.000334, 0.00039 ) | 4 |
(0.00039 , 0.000446) | 4 |
(0.000446, 0.000501) | 1 |
(0.000501, 0.000557) | 1 |
[I] Relative Difference | Stats: mean=9.6089e-05, std-dev=0.045692, var=0.0020878, median=6.5735e-06, min=0 at (0, 0, 0, 78), max=78.247 at (0, 205, 50, 50), avg-magnitude=9.6089e-05, p90=4.3977e-05, p95=8.9272e-05, p99=0.00044842
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0 , 7.82) | 5308410 | ########################################
(7.82, 15.6) | 4 |
(15.6, 23.5) | 0 |
(23.5, 31.3) | 0 |
(31.3, 39.1) | 0 |
(39.1, 46.9) | 0 |
(46.9, 54.8) | 0 |
(54.8, 62.6) | 0 |
(62.6, 70.4) | 1 |
(70.4, 78.2) | 1 |
[E] FAILED | Output: 'vision_feature_1' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I] Comparing Output: 'vision_feature_2' (dtype=float32, shape=(1, 256, 72, 72)) with 'vision_feature_2' (dtype=float32, shape=(1, 256, 72, 72))
[I] Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I] trt-runner-N0-01/05/26-15:07:18: vision_feature_2 | Stats: mean=-0.02647, std-dev=0.68585, var=0.47039, median=-0.018099, min=-3.9931 at (0, 43, 66, 57), max=3.8522 at (0, 185, 17, 3), avg-magnitude=0.53415, p90=0.82253, p95=1.0764, p99=1.5906
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-3.99 , -3.21 ) | 176 |
(-3.21 , -2.42 ) | 2074 |
(-2.42 , -1.64 ) | 16797 | #
(-1.64 , -0.855 ) | 120900 | #########
(-0.855 , -0.0704) | 480100 | ###################################
(-0.0704, 0.714 ) | 533487 | ########################################
(0.714 , 1.5 ) | 155587 | ###########
(1.5 , 2.28 ) | 16819 | #
(2.28 , 3.07 ) | 1131 |
(3.07 , 3.85 ) | 33 |
[I] onnxrt-runner-N0-01/05/26-15:07:18: vision_feature_2 | Stats: mean=-0.02647, std-dev=0.68585, var=0.47039, median=-0.018098, min=-3.9931 at (0, 43, 66, 57), max=3.8522 at (0, 185, 17, 3), avg-magnitude=0.53415, p90=0.82253, p95=1.0764, p99=1.5906
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-3.99 , -3.21 ) | 176 |
(-3.21 , -2.42 ) | 2074 |
(-2.42 , -1.64 ) | 16797 | #
(-1.64 , -0.855 ) | 120900 | #########
(-0.855 , -0.0704) | 480098 | ###################################
(-0.0704, 0.714 ) | 533488 | ########################################
(0.714 , 1.5 ) | 155588 | ###########
(1.5 , 2.28 ) | 16819 | #
(2.28 , 3.07 ) | 1131 |
(3.07 , 3.85 ) | 33 |
[I] Error Metrics: vision_feature_2
[I] Minimum Required Tolerance: elemwise error | [abs=0.00030649] OR [rel=18.878] (requirements may be lower if both abs/rel tolerances are set)
[I] Absolute Difference | Stats: mean=4.0281e-06, std-dev=4.2446e-06, var=1.8017e-11, median=3.0398e-06, min=0 at (0, 0, 0, 40), max=0.00030649 at (0, 175, 64, 7), avg-magnitude=4.0281e-06, p90=8.4341e-06, p95=1.0818e-05, p99=1.7703e-05
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0 , 3.06e-05) | 1324953 | ########################################
(3.06e-05, 6.13e-05) | 1739 |
(6.13e-05, 9.19e-05) | 218 |
(9.19e-05, 0.000123) | 97 |
(0.000123, 0.000153) | 46 |
(0.000153, 0.000184) | 27 |
(0.000184, 0.000215) | 13 |
(0.000215, 0.000245) | 6 |
(0.000245, 0.000276) | 3 |
(0.000276, 0.000306) | 2 |
[I] Relative Difference | Stats: mean=9.2484e-05, std-dev=0.018245, var=0.00033289, median=7.1053e-06, min=0 at (0, 0, 0, 40), max=18.878 at (0, 58, 67, 40), avg-magnitude=9.2484e-05, p90=4.8423e-05, p95=9.8349e-05, p99=0.00049433
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0 , 1.89) | 1327099 | ########################################
(1.89, 3.78) | 3 |
(3.78, 5.66) | 0 |
(5.66, 7.55) | 1 |
(7.55, 9.44) | 0 |
(9.44, 11.3) | 0 |
(11.3, 13.2) | 0 |
(13.2, 15.1) | 0 |
(15.1, 17 ) | 0 |
(17 , 18.9) | 1 |
[E] FAILED | Output: 'vision_feature_2' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I] Comparing Output: 'text_features' (dtype=float32, shape=(1, 32, 256)) with 'text_features' (dtype=float32, shape=(1, 32, 256))
[I] Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I] trt-runner-N0-01/05/26-15:07:18: text_features | Stats: mean=-0.0045255, std-dev=0.69494, var=0.48294, median=0.0011168, min=-4.3921 at (0, 6, 54), max=4.1521 at (0, 1, 112), avg-magnitude=0.39579, p90=0.57612, p95=1.1946, p99=2.154
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-4.39 , -3.54 ) | 24 |
(-3.54 , -2.68 ) | 36 |
(-2.68 , -1.83 ) | 91 |
(-1.83 , -0.974) | 318 | ##
(-0.974, -0.12 ) | 2229 | ##################
(-0.12 , 0.734 ) | 4806 | ########################################
(0.734 , 1.59 ) | 458 | ###
(1.59 , 2.44 ) | 190 | #
(2.44 , 3.3 ) | 33 |
(3.3 , 4.15 ) | 7 |
[I] onnxrt-runner-N0-01/05/26-15:07:18: text_features | Stats: mean=-0.0045255, std-dev=0.69494, var=0.48294, median=0.0011169, min=-4.3921 at (0, 6, 54), max=4.1521 at (0, 1, 112), avg-magnitude=0.39579, p90=0.57612, p95=1.1946, p99=2.154
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-4.39 , -3.54 ) | 24 |
(-3.54 , -2.68 ) | 36 |
(-2.68 , -1.83 ) | 91 |
(-1.83 , -0.974) | 318 | ##
(-0.974, -0.12 ) | 2229 | ##################
(-0.12 , 0.734 ) | 4806 | ########################################
(0.734 , 1.59 ) | 458 | ###
(1.59 , 2.44 ) | 190 | #
(2.44 , 3.3 ) | 33 |
(3.3 , 4.15 ) | 7 |
[I] Error Metrics: text_features
[I] Minimum Required Tolerance: elemwise error | [abs=4.1723e-06] OR [rel=0.0014184] (requirements may be lower if both abs/rel tolerances are set)
[I] Absolute Difference | Stats: mean=3.3811e-07, std-dev=3.9789e-07, var=1.5831e-13, median=2.0862e-07, min=0 at (0, 0, 45), max=4.1723e-06 at (0, 9, 36), avg-magnitude=3.3811e-07, p90=7.7486e-07, p95=1.0729e-06, p99=1.9981e-06
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0 , 4.17e-07) | 5984 | ########################################
(4.17e-07, 8.34e-07) | 1420 | #########
(8.34e-07, 1.25e-06) | 489 | ###
(1.25e-06, 1.67e-06) | 152 | #
(1.67e-06, 2.09e-06) | 73 |
(2.09e-06, 2.5e-06 ) | 44 |
(2.5e-06 , 2.92e-06) | 19 |
(2.92e-06, 3.34e-06) | 3 |
(3.34e-06, 3.76e-06) | 5 |
(3.76e-06, 4.17e-06) | 3 |
[I] Relative Difference | Stats: mean=5.1803e-06, std-dev=3.0262e-05, var=9.1576e-10, median=1.1036e-06, min=0 at (0, 0, 45), max=0.0014184 at (0, 23, 201), avg-magnitude=5.1803e-06, p90=6.8691e-06, p95=1.4526e-05, p99=7.9079e-05
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0 , 0.000142) | 8152 | ########################################
(0.000142, 0.000284) | 23 |
(0.000284, 0.000426) | 10 |
(0.000426, 0.000567) | 3 |
(0.000567, 0.000709) | 0 |
(0.000709, 0.000851) | 2 |
(0.000851, 0.000993) | 1 |
(0.000993, 0.00113 ) | 0 |
(0.00113 , 0.00128 ) | 0 |
(0.00128 , 0.00142 ) | 1 |
[E] FAILED | Output: 'text_features' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I] Comparing Output: 'encoder_hidden_state' (dtype=float32, shape=(1, 5184, 256)) with 'encoder_hidden_state' (dtype=float32, shape=(1, 5184, 256))
[I] Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I] trt-runner-N0-01/05/26-15:07:18: encoder_hidden_state | Stats: mean=-0.029215, std-dev=2.0924, var=4.3781, median=0.04309, min=-16.037 at (0, 3240, 233), max=10.017 at (0, 4940, 222), avg-magnitude=1.5971, p90=2.4842, p95=3.2261, p99=4.6539
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-18.5, -15.1) | 19 |
(-15.1, -11.7) | 263 |
(-11.7, -8.27) | 4140 |
(-8.27, -4.87) | 17348 |
(-4.87, -1.47) | 269728 | #############
(-1.47, 1.94 ) | 826388 | ########################################
(1.94 , 5.34 ) | 203948 | #########
(5.34 , 8.74 ) | 5239 |
(8.74 , 12.1 ) | 31 |
(12.1 , 15.5 ) | 0 |
[I] onnxrt-runner-N0-01/05/26-15:07:18: encoder_hidden_state | Stats: mean=-0.054315, std-dev=2.3718, var=5.6254, median=0.014089, min=-18.478 at (0, 5121, 233), max=15.546 at (0, 4940, 222), avg-magnitude=1.819, p90=2.8103, p95=3.6918, p99=5.3783
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-18.5, -15.1) | 89 |
(-15.1, -11.7) | 427 |
(-11.7, -8.27) | 4753 |
(-8.27, -4.87) | 29375 | #
(-4.87, -1.47) | 297798 | ###############
(-1.47, 1.94 ) | 749043 | ########################################
(1.94 , 5.34 ) | 231736 | ############
(5.34 , 8.74 ) | 13373 |
(8.74 , 12.1 ) | 487 |
(12.1 , 15.5 ) | 23 |
[I] Error Metrics: encoder_hidden_state
[I] Minimum Required Tolerance: elemwise error | [abs=8.6287] OR [rel=4.8772e+05] (requirements may be lower if both abs/rel tolerances are set)
[I] Absolute Difference | Stats: mean=1.0321, std-dev=0.84537, var=0.71465, median=0.83463, min=2.3842e-07 at (0, 723, 246), max=8.6287 at (0, 5104, 233), avg-magnitude=1.0321, p90=2.1743, p95=2.6776, p99=3.7953
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(2.38e-07, 0.863) | 682372 | ########################################
(0.863 , 1.73 ) | 407910 | #######################
(1.73 , 2.59 ) | 161541 | #########
(2.59 , 3.45 ) | 52946 | ###
(3.45 , 4.31 ) | 16452 |
(4.31 , 5.18 ) | 4393 |
(5.18 , 6.04 ) | 1113 |
(6.04 , 6.9 ) | 256 |
(6.9 , 7.77 ) | 95 |
(7.77 , 8.63 ) | 26 |
[I] Relative Difference | Stats: mean=4.9905, std-dev=582.27, var=3.3904e+05, median=0.57435, min=1.1797e-07 at (0, 723, 246), max=4.8772e+05 at (0, 3625, 33), avg-magnitude=4.9905, p90=3.2239, p95=6.4486, p99=32.176
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(1.18e-07, 4.88e+04) | 1327092 | ########################################
(4.88e+04, 9.75e+04) | 6 |
(9.75e+04, 1.46e+05) | 3 |
(1.46e+05, 1.95e+05) | 0 |
(1.95e+05, 2.44e+05) | 0 |
(2.44e+05, 2.93e+05) | 2 |
(2.93e+05, 3.41e+05) | 0 |
(3.41e+05, 3.9e+05 ) | 0 |
(3.9e+05 , 4.39e+05) | 0 |
(4.39e+05, 4.88e+05) | 1 |
[E] FAILED | Output: 'encoder_hidden_state' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I] Comparing Output: 'decoder_hidden_states' (dtype=float32, shape=(1, 200, 256)) with 'decoder_hidden_states' (dtype=float32, shape=(1, 200, 256))
[I] Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I] trt-runner-N0-01/05/26-15:07:18: decoder_hidden_states | Stats: mean=-0.0075265, std-dev=0.97304, var=0.9468, median=0.023594, min=-5.9228 at (0, 44, 32), max=4.4711 at (0, 143, 171), avg-magnitude=0.71806, p90=1.0877, p95=1.469, p99=2.4004
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-6.03 , -4.93 ) | 65 |
(-4.93 , -3.82 ) | 177 |
(-3.82 , -2.72 ) | 337 |
(-2.72 , -1.62 ) | 1634 | ##
(-1.62 , -0.518) | 11169 | #################
(-0.518, 0.584 ) | 25552 | ########################################
(0.584 , 1.69 ) | 10529 | ################
(1.69 , 2.79 ) | 1426 | ##
(2.79 , 3.89 ) | 264 |
(3.89 , 4.99 ) | 47 |
[I] onnxrt-runner-N0-01/05/26-15:07:18: decoder_hidden_states | Stats: mean=-0.0072725, std-dev=0.96756, var=0.93617, median=0.022201, min=-6.0283 at (0, 10, 32), max=4.9917 at (0, 79, 171), avg-magnitude=0.713, p90=1.0802, p95=1.4609, p99=2.3643
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-6.03 , -4.93 ) | 80 |
(-4.93 , -3.82 ) | 141 |
(-3.82 , -2.72 ) | 343 |
(-2.72 , -1.62 ) | 1553 | ##
(-1.62 , -0.518) | 11210 | #################
(-0.518, 0.584 ) | 25704 | ########################################
(0.584 , 1.69 ) | 10501 | ################
(1.69 , 2.79 ) | 1377 | ##
(2.79 , 3.89 ) | 240 |
(3.89 , 4.99 ) | 51 |
[I] Error Metrics: decoder_hidden_states
[I] Minimum Required Tolerance: elemwise error | [abs=3.905] OR [rel=30657] (requirements may be lower if both abs/rel tolerances are set)
[I] Absolute Difference | Stats: mean=0.4291, std-dev=0.38742, var=0.15009, median=0.3242, min=0 at (0, 92, 155), max=3.905 at (0, 106, 159), avg-magnitude=0.4291, p90=0.94021, p95=1.1949, p99=1.7619
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0 , 0.39 ) | 29305 | ########################################
(0.39 , 0.781) | 13941 | ###################
(0.781, 1.17 ) | 5220 | #######
(1.17 , 1.56 ) | 1796 | ##
(1.56 , 1.95 ) | 649 |
(1.95 , 2.34 ) | 192 |
(2.34 , 2.73 ) | 71 |
(2.73 , 3.12 ) | 23 |
(3.12 , 3.51 ) | 2 |
(3.51 , 3.9 ) | 1 |
[I] Relative Difference | Stats: mean=3.8535, std-dev=142.28, var=20243, median=0.586, min=0 at (0, 92, 155), max=30657 at (0, 85, 138), avg-magnitude=3.8535, p90=3.808, p95=7.4858, p99=36.234
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0 , 3.07e+03) | 51197 | ########################################
(3.07e+03, 6.13e+03) | 1 |
(6.13e+03, 9.2e+03 ) | 1 |
(9.2e+03 , 1.23e+04) | 0 |
(1.23e+04, 1.53e+04) | 0 |
(1.53e+04, 1.84e+04) | 0 |
(1.84e+04, 2.15e+04) | 0 |
(2.15e+04, 2.45e+04) | 0 |
(2.45e+04, 2.76e+04) | 0 |
(2.76e+04, 3.07e+04) | 1 |
[E] FAILED | Output: 'decoder_hidden_states' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I] Comparing Output: 'pred_boxes' (dtype=float32, shape=(1, 200, 4)) with 'pred_boxes' (dtype=float32, shape=(1, 200, 4))
[I] Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I] trt-runner-N0-01/05/26-15:07:18: pred_boxes | Stats: mean=0.52495, std-dev=0.20177, var=0.04071, median=0.52565, min=-0.097559 at (0, 89, 0), max=1.0239 at (0, 144, 3), avg-magnitude=0.52519, p90=0.77729, p95=0.8783, p99=0.98599
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-0.0976, 0.0146) | 5 | #
(0.0146 , 0.127 ) | 25 | #####
(0.127 , 0.239 ) | 30 | ######
(0.239 , 0.351 ) | 75 | ################
(0.351 , 0.463 ) | 170 | #####################################
(0.463 , 0.575 ) | 179 | ########################################
(0.575 , 0.687 ) | 159 | ###################################
(0.687 , 0.8 ) | 90 | ####################
(0.8 , 0.912 ) | 36 | ########
(0.912 , 1.02 ) | 31 | ######
[I] onnxrt-runner-N0-01/05/26-15:07:18: pred_boxes | Stats: mean=0.51037, std-dev=0.25017, var=0.062584, median=0.50878, min=-0.0011068 at (0, 111, 0), max=1.0027 at (0, 26, 2), avg-magnitude=0.51038, p90=0.85718, p95=0.9608, p99=0.99875
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-0.0976, 0.0146) | 9 | ##
(0.0146 , 0.127 ) | 43 | ############
(0.127 , 0.239 ) | 73 | #####################
(0.239 , 0.351 ) | 102 | #############################
(0.351 , 0.463 ) | 138 | ########################################
(0.463 , 0.575 ) | 108 | ###############################
(0.575 , 0.687 ) | 125 | ####################################
(0.687 , 0.8 ) | 87 | #########################
(0.8 , 0.912 ) | 55 | ###############
(0.912 , 1.02 ) | 60 | #################
[I] Error Metrics: pred_boxes
[I] Minimum Required Tolerance: elemwise error | [abs=0.68987] OR [rel=151.79] (requirements may be lower if both abs/rel tolerances are set)
[I] Absolute Difference | Stats: mean=0.077092, std-dev=0.087299, var=0.007621, median=0.046964, min=0.00022468 at (0, 70, 3), max=0.68987 at (0, 71, 0), avg-magnitude=0.077092, p90=0.19431, p95=0.2537, p99=0.38542
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0.000225, 0.0692) | 483 | ########################################
(0.0692 , 0.138 ) | 169 | #############
(0.138 , 0.207 ) | 78 | ######
(0.207 , 0.276 ) | 47 | ###
(0.276 , 0.345 ) | 14 | #
(0.345 , 0.414 ) | 3 |
(0.414 , 0.483 ) | 3 |
(0.483 , 0.552 ) | 0 |
(0.552 , 0.621 ) | 0 |
(0.621 , 0.69 ) | 3 |
[I] Relative Difference | Stats: mean=0.8091, std-dev=6.8001, var=46.241, median=0.099547, min=0.00048962 at (0, 70, 3), max=151.79 at (0, 40, 0), avg-magnitude=0.8091, p90=0.67171, p95=1.2416, p99=10.851
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0.00049, 15.2) | 792 | ########################################
(15.2 , 30.4) | 4 |
(30.4 , 45.5) | 0 |
(45.5 , 60.7) | 1 |
(60.7 , 75.9) | 2 |
(75.9 , 91.1) | 0 |
(91.1 , 106 ) | 0 |
(106 , 121 ) | 0 |
(121 , 137 ) | 0 |
(137 , 152 ) | 1 |
[E] FAILED | Output: 'pred_boxes' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I] Comparing Output: 'pred_logits' (dtype=float32, shape=(1, 200)) with 'pred_logits' (dtype=float32, shape=(1, 200))
[I] Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I] trt-runner-N0-01/05/26-15:07:18: pred_logits | Stats: mean=-2.2528, std-dev=0.3234, var=0.10459, median=-2.2741, min=-3.3643 at (0, 167), max=-0.99596 at (0, 144), avg-magnitude=2.2528, p90=-1.8565, p95=-1.7238, p99=-1.3785
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-3.36 , -3.13 ) | 1 |
(-3.13 , -2.89 ) | 2 | #
(-2.89 , -2.65 ) | 14 | ########
(-2.65 , -2.42 ) | 49 | ##############################
(-2.42 , -2.18 ) | 64 | ########################################
(-2.18 , -1.94 ) | 40 | #########################
(-1.94 , -1.71 ) | 20 | ############
(-1.71 , -1.47 ) | 5 | ###
(-1.47 , -1.23 ) | 4 | ##
(-1.23 , -0.996) | 1 |
[I] onnxrt-runner-N0-01/05/26-15:07:18: pred_logits | Stats: mean=-2.3136, std-dev=0.36742, var=0.135, median=-2.3567, min=-3.1664 at (0, 126), max=-1.0731 at (0, 175), avg-magnitude=2.3136, p90=-1.8362, p95=-1.7265, p99=-1.3385
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-3.36 , -3.13 ) | 1 |
(-3.13 , -2.89 ) | 8 | ######
(-2.89 , -2.65 ) | 29 | #######################
(-2.65 , -2.42 ) | 43 | ##################################
(-2.42 , -2.18 ) | 50 | ########################################
(-2.18 , -1.94 ) | 39 | ###############################
(-1.94 , -1.71 ) | 20 | ################
(-1.71 , -1.47 ) | 6 | ####
(-1.47 , -1.23 ) | 3 | ##
(-1.23 , -0.996) | 1 |
[I] Error Metrics: pred_logits
[I] Minimum Required Tolerance: elemwise error | [abs=1.2043] OR [rel=0.96577] (requirements may be lower if both abs/rel tolerances are set)
[I] Absolute Difference | Stats: mean=0.29036, std-dev=0.22991, var=0.052859, median=0.2241, min=0.00070286 at (0, 67), max=1.2043 at (0, 89), avg-magnitude=0.29036, p90=0.62114, p95=0.76967, p99=0.93489
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0.000703, 0.121) | 49 | ####################################
(0.121 , 0.241) | 53 | ########################################
(0.241 , 0.362) | 39 | #############################
(0.362 , 0.482) | 24 | ##################
(0.482 , 0.603) | 12 | #########
(0.603 , 0.723) | 10 | #######
(0.723 , 0.843) | 7 | #####
(0.843 , 0.964) | 4 | ###
(0.964 , 1.08 ) | 0 |
(1.08 , 1.2 ) | 2 | #
[I] Relative Difference | Stats: mean=0.13184, std-dev=0.12588, var=0.015846, median=0.09891, min=0.00028166 at (0, 67), max=0.96577 at (0, 89), avg-magnitude=0.13184, p90=0.26058, p95=0.35903, p99=0.56951
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0.000282, 0.0968) | 98 | ########################################
(0.0968 , 0.193 ) | 62 | #########################
(0.193 , 0.29 ) | 23 | #########
(0.29 , 0.386 ) | 7 | ##
(0.386 , 0.483 ) | 5 | ##
(0.483 , 0.58 ) | 3 | #
(0.58 , 0.676 ) | 1 |
(0.676 , 0.773 ) | 0 |
(0.773 , 0.869 ) | 0 |
(0.869 , 0.966 ) | 1 |
[E] FAILED | Output: 'pred_logits' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I] Comparing Output: 'presence_logits' (dtype=float32, shape=(1, 1)) with 'presence_logits' (dtype=float32, shape=(1, 1))
[I] Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I] trt-runner-N0-01/05/26-15:07:18: presence_logits | Stats: mean=-3.6407, std-dev=0, var=0, median=-3.6407, min=-3.6407 at (0, 0), max=-3.6407 at (0, 0), avg-magnitude=3.6407, p90=-3.6407, p95=-3.6407, p99=-3.6407
[I] ---- Values ----
[[-3.640685]]
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-3.64, -3.52) | 1 | ########################################
(-3.52, -3.41) | 0 |
(-3.41, -3.29) | 0 |
(-3.29, -3.18) | 0 |
(-3.18, -3.06) | 0 |
(-3.06, -2.95) | 0 |
(-2.95, -2.83) | 0 |
(-2.83, -2.71) | 0 |
(-2.71, -2.6 ) | 0 |
(-2.6 , -2.48) | 0 |
[I] onnxrt-runner-N0-01/05/26-15:07:18: presence_logits | Stats: mean=-2.4821, std-dev=0, var=0, median=-2.4821, min=-2.4821 at (0, 0), max=-2.4821 at (0, 0), avg-magnitude=2.4821, p90=-2.4821, p95=-2.4821, p99=-2.4821
[I] ---- Values ----
[[-2.482077]]
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-3.64, -3.52) | 0 |
(-3.52, -3.41) | 0 |
(-3.41, -3.29) | 0 |
(-3.29, -3.18) | 0 |
(-3.18, -3.06) | 0 |
(-3.06, -2.95) | 0 |
(-2.95, -2.83) | 0 |
(-2.83, -2.71) | 0 |
(-2.71, -2.6 ) | 0 |
(-2.6 , -2.48) | 1 | ########################################
[I] Error Metrics: presence_logits
[I] Minimum Required Tolerance: elemwise error | [abs=1.1586] OR [rel=0.46679] (requirements may be lower if both abs/rel tolerances are set)
[I] Absolute Difference | Stats: mean=1.1586, std-dev=0, var=0, median=1.1586, min=1.1586 at (0, 0), max=1.1586 at (0, 0), avg-magnitude=1.1586, p90=1.1586, p95=1.1586, p99=1.1586
[I] ---- Values ----
[[1.1586082]]
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0.659, 0.759) | 0 |
(0.759, 0.859) | 0 |
(0.859, 0.959) | 0 |
(0.959, 1.06 ) | 0 |
(1.06 , 1.16 ) | 0 |
(1.16 , 1.26 ) | 1 | ########################################
(1.26 , 1.36 ) | 0 |
(1.36 , 1.46 ) | 0 |
(1.46 , 1.56 ) | 0 |
(1.56 , 1.66 ) | 0 |
[I] Relative Difference | Stats: mean=0.46679, std-dev=0, var=0, median=0.46679, min=0.46679 at (0, 0), max=0.46679 at (0, 0), avg-magnitude=0.46679, p90=0.46679, p95=0.46679, p99=0.46679
[I] ---- Values ----
[[0.4667898]]
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-0.0332, 0.0668) | 0 |
(0.0668 , 0.167 ) | 0 |
(0.167 , 0.267 ) | 0 |
(0.267 , 0.367 ) | 0 |
(0.367 , 0.467 ) | 0 |
(0.467 , 0.567 ) | 1 | ########################################
(0.567 , 0.667 ) | 0 |
(0.667 , 0.767 ) | 0 |
(0.767 , 0.867 ) | 0 |
(0.867 , 0.967 ) | 0 |
[E] FAILED | Output: 'presence_logits' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I] Comparing Output: 'pred_masks' (dtype=float32, shape=(1, 200, 288, 288)) with 'pred_masks' (dtype=float32, shape=(1, 200, 288, 288))
[I] Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I] trt-runner-N0-01/05/26-15:07:18: pred_masks | Stats: mean=-17.966, std-dev=12.069, var=145.66, median=-17.336, min=-78.59 at (0, 24, 230, 154), max=9.0686 at (0, 33, 192, 166), avg-magnitude=17.987, p90=-1.9971, p95=-0.5272, p99=0.28164
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-100 , -89.1 ) | 0 |
(-89.1 , -78.1 ) | 1 |
(-78.1 , -67 ) | 1281 |
(-67 , -56 ) | 31777 |
(-56 , -45 ) | 299428 | ##
(-45 , -34 ) | 1390408 | ##########
(-34 , -23 ) | 3754791 | ###########################
(-23 , -11.9 ) | 5382122 | ########################################
(-11.9 , -0.927) | 4604179 | ##################################
(-0.927, 10.1 ) | 1124813 | ########
[I] onnxrt-runner-N0-01/05/26-15:07:18: pred_masks | Stats: mean=-19.907, std-dev=13.444, var=180.73, median=-18.39, min=-100.1 at (0, 100, 184, 157), max=10.092 at (0, 158, 282, 215), avg-magnitude=19.925, p90=-3.4109, p95=-1.288, p99=0.17772
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-100 , -89.1 ) | 436 |
(-89.1 , -78.1 ) | 3340 |
(-78.1 , -67 ) | 37138 |
(-67 , -56 ) | 173544 | #
(-56 , -45 ) | 582371 | ####
(-45 , -34 ) | 1682030 | ############
(-34 , -23 ) | 3667927 | ############################
(-23 , -11.9 ) | 5196738 | ########################################
(-11.9 , -0.927) | 4558279 | ###################################
(-0.927, 10.1 ) | 686997 | #####
[I] Error Metrics: pred_masks
[I] Minimum Required Tolerance: elemwise error | [abs=71.439] OR [rel=8.2647e+06] (requirements may be lower if both abs/rel tolerances are set)
[I] Absolute Difference | Stats: mean=8.4159, std-dev=7.7006, var=59.299, median=6.2722, min=0 at (0, 4, 227, 131), max=71.439 at (0, 114, 172, 144), avg-magnitude=8.4159, p90=19.056, p95=23.809, p99=33.912
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0 , 7.14) | 9077192 | ########################################
(7.14, 14.3) | 4377686 | ###################
(14.3, 21.4) | 1950703 | ########
(21.4, 28.6) | 787138 | ###
(28.6, 35.7) | 272937 | #
(35.7, 42.9) | 85912 |
(42.9, 50 ) | 26309 |
(50 , 57.2) | 9677 |
(57.2, 64.3) | 1195 |
(64.3, 71.4) | 51 |
[I] Relative Difference | Stats: mean=2.5923, std-dev=2148.1, var=4.6144e+06, median=0.39781, min=0 at (0, 4, 227, 131), max=8.2647e+06 at (0, 134, 159, 97), avg-magnitude=2.5923, p90=1.1935, p95=2.3632, p99=11.902
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(0 , 8.26e+05) | 16588798 | ########################################
(8.26e+05, 1.65e+06) | 0 |
(1.65e+06, 2.48e+06) | 0 |
(2.48e+06, 3.31e+06) | 1 |
(3.31e+06, 4.13e+06) | 0 |
(4.13e+06, 4.96e+06) | 0 |
(4.96e+06, 5.79e+06) | 0 |
(5.79e+06, 6.61e+06) | 0 |
(6.61e+06, 7.44e+06) | 0 |
(7.44e+06, 8.26e+06) | 1 |
[E] FAILED | Output: 'pred_masks' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I] Comparing Output: 'semantic_seg' (dtype=float32, shape=(1, 1, 288, 288)) with 'semantic_seg' (dtype=float32, shape=(1, 1, 288, 288))
[I] Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I] trt-runner-N0-01/05/26-15:07:18: semantic_seg | Stats: mean=-0.31233, std-dev=0.57507, var=0.33071, median=-0.31994, min=-8.318 at (0, 0, 167, 1), max=3.231 at (0, 0, 162, 187), avg-magnitude=0.47049, p90=0.25276, p95=0.54216, p99=1.2357
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-15.3 , -13.4 ) | 0 |
(-13.4 , -11.5 ) | 0 |
(-11.5 , -9.6 ) | 0 |
(-9.6 , -7.7 ) | 1 |
(-7.7 , -5.8 ) | 4 |
(-5.8 , -3.9 ) | 95 |
(-3.9 , -2 ) | 1095 |
(-2 , -0.0949) | 59160 | ########################################
(-0.0949, 1.81 ) | 22351 | ###############
(1.81 , 3.71 ) | 238 |
[I] onnxrt-runner-N0-01/05/26-15:07:18: semantic_seg | Stats: mean=-2.5686, std-dev=1.2996, var=1.6889, median=-2.9381, min=-15.308 at (0, 0, 287, 97), max=3.7083 at (0, 0, 117, 179), avg-magnitude=2.6896, p90=-0.51426, p95=0.13506, p99=2.0755
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(-15.3 , -13.4 ) | 4 |
(-13.4 , -11.5 ) | 9 |
(-11.5 , -9.6 ) | 16 |
(-9.6 , -7.7 ) | 36 |
(-7.7 , -5.8 ) | 441 |
(-5.8 , -3.9 ) | 1951 | #
(-3.9 , -2 ) | 64646 | ########################################
(-2 , -0.0949) | 10397 | ######
(-0.0949, 1.81 ) | 4274 | ##
(1.81 , 3.71 ) | 1170 |
[I] Error Metrics: semantic_seg
[I] Minimum Required Tolerance: elemwise error | [abs=13.127] OR [rel=6008.1] (requirements may be lower if both abs/rel tolerances are set)
[I] Absolute Difference | Stats: mean=2.3233, std-dev=0.89382, var=0.79891, median=2.5438, min=1.0788e-05 at (0, 0, 130, 156), max=13.127 at (0, 0, 287, 97), avg-magnitude=2.3233, p90=3.1925, p95=3.3673, p99=3.7743
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(1.08e-05, 1.31) | 12790 | ##############
(1.31 , 2.63) | 33420 | ####################################
(2.63 , 3.94) | 36182 | ########################################
(3.94 , 5.25) | 472 |
(5.25 , 6.56) | 43 |
(6.56 , 7.88) | 16 |
(7.88 , 9.19) | 8 |
(9.19 , 10.5) | 9 |
(10.5 , 11.8) | 3 |
(11.8 , 13.1) | 1 |
[I] Relative Difference | Stats: mean=1.2131, std-dev=22.438, var=503.48, median=0.87599, min=3.3699e-05 at (0, 0, 130, 156), max=6008.1 at (0, 0, 163, 181), avg-magnitude=1.2131, p90=1.0899, p95=1.3205, p99=4.7536
[I] ---- Histogram ----
Bin Range | Num Elems | Visualization
(3.37e-05, 601 ) | 82939 | ########################################
(601 , 1.2e+03 ) | 3 |
(1.2e+03 , 1.8e+03 ) | 1 |
(1.8e+03 , 2.4e+03 ) | 0 |
(2.4e+03 , 3e+03 ) | 0 |
(3e+03 , 3.6e+03 ) | 0 |
(3.6e+03 , 4.21e+03) | 0 |
(4.21e+03, 4.81e+03) | 0 |
(4.81e+03, 5.41e+03) | 0 |
(5.41e+03, 6.01e+03) | 1 |
[E] FAILED | Output: 'semantic_seg' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[E] FAILED | Mismatched outputs: ['vision_feature_0', 'vision_feature_1', 'vision_feature_2', 'text_features', 'encoder_hidden_state', 'decoder_hidden_states', 'pred_boxes', 'pred_logits', 'presence_logits', 'pred_masks', 'semantic_seg']
[E] Accuracy Summary | trt-runner-N0-01/05/26-15:07:18 vs. onnxrt-runner-N0-01/05/26-15:07:18 | Passed: 0/1 iterations | Pass Rate: 0.0%
Traceback (most recent call last):
File "/home/zy/open_project/SAM3-TensorRT/python/polygraphy_test.py", line 45, in <module>
main()
File "/home/zy/open_project/SAM3-TensorRT/python/polygraphy_test.py", line 37, in main
assert bool(Comparator.compare_accuracy(run_results, compare_func=CompareFunc.simple(atol=1e-8)))
AssertionError
There are large differences in the encoder_hidden_state output, which suggests that the detr_encoder introduces some of the gap.
Have you noticed this gap on your device as well?
Metadata
Metadata
Assignees
Labels
No labels