Skip to content

Precision loss in TensorRT inference #7

@Huntersdeng

Description

@Huntersdeng

Env

  • NVIDIA GeForce RTX 4060 Ti
  • cuda 12.6
  • tensorrt 10.13.0.35
  • torch 2.7.0+cu126
  • transformers 5.0.0rc1

Description

I found discrepancies between the inference results of the ONNX model and the TensorRT model.
So I modified onnxexport.py to also export the outputs of several intermediate layers:

import torch
from pathlib import Path
from transformers.models.sam3 import Sam3Processor, Sam3Model
from transformers.models.sam3.modeling_sam3 import (
    inverse_sigmoid,
    box_cxcywh_to_xyxy,
    Sam3ImageSegmentationOutput,
)
from PIL import Image
import requests

device = "cpu"  # for onnx export we use CPU for maximum compatibility

# Local checkpoint directory, shared by the model and the processor.
weights_dir = "/home/zy/weights/sam3"

# 1. Load model & processor
model = Sam3Model.from_pretrained(weights_dir).to(device)
processor = Sam3Processor.from_pretrained(weights_dir)

model.eval()

prompt = "dog"

# 2. Build a sample batch (same as your example)
image_url = "http://images.cocodataset.org/val2017/000000077595.jpg"
# Bound the download time and fail on HTTP errors, so a bad response is
# reported as such instead of as an image decoding error.
response = requests.get(image_url, stream=True, timeout=30)
response.raise_for_status()
image = Image.open(response.raw).convert("RGB")

inputs = processor(images=image, text=prompt, return_tensors="pt").to(device)

# These three tensors are consumed further down: pixel_values is the sample
# input for the export, the other two are frozen into the wrapper.
pixel_values = inputs["pixel_values"]
input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]
print(input_ids.shape)

# 3. Wrap Sam3Model so the ONNX graph has clean inputs/outputs
# class Sam3ONNXWrapper(torch.nn.Module):
#     def __init__(self, sam3, input_ids, attention_mask):
#         super().__init__()
#         self.sam3 = sam3
#         self.register_buffer("const_input_ids", input_ids.to(torch.int64).cpu())
#         self.register_buffer("const_attention_mask", attention_mask.to(torch.int64).cpu())

#     def forward(self, pixel_values):
#         outputs = self.sam3(
#             pixel_values=pixel_values,
#             input_ids=self.const_input_ids,
#             attention_mask=self.const_attention_mask,
#         )
#         print(outputs.keys())
#         # Typical useful outputs
#         pred_boxes = outputs.pred_boxes
#         pred_logits = outputs.pred_logits
#         pred_masks = outputs.pred_masks
#         presence_score = outputs.presence_logits
#         return (pred_boxes, pred_logits, pred_masks, presence_score)


class Sam3ONNXWrapper(torch.nn.Module):
    """Export wrapper that runs the SAM3 sub-modules one after another.

    The prompt tensors are stored as buffers, so ``forward`` takes only
    ``pixel_values``. Besides the final predictions it also returns several
    intermediate tensors so they can be compared between runtimes.
    """

    def __init__(self, sam3, input_ids, attention_mask):
        """Keep ``sam3`` and register the prompt tensors as int64 CPU buffers.

        Args:
            sam3: Model exposing ``vision_encoder``, ``get_text_features``,
                ``detr_encoder``, ``detr_decoder``, ``dot_product_scoring``
                and ``mask_decoder``.
            input_ids: Token ids of the text prompt.
            attention_mask: Attention mask matching ``input_ids``.
        """
        super().__init__()
        self.sam3 = sam3
        self.register_buffer("const_input_ids", input_ids.to(torch.int64).cpu())
        self.register_buffer(
            "const_attention_mask", attention_mask.to(torch.int64).cpu()
        )

    def forward(self, pixel_values):
        """Run the pipeline for ``pixel_values`` with the stored prompt.

        Returns:
            An 11-tuple, in this order: the first three FPN feature maps,
            the text features, the DETR encoder's last hidden state, the
            last decoder layer's hidden states, ``pred_boxes`` (xyxy),
            ``pred_logits``, ``presence_logits``, ``pred_masks`` and
            ``semantic_seg``.
        """
        vision_outputs = self.sam3.vision_encoder(pixel_values)
        # The last FPN level is dropped for both features and positions.
        fpn_hidden_states = vision_outputs.fpn_hidden_states[:-1]
        fpn_position_encoding = vision_outputs.fpn_position_encoding[:-1]
        text_features = self.sam3.get_text_features(
            input_ids=self.const_input_ids, attention_mask=self.const_attention_mask
        )
        # Use the registered buffer, not a module-level variable, so the
        # wrapper does not depend on script globals.
        text_mask = self.const_attention_mask.bool()
        combined_prompt_features = text_features
        combined_prompt_mask = text_mask
        # Only the last remaining FPN level is passed to the DETR encoder.
        encoder_outputs = self.sam3.detr_encoder(
            vision_features=[fpn_hidden_states[-1]],
            text_features=combined_prompt_features,
            vision_pos_embeds=[fpn_position_encoding[-1]],
            text_mask=combined_prompt_mask,
        )
        decoder_outputs = self.sam3.detr_decoder(
            vision_features=encoder_outputs.last_hidden_state,
            text_features=encoder_outputs.text_features,
            vision_pos_encoding=encoder_outputs.pos_embeds_flattened,
            text_mask=combined_prompt_mask,
            spatial_shapes=encoder_outputs.spatial_shapes,
        )

        # Boxes: add the predicted offsets to the reference boxes in logit
        # space, squash back with sigmoid, then convert cxcywh -> xyxy.
        all_box_offsets = self.sam3.detr_decoder.box_head(
            decoder_outputs.intermediate_hidden_states
        )
        reference_boxes_inv_sig = inverse_sigmoid(decoder_outputs.reference_boxes)
        all_pred_boxes_cxcywh = (reference_boxes_inv_sig + all_box_offsets).sigmoid()
        all_pred_boxes = box_cxcywh_to_xyxy(all_pred_boxes_cxcywh)

        all_pred_logits = self.sam3.dot_product_scoring(
            decoder_hidden_states=decoder_outputs.intermediate_hidden_states,
            text_features=encoder_outputs.text_features,
            text_mask=combined_prompt_mask,
        ).squeeze(-1)

        # Keep only the last entry along the leading axis of each stacked output.
        pred_logits = all_pred_logits[-1]
        pred_boxes = all_pred_boxes[-1]
        decoder_hidden_states = decoder_outputs.intermediate_hidden_states[-1]
        presence_logits = decoder_outputs.presence_logits[-1]

        mask_outputs = self.sam3.mask_decoder(
            decoder_queries=decoder_hidden_states,
            backbone_features=list(fpn_hidden_states),
            encoder_hidden_states=encoder_outputs.last_hidden_state,
            prompt_features=combined_prompt_features,
            prompt_mask=combined_prompt_mask,
        )

        pred_masks = mask_outputs.pred_masks
        semantic_seg = mask_outputs.semantic_seg

        vision_feature_0 = fpn_hidden_states[0]
        vision_feature_1 = fpn_hidden_states[1]
        vision_feature_2 = fpn_hidden_states[2]

        return (
            vision_feature_0,
            vision_feature_1,
            vision_feature_2,
            text_features,
            encoder_outputs.last_hidden_state,
            decoder_hidden_states,
            pred_boxes,
            pred_logits,
            presence_logits,
            pred_masks,
            semantic_seg,
        )


wrapper = Sam3ONNXWrapper(model, input_ids, attention_mask).to(device).eval()
# Optional eager sanity check before exporting: wrapper(pixel_values)

# 4. Export to ONNX
output_dir = Path("onnx_weights_dbg")
output_dir.mkdir(exist_ok=True)
onnx_path = str(output_dir / "sam3_dbg.onnx")
# Earlier export with final predictions only:
# output_names = ["pred_boxes", "pred_logits", "pred_masks", "presence_score"]
# The names below are listed in the same order as the tuple returned by
# Sam3ONNXWrapper.forward.
output_names = [
    "vision_feature_0",
    "vision_feature_1",
    "vision_feature_2",
    "text_features",
    "encoder_hidden_state",
    "decoder_hidden_states",
    "pred_boxes",
    "pred_logits",
    "presence_logits",
    "pred_masks",
    "semantic_seg",
]


torch.onnx.export(
    wrapper,
    (pixel_values,),  # explicit 1-tuple of positional inputs
    onnx_path,
    input_names=["pixel_values"],
    output_names=output_names,
    dynamo=False,
    opset_version=17,
)

print(f"Exported to {onnx_path}")

I then used Polygraphy to compare the TensorRT and ONNX Runtime outputs. Here is the result:

[I] TF32 is disabled by default. Turn on TF32 for better performance with minor accuracy differences.
[I] trt-runner-N0-01/05/26-15:07:18     | Activating and starting inference
[I] Configuring with profiles:[
        Profile 0:
            {pixel_values [min=[1, 3, 1008, 1008], opt=[1, 3, 1008, 1008], max=[1, 3, 1008, 1008]]}
    ]
[W] profileSharing0806 is on by default in TensorRT 10.0. This flag is deprecated and has no effect.
[I] Building engine with configuration:
    Flags                  | []
    Engine Capability      | EngineCapability.STANDARD
    Memory Pools           | [WORKSPACE: 15936.19 MiB, TACTIC_DRAM: 15936.19 MiB, TACTIC_SHARED_MEMORY: 1024.00 MiB]
    Tactic Sources         | [EDGE_MASK_CONVOLUTIONS, JIT_CONVOLUTIONS]
    Profiling Verbosity    | ProfilingVerbosity.DETAILED
    Preview Features       | [PROFILE_SHARING_0806]
[I] Finished engine building in 58.691 seconds
[I] trt-runner-N0-01/05/26-15:07:18    
    ---- Inference Input(s) ----
    {pixel_values [dtype=float32, shape=(1, 3, 1008, 1008)]}
[I] trt-runner-N0-01/05/26-15:07:18    
    ---- Inference Output(s) ----
    {vision_feature_0 [dtype=float32, shape=(1, 256, 288, 288)],
     vision_feature_1 [dtype=float32, shape=(1, 256, 144, 144)],
     vision_feature_2 [dtype=float32, shape=(1, 256, 72, 72)],
     text_features [dtype=float32, shape=(1, 32, 256)],
     encoder_hidden_state [dtype=float32, shape=(1, 5184, 256)],
     decoder_hidden_states [dtype=float32, shape=(1, 200, 256)],
     pred_boxes [dtype=float32, shape=(1, 200, 4)],
     pred_logits [dtype=float32, shape=(1, 200)],
     presence_logits [dtype=float32, shape=(1, 1)],
     pred_masks [dtype=float32, shape=(1, 200, 288, 288)],
     semantic_seg [dtype=float32, shape=(1, 1, 288, 288)]}
[I] trt-runner-N0-01/05/26-15:07:18     | Completed 1 iteration(s) in 1306 ms | Average inference time: 1306 ms.
[I] onnxrt-runner-N0-01/05/26-15:07:18  | Activating and starting inference
[I] Creating ONNX-Runtime Inference Session with providers: ['CPUExecutionProvider']
2026-01-05 15:08:22.030170384 [W:onnxruntime:, graph.cc:120 MergeShapeInfo] Error merging shape info for output. '/vision_encoder/backbone/embeddings/Concat_output_0' source:{4} target:{5}. Falling back to lenient merge.
[I] onnxrt-runner-N0-01/05/26-15:07:18 
    ---- Inference Input(s) ----
    {pixel_values [dtype=float32, shape=(1, 3, 1008, 1008)]}
[I] onnxrt-runner-N0-01/05/26-15:07:18 
    ---- Inference Output(s) ----
    {vision_feature_0 [dtype=float32, shape=(1, 256, 288, 288)],
     vision_feature_1 [dtype=float32, shape=(1, 256, 144, 144)],
     vision_feature_2 [dtype=float32, shape=(1, 256, 72, 72)],
     text_features [dtype=float32, shape=(1, 32, 256)],
     encoder_hidden_state [dtype=float32, shape=(1, 5184, 256)],
     decoder_hidden_states [dtype=float32, shape=(1, 200, 256)],
     pred_boxes [dtype=float32, shape=(1, 200, 4)],
     pred_logits [dtype=float32, shape=(1, 200)],
     presence_logits [dtype=float32, shape=(1, 1)],
     pred_masks [dtype=float32, shape=(1, 200, 288, 288)],
     semantic_seg [dtype=float32, shape=(1, 1, 288, 288)]}
[I] onnxrt-runner-N0-01/05/26-15:07:18  | Completed 1 iteration(s) in 8450 ms | Average inference time: 8450 ms.
[I] Accuracy Comparison | trt-runner-N0-01/05/26-15:07:18 vs. onnxrt-runner-N0-01/05/26-15:07:18
[I]     Comparing Output: 'vision_feature_0' (dtype=float32, shape=(1, 256, 288, 288)) with 'vision_feature_0' (dtype=float32, shape=(1, 256, 288, 288))
[I]         Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[W]         onnxrt-runner-N0-01/05/26-15:07:18  | Output: vision_feature_0: Some values are 0. Will add a small epsilon quantity to these when computing relative difference. Note that this may cause some relative differences to be extremely high. 
[I]         trt-runner-N0-01/05/26-15:07:18: vision_feature_0 | Stats: mean=-0.01514, std-dev=0.016879, var=0.00028488, median=-0.01227, min=-0.14353 at (0, 17, 285, 11), max=0.07911 at (0, 111, 281, 214), avg-magnitude=0.017047, p90=0.0032417, p95=0.0072221, p99=0.015082
[I]             ---- Histogram ----
                Bin Range            |  Num Elems | Visualization
                (-0.144  , -0.121  ) |         47 | 
                (-0.121  , -0.099  ) |       5634 | 
                (-0.099  , -0.0767 ) |     119782 | 
                (-0.0767 , -0.0545 ) |     408355 | #
                (-0.0545 , -0.0322 ) |    2606431 | ###########
                (-0.0322 , -0.00995) |    8920261 | ########################################
                (-0.00995, 0.0123  ) |    8790913 | #######################################
                (0.0123  , 0.0346  ) |     378221 | #
                (0.0346  , 0.0568  ) |       3978 | 
                (0.0568  , 0.0791  ) |         42 | 
[I]         onnxrt-runner-N0-01/05/26-15:07:18: vision_feature_0 | Stats: mean=-0.01514, std-dev=0.016879, var=0.00028488, median=-0.01227, min=-0.14353 at (0, 17, 285, 11), max=0.07911 at (0, 111, 281, 214), avg-magnitude=0.017047, p90=0.0032417, p95=0.007222, p99=0.015082
[I]             ---- Histogram ----
                Bin Range            |  Num Elems | Visualization
                (-0.144  , -0.121  ) |         47 | 
                (-0.121  , -0.099  ) |       5634 | 
                (-0.099  , -0.0767 ) |     119783 | 
                (-0.0767 , -0.0545 ) |     408353 | #
                (-0.0545 , -0.0322 ) |    2606436 | ###########
                (-0.0322 , -0.00995) |    8920265 | ########################################
                (-0.00995, 0.0123  ) |    8790908 | #######################################
                (0.0123  , 0.0346  ) |     378218 | #
                (0.0346  , 0.0568  ) |       3978 | 
                (0.0568  , 0.0791  ) |         42 | 
[I]         Error Metrics: vision_feature_0
[I]             Minimum Required Tolerance: elemwise error | [abs=6.754e-06] OR [rel=1.0737e+09] (requirements may be lower if both abs/rel tolerances are set)
[I]             Absolute Difference | Stats: mean=5.7006e-08, std-dev=6.4222e-08, var=4.1245e-15, median=4.2375e-08, min=0 at (0, 0, 0, 8), max=6.754e-06 at (0, 211, 257, 30), avg-magnitude=5.7006e-08, p90=1.1921e-07, p95=1.546e-07, p99=2.645e-07
[I]                 ---- Histogram ----
                    Bin Range            |  Num Elems | Visualization
                    (0       , 6.75e-07) |   21216916 | ########################################
                    (6.75e-07, 1.35e-06) |      13286 | 
                    (1.35e-06, 2.03e-06) |       2264 | 
                    (2.03e-06, 2.7e-06 ) |        702 | 
                    (2.7e-06 , 3.38e-06) |        299 | 
                    (3.38e-06, 4.05e-06) |        107 | 
                    (4.05e-06, 4.73e-06) |         60 | 
                    (4.73e-06, 5.4e-06 ) |         22 | 
                    (5.4e-06 , 6.08e-06) |          6 | 
                    (6.08e-06, 6.75e-06) |          2 | 
[I]             Relative Difference | Stats: mean=54.914, std-dev=2.3388e+05, var=5.4698e+10, median=3.2489e-06, min=0 at (0, 0, 0, 8), max=1.0737e+09 at (0, 251, 32, 46), avg-magnitude=54.914, p90=2.2915e-05, p95=4.6669e-05, p99=0.00023475
[I]                 ---- Histogram ----
                    Bin Range            |  Num Elems | Visualization
                    (0       , 1.07e+08) |   21233663 | ########################################
                    (1.07e+08, 2.15e+08) |          0 | 
                    (2.15e+08, 3.22e+08) |          0 | 
                    (3.22e+08, 4.29e+08) |          0 | 
                    (4.29e+08, 5.37e+08) |          0 | 
                    (5.37e+08, 6.44e+08) |          0 | 
                    (6.44e+08, 7.52e+08) |          0 | 
                    (7.52e+08, 8.59e+08) |          0 | 
                    (8.59e+08, 9.66e+08) |          0 | 
                    (9.66e+08, 1.07e+09) |          1 | 
[E]         FAILED | Output: 'vision_feature_0' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I]     Comparing Output: 'vision_feature_1' (dtype=float32, shape=(1, 256, 144, 144)) with 'vision_feature_1' (dtype=float32, shape=(1, 256, 144, 144))
[I]         Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I]         trt-runner-N0-01/05/26-15:07:18: vision_feature_1 | Stats: mean=0.0019535, std-dev=0.99889, var=0.99777, median=-0.002932, min=-5.9977 at (0, 139, 51, 57), max=6.2735 at (0, 44, 20, 101), avg-magnitude=0.7847, p90=1.2633, p95=1.6437, p99=2.3871
[I]             ---- Histogram ----
                Bin Range      |  Num Elems | Visualization
                (-6   , -4.77) |        148 | 
                (-4.77, -3.54) |       4602 | 
                (-3.54, -2.32) |      60173 | #
                (-2.32, -1.09) |     623058 | ##########
                (-1.09, 0.138) |    2276178 | ########################################
                (0.138, 1.37 ) |    1898847 | #################################
                (1.37 , 2.59 ) |     412856 | #######
                (2.59 , 3.82 ) |      30715 | 
                (3.82 , 5.05 ) |       1750 | 
                (5.05 , 6.27 ) |         89 | 
[I]         onnxrt-runner-N0-01/05/26-15:07:18: vision_feature_1 | Stats: mean=0.0019535, std-dev=0.99889, var=0.99778, median=-0.0029335, min=-5.9977 at (0, 139, 51, 57), max=6.2735 at (0, 44, 20, 101), avg-magnitude=0.7847, p90=1.2633, p95=1.6437, p99=2.3871
[I]             ---- Histogram ----
                Bin Range      |  Num Elems | Visualization
                (-6   , -4.77) |        148 | 
                (-4.77, -3.54) |       4602 | 
                (-3.54, -2.32) |      60174 | #
                (-2.32, -1.09) |     623058 | ##########
                (-1.09, 0.138) |    2276180 | ########################################
                (0.138, 1.37 ) |    1898843 | #################################
                (1.37 , 2.59 ) |     412857 | #######
                (2.59 , 3.82 ) |      30715 | 
                (3.82 , 5.05 ) |       1750 | 
                (5.05 , 6.27 ) |         89 | 
[I]         Error Metrics: vision_feature_1
[I]             Minimum Required Tolerance: elemwise error | [abs=0.00055704] OR [rel=78.247] (requirements may be lower if both abs/rel tolerances are set)
[I]             Absolute Difference | Stats: mean=5.5588e-06, std-dev=5.9869e-06, var=3.5843e-11, median=4.1723e-06, min=0 at (0, 0, 0, 78), max=0.00055704 at (0, 191, 114, 120), avg-magnitude=5.5588e-06, p90=1.1563e-05, p95=1.4782e-05, p99=2.4557e-05
[I]                 ---- Histogram ----
                    Bin Range            |  Num Elems | Visualization
                    (0       , 5.57e-05) |    5302571 | ########################################
                    (5.57e-05, 0.000111) |       4649 | 
                    (0.000111, 0.000167) |        811 | 
                    (0.000167, 0.000223) |        260 | 
                    (0.000223, 0.000279) |         92 | 
                    (0.000279, 0.000334) |         23 | 
                    (0.000334, 0.00039 ) |          4 | 
                    (0.00039 , 0.000446) |          4 | 
                    (0.000446, 0.000501) |          1 | 
                    (0.000501, 0.000557) |          1 | 
[I]             Relative Difference | Stats: mean=9.6089e-05, std-dev=0.045692, var=0.0020878, median=6.5735e-06, min=0 at (0, 0, 0, 78), max=78.247 at (0, 205, 50, 50), avg-magnitude=9.6089e-05, p90=4.3977e-05, p95=8.9272e-05, p99=0.00044842
[I]                 ---- Histogram ----
                    Bin Range    |  Num Elems | Visualization
                    (0   , 7.82) |    5308410 | ########################################
                    (7.82, 15.6) |          4 | 
                    (15.6, 23.5) |          0 | 
                    (23.5, 31.3) |          0 | 
                    (31.3, 39.1) |          0 | 
                    (39.1, 46.9) |          0 | 
                    (46.9, 54.8) |          0 | 
                    (54.8, 62.6) |          0 | 
                    (62.6, 70.4) |          1 | 
                    (70.4, 78.2) |          1 | 
[E]         FAILED | Output: 'vision_feature_1' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I]     Comparing Output: 'vision_feature_2' (dtype=float32, shape=(1, 256, 72, 72)) with 'vision_feature_2' (dtype=float32, shape=(1, 256, 72, 72))
[I]         Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I]         trt-runner-N0-01/05/26-15:07:18: vision_feature_2 | Stats: mean=-0.02647, std-dev=0.68585, var=0.47039, median=-0.018099, min=-3.9931 at (0, 43, 66, 57), max=3.8522 at (0, 185, 17, 3), avg-magnitude=0.53415, p90=0.82253, p95=1.0764, p99=1.5906
[I]             ---- Histogram ----
                Bin Range          |  Num Elems | Visualization
                (-3.99  , -3.21  ) |        176 | 
                (-3.21  , -2.42  ) |       2074 | 
                (-2.42  , -1.64  ) |      16797 | #
                (-1.64  , -0.855 ) |     120900 | #########
                (-0.855 , -0.0704) |     480100 | ###################################
                (-0.0704, 0.714  ) |     533487 | ########################################
                (0.714  , 1.5    ) |     155587 | ###########
                (1.5    , 2.28   ) |      16819 | #
                (2.28   , 3.07   ) |       1131 | 
                (3.07   , 3.85   ) |         33 | 
[I]         onnxrt-runner-N0-01/05/26-15:07:18: vision_feature_2 | Stats: mean=-0.02647, std-dev=0.68585, var=0.47039, median=-0.018098, min=-3.9931 at (0, 43, 66, 57), max=3.8522 at (0, 185, 17, 3), avg-magnitude=0.53415, p90=0.82253, p95=1.0764, p99=1.5906
[I]             ---- Histogram ----
                Bin Range          |  Num Elems | Visualization
                (-3.99  , -3.21  ) |        176 | 
                (-3.21  , -2.42  ) |       2074 | 
                (-2.42  , -1.64  ) |      16797 | #
                (-1.64  , -0.855 ) |     120900 | #########
                (-0.855 , -0.0704) |     480098 | ###################################
                (-0.0704, 0.714  ) |     533488 | ########################################
                (0.714  , 1.5    ) |     155588 | ###########
                (1.5    , 2.28   ) |      16819 | #
                (2.28   , 3.07   ) |       1131 | 
                (3.07   , 3.85   ) |         33 | 
[I]         Error Metrics: vision_feature_2
[I]             Minimum Required Tolerance: elemwise error | [abs=0.00030649] OR [rel=18.878] (requirements may be lower if both abs/rel tolerances are set)
[I]             Absolute Difference | Stats: mean=4.0281e-06, std-dev=4.2446e-06, var=1.8017e-11, median=3.0398e-06, min=0 at (0, 0, 0, 40), max=0.00030649 at (0, 175, 64, 7), avg-magnitude=4.0281e-06, p90=8.4341e-06, p95=1.0818e-05, p99=1.7703e-05
[I]                 ---- Histogram ----
                    Bin Range            |  Num Elems | Visualization
                    (0       , 3.06e-05) |    1324953 | ########################################
                    (3.06e-05, 6.13e-05) |       1739 | 
                    (6.13e-05, 9.19e-05) |        218 | 
                    (9.19e-05, 0.000123) |         97 | 
                    (0.000123, 0.000153) |         46 | 
                    (0.000153, 0.000184) |         27 | 
                    (0.000184, 0.000215) |         13 | 
                    (0.000215, 0.000245) |          6 | 
                    (0.000245, 0.000276) |          3 | 
                    (0.000276, 0.000306) |          2 | 
[I]             Relative Difference | Stats: mean=9.2484e-05, std-dev=0.018245, var=0.00033289, median=7.1053e-06, min=0 at (0, 0, 0, 40), max=18.878 at (0, 58, 67, 40), avg-magnitude=9.2484e-05, p90=4.8423e-05, p95=9.8349e-05, p99=0.00049433
[I]                 ---- Histogram ----
                    Bin Range    |  Num Elems | Visualization
                    (0   , 1.89) |    1327099 | ########################################
                    (1.89, 3.78) |          3 | 
                    (3.78, 5.66) |          0 | 
                    (5.66, 7.55) |          1 | 
                    (7.55, 9.44) |          0 | 
                    (9.44, 11.3) |          0 | 
                    (11.3, 13.2) |          0 | 
                    (13.2, 15.1) |          0 | 
                    (15.1, 17  ) |          0 | 
                    (17  , 18.9) |          1 | 
[E]         FAILED | Output: 'vision_feature_2' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I]     Comparing Output: 'text_features' (dtype=float32, shape=(1, 32, 256)) with 'text_features' (dtype=float32, shape=(1, 32, 256))
[I]         Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I]         trt-runner-N0-01/05/26-15:07:18: text_features | Stats: mean=-0.0045255, std-dev=0.69494, var=0.48294, median=0.0011168, min=-4.3921 at (0, 6, 54), max=4.1521 at (0, 1, 112), avg-magnitude=0.39579, p90=0.57612, p95=1.1946, p99=2.154
[I]             ---- Histogram ----
                Bin Range        |  Num Elems | Visualization
                (-4.39 , -3.54 ) |         24 | 
                (-3.54 , -2.68 ) |         36 | 
                (-2.68 , -1.83 ) |         91 | 
                (-1.83 , -0.974) |        318 | ##
                (-0.974, -0.12 ) |       2229 | ##################
                (-0.12 , 0.734 ) |       4806 | ########################################
                (0.734 , 1.59  ) |        458 | ###
                (1.59  , 2.44  ) |        190 | #
                (2.44  , 3.3   ) |         33 | 
                (3.3   , 4.15  ) |          7 | 
[I]         onnxrt-runner-N0-01/05/26-15:07:18: text_features | Stats: mean=-0.0045255, std-dev=0.69494, var=0.48294, median=0.0011169, min=-4.3921 at (0, 6, 54), max=4.1521 at (0, 1, 112), avg-magnitude=0.39579, p90=0.57612, p95=1.1946, p99=2.154
[I]             ---- Histogram ----
                Bin Range        |  Num Elems | Visualization
                (-4.39 , -3.54 ) |         24 | 
                (-3.54 , -2.68 ) |         36 | 
                (-2.68 , -1.83 ) |         91 | 
                (-1.83 , -0.974) |        318 | ##
                (-0.974, -0.12 ) |       2229 | ##################
                (-0.12 , 0.734 ) |       4806 | ########################################
                (0.734 , 1.59  ) |        458 | ###
                (1.59  , 2.44  ) |        190 | #
                (2.44  , 3.3   ) |         33 | 
                (3.3   , 4.15  ) |          7 | 
[I]         Error Metrics: text_features
[I]             Minimum Required Tolerance: elemwise error | [abs=4.1723e-06] OR [rel=0.0014184] (requirements may be lower if both abs/rel tolerances are set)
[I]             Absolute Difference | Stats: mean=3.3811e-07, std-dev=3.9789e-07, var=1.5831e-13, median=2.0862e-07, min=0 at (0, 0, 45), max=4.1723e-06 at (0, 9, 36), avg-magnitude=3.3811e-07, p90=7.7486e-07, p95=1.0729e-06, p99=1.9981e-06
[I]                 ---- Histogram ----
                    Bin Range            |  Num Elems | Visualization
                    (0       , 4.17e-07) |       5984 | ########################################
                    (4.17e-07, 8.34e-07) |       1420 | #########
                    (8.34e-07, 1.25e-06) |        489 | ###
                    (1.25e-06, 1.67e-06) |        152 | #
                    (1.67e-06, 2.09e-06) |         73 | 
                    (2.09e-06, 2.5e-06 ) |         44 | 
                    (2.5e-06 , 2.92e-06) |         19 | 
                    (2.92e-06, 3.34e-06) |          3 | 
                    (3.34e-06, 3.76e-06) |          5 | 
                    (3.76e-06, 4.17e-06) |          3 | 
[I]             Relative Difference | Stats: mean=5.1803e-06, std-dev=3.0262e-05, var=9.1576e-10, median=1.1036e-06, min=0 at (0, 0, 45), max=0.0014184 at (0, 23, 201), avg-magnitude=5.1803e-06, p90=6.8691e-06, p95=1.4526e-05, p99=7.9079e-05
[I]                 ---- Histogram ----
                    Bin Range            |  Num Elems | Visualization
                    (0       , 0.000142) |       8152 | ########################################
                    (0.000142, 0.000284) |         23 | 
                    (0.000284, 0.000426) |         10 | 
                    (0.000426, 0.000567) |          3 | 
                    (0.000567, 0.000709) |          0 | 
                    (0.000709, 0.000851) |          2 | 
                    (0.000851, 0.000993) |          1 | 
                    (0.000993, 0.00113 ) |          0 | 
                    (0.00113 , 0.00128 ) |          0 | 
                    (0.00128 , 0.00142 ) |          1 | 
[E]         FAILED | Output: 'text_features' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I]     Comparing Output: 'encoder_hidden_state' (dtype=float32, shape=(1, 5184, 256)) with 'encoder_hidden_state' (dtype=float32, shape=(1, 5184, 256))
[I]         Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I]         trt-runner-N0-01/05/26-15:07:18: encoder_hidden_state | Stats: mean=-0.029215, std-dev=2.0924, var=4.3781, median=0.04309, min=-16.037 at (0, 3240, 233), max=10.017 at (0, 4940, 222), avg-magnitude=1.5971, p90=2.4842, p95=3.2261, p99=4.6539
[I]             ---- Histogram ----
                Bin Range      |  Num Elems | Visualization
                (-18.5, -15.1) |         19 | 
                (-15.1, -11.7) |        263 | 
                (-11.7, -8.27) |       4140 | 
                (-8.27, -4.87) |      17348 | 
                (-4.87, -1.47) |     269728 | #############
                (-1.47, 1.94 ) |     826388 | ########################################
                (1.94 , 5.34 ) |     203948 | #########
                (5.34 , 8.74 ) |       5239 | 
                (8.74 , 12.1 ) |         31 | 
                (12.1 , 15.5 ) |          0 | 
[I]         onnxrt-runner-N0-01/05/26-15:07:18: encoder_hidden_state | Stats: mean=-0.054315, std-dev=2.3718, var=5.6254, median=0.014089, min=-18.478 at (0, 5121, 233), max=15.546 at (0, 4940, 222), avg-magnitude=1.819, p90=2.8103, p95=3.6918, p99=5.3783
[I]             ---- Histogram ----
                Bin Range      |  Num Elems | Visualization
                (-18.5, -15.1) |         89 | 
                (-15.1, -11.7) |        427 | 
                (-11.7, -8.27) |       4753 | 
                (-8.27, -4.87) |      29375 | #
                (-4.87, -1.47) |     297798 | ###############
                (-1.47, 1.94 ) |     749043 | ########################################
                (1.94 , 5.34 ) |     231736 | ############
                (5.34 , 8.74 ) |      13373 | 
                (8.74 , 12.1 ) |        487 | 
                (12.1 , 15.5 ) |         23 | 
[I]         Error Metrics: encoder_hidden_state
[I]             Minimum Required Tolerance: elemwise error | [abs=8.6287] OR [rel=4.8772e+05] (requirements may be lower if both abs/rel tolerances are set)
[I]             Absolute Difference | Stats: mean=1.0321, std-dev=0.84537, var=0.71465, median=0.83463, min=2.3842e-07 at (0, 723, 246), max=8.6287 at (0, 5104, 233), avg-magnitude=1.0321, p90=2.1743, p95=2.6776, p99=3.7953
[I]                 ---- Histogram ----
                    Bin Range         |  Num Elems | Visualization
                    (2.38e-07, 0.863) |     682372 | ########################################
                    (0.863   , 1.73 ) |     407910 | #######################
                    (1.73    , 2.59 ) |     161541 | #########
                    (2.59    , 3.45 ) |      52946 | ###
                    (3.45    , 4.31 ) |      16452 | 
                    (4.31    , 5.18 ) |       4393 | 
                    (5.18    , 6.04 ) |       1113 | 
                    (6.04    , 6.9  ) |        256 | 
                    (6.9     , 7.77 ) |         95 | 
                    (7.77    , 8.63 ) |         26 | 
[I]             Relative Difference | Stats: mean=4.9905, std-dev=582.27, var=3.3904e+05, median=0.57435, min=1.1797e-07 at (0, 723, 246), max=4.8772e+05 at (0, 3625, 33), avg-magnitude=4.9905, p90=3.2239, p95=6.4486, p99=32.176
[I]                 ---- Histogram ----
                    Bin Range            |  Num Elems | Visualization
                    (1.18e-07, 4.88e+04) |    1327092 | ########################################
                    (4.88e+04, 9.75e+04) |          6 | 
                    (9.75e+04, 1.46e+05) |          3 | 
                    (1.46e+05, 1.95e+05) |          0 | 
                    (1.95e+05, 2.44e+05) |          0 | 
                    (2.44e+05, 2.93e+05) |          2 | 
                    (2.93e+05, 3.41e+05) |          0 | 
                    (3.41e+05, 3.9e+05 ) |          0 | 
                    (3.9e+05 , 4.39e+05) |          0 | 
                    (4.39e+05, 4.88e+05) |          1 | 
[E]         FAILED | Output: 'encoder_hidden_state' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I]     Comparing Output: 'decoder_hidden_states' (dtype=float32, shape=(1, 200, 256)) with 'decoder_hidden_states' (dtype=float32, shape=(1, 200, 256))
[I]         Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I]         trt-runner-N0-01/05/26-15:07:18: decoder_hidden_states | Stats: mean=-0.0075265, std-dev=0.97304, var=0.9468, median=0.023594, min=-5.9228 at (0, 44, 32), max=4.4711 at (0, 143, 171), avg-magnitude=0.71806, p90=1.0877, p95=1.469, p99=2.4004
[I]             ---- Histogram ----
                Bin Range        |  Num Elems | Visualization
                (-6.03 , -4.93 ) |         65 | 
                (-4.93 , -3.82 ) |        177 | 
                (-3.82 , -2.72 ) |        337 | 
                (-2.72 , -1.62 ) |       1634 | ##
                (-1.62 , -0.518) |      11169 | #################
                (-0.518, 0.584 ) |      25552 | ########################################
                (0.584 , 1.69  ) |      10529 | ################
                (1.69  , 2.79  ) |       1426 | ##
                (2.79  , 3.89  ) |        264 | 
                (3.89  , 4.99  ) |         47 | 
[I]         onnxrt-runner-N0-01/05/26-15:07:18: decoder_hidden_states | Stats: mean=-0.0072725, std-dev=0.96756, var=0.93617, median=0.022201, min=-6.0283 at (0, 10, 32), max=4.9917 at (0, 79, 171), avg-magnitude=0.713, p90=1.0802, p95=1.4609, p99=2.3643
[I]             ---- Histogram ----
                Bin Range        |  Num Elems | Visualization
                (-6.03 , -4.93 ) |         80 | 
                (-4.93 , -3.82 ) |        141 | 
                (-3.82 , -2.72 ) |        343 | 
                (-2.72 , -1.62 ) |       1553 | ##
                (-1.62 , -0.518) |      11210 | #################
                (-0.518, 0.584 ) |      25704 | ########################################
                (0.584 , 1.69  ) |      10501 | ################
                (1.69  , 2.79  ) |       1377 | ##
                (2.79  , 3.89  ) |        240 | 
                (3.89  , 4.99  ) |         51 | 
[I]         Error Metrics: decoder_hidden_states
[I]             Minimum Required Tolerance: elemwise error | [abs=3.905] OR [rel=30657] (requirements may be lower if both abs/rel tolerances are set)
[I]             Absolute Difference | Stats: mean=0.4291, std-dev=0.38742, var=0.15009, median=0.3242, min=0 at (0, 92, 155), max=3.905 at (0, 106, 159), avg-magnitude=0.4291, p90=0.94021, p95=1.1949, p99=1.7619
[I]                 ---- Histogram ----
                    Bin Range      |  Num Elems | Visualization
                    (0    , 0.39 ) |      29305 | ########################################
                    (0.39 , 0.781) |      13941 | ###################
                    (0.781, 1.17 ) |       5220 | #######
                    (1.17 , 1.56 ) |       1796 | ##
                    (1.56 , 1.95 ) |        649 | 
                    (1.95 , 2.34 ) |        192 | 
                    (2.34 , 2.73 ) |         71 | 
                    (2.73 , 3.12 ) |         23 | 
                    (3.12 , 3.51 ) |          2 | 
                    (3.51 , 3.9  ) |          1 | 
[I]             Relative Difference | Stats: mean=3.8535, std-dev=142.28, var=20243, median=0.586, min=0 at (0, 92, 155), max=30657 at (0, 85, 138), avg-magnitude=3.8535, p90=3.808, p95=7.4858, p99=36.234
[I]                 ---- Histogram ----
                    Bin Range            |  Num Elems | Visualization
                    (0       , 3.07e+03) |      51197 | ########################################
                    (3.07e+03, 6.13e+03) |          1 | 
                    (6.13e+03, 9.2e+03 ) |          1 | 
                    (9.2e+03 , 1.23e+04) |          0 | 
                    (1.23e+04, 1.53e+04) |          0 | 
                    (1.53e+04, 1.84e+04) |          0 | 
                    (1.84e+04, 2.15e+04) |          0 | 
                    (2.15e+04, 2.45e+04) |          0 | 
                    (2.45e+04, 2.76e+04) |          0 | 
                    (2.76e+04, 3.07e+04) |          1 | 
[E]         FAILED | Output: 'decoder_hidden_states' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I]     Comparing Output: 'pred_boxes' (dtype=float32, shape=(1, 200, 4)) with 'pred_boxes' (dtype=float32, shape=(1, 200, 4))
[I]         Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I]         trt-runner-N0-01/05/26-15:07:18: pred_boxes | Stats: mean=0.52495, std-dev=0.20177, var=0.04071, median=0.52565, min=-0.097559 at (0, 89, 0), max=1.0239 at (0, 144, 3), avg-magnitude=0.52519, p90=0.77729, p95=0.8783, p99=0.98599
[I]             ---- Histogram ----
                Bin Range         |  Num Elems | Visualization
                (-0.0976, 0.0146) |          5 | #
                (0.0146 , 0.127 ) |         25 | #####
                (0.127  , 0.239 ) |         30 | ######
                (0.239  , 0.351 ) |         75 | ################
                (0.351  , 0.463 ) |        170 | #####################################
                (0.463  , 0.575 ) |        179 | ########################################
                (0.575  , 0.687 ) |        159 | ###################################
                (0.687  , 0.8   ) |         90 | ####################
                (0.8    , 0.912 ) |         36 | ########
                (0.912  , 1.02  ) |         31 | ######
[I]         onnxrt-runner-N0-01/05/26-15:07:18: pred_boxes | Stats: mean=0.51037, std-dev=0.25017, var=0.062584, median=0.50878, min=-0.0011068 at (0, 111, 0), max=1.0027 at (0, 26, 2), avg-magnitude=0.51038, p90=0.85718, p95=0.9608, p99=0.99875
[I]             ---- Histogram ----
                Bin Range         |  Num Elems | Visualization
                (-0.0976, 0.0146) |          9 | ##
                (0.0146 , 0.127 ) |         43 | ############
                (0.127  , 0.239 ) |         73 | #####################
                (0.239  , 0.351 ) |        102 | #############################
                (0.351  , 0.463 ) |        138 | ########################################
                (0.463  , 0.575 ) |        108 | ###############################
                (0.575  , 0.687 ) |        125 | ####################################
                (0.687  , 0.8   ) |         87 | #########################
                (0.8    , 0.912 ) |         55 | ###############
                (0.912  , 1.02  ) |         60 | #################
[I]         Error Metrics: pred_boxes
[I]             Minimum Required Tolerance: elemwise error | [abs=0.68987] OR [rel=151.79] (requirements may be lower if both abs/rel tolerances are set)
[I]             Absolute Difference | Stats: mean=0.077092, std-dev=0.087299, var=0.007621, median=0.046964, min=0.00022468 at (0, 70, 3), max=0.68987 at (0, 71, 0), avg-magnitude=0.077092, p90=0.19431, p95=0.2537, p99=0.38542
[I]                 ---- Histogram ----
                    Bin Range          |  Num Elems | Visualization
                    (0.000225, 0.0692) |        483 | ########################################
                    (0.0692  , 0.138 ) |        169 | #############
                    (0.138   , 0.207 ) |         78 | ######
                    (0.207   , 0.276 ) |         47 | ###
                    (0.276   , 0.345 ) |         14 | #
                    (0.345   , 0.414 ) |          3 | 
                    (0.414   , 0.483 ) |          3 | 
                    (0.483   , 0.552 ) |          0 | 
                    (0.552   , 0.621 ) |          0 | 
                    (0.621   , 0.69  ) |          3 | 
[I]             Relative Difference | Stats: mean=0.8091, std-dev=6.8001, var=46.241, median=0.099547, min=0.00048962 at (0, 70, 3), max=151.79 at (0, 40, 0), avg-magnitude=0.8091, p90=0.67171, p95=1.2416, p99=10.851
[I]                 ---- Histogram ----
                    Bin Range       |  Num Elems | Visualization
                    (0.00049, 15.2) |        792 | ########################################
                    (15.2   , 30.4) |          4 | 
                    (30.4   , 45.5) |          0 | 
                    (45.5   , 60.7) |          1 | 
                    (60.7   , 75.9) |          2 | 
                    (75.9   , 91.1) |          0 | 
                    (91.1   , 106 ) |          0 | 
                    (106    , 121 ) |          0 | 
                    (121    , 137 ) |          0 | 
                    (137    , 152 ) |          1 | 
[E]         FAILED | Output: 'pred_boxes' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I]     Comparing Output: 'pred_logits' (dtype=float32, shape=(1, 200)) with 'pred_logits' (dtype=float32, shape=(1, 200))
[I]         Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I]         trt-runner-N0-01/05/26-15:07:18: pred_logits | Stats: mean=-2.2528, std-dev=0.3234, var=0.10459, median=-2.2741, min=-3.3643 at (0, 167), max=-0.99596 at (0, 144), avg-magnitude=2.2528, p90=-1.8565, p95=-1.7238, p99=-1.3785
[I]             ---- Histogram ----
                Bin Range        |  Num Elems | Visualization
                (-3.36 , -3.13 ) |          1 | 
                (-3.13 , -2.89 ) |          2 | #
                (-2.89 , -2.65 ) |         14 | ########
                (-2.65 , -2.42 ) |         49 | ##############################
                (-2.42 , -2.18 ) |         64 | ########################################
                (-2.18 , -1.94 ) |         40 | #########################
                (-1.94 , -1.71 ) |         20 | ############
                (-1.71 , -1.47 ) |          5 | ###
                (-1.47 , -1.23 ) |          4 | ##
                (-1.23 , -0.996) |          1 | 
[I]         onnxrt-runner-N0-01/05/26-15:07:18: pred_logits | Stats: mean=-2.3136, std-dev=0.36742, var=0.135, median=-2.3567, min=-3.1664 at (0, 126), max=-1.0731 at (0, 175), avg-magnitude=2.3136, p90=-1.8362, p95=-1.7265, p99=-1.3385
[I]             ---- Histogram ----
                Bin Range        |  Num Elems | Visualization
                (-3.36 , -3.13 ) |          1 | 
                (-3.13 , -2.89 ) |          8 | ######
                (-2.89 , -2.65 ) |         29 | #######################
                (-2.65 , -2.42 ) |         43 | ##################################
                (-2.42 , -2.18 ) |         50 | ########################################
                (-2.18 , -1.94 ) |         39 | ###############################
                (-1.94 , -1.71 ) |         20 | ################
                (-1.71 , -1.47 ) |          6 | ####
                (-1.47 , -1.23 ) |          3 | ##
                (-1.23 , -0.996) |          1 | 
[I]         Error Metrics: pred_logits
[I]             Minimum Required Tolerance: elemwise error | [abs=1.2043] OR [rel=0.96577] (requirements may be lower if both abs/rel tolerances are set)
[I]             Absolute Difference | Stats: mean=0.29036, std-dev=0.22991, var=0.052859, median=0.2241, min=0.00070286 at (0, 67), max=1.2043 at (0, 89), avg-magnitude=0.29036, p90=0.62114, p95=0.76967, p99=0.93489
[I]                 ---- Histogram ----
                    Bin Range         |  Num Elems | Visualization
                    (0.000703, 0.121) |         49 | ####################################
                    (0.121   , 0.241) |         53 | ########################################
                    (0.241   , 0.362) |         39 | #############################
                    (0.362   , 0.482) |         24 | ##################
                    (0.482   , 0.603) |         12 | #########
                    (0.603   , 0.723) |         10 | #######
                    (0.723   , 0.843) |          7 | #####
                    (0.843   , 0.964) |          4 | ###
                    (0.964   , 1.08 ) |          0 | 
                    (1.08    , 1.2  ) |          2 | #
[I]             Relative Difference | Stats: mean=0.13184, std-dev=0.12588, var=0.015846, median=0.09891, min=0.00028166 at (0, 67), max=0.96577 at (0, 89), avg-magnitude=0.13184, p90=0.26058, p95=0.35903, p99=0.56951
[I]                 ---- Histogram ----
                    Bin Range          |  Num Elems | Visualization
                    (0.000282, 0.0968) |         98 | ########################################
                    (0.0968  , 0.193 ) |         62 | #########################
                    (0.193   , 0.29  ) |         23 | #########
                    (0.29    , 0.386 ) |          7 | ##
                    (0.386   , 0.483 ) |          5 | ##
                    (0.483   , 0.58  ) |          3 | #
                    (0.58    , 0.676 ) |          1 | 
                    (0.676   , 0.773 ) |          0 | 
                    (0.773   , 0.869 ) |          0 | 
                    (0.869   , 0.966 ) |          1 | 
[E]         FAILED | Output: 'pred_logits' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I]     Comparing Output: 'presence_logits' (dtype=float32, shape=(1, 1)) with 'presence_logits' (dtype=float32, shape=(1, 1))
[I]         Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I]         trt-runner-N0-01/05/26-15:07:18: presence_logits | Stats: mean=-3.6407, std-dev=0, var=0, median=-3.6407, min=-3.6407 at (0, 0), max=-3.6407 at (0, 0), avg-magnitude=3.6407, p90=-3.6407, p95=-3.6407, p99=-3.6407
[I]             ---- Values ----
                    [[-3.640685]]
[I]             ---- Histogram ----
                Bin Range      |  Num Elems | Visualization
                (-3.64, -3.52) |          1 | ########################################
                (-3.52, -3.41) |          0 | 
                (-3.41, -3.29) |          0 | 
                (-3.29, -3.18) |          0 | 
                (-3.18, -3.06) |          0 | 
                (-3.06, -2.95) |          0 | 
                (-2.95, -2.83) |          0 | 
                (-2.83, -2.71) |          0 | 
                (-2.71, -2.6 ) |          0 | 
                (-2.6 , -2.48) |          0 | 
[I]         onnxrt-runner-N0-01/05/26-15:07:18: presence_logits | Stats: mean=-2.4821, std-dev=0, var=0, median=-2.4821, min=-2.4821 at (0, 0), max=-2.4821 at (0, 0), avg-magnitude=2.4821, p90=-2.4821, p95=-2.4821, p99=-2.4821
[I]             ---- Values ----
                    [[-2.482077]]
[I]             ---- Histogram ----
                Bin Range      |  Num Elems | Visualization
                (-3.64, -3.52) |          0 | 
                (-3.52, -3.41) |          0 | 
                (-3.41, -3.29) |          0 | 
                (-3.29, -3.18) |          0 | 
                (-3.18, -3.06) |          0 | 
                (-3.06, -2.95) |          0 | 
                (-2.95, -2.83) |          0 | 
                (-2.83, -2.71) |          0 | 
                (-2.71, -2.6 ) |          0 | 
                (-2.6 , -2.48) |          1 | ########################################
[I]         Error Metrics: presence_logits
[I]             Minimum Required Tolerance: elemwise error | [abs=1.1586] OR [rel=0.46679] (requirements may be lower if both abs/rel tolerances are set)
[I]             Absolute Difference | Stats: mean=1.1586, std-dev=0, var=0, median=1.1586, min=1.1586 at (0, 0), max=1.1586 at (0, 0), avg-magnitude=1.1586, p90=1.1586, p95=1.1586, p99=1.1586
[I]                 ---- Values ----
                        [[1.1586082]]
[I]                 ---- Histogram ----
                    Bin Range      |  Num Elems | Visualization
                    (0.659, 0.759) |          0 | 
                    (0.759, 0.859) |          0 | 
                    (0.859, 0.959) |          0 | 
                    (0.959, 1.06 ) |          0 | 
                    (1.06 , 1.16 ) |          0 | 
                    (1.16 , 1.26 ) |          1 | ########################################
                    (1.26 , 1.36 ) |          0 | 
                    (1.36 , 1.46 ) |          0 | 
                    (1.46 , 1.56 ) |          0 | 
                    (1.56 , 1.66 ) |          0 | 
[I]             Relative Difference | Stats: mean=0.46679, std-dev=0, var=0, median=0.46679, min=0.46679 at (0, 0), max=0.46679 at (0, 0), avg-magnitude=0.46679, p90=0.46679, p95=0.46679, p99=0.46679
[I]                 ---- Values ----
                        [[0.4667898]]
[I]                 ---- Histogram ----
                    Bin Range         |  Num Elems | Visualization
                    (-0.0332, 0.0668) |          0 | 
                    (0.0668 , 0.167 ) |          0 | 
                    (0.167  , 0.267 ) |          0 | 
                    (0.267  , 0.367 ) |          0 | 
                    (0.367  , 0.467 ) |          0 | 
                    (0.467  , 0.567 ) |          1 | ########################################
                    (0.567  , 0.667 ) |          0 | 
                    (0.667  , 0.767 ) |          0 | 
                    (0.767  , 0.867 ) |          0 | 
                    (0.867  , 0.967 ) |          0 | 
[E]         FAILED | Output: 'presence_logits' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I]     Comparing Output: 'pred_masks' (dtype=float32, shape=(1, 200, 288, 288)) with 'pred_masks' (dtype=float32, shape=(1, 200, 288, 288))
[I]         Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I]         trt-runner-N0-01/05/26-15:07:18: pred_masks | Stats: mean=-17.966, std-dev=12.069, var=145.66, median=-17.336, min=-78.59 at (0, 24, 230, 154), max=9.0686 at (0, 33, 192, 166), avg-magnitude=17.987, p90=-1.9971, p95=-0.5272, p99=0.28164
[I]             ---- Histogram ----
                Bin Range        |  Num Elems | Visualization
                (-100  , -89.1 ) |          0 | 
                (-89.1 , -78.1 ) |          1 | 
                (-78.1 , -67   ) |       1281 | 
                (-67   , -56   ) |      31777 | 
                (-56   , -45   ) |     299428 | ##
                (-45   , -34   ) |    1390408 | ##########
                (-34   , -23   ) |    3754791 | ###########################
                (-23   , -11.9 ) |    5382122 | ########################################
                (-11.9 , -0.927) |    4604179 | ##################################
                (-0.927, 10.1  ) |    1124813 | ########
[I]         onnxrt-runner-N0-01/05/26-15:07:18: pred_masks | Stats: mean=-19.907, std-dev=13.444, var=180.73, median=-18.39, min=-100.1 at (0, 100, 184, 157), max=10.092 at (0, 158, 282, 215), avg-magnitude=19.925, p90=-3.4109, p95=-1.288, p99=0.17772
[I]             ---- Histogram ----
                Bin Range        |  Num Elems | Visualization
                (-100  , -89.1 ) |        436 | 
                (-89.1 , -78.1 ) |       3340 | 
                (-78.1 , -67   ) |      37138 | 
                (-67   , -56   ) |     173544 | #
                (-56   , -45   ) |     582371 | ####
                (-45   , -34   ) |    1682030 | ############
                (-34   , -23   ) |    3667927 | ############################
                (-23   , -11.9 ) |    5196738 | ########################################
                (-11.9 , -0.927) |    4558279 | ###################################
                (-0.927, 10.1  ) |     686997 | #####
[I]         Error Metrics: pred_masks
[I]             Minimum Required Tolerance: elemwise error | [abs=71.439] OR [rel=8.2647e+06] (requirements may be lower if both abs/rel tolerances are set)
[I]             Absolute Difference | Stats: mean=8.4159, std-dev=7.7006, var=59.299, median=6.2722, min=0 at (0, 4, 227, 131), max=71.439 at (0, 114, 172, 144), avg-magnitude=8.4159, p90=19.056, p95=23.809, p99=33.912
[I]                 ---- Histogram ----
                    Bin Range    |  Num Elems | Visualization
                    (0   , 7.14) |    9077192 | ########################################
                    (7.14, 14.3) |    4377686 | ###################
                    (14.3, 21.4) |    1950703 | ########
                    (21.4, 28.6) |     787138 | ###
                    (28.6, 35.7) |     272937 | #
                    (35.7, 42.9) |      85912 | 
                    (42.9, 50  ) |      26309 | 
                    (50  , 57.2) |       9677 | 
                    (57.2, 64.3) |       1195 | 
                    (64.3, 71.4) |         51 | 
[I]             Relative Difference | Stats: mean=2.5923, std-dev=2148.1, var=4.6144e+06, median=0.39781, min=0 at (0, 4, 227, 131), max=8.2647e+06 at (0, 134, 159, 97), avg-magnitude=2.5923, p90=1.1935, p95=2.3632, p99=11.902
[I]                 ---- Histogram ----
                    Bin Range            |  Num Elems | Visualization
                    (0       , 8.26e+05) |   16588798 | ########################################
                    (8.26e+05, 1.65e+06) |          0 | 
                    (1.65e+06, 2.48e+06) |          0 | 
                    (2.48e+06, 3.31e+06) |          1 | 
                    (3.31e+06, 4.13e+06) |          0 | 
                    (4.13e+06, 4.96e+06) |          0 | 
                    (4.96e+06, 5.79e+06) |          0 | 
                    (5.79e+06, 6.61e+06) |          0 | 
                    (6.61e+06, 7.44e+06) |          0 | 
                    (7.44e+06, 8.26e+06) |          1 | 
[E]         FAILED | Output: 'pred_masks' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[I]     Comparing Output: 'semantic_seg' (dtype=float32, shape=(1, 1, 288, 288)) with 'semantic_seg' (dtype=float32, shape=(1, 1, 288, 288))
[I]         Tolerance: [abs=1e-08, rel=1e-05] | Checking elemwise error
[I]         trt-runner-N0-01/05/26-15:07:18: semantic_seg | Stats: mean=-0.31233, std-dev=0.57507, var=0.33071, median=-0.31994, min=-8.318 at (0, 0, 167, 1), max=3.231 at (0, 0, 162, 187), avg-magnitude=0.47049, p90=0.25276, p95=0.54216, p99=1.2357
[I]             ---- Histogram ----
                Bin Range          |  Num Elems | Visualization
                (-15.3  , -13.4  ) |          0 | 
                (-13.4  , -11.5  ) |          0 | 
                (-11.5  , -9.6   ) |          0 | 
                (-9.6   , -7.7   ) |          1 | 
                (-7.7   , -5.8   ) |          4 | 
                (-5.8   , -3.9   ) |         95 | 
                (-3.9   , -2     ) |       1095 | 
                (-2     , -0.0949) |      59160 | ########################################
                (-0.0949, 1.81   ) |      22351 | ###############
                (1.81   , 3.71   ) |        238 | 
[I]         onnxrt-runner-N0-01/05/26-15:07:18: semantic_seg | Stats: mean=-2.5686, std-dev=1.2996, var=1.6889, median=-2.9381, min=-15.308 at (0, 0, 287, 97), max=3.7083 at (0, 0, 117, 179), avg-magnitude=2.6896, p90=-0.51426, p95=0.13506, p99=2.0755
[I]             ---- Histogram ----
                Bin Range          |  Num Elems | Visualization
                (-15.3  , -13.4  ) |          4 | 
                (-13.4  , -11.5  ) |          9 | 
                (-11.5  , -9.6   ) |         16 | 
                (-9.6   , -7.7   ) |         36 | 
                (-7.7   , -5.8   ) |        441 | 
                (-5.8   , -3.9   ) |       1951 | #
                (-3.9   , -2     ) |      64646 | ########################################
                (-2     , -0.0949) |      10397 | ######
                (-0.0949, 1.81   ) |       4274 | ##
                (1.81   , 3.71   ) |       1170 | 
[I]         Error Metrics: semantic_seg
[I]             Minimum Required Tolerance: elemwise error | [abs=13.127] OR [rel=6008.1] (requirements may be lower if both abs/rel tolerances are set)
[I]             Absolute Difference | Stats: mean=2.3233, std-dev=0.89382, var=0.79891, median=2.5438, min=1.0788e-05 at (0, 0, 130, 156), max=13.127 at (0, 0, 287, 97), avg-magnitude=2.3233, p90=3.1925, p95=3.3673, p99=3.7743
[I]                 ---- Histogram ----
                    Bin Range        |  Num Elems | Visualization
                    (1.08e-05, 1.31) |      12790 | ##############
                    (1.31    , 2.63) |      33420 | ####################################
                    (2.63    , 3.94) |      36182 | ########################################
                    (3.94    , 5.25) |        472 | 
                    (5.25    , 6.56) |         43 | 
                    (6.56    , 7.88) |         16 | 
                    (7.88    , 9.19) |          8 | 
                    (9.19    , 10.5) |          9 | 
                    (10.5    , 11.8) |          3 | 
                    (11.8    , 13.1) |          1 | 
[I]             Relative Difference | Stats: mean=1.2131, std-dev=22.438, var=503.48, median=0.87599, min=3.3699e-05 at (0, 0, 130, 156), max=6008.1 at (0, 0, 163, 181), avg-magnitude=1.2131, p90=1.0899, p95=1.3205, p99=4.7536
[I]                 ---- Histogram ----
                    Bin Range            |  Num Elems | Visualization
                    (3.37e-05, 601     ) |      82939 | ########################################
                    (601     , 1.2e+03 ) |          3 | 
                    (1.2e+03 , 1.8e+03 ) |          1 | 
                    (1.8e+03 , 2.4e+03 ) |          0 | 
                    (2.4e+03 , 3e+03   ) |          0 | 
                    (3e+03   , 3.6e+03 ) |          0 | 
                    (3.6e+03 , 4.21e+03) |          0 | 
                    (4.21e+03, 4.81e+03) |          0 | 
                    (4.81e+03, 5.41e+03) |          0 | 
                    (5.41e+03, 6.01e+03) |          1 | 
[E]         FAILED | Output: 'semantic_seg' | Difference exceeds tolerance (rel=1e-05, abs=1e-08)
[E]     FAILED | Mismatched outputs: ['vision_feature_0', 'vision_feature_1', 'vision_feature_2', 'text_features', 'encoder_hidden_state', 'decoder_hidden_states', 'pred_boxes', 'pred_logits', 'presence_logits', 'pred_masks', 'semantic_seg']
[E] Accuracy Summary | trt-runner-N0-01/05/26-15:07:18 vs. onnxrt-runner-N0-01/05/26-15:07:18 | Passed: 0/1 iterations | Pass Rate: 0.0%
Traceback (most recent call last):
  File "/home/zy/open_project/SAM3-TensorRT/python/polygraphy_test.py", line 45, in <module>
    main()
  File "/home/zy/open_project/SAM3-TensorRT/python/polygraphy_test.py", line 37, in main
    assert bool(Comparator.compare_accuracy(run_results, compare_func=CompareFunc.simple(atol=1e-8)))
AssertionError

There are large differences in the encoder_hidden_state output, which suggests that the detr_encoder introduces part of the gap.
Have you noticed this gap on your device?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions