The variable `outputs` is expected to have size (4, 4), but I got size (1,).

Environment:
- TensorRT 10.0
- CUDA 12.2
- Python 3.8.10
- RTX 3090

Here is the script. It goes wrong in the function `common.allocate_buffers`: when I pass the `profile_idx`, the shape of 'output' comes out as [81, 81, 81], and I don't know where that shape comes from. You can see the relevant source code in the second section below.
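A small diagnostic sketch I put together (it assumes the `engine` built in the script below and profile index 0; I am not sure whether `get_tensor_profile_shape` is even meant to be called on output tensors):

```python
import tensorrt as trt

# Hypothetical diagnostic (names assume the script below): print the shape
# information that allocate_buffers would see for each I/O tensor.
for i in range(engine.num_io_tensors):
    name = engine.get_tensor_name(i)
    mode = engine.get_tensor_mode(name)                       # TensorIOMode.INPUT or .OUTPUT
    print(name, mode, tuple(engine.get_tensor_shape(name)))   # -1 marks dynamic dims
    if mode == trt.TensorIOMode.INPUT:
        # (min, opt, max) shapes are defined per optimization profile for inputs.
        print("  profile 0:", engine.get_tensor_profile_shape(name, 0))
```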
### My code

```python
import tensorrt as trt
import torch.nn
import numpy as np
import torch
import sys
sys.path.append('/root/nas/TensorRT-10.0.0.6/samples/python/')
import common
class TestModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.mlp = torch.nn.Linear(32, 4)

    def forward(self, x):
        return self.mlp(x)
model_path = "/root/nas/data/trt/model.onnx"
batch_size = 4
model = TestModel()
model.eval()
input_tensor = torch.randn(batch_size, 32)
print(model(input_tensor))
model_script = torch.jit.script(model)
torch.onnx.export(
    model,                      # model being run
    input_tensor,               # model input (or a tuple for multiple inputs)
    model_path,                 # where to save the model (can be a file or file-like object)
    export_params=True,         # store the trained parameter weights inside the model file
    opset_version=14,           # the ONNX version to export the model to
    do_constant_folding=True,   # whether to execute constant folding for optimization
    input_names=['input'],      # the model's input names
    output_names=['output'],    # the model's output names
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
)
# The Build Phase
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
# Creating a Network Definition in Python
network = builder.create_network()
# Importing a Model Using the ONNX Parser
parser = trt.OnnxParser(network, logger)
with open(model_path, "rb") as model:
    if not parser.parse(model.read()):
        print("ERROR: Failed to parse the ONNX file.")
        for error in range(parser.num_errors):
            print(parser.get_error(error))
# Building an Engine
config = builder.create_builder_config()
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1 GiB
profile = builder.create_optimization_profile()
inputTensor = network.get_input(0)
profile.set_shape(inputTensor.name, (1, 32), (1, 32), (batch_size, 32))
config.add_optimization_profile(profile)
serialized_engine = builder.build_serialized_network(network, config)
# Deserializing a Plan
runtime = trt.Runtime(logger)
engine = runtime.deserialize_cuda_engine(serialized_engine)
context = engine.create_execution_context()
inputs, outputs, bindings, stream = common.allocate_buffers(engine, 0)
# Performing Inference
context.set_optimization_profile_async(0, stream)
input_batch = input_tensor.numpy()
inputs[0].host = input_batch.ravel()
context.set_input_shape('input', input_batch.shape)
assert context.all_binding_shapes_specified
trt_outputs = common.do_inference(context, engine=engine, bindings=bindings,
                                  inputs=inputs, outputs=outputs, stream=stream)
print(trt_outputs)
```

### Source code of TensorRT `TensorRT-10.0.0.6/samples/python/common_runtime.py`

```python
def allocate_buffers(engine: trt.ICudaEngine, profile_idx: Optional[int] = None):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda_call(cudart.cudaStreamCreate())
    tensor_names = [engine.get_tensor_name(i) for i in range(engine.num_io_tensors)]
    for binding in tensor_names:
        # get_tensor_profile_shape returns (min_shape, optimal_shape, max_shape)
        # Pick out the max shape to allocate enough memory for the binding.
        shape = engine.get_tensor_shape(binding) if profile_idx is None else engine.get_tensor_profile_shape(binding, profile_idx)[-1]
```
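If the problem is that `get_tensor_profile_shape` is being asked about the output tensor, a possible workaround (just a sketch under that assumption, not a confirmed fix) is to set the input shape on the execution context first and let the context report concrete output shapes:

```python
import numpy as np
import tensorrt as trt

# Hypothetical workaround sketch: size buffers from the execution context after
# the actual input shape is set, instead of asking the engine for a profile
# shape on the output tensor. Assumes `engine`, `context`, and `input_batch`
# from the script above.
context.set_input_shape('input', input_batch.shape)
for i in range(engine.num_io_tensors):
    name = engine.get_tensor_name(i)
    shape = context.get_tensor_shape(name)           # concrete, e.g. (4, 4) for 'output'
    dtype = np.dtype(trt.nptype(engine.get_tensor_dtype(name)))
    nbytes = trt.volume(shape) * dtype.itemsize
    print(name, tuple(shape), nbytes, "bytes")
```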