Skip to content

wrong size of outputs when using dynamic batch size with tensorrt 10.0 #3821

@lyxcc127

Description

@lyxcc127

The size of the `outputs` variable is expected to be (4, 4), but I got size (1,).

tensorrt 10.0
cuda 12.2
python 3.8.10
rtx 3090

Here is the script. It goes wrong in the function 'common.allocate_buffers': when I pass the profile index, the shape of 'output' is [81,81,81], and I don't know where that shape comes from. You can see the relevant TensorRT source code in the second part below:

### my code
import tensorrt as trt
import torch.nn
import numpy as np
import torch

import sys

sys.path.append('/root/nas/TensorRT-10.0.0.6/samples/python/')
import common


class TestModel(torch.nn.Module):
    """Minimal module: a single linear layer mapping 32 features to 4 outputs."""

    def __init__(self):
        super().__init__()
        # The attribute name is kept as-is because it determines the parameter
        # names of the module ("mlp.weight", "mlp.bias").
        self.mlp = torch.nn.Linear(in_features=32, out_features=4)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        projected = self.mlp(x)
        return projected


# Destination of the exported ONNX file and the batch size of the example input.
model_path = "/root/nas/data/trt/model.onnx"
batch_size = 4

# Build the PyTorch model in inference mode and print its result on a random
# batch, so there is a reference to compare the TensorRT result against.
model = TestModel()
model.eval()
input_tensor = torch.randn(batch_size, 32)
print(model(input_tensor))

# Export the eager module directly. The previous torch.jit.script(model) result
# was never used anywhere in the script, so that call has been dropped.
torch.onnx.export(
    model,  # model being run
    input_tensor,  # model input (or a tuple for multiple inputs)
    model_path,  # where to save the model (can be a file or file-like object)
    export_params=True,  # store the trained parameter weights inside the model file
    opset_version=14,  # the ONNX version to export the model to
    do_constant_folding=True,  # whether to execute constant folding for optimization
    input_names=['input'],  # the model's input names
    output_names=['output'],  # the model's output names
    # Mark axis 0 of both tensors as dynamic so the batch size can vary.
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}},
)

# The Build Phase
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
# Creating a Network Definition in Python
network = builder.create_network()
# Importing a Model Using the ONNX Parser
parser = trt.OnnxParser(network, logger)
# A distinct name is used for the file handle so that the PyTorch `model`
# object defined earlier in the script is not shadowed.
with open(model_path, "rb") as onnx_file:
    parsed_ok = parser.parse(onnx_file.read())
if not parsed_ok:
    print("ERROR: Failed to parse the ONNX file.")
    for error in range(parser.num_errors):
        print(parser.get_error(error))
    # Stop here instead of trying to build an engine from a network that
    # failed to parse.
    raise RuntimeError("ERROR: Failed to parse the ONNX file.")

# Building an Engine
config = builder.create_builder_config()
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1 GiB
profile = builder.create_optimization_profile()
inputTensor = network.get_input(0)
# (min, opt, max) input shapes: the batch dimension ranges from 1 to batch_size.
profile.set_shape(inputTensor.name, (1, 32), (1, 32), (batch_size, 32))
config.add_optimization_profile(profile)
serialized_engine = builder.build_serialized_network(network, config)
if serialized_engine is None:
    # Fail early with a clear message rather than passing None to the runtime.
    raise RuntimeError("ERROR: Failed to build the serialized TensorRT engine.")

# Deserializing a Plan
runtime = trt.Runtime(logger)
engine = runtime.deserialize_cuda_engine(serialized_engine)
context = engine.create_execution_context()

# NOTE(review): with a profile index, the sample helper sizes every buffer from
# engine.get_tensor_profile_shape(name, profile_idx)[-1], including the one for
# 'output'. That API appears to be documented for input tensors only, so the
# size chosen for the output buffer should be verified -- TODO confirm.
inputs, outputs, bindings, stream = common.allocate_buffers(engine, 0)

# Performing Inference
context.set_optimization_profile_async(0, stream)
input_batch = input_tensor.numpy()
inputs[0].host = input_batch.ravel()
context.set_input_shape('input', input_batch.shape)
# Explicit check instead of `assert`, which is skipped when Python runs with -O.
if not context.all_binding_shapes_specified:
    raise RuntimeError("ERROR: Not all input shapes have been specified on the execution context.")

trt_outputs = common.do_inference(context, engine=engine, bindings=bindings,
                                  inputs=inputs, outputs=outputs, stream=stream)
print(trt_outputs)
### source code of tensorrt "TensorRT-10.0.0.6/samples/python/common_runtime.py"
def allocate_buffers(engine: trt.ICudaEngine, profile_idx: Optional[int] = None):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda_call(cudart.cudaStreamCreate())
    tensor_names = [engine.get_tensor_name(i) for i in range(engine.num_io_tensors)]
    for binding in tensor_names:
        # get_tensor_profile_shape returns (min_shape, optimal_shape, max_shape)
        # Pick out the max shape to allocate enough memory for the binding.
        shape = engine.get_tensor_shape(binding) if profile_idx is None else engine.get_tensor_profile_shape(binding, profile_idx)[-1]

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions