The variable `outputs` is expected to have size (4, 4), but I got size (1,).

Environment:
- TensorRT 10.0
- CUDA 12.2
- Python 3.8.10
- RTX 3090

Here is the script. It goes wrong in the function `common.allocate_buffers`: when I pass the `profile_idx`, the shape of 'output' comes out as [81, 81, 81], and I don't know where that shape comes from. You can see the relevant source code in the second section below.
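A small diagnostic sketch I put together (it assumes the `engine` built in the script below and profile index 0; I am not sure whether `get_tensor_profile_shape` is even meant to be called on output tensors):

```python
import tensorrt as trt

# Hypothetical diagnostic (names assume the script below): print the shape
# information that allocate_buffers would see for each I/O tensor.
for i in range(engine.num_io_tensors):
    name = engine.get_tensor_name(i)
    mode = engine.get_tensor_mode(name)                       # TensorIOMode.INPUT or .OUTPUT
    print(name, mode, tuple(engine.get_tensor_shape(name)))   # -1 marks dynamic dims
    if mode == trt.TensorIOMode.INPUT:
        # (min, opt, max) shapes are defined per optimization profile for inputs.
        print("  profile 0:", engine.get_tensor_profile_shape(name, 0))
```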
### My code

```python
import tensorrt as trt
import torch.nn
import numpy as np
import torch
import sys
sys.path.append('/root/nas/TensorRT-10.0.0.6/samples/python/')
import common
class TestModel(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.mlp = torch.nn.Linear(32, 4)

    def forward(self, x):
        return self.mlp(x)
model_path = "/root/nas/data/trt/model.onnx"
batch_size = 4
model = TestModel()
model.eval()
input_tensor = torch.randn(batch_size, 32)
print(model(input_tensor))
model_script = torch.jit.script(model)
torch.onnx.export(
    model,                      # model being run
    input_tensor,               # model input (or a tuple for multiple inputs)
    model_path,                 # where to save the model (can be a file or file-like object)
    export_params=True,         # store the trained parameter weights inside the model file
    opset_version=14,           # the ONNX version to export the model to
    do_constant_folding=True,   # whether to execute constant folding for optimization
    input_names=['input'],      # the model's input names
    output_names=['output'],    # the model's output names
    dynamic_axes={'input': {0: 'batch_size'}, 'output': {0: 'batch_size'}}
)
# The Build Phase
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
# Creating a Network Definition in Python
network = builder.create_network()
# Importing a Model Using the ONNX Parser
parser = trt.OnnxParser(network, logger)
with open(model_path, "rb") as model:
    if not parser.parse(model.read()):
        print("ERROR: Failed to parse the ONNX file.")
        for error in range(parser.num_errors):
            print(parser.get_error(error))
# Building an Engine
config = builder.create_builder_config()
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 1 << 30)  # 1 GiB
profile = builder.create_optimization_profile()
inputTensor = network.get_input(0)
profile.set_shape(inputTensor.name, (1, 32), (1, 32), (batch_size, 32))
config.add_optimization_profile(profile)
serialized_engine = builder.build_serialized_network(network, config)
# Deserializing a Plan
runtime = trt.Runtime(logger)
engine = runtime.deserialize_cuda_engine(serialized_engine)
context = engine.create_execution_context()
inputs, outputs, bindings, stream = common.allocate_buffers(engine, 0)
# Performing Inference
context.set_optimization_profile_async(0, stream)
input_batch = input_tensor.numpy()
inputs[0].host = input_batch.ravel()
context.set_input_shape('input', input_batch.shape)
assert context.all_binding_shapes_specified
trt_outputs = common.do_inference(context, engine=engine, bindings=bindings,
                                  inputs=inputs, outputs=outputs, stream=stream)
print(trt_outputs)
```

### Source code of TensorRT `TensorRT-10.0.0.6/samples/python/common_runtime.py`

```python
def allocate_buffers(engine: trt.ICudaEngine, profile_idx: Optional[int] = None):
    inputs = []
    outputs = []
    bindings = []
    stream = cuda_call(cudart.cudaStreamCreate())
    tensor_names = [engine.get_tensor_name(i) for i in range(engine.num_io_tensors)]
    for binding in tensor_names:
        # get_tensor_profile_shape returns (min_shape, optimal_shape, max_shape)
        # Pick out the max shape to allocate enough memory for the binding.
        shape = engine.get_tensor_shape(binding) if profile_idx is None else engine.get_tensor_profile_shape(binding, profile_idx)[-1]
```
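If the problem is that `get_tensor_profile_shape` is being asked about the output tensor, a possible workaround (just a sketch under that assumption, not a confirmed fix) is to set the input shape on the execution context first and let the context report concrete output shapes:

```python
import numpy as np
import tensorrt as trt

# Hypothetical workaround sketch: size buffers from the execution context after
# the actual input shape is set, instead of asking the engine for a profile
# shape on the output tensor. Assumes `engine`, `context`, and `input_batch`
# from the script above.
context.set_input_shape('input', input_batch.shape)
for i in range(engine.num_io_tensors):
    name = engine.get_tensor_name(i)
    shape = context.get_tensor_shape(name)           # concrete, e.g. (4, 4) for 'output'
    dtype = np.dtype(trt.nptype(engine.get_tensor_dtype(name)))
    nbytes = trt.volume(shape) * dtype.itemsize
    print(name, tuple(shape), nbytes, "bytes")
```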