
Commit 3b9c961

10.10-GA Release (#1025)
Signed-off-by: Kevin Chen <[email protected]>
1 parent: d5dce67

15 files changed, +283 -173 lines

CMakeLists.txt

Lines changed: 5 additions & 5 deletions

@@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
 # Version information
 #--------------------------------------------------
 set(ONNX2TRT_MAJOR 10)
-set(ONNX2TRT_MINOR 9)
+set(ONNX2TRT_MINOR 10)
 set(ONNX2TRT_PATCH 0)
 set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")

@@ -107,16 +107,16 @@ find_path(TENSORRT_INCLUDE_DIR NvInfer.h
 MESSAGE(STATUS "Found TensorRT headers at ${TENSORRT_INCLUDE_DIR}")

 # TensorRT Python Headers
-find_path(TENSORRT_PYTHON_INCLUDE_DIR plugin.h
+find_path(TENSORRT_PYTHON_INCLUDE_DIR NvInferPythonPlugin.h
     HINTS ${TENSORRT_ROOT}
     PATH_SUFFIXES python/include/impl)

 # If header is not found, download it from open source release.
 if(NOT TENSORRT_PYTHON_INCLUDE_DIR)
-    set(PLUGIN_URL "https://raw.githubusercontent.com/NVIDIA/TensorRT/refs/heads/release/${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}/python/include/impl/plugin.h")
-    set(FILE_DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/plugin.h")
+    set(PLUGIN_URL "https://raw.githubusercontent.com/NVIDIA/TensorRT/refs/heads/release/${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}/python/include/impl/NvInferPythonPlugin.h")
+    set(FILE_DESTINATION "${CMAKE_CURRENT_BINARY_DIR}/NvInferPythonPlugin.h")

-    message(NOTICE "Required header plugin.h not found. Downloading from ${PLUGIN_URL} to ${FILE_DESTINATION}")
+    message(NOTICE "Required header NvInferPythonPlugin.h not found. Downloading from ${PLUGIN_URL} to ${FILE_DESTINATION}")

     file(DOWNLOAD ${PLUGIN_URL} ${FILE_DESTINATION}
         SHOW_PROGRESS

ImporterContext.hpp

Lines changed: 10 additions & 4 deletions

@@ -358,12 +358,18 @@ class ImporterContext
     }
     void addLayerOutputTensors(std::string name, std::vector<TensorOrWeights> const& outputs)
     {
+        static std::unordered_set<std::string> duplicateNames;
         if (mNodeNameToTensor.find(name) != mNodeNameToTensor.end())
         {
-            auto* ctx = this; // For logging
-            LOG_WARNING(
-                "A node named " << name
-                                << " already exists, the output tensors of this new instance will not be queryable.");
+            // Log once (only if insertion succeeded) for each unique name.
+            auto result = duplicateNames.insert(name);
+            if (result.second)
+            {
+                auto* ctx = this; // For logging
+                LOG_WARNING(
+                    "A node named " << name
+                                    << " already exists, the output tensors of this new instance will not be queryable.");
+            }
             return;
         }
         for (auto const& output : outputs)

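The change above throttles a repeated warning by tracking names in a function-local static set: `std::unordered_set::insert` returns a `std::pair<iterator, bool>` whose `bool` member is true only on first insertion. A minimal standalone sketch of the same log-once pattern (the `warn` helper and names below are illustrative, not parser code):

    #include <iostream>
    #include <string>
    #include <unordered_set>

    // Hypothetical stand-in for LOG_WARNING; the real macro logs through the parser's ILogger.
    void warn(std::string const& msg)
    {
        std::cerr << "[W] " << msg << std::endl;
    }

    void reportDuplicate(std::string const& name)
    {
        static std::unordered_set<std::string> seen;
        // insert() returns {iterator, bool}; the bool is true only the first time a
        // name is added, so each duplicate node name produces exactly one warning.
        if (seen.insert(name).second)
        {
            warn("A node named " + name + " already exists.");
        }
    }

    int main()
    {
        reportDuplicate("conv1"); // logs
        reportDuplicate("conv1"); // silent
        reportDuplicate("conv2"); // logs
    }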
ModelImporter.cpp

Lines changed: 50 additions & 11 deletions

@@ -113,7 +113,7 @@ bool isNodeInPluginRegistry(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto co
     OnnxAttrs attrs(node, ctx);
     std::string const pluginVersion{attrs.get<std::string>("plugin_version", "1")};
     std::string const pluginNamespace{attrs.get<std::string>("plugin_namespace", "")};
-    LOG_INFO("Checking if node can be treated as plugin: " << node.op_type() << ", plugin_version: " << pluginVersion
+    LOG_VERBOSE("Checking if node can be treated as plugin: " << node.op_type() << ", plugin_version: " << pluginVersion
         << ", plugin_namespace: " << pluginNamespace);
     nvinfer1::IPluginCreatorInterface* creator
         = importPluginCreator(ctx, node.op_type(), pluginVersion, pluginNamespace);

@@ -162,6 +162,26 @@ void parseNode(
     }
     LOG_VERBOSE(ssInputs.str());

+    // UINT8 weights that are not Q/DQ inputs will be converted to INT32
+    if (node.op_type() != "QuantizeLinear" && node.op_type() != "DequantizeLinear")
+    {
+        for (auto& nodeInput : nodeInputs)
+        {
+            if (nodeInput.is_weights()
+                && nodeInput.weights().type == static_cast<int32_t>(::ONNX_NAMESPACE::TensorProto::UINT8))
+            {
+                auto weights = nodeInput.weights();
+                LOG_WARNING("UINT8 data " << weights.name << " is being converted to INT32.");
+                auto uint8_data = static_cast<uint8_t*>(weights.values);
+                int32_t* int32_data = ctx->getWeightsContext().convertUINT8(uint8_data, weights.shape);
+                auto int32ShapedWeights = ShapedWeights(
+                    static_cast<int32_t>(::ONNX_NAMESPACE::TensorProto::INT32), int32_data, weights.shape);
+                ctx->tensors()[std::string(weights.name)] = int32ShapedWeights;
+                nodeInput = int32ShapedWeights;
+            }
+        }
+    }
+
     // Dispatch to appropriate converter.
     NodeImporter const* importFunc{nullptr};
     if (opImporters.count(nodeType))

@@ -173,18 +193,18 @@ void parseNode(
         // Let plugin take precedence over local function. So first check if this can be dispatched to a plugin.
         if (isNodeInPluginRegistry(ctx, node))
         {
-            LOG_INFO("Found registered plugin: " << nodeType << ". Importing local function as a plugin.");
+            LOG_VERBOSE("Found registered plugin: " << nodeType << ". Importing local function as a plugin.");
             importFunc = &opImporters.at("FallbackPluginImporter");
         }
         else
         {
-            LOG_INFO("Found registered local function: " << nodeType << ". Importing as a local function.");
+            LOG_VERBOSE("Found registered local function: " << nodeType << ". Importing as a local function.");
             importFunc = &opImporters.at("LocalFunctionImporter");
         }
     }
     else
     {
-        LOG_INFO("No importer registered for op: " << nodeType << ". Attempting to import as plugin.");
+        LOG_VERBOSE("No importer registered for op: " << nodeType << ". Attempting to import as plugin.");
         importFunc = &opImporters.at("FallbackPluginImporter");
     }

@@ -269,9 +289,9 @@ void parseNode(
         {
             ctx->registerTensor(std::move(output), outputName);
         }
-        // UINT8 is only allowed as network inputs and outputs. Therefore any node that produces an UINT8-typed
-        // output that is not also a graph output is unsupported.
-        if (output.getType() == "UINT8")
+        // UINT8 is only allowed as network inputs, network outputs, and constants for QDQ nodes. Therefore any
+        // non-constant node that produces an UINT8-typed output that is not also a graph output is unsupported.
+        if (output.getType() == "UINT8" && node.op_type() != "Constant")
         {
             bool legalUINT8 = false;
             for (auto const& graphOutput : ctx->getGraphOutputNames())

@@ -319,18 +339,18 @@ void parseNodeStaticCheck(
         // Let plugin take precedence over local function. So first check if this can be dispatched to a plugin.
         if (isNodeInPluginRegistry(ctx, node))
        {
-            LOG_INFO("Found registered plugin: " << nodeType << ". Importing local function as a plugin.");
+            LOG_VERBOSE("Found registered plugin: " << nodeType << ". Importing local function as a plugin.");
             checkerFunc = &opCheckers.at("FallbackPluginImporter");
         }
         else
         {
-            LOG_INFO("Found registered local function: " << nodeType << ". Importing as a local function.");
+            LOG_VERBOSE("Found registered local function: " << nodeType << ". Importing as a local function.");
             checkerFunc = &opCheckers.at("LocalFunctionImporter");
         }
     }
     else
     {
-        LOG_INFO("No checker registered for op: " << nodeType << ". Attempting to check as plugin.");
+        LOG_VERBOSE("No checker registered for op: " << nodeType << ". Attempting to check as plugin.");
         checkerFunc = &opCheckers.at("FallbackPluginImporter");
     }
     (*checkerFunc)(ctx, node, errors, nodeIndex);

@@ -424,11 +444,30 @@ std::vector<Status> importInput(ImporterContext* ctx, ::ONNX_NAMESPACE::ValueInf
     return errorList;
 }

+namespace
+{
+//! \return true if \p inputName is a minus sign followed by digits.
+//! I.e., it matches the regex `^\-\d+$`.
+[[nodiscard]] bool isNegativeInteger(std::string_view inputName) noexcept
+{
+    // Check if the string starts with '-' and has more than one character
+    if (inputName.size() > 1 && inputName.front() == '-')
+    {
+        // Use std::all_of to check if all characters after '-' are digits
+        return std::all_of(inputName.begin() + 1, inputName.end(),
+            [](char c) { return std::isdigit(static_cast<unsigned char>(c)); });
+    }
+    return false;
+}
+} // namespace
+
 static void setDimensionNames(ImporterContext* ctx, std::vector<NamedDimension>& namedDims)
 {
     for (auto const& namedDim : namedDims)
     {
-        namedDim.tensor->setDimensionName(namedDim.index, namedDim.dimParam.c_str());
+        std::string const name = isNegativeInteger(namedDim.dimParam)
+            ? "trt_dynamic_dim_" + std::to_string(namedDim.index)
+            : namedDim.dimParam;
+        namedDim.tensor->setDimensionName(namedDim.index, name.c_str());
     }
 }

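Among the changes above, `setDimensionNames` now sanitizes `dim_param` values that are plain negative integers (for example "-1", which some exporters emit for dynamic dimensions) into a synthesized `trt_dynamic_dim_<index>` name. A self-contained sketch of that check and name selection (the driver loop and sample inputs are illustrative):

    #include <algorithm>
    #include <cctype>
    #include <iostream>
    #include <string>
    #include <string_view>

    // Mirrors the helper in the diff: true iff the string matches ^-\d+$.
    [[nodiscard]] bool isNegativeInteger(std::string_view inputName) noexcept
    {
        if (inputName.size() > 1 && inputName.front() == '-')
        {
            return std::all_of(inputName.begin() + 1, inputName.end(),
                [](char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; });
        }
        return false;
    }

    int main()
    {
        // Pick the name that would be passed to setDimensionName for each dim_param.
        for (std::string dimParam : {"-1", "batch", "-", "-42x", "-007"})
        {
            int const index = 0; // hypothetical dimension index
            std::string const name = isNegativeInteger(dimParam)
                ? "trt_dynamic_dim_" + std::to_string(index)
                : dimParam;
            std::cout << dimParam << " -> " << name << '\n';
        }
    }

Only "-1" and "-007" are rewritten here; a lone "-" or a mixed string like "-42x" passes through unchanged, matching the helper's documented regex.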
ModelRefitter.hpp

Lines changed: 1 addition & 1 deletion

@@ -93,7 +93,7 @@ class ModelRefitter : public nvonnxparser::IParserRefitter
     {
         ONNXTRT_TRY
         {
-            return (index >= 0 && index < mErrors.size()) ? &mErrors.at(index) : nullptr;
+            return (index >= 0 && static_cast<size_t>(index) < mErrors.size()) ? &mErrors.at(index) : nullptr;
         }
         ONNXTRT_CATCH_LOG(mLogger)
         return nullptr;

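The one-line fix above addresses a signed/unsigned comparison: `mErrors.size()` returns `size_t`, so comparing a signed `index` against it directly would implicitly convert the index to unsigned and typically draws a -Wsign-compare warning. The cast is safe only because `index >= 0` is checked first. A minimal illustration under assumed types (an `int32_t` index and a plain `std::vector` of messages, not the real error class):

    #include <cstdint>
    #include <string>
    #include <vector>

    std::vector<std::string> errors{"parse failure"};

    // Safe bounds check: verify non-negativity first, then compare as size_t.
    std::string const* getError(int32_t index)
    {
        return (index >= 0 && static_cast<size_t>(index) < errors.size()) ? &errors.at(index)
                                                                          : nullptr;
    }

    int main()
    {
        // Negative indices are rejected before the cast, so the unsigned
        // comparison can never see a wrapped-around huge value.
        return getError(-1) == nullptr && getError(0) != nullptr ? 0 : 1;
    }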
README.md

Lines changed: 4 additions & 4 deletions

@@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia.

 ## Supported TensorRT Versions

-Development on the this branch is for the latest version of [TensorRT 10.9](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
+Development on the this branch is for the latest version of [TensorRT 10.10](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.

 For previous versions of TensorRT, refer to their respective branches.

@@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
 ### Dependencies

 - [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
-- [TensorRT 10.9](https://developer.nvidia.com/tensorrt)
-- [TensorRT 10.9 open source libraries](https://github.com/NVIDIA/TensorRT/)
+- [TensorRT 10.10](https://developer.nvidia.com/tensorrt)
+- [TensorRT 10.10 open source libraries](https://github.com/NVIDIA/TensorRT/)

 ### Building

@@ -82,7 +82,7 @@ Refer to the link or run `polygraphy run -h` for more information on CLI options

 Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` files.

-TensorRT 10.9 supports ONNX release 1.17.0. Install it with:
+TensorRT 10.10 supports ONNX release 1.17.0. Install it with:

     python3 -m pip install onnx==1.17.0

Status.hpp

Lines changed: 0 additions & 3 deletions

@@ -16,9 +16,6 @@
 #define ENABLE_STD_PLUGIN 1
 #endif // ENABLE_STD_PLUGIN

-#ifndef ENABLE_SAFE_PLUGIN
-#define ENABLE_SAFE_PLUGIN 0
-#endif // ENABLE_SAFE_PLUGIN

 #ifndef USE_LITE_PROTOBUF
 #define USE_LITE_PROTOBUF 0

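The deleted ENABLE_SAFE_PLUGIN block used the common default-off feature-macro pattern, where an `#ifndef` guard supplies 0 unless the build system overrides it (for example via `-D` on the compiler command line). A tiny sketch of that pattern with a hypothetical flag name:

    #include <iostream>

    // Default-off feature flag: a build can override with -DENABLE_EXAMPLE_FEATURE=1;
    // otherwise the #ifndef guard supplies 0. ENABLE_SAFE_PLUGIN followed this same
    // pattern before its removal in this commit.
    #ifndef ENABLE_EXAMPLE_FEATURE
    #define ENABLE_EXAMPLE_FEATURE 0
    #endif // ENABLE_EXAMPLE_FEATURE

    int main()
    {
    #if ENABLE_EXAMPLE_FEATURE
        std::cout << "feature enabled\n";
    #else
        std::cout << "feature disabled\n";
    #endif
    }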
TensorOrWeights.hpp

Lines changed: 7 additions & 1 deletion

@@ -125,9 +125,15 @@ class TensorOrWeights
         return is_tensor() ? tensor().getType() == nvinfer1::DataType::kINT8
                            : weights().type == ::ONNX_NAMESPACE::TensorProto_DataType_INT8;
     }
+    bool isUint8() const
+    {
+        return is_tensor() ? tensor().getType() == nvinfer1::DataType::kUINT8
+                           : weights().type == ::ONNX_NAMESPACE::TensorProto_DataType_UINT8;
+    }
     bool isBool() const
     {
-        return is_tensor() ? tensor().getType() == nvinfer1::DataType::kBOOL : weights().type == ::ONNX_NAMESPACE::TensorProto_DataType_BOOL;
+        return is_tensor() ? tensor().getType() == nvinfer1::DataType::kBOOL
+                           : weights().type == ::ONNX_NAMESPACE::TensorProto_DataType_BOOL;
     }
     bool isFp8() const
     {

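`TensorOrWeights` holds either a TensorRT tensor or ONNX initializer weights, so each type predicate, including the new `isUint8()`, checks one enum per underlying representation. A simplified sketch of that dual-dispatch pattern with stand-in types (the real class wraps `nvinfer1::ITensor` and `ShapedWeights`, not these):

    #include <cassert>
    #include <variant>

    // Illustrative stand-ins for nvinfer1::DataType and ONNX TensorProto dtypes.
    enum class TrtType { kFLOAT, kINT8, kUINT8 };
    enum class OnnxType { FLOAT, INT8, UINT8 };

    struct Tensor { TrtType type; };
    struct Weights { OnnxType type; };

    class TensorOrWeights
    {
        std::variant<Tensor, Weights> mValue;

    public:
        TensorOrWeights(Tensor t) : mValue(t) {}
        TensorOrWeights(Weights w) : mValue(w) {}

        bool is_tensor() const { return std::holds_alternative<Tensor>(mValue); }

        // Same shape as the new isUint8(): one check per underlying type system.
        bool isUint8() const
        {
            return is_tensor() ? std::get<Tensor>(mValue).type == TrtType::kUINT8
                               : std::get<Weights>(mValue).type == OnnxType::UINT8;
        }
    };

    int main()
    {
        assert(TensorOrWeights{Tensor{TrtType::kUINT8}}.isUint8());
        assert(TensorOrWeights{Weights{OnnxType::UINT8}}.isUint8());
        assert(!TensorOrWeights{Weights{OnnxType::INT8}}.isUint8());
    }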
WeightsContext.cpp

Lines changed: 4 additions & 42 deletions

@@ -263,19 +263,7 @@ bool WeightsContext::convertOnnxWeights(
     dataPtr = dataBuf.data();

     // Cast non-native TRT types to their corresponding proxy types
-    if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::UINT8)
-    {
-        // Cast UINT8 weights to INT32.
-        dataPtr = convertUINT8(reinterpret_cast<uint8_t const*>(dataPtr), shape);
-        size_t const sizeOffset = sizeof(int32_t) / sizeof(uint8_t);
-        if (multiplicationWillOverflow(nbytes, sizeOffset))
-        {
-            return false;
-        }
-        nbytes = nbytes * sizeOffset;
-        onnxDtype = ::ONNX_NAMESPACE::TensorProto::INT32;
-    }
-    else if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::DOUBLE)
+    if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::DOUBLE)
     {
         // Cast DOUBLE weights to FLOAT.
         dataPtr = convertDouble(reinterpret_cast<double const*>(dataPtr), shape);

@@ -305,34 +293,7 @@ bool WeightsContext::convertOnnxWeights(
     // Weights information is within the TensorProto itself

     // Cast non-native TRT types to their corresponding proxy types
-    if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::UINT8)
-    {
-        onnxDtype = ::ONNX_NAMESPACE::TensorProto::INT32;
-        if (onnxTensor.raw_data().size() > 0)
-        {
-            dataPtr = convertUINT8(reinterpret_cast<uint8_t const*>(onnxTensor.raw_data().data()), shape);
-            size_t const sizeOffset = (sizeof(int32_t) / sizeof(uint8_t));
-            if (multiplicationWillOverflow(nbytes, sizeOffset))
-            {
-                return false;
-            }
-            nbytes = onnxTensor.raw_data().size() * sizeOffset;
-        }
-        else if (onnxTensor.int32_data().size() > 0)
-        {
-            dataPtr = (void*) onnxTensor.int32_data().data();
-            if (multiplicationWillOverflow(nbytes, sizeof(int32_t)))
-            {
-                return false;
-            }
-            nbytes = onnxTensor.int32_data().size() * sizeof(int32_t);
-            if (ownAllWeights)
-            {
-                dataPtr = ownWeights(dataPtr, onnxDtype, shape, nbytes);
-            }
-        }
-    }
-    else if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::DOUBLE)
+    if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::DOUBLE)
     {
         if (onnxTensor.raw_data().size() > 0)
         {

@@ -432,7 +393,8 @@ bool WeightsContext::convertOnnxWeights(
             dataPtr = ownWeights(dataPtr, onnxDtype, shape, nbytes);
         }
     }
-    else if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::FLOAT8E4M3FN)
+    else if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::FLOAT8E4M3FN
+        || onnxDtype == ::ONNX_NAMESPACE::TensorProto::UINT8)
     {
         if (onnxTensor.raw_data().size() > 0)
         {

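With the deletions above, UINT8 initializers are no longer eagerly widened to INT32 at weight-conversion time; they are kept as raw UINT8 (handled like FLOAT8E4M3FN here) and converted later in `parseNode` only for non-Q/DQ consumers. A sketch of what such a widening involves and why `nbytes` gets an overflow check before the 4x growth (an illustration, not the real `convertUINT8`, which returns a pointer into context-owned storage):

    #include <cstdint>
    #include <limits>
    #include <vector>

    // Widen UINT8 values to INT32. Each element grows from 1 to 4 bytes,
    // which is why callers must verify nbytes * 4 cannot overflow.
    std::vector<int32_t> convertUint8ToInt32(std::vector<uint8_t> const& src)
    {
        std::vector<int32_t> dst;
        dst.reserve(src.size());
        for (uint8_t v : src)
        {
            dst.push_back(static_cast<int32_t>(v)); // value-preserving widening
        }
        return dst;
    }

    bool multiplicationWillOverflow(size_t a, size_t b)
    {
        return b != 0 && a > std::numeric_limits<size_t>::max() / b;
    }

    int main()
    {
        std::vector<uint8_t> raw{0, 127, 255};
        size_t const nbytes = raw.size() * sizeof(uint8_t);
        if (multiplicationWillOverflow(nbytes, sizeof(int32_t) / sizeof(uint8_t)))
        {
            return 1; // reject absurdly large weights rather than wrap around
        }
        auto widened = convertUint8ToInt32(raw);
        return widened[2] == 255 ? 0 : 1;
    }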
docs/Changelog.md

Lines changed: 7 additions & 0 deletions

@@ -2,6 +2,13 @@

 # ONNX-TensorRT Changelog

+# TensorRT 10.10 GA Release - 2025-5-8
+For more details, see the 10.10 GA release notes
+
+- Cleaned up log spam when the ONNX network contained a mixture Plugins and LocalFunctions
+- UINT8 constants are now properly imported for QuantizeLinear & DequantizeLinear nodes
+- Plugin fallback importer now also reads its namespace from a Node's domain field
+
 # TensorRT 10.9 GA Release - 2025-3-7
 For more details, see the 10.9 GA release notes

docs/operators.md

Lines changed: 1 addition & 1 deletion

@@ -2,7 +2,7 @@

 # Supported ONNX Operators

-TensorRT 10.9 supports operators in the inclusive range of opset 9 to opset 22. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.
+TensorRT 10.10 supports operators in the inclusive range of opset 9 to opset 22. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.

 TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOAT16, FP8, FP4, INT32, INT64, INT8, INT4, UINT8, and BOOL
