
Commit d5dce67

ONNX-TensorRT 10.9-GA Release (#1022)

Signed-off-by: Kevin Chen <[email protected]>
1 parent c5ca891 commit d5dce67

File tree

11 files changed: +170 -37 lines changed


CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
 # Version information
 #--------------------------------------------------
 set(ONNX2TRT_MAJOR 10)
-set(ONNX2TRT_MINOR 7)
+set(ONNX2TRT_MINOR 9)
 set(ONNX2TRT_PATCH 0)
 set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")

ModelRefitter.hpp

Lines changed: 1 addition & 1 deletion
@@ -93,7 +93,7 @@ class ModelRefitter : public nvonnxparser::IParserRefitter
 {
     ONNXTRT_TRY
     {
-        return &mErrors.at(index);
+        return (index >= 0 && index < mErrors.size()) ? &mErrors.at(index) : nullptr;
     }
     ONNXTRT_CATCH_LOG(mLogger)
     return nullptr;
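A usage sketch of the error-query pattern this change hardens (not part of the commit; it assumes the `IParserRefitter` error API declared in `NvOnnxParser.h` - `getNbErrors()`, `getError()`, and `IParserError::desc()`):

```cpp
#include "NvOnnxParser.h"

#include <iostream>

// Minimal sketch: inspect parser-refitter errors after a failed refit.
// getError() may now return nullptr for an out-of-range index instead of
// letting mErrors.at() throw, so callers should check the pointer.
void logRefitErrors(nvonnxparser::IParserRefitter& refitter)
{
    for (int32_t i = 0; i < refitter.getNbErrors(); ++i)
    {
        nvonnxparser::IParserError const* err = refitter.getError(i); // may be nullptr
        if (err != nullptr)
        {
            std::cout << err->desc() << "\n";
        }
    }
}
```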

NvOnnxParser.h

Lines changed: 1 addition & 1 deletion
@@ -301,7 +301,7 @@ class IParser
     //!
     //! The flags are listed in the OnnxParserFlag enum.
     //!
-    //! \param OnnxParserFlag The flags used when parsing an ONNX model.
+    //! \param OnnxParserFlags The flags used when parsing an ONNX model.
     //!
     //! \note This function will override the previous set flags, rather than bitwise ORing the new flag.
     //!
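Because `setFlags()` overwrites the previous value rather than ORing into it, callers should build the complete mask first. A minimal sketch (`kNATIVE_INSTANCENORM` is only an illustrative member of `OnnxParserFlag`):

```cpp
#include "NvOnnxParser.h"

// Combine every desired flag into one mask before calling setFlags(); a second
// call with a single flag would discard flags set earlier rather than OR into them.
void configureParser(nvonnxparser::IParser& parser)
{
    nvonnxparser::OnnxParserFlags const flags
        = 1U << static_cast<uint32_t>(nvonnxparser::OnnxParserFlag::kNATIVE_INSTANCENORM);
    parser.setFlags(flags); // replaces, does not OR with, any earlier flags
}
```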

README.md

Lines changed: 4 additions & 4 deletions
@@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia.

 ## Supported TensorRT Versions

-Development on the this branch is for the latest version of [TensorRT 10.8](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
+Development on the this branch is for the latest version of [TensorRT 10.9](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.

 For previous versions of TensorRT, refer to their respective branches.

@@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
 ### Dependencies

 - [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
-- [TensorRT 10.8](https://developer.nvidia.com/tensorrt)
-- [TensorRT 10.8 open source libaries] (https://github.com/NVIDIA/TensorRT/)
+- [TensorRT 10.9](https://developer.nvidia.com/tensorrt)
+- [TensorRT 10.9 open source libraries](https://github.com/NVIDIA/TensorRT/)

 ### Building

@@ -82,7 +82,7 @@ Refer to the link or run `polygraphy run -h` for more information on CLI options

 Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` files.

-TensorRT 10.8 supports ONNX release 1.17.0. Install it with:
+TensorRT 10.9 supports ONNX release 1.17.0. Install it with:

     python3 -m pip install onnx==1.17.0

WeightsContext.cpp

Lines changed: 12 additions & 17 deletions
@@ -466,26 +466,21 @@ bool WeightsContext::convertOnnxWeights(
     return true;
 }

-float* WeightsContext::convertFP16Data(void* weightValues, nvinfer1::Dims const& shape)
+float* WeightsContext::getFP32Values(ShapedWeights const& w)
 {
-    int64_t const nbWeights = volume(shape);
-    float* newWeights{static_cast<float*>(createTempWeights(::ONNX_NAMESPACE::TensorProto::FLOAT, shape).values)};
-
-    half_float::half* tempValues = static_cast<half_float::half*>(weightValues);
-
-    for (int64_t i = 0; i < nbWeights; i++)
+    if (w.type == ::ONNX_NAMESPACE::TensorProto::FLOAT)
     {
-        newWeights[i] = tempValues[i];
+        return static_cast<float*>(w.values);
     }
-    return newWeights;
-}
-
-float* WeightsContext::getFP32Values(ShapedWeights const& w)
-{
-    assert((w.type == ::ONNX_NAMESPACE::TensorProto::FLOAT || w.type == ::ONNX_NAMESPACE::TensorProto::FLOAT16)
-        && "Conversion only valid from FLOAT or FLOAT16");
-    return (w.type == ::ONNX_NAMESPACE::TensorProto::FLOAT) ? static_cast<float*>(w.values)
-        : convertFP16Data(w.values, w.shape);
+    else if (w.type == ::ONNX_NAMESPACE::TensorProto::FLOAT16)
+    {
+        return convertToFp32<half_float::half>(w);
+    }
+    else if (w.type == ::ONNX_NAMESPACE::TensorProto::BFLOAT16)
+    {
+        return convertToFp32<BFloat16>(w);
+    }
+    ONNXTRT_THROW(MAKE_ERROR("Invalid type found in getFP32Values() call.", ErrorCode::kINTERNAL_ERROR));
 }

 ShapedWeights WeightsContext::createNamedTempWeights(ShapedWeights::DataType type, nvinfer1::Dims const& shape,

WeightsContext.hpp

Lines changed: 14 additions & 3 deletions
@@ -6,6 +6,7 @@

 #include "ShapedWeights.hpp"
 #include "Status.hpp"
+#include "errorHelpers.hpp"
 #include "weightUtils.hpp"
 #include <string>
 #include <vector>

@@ -64,10 +65,11 @@ class WeightsContext
     bool convertOnnxWeights(
         ::ONNX_NAMESPACE::TensorProto const& onnxTensor, ShapedWeights* weights, bool ownAllWeights = false);

-    // Helper function to convert weightValues' type from fp16 to fp32.
-    float* convertFP16Data(void* weightValues, nvinfer1::Dims const& shape);
+    // Helper function to convert weightValues' type from fp16/bf16 to fp32.
+    template <typename DataType>
+    [[nodiscard]] float* convertToFp32(ShapedWeights const& w);

-    // Helper function to get fp32 representation of fp16 or fp32 weights.
+    // Helper function to get fp32 representation of fp16, bf16, or fp32 weights.
     float* getFP32Values(ShapedWeights const& w);

     // Register an unique name for the created weights.

@@ -112,5 +114,14 @@ DataType* WeightsContext::convertInt32Data(int32_t const* weightValues, nvinfer1
     }
     return newWeights;
 }
+template <typename DataType>
+[[nodiscard]] float* WeightsContext::convertToFp32(ShapedWeights const& w)
+{
+    int64_t const nbWeights = volume(w.shape);
+    auto result = static_cast<float*>(createTempWeights(::ONNX_NAMESPACE::TensorProto::FLOAT, w.shape).values);
+    std::copy_n(static_cast<DataType const*>(w.values), nbWeights, result);
+
+    return result;
+}

 } // namespace onnx2trt
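The new `convertToFp32<DataType>` leans on each narrow type's implicit conversion to `float`, letting `std::copy_n` do the widening element by element. A standalone sketch of the same mechanism with a toy bf16 type (the real `half_float::half` and `BFloat16` types come from the parser's dependencies; this stand-in is hypothetical):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <iostream>

// Toy stand-in for a bf16 type: bf16 keeps the upper 16 bits of an fp32.
// The implicit conversion operator is what lets std::copy_n widen a bf16
// buffer into a float buffer, as in convertToFp32<DataType> above.
struct ToyBFloat16
{
    uint16_t bits{};
    operator float() const
    {
        uint32_t const wide = static_cast<uint32_t>(bits) << 16;
        float f;
        std::memcpy(&f, &wide, sizeof(f));
        return f;
    }
};

int main()
{
    ToyBFloat16 const src[2] = {{0x3F80}, {0x4000}}; // bf16 bit patterns for 1.0f and 2.0f
    float dst[2];
    std::copy_n(src, 2, dst);                     // per-element widening via operator float()
    std::cout << dst[0] << " " << dst[1] << "\n"; // prints "1 2"
}
```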

docs/Changelog.md

Lines changed: 13 additions & 6 deletions
@@ -2,6 +2,13 @@

 # ONNX-TensorRT Changelog

+# TensorRT 10.9 GA Release - 2025-3-7
+For more details, see the 10.9 GA release notes
+
+- Added support for Python AOT plugins
+- Added support for opset 21 GroupNorm
+- Fixed support for opset 18+ ScatterND
+
 # TensorRT 10.8 GA Release - 2025-1-30
 For more details, see the 10.8 GA release notes

(The remaining hunks below only strip trailing whitespace; each -/+ pair is otherwise identical.)

@@ -46,7 +53,7 @@ For more details, see the 10.3 GA release notes.
 - Added support for tensor `axes` inputs for `Slice` nodes
 - Updated `ScatterElements` importer to use an updated plugin

-# TensorRT 10.2 GA Release - 2024-7-10
+# TensorRT 10.2 GA Release - 2024-7-10
 For more details, see the 10.2 GA release notes.

 - Improved error handling with new macros and classes

@@ -94,7 +101,7 @@ For more details, see the 9.2 GA release notes for the fixes since 9.1 GA.
 For more details, see the 9.1 GA release notes for the fixes since 9.0 GA.

 - Added new `ErrorCode` enums to improve error logging
-- Added new members to `IParserError` to improve error logging
+- Added new members to `IParserError` to improve error logging
 - Added static checkers when parsing nodes, resulting better reporting of errors

 # TensorRT 9.0 GA Release - 2023-9-5

@@ -108,7 +115,7 @@ For more details, see the 9.0 GA release notes for the fixes since 9.0 EA.
 For more details, see the 9.0 EA release notes for the fixes since 8.6 GA.

 - Added support for INT64 data type. The ONNX parser no longer automatically casts INT64 to INT32.
-- Added support for ONNX local functions when parsing ONNX models with the ONNX parser.
+- Added support for ONNX local functions when parsing ONNX models with the ONNX parser.
 - Breaking API Change: In TensorRT 9.0, due to the introduction of INT64 as a supported data type, ONNX models with INT64 I/O require INT64 bindings. Note that prior to this release, such models required INT32 bindings.
 - Updated ONNX submodule to v1.14.0.

@@ -135,7 +142,7 @@ For more details, see the 8.6 EA release notes for new features added in TensorR

 ## Changed

-- All cast operations will now use the new `CastLayer` over the pervious `IdentityLayer`.
+- All cast operations will now use the new `CastLayer` over the pervious `IdentityLayer`.

 # TensorRT 8.5 GA Release - 2022-11-2

@@ -172,7 +179,7 @@ For more details, see the 8.5 GA release notes for new features added in TensorR

 ## TensorRT 8.4 GA Release - 2022-6-6

-### Added
+### Added

 For more details, see the 8.4 GA release notes for new features added in TensorRT 8.4

@@ -197,7 +204,7 @@ See the 8.2 EA release notes for new features added in TensorRT 8.2.
 ### Fixes
 - Removed duplicate constant layer checks that caused some performance regressions
 - Fixed expand dynamic shape calculations
-- Added parser-side checks for Scatter layer support
+- Added parser-side checks for Scatter layer support

 ## TensorRT 8.2 EA Release - 2021-10-04
 ### Added

docs/operators.md

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@

 # Supported ONNX Operators

-TensorRT 10.8 supports operators in the inclusive range of opset 9 to opset 22. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.
+TensorRT 10.9 supports operators in the inclusive range of opset 9 to opset 22. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.

 TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOAT16, FP8, FP4, INT32, INT64, INT8, INT4, UINT8, and BOOL

importerUtils.cpp

Lines changed: 36 additions & 1 deletion
@@ -982,9 +982,44 @@ std::unique_ptr<nvinfer1::IPluginV3> createPlugin(ImporterContext* ctx, ::ONNX_N
     }
     else if (creatorVersion == CreatorVersion::kV3QUICK)
    {
+
+        OnnxAttrs attrs(node, ctx);
+        nvinfer1::QuickPluginCreationRequest request;
+
+        // Node-level specifications override network-level preferences
+        if (attrs.count("aot"))
+        {
+            auto const aotOrJit = static_cast<bool>(attrs.get<int>("aot", 0));
+            if (aotOrJit)
+            {
+                request = nvinfer1::QuickPluginCreationRequest::kSTRICT_AOT;
+            }
+            else
+            {
+                request = nvinfer1::QuickPluginCreationRequest::kSTRICT_JIT;
+            }
+        }
+        else
+        {
+            auto const preferAOT
+                = ctx->network()->getFlag(nvinfer1::NetworkDefinitionCreationFlag::kPREFER_AOT_PYTHON_PLUGINS);
+            auto const preferJIT
+                = ctx->network()->getFlag(nvinfer1::NetworkDefinitionCreationFlag::kPREFER_JIT_PYTHON_PLUGINS);
+            ONNXTRT_CHECK(!(preferAOT && preferJIT) &&
+                "Both NetworkDefinitionCreationFlag::kPREFER_AOT_PYTHON_PLUGINS and "
+                "NetworkDefinitionCreationFlag::kPREFER_JIT_PYTHON_PLUGINS cannot be specified at the same time.", ErrorCode::kUNSUPPORTED_GRAPH);
+
+            // If neither flag is specified, defer to the plugin creator to pick whichever implementation has actually
+            // been defined.
+            // - If both are defined, the plugin creator will raise an error.
+            request = preferJIT ? nvinfer1::QuickPluginCreationRequest::kPREFER_JIT
+                                : (preferAOT ? nvinfer1::QuickPluginCreationRequest::kPREFER_AOT
+                                             : nvinfer1::QuickPluginCreationRequest::kUNKNOWN);
+        }
+
         return std::unique_ptr<nvinfer1::IPluginV3>{
             static_cast<nvinfer1::IPluginCreatorV3Quick*>(pluginCreator)
-                ->createPlugin(name.c_str(), pluginNamespace.c_str(), &fc, nvinfer1::TensorRTPhase::kBUILD)};
+                ->createPlugin(name.c_str(), pluginNamespace.c_str(), &fc, nvinfer1::TensorRTPhase::kBUILD, request)};
     }
     ONNXTRT_CHECK(false && "Found invalid creator version when creating a V3 plugin.", ErrorCode::kINTERNAL_ERROR);
 }
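For context, the network-level preference consulted above is set when the network is created. A minimal usage sketch built on the same flags referenced in the diff (not part of this commit; error handling and the rest of the build omitted):

```cpp
#include "NvInfer.h"

// Sketch: create a network that prefers ahead-of-time (AOT) Python plugin
// implementations; kV3QUICK plugin creators then receive kPREFER_AOT unless a
// node-level "aot" attribute overrides the preference.
nvinfer1::INetworkDefinition* makeAotPreferringNetwork(nvinfer1::IBuilder& builder)
{
    uint32_t const flags
        = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kPREFER_AOT_PYTHON_PLUGINS);
    return builder.createNetworkV2(flags);
}
```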

onnxOpImporters.cpp

Lines changed: 86 additions & 1 deletion
@@ -1639,6 +1639,12 @@ NodeOutputs QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::Nod
         node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE);

     bool stronglyTyped = ctx->isStronglyTyped();
+    if (!stronglyTyped && chosenDataType != DataType::kINT8)
+    {
+        LOG_WARNING(
+            "A strongly typed network is recommended for networks with QuantizedLinear/DequantizedLinear nodes using "
+            "precisions other than int8.");
+    }
     if (isDQ)
     {
         // Add and configure a DequantizeLayer.
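The warning points users at strongly typed networks, which are requested via a creation flag. A minimal sketch (assumes `NetworkDefinitionCreationFlag::kSTRONGLY_TYPED` from `NvInfer.h`):

```cpp
#include "NvInfer.h"

// Sketch: create the strongly typed network the new warning recommends for
// QuantizeLinear/DequantizeLinear graphs using precisions other than int8.
nvinfer1::INetworkDefinition* makeStronglyTypedNetwork(nvinfer1::IBuilder& builder)
{
    uint32_t const flags
        = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
    return builder.createNetworkV2(flags);
}
```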
@@ -2251,8 +2257,86 @@ DEFINE_BUILTIN_OP_IMPORTER(GreaterOrEqual)
         /*greater*/ true);
 }

+// Support opset21 GroupNorm, where scale and bias is shape [C] instead of [G].
+NodeOutputs groupNorm21Helper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx,
+    std::vector<TensorOrWeights>& inputs)
+{
+    auto* input = &convertToTensor(inputs.at(0), ctx);
+    auto* scale = &convertToTensor(inputs.at(1), ctx);
+    auto* bias = &convertToTensor(inputs.at(2), ctx);
+
+    OnnxAttrs attrs(node, ctx);
+    float epsilon = attrs.get("epsilon", 1e-5f);
+    int32_t nbGroups = attrs.get("num_groups", 1);
+
+    auto nbDims = input->getDimensions().nbDims;
+    uint32_t axesMask{0};
+    std::vector<int32_t> unsqueezeAxes;
+
+    for (int32_t i = 0; i < nbDims; i++)
+    {
+        if (i == 1)
+        {
+            continue;
+        }
+        // Axes should correspond to the spatial dimensions
+        if (i >= 2)
+        {
+            axesMask |= 1 << i;
+        }
+        unsqueezeAxes.push_back(i);
+    }
+
+    // Reshape [N, C, ...] to [N, G, C/G, ...]
+    auto inShape = shapeOf(*input);
+
+    auto gnShape = concat(ctx, gather(ctx, inShape, shapeVector(0)), shapeVector(nbGroups));
+    gnShape = concat(ctx, gnShape, floorDiv(ctx, gather(ctx, inShape, shapeVector(1)), shapeVector(nbGroups)));
+    gnShape = concat(ctx, gnShape, shapeVector(-1));
+    auto gnReshaped = &reshape(ctx, *input, gnShape);
+
+    // Run instanceNorm with scale = 1, bias = 0
+
+    auto tmpScale
+        = constantOfShape(ctx, addConstantScalar(ctx, 1.0F, ::ONNX_NAMESPACE::TensorProto::FLOAT)->getOutput(0),
+            &gather(ctx, shapeOf(*gnReshaped), shapeVector(1)).tensor(ctx));
+    auto tmpBias
+        = constantOfShape(ctx, addConstantScalar(ctx, 0.0F, ::ONNX_NAMESPACE::TensorProto::FLOAT)->getOutput(0),
+            &gather(ctx, shapeOf(*gnReshaped), shapeVector(1)).tensor(ctx));
+
+    tmpScale = castHelper(ctx, tmpScale, scale->getType());
+    tmpBias = castHelper(ctx, tmpBias, bias->getType());
+
+    tmpScale = unsqueezeTensor(ctx, *tmpScale, unsqueezeAxes);
+    tmpBias = unsqueezeTensor(ctx, *tmpBias, unsqueezeAxes);
+
+    auto tmpNorm = N_CHECK(ctx->network()->addNormalization(*gnReshaped, *tmpScale, *tmpBias, axesMask));
+    tmpNorm->setEpsilon(epsilon);
+
+    auto normOut = N_CHECK(tmpNorm->getOutput(0));
+
+    // Reshape back to [N, C, ...]
+    auto reshapeBackOut = &reshape(ctx, *normOut, inShape);
+
+    // Do final scale and bias add.
+    using eOp = nvinfer1::ElementWiseOperation;
+    scale = unsqueezeTensor(ctx, *scale, unsqueezeAxes);
+    bias = unsqueezeTensor(ctx, *bias, unsqueezeAxes);
+    auto scaleLayer = N_CHECK(ctx->network()->addElementWise(*scale, *reshapeBackOut, eOp::kPROD));
+    auto scaledOutput = N_CHECK(scaleLayer->getOutput(0));
+    auto biasLayer = N_CHECK(ctx->network()->addElementWise(*scaledOutput, *bias, eOp::kSUM));
+    auto biasOutput = N_CHECK(biasLayer->getOutput(0));
+
+    return {{biasOutput}};
+}
+
 DEFINE_BUILTIN_OP_IMPORTER(GroupNormalization)
 {
+    if (ctx->getOpsetVersion() >= 21)
+    {
+        return groupNorm21Helper(ctx, node, nodeIdx, inputs);
+    }
+
     return normalizationHelper(ctx, node, nodeIdx, inputs);
 }
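To see what the helper computes, here is a minimal scalar reference of opset-21 GroupNormalization semantics (an illustration only, not the importer's code path): statistics come from each block of C/G channels, which is what `INormalizationLayer` yields after the `[N, G, C/G, ...]` reshape with unit scale and zero bias, and the per-channel scale/bias of shape `[C]` are applied after reshaping back.

```cpp
#include <cmath>
#include <cstdint>
#include <vector>

// Scalar sketch of opset-21 GroupNorm on an NCHW tensor flattened to
// [N, C, HW]. scale and bias have shape [C] (not [G], as in opset 18).
std::vector<float> groupNormRef(std::vector<float> const& x, int64_t N, int64_t C, int64_t HW,
    int64_t G, std::vector<float> const& scale, std::vector<float> const& bias, float eps = 1e-5F)
{
    std::vector<float> y(x.size());
    int64_t const cPerG = C / G; // ONNX requires C to be divisible by num_groups
    for (int64_t n = 0; n < N; ++n)
    {
        for (int64_t g = 0; g < G; ++g)
        {
            // Mean/variance over the [C/G, HW] block, mirroring the reshape trick.
            int64_t const begin = (n * C + g * cPerG) * HW;
            int64_t const size = cPerG * HW;
            double sum = 0.0, sqSum = 0.0;
            for (int64_t i = 0; i < size; ++i)
            {
                sum += x[begin + i];
                sqSum += x[begin + i] * x[begin + i];
            }
            double const mean = sum / size;
            double const invStd = 1.0 / std::sqrt(sqSum / size - mean * mean + eps);
            // Per-channel affine, applied after "reshaping back" to [N, C, HW].
            for (int64_t c = g * cPerG; c < (g + 1) * cPerG; ++c)
            {
                for (int64_t i = 0; i < HW; ++i)
                {
                    int64_t const idx = (n * C + c) * HW + i;
                    y[idx] = static_cast<float>((x[idx] - mean) * invStd) * scale[c] + bias[c];
                }
            }
        }
    }
    return y;
}
```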

@@ -5504,7 +5588,8 @@ DEFINE_BUILTIN_OP_IMPORTER(Slice)
     starts = ShapeTensor{*input1};
     ends = ShapeTensor{*input2};
     // "If axes are omitted, they are set to [0, ..., ndim-1]."
-    axes = nbInputs > 3 ? ShapeTensor(ctx, inputs.at(3)) : iotaShapeVector(dims.size());
+    axes = nbInputs > 3 && !inputs.at(3).isNullTensor() ? ShapeTensor(ctx, inputs.at(3))
+                                                        : iotaShapeVector(dims.size());
     ONNXTRT_CHECK_NODE((starts.size() == axes.size()),
         "The shape of input starts misaligns with the shape of input axes. Shape of input starts = "
             << starts.size() << ", shape of input axes = " << axes.size() << ".",
