
Commit d5dce67

ONNX-TensorRT 10.9-GA Release (#1022)

Signed-off-by: Kevin Chen <[email protected]>
1 parent c5ca891 commit d5dce67

File tree

11 files changed: +170 -37 lines changed


CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
 # Version information
 #--------------------------------------------------
 set(ONNX2TRT_MAJOR 10)
-set(ONNX2TRT_MINOR 7)
+set(ONNX2TRT_MINOR 9)
 set(ONNX2TRT_PATCH 0)
 set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")

ModelRefitter.hpp

Lines changed: 1 addition & 1 deletion
@@ -93,7 +93,7 @@ class ModelRefitter : public nvonnxparser::IParserRefitter
 {
     ONNXTRT_TRY
     {
-        return &mErrors.at(index);
+        return (index >= 0 && index < mErrors.size()) ? &mErrors.at(index) : nullptr;
     }
     ONNXTRT_CATCH_LOG(mLogger)
     return nullptr;
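A usage sketch of the error-query pattern this change hardens (not part of the commit; it assumes the `IParserRefitter` error API declared in `NvOnnxParser.h` - `getNbErrors()`, `getError()`, and `IParserError::desc()`):

```cpp
#include "NvOnnxParser.h"

#include <iostream>

// Minimal sketch: inspect parser-refitter errors after a failed refit.
// getError() may now return nullptr for an out-of-range index instead of
// letting mErrors.at() throw, so callers should check the pointer.
void logRefitErrors(nvonnxparser::IParserRefitter& refitter)
{
    for (int32_t i = 0; i < refitter.getNbErrors(); ++i)
    {
        nvonnxparser::IParserError const* err = refitter.getError(i); // may be nullptr
        if (err != nullptr)
        {
            std::cout << err->desc() << "\n";
        }
    }
}
```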

NvOnnxParser.h

Lines changed: 1 addition & 1 deletion
@@ -301,7 +301,7 @@ class IParser
     //!
     //! The flags are listed in the OnnxParserFlag enum.
     //!
-    //! \param OnnxParserFlag The flags used when parsing an ONNX model.
+    //! \param OnnxParserFlags The flags used when parsing an ONNX model.
     //!
     //! \note This function will override the previous set flags, rather than bitwise ORing the new flag.
     //!
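Because `setFlags()` overwrites the previous value rather than ORing into it, callers should build the complete mask first. A minimal sketch (`kNATIVE_INSTANCENORM` is only an illustrative member of `OnnxParserFlag`):

```cpp
#include "NvOnnxParser.h"

// Combine every desired flag into one mask before calling setFlags(); a second
// call with a single flag would discard flags set earlier rather than OR into them.
void configureParser(nvonnxparser::IParser& parser)
{
    nvonnxparser::OnnxParserFlags const flags
        = 1U << static_cast<uint32_t>(nvonnxparser::OnnxParserFlag::kNATIVE_INSTANCENORM);
    parser.setFlags(flags); // replaces, does not OR with, any earlier flags
}
```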

README.md

Lines changed: 4 additions & 4 deletions
@@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia.

 ## Supported TensorRT Versions

-Development on the this branch is for the latest version of [TensorRT 10.8](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
+Development on the this branch is for the latest version of [TensorRT 10.9](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.

 For previous versions of TensorRT, refer to their respective branches.

@@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
 ### Dependencies

 - [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
-- [TensorRT 10.8](https://developer.nvidia.com/tensorrt)
-- [TensorRT 10.8 open source libaries] (https://github.com/NVIDIA/TensorRT/)
+- [TensorRT 10.9](https://developer.nvidia.com/tensorrt)
+- [TensorRT 10.9 open source libraries](https://github.com/NVIDIA/TensorRT/)

 ### Building

@@ -82,7 +82,7 @@ Refer to the link or run `polygraphy run -h` for more information on CLI options

 Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` files.

-TensorRT 10.8 supports ONNX release 1.17.0. Install it with:
+TensorRT 10.9 supports ONNX release 1.17.0. Install it with:

     python3 -m pip install onnx==1.17.0

WeightsContext.cpp

Lines changed: 12 additions & 17 deletions
@@ -466,26 +466,21 @@ bool WeightsContext::convertOnnxWeights(
     return true;
 }

-float* WeightsContext::convertFP16Data(void* weightValues, nvinfer1::Dims const& shape)
+float* WeightsContext::getFP32Values(ShapedWeights const& w)
 {
-    int64_t const nbWeights = volume(shape);
-    float* newWeights{static_cast<float*>(createTempWeights(::ONNX_NAMESPACE::TensorProto::FLOAT, shape).values)};
-
-    half_float::half* tempValues = static_cast<half_float::half*>(weightValues);
-
-    for (int64_t i = 0; i < nbWeights; i++)
+    if (w.type == ::ONNX_NAMESPACE::TensorProto::FLOAT)
     {
-        newWeights[i] = tempValues[i];
+        return static_cast<float*>(w.values);
     }
-    return newWeights;
-}
-
-float* WeightsContext::getFP32Values(ShapedWeights const& w)
-{
-    assert((w.type == ::ONNX_NAMESPACE::TensorProto::FLOAT || w.type == ::ONNX_NAMESPACE::TensorProto::FLOAT16)
-        && "Conversion only valid from FLOAT or FLOAT16");
-    return (w.type == ::ONNX_NAMESPACE::TensorProto::FLOAT) ? static_cast<float*>(w.values)
-        : convertFP16Data(w.values, w.shape);
+    else if (w.type == ::ONNX_NAMESPACE::TensorProto::FLOAT16)
+    {
+        return convertToFp32<half_float::half>(w);
+    }
+    else if (w.type == ::ONNX_NAMESPACE::TensorProto::BFLOAT16)
+    {
+        return convertToFp32<BFloat16>(w);
+    }
+    ONNXTRT_THROW(MAKE_ERROR("Invalid type found in getFP32Values() call.", ErrorCode::kINTERNAL_ERROR));
 }

 ShapedWeights WeightsContext::createNamedTempWeights(ShapedWeights::DataType type, nvinfer1::Dims const& shape,

WeightsContext.hpp

Lines changed: 14 additions & 3 deletions
@@ -6,6 +6,7 @@

 #include "ShapedWeights.hpp"
 #include "Status.hpp"
+#include "errorHelpers.hpp"
 #include "weightUtils.hpp"
 #include <string>
 #include <vector>

@@ -64,10 +65,11 @@ class WeightsContext
     bool convertOnnxWeights(
         ::ONNX_NAMESPACE::TensorProto const& onnxTensor, ShapedWeights* weights, bool ownAllWeights = false);

-    // Helper function to convert weightValues' type from fp16 to fp32.
-    float* convertFP16Data(void* weightValues, nvinfer1::Dims const& shape);
+    // Helper function to convert weightValues' type from fp16/bf16 to fp32.
+    template <typename DataType>
+    [[nodiscard]] float* convertToFp32(ShapedWeights const& w);

-    // Helper function to get fp32 representation of fp16 or fp32 weights.
+    // Helper function to get fp32 representation of fp16, bf16, or fp32 weights.
     float* getFP32Values(ShapedWeights const& w);

     // Register an unique name for the created weights.

@@ -112,5 +114,14 @@ DataType* WeightsContext::convertInt32Data(int32_t const* weightValues, nvinfer1
     }
     return newWeights;
 }
+template <typename DataType>
+[[nodiscard]] float* WeightsContext::convertToFp32(ShapedWeights const& w)
+{
+    int64_t const nbWeights = volume(w.shape);
+    auto result = static_cast<float*>(createTempWeights(::ONNX_NAMESPACE::TensorProto::FLOAT, w.shape).values);
+    std::copy_n(static_cast<DataType const*>(w.values), nbWeights, result);
+
+    return result;
+}

 } // namespace onnx2trt
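The new `convertToFp32<DataType>` leans on each narrow type's implicit conversion to `float`, letting `std::copy_n` do the widening element by element. A standalone sketch of the same mechanism with a toy bf16 type (the real `half_float::half` and `BFloat16` types come from the parser's dependencies; this stand-in is hypothetical):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <iostream>

// Toy stand-in for a bf16 type: bf16 keeps the upper 16 bits of an fp32.
// The implicit conversion operator is what lets std::copy_n widen a bf16
// buffer into a float buffer, as in convertToFp32<DataType> above.
struct ToyBFloat16
{
    uint16_t bits{};
    operator float() const
    {
        uint32_t const wide = static_cast<uint32_t>(bits) << 16;
        float f;
        std::memcpy(&f, &wide, sizeof(f));
        return f;
    }
};

int main()
{
    ToyBFloat16 const src[2] = {{0x3F80}, {0x4000}}; // bf16 bit patterns for 1.0f and 2.0f
    float dst[2];
    std::copy_n(src, 2, dst);                     // per-element widening via operator float()
    std::cout << dst[0] << " " << dst[1] << "\n"; // prints "1 2"
}
```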

docs/Changelog.md

Lines changed: 13 additions & 6 deletions
@@ -2,6 +2,13 @@

 # ONNX-TensorRT Changelog

+# TensorRT 10.9 GA Release - 2025-3-7
+For more details, see the 10.9 GA release notes
+
+- Added support for Python AOT plugins
+- Added support for opset 21 GroupNorm
+- Fixed support for opset 18+ ScatterND
+
 # TensorRT 10.8 GA Release - 2025-1-30
 For more details, see the 10.8 GA release notes

(The remaining hunks below only strip trailing whitespace; each -/+ pair is otherwise identical.)

@@ -46,7 +53,7 @@ For more details, see the 10.3 GA release notes.
 - Added support for tensor `axes` inputs for `Slice` nodes
 - Updated `ScatterElements` importer to use an updated plugin

-# TensorRT 10.2 GA Release - 2024-7-10
+# TensorRT 10.2 GA Release - 2024-7-10
 For more details, see the 10.2 GA release notes.

 - Improved error handling with new macros and classes

@@ -94,7 +101,7 @@ For more details, see the 9.2 GA release notes for the fixes since 9.1 GA.
 For more details, see the 9.1 GA release notes for the fixes since 9.0 GA.

 - Added new `ErrorCode` enums to improve error logging
-- Added new members to `IParserError` to improve error logging
+- Added new members to `IParserError` to improve error logging
 - Added static checkers when parsing nodes, resulting better reporting of errors

 # TensorRT 9.0 GA Release - 2023-9-5

@@ -108,7 +115,7 @@ For more details, see the 9.0 GA release notes for the fixes since 9.0 EA.
 For more details, see the 9.0 EA release notes for the fixes since 8.6 GA.

 - Added support for INT64 data type. The ONNX parser no longer automatically casts INT64 to INT32.
-- Added support for ONNX local functions when parsing ONNX models with the ONNX parser.
+- Added support for ONNX local functions when parsing ONNX models with the ONNX parser.
 - Breaking API Change: In TensorRT 9.0, due to the introduction of INT64 as a supported data type, ONNX models with INT64 I/O require INT64 bindings. Note that prior to this release, such models required INT32 bindings.
 - Updated ONNX submodule to v1.14.0.

@@ -135,7 +142,7 @@ For more details, see the 8.6 EA release notes for new features added in TensorR

 ## Changed

-- All cast operations will now use the new `CastLayer` over the pervious `IdentityLayer`.
+- All cast operations will now use the new `CastLayer` over the pervious `IdentityLayer`.

 # TensorRT 8.5 GA Release - 2022-11-2

@@ -172,7 +179,7 @@ For more details, see the 8.5 GA release notes for new features added in TensorR

 ## TensorRT 8.4 GA Release - 2022-6-6

-### Added
+### Added

 For more details, see the 8.4 GA release notes for new features added in TensorRT 8.4

@@ -197,7 +204,7 @@ See the 8.2 EA release notes for new features added in TensorRT 8.2.
 ### Fixes
 - Removed duplicate constant layer checks that caused some performance regressions
 - Fixed expand dynamic shape calculations
-- Added parser-side checks for Scatter layer support
+- Added parser-side checks for Scatter layer support

 ## TensorRT 8.2 EA Release - 2021-10-04
 ### Added

docs/operators.md

Lines changed: 1 addition & 1 deletion
@@ -2,7 +2,7 @@

 # Supported ONNX Operators

-TensorRT 10.8 supports operators in the inclusive range of opset 9 to opset 22. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.
+TensorRT 10.9 supports operators in the inclusive range of opset 9 to opset 22. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.

 TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOAT16, FP8, FP4, INT32, INT64, INT8, INT4, UINT8, and BOOL

importerUtils.cpp

Lines changed: 36 additions & 1 deletion
@@ -982,9 +982,44 @@ std::unique_ptr<nvinfer1::IPluginV3> createPlugin(ImporterContext* ctx, ::ONNX_N
     }
     else if (creatorVersion == CreatorVersion::kV3QUICK)
    {
+
+        OnnxAttrs attrs(node, ctx);
+        nvinfer1::QuickPluginCreationRequest request;
+
+        // Node-level specifications override network-level preferences
+        if (attrs.count("aot"))
+        {
+            auto const aotOrJit = static_cast<bool>(attrs.get<int>("aot", 0));
+            if (aotOrJit)
+            {
+                request = nvinfer1::QuickPluginCreationRequest::kSTRICT_AOT;
+            }
+            else
+            {
+                request = nvinfer1::QuickPluginCreationRequest::kSTRICT_JIT;
+            }
+        }
+        else
+        {
+            auto const preferAOT
+                = ctx->network()->getFlag(nvinfer1::NetworkDefinitionCreationFlag::kPREFER_AOT_PYTHON_PLUGINS);
+            auto const preferJIT
+                = ctx->network()->getFlag(nvinfer1::NetworkDefinitionCreationFlag::kPREFER_JIT_PYTHON_PLUGINS);
+            ONNXTRT_CHECK(!(preferAOT && preferJIT) &&
+                "Both NetworkDefinitionCreationFlag::kPREFER_AOT_PYTHON_PLUGINS and "
+                "NetworkDefinitionCreationFlag::kPREFER_JIT_PYTHON_PLUGINS cannot be specified at the same time.", ErrorCode::kUNSUPPORTED_GRAPH);
+
+            // If neither flag is specified, defer to the plugin creator to pick whichever implementation has actually
+            // been defined.
+            // - If both are defined, the plugin creator will raise an error.
+            request = preferJIT ? nvinfer1::QuickPluginCreationRequest::kPREFER_JIT
+                                : (preferAOT ? nvinfer1::QuickPluginCreationRequest::kPREFER_AOT
+                                             : nvinfer1::QuickPluginCreationRequest::kUNKNOWN);
+        }
+
         return std::unique_ptr<nvinfer1::IPluginV3>{
             static_cast<nvinfer1::IPluginCreatorV3Quick*>(pluginCreator)
-                ->createPlugin(name.c_str(), pluginNamespace.c_str(), &fc, nvinfer1::TensorRTPhase::kBUILD)};
+                ->createPlugin(name.c_str(), pluginNamespace.c_str(), &fc, nvinfer1::TensorRTPhase::kBUILD, request)};
     }
     ONNXTRT_CHECK(false && "Found invalid creator version when creating a V3 plugin.", ErrorCode::kINTERNAL_ERROR);
 }
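For context, the network-level preference consulted above is set when the network is created. A minimal usage sketch built on the same flags referenced in the diff (not part of this commit; error handling and the rest of the build omitted):

```cpp
#include "NvInfer.h"

// Sketch: create a network that prefers ahead-of-time (AOT) Python plugin
// implementations; kV3QUICK plugin creators then receive kPREFER_AOT unless a
// node-level "aot" attribute overrides the preference.
nvinfer1::INetworkDefinition* makeAotPreferringNetwork(nvinfer1::IBuilder& builder)
{
    uint32_t const flags
        = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kPREFER_AOT_PYTHON_PLUGINS);
    return builder.createNetworkV2(flags);
}
```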

onnxOpImporters.cpp

Lines changed: 86 additions & 1 deletion
@@ -1639,6 +1639,12 @@ NodeOutputs QuantDequantLinearHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::Nod
         node, nodeIdx, nvonnxparser::ErrorCode::kINVALID_NODE);

     bool stronglyTyped = ctx->isStronglyTyped();
+    if (!stronglyTyped && chosenDataType != DataType::kINT8)
+    {
+        LOG_WARNING(
+            "A strongly typed network is recommended for networks with QuantizedLinear/DequantizedLinear nodes using "
+            "precisions other than int8.");
+    }
     if (isDQ)
     {
         // Add and configure a DequantizeLayer.
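The warning points users at strongly typed networks, which are requested via a creation flag. A minimal sketch (assumes `NetworkDefinitionCreationFlag::kSTRONGLY_TYPED` from `NvInfer.h`):

```cpp
#include "NvInfer.h"

// Sketch: create the strongly typed network the new warning recommends for
// QuantizeLinear/DequantizeLinear graphs using precisions other than int8.
nvinfer1::INetworkDefinition* makeStronglyTypedNetwork(nvinfer1::IBuilder& builder)
{
    uint32_t const flags
        = 1U << static_cast<uint32_t>(nvinfer1::NetworkDefinitionCreationFlag::kSTRONGLY_TYPED);
    return builder.createNetworkV2(flags);
}
```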
@@ -2251,8 +2257,86 @@ DEFINE_BUILTIN_OP_IMPORTER(GreaterOrEqual)
         /*greater*/ true);
 }

+// Support opset21 GroupNorm, where scale and bias is shape [C] instead of [G].
+NodeOutputs groupNorm21Helper(ImporterContext* ctx, const ::ONNX_NAMESPACE::NodeProto& node, size_t const nodeIdx,
+    std::vector<TensorOrWeights>& inputs)
+{
+    auto* input = &convertToTensor(inputs.at(0), ctx);
+    auto* scale = &convertToTensor(inputs.at(1), ctx);
+    auto* bias = &convertToTensor(inputs.at(2), ctx);
+
+    OnnxAttrs attrs(node, ctx);
+    float epsilon = attrs.get("epsilon", 1e-5f);
+    int32_t nbGroups = attrs.get("num_groups", 1);
+
+    auto nbDims = input->getDimensions().nbDims;
+    uint32_t axesMask{0};
+    std::vector<int32_t> unsqueezeAxes;
+
+    for (int32_t i = 0; i < nbDims; i++)
+    {
+        if (i == 1)
+        {
+            continue;
+        }
+        // Axes should correspond to the spatial dimensions
+        if (i >= 2)
+        {
+            axesMask |= 1 << i;
+        }
+        unsqueezeAxes.push_back(i);
+    }
+
+    // Reshape [N, C, ...] to [N, G, C/G, ...]
+    auto inShape = shapeOf(*input);
+
+    auto gnShape = concat(ctx, gather(ctx, inShape, shapeVector(0)), shapeVector(nbGroups));
+    gnShape = concat(ctx, gnShape, floorDiv(ctx, gather(ctx, inShape, shapeVector(1)), shapeVector(nbGroups)));
+    gnShape = concat(ctx, gnShape, shapeVector(-1));
+    auto gnReshaped = &reshape(ctx, *input, gnShape);
+
+    // Run instanceNorm with scale = 1, bias = 0
+
+    auto tmpScale
+        = constantOfShape(ctx, addConstantScalar(ctx, 1.0F, ::ONNX_NAMESPACE::TensorProto::FLOAT)->getOutput(0),
+            &gather(ctx, shapeOf(*gnReshaped), shapeVector(1)).tensor(ctx));
+    auto tmpBias
+        = constantOfShape(ctx, addConstantScalar(ctx, 0.0F, ::ONNX_NAMESPACE::TensorProto::FLOAT)->getOutput(0),
+            &gather(ctx, shapeOf(*gnReshaped), shapeVector(1)).tensor(ctx));
+
+    tmpScale = castHelper(ctx, tmpScale, scale->getType());
+    tmpBias = castHelper(ctx, tmpBias, bias->getType());
+
+    tmpScale = unsqueezeTensor(ctx, *tmpScale, unsqueezeAxes);
+    tmpBias = unsqueezeTensor(ctx, *tmpBias, unsqueezeAxes);
+
+    auto tmpNorm = N_CHECK(ctx->network()->addNormalization(*gnReshaped, *tmpScale, *tmpBias, axesMask));
+    tmpNorm->setEpsilon(epsilon);
+
+    auto normOut = N_CHECK(tmpNorm->getOutput(0));
+
+    // Reshape back to [N, C, ...]
+    auto reshapeBackOut = &reshape(ctx, *normOut, inShape);
+
+    // Do final scale and bias add.
+    using eOp = nvinfer1::ElementWiseOperation;
+    scale = unsqueezeTensor(ctx, *scale, unsqueezeAxes);
+    bias = unsqueezeTensor(ctx, *bias, unsqueezeAxes);
+    auto scaleLayer = N_CHECK(ctx->network()->addElementWise(*scale, *reshapeBackOut, eOp::kPROD));
+    auto scaledOutput = N_CHECK(scaleLayer->getOutput(0));
+    auto biasLayer = N_CHECK(ctx->network()->addElementWise(*scaledOutput, *bias, eOp::kSUM));
+    auto biasOutput = N_CHECK(biasLayer->getOutput(0));
+
+    return {{biasOutput}};
+}
+
 DEFINE_BUILTIN_OP_IMPORTER(GroupNormalization)
 {
+    if (ctx->getOpsetVersion() >= 21)
+    {
+        return groupNorm21Helper(ctx, node, nodeIdx, inputs);
+    }
+
     return normalizationHelper(ctx, node, nodeIdx, inputs);
 }
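To see what the helper computes, here is a minimal scalar reference of opset-21 GroupNormalization semantics (an illustration only, not the importer's code path): statistics come from each block of C/G channels, which is what `INormalizationLayer` yields after the `[N, G, C/G, ...]` reshape with unit scale and zero bias, and the per-channel scale/bias of shape `[C]` are applied after reshaping back.

```cpp
#include <cmath>
#include <cstdint>
#include <vector>

// Scalar sketch of opset-21 GroupNorm on an NCHW tensor flattened to
// [N, C, HW]. scale and bias have shape [C] (not [G], as in opset 18).
std::vector<float> groupNormRef(std::vector<float> const& x, int64_t N, int64_t C, int64_t HW,
    int64_t G, std::vector<float> const& scale, std::vector<float> const& bias, float eps = 1e-5F)
{
    std::vector<float> y(x.size());
    int64_t const cPerG = C / G; // ONNX requires C to be divisible by num_groups
    for (int64_t n = 0; n < N; ++n)
    {
        for (int64_t g = 0; g < G; ++g)
        {
            // Mean/variance over the [C/G, HW] block, mirroring the reshape trick.
            int64_t const begin = (n * C + g * cPerG) * HW;
            int64_t const size = cPerG * HW;
            double sum = 0.0, sqSum = 0.0;
            for (int64_t i = 0; i < size; ++i)
            {
                sum += x[begin + i];
                sqSum += x[begin + i] * x[begin + i];
            }
            double const mean = sum / size;
            double const invStd = 1.0 / std::sqrt(sqSum / size - mean * mean + eps);
            // Per-channel affine, applied after "reshaping back" to [N, C, HW].
            for (int64_t c = g * cPerG; c < (g + 1) * cPerG; ++c)
            {
                for (int64_t i = 0; i < HW; ++i)
                {
                    int64_t const idx = (n * C + c) * HW + i;
                    y[idx] = static_cast<float>((x[idx] - mean) * invStd) * scale[c] + bias[c];
                }
            }
        }
    }
    return y;
}
```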

@@ -5504,7 +5588,8 @@ DEFINE_BUILTIN_OP_IMPORTER(Slice)
     starts = ShapeTensor{*input1};
     ends = ShapeTensor{*input2};
     // "If axes are omitted, they are set to [0, ..., ndim-1]."
-    axes = nbInputs > 3 ? ShapeTensor(ctx, inputs.at(3)) : iotaShapeVector(dims.size());
+    axes = nbInputs > 3 && !inputs.at(3).isNullTensor() ? ShapeTensor(ctx, inputs.at(3))
+                                                        : iotaShapeVector(dims.size());
     ONNXTRT_CHECK_NODE((starts.size() == axes.size()),
         "The shape of input starts misaligns with the shape of input axes. Shape of input starts = "
             << starts.size() << ", shape of input axes = " << axes.size() << ".",
