10.12-GA Release (#1028)

kevinch-nv · web-flow · commit ff761afc76f4 · 2025-06-16T14:04:05.000-07:00
Signed-off-by: Kevin Chen <kevinch@nvidia.com>
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -28,7 +28,7 @@ add_definitions("-DSOURCE_LENGTH=${SOURCE_LENGTH}")
 # Version information
 #--------------------------------------------------
 set(ONNX2TRT_MAJOR 10)
-set(ONNX2TRT_MINOR 11)
+set(ONNX2TRT_MINOR 12)
 set(ONNX2TRT_PATCH 0)
 set(ONNX2TRT_VERSION "${ONNX2TRT_MAJOR}.${ONNX2TRT_MINOR}.${ONNX2TRT_PATCH}" CACHE STRING "ONNX2TRT version")
 
diff --git a/ImporterContext.cpp b/ImporterContext.cpp
@@ -26,6 +26,126 @@
         }                                                                                                              \
     } while (0)
 
+namespace
+{
+
+//! Translates a "logical" library name into an OS-dependent DSO or DLL name
+std::string getOSLibraryName(char const* logicalName)
+{
+    std::stringstream libName;
+#if defined(_WIN32)
+    libName << logicalName << ".dll";
+#else
+    libName << "lib" << logicalName << ".so." << NV_TENSORRT_MAJOR;
+#endif
+    return libName.str();
+}
+
+//! Platform-agnostic wrapper around dynamic libraries.
+class DynamicLibrary
+{
+public:
+    explicit DynamicLibrary(std::string const& name)
+        : mLibName{name}
+    {
+#if defined(_WIN32)
+        mHandle = LoadLibraryA(name.c_str());
+#else  // defined(_WIN32)
+        int32_t flags{RTLD_LAZY};
+        mHandle = dlopen(name.c_str(), flags);
+#endif // defined(_WIN32)
+
+        if (mHandle == nullptr)
+        {
+            std::string errorStr{};
+#if !defined(_WIN32)
+            errorStr = std::string{" due to "} + std::string{dlerror()};
+#endif
+            throw std::runtime_error("Unable to open library: " + name + errorStr);
+        }
+    }
+
+    DynamicLibrary(DynamicLibrary const&) = delete;
+    DynamicLibrary(DynamicLibrary const&&) = delete;
+
+    ~DynamicLibrary()
+    {
+        try
+        {
+#if defined(_WIN32)
+            RT_ASSERT(static_cast<bool>(FreeLibrary(static_cast<HMODULE>(mHandle))));
+#else
+            RT_ASSERT(dlclose(mHandle) == 0);
+#endif
+        }
+        catch (...)
+        {
+            std::cerr << "Unable to close library: " << mLibName << std::endl;
+        }
+    }
+
+    //!
+    //! Retrieve a function symbol from the loaded library.
+    //!
+    //! \return the loaded symbol on success
+    //! \throw std::invalid_argument if loading the symbol failed.
+    //!
+    template <typename Signature>
+    std::function<Signature> symbolAddress(char const* name)
+    {
+        if (mHandle == nullptr)
+        {
+            throw std::runtime_error("Handle to library is nullptr.");
+        }
+        void* ret;
+#if defined(_MSC_VER)
+        ret = static_cast<void*>(GetProcAddress(static_cast<HMODULE>(mHandle), name));
+#else
+        ret = dlsym(mHandle, name);
+#endif
+        if (ret == nullptr)
+        {
+            std::string const kERROR_MSG(mLibName + ": error loading symbol: " + std::string(name));
+            throw std::invalid_argument(kERROR_MSG);
+        }
+        return reinterpret_cast<Signature*>(ret);
+    }
+
+    std::string getFullPath() const
+    {
+        RT_ASSERT(mHandle != nullptr);
+#if defined(__linux__)
+        link_map* linkMap = nullptr;
+        auto const err = dlinfo(mHandle, RTLD_DI_LINKMAP, &linkMap);
+        RT_ASSERT(err == 0 && linkMap != nullptr && linkMap->l_name != nullptr);
+        return std::string{linkMap->l_name};
+#elif defined(_WIN32)
+        constexpr int32_t kMAX_PATH_LEN{4096};
+        std::string path(kMAX_PATH_LEN, '\0'); // since C++11, std::string storage is guaranteed to be contiguous
+        auto const pathLen = GetModuleFileNameA(static_cast<HMODULE>(mHandle), &path[0], kMAX_PATH_LEN);
+        RT_ASSERT(GetLastError() == ERROR_SUCCESS);
+        path.resize(pathLen);
+        path.shrink_to_fit();
+        return path;
+#else
+        RT_ASSERT(!"Unsupported operation: getFullPath()");
+#endif
+    }
+
+private:
+    std::string mLibName{}; //!< Name of the DynamicLibrary
+    void* mHandle{};        //!< Handle to the DynamicLibrary
+};
+
+//! Translates an OS-dependent DSO/DLL name into a path on the filesystem
+std::string getOSLibraryPath(std::string const& osLibName)
+{
+    DynamicLibrary lib{osLibName};
+    return lib.getFullPath();
+}
+
+} // namespace
+
 namespace onnx2trt
 {
 
@@ -105,7 +225,8 @@ void ImporterContext::registerTensor(TensorOrWeights tensor, std::string const&
     p.first->second = std::move(tensor);
 }
 
-void ImporterContext::registerLayer(nvinfer1::ILayer* layer, std::string const& basename, ::ONNX_NAMESPACE::NodeProto const* node)
+void ImporterContext::registerLayer(
+    nvinfer1::ILayer* layer, std::string const& basename, ::ONNX_NAMESPACE::NodeProto const* node)
 {
     // No layer will be added for Constant nodes in ONNX.
     if (layer)
@@ -149,99 +270,6 @@ void ImporterContext::registerLayer(nvinfer1::ILayer* layer, ::ONNX_NAMESPACE::N
     registerLayer(layer, basename, &node);
 }
 
-namespace
-{
-
-//! Translates a "logical" library name into an OS-dependent DSO or DLL name
-std::string getOSLibraryName(char const* logicalName)
-{
-    std::stringstream libName;
-#if defined(_WIN32)
-    libName << logicalName << ".dll";
-#else
-    libName << "lib" << logicalName << ".so." << NV_TENSORRT_MAJOR;
-#endif
-    return libName.str();
-}
-
-//! Platform-agnostic wrapper around dynamic libraries.
-class DynamicLibrary
-{
-public:
-    explicit DynamicLibrary(std::string const& name)
-        : mLibName{name}
-    {
-#if defined(_WIN32)
-        mHandle = LoadLibraryA(name.c_str());
-#else  // defined(_WIN32)
-        int32_t flags{RTLD_LAZY};
-        mHandle = dlopen(name.c_str(), flags);
-#endif // defined(_WIN32)
-
-        if (mHandle == nullptr)
-        {
-            std::string errorStr{};
-#if !defined(_WIN32)
-            errorStr = std::string{" due to "} + std::string{dlerror()};
-#endif
-            throw std::runtime_error("Unable to open library: " + name + errorStr);
-        }
-    }
-
-    DynamicLibrary(DynamicLibrary const&) = delete;
-    DynamicLibrary(DynamicLibrary const&&) = delete;
-
-    ~DynamicLibrary()
-    {
-        try
-        {
-#if defined(_WIN32)
-            RT_ASSERT(static_cast<bool>(FreeLibrary(static_cast<HMODULE>(mHandle))));
-#else
-            RT_ASSERT(dlclose(mHandle) == 0);
-#endif
-        }
-        catch (...)
-        {
-            std::cerr << "Unable to close library: " << mLibName << std::endl;
-        }
-    }
-
-    std::string getFullPath() const
-    {
-        RT_ASSERT(mHandle != nullptr);
-#if defined(__linux__)
-        link_map* linkMap = nullptr;
-        auto const err = dlinfo(mHandle, RTLD_DI_LINKMAP, &linkMap);
-        RT_ASSERT(err == 0 && linkMap != nullptr && linkMap->l_name != nullptr);
-        return std::string{linkMap->l_name};
-#elif defined(_WIN32)
-        constexpr int32_t kMAX_PATH_LEN{4096};
-        std::string path(kMAX_PATH_LEN, '\0'); // since C++11, std::string storage is guaranteed to be contiguous
-        auto const pathLen = GetModuleFileNameA(static_cast<HMODULE>(mHandle), &path[0], kMAX_PATH_LEN);
-        RT_ASSERT(GetLastError() == ERROR_SUCCESS);
-        path.resize(pathLen);
-        path.shrink_to_fit();
-        return path;
-#else
-        RT_ASSERT(!"Unsupported operation: getFullPath()");
-#endif
-    }
-
-private:
-    std::string mLibName{}; //!< Name of the DynamicLibrary
-    void* mHandle{};        //!< Handle to the DynamicLibrary
-};
-
-//! Translates an OS-dependent DSO/DLL name into a path on the filesystem
-std::string getOSLibraryPath(std::string const& osLibName)
-{
-    DynamicLibrary lib{osLibName};
-    return lib.getFullPath();
-}
-
-} // namespace
-
 void ImporterContext::addUsedVCPluginLibrary(
     ::ONNX_NAMESPACE::NodeProto const& node, char const* pluginName, char const* pluginLib)
 {
@@ -272,3 +300,4 @@ std::vector<std::string> ImporterContext::getUsedVCPluginLibraries()
 }
 
 } // namespace onnx2trt
+
diff --git a/ImporterContext.hpp b/ImporterContext.hpp
@@ -8,8 +8,8 @@
 #include "ShapedWeights.hpp"
 #include "Status.hpp"
 #include "TensorOrWeights.hpp"
-#include "onnxErrorRecorder.hpp"
 #include "WeightsContext.hpp"
+#include "onnxErrorRecorder.hpp"
 #include <fstream>
 #include <functional>
 #include <list>
@@ -403,3 +403,4 @@ typedef std::function<void(
     OpStaticErrorChecker;
 
 } // namespace onnx2trt
+
diff --git a/ModelImporter.cpp b/ModelImporter.cpp
@@ -172,6 +172,8 @@ void parseNode(
 
     bool skipUInt8Conversion = (node.op_type() == "QuantizeLinear" || node.op_type() == "DequantizeLinear"
         || (allowUint8Quantization && node.op_type() == "Constant"));
+    skipUInt8Conversion
+        |= (node.op_type() == "TRT_MXFP8QuantizeLinear" || node.op_type() == "TRT_MXFP8DequantizeLinear");
     if (!skipUInt8Conversion)
     {
         for (auto& nodeInput : nodeInputs)
@@ -435,7 +437,9 @@ std::vector<Status> importInput(ImporterContext* ctx, ::ONNX_NAMESPACE::ValueInf
     CHECK_INPUT(
         convertDtype(onnxDtype.elem_type(), &trtDtype) && "Failed to convert ONNX date type to TensorRT data type.",
         ErrorCode::kUNSUPPORTED_NODE, input.name(), errorList);
-    nvinfer1::Dims trt_dims;
+    // If convertOnnxDims fails, trt_dims may be left unmodified, and CHECK_INPUT does not return immediately.
+    // So trt_dims must be initialized to avoid an illegal access in the verbose logging that follows.
+    nvinfer1::Dims trt_dims{};
     size_t const oldNbNamedDimensions = namedDims.size();
     CHECK_INPUT(convertOnnxDims(onnxDtype.shape().dim(), trt_dims, namedDims)
             && "Failed to convert ONNX dimensions to TensorRT dimensions.",
diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia.
 
 ## Supported TensorRT Versions
 
-Development on the this branch is for the latest version of [TensorRT 10.11](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
+Development on this branch is for the latest version of [TensorRT 10.12](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
 
 For previous versions of TensorRT, refer to their respective branches.
 
@@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
 ### Dependencies
 
  - [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
- - [TensorRT 10.11](https://developer.nvidia.com/tensorrt)
- - [TensorRT 10.11 open source libraries](https://github.com/NVIDIA/TensorRT/)
+ - [TensorRT 10.12](https://developer.nvidia.com/tensorrt)
+ - [TensorRT 10.12 open source libraries](https://github.com/NVIDIA/TensorRT/)
 
 ### Building
 
@@ -82,7 +82,7 @@ Refer to the link or run `polygraphy run -h` for more information on CLI options
 
 Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` files.
 
-TensorRT 10.11 supports ONNX release 1.18.0. Install it with:
+TensorRT 10.12 supports ONNX release 1.18.0. Install it with:
 
     python3 -m pip install onnx==1.18.0
 
diff --git a/Status.hpp b/Status.hpp
@@ -201,6 +201,7 @@ static std::ostream& operator<<(std::ostream& stream, nvinfer1::DataType const&
     case nvinfer1::DataType::kINT64: return stream << "int64";
     case nvinfer1::DataType::kBOOL: return stream << "bool";
     case nvinfer1::DataType::kFP8: return stream << "float8";
+    case nvinfer1::DataType::kE8M0: return stream << "floatE8M0";
     case nvinfer1::DataType::kINT4: return stream << "int4";
     case nvinfer1::DataType::kFP4: return stream << "fp4";
 
diff --git a/TensorOrWeights.cpp b/TensorOrWeights.cpp
@@ -25,6 +25,7 @@ std::string TensorOrWeights::getType() const
         case nvinfer1::DataType::kFP8: return "FP8";
         case nvinfer1::DataType::kINT4: return "INT4";
         case nvinfer1::DataType::kFP4: return "FP4";
+        case nvinfer1::DataType::kE8M0: return "E8M0";
         }
     }
     else
@@ -85,6 +86,7 @@ ShapedWeights::DataType TensorOrWeights::convertTRTDataType(nvinfer1::DataType d
         case nvinfer1::DataType::kFP8: return ::ONNX_NAMESPACE::TensorProto::FLOAT8E4M3FN;
         case nvinfer1::DataType::kINT4: return ::ONNX_NAMESPACE::TensorProto::INT4;
         case nvinfer1::DataType::kFP4: return ::ONNX_NAMESPACE::TensorProto::FLOAT4E2M1;
+        case nvinfer1::DataType::kE8M0: break;
         }
         assert(false && "Unknown datatype");
         return ::ONNX_NAMESPACE::TensorProto::FLOAT;
diff --git a/docs/Changelog.md b/docs/Changelog.md
@@ -2,6 +2,13 @@
 
 # ONNX-TensorRT Changelog
 
+# TensorRT 10.12 GA Release - 2025-6-16
+For more details, see the 10.12 GA release notes
+
+- Added support for integer-typed base tensors for `Pow` operations
+- Added support for custom `MXFP8` quantization operations
+- Added support for ellipses, diagonal, and broadcasting in `Einsum` operations
+
 # TensorRT 10.11 GA Release - 2025-5-16
 For more details, see the 10.11 GA release notes
 
diff --git a/docs/operators.md b/docs/operators.md
@@ -2,7 +2,7 @@
 
 # Supported ONNX Operators
 
-TensorRT 10.11 supports operators in the inclusive range of opset 9 to opset 23. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.
+TensorRT 10.12 supports operators in the inclusive range of opset 9 to opset 23. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.
 
 TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOAT16, FP8, FP4, INT32, INT64, INT8, INT4, UINT8, and BOOL
 
@@ -134,7 +134,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
 | Or                        | Y          | BOOL |
 | Pad                       | Y          | FP32, FP16, BF16, INT32, INT64 |
 | ParametricSoftplus        | Y          | FP32, FP16, BF16 |
-| Pow                       | Y          | FP32, FP16, BF16 |
+| Pow                       | Y          | FP32, FP16, BF16, INT32, INT64 |
 | PRelu                     | Y          | FP32, FP16, BF16 |
 | QLinearConv               | N          |
 | QLinearMatMul             | N          |
diff --git a/errorHelpers.hpp b/errorHelpers.hpp
@@ -78,7 +78,7 @@ inline char const* errorCodeStr(ErrorCode code)
     case ErrorCode::kREFIT_FAILED: return "REFIT_FAILED";
     }
     return "UNKNOWN";
-};
+}
 
 inline std::string const parserErrorStr(nvonnxparser::IParserError const* error)
 {
diff --git a/importerUtils.cpp b/importerUtils.cpp
diff --git a/importerUtils.hpp b/importerUtils.hpp
diff --git a/onnxOpCheckers.cpp b/onnxOpCheckers.cpp
diff --git a/onnxOpImporters.cpp b/onnxOpImporters.cpp
diff --git a/onnx_tensorrt/__init__.py b/onnx_tensorrt/__init__.py

Original file line number	Diff line number	Diff line change
`@@ -25,6 +25,7 @@ std::string TensorOrWeights::getType() const`
`25`	`25`	`case nvinfer1::DataType::kFP8: return "FP8";`
`26`	`26`	`case nvinfer1::DataType::kINT4: return "INT4";`
`27`	`27`	`case nvinfer1::DataType::kFP4: return "FP4";`
	`28`	`+ case nvinfer1::DataType::kE8M0: return "E8M0";`
`28`	`29`	`}`
`29`	`30`	`}`
`30`	`31`	`else`
`@@ -85,6 +86,7 @@ ShapedWeights::DataType TensorOrWeights::convertTRTDataType(nvinfer1::DataType d`
`85`	`86`	`case nvinfer1::DataType::kFP8: return ::ONNX_NAMESPACE::TensorProto::FLOAT8E4M3FN;`
`86`	`87`	`case nvinfer1::DataType::kINT4: return ::ONNX_NAMESPACE::TensorProto::INT4;`
`87`	`88`	`case nvinfer1::DataType::kFP4: return ::ONNX_NAMESPACE::TensorProto::FLOAT4E2M1;`
	`89`	`+ case nvinfer1::DataType::kE8M0: break;`
`88`	`90`	`}`
`89`	`91`	`assert(false && "Unknown datatype");`
`90`	`92`	`return ::ONNX_NAMESPACE::TensorProto::FLOAT;`
Original file line number	Diff line number	Diff line change
`@@ -78,7 +78,7 @@ inline char const* errorCodeStr(ErrorCode code)`
`78`	`78`	`case ErrorCode::kREFIT_FAILED: return "REFIT_FAILED";`
`79`	`79`	`}`
`80`	`80`	`return "UNKNOWN";`
`81`		`-};`
	`81`	`+}`
`82`	`82`
`83`	`83`	`inline std::string const parserErrorStr(nvonnxparser::IParserError const* error)`
`84`	`84`	`{`