Commit 118ed0a

ONNX-TensorRT 10.8-GA Release (#1012)
* ONNX-TensorRT 10.8-GA Release
* update date

Signed-off-by: Yuan Yao <[email protected]>
1 parent 9c69a24 commit 118ed0a

File tree

13 files changed: +177 -118 lines

README.md

Lines changed: 4 additions & 4 deletions

```diff
@@ -16,7 +16,7 @@ For press and other inquiries, please contact Hector Marinez at hmarinez@nvidia.

 ## Supported TensorRT Versions

-Development on the this branch is for the latest version of [TensorRT 10.7](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.
+Development on the this branch is for the latest version of [TensorRT 10.8](https://developer.nvidia.com/nvidia-tensorrt-download) with full-dimensions and dynamic shape support.

 For previous versions of TensorRT, refer to their respective branches.

@@ -29,8 +29,8 @@ Current supported ONNX operators are found in the [operator support matrix](docs
 ### Dependencies

 - [Protobuf >= 3.0.x](https://github.com/google/protobuf/releases)
-- [TensorRT 10.7](https://developer.nvidia.com/tensorrt)
-- [TensorRT 10.7 open source libaries] (https://github.com/NVIDIA/TensorRT/)
+- [TensorRT 10.8](https://developer.nvidia.com/tensorrt)
+- [TensorRT 10.8 open source libaries] (https://github.com/NVIDIA/TensorRT/)

 ### Building

@@ -82,7 +82,7 @@ Refer to the link or run `polygraphy run -h` for more information on CLI options

 Python bindings for the ONNX-TensorRT parser are packaged in the shipped `.whl` files.

-TensorRT 10.7 supports ONNX release 1.17.0. Install it with:
+TensorRT 10.8 supports ONNX release 1.17.0. Install it with:

     python3 -m pip install onnx==1.17.0
```

Status.hpp

Lines changed: 1 addition & 0 deletions

```diff
@@ -204,6 +204,7 @@ static std::ostream& operator<<(std::ostream& stream, nvinfer1::DataType const&
     case nvinfer1::DataType::kBOOL: return stream << "bool";
     case nvinfer1::DataType::kFP8: return stream << "float8";
     case nvinfer1::DataType::kINT4: return stream << "int4";
+    case nvinfer1::DataType::kFP4: return stream << "fp4";

     default: throw std::runtime_error("Unknown dtype");
     }
```
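
As a plain illustration of the pattern this hunk extends, here is a minimal, self-contained sketch (using a mock enum standing in for `nvinfer1::DataType`, so the names are assumptions, not the real header) of mapping each enum value to a printable name via `operator<<`:

```cpp
#include <iostream>
#include <stdexcept>

// Mock stand-in for nvinfer1::DataType; the real enum lives in NvInfer.h.
enum class DataType { kBOOL, kFP8, kINT4, kFP4 };

static std::ostream& operator<<(std::ostream& stream, DataType const& dtype)
{
    switch (dtype)
    {
    case DataType::kBOOL: return stream << "bool";
    case DataType::kFP8: return stream << "float8";
    case DataType::kINT4: return stream << "int4";
    case DataType::kFP4: return stream << "fp4"; // the case added in this commit
    }
    throw std::runtime_error("Unknown dtype");
}

int main()
{
    std::cout << DataType::kFP4 << "\n"; // prints "fp4"
    return 0;
}
```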

TensorOrWeights.cpp

Lines changed: 4 additions & 0 deletions

```diff
@@ -24,6 +24,7 @@ std::string TensorOrWeights::getType() const
     case nvinfer1::DataType::kBOOL: return "BOOL";
     case nvinfer1::DataType::kFP8: return "FP8";
     case nvinfer1::DataType::kINT4: return "INT4";
+    case nvinfer1::DataType::kFP4: return "FP4";
     }
 }
 else
@@ -42,6 +43,7 @@ std::string TensorOrWeights::getType() const
     case ::ONNX_NAMESPACE::TensorProto::INT64: return "INT64";
     case ::ONNX_NAMESPACE::TensorProto::FLOAT8E4M3FN: return "FP8";
     case ::ONNX_NAMESPACE::TensorProto::INT4: return "INT4";
+    case ::ONNX_NAMESPACE::TensorProto::FLOAT4E2M1: return "FP4";
     }
 }
 return "UNKNOWN TYPE";
@@ -62,6 +64,7 @@ nvinfer1::DataType TensorOrWeights::convertONNXDataType(ShapedWeights::DataType
     case ::ONNX_NAMESPACE::TensorProto::INT64: return nvinfer1::DataType::kINT64;
     case ::ONNX_NAMESPACE::TensorProto::FLOAT8E4M3FN: return nvinfer1::DataType::kFP8;
     case ::ONNX_NAMESPACE::TensorProto::INT4: return nvinfer1::DataType::kINT4;
+    case ::ONNX_NAMESPACE::TensorProto::FLOAT4E2M1: return nvinfer1::DataType::kFP4;
     }
     assert(false && "Unknown datatype");
     return nvinfer1::DataType::kFLOAT;
@@ -81,6 +84,7 @@ ShapedWeights::DataType TensorOrWeights::convertTRTDataType(nvinfer1::DataType d
     case nvinfer1::DataType::kINT64: return ::ONNX_NAMESPACE::TensorProto::INT64;
     case nvinfer1::DataType::kFP8: return ::ONNX_NAMESPACE::TensorProto::FLOAT8E4M3FN;
     case nvinfer1::DataType::kINT4: return ::ONNX_NAMESPACE::TensorProto::INT4;
+    case nvinfer1::DataType::kFP4: return ::ONNX_NAMESPACE::TensorProto::FLOAT4E2M1;
     }
     assert(false && "Unknown datatype");
     return ::ONNX_NAMESPACE::TensorProto::FLOAT;
```

WeightsContext.cpp

Lines changed: 3 additions & 1 deletion

```diff
@@ -356,7 +356,7 @@ bool WeightsContext::convertOnnxWeights(
     else if (onnxDtype == ::ONNX_NAMESPACE::TensorProto::INT32 || onnxDtype == ::ONNX_NAMESPACE::TensorProto::INT64
         || onnxDtype == ::ONNX_NAMESPACE::TensorProto::FLOAT16 || onnxDtype == ::ONNX_NAMESPACE::TensorProto::BFLOAT16
         || onnxDtype == ::ONNX_NAMESPACE::TensorProto::INT8 || onnxDtype == ::ONNX_NAMESPACE::TensorProto::BOOL
-        || onnxDtype == ::ONNX_NAMESPACE::TensorProto::INT4)
+        || onnxDtype == ::ONNX_NAMESPACE::TensorProto::INT4 || onnxDtype == ::ONNX_NAMESPACE::TensorProto::FLOAT4E2M1)
     {
         if (onnxTensor.raw_data().size() > 0)
         {
@@ -399,6 +399,8 @@ bool WeightsContext::convertOnnxWeights(
         break;
     case ::ONNX_NAMESPACE::TensorProto::INT4:
         // int4 data is packed, each int32 element contains one byte (two int4 nibbles)
+    case ::ONNX_NAMESPACE::TensorProto::FLOAT4E2M1:
+        // int4/fp4 data is packed, each int32 element contains one byte (two int4/fp4 nibbles)
         nbytes = onnxTensor.int32_data().size();
         dataPtr = convertPackedInt32Data(onnxTensor.int32_data().data(), shape, nbytes, onnxDtype);
         break;
```
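
To make the packing comment concrete: the following self-contained sketch (an illustration only, not the parser's actual `convertPackedInt32Data`; it assumes the low nibble holds the first element, and hand-decodes FP4 E2M1 as 1 sign, 2 exponent, and 1 mantissa bit) unpacks two 4-bit values from each byte-sized `int32_data` element:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

// Decode a single FP4 E2M1 nibble to float: bit 3 = sign, bits 2-1 = exponent
// (bias 1), bit 0 = mantissa. Representable magnitudes: 0, 0.5, 1, 1.5, 2, 3, 4, 6.
float fp4E2M1ToFloat(uint8_t nibble)
{
    float const sign = (nibble & 0x8) ? -1.0f : 1.0f;
    uint8_t const exponent = (nibble >> 1) & 0x3;
    uint8_t const mantissa = nibble & 0x1;
    if (exponent == 0)
    {
        return sign * 0.5f * mantissa; // subnormal: +-0 or +-0.5
    }
    return sign * static_cast<float>(1 << (exponent - 1)) * (1.0f + 0.5f * mantissa);
}

int main()
{
    // Each int32 element carries one byte, i.e. two packed FP4 values
    // (low nibble first, by assumption).
    std::vector<int32_t> const int32Data = {0x2C, 0x91};
    for (int32_t word : int32Data)
    {
        auto const byte = static_cast<uint8_t>(word & 0xFF);
        printf("%g %g\n", fp4E2M1ToFloat(byte & 0xF), fp4E2M1ToFloat(byte >> 4));
    }
    return 0; // prints: "-2 1" then "0.5 -0.5"
}
```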

docs/Changelog.md

Lines changed: 9 additions & 1 deletion

```diff
@@ -2,7 +2,15 @@

 # ONNX-TensorRT Changelog

-# TensorRT 10.7 GA Release - 2024-12-3
+# TensorRT 10.8 GA Release - 2025-1-30
+For more details, see the 10.8 GA release notes
+
+- Added support for `FLOAT4E2M1` types for quantized networks
+- Added support for dynamic axes and improved performance of `CumSum` operations
+- Fixed the import of local functions when their input tensor names aliased one from an outside scope
+- Added support for `Pow` ops with integer-typed exponent values
+
+# TensorRT 10.7 GA Release - 2024-11-26
 For more details, see the 10.7 GA release notes

 - Now prioritizes using plugins over local functions when a corresponding plugin is available in the registry
```

docs/operators.md

Lines changed: 8 additions & 8 deletions

```diff
@@ -2,13 +2,13 @@

 # Supported ONNX Operators

-TensorRT 10.7 supports operators in the inclusive range of opset 9 to opset 22. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.
+TensorRT 10.8 supports operators in the inclusive range of opset 9 to opset 22. Latest information of ONNX operators can be found [here](https://github.com/onnx/onnx/blob/main/docs/Operators.md). More details and limitations are documented in the chart below.

-TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOAT16, INT32, INT64, FP8, INT8, INT4, UINT8, and BOOL
+TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOAT16, FP8, FP4, INT32, INT64, INT8, INT4, UINT8, and BOOL

 > Note: There is limited support for DOUBLE type. TensorRT will attempt to cast DOUBLE down to FLOAT, clamping values to `+-FLT_MAX` if necessary.

-> Note: INT8, INT4, and FP8 are treated as `Quantized Types` in TensorRT, where support is available only through quantization from a floating-point type with higher precision. See [section 7.4.2](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#qat-models-work) of the developer guide for more information.
+> Note: INT8, INT4, FP8 and FP4 are treated as `Quantized Types` in TensorRT, where support is available only through quantization from a floating-point type with higher precision. See [section 7.4.2](https://docs.nvidia.com/deeplearning/tensorrt/developer-guide/index.html#qat-models-work) of the developer guide for more information.

 > Note: UINT8 is only supported as network input or output tensor types.

@@ -47,22 +47,22 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
 | Compress | N |
 | Concat | Y | FP32, FP16, BF16, INT32, INT64, BOOL |
 | ConcatFromSequence | N |
-| Constant | Y | FP32, FP16, BF16, INT32, INT64, BOOL | `sparse_value`, `value_string`, and `value_strings` attributes are unsupported.
-| ConstantOfShape | Y | FP32, FP16, BF16, INT32, INF64, BOOL |
+| Constant | Y | FP32, FP16, BF16, FP8, FP4, INT4, INT32, INT64, BOOL | `sparse_value`, `value_string`, and `value_strings` attributes are unsupported.
+| ConstantOfShape | Y | FP32, FP16, BF16, FP8, FP4, INT4, INT32, INF64, BOOL |
 | Conv | Y | FP32, FP16, BF16 |
 | ConvInteger | N |
 | ConvTranspose | Y | FP32, FP16, BF16 |
 | Cos | Y | FP32, FP16, BF16 |
 | Cosh | Y | FP32, FP16, BF16 |
-| CumSum | Y | FP32, FP16, BF16 | `axis` must be an initializer |
+| CumSum | Y | FP32, FP16, BF16 | `axis` must be a build-time constant |
 | DFT | N |
 | DeformConv | Y | FP32, FP16 | `input` must have 1D or 2D spatial dimensions. `pads` for the beginning and end along each spatial axis must be the same
 | DepthToSpace | Y | FP32, FP16, BF16, INT32, INT64 |
-| DequantizeLinear | Y | INT8, FP8, INT4 | `x_zero_point` must be zero |
+| DequantizeLinear | Y | INT8, FP8, FP4, INT4 | `x_zero_point` must be zero |
 | Det | N |
 | Div | Y | FP32, FP16, BF16, INT32, INT64 |
 | Dropout | Y | FP32, FP16, BF16 | `is_traning` must be an initializer and evaluate to False.
-| DynamicQuantizeLinear | N |
+| DynamicQuantizeLinear | N | Not supported. TensorRT's IDynamicQuantize can be composed from ONNX operators in the form of a model local function.
 | Einsum | Y | FP32, FP16, BF16 |
 | Elu | Y | FP32, FP16, BF16 |
 | Equal | Y | FP32, FP16, BF16, INT32, INT64 |
```
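
Since the new FP4 support (like the other quantized types) flows through `QuantizeLinear`/`DequantizeLinear` nodes at parse time, a minimal sketch of loading such a model through this parser's public entry points looks like the following; these are the standard TensorRT/nvonnxparser APIs, but the model filename is hypothetical:

```cpp
#include <iostream>
#include "NvInfer.h"
#include "NvOnnxParser.h"

// Minimal logger required by the TensorRT builder and parser entry points.
class Logger : public nvinfer1::ILogger
{
    void log(Severity severity, char const* msg) noexcept override
    {
        if (severity <= Severity::kWARNING)
        {
            std::cout << msg << std::endl;
        }
    }
};

int main()
{
    Logger logger;
    auto* builder = nvinfer1::createInferBuilder(logger);
    auto* network = builder->createNetworkV2(0);
    auto* parser = nvonnxparser::createParser(*network, logger);

    // Hypothetical model containing QuantizeLinear/DequantizeLinear pairs over
    // FLOAT4E2M1 tensors; the parser maps them to TensorRT Q/DQ layers.
    bool const ok = parser->parseFromFile(
        "model_fp4_qdq.onnx", static_cast<int32_t>(nvinfer1::ILogger::Severity::kWARNING));
    if (!ok)
    {
        for (int32_t i = 0; i < parser->getNbErrors(); ++i)
        {
            std::cout << parser->getError(i)->desc() << std::endl;
        }
    }

    delete parser;
    delete network;
    delete builder;
    return ok ? 0 : 1;
}
```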

importerUtils.cpp

Lines changed: 11 additions & 7 deletions

```diff
@@ -333,6 +333,7 @@ bool convertDtype(int32_t onnx_dtype, nvinfer1::DataType* trt_dtype)
     case ::ONNX_NAMESPACE::TensorProto::INT64: *trt_dtype = nvinfer1::DataType::kINT64; break;
     case ::ONNX_NAMESPACE::TensorProto::FLOAT8E4M3FN: *trt_dtype = nvinfer1::DataType::kFP8; break;
     case ::ONNX_NAMESPACE::TensorProto::INT4: *trt_dtype = nvinfer1::DataType::kINT4; break;
+    case ::ONNX_NAMESPACE::TensorProto::FLOAT4E2M1: *trt_dtype = nvinfer1::DataType::kFP4; break;
     default:
         std::cerr << "Unsupported ONNX data type: " << getDtypeName(onnx_dtype) << " (" << std::to_string(onnx_dtype)
             << ")" << std::endl;
@@ -506,6 +507,7 @@ std::string getTrtDtypeName(nvinfer1::DataType TrtDtype)
     case nvinfer1::DataType::kBF16: return "BF16";
     case nvinfer1::DataType::kINT64: return "INT64";
     case nvinfer1::DataType::kINT4: return "INT4";
+    case nvinfer1::DataType::kFP4: return "FP4";
     default: return "<UNKNOWN>";
     }
 }
@@ -903,9 +905,9 @@ nvinfer1::IPluginCreatorInterface* importPluginCreator(ImporterContext* ctx, std
     return creator;
 }

-std::unique_ptr<nvinfer1::IPluginV2, PluginDeleter> createPlugin(std::string const& name,
-    std::string const& /* pluginNamespace */, nvinfer1::IPluginCreator* pluginCreator,
-    std::vector<nvinfer1::PluginField> const& pluginFields)
+std::unique_ptr<nvinfer1::IPluginV2, PluginDeleter> createPlugin(ImporterContext* ctx,
+    ::ONNX_NAMESPACE::NodeProto const& node, std::string const& name, std::string const& /* pluginNamespace */,
+    nvinfer1::IPluginCreator* pluginCreator, std::vector<nvinfer1::PluginField> const& pluginFields)
 {
     if (!pluginCreator)
     {
@@ -953,8 +955,9 @@ CreatorVersion getPluginCreatorVersion(nvinfer1::IPluginCreatorInterface const*
     ONNXTRT_CHECK(false && "Unknown plugin creator version.", ErrorCode::kINTERNAL_ERROR);
 }

-std::unique_ptr<nvinfer1::IPluginV3> createPlugin(std::string const& name, std::string const& pluginNamespace,
-    nvinfer1::IPluginCreatorInterface* pluginCreator, std::vector<nvinfer1::PluginField> const& pluginFields)
+std::unique_ptr<nvinfer1::IPluginV3> createPlugin(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node,
+    std::string const& name, std::string const& pluginNamespace, nvinfer1::IPluginCreatorInterface* pluginCreator,
+    std::vector<nvinfer1::PluginField> const& pluginFields)
 {
     if (!pluginCreator)
     {
@@ -1178,7 +1181,7 @@ NodeOutputs modulatedDeformableConvPluginHelper(ImporterContext* ctx, ::ONNX_NAM
     f.emplace_back("dilation", dilationValues.data(), nvinfer1::PluginFieldType::kINT32, listAttrSize);

     // Create plugin from registry
-    auto const plugin = createPlugin(pluginName, kTRT_STD_PLUGIN_NAMESPACE,
+    auto const plugin = createPlugin(ctx, node, pluginName, kTRT_STD_PLUGIN_NAMESPACE,
         static_cast<nvinfer1::IPluginCreator*>(importPluginCreator(ctx, pluginName, pluginVersion)), f);

     ONNXTRT_CHECK_NODE(plugin != nullptr, "ModulatedDeformConv2d plugin was not found in the plugin registry!", node,
@@ -1295,7 +1298,7 @@ NodeOutputs instanceNormPluginHelper(ImporterContext* ctx, ::ONNX_NAMESPACE::Nod
     f.emplace_back("alpha", &alpha, nvinfer1::PluginFieldType::kFLOAT32, 1);

     // Create plugin from registry
-    auto const plugin = createPlugin(getNodeName(node), kTRT_STD_PLUGIN_NAMESPACE,
+    auto const plugin = createPlugin(ctx, node, getNodeName(node), kTRT_STD_PLUGIN_NAMESPACE,
         static_cast<nvinfer1::IPluginCreatorV3One*>(importPluginCreator(ctx, pluginName, pluginVersion)), f);

     ONNXTRT_CHECK_NODE(plugin != nullptr, "InstanceNormalization plugin was not found in the plugin registry!", node,
@@ -1717,6 +1720,7 @@ ::ONNX_NAMESPACE::TensorProto_DataType trtDataTypeToONNX(nvinfer1::DataType dt)
     case nvinfer1::DataType::kUINT8: return ::ONNX_NAMESPACE::TensorProto::UINT8;
     case nvinfer1::DataType::kFP8: return ::ONNX_NAMESPACE::TensorProto::FLOAT8E4M3FN;
     case nvinfer1::DataType::kINT4: return ::ONNX_NAMESPACE::TensorProto::INT4;
+    case nvinfer1::DataType::kFP4: return ::ONNX_NAMESPACE::TensorProto::FLOAT4E2M1;
     }
     return ::ONNX_NAMESPACE::TensorProto_DataType_UNDEFINED;
 }
```

importerUtils.hpp

Lines changed: 6 additions & 5 deletions

```diff
@@ -250,13 +250,14 @@ nvinfer1::IPluginCreatorInterface* importPluginCreator(ImporterContext* ctx, std
     std::string const& pluginVersion, std::string const& pluginNamespace = kTRT_STD_PLUGIN_NAMESPACE);

 // Helper function to get a plugin from the PluginRegistry
-std::unique_ptr<nvinfer1::IPluginV2, PluginDeleter> createPlugin(std::string const& name,
-    std::string const& pluginNamespace, nvinfer1::IPluginCreator* pluginCreator,
-    std::vector<nvinfer1::PluginField> const& pluginFields);
+std::unique_ptr<nvinfer1::IPluginV2, PluginDeleter> createPlugin(ImporterContext* ctx,
+    ::ONNX_NAMESPACE::NodeProto const& node, std::string const& name, std::string const& pluginNamespace,
+    nvinfer1::IPluginCreator* pluginCreator, std::vector<nvinfer1::PluginField> const& pluginFields);

 // Helper function to get a V3 plugin from the PluginRegistry
-std::unique_ptr<nvinfer1::IPluginV3> createPlugin(std::string const& name, std::string const& pluginNamespace,
-    nvinfer1::IPluginCreatorInterface* pluginCreator, std::vector<nvinfer1::PluginField> const& pluginFields);
+std::unique_ptr<nvinfer1::IPluginV3> createPlugin(ImporterContext* ctx, ::ONNX_NAMESPACE::NodeProto const& node,
+    std::string const& name, std::string const& pluginNamespace, nvinfer1::IPluginCreatorInterface* pluginCreator,
+    std::vector<nvinfer1::PluginField> const& pluginFields);

 // Helper function to return the identity of a TensorOrWeights
 TensorOrWeights identity(ImporterContext* ctx, TensorOrWeights input);
```

onnxOpCheckers.cpp

Lines changed: 7 additions & 1 deletion

```diff
@@ -269,6 +269,7 @@ DEFINE_OP_EMPTY_CHECKER(TRT_INT4QuantizeLinear)

 DEFINE_OP_EMPTY_CHECKER(TRT_INT4DequantizeLinear)

+DEFINE_OP_EMPTY_CHECKER(TRT_FP4DynamicQuantize)

 DECLARE_OP_CHECKER(Mul);

@@ -526,7 +527,12 @@ DEFINE_OP_EMPTY_CHECKER(Pad)

 DEFINE_OP_EMPTY_CHECKER(ParametricSoftplus)

-DEFINE_OP_EMPTY_CHECKER(Pow)
+DEFINE_OP_CHECKER(Pow)
+{
+    int32_t const nbInputs = node.input().size();
+    STATIC_CHECK(
+        nbInputs == 2 && "POW operator expects two inputs!", ErrorCode::kINVALID_NODE, node, errors, nodeIndex);
+}

 DEFINE_OP_EMPTY_CHECKER(PRelu)
```
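
The changelog's `Pow` item above is what this checker guards at parse time: exactly two inputs, with integer-typed exponents now accepted. As a sketch of a conforming node, one can be built with the ONNX protobuf API (it requires the ONNX protobuf headers; the tensor names here are hypothetical):

```cpp
#include "onnx/onnx_pb.h"

// Build a Pow node whose exponent may be an integer-typed tensor, which the
// 10.8 parser now accepts; the two-input shape is what the checker validates.
::ONNX_NAMESPACE::NodeProto makeIntExponentPow()
{
    ::ONNX_NAMESPACE::NodeProto node;
    node.set_op_type("Pow");
    node.set_name("pow_int_exponent");
    node.add_input("x");        // e.g. a FLOAT tensor
    node.add_input("exponent"); // e.g. an INT32 initializer
    node.add_output("y");
    return node;
}
```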
