Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/operators.md
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ TensorRT supports the following ONNX data types: DOUBLE, FLOAT32, FLOAT16, BFLOA
| Reshape | Y | FP32, FP16, BF16, INT32, INT64, BOOL |
| Resize | Y | FP32, FP16, BF16 | Supported resize transformation modes: `half_pixel`, `pytorch_half_pixel`, `tf_half_pixel_for_nn`, `asymmetric`, and `align_corners`.<br />Supported resize modes: `nearest`, `linear`.<br />Supported nearest modes: `floor`, `ceil`, `round_prefer_floor`, `round_prefer_ceil`.<br />Supported aspect ratio policy: `stretch`.<br />When `scales` is a tensor input, `axes` must be an iota vector of length rank(input).<br />Antialiasing is not supported.|
| ReverseSequence | Y | FP32, FP16, BF16, INT32, INT64, BOOL |
| RMSNormalization | Y | FP32, FP16, BF16 | Only the first output `Y` is supported. Introduced in opset 23. |
| RNN | Y | FP32, FP16, BF16| For bidirectional RNNs, activation functions must be the same for both the forward and reverse pass
| RoiAlign | Y | FP32, FP16 |
| Round | Y | FP32, FP16, BF16 |
Expand Down
2 changes: 2 additions & 0 deletions onnxOpCheckers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,8 @@ DEFINE_OP_EMPTY_CHECKER(ReduceSum)

DEFINE_OP_EMPTY_CHECKER(ReduceSumSquare)

// RMSNormalization performs no static (pre-import) validation beyond what the
// importer itself checks, so it registers the empty checker.
DEFINE_OP_EMPTY_CHECKER(RMSNormalization)

DEFINE_OP_EMPTY_CHECKER(Relu)

DEFINE_OP_EMPTY_CHECKER(Sign)
Expand Down
86 changes: 86 additions & 0 deletions onnxOpImporters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4791,6 +4791,92 @@ DEFINE_BUILTIN_OP_IMPORTER(ReduceSumSquare)
inputs.size() >= 2 ? inputs.at(1) : TensorOrWeights());
}

// RMSNormalization: Y = (X / sqrt(mean(X^2) + epsilon)) * scale
// Introduced in ONNX opset 23. Only the first output (Y) is produced.
DEFINE_BUILTIN_OP_IMPORTER(RMSNormalization)
{
    using eOp = nvinfer1::ElementWiseOperation;
    using uOp = nvinfer1::UnaryOperation;
    using rOp = nvinfer1::ReduceOperation;

    // Input tensor X.
    nvinfer1::ITensor* input = &convertToTensor(inputs.at(0), ctx);
    auto const nbDims = input->getDimensions().nbDims;
    auto const dt = input->getType();

    // Only floating-point inputs/outputs are supported.
    ONNXTRT_CHECK_NODE((dt == DataType::kFLOAT || dt == DataType::kHALF || dt == DataType::kBF16),
        "Only float32/float16/bfloat16 inputs/outputs supported in RMSNormalization. The current data type = "
        + getTrtDtypeName(dt) + ".",
        node, nodeIdx, ErrorCode::kUNSUPPORTED_NODE_DATATYPE);

    // Scale tensor, broadcast against the normalized axes of X in Step 6.
    nvinfer1::ITensor* scale = &convertToTensor(inputs.at(1), ctx);

    // Attributes: epsilon (default 1e-5), axis (default -1, i.e. last dim),
    // stash_type (default 1 = FLOAT): the precision in which stage one of the
    // computation (mean/RMS) is performed, per the ONNX spec.
    OnnxAttrs attrs(node, ctx);
    float const epsilon = attrs.get("epsilon", 1e-5f);
    int32_t axis = attrs.get("axis", -1);
    nvinfer1::DataType computeType = nvinfer1::DataType::kFLOAT;
    convertDtype(attrs.get<int32_t>("stash_type", 1), &computeType);

    // Convert negative axis to positive.
    convertAxis(axis, nbDims, node, nodeIdx);

    // Reduction mask covering every axis from `axis` to the last one.
    uint32_t axesMask = 0;
    for (int32_t i = axis; i < nbDims; i++)
    {
        axesMask |= 1U << i; // unsigned literal: keep the shift in uint32_t
    }

    // Honor stash_type: run the normalization math in computeType, casting the
    // input up front and casting the normalized result back afterwards. When
    // stash_type matches the input type (e.g. FP32 in, FLOAT stash) no cast
    // layers are inserted and the network is unchanged.
    nvinfer1::ITensor* x = input;
    if (computeType != dt)
    {
        auto* castInLayer = N_CHECK(ctx->network()->addCast(*input, computeType));
        ctx->registerLayer(castInLayer, node);
        x = N_CHECK(castInLayer->getOutput(0));
    }

    // Step 1: Square the input (X^2).
    auto* sqrLayer = N_CHECK(ctx->network()->addElementWise(*x, *x, eOp::kPROD));
    ctx->registerLayer(sqrLayer, node);
    auto* xSquared = N_CHECK(sqrLayer->getOutput(0));

    // Step 2: Mean of squared values (mean(X^2)), keeping reduced dims so the
    // result broadcasts against X in Step 5.
    auto* meanLayer = N_CHECK(ctx->network()->addReduce(*xSquared, rOp::kAVG, axesMask, true));
    ctx->registerLayer(meanLayer, node);
    nvinfer1::ITensor* meanSquared = N_CHECK(meanLayer->getOutput(0));

    // Step 3: Add epsilon (mean(X^2) + epsilon). The constant is created in the
    // compute precision so the elementwise types agree.
    nvinfer1::IConstantLayer* epsilonLayer;
    if (computeType == DataType::kHALF)
    {
        epsilonLayer = addConstantScalar(ctx, static_cast<half_float::half>(epsilon), ::ONNX_NAMESPACE::TensorProto::FLOAT16);
    }
    else if (computeType == DataType::kBF16)
    {
        epsilonLayer = addConstantScalar(ctx, static_cast<BFloat16>(epsilon), ::ONNX_NAMESPACE::TensorProto::BFLOAT16);
    }
    else
    {
        epsilonLayer = addConstantScalar(ctx, epsilon, ::ONNX_NAMESPACE::TensorProto::FLOAT);
    }
    nvinfer1::ITensor* epsilonTensor = N_CHECK(epsilonLayer->getOutput(0));
    // Align ranks before the elementwise add — the scalar constant presumably
    // has lower rank than meanSquared (same pattern as the scale broadcast below).
    broadcastTensors(ctx, meanSquared, epsilonTensor);
    auto* addEpsLayer = N_CHECK(ctx->network()->addElementWise(*meanSquared, *epsilonTensor, eOp::kSUM));
    ctx->registerLayer(addEpsLayer, node);
    auto* meanPlusEps = N_CHECK(addEpsLayer->getOutput(0));

    // Step 4: Square root (sqrt(mean(X^2) + epsilon) = RMS).
    auto* sqrtLayer = N_CHECK(ctx->network()->addUnary(*meanPlusEps, uOp::kSQRT));
    ctx->registerLayer(sqrtLayer, node);
    auto* rms = N_CHECK(sqrtLayer->getOutput(0));

    // Step 5: Divide input by RMS (X / RMS = normalized).
    auto* divLayer = N_CHECK(ctx->network()->addElementWise(*x, *rms, eOp::kDIV));
    ctx->registerLayer(divLayer, node);
    nvinfer1::ITensor* normalized = N_CHECK(divLayer->getOutput(0));

    // Cast the normalized result back to the input/output type if stash_type
    // differed, so the final multiply matches the scale tensor's type.
    if (computeType != dt)
    {
        auto* castOutLayer = N_CHECK(ctx->network()->addCast(*normalized, dt));
        ctx->registerLayer(castOutLayer, node);
        normalized = N_CHECK(castOutLayer->getOutput(0));
    }

    // Step 6: Broadcast scale to input rank and multiply (normalized * scale).
    broadcastTensors(ctx, normalized, scale);
    auto* scaleLayer = N_CHECK(ctx->network()->addElementWise(*normalized, *scale, eOp::kPROD));
    ctx->registerLayer(scaleLayer, node);

    RETURN_FIRST_OUTPUT(scaleLayer, node, nodeIdx);
}

DEFINE_BUILTIN_OP_IMPORTER(Relu)
{
return activationHelper(ctx, node, nodeIdx, inputs, nvinfer1::ActivationType::kRELU);
Expand Down
1 change: 1 addition & 0 deletions onnx_backend_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@
backend_test.include(r'.*test_reduce.*')
backend_test.include(r'.*test_ReLU*')
backend_test.include(r'.*test_relu.*')
# RMSNormalization (new in opset 23): run its ONNX backend tests.
backend_test.include(r'.*test_rms_normalization.*')
backend_test.include(r'.*test_selu.*')
backend_test.include(r'.*test_shape.*')
backend_test.include(r'.*test_Sigmoid*')
Expand Down