|
| 1 | +// SPDX-License-Identifier: Apache-2.0 |
| 2 | + |
| 3 | +//===-- AMDQuarkOps.td -- AMD Quark Ops -*- tablegen -*-===//
| 4 | +// |
| 5 | +// Copyright 2025 Advanced Micro Devices, Inc. or its affiliates |
| 6 | +// |
| 7 | +// ============================================================================= |
| 8 | +// |
| 9 | +// Defines Ops from AMD's Quark quantizer, version 0.10 |
| 10 | +// |
| 11 | +//===----------------------------------------------------------------------===// |
| 12 | + |
| 13 | +include "mlir/Interfaces/CallInterfaces.td" |
| 14 | +include "mlir/IR/SymbolInterfaces.td" |
| 15 | +include "src/IR/AttrBase.td" |
| 16 | + |
| 17 | +//===----------------------------------------------------------------------===// |
| 18 | +// BFPQuantizeDequantizeOp |
| 19 | +def AMDQuarkBFPQuantizeDequantizeOp: ONNX_Op<"AMDQuarkBFPQuantizeDequantizeOp",
| 20 | + [Pure, OpVersionTrait<1>,
| 21 | + DeclareOpInterfaceMethods<ShapeInferenceOpInterface>,
| 22 | + DeclareOpInterfaceMethods<ShapeHelperOpInterface>, SameOperandsAndResultElementType]> {
| 23 | + let summary = "BFPQuantizeDequantize";
| 24 | + let description = [{
| 25 | + Block Floating Point (BFP) groups numbers (e.g., tensors, arrays) into blocks, where each block shares a common exponent, and the values in the block are represented with individual mantissas (and the sign bit). This approach offers the performance and speed of 8-bit operations while bringing the precision closer to 16-bit operations.
| 26 | +
| 27 | + MicroeXponents (MX) extends the concept of BFP by introducing two levels of exponents: shared exponents for entire blocks and micro exponents for finer-grained sub-blocks. This two-level approach enables more precise scaling of individual elements within a block, reducing quantization error and improving the representational range. The paper https://arxiv.org/abs/2302.08007 introduces three specific formats: MX4, MX6 and MX9, which have different bits of mantissa.
| 28 | +
| 29 | + This operator converts floating-point values (typically 32-bit floating-point numbers) into BFP or MX values, then convert them back. It approximates the Quantize-Dequantize process and introduces quantization errors.
| 30 | + }];
| 31 | +
| 32 | + let arguments = (ins TensorOf<[F32]>:$X, // f32 tensor to quantize-dequantize
| 33 | + DefaultValuedStrAttr<StrAttr, "to_bfp">:$bfp_method, // conversion mode; presumably selects BFP vs. MX — confirm allowed values against Quark 0.10
| 34 | + DefaultValuedAttr<SI64Attr, "1">:$axis, // axis along which elements are grouped into blocks — exact convention per Quark, TODO confirm
| 35 | + DefaultValuedAttr<SI64Attr, "16">:$bit_width, // bit width of the quantized representation — semantics defined by Quark, TODO confirm
| 36 | + DefaultValuedAttr<SI64Attr, "8">:$block_size, // number of elements sharing one exponent — presumably; verify
| 37 | + DefaultValuedAttr<SI64Attr, "0">:$rounding_mode, // rounding scheme selector; enum meaning defined by Quark — TODO confirm
| 38 | + DefaultValuedAttr<SI64Attr, "2">:$sub_block_size, // MX sub-block size — presumably ignored for plain BFP; verify
| 39 | + DefaultValuedAttr<SI64Attr, "1">:$sub_block_shift_bits // bits for the MX micro-exponent shift — presumably ignored for plain BFP; verify
| 40 | + );
| 41 | + let results = (outs TensorOf<[F32]>:$Y); // element type matches $X per SameOperandsAndResultElementType
| 42 | +
| 43 | + let hasVerifier = 1; // custom verify() is implemented in C++
| 44 | +
| 45 | + let extraClassDeclaration = [{
| 46 | + static int getNumberOfOperands() { return 1; }
| 47 | + static int getNumberOfResults() { return 1; }
| 48 | + static std::vector<int> getTypeMap() { return {30}; } // Same result element type as operand
| 49 | + // Predicates classifying this op's attribute configuration as one of the
| 50 | + // known formats; defined in C++. ignoreAxis presumably skips the axis
| 51 | + // attribute when matching — confirm in the implementation.
| 52 | + [[nodiscard]] bool isBFP16(bool ignoreAxis = false);
| 53 | + [[nodiscard]] bool isMX4(bool ignoreAxis = false);
| 54 | + [[nodiscard]] bool isMX6(bool ignoreAxis = false);
| 55 | + [[nodiscard]] bool isMX9(bool ignoreAxis = false);
| 56 | + }];
| 57 | +
| 58 | + let extraClassDefinition = [{
| 59 | + // Factory for this op's shape helper; caller takes ownership of the result.
| 60 | + onnx_mlir::ONNXOpShapeHelper * AMDQuarkBFPQuantizeDequantizeOp::getShapeHelper(mlir::Operation *op, mlir::ArrayRef<mlir::Value> oper,
| 61 | + onnx_mlir::IndexExprBuilder *ieb, onnx_mlir::IndexExprScope *scope) {
| 62 | + onnx_mlir::ONNXOpShapeHelper *sh = new onnx_mlir::AMDQuarkBFPQuantizeDequantizeOpShapeHelper(op, oper, ieb, scope);
| 63 | + assert(sh && "failed to allocate shape helper"); // NOTE(review): operator new throws std::bad_alloc on failure, so this assert can never fire
| 64 | + return sh;
| 65 | + }
| 66 | + }];
| 67 | +}
| 64 | + |
0 commit comments