Skip to content

Commit f1ecdb1

Browse files
authored
Merge pull request #408 from SushmitaThakallapalli1980/feature/onnx-to-tosa
Created passes for removing redundant cases of DQ-Concat-Q, DQ-Cast-Q, DQ-Slice-Q and DQ-Q.
2 parents 4e9c1be + ab6bb16 commit f1ecdb1

File tree

10 files changed

+372
-0
lines changed

10 files changed

+372
-0
lines changed

src/Compiler/CompilerPasses.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,10 @@ void addONNXToMLIRPasses(mlir::PassManager &pm, bool targetCPU,
141141
pm.addPass(onnx_mlir::createSimplifyShapeRelatedOpsPass(
142142
opts.enableQuarkQuantizedLegalization));
143143

144+
// Passes for removing redundant QDQ patterns: plain DQ-Q pairs as well as DQ-{Concat,Slice,Cast}-Q
145+
if (opts.enableRemoveDqQOp)
146+
pm.addPass(createQDQOptONNXToONNXPass());
147+
144148
// One more call to ONNX shape inference/canonicalization/... to update
145149
// shape if possible.
146150
if (enableONNXHybridPass) {

src/Compiler/CompilerPasses.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ struct OnnxToMlirOptions {
3030
bool enableConvTransposeDecompose = false;
3131
bool enableConvTransposeDecomposeToPhasedConv = false;
3232
bool enableConvTranspose1dDecomposeToPhasedConv = false;
33+
bool enableRemoveDqQOp = true;
3334
};
3435

3536
void addONNXToMLIRPasses(mlir::PassManager &pm, bool targetCPU,

src/Dialect/ONNX/Transforms/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ add_onnx_mlir_rewriter(DecomposeConvTranspose1dPhased)
77

88
add_onnx_mlir_rewriter(ConstProp)
99
add_onnx_mlir_rewriter(ConvOpt)
10+
add_onnx_mlir_rewriter(QDQOpt)
1011

1112
add_onnx_mlir_library(OMShapeInference
1213
ShapeInference.cpp
@@ -42,6 +43,7 @@ add_onnx_mlir_library(OMInstrumentONNX
4243

4344
add_onnx_mlir_library(OMONNXRewrite
4445
ConstProp.cpp
46+
QDQOpt.cpp
4547
ConvOpt.cpp
4648
Decompose.cpp
4749
DecomposeEinsum.cpp
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
//===- QDQOpt.cpp - Remove QDQ operations --------*- C++ -*-===//
2+
//
3+
// (c) Copyright 2022 - 2025 Advanced Micro Devices, Inc. All Rights Reserved.
4+
//
5+
//===----------------------------------------------------------------------===//
6+
7+
#include "mlir/IR/Attributes.h"
8+
#include "mlir/IR/BuiltinTypes.h"
9+
#include "mlir/IR/Operation.h"
10+
#include "mlir/IR/PatternMatch.h"
11+
#include "mlir/Pass/Pass.h"
12+
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
13+
#include "src/Dialect/ONNX/ONNXOps.hpp"
14+
#include "src/Dialect/ONNX/ONNXOps/OpHelper.hpp"
15+
#include "src/Pass/Passes.hpp"
16+
17+
#include "llvm/ADT/STLExtras.h"
18+
#include "llvm/ADT/SmallSet.h"
19+
#include <cmath>
20+
21+
using namespace mlir;
22+
using namespace onnx_mlir;
23+
24+
namespace {
25+
26+
//===----------------------------------------------------------------------===//
27+
// Helper Functions
28+
//===----------------------------------------------------------------------===//
29+
30+
// Return the dense element attribute held by the ONNXConstantOp that
// produces `val`, or nullptr when `val` is null, is not produced by an
// ONNXConstantOp, or the constant's value is not an ElementsAttr.
static ElementsAttr getElementAttributeFromConstant(Value val) {
  if (!val)
    return nullptr;
  auto constOp = val.getDefiningOp<ONNXConstantOp>();
  if (!constOp)
    return nullptr;
  return mlir::dyn_cast<ElementsAttr>(constOp.getValueAttr());
}
37+
38+
//===----------------------------------------------------------------------===//
39+
// Pattern to remove QDQ pairs
40+
//===----------------------------------------------------------------------===//
41+
42+
/// Folds a DequantizeLinear -> QuantizeLinear pair into a no-op when the
/// round trip is provably the identity: both ops use the same axis and
/// block_size, the same constant scale and zero-point, and the quantized
/// element type going into DQ equals the one coming out of Q. In that case
/// the QuantizeLinear result is replaced by the original quantized input.
struct FoldQDQPattern : public OpRewritePattern<ONNXQuantizeLinearOp> {
  using OpRewritePattern<ONNXQuantizeLinearOp>::OpRewritePattern;

  LogicalResult matchAndRewrite(
      ONNXQuantizeLinearOp qOp, PatternRewriter &rewriter) const override {
    // Anchor on Q and require its input to come directly from a DQ.
    auto dqOp = qOp.getX().getDefiningOp<ONNXDequantizeLinearOp>();
    if (!dqOp)
      return failure();

    // 1. Quantization attributes must agree on both ops.
    if (qOp.getAxis() != dqOp.getAxis() ||
        qOp.getBlockSize() != dqOp.getBlockSize())
      return failure();

    // 2. Zero-points must be identical compile-time constants. Attributes
    // are uniqued in MLIR, so pointer equality is value equality here.
    // Non-constant (or missing) zero-points are conservatively rejected.
    ElementsAttr dqZeroPoint =
        getElementAttributeFromConstant(dqOp.getXZeroPoint());
    ElementsAttr qZeroPoint =
        getElementAttributeFromConstant(qOp.getYZeroPoint());
    if (!dqZeroPoint || !qZeroPoint || dqZeroPoint != qZeroPoint)
      return failure();

    // 3. Scales must likewise be identical compile-time constants.
    ElementsAttr dqScale = getElementAttributeFromConstant(dqOp.getXScale());
    ElementsAttr qScale = getElementAttributeFromConstant(qOp.getYScale());
    if (!dqScale || !qScale || dqScale != qScale)
      return failure();

    // 4. The quantized element type must be preserved across the DQ->Q
    // chain: the type feeding DQ must match the type produced by Q.
    // Note: use the free-function mlir::dyn_cast; the member-form
    // Type::dyn_cast is deprecated and removed in newer MLIR.
    auto dqInType = mlir::dyn_cast<TensorType>(dqOp.getX().getType());
    auto qOutType = mlir::dyn_cast<TensorType>(qOp.getResult().getType());
    if (!dqInType || !qOutType ||
        dqInType.getElementType() != qOutType.getElementType())
      return failure();

    // The round trip is the identity: bypass both ops.
    rewriter.replaceOp(qOp, dqOp.getX());
    return success();
  }
};
95+
96+
//===----------------------------------------------------------------------===//
97+
// Pass to run QDQ removal
98+
//===----------------------------------------------------------------------===//
99+
100+
struct QDQOptONNXToONNXPass
101+
: public PassWrapper<QDQOptONNXToONNXPass, OperationPass<func::FuncOp>> {
102+
103+
MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(QDQOptONNXToONNXPass)
104+
StringRef getArgument() const override { return "dqq-opt-onnx-to-onnx"; }
105+
StringRef getDescription() const override {
106+
return "Remove DqQ ops and surrounding DqQ if safe.";
107+
}
108+
109+
void runOnOperation() override {
110+
auto function = getOperation();
111+
RewritePatternSet patterns(&getContext());
112+
patterns.add<FoldQDQPattern>(&getContext());
113+
if (failed(applyPatternsGreedily(function, std::move(patterns))))
114+
signalPassFailure();
115+
}
116+
};
117+
} // namespace
118+
119+
namespace onnx_mlir {
120+
std::unique_ptr<mlir::Pass> createQDQOptONNXToONNXPass() {
121+
return std::make_unique<QDQOptONNXToONNXPass>();
122+
}
123+
} // namespace onnx_mlir

src/Pass/Passes.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ void configureConstPropONNXToONNXPass(bool roundFPToInt, int expansionBound,
5555

5656
std::unique_ptr<mlir::Pass> createConstPropONNXToONNXPass();
5757

58+
std::unique_ptr<mlir::Pass> createQDQOptONNXToONNXPass();
59+
5860
/// Pass for instrument the ops in specific stage.
5961
std::unique_ptr<mlir::Pass> createInstrumentPass();
6062
std::unique_ptr<mlir::Pass> createInstrumentPass(

src/Tools/onnx-mlir-opt/RegisterPasses.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,10 @@ void registerOMPasses(int optLevel) {
6767
return createConstPropONNXToONNXPass();
6868
});
6969

70+
mlir::registerPass([]() -> std::unique_ptr<mlir::Pass> {
71+
return createQDQOptONNXToONNXPass();
72+
});
73+
7074
mlir::registerPass(
7175
[]() -> std::unique_ptr<mlir::Pass> { return createInstrumentPass(); });
7276

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// RUN: onnx-mlir-opt --canonicalize --dqq-opt-onnx-to-onnx %s -split-input-file | FileCheck %s
2+
3+
func.func @test_cast_pattern1(%arg0: tensor<*xui16>) -> tensor<*xui16> {
4+
%0 = onnx.Constant dense<2.57987776E-5> : tensor<f32>
5+
%1 = onnx.Constant dense<39664> : tensor<ui16>
6+
%2 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<*xui16>, tensor<f32>, tensor<ui16>) -> tensor<*xf32>
7+
%3 = "onnx.Cast"(%2) {saturate = 1 : si64, to = f32} : (tensor<*xf32>) -> tensor<*xf32>
8+
%4 = "onnx.QuantizeLinear"(%3, %0, %1) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<*xf32>, tensor<f32>, tensor<ui16>) -> tensor<*xui16>
9+
return %4 : tensor<*xui16>
10+
}
11+
12+
// CHECK-LABEL: func.func @test_cast_pattern1(%arg0: tensor<*xui16>) -> tensor<*xui16>
13+
// CHECK-NOT: onnx.DequantizeLinear
14+
// CHECK-NOT: onnx.Cast
15+
// CHECK-NOT: onnx.QuantizeLinear
16+
17+
func.func @test_cast_pattern2(%arg0: tensor<*xui16>) -> tensor<*xui16> {
18+
%0 = onnx.Constant dense<2.57987776E-5> : tensor<f32>
19+
%1 = onnx.Constant dense<39664> : tensor<ui16>
20+
%2 = "onnx.Cast"(%arg0) {saturate = 1 : si64, to = f32} : (tensor<*xui16>) -> tensor<*xf32>
21+
%3 = "onnx.QuantizeLinear"(%2, %0, %1) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<*xf32>, tensor<f32>, tensor<ui16>) -> tensor<*xui16>
22+
return %3 : tensor<*xui16>
23+
}
24+
25+
// CHECK-LABEL: func.func @test_cast_pattern2(%arg0: tensor<*xui16>) -> tensor<*xui16>
26+
// CHECK: onnx.Cast
27+
// CHECK: onnx.QuantizeLinear
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
// RUN: onnx-mlir-opt --canonicalize --dqq-opt-onnx-to-onnx %s -split-input-file | FileCheck %s
2+
3+
func.func @test_concat_pattern1(%arg0: tensor<*xui16>) -> tensor<*xui16> {
4+
%0 = onnx.Constant dense<2.57987776E-5> : tensor<f32>
5+
%1 = onnx.Constant dense<39664> : tensor<ui16>
6+
%2 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<*xui16>, tensor<f32>, tensor<ui16>) -> tensor<*xf32>
7+
%3 = "onnx.Concat"(%2) {axis = 1 : si64} : (tensor<*xf32>) -> tensor<*xf32>
8+
%4 = "onnx.QuantizeLinear"(%3, %0, %1) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<*xf32>, tensor<f32>, tensor<ui16>) -> tensor<*xui16>
9+
return %4 : tensor<*xui16>
10+
}
11+
12+
// CHECK-LABEL: func.func @test_concat_pattern1(%arg0: tensor<*xui16>) -> tensor<*xui16>
13+
// CHECK-NOT: onnx.DequantizeLinear
14+
// CHECK-NOT: onnx.Concat
15+
// CHECK-NOT: onnx.QuantizeLinear
16+
// CHECK: return %arg0 : tensor<*xui16>
17+
18+
func.func @test_concat_pattern2(%arg0: tensor<*xui16>) -> tensor<*xui16> {
19+
%0 = onnx.Constant dense<2.57987776E-5> : tensor<f32>
20+
%1 = onnx.Constant dense<39664> : tensor<ui16>
21+
%2 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<*xui16>, tensor<f32>, tensor<ui16>) -> tensor<*xf32>
22+
%3 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<*xui16>, tensor<f32>, tensor<ui16>) -> tensor<*xf32>
23+
%4 = "onnx.Concat"(%2, %3) {axis = 1 : si64} : (tensor<*xf32>, tensor<*xf32>) -> tensor<*xf32>
24+
%5 = "onnx.QuantizeLinear"(%4, %0, %1) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<*xf32>, tensor<f32>, tensor<ui16>) -> tensor<*xui16>
25+
return %5 : tensor<*xui16>
26+
}
27+
28+
// CHECK-LABEL: func.func @test_concat_pattern2(%arg0: tensor<*xui16>) -> tensor<*xui16>
29+
// CHECK: onnx.DequantizeLinear
30+
// CHECK: onnx.Concat
31+
// CHECK: onnx.QuantizeLinear
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
// RUN: onnx-mlir-opt --dqq-opt-onnx-to-onnx %s -split-input-file | FileCheck %s
2+
3+
func.func @test_qdq_pattern1(%arg0: tensor<1x128x768xui16>) -> tensor<1x128x768xui16> {
4+
%0 = onnx.Constant dense<2.57987776E-5> : tensor<f32>
5+
%1 = onnx.Constant dense<39664> : tensor<ui16>
6+
%2 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x128x768xui16>, tensor<f32>, tensor<ui16>) -> tensor<1x128x768xf32>
7+
%3 = "onnx.QuantizeLinear"(%2, %0, %1) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<1x128x768xf32>, tensor<f32>, tensor<ui16>) -> tensor<1x128x768xui16>
8+
return %3 : tensor<1x128x768xui16>
9+
10+
}
11+
12+
// CHECK-LABEL: func.func @test_qdq_pattern1(%arg0: tensor<1x128x768xui16>) -> tensor<1x128x768xui16>
13+
// CHECK: return %arg0 : tensor<1x128x768xui16>
14+
// CHECK-NOT: onnx.DequantizeLinear
15+
// CHECK-NOT: onnx.QuantizeLinear
16+
17+
func.func @test_qdq_pattern2(%arg0: tensor<1x128x768xui16>) -> tensor<1x128x768xui16> {
18+
%0 = onnx.Constant dense<2.57987776E-5> : tensor<f32>
19+
%1 = onnx.Constant dense<39664> : tensor<ui16>
20+
%2 = onnx.Constant dense<6.57987776E-5> : tensor<f32>
21+
%3 = onnx.Constant dense<45664> : tensor<ui16>
22+
%4 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x128x768xui16>, tensor<f32>, tensor<ui16>) -> tensor<1x128x768xf32>
23+
%5 = "onnx.QuantizeLinear"(%4, %2, %3) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<1x128x768xf32>, tensor<f32>, tensor<ui16>) -> tensor<1x128x768xui16>
24+
return %5 : tensor<1x128x768xui16>
25+
}
26+
27+
// CHECK-LABEL: func.func @test_qdq_pattern2(%arg0: tensor<1x128x768xui16>) -> tensor<1x128x768xui16>
28+
// CHECK: onnx.DequantizeLinear
29+
// CHECK: onnx.QuantizeLinear
30+
31+
func.func @test_qdq_pattern3(%arg0: tensor<1x128x768xui16>) -> tensor<1x128x768xui16> {
32+
%0 = onnx.Constant dense<2.57987776E-5> : tensor<f32>
33+
%1 = onnx.Constant dense<39664> : tensor<ui16>
34+
%2 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 2 : si64, block_size = 0 : si64} : (tensor<1x128x768xui16>, tensor<f32>, tensor<ui16>) -> tensor<1x128x768xf32>
35+
%3 = "onnx.QuantizeLinear"(%2, %0, %1) {block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<1x128x768xf32>, tensor<f32>, tensor<ui16>) -> tensor<1x128x768xui16>
36+
return %3 : tensor<1x128x768xui16>
37+
38+
}
39+
40+
// CHECK-LABEL: func.func @test_qdq_pattern3(%arg0: tensor<1x128x768xui16>) -> tensor<1x128x768xui16>
41+
// CHECK: onnx.DequantizeLinear
42+
// CHECK: onnx.QuantizeLinear
43+
44+
func.func @test_qdq_pattern4(%arg0: tensor<1x128x768xui16>) -> tensor<1x128x768xui16> {
45+
%0 = onnx.Constant dense<2.57987776E-5> : tensor<f32>
46+
%1 = onnx.Constant dense<39664> : tensor<ui16>
47+
%2 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 1 : si64} : (tensor<1x128x768xui16>, tensor<f32>, tensor<ui16>) -> tensor<1x128x768xf32>
48+
%3 = "onnx.QuantizeLinear"(%2, %0, %1) {axis = 1 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<1x128x768xf32>, tensor<f32>, tensor<ui16>) -> tensor<1x128x768xui16>
49+
return %3 : tensor<1x128x768xui16>
50+
51+
}
52+
53+
// CHECK-LABEL: func.func @test_qdq_pattern4(%arg0: tensor<1x128x768xui16>) -> tensor<1x128x768xui16>
54+
// CHECK: onnx.DequantizeLinear
55+
// CHECK: onnx.QuantizeLinear
56+
57+
func.func @test_qdq_pattern6(%arg0: tensor<1x128x768xui16>, %arg1: tensor<f32>) -> tensor<1x128x768xui16> {
58+
%0 = onnx.Constant dense<39664> : tensor<ui16>
59+
%1 = "onnx.DequantizeLinear"(%arg0, %arg1, %0) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x128x768xui16>, tensor<f32>, tensor<ui16>) -> tensor<1x128x768xf32>
60+
%2 = "onnx.QuantizeLinear"(%1, %arg1, %0) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<1x128x768xf32>, tensor<f32>, tensor<ui16>) -> tensor<1x128x768xui16>
61+
return %2 : tensor<1x128x768xui16>
62+
}
63+
64+
// CHECK-LABEL: func.func @test_qdq_pattern6(%arg0: tensor<1x128x768xui16>, %arg1: tensor<f32>) -> tensor<1x128x768xui16>
65+
// CHECK: onnx.DequantizeLinear
66+
// CHECK: onnx.QuantizeLinear
67+
68+
func.func @test_qdq_pattern7(%arg0: tensor<1x128x768xui16>, %arg1: tensor<ui16>) -> tensor<1x128x768xui16> {
69+
%0 = onnx.Constant dense<2.57987776E-5> : tensor<f32>
70+
%1 = "onnx.DequantizeLinear"(%arg0, %0, %arg1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x128x768xui16>, tensor<f32>, tensor<ui16>) -> tensor<1x128x768xf32>
71+
%2 = "onnx.QuantizeLinear"(%1, %0, %arg1) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<1x128x768xf32>, tensor<f32>, tensor<ui16>) -> tensor<1x128x768xui16>
72+
return %2 : tensor<1x128x768xui16>
73+
}
74+
75+
// CHECK-LABEL: func.func @test_qdq_pattern7(%arg0: tensor<1x128x768xui16>, %arg1: tensor<ui16>) -> tensor<1x128x768xui16>
76+
// CHECK: onnx.DequantizeLinear
77+
// CHECK: onnx.QuantizeLinear
78+
79+
func.func @test_qdq_pattern8(%arg0: tensor<1x128x768xi16>) -> tensor<1x128x768xui16> {
80+
%0 = onnx.Constant dense<2.57987776E-5> : tensor<f32>
81+
%1 = onnx.Constant dense<39664> : tensor<ui16>
82+
%2 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x128x768xi16>, tensor<f32>, tensor<ui16>) -> tensor<1x128x768xf32>
83+
%3 = "onnx.QuantizeLinear"(%2, %0, %1) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<1x128x768xf32>, tensor<f32>, tensor<ui16>) -> tensor<1x128x768xui16>
84+
return %3 : tensor<1x128x768xui16>
85+
}
86+
87+
// CHECK-LABEL: func.func @test_qdq_pattern8(%arg0: tensor<1x128x768xi16>) -> tensor<1x128x768xui16>
88+
// CHECK: onnx.DequantizeLinear
89+
// CHECK: onnx.QuantizeLinear

0 commit comments

Comments
 (0)