From 6bc6daa6f1ab75b91aae86e19cf04cdbe1af6f47 Mon Sep 17 00:00:00 2001 From: sushmita Date: Fri, 14 Nov 2025 20:01:15 +0530 Subject: [PATCH 1/2] removeQDQArndOP update with const check --- .../ONNX/Transforms/QDQAroundOpOpt.cpp | 87 ++++- test/mlir/onnx/qdq_removal_flatten.mlir | 24 -- test/mlir/onnx/qdq_removal_gather.mlir | 20 - test/mlir/onnx/qdq_removal_reshape.mlir | 28 -- test/mlir/onnx/qdq_removal_resize.mlir | 78 ---- test/mlir/onnx/qdq_removal_slice.mlir | 32 -- test/mlir/onnx/qdq_removal_squeeze.mlir | 26 -- test/mlir/onnx/qdq_removal_transpose.mlir | 16 - test/mlir/onnx/qdq_removal_unsqueeze.mlir | 26 -- test/mlir/onnx_remove_qdq_arnd_op.mlir | 350 ++++++++++++++++++ 10 files changed, 436 insertions(+), 251 deletions(-) delete mode 100644 test/mlir/onnx/qdq_removal_flatten.mlir delete mode 100644 test/mlir/onnx/qdq_removal_gather.mlir delete mode 100644 test/mlir/onnx/qdq_removal_reshape.mlir delete mode 100644 test/mlir/onnx/qdq_removal_resize.mlir delete mode 100644 test/mlir/onnx/qdq_removal_slice.mlir delete mode 100644 test/mlir/onnx/qdq_removal_squeeze.mlir delete mode 100644 test/mlir/onnx/qdq_removal_transpose.mlir delete mode 100644 test/mlir/onnx/qdq_removal_unsqueeze.mlir create mode 100644 test/mlir/onnx_remove_qdq_arnd_op.mlir diff --git a/src/Dialect/ONNX/Transforms/QDQAroundOpOpt.cpp b/src/Dialect/ONNX/Transforms/QDQAroundOpOpt.cpp index 8a74e1fa10..f2745e20ec 100644 --- a/src/Dialect/ONNX/Transforms/QDQAroundOpOpt.cpp +++ b/src/Dialect/ONNX/Transforms/QDQAroundOpOpt.cpp @@ -17,6 +17,39 @@ using namespace mlir; using namespace onnx_mlir; + +/// Check if a value is defined by a constant operation +/// Returns false for NoValue (NoneType) +/// Uses recursive logic to check if all operands are constants (initializers) +static bool isConstantOrInitializer(Value val) { + if (!val) + return false; + + // Return false for NoValue (which has NoneType) + if (mlir::isa<NoneType>(val.getType())) { + return false; + } + + Operation *definingOp = 
val.getDefiningOp(); + if (!definingOp) { + return false; + } + + // Check if it's a constant op + if (llvm::isa<ONNXConstantOp>(definingOp)) { + return true; + } + + // Recursively check if all operands are initializers + // If all operands are constants, the result is effectively constant + for (Value operand : definingOp->getOperands()) { + if (!isConstantOrInitializer(operand)) { + return false; + } + } + return true; +} + struct InputAndOutput { Value input; Value output; @@ -54,12 +87,64 @@ class RemoveQDQAroundOpPattern : public OpRewritePattern<T> { LogicalResult matchAndRewrite( T op, PatternRewriter &rewriter) const override { + // Special handling for Resize - only support "nearest" mode if (llvm::isa<ONNXResizeOp>(op)) { - auto &resizeOp = llvm::cast<ONNXResizeOp>(op); + auto resizeOp = llvm::cast<ONNXResizeOp>(op); if (resizeOp.getMode() != "nearest") { return failure(); } + + // Resize: require control parameters to be constants + if (!isConstantOrInitializer(resizeOp.getRoi()) || + !isConstantOrInitializer(resizeOp.getScales()) || + !isConstantOrInitializer(resizeOp.getSizes())) { + return failure(); + } + } + + // Unsqueeze requires axes to be a constant + if (llvm::isa<ONNXUnsqueezeOp>(op)) { + auto unsqueezeOp = llvm::cast<ONNXUnsqueezeOp>(op); + if (!isConstantOrInitializer(unsqueezeOp.getAxes())) { + return failure(); + } + } + + // Squeeze requires axes to be a constant + if (llvm::isa<ONNXSqueezeOp>(op)) { + auto squeezeOp = llvm::cast<ONNXSqueezeOp>(op); + if (!isConstantOrInitializer(squeezeOp.getAxes())) { + return failure(); + } + } + + // Reshape requires shape to be a constant + if (llvm::isa<ONNXReshapeOp>(op)) { + auto reshapeOp = llvm::cast<ONNXReshapeOp>(op); + if (!isConstantOrInitializer(reshapeOp.getShape())) { + return failure(); + } + } + + // Gather requires indices to be a constant + if (llvm::isa<ONNXGatherOp>(op)) { + auto gatherOp = llvm::cast<ONNXGatherOp>(op); + if (!isConstantOrInitializer(gatherOp.getIndices())) { + return failure(); + } + } + + // Slice requires all control parameters to be constants + if (llvm::isa<ONNXSliceOp>(op)) { + auto sliceOp = llvm::cast<ONNXSliceOp>(op); + if 
(!isConstantOrInitializer(sliceOp.getStarts()) || + !isConstantOrInitializer(sliceOp.getEnds()) || + !isConstantOrInitializer(sliceOp.getAxes()) || + !isConstantOrInitializer(sliceOp.getSteps())) { + return failure(); + } } + InputAndOutput opIO = getDataInputOutput(op); auto dqOp = opIO.input.getDefiningOp<ONNXDequantizeLinearOp>(); diff --git a/test/mlir/onnx/qdq_removal_flatten.mlir b/test/mlir/onnx/qdq_removal_flatten.mlir deleted file mode 100644 index 325dc46e0d..0000000000 --- a/test/mlir/onnx/qdq_removal_flatten.mlir +++ /dev/null @@ -1,24 +0,0 @@ -// RUN: onnx-mlir-opt --canonicalize --qdq-around-op-opt-onnx-to-onnx %s -split-input-file | FileCheck %s - - func.func @flatten_op(%arg0: tensor<1x2x2xui8>) -> (tensor<1x4xui8>) { - %0 = onnx.Constant dense<1.000000e-01> : tensor<f32> - %1 = onnx.Constant dense<128> : tensor<ui8> - %2 = "onnx.DequantizeLinear"(%arg0, %0, %1) { - axis = 1 : si64, - block_size = 0 : si64, - onnx_node_name = "onnx.DequantizeLinear_0"} : (tensor<1x2x2xui8>, tensor<f32>, tensor<ui8>) -> tensor<1x2x2xf32> - %3 = "onnx.Flatten"(%2) {axis = 1 : si64, onnx_node_name = "onnx.Flatten_1"} : (tensor<1x2x2xf32>) -> tensor<1x4xf32> - %4 = "onnx.QuantizeLinear"(%3, %0, %1) { - axis = 1 : si64, - block_size = 0 : si64, - onnx_node_name = "onnx.QuantizeLinear_2", - output_dtype = 0 : si64, - saturate = 1 : si64} : (tensor<1x4xf32>, tensor<f32>, tensor<ui8>) -> tensor<1x4xui8> - return %4 : tensor<1x4xui8> - } - -// CHECK-LABEL: func.func @flatten_op -// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x2x2xui8>) -> tensor<1x4xui8> { -// CHECK: [[VAR_0_:%.+]] = "onnx.Flatten"([[PARAM_0_]]) {axis = 1 : si64, onnx_node_name = "onnx.Flatten_1"} : (tensor<1x2x2xui8>) -> tensor<1x4xui8> -// CHECK: return [[VAR_0_]] : tensor<1x4xui8> -// CHECK: } \ No newline at end of file diff --git a/test/mlir/onnx/qdq_removal_gather.mlir b/test/mlir/onnx/qdq_removal_gather.mlir deleted file mode 100644 index e7146ce328..0000000000 --- a/test/mlir/onnx/qdq_removal_gather.mlir +++ /dev/null @@ -1,20 +0,0 @@ -// RUN: onnx-mlir-opt 
--canonicalize --qdq-around-op-opt-onnx-to-onnx %s -split-input-file | FileCheck %s - - func.func @gather_op() -> (tensor<2xui8> {onnx.name = "quantized"}) { - %0 = onnx.Constant dense<[0, 1, 2, 3]> : tensor<4xui8> - %1 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> - %2 = onnx.Constant dense<0> : tensor<1xui8> - %3 = onnx.Constant dense<[0, 2]> : tensor<2xi64> - %4 = "onnx.DequantizeLinear"(%0, %1, %2) {axis = 1 : si64, onnx_node_name = "onnx.DequantizeLinear_0"} : (tensor<4xui8>, tensor<1xf32>, tensor<1xui8>) -> tensor<4xf32> - %5 = "onnx.Gather"(%4, %3) {axis = 0 : si64, onnx_node_name = "onnx.Gather_1"} : (tensor<4xf32>, tensor<2xi64>) -> tensor<2xf32> - %6 = "onnx.QuantizeLinear"(%5, %1, %2) {axis = 1 : si64, onnx_node_name = "onnx.QuantizeLinear_2", saturate = 1 : si64} : (tensor<2xf32>, tensor<1xf32>, tensor<1xui8>) -> tensor<2xui8> - return %6 : tensor<2xui8> - } - -// CHECK-LABEL: func.func @gather_op -// CHECK-SAME: () -> (tensor<2xui8> {onnx.name = "quantized"}) { -// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<[0, 1, 2, 3]> : tensor<4xui8> -// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<[0, 2]> : tensor<2xi64> -// CHECK: [[VAR_2_:%.+]] = "onnx.Gather"([[VAR_0_]], [[VAR_1_]]) {axis = 0 : si64, onnx_node_name = "onnx.Gather_1"} : (tensor<4xui8>, tensor<2xi64>) -> tensor<2xui8> -// CHECK: return [[VAR_2_]] : tensor<2xui8> -// CHECK: } \ No newline at end of file diff --git a/test/mlir/onnx/qdq_removal_reshape.mlir b/test/mlir/onnx/qdq_removal_reshape.mlir deleted file mode 100644 index e24eee83a6..0000000000 --- a/test/mlir/onnx/qdq_removal_reshape.mlir +++ /dev/null @@ -1,28 +0,0 @@ -// RUN: onnx-mlir-opt --canonicalize --qdq-around-op-opt-onnx-to-onnx %s -split-input-file | FileCheck %s - - func.func @reshape_op(%arg0: tensor<1x4xui8> {onnx.name = "input_quant"} loc(unknown)) -> (tensor<2x2xui8> {onnx.name = "output_quant"}) { - %0 = onnx.Constant dense<1.000000e-01> : tensor - %1 = onnx.Constant dense<128> : tensor - %2 = onnx.Constant 
dense<2> : tensor<2xi64> - %3 = onnx.Constant dense<1.000000e-01> : tensor - %4 = onnx.Constant dense<128> : tensor - %5 = "onnx.DequantizeLinear"(%arg0, %0, %1) { - axis = 1 : si64, - block_size = 0 : si64, - onnx_node_name = "onnx.DequantizeLinear_0"} : (tensor<1x4xui8>, tensor, tensor) -> tensor<1x4xf32> - %6 = "onnx.Reshape"(%5, %2) {allowzero = 0 : si64, onnx_node_name = "onnx.Reshape_1"} : (tensor<1x4xf32>, tensor<2xi64>) -> tensor<2x2xf32> - %7 = "onnx.QuantizeLinear"(%6, %3, %4) { - axis = 1 : si64, - block_size = 0 : si64, - onnx_node_name = "onnx.QuantizeLinear_2", - output_dtype = 0 : si64, - saturate = 1 : si64} : (tensor<2x2xf32>, tensor, tensor) -> tensor<2x2xui8> - return %7 : tensor<2x2xui8> - } - -// CHECK-LABEL: func.func @reshape_op -// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x4xui8> {onnx.name = "input_quant"}) -> (tensor<2x2xui8> {onnx.name = "output_quant"}) { -// CHECK: [[VAR_0_:%.+]] = onnx.Constant dense<2> : tensor<2xi64> -// CHECK: [[VAR_1_:%.+]] = "onnx.Reshape"([[PARAM_0_]], [[VAR_0_]]) {allowzero = 0 : si64, onnx_node_name = "onnx.Reshape_1"} : (tensor<1x4xui8>, tensor<2xi64>) -> tensor<2x2xui8> -// CHECK: return [[VAR_1_]] : tensor<2x2xui8> -// CHECK: } \ No newline at end of file diff --git a/test/mlir/onnx/qdq_removal_resize.mlir b/test/mlir/onnx/qdq_removal_resize.mlir deleted file mode 100644 index af8d44be81..0000000000 --- a/test/mlir/onnx/qdq_removal_resize.mlir +++ /dev/null @@ -1,78 +0,0 @@ -// RUN: onnx-mlir-opt --canonicalize --qdq-around-op-opt-onnx-to-onnx %s -split-input-file | FileCheck %s - func.func @resize_op(%arg0: tensor<1x3x64x64xui8> {onnx.name = "input"}) -> (tensor<1x3x128x128xui8> {onnx.name = "output"}) { - %0 = "onnx.NoValue"() {onnx_node_name = "onnx.NoValue_0", value} : () -> none - %1 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> - %2 = onnx.Constant dense<0> : tensor<1xui8> - %3 = onnx.Constant dense<[1, 3, 128, 128]> : tensor<4xi64> - %4 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> - %5 = 
onnx.Constant dense<0> : tensor<1xui8> - %6 = "onnx.DequantizeLinear"(%arg0, %1, %2) { - axis = 1 : si64, - block_size = 0 : si64, - onnx_node_name = "onnx.DequantizeLinear_1"} : (tensor<1x3x64x64xui8>, tensor<1xf32>, tensor<1xui8>) -> tensor<1x3x64x64xf32> - %7 = "onnx.Resize"(%6, %0, %0, %3) { - antialias = 0 : si64, - coordinate_transformation_mode = "asymmetric", - cubic_coeff_a = -7.500000e-01 : f32, - exclude_outside = 0 : si64, - extrapolation_value = 0.000000e+00 : f32, - keep_aspect_ratio_policy = "stretch", - mode = "nearest", - nearest_mode = "round_prefer_floor", - onnx_node_name = "onnx.Resize_2"} : (tensor<1x3x64x64xf32>, none, none, tensor<4xi64>) -> tensor<1x3x128x128xf32> - %8 = "onnx.QuantizeLinear"(%7, %4, %5) { - axis = 1 : si64, - block_size = 0 : si64, - onnx_node_name = "onnx.QuantizeLinear_3", - output_dtype = 0 : si64, - saturate = 1 : si64} : (tensor<1x3x128x128xf32>, tensor<1xf32>, tensor<1xui8>) -> tensor<1x3x128x128xui8> - return %8 : tensor<1x3x128x128xui8> - } - -// CHECK-LABEL: func.func @resize_op -// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x3x64x64xui8> {onnx.name = "input"}) -> (tensor<1x3x128x128xui8> {onnx.name = "output"}) { -// CHECK-DAG: [[VAR_0_:%.+]] = "onnx.NoValue"() {onnx_node_name = "onnx.NoValue_0", value} : () -> none -// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<[1, 3, 128, 128]> : tensor<4xi64> -// CHECK: [[VAR_2_:%.+]] = "onnx.Resize"([[PARAM_0_]], [[VAR_0_]], [[VAR_0_]], [[VAR_1_]]) {antialias = 0 : si64, coordinate_transformation_mode = "asymmetric", cubic_coeff_a = -7.500000e-01 : f32, exclude_outside = 0 : si64, extrapolation_value = 0.000000e+00 : f32, keep_aspect_ratio_policy = "stretch", mode = "nearest", nearest_mode = "round_prefer_floor", onnx_node_name = "onnx.Resize_2"} : (tensor<1x3x64x64xui8>, none, none, tensor<4xi64>) -> tensor<1x3x128x128xui8> -// CHECK: return [[VAR_2_]] : tensor<1x3x128x128xui8> -// CHECK: } - -func.func @resize_op_cubic(%arg0: tensor<1x3x64x64xui8> {onnx.name = "input"}) -> 
(tensor<1x3x128x128xui8> {onnx.name = "output"}) { - %0 = "onnx.NoValue"() {onnx_node_name = "onnx.NoValue_0", value} : () -> none - %1 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> - %2 = onnx.Constant dense<0> : tensor<1xui8> - %3 = onnx.Constant dense<[1, 3, 128, 128]> : tensor<4xi64> - %4 = "onnx.DequantizeLinear"(%arg0, %1, %2) { - axis = 1 : si64, - block_size = 0 : si64, - onnx_node_name = "onnx.DequantizeLinear_1"} : (tensor<1x3x64x64xui8>, tensor<1xf32>, tensor<1xui8>) -> tensor<1x3x64x64xf32> - %5 = "onnx.Resize"(%4, %0, %0, %3) { - antialias = 0 : si64, - coordinate_transformation_mode = "asymmetric", - cubic_coeff_a = -7.500000e-01 : f32, - exclude_outside = 0 : si64, - extrapolation_value = 0.000000e+00 : f32, - keep_aspect_ratio_policy = "stretch", - mode = "cubic", - nearest_mode = "round_prefer_floor", - onnx_node_name = "onnx.Resize_2"} : (tensor<1x3x64x64xf32>, none, none, tensor<4xi64>) -> tensor<1x3x128x128xf32> - %6 = "onnx.QuantizeLinear"(%5, %1, %2) { - axis = 1 : si64, - block_size = 0 : si64, - onnx_node_name = "onnx.QuantizeLinear_3", - output_dtype = 0 : si64, - saturate = 1 : si64} : (tensor<1x3x128x128xf32>, tensor<1xf32>, tensor<1xui8>) -> tensor<1x3x128x128xui8> - return %6 : tensor<1x3x128x128xui8> - } - -// CHECK-LABEL: func.func @resize_op_cubic -// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x3x64x64xui8> {onnx.name = "input"}) -> (tensor<1x3x128x128xui8> {onnx.name = "output"}) { -// CHECK-DAG: [[VAR_0_:%.+]] = "onnx.NoValue"() {onnx_node_name = "onnx.NoValue_0", value} : () -> none -// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<1.000000e-01> : tensor<1xf32> -// CHECK-DAG: [[VAR_2_:%.+]] = onnx.Constant dense<0> : tensor<1xui8> -// CHECK-DAG: [[VAR_3_:%.+]] = onnx.Constant dense<[1, 3, 128, 128]> : tensor<4xi64> -// CHECK: [[VAR_4_:%.+]] = "onnx.DequantizeLinear"([[PARAM_0_]], [[VAR_1_]], [[VAR_2_]]) {axis = 1 : si64, block_size = 0 : si64, onnx_node_name = "onnx.DequantizeLinear_1"} : (tensor<1x3x64x64xui8>, tensor<1xf32>, 
tensor<1xui8>) -> tensor<1x3x64x64xf32> -// CHECK: [[VAR_5_:%.+]] = "onnx.Resize"([[VAR_4_]], [[VAR_0_]], [[VAR_0_]], [[VAR_3_]]) {antialias = 0 : si64, coordinate_transformation_mode = "asymmetric", cubic_coeff_a = -7.500000e-01 : f32, exclude_outside = 0 : si64, extrapolation_value = 0.000000e+00 : f32, keep_aspect_ratio_policy = "stretch", mode = "cubic", nearest_mode = "round_prefer_floor", onnx_node_name = "onnx.Resize_2"} : (tensor<1x3x64x64xf32>, none, none, tensor<4xi64>) -> tensor<1x3x128x128xf32> -// CHECK: [[VAR_6_:%.+]] = "onnx.QuantizeLinear"([[VAR_5_]], [[VAR_1_]], [[VAR_2_]]) {axis = 1 : si64, block_size = 0 : si64, onnx_node_name = "onnx.QuantizeLinear_3", output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<1x3x128x128xf32>, tensor<1xf32>, tensor<1xui8>) -> tensor<1x3x128x128xui8> -// CHECK: return [[VAR_6_]] : tensor<1x3x128x128xui8> -// CHECK: } \ No newline at end of file diff --git a/test/mlir/onnx/qdq_removal_slice.mlir b/test/mlir/onnx/qdq_removal_slice.mlir deleted file mode 100644 index af0b2eb611..0000000000 --- a/test/mlir/onnx/qdq_removal_slice.mlir +++ /dev/null @@ -1,32 +0,0 @@ -// RUN: onnx-mlir-opt --canonicalize --qdq-around-op-opt-onnx-to-onnx %s -split-input-file | FileCheck %s - -func.func @slice_op(%arg0: tensor<1x4xui8> {onnx.name = "input_quant"})-> (tensor<1x2xui8> {onnx.name = "output_quant"}) { - %0 = onnx.Constant dense<1.000000e-01> : tensor - %1 = onnx.Constant dense<128> : tensor - %2 = onnx.Constant dense<1> : tensor<1xi64> - %3 = onnx.Constant dense<3> : tensor<1xi64> - %4 = onnx.Constant dense<1> : tensor<1xi64> - %5 = onnx.Constant dense<1> : tensor<1xi64> - %6 = onnx.Constant dense<1.000000e-01> : tensor - %7 = onnx.Constant dense<128> : tensor - %8 = "onnx.DequantizeLinear"(%arg0, %0, %1) { - axis = 1 : si64, - block_size = 0 : si64, - onnx_node_name = "onnx.DequantizeLinear_0"} : (tensor<1x4xui8>, tensor, tensor) -> tensor<1x4xf32> - %9 = "onnx.Slice"(%8, %2, %3, %4, %5) {onnx_node_name = "onnx.Slice_1"} : 
(tensor<1x4xf32>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<1x2xf32> - %10 = "onnx.QuantizeLinear"(%9, %6, %7) { - axis = 1 : si64, - block_size = 0 : si64, - onnx_node_name = "onnx.QuantizeLinear_2", - output_dtype = 0 : si64, - saturate = 1 : si64} : (tensor<1x2xf32>, tensor, tensor) -> tensor<1x2xui8> - return %10 : tensor<1x2xui8> - } - -// CHECK-LABEL: func.func @slice_op -// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x4xui8> {onnx.name = "input_quant"}) -> (tensor<1x2xui8> {onnx.name = "output_quant"}) { -// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<1> : tensor<1xi64> -// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<3> : tensor<1xi64> -// CHECK: [[VAR_2_:%.+]] = "onnx.Slice"([[PARAM_0_]], [[VAR_0_]], [[VAR_1_]], [[VAR_0_]], [[VAR_0_]]) {onnx_node_name = "onnx.Slice_1"} : (tensor<1x4xui8>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<1x2xui8> -// CHECK: return [[VAR_2_]] : tensor<1x2xui8> -// CHECK: } \ No newline at end of file diff --git a/test/mlir/onnx/qdq_removal_squeeze.mlir b/test/mlir/onnx/qdq_removal_squeeze.mlir deleted file mode 100644 index ffc164932c..0000000000 --- a/test/mlir/onnx/qdq_removal_squeeze.mlir +++ /dev/null @@ -1,26 +0,0 @@ -// RUN: onnx-mlir-opt --canonicalize --qdq-around-op-opt-onnx-to-onnx %s -split-input-file | FileCheck %s - - func.func @test_squeeze_pattern(%arg0: tensor<1x1x3xi8>) -> (tensor<1x3xi8>) { - %0 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> - %1 = onnx.Constant dense<-128> : tensor<1xi8> - %2 = onnx.Constant dense<1> : tensor<1xi64> - %3 = "onnx.DequantizeLinear"(%arg0, %0, %1) { - axis = 1 : si64, - block_size = 0 : si64, - onnx_node_name = "onnx.DequantizeLinear_0"} : (tensor<1x1x3xi8>, tensor<1xf32>, tensor<1xi8>) -> tensor<1x1x3xf32> - %4 = "onnx.Squeeze"(%3, %2) {onnx_node_name = "onnx.Squeeze_1"} : (tensor<1x1x3xf32>, tensor<1xi64>) -> tensor<1x3xf32> - %5 = "onnx.QuantizeLinear"(%4, %0, %1) { - axis = 1 : si64, - block_size = 0 : si64, - 
onnx_node_name = "onnx.QuantizeLinear_2", - output_dtype = 0 : si64, - saturate = 1 : si64} : (tensor<1x3xf32>, tensor<1xf32>, tensor<1xi8>) -> tensor<1x3xi8> - return %5 : tensor<1x3xi8> - } - -// CHECK-LABEL: func.func @test_squeeze_pattern -// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x1x3xi8>) -> tensor<1x3xi8> { -// CHECK: [[VAR_0_:%.+]] = onnx.Constant dense<1> : tensor<1xi64> -// CHECK: [[VAR_1_:%.+]] = "onnx.Squeeze"([[PARAM_0_]], [[VAR_0_]]) {onnx_node_name = "onnx.Squeeze_1"} : (tensor<1x1x3xi8>, tensor<1xi64>) -> tensor<1x3xi8> -// CHECK: return [[VAR_1_]] : tensor<1x3xi8> -// CHECK: } \ No newline at end of file diff --git a/test/mlir/onnx/qdq_removal_transpose.mlir b/test/mlir/onnx/qdq_removal_transpose.mlir deleted file mode 100644 index 8ebfb2f29c..0000000000 --- a/test/mlir/onnx/qdq_removal_transpose.mlir +++ /dev/null @@ -1,16 +0,0 @@ -// RUN: onnx-mlir-opt --canonicalize --qdq-around-op-opt-onnx-to-onnx %s -split-input-file | FileCheck %s - - func.func @transpose_op(%arg0: tensor<*xui16>) -> tensor<*xui16> { - %0 = onnx.Constant dense<2.57987776E-5> : tensor - %1 = onnx.Constant dense<39664> : tensor - %2 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<*xui16>, tensor, tensor) -> tensor<*xf32> - %3 = "onnx.Transpose"(%2) {saturate = 1 : si64, to = f32} : (tensor<*xf32>) -> tensor<*xf32> - %4 = "onnx.QuantizeLinear"(%3, %0, %1) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<*xf32>, tensor, tensor) -> tensor<*xui16> - return %4 : tensor<*xui16> - } - -// CHECK-LABEL: func.func @transpose_op -// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<*xui16>) -> tensor<*xui16> { -// CHECK: [[VAR_0_:%.+]] = "onnx.Transpose"([[PARAM_0_]]) {saturate = 1 : si64, to = f32} : (tensor<*xui16>) -> tensor<*xui16> -// CHECK: return [[VAR_0_]] : tensor<*xui16> -// CHECK: } diff --git a/test/mlir/onnx/qdq_removal_unsqueeze.mlir b/test/mlir/onnx/qdq_removal_unsqueeze.mlir deleted file mode 
100644 index 784161d05c..0000000000 --- a/test/mlir/onnx/qdq_removal_unsqueeze.mlir +++ /dev/null @@ -1,26 +0,0 @@ -// RUN: onnx-mlir-opt --canonicalize --qdq-around-op-opt-onnx-to-onnx %s -split-input-file | FileCheck %s - - func.func @unqueeze_op(%arg0: tensor<1x3xi8>) -> (tensor<1x1x3xi8>) { - %0 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> - %1 = onnx.Constant dense<-128> : tensor<1xi8> - %2 = onnx.Constant dense<1> : tensor<1xi64> - %3 = "onnx.DequantizeLinear"(%arg0, %0, %1) { - axis = 1 : si64, - block_size = 0 : si64, - onnx_node_name = "onnx.DequantizeLinear_0"} : (tensor<1x3xi8>, tensor<1xf32>, tensor<1xi8>) -> tensor<1x3xf32> - %4 = "onnx.Unsqueeze"(%3, %2) {onnx_node_name = "onnx.Unsqueeze_1"} : (tensor<1x3xf32>, tensor<1xi64>) -> tensor<1x1x3xf32> - %5 = "onnx.QuantizeLinear"(%4, %0, %1) { - axis = 1 : si64, - block_size = 0 : si64, - onnx_node_name = "onnx.QuantizeLinear_2", - output_dtype = 0 : si64, - saturate = 1 : si64} : (tensor<1x1x3xf32>, tensor<1xf32>, tensor<1xi8>) -> tensor<1x1x3xi8> - return %5 : tensor<1x1x3xi8> - } - -// CHECK-LABEL: func.func @unqueeze_op -// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x3xi8>) -> tensor<1x1x3xi8> { -// CHECK: [[VAR_0_:%.+]] = onnx.Constant dense<1> : tensor<1xi64> -// CHECK: [[VAR_1_:%.+]] = "onnx.Unsqueeze"([[PARAM_0_]], [[VAR_0_]]) {onnx_node_name = "onnx.Unsqueeze_1"} : (tensor<1x3xi8>, tensor<1xi64>) -> tensor<1x1x3xi8> -// CHECK: return [[VAR_1_]] : tensor<1x1x3xi8> -// CHECK: } \ No newline at end of file diff --git a/test/mlir/onnx_remove_qdq_arnd_op.mlir b/test/mlir/onnx_remove_qdq_arnd_op.mlir new file mode 100644 index 0000000000..c3adfc7b7c --- /dev/null +++ b/test/mlir/onnx_remove_qdq_arnd_op.mlir @@ -0,0 +1,350 @@ +// RUN: onnx-mlir-opt --qdq-around-op-opt-onnx-to-onnx %s -split-input-file | FileCheck %s + +// ----- + +// Test that Reshape optimization DOES happen when shape is constant +func.func @reshape_with_constant_shape(%arg0: tensor<1x4xui8>) -> tensor<2x2xui8> { + %0 = 
onnx.Constant dense<1.000000e-01> : tensor + %1 = onnx.Constant dense<128> : tensor + %2 = onnx.Constant dense<2> : tensor<2xi64> + %3 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x4xui8>, tensor, tensor) -> tensor<1x4xf32> + %4 = "onnx.Reshape"(%3, %2) {allowzero = 0 : si64} : (tensor<1x4xf32>, tensor<2xi64>) -> tensor<2x2xf32> + %5 = "onnx.QuantizeLinear"(%4, %0, %1) {axis = 1 : si64, block_size = 0 : si64, saturate = 1 : si64} : (tensor<2x2xf32>, tensor, tensor) -> tensor<2x2xui8> + return %5 : tensor<2x2xui8> +} + +// CHECK-LABEL: func.func @reshape_with_constant_shape +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x4xui8>) -> tensor<2x2xui8> { +// CHECK: [[VAR_0_:%.+]] = onnx.Constant dense<2> : tensor<2xi64> +// CHECK: [[VAR_1_:%.+]] = "onnx.Reshape"([[PARAM_0_]], [[VAR_0_]]) {allowzero = 0 : si64} : (tensor<1x4xui8>, tensor<2xi64>) -> tensor<2x2xui8> +// CHECK: return [[VAR_1_]] : tensor<2x2xui8> +// CHECK: } + +// ----- + +// Test that Reshape optimization DOES NOT happen when shape is NOT constant (runtime input) +func.func @reshape_with_dynamic_shape(%arg0: tensor<1x4xui8>, %arg1: tensor<2xi64>) -> tensor { + %0 = onnx.Constant dense<1.000000e-01> : tensor + %1 = onnx.Constant dense<128> : tensor + %2 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x4xui8>, tensor, tensor) -> tensor<1x4xf32> + %3 = "onnx.Reshape"(%2, %arg1) {allowzero = 0 : si64} : (tensor<1x4xf32>, tensor<2xi64>) -> tensor + %4 = "onnx.QuantizeLinear"(%3, %0, %1) {axis = 1 : si64, block_size = 0 : si64, saturate = 1 : si64} : (tensor, tensor, tensor) -> tensor + return %4 : tensor +} + +// CHECK-LABEL: func.func @reshape_with_dynamic_shape +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x4xui8>, [[PARAM_1_:%.+]]: tensor<2xi64>) -> tensor { +// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<1.000000e-01> : tensor +// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<128> : tensor +// CHECK: [[VAR_2_:%.+]] = 
"onnx.DequantizeLinear"([[PARAM_0_]], [[VAR_0_]], [[VAR_1_]]) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x4xui8>, tensor, tensor) -> tensor<1x4xf32> +// CHECK: [[VAR_3_:%.+]] = "onnx.Reshape"([[VAR_2_]], [[PARAM_1_]]) {allowzero = 0 : si64} : (tensor<1x4xf32>, tensor<2xi64>) -> tensor +// CHECK: [[VAR_4_:%.+]] = "onnx.QuantizeLinear"([[VAR_3_]], [[VAR_0_]], [[VAR_1_]]) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor, tensor, tensor) -> tensor +// CHECK: return [[VAR_4_]] : tensor +// CHECK: } + +// ----- + +// Test that Unsqueeze optimization DOES happen when axes is constant +func.func @unsqueeze_with_constant_axes(%arg0: tensor<1x3xi8>) -> tensor<1x1x3xi8> { + %0 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> + %1 = onnx.Constant dense<-128> : tensor<1xi8> + %2 = onnx.Constant dense<1> : tensor<1xi64> + %3 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x3xi8>, tensor<1xf32>, tensor<1xi8>) -> tensor<1x3xf32> + %4 = "onnx.Unsqueeze"(%3, %2) : (tensor<1x3xf32>, tensor<1xi64>) -> tensor<1x1x3xf32> + %5 = "onnx.QuantizeLinear"(%4, %0, %1) {axis = 1 : si64, block_size = 0 : si64, saturate = 1 : si64} : (tensor<1x1x3xf32>, tensor<1xf32>, tensor<1xi8>) -> tensor<1x1x3xi8> + return %5 : tensor<1x1x3xi8> +} + +// CHECK-LABEL: func.func @unsqueeze_with_constant_axes +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x3xi8>) -> tensor<1x1x3xi8> { +// CHECK: [[VAR_0_:%.+]] = onnx.Constant dense<1> : tensor<1xi64> +// CHECK: [[VAR_1_:%.+]] = "onnx.Unsqueeze"([[PARAM_0_]], [[VAR_0_]]) : (tensor<1x3xi8>, tensor<1xi64>) -> tensor<1x1x3xi8> +// CHECK: return [[VAR_1_]] : tensor<1x1x3xi8> +// CHECK: } + +// ----- + +// Test that Unsqueeze optimization DOES NOT happen when axes is NOT constant +func.func @unsqueeze_with_dynamic_axes(%arg0: tensor<1x3xi8>, %arg1: tensor<1xi64>) -> tensor { + %0 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> + %1 = onnx.Constant dense<-128> : 
tensor<1xi8> + %2 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x3xi8>, tensor<1xf32>, tensor<1xi8>) -> tensor<1x3xf32> + %3 = "onnx.Unsqueeze"(%2, %arg1) : (tensor<1x3xf32>, tensor<1xi64>) -> tensor + %4 = "onnx.QuantizeLinear"(%3, %0, %1) {axis = 1 : si64, block_size = 0 : si64, saturate = 1 : si64} : (tensor, tensor<1xf32>, tensor<1xi8>) -> tensor + return %4 : tensor +} + +// CHECK-LABEL: func.func @unsqueeze_with_dynamic_axes +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x3xi8>, [[PARAM_1_:%.+]]: tensor<1xi64>) -> tensor { +// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<1.000000e-01> : tensor<1xf32> +// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<-128> : tensor<1xi8> +// CHECK: [[VAR_2_:%.+]] = "onnx.DequantizeLinear"([[PARAM_0_]], [[VAR_0_]], [[VAR_1_]]) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x3xi8>, tensor<1xf32>, tensor<1xi8>) -> tensor<1x3xf32> +// CHECK: [[VAR_3_:%.+]] = "onnx.Unsqueeze"([[VAR_2_]], [[PARAM_1_]]) : (tensor<1x3xf32>, tensor<1xi64>) -> tensor +// CHECK: [[VAR_4_:%.+]] = "onnx.QuantizeLinear"([[VAR_3_]], [[VAR_0_]], [[VAR_1_]]) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor, tensor<1xf32>, tensor<1xi8>) -> tensor +// CHECK: return [[VAR_4_]] : tensor +// CHECK: } + +// ----- + +// Test that Squeeze optimization DOES happen when axes is constant +func.func @squeeze_with_constant_axes(%arg0: tensor<1x1x3xi8>) -> tensor<1x3xi8> { + %0 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> + %1 = onnx.Constant dense<-128> : tensor<1xi8> + %2 = onnx.Constant dense<1> : tensor<1xi64> + %3 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x1x3xi8>, tensor<1xf32>, tensor<1xi8>) -> tensor<1x1x3xf32> + %4 = "onnx.Squeeze"(%3, %2) : (tensor<1x1x3xf32>, tensor<1xi64>) -> tensor<1x3xf32> + %5 = "onnx.QuantizeLinear"(%4, %0, %1) {axis = 1 : si64, block_size = 0 : si64, saturate = 1 : si64} : 
(tensor<1x3xf32>, tensor<1xf32>, tensor<1xi8>) -> tensor<1x3xi8> + return %5 : tensor<1x3xi8> +} + +// CHECK-LABEL: func.func @squeeze_with_constant_axes +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x1x3xi8>) -> tensor<1x3xi8> { +// CHECK: [[VAR_0_:%.+]] = onnx.Constant dense<1> : tensor<1xi64> +// CHECK: [[VAR_1_:%.+]] = "onnx.Squeeze"([[PARAM_0_]], [[VAR_0_]]) : (tensor<1x1x3xi8>, tensor<1xi64>) -> tensor<1x3xi8> +// CHECK: return [[VAR_1_]] : tensor<1x3xi8> +// CHECK: } + +// ----- + +// Test that Squeeze optimization DOES NOT happen when axes is NOT constant +func.func @squeeze_with_dynamic_axes(%arg0: tensor<1x1x3xi8>, %arg1: tensor<1xi64>) -> tensor { + %0 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> + %1 = onnx.Constant dense<-128> : tensor<1xi8> + %2 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x1x3xi8>, tensor<1xf32>, tensor<1xi8>) -> tensor<1x1x3xf32> + %3 = "onnx.Squeeze"(%2, %arg1) : (tensor<1x1x3xf32>, tensor<1xi64>) -> tensor + %4 = "onnx.QuantizeLinear"(%3, %0, %1) {axis = 1 : si64, block_size = 0 : si64, saturate = 1 : si64} : (tensor, tensor<1xf32>, tensor<1xi8>) -> tensor + return %4 : tensor +} + +// CHECK-LABEL: func.func @squeeze_with_dynamic_axes +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x1x3xi8>, [[PARAM_1_:%.+]]: tensor<1xi64>) -> tensor { +// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<1.000000e-01> : tensor<1xf32> +// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<-128> : tensor<1xi8> +// CHECK: [[VAR_2_:%.+]] = "onnx.DequantizeLinear"([[PARAM_0_]], [[VAR_0_]], [[VAR_1_]]) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x1x3xi8>, tensor<1xf32>, tensor<1xi8>) -> tensor<1x1x3xf32> +// CHECK: [[VAR_3_:%.+]] = "onnx.Squeeze"([[VAR_2_]], [[PARAM_1_]]) : (tensor<1x1x3xf32>, tensor<1xi64>) -> tensor +// CHECK: [[VAR_4_:%.+]] = "onnx.QuantizeLinear"([[VAR_3_]], [[VAR_0_]], [[VAR_1_]]) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor, 
tensor<1xf32>, tensor<1xi8>) -> tensor +// CHECK: return [[VAR_4_]] : tensor +// CHECK: } + +// ----- + +// Test that Gather optimization DOES happen when indices is constant +func.func @gather_with_constant_indices(%arg0: tensor<4xui8>) -> tensor<2xui8> { + %0 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> + %1 = onnx.Constant dense<0> : tensor<1xui8> + %2 = onnx.Constant dense<[0, 2]> : tensor<2xi64> + %3 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64} : (tensor<4xui8>, tensor<1xf32>, tensor<1xui8>) -> tensor<4xf32> + %4 = "onnx.Gather"(%3, %2) {axis = 0 : si64} : (tensor<4xf32>, tensor<2xi64>) -> tensor<2xf32> + %5 = "onnx.QuantizeLinear"(%4, %0, %1) {axis = 1 : si64, saturate = 1 : si64} : (tensor<2xf32>, tensor<1xf32>, tensor<1xui8>) -> tensor<2xui8> + return %5 : tensor<2xui8> +} + +// CHECK-LABEL: func.func @gather_with_constant_indices +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<4xui8>) -> tensor<2xui8> { +// CHECK: [[VAR_0_:%.+]] = onnx.Constant dense<[0, 2]> : tensor<2xi64> +// CHECK: [[VAR_1_:%.+]] = "onnx.Gather"([[PARAM_0_]], [[VAR_0_]]) {axis = 0 : si64} : (tensor<4xui8>, tensor<2xi64>) -> tensor<2xui8> +// CHECK: return [[VAR_1_]] : tensor<2xui8> +// CHECK: } + +// ----- + +// Test that Gather optimization DOES NOT happen when indices is NOT constant +func.func @gather_with_dynamic_indices(%arg0: tensor<4xui8>, %arg1: tensor) -> tensor { + %0 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> + %1 = onnx.Constant dense<0> : tensor<1xui8> + %2 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64} : (tensor<4xui8>, tensor<1xf32>, tensor<1xui8>) -> tensor<4xf32> + %3 = "onnx.Gather"(%2, %arg1) {axis = 0 : si64} : (tensor<4xf32>, tensor) -> tensor + %4 = "onnx.QuantizeLinear"(%3, %0, %1) {axis = 1 : si64, saturate = 1 : si64} : (tensor, tensor<1xf32>, tensor<1xui8>) -> tensor + return %4 : tensor +} + +// CHECK-LABEL: func.func @gather_with_dynamic_indices +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<4xui8>, [[PARAM_1_:%.+]]: tensor) -> 
tensor { +// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<1.000000e-01> : tensor<1xf32> +// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<0> : tensor<1xui8> +// CHECK: [[VAR_2_:%.+]] = "onnx.DequantizeLinear"([[PARAM_0_]], [[VAR_0_]], [[VAR_1_]]) {axis = 1 : si64, block_size = 0 : si64} : (tensor<4xui8>, tensor<1xf32>, tensor<1xui8>) -> tensor<4xf32> +// CHECK: [[VAR_3_:%.+]] = "onnx.Gather"([[VAR_2_]], [[PARAM_1_]]) {axis = 0 : si64} : (tensor<4xf32>, tensor) -> tensor +// CHECK: [[VAR_4_:%.+]] = "onnx.QuantizeLinear"([[VAR_3_]], [[VAR_0_]], [[VAR_1_]]) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor, tensor<1xf32>, tensor<1xui8>) -> tensor +// CHECK: return [[VAR_4_]] : tensor +// CHECK: } + +// ----- + +// Test that Slice optimization DOES happen when all control parameters are constant +func.func @slice_with_constant_params(%arg0: tensor<1x4xui8>) -> tensor<1x2xui8> { + %0 = onnx.Constant dense<1.000000e-01> : tensor + %1 = onnx.Constant dense<128> : tensor + %2 = onnx.Constant dense<1> : tensor<1xi64> + %3 = onnx.Constant dense<3> : tensor<1xi64> + %4 = onnx.Constant dense<1> : tensor<1xi64> + %5 = onnx.Constant dense<1> : tensor<1xi64> + %6 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x4xui8>, tensor, tensor) -> tensor<1x4xf32> + %7 = "onnx.Slice"(%6, %2, %3, %4, %5) : (tensor<1x4xf32>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<1x2xf32> + %8 = "onnx.QuantizeLinear"(%7, %0, %1) {axis = 1 : si64, block_size = 0 : si64, saturate = 1 : si64} : (tensor<1x2xf32>, tensor, tensor) -> tensor<1x2xui8> + return %8 : tensor<1x2xui8> +} + +// CHECK-LABEL: func.func @slice_with_constant_params +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x4xui8>) -> tensor<1x2xui8> { +// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<1> : tensor<1xi64> +// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<3> : tensor<1xi64> +// CHECK: [[VAR_2_:%.+]] = 
"onnx.Slice"([[PARAM_0_]], [[VAR_0_]], [[VAR_1_]], [[VAR_0_]], [[VAR_0_]]) : (tensor<1x4xui8>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<1x2xui8> +// CHECK: return [[VAR_2_]] : tensor<1x2xui8> +// CHECK: } + +// ----- + +// Test that Slice optimization DOES NOT happen when starts is NOT constant +func.func @slice_with_dynamic_starts(%arg0: tensor<1x4xui8>, %arg1: tensor<1xi64>) -> tensor<1x?xui8> { + %0 = onnx.Constant dense<1.000000e-01> : tensor + %1 = onnx.Constant dense<128> : tensor + %2 = onnx.Constant dense<3> : tensor<1xi64> + %3 = onnx.Constant dense<1> : tensor<1xi64> + %4 = onnx.Constant dense<1> : tensor<1xi64> + %5 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x4xui8>, tensor, tensor) -> tensor<1x4xf32> + %6 = "onnx.Slice"(%5, %arg1, %2, %3, %4) : (tensor<1x4xf32>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<1x?xf32> + %7 = "onnx.QuantizeLinear"(%6, %0, %1) {axis = 1 : si64, block_size = 0 : si64, saturate = 1 : si64} : (tensor<1x?xf32>, tensor, tensor) -> tensor<1x?xui8> + return %7 : tensor<1x?xui8> +} + +// CHECK-LABEL: func.func @slice_with_dynamic_starts +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x4xui8>, [[PARAM_1_:%.+]]: tensor<1xi64>) -> tensor<1x?xui8> { +// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<1.000000e-01> : tensor +// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<128> : tensor +// CHECK-DAG: [[VAR_2_:%.+]] = onnx.Constant dense<3> : tensor<1xi64> +// CHECK-DAG: [[VAR_3_:%.+]] = onnx.Constant dense<1> : tensor<1xi64> +// CHECK: [[VAR_4_:%.+]] = "onnx.DequantizeLinear"([[PARAM_0_]], [[VAR_0_]], [[VAR_1_]]) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x4xui8>, tensor, tensor) -> tensor<1x4xf32> +// CHECK: [[VAR_5_:%.+]] = "onnx.Slice"([[VAR_4_]], [[PARAM_1_]], [[VAR_2_]], [[VAR_3_]], [[VAR_3_]]) : (tensor<1x4xf32>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<1x?xf32> +// CHECK: [[VAR_6_:%.+]] = 
"onnx.QuantizeLinear"([[VAR_5_]], [[VAR_0_]], [[VAR_1_]]) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<1x?xf32>, tensor, tensor) -> tensor<1x?xui8> +// CHECK: return [[VAR_6_]] : tensor<1x?xui8> +// CHECK: } + +// ----- + +// Test that Slice optimization DOES NOT happen when ends is NOT constant +func.func @slice_with_dynamic_ends(%arg0: tensor<1x4xui8>, %arg1: tensor<1xi64>) -> tensor<1x?xui8> { + %0 = onnx.Constant dense<1.000000e-01> : tensor + %1 = onnx.Constant dense<128> : tensor + %2 = onnx.Constant dense<1> : tensor<1xi64> + %3 = onnx.Constant dense<1> : tensor<1xi64> + %4 = onnx.Constant dense<1> : tensor<1xi64> + %5 = "onnx.DequantizeLinear"(%arg0, %0, %1) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x4xui8>, tensor, tensor) -> tensor<1x4xf32> + %6 = "onnx.Slice"(%5, %2, %arg1, %3, %4) : (tensor<1x4xf32>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<1x?xf32> + %7 = "onnx.QuantizeLinear"(%6, %0, %1) {axis = 1 : si64, block_size = 0 : si64, saturate = 1 : si64} : (tensor<1x?xf32>, tensor, tensor) -> tensor<1x?xui8> + return %7 : tensor<1x?xui8> +} + +// CHECK-LABEL: func.func @slice_with_dynamic_ends +// CHECK-SAME: ([[PARAM_0_:%.+]]: tensor<1x4xui8>, [[PARAM_1_:%.+]]: tensor<1xi64>) -> tensor<1x?xui8> { +// CHECK-DAG: [[VAR_0_:%.+]] = onnx.Constant dense<1.000000e-01> : tensor +// CHECK-DAG: [[VAR_1_:%.+]] = onnx.Constant dense<128> : tensor +// CHECK-DAG: [[VAR_2_:%.+]] = onnx.Constant dense<1> : tensor<1xi64> +// CHECK: [[VAR_3_:%.+]] = "onnx.DequantizeLinear"([[PARAM_0_]], [[VAR_0_]], [[VAR_1_]]) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x4xui8>, tensor, tensor) -> tensor<1x4xf32> +// CHECK: [[VAR_4_:%.+]] = "onnx.Slice"([[VAR_3_]], [[VAR_2_]], [[PARAM_1_]], [[VAR_2_]], [[VAR_2_]]) : (tensor<1x4xf32>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>, tensor<1xi64>) -> tensor<1x?xf32> +// CHECK: [[VAR_5_:%.+]] = "onnx.QuantizeLinear"([[VAR_4_]], [[VAR_0_]], 
[[VAR_1_]]) {axis = 1 : si64, block_size = 0 : si64, output_dtype = 0 : si64, saturate = 1 : si64} : (tensor<1x?xf32>, tensor, tensor) -> tensor<1x?xui8> +// CHECK: return [[VAR_5_]] : tensor<1x?xui8> +// CHECK: } + +// ----- + +// Test that Resize optimization DOES NOT happen when sizes is constant, scales is NoValue, and mode is "nearest" +func.func @resize_with_constant_sizes_nearest(%arg0: tensor<1x3x64x64xui8>) -> tensor<1x3x128x128xui8> { + %0 = "onnx.NoValue"() {value} : () -> none + %1 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> + %2 = onnx.Constant dense<0> : tensor<1xui8> + %3 = onnx.Constant dense<[1, 3, 128, 128]> : tensor<4xi64> + %4 = "onnx.DequantizeLinear"(%arg0, %1, %2) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x3x64x64xui8>, tensor<1xf32>, tensor<1xui8>) -> tensor<1x3x64x64xf32> + %5 = "onnx.Resize"(%4, %0, %0, %3) {mode = "nearest"} : (tensor<1x3x64x64xf32>, none, none, tensor<4xi64>) -> tensor<1x3x128x128xf32> + %6 = "onnx.QuantizeLinear"(%5, %1, %2) {axis = 1 : si64, block_size = 0 : si64, saturate = 1 : si64} : (tensor<1x3x128x128xf32>, tensor<1xf32>, tensor<1xui8>) -> tensor<1x3x128x128xui8> + return %6 : tensor<1x3x128x128xui8> +} + + +// CHECK-LABEL: func.func @resize_with_constant_sizes_nearest +// CHECK-SAME: (%[[ARG0:.*]]: tensor<1x3x64x64xui8>) -> tensor<1x3x128x128xui8> +// CHECK-DAG: %[[NONE:.*]] = "onnx.NoValue" +// CHECK-DAG: %[[SCALE:.*]] = onnx.Constant dense<1.000000e-01> : tensor<1xf32> +// CHECK-DAG: %[[ZP:.*]] = onnx.Constant dense<0> : tensor<1xui8> +// CHECK-DAG: %[[SIZES:.*]] = onnx.Constant dense<[1, 3, 128, 128]> : tensor<4xi64> +// CHECK: %[[DEQUANT:.*]] = "onnx.DequantizeLinear"(%[[ARG0]], %[[SCALE]], %[[ZP]]) +// CHECK: %[[RESIZE:.*]] = "onnx.Resize"(%[[DEQUANT]], %[[NONE]], %[[NONE]], %[[SIZES]]) +// CHECK: mode = "nearest" +// CHECK: %[[QUANT:.*]] = "onnx.QuantizeLinear"(%[[RESIZE]], %[[SCALE]], %[[ZP]]) +// CHECK: return %[[QUANT]] + +// ----- + +// Test that Resize optimization DOES NOT happen 
when scales is constant, sizes is NoValue, and mode is "nearest" +func.func @resize_with_constant_scales_nearest(%arg0: tensor<1x3x64x64xui8>) -> tensor<1x3x128x128xui8> { + %0 = "onnx.NoValue"() {value} : () -> none + %1 = onnx.Constant dense<[1.0, 1.0, 2.0, 2.0]> : tensor<4xf32> + %2 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> + %3 = onnx.Constant dense<0> : tensor<1xui8> + %4 = "onnx.DequantizeLinear"(%arg0, %2, %3) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x3x64x64xui8>, tensor<1xf32>, tensor<1xui8>) -> tensor<1x3x64x64xf32> + %5 = "onnx.Resize"(%4, %0, %1, %0) {mode = "nearest"} : (tensor<1x3x64x64xf32>, none, tensor<4xf32>, none) -> tensor<1x3x128x128xf32> + %6 = "onnx.QuantizeLinear"(%5, %2, %3) {axis = 1 : si64, block_size = 0 : si64, saturate = 1 : si64} : (tensor<1x3x128x128xf32>, tensor<1xf32>, tensor<1xui8>) -> tensor<1x3x128x128xui8> + return %6 : tensor<1x3x128x128xui8> +} + + +// CHECK-LABEL: func.func @resize_with_constant_scales_nearest +// CHECK-SAME: (%[[ARG0:.*]]: tensor<1x3x64x64xui8>) -> tensor<1x3x128x128xui8> +// CHECK-DAG: %[[NONE:.*]] = "onnx.NoValue" +// CHECK-DAG: %[[RESIZE_SCALES:.*]] = onnx.Constant dense<[1.000000e+00, 1.000000e+00, 2.000000e+00, 2.000000e+00]> : tensor<4xf32> +// CHECK-DAG: %[[QDQ_SCALE:.*]] = onnx.Constant dense<1.000000e-01> : tensor<1xf32> +// CHECK-DAG: %[[ZP:.*]] = onnx.Constant dense<0> : tensor<1xui8> +// CHECK: %[[DEQUANT:.*]] = "onnx.DequantizeLinear"(%[[ARG0]], %[[QDQ_SCALE]], %[[ZP]]) +// CHECK: %[[RESIZE:.*]] = "onnx.Resize"(%[[DEQUANT]], %[[NONE]], %[[RESIZE_SCALES]], %[[NONE]]) +// CHECK: mode = "nearest" +// CHECK: %[[QUANT:.*]] = "onnx.QuantizeLinear"(%[[RESIZE]], %[[QDQ_SCALE]], %[[ZP]]) +// CHECK: return %[[QUANT]] + +// ----- + +// Test that Resize optimization DOES NOT happen when sizes is NOT constant +func.func @resize_with_dynamic_sizes(%arg0: tensor<1x3x64x64xui8>, %arg1: tensor<4xi64>) -> tensor { + %0 = "onnx.NoValue"() {value} : () -> none + %1 = onnx.Constant 
dense<1.000000e-01> : tensor<1xf32> + %2 = onnx.Constant dense<0> : tensor<1xui8> + %3 = "onnx.DequantizeLinear"(%arg0, %1, %2) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x3x64x64xui8>, tensor<1xf32>, tensor<1xui8>) -> tensor<1x3x64x64xf32> + %4 = "onnx.Resize"(%3, %0, %0, %arg1) {mode = "nearest"} : (tensor<1x3x64x64xf32>, none, none, tensor<4xi64>) -> tensor + %5 = "onnx.QuantizeLinear"(%4, %1, %2) {axis = 1 : si64, block_size = 0 : si64, saturate = 1 : si64} : (tensor, tensor<1xf32>, tensor<1xui8>) -> tensor + return %5 : tensor +} + +// CHECK-LABEL: func.func @resize_with_dynamic_sizes +// CHECK-SAME: (%[[ARG0:.*]]: tensor<1x3x64x64xui8>, %[[ARG1:.*]]: tensor<4xi64>) -> tensor +// CHECK-DAG: %[[NONE:.*]] = "onnx.NoValue" +// CHECK-DAG: %[[SCALE:.*]] = onnx.Constant dense<1.000000e-01> : tensor<1xf32> +// CHECK-DAG: %[[ZP:.*]] = onnx.Constant dense<0> : tensor<1xui8> +// CHECK: %[[DEQUANT:.*]] = "onnx.DequantizeLinear"(%[[ARG0]], %[[SCALE]], %[[ZP]]) +// CHECK: %[[RESIZE:.*]] = "onnx.Resize"(%[[DEQUANT]], %[[NONE]], %[[NONE]], %[[ARG1]]) +// CHECK: mode = "nearest" +// CHECK: %[[QUANT:.*]] = "onnx.QuantizeLinear"(%[[RESIZE]], %[[SCALE]], %[[ZP]]) +// CHECK: return %[[QUANT]] + +// ----- + +// Test that Resize optimization DOES NOT happen when mode is NOT "nearest" (e.g., "linear") +func.func @resize_with_linear_mode(%arg0: tensor<1x3x64x64xui8>) -> tensor<1x3x128x128xui8> { + %0 = "onnx.NoValue"() {value} : () -> none + %1 = onnx.Constant dense<[1.0, 1.0, 2.0, 2.0]> : tensor<4xf32> + %2 = onnx.Constant dense<1.000000e-01> : tensor<1xf32> + %3 = onnx.Constant dense<0> : tensor<1xui8> + %4 = "onnx.DequantizeLinear"(%arg0, %2, %3) {axis = 1 : si64, block_size = 0 : si64} : (tensor<1x3x64x64xui8>, tensor<1xf32>, tensor<1xui8>) -> tensor<1x3x64x64xf32> + %5 = "onnx.Resize"(%4, %0, %1, %0) {mode = "linear"} : (tensor<1x3x64x64xf32>, none, tensor<4xf32>, none) -> tensor<1x3x128x128xf32> + %6 = "onnx.QuantizeLinear"(%5, %2, %3) {axis = 1 : si64, block_size = 0 
: si64, saturate = 1 : si64} : (tensor<1x3x128x128xf32>, tensor<1xf32>, tensor<1xui8>) -> tensor<1x3x128x128xui8> + return %6 : tensor<1x3x128x128xui8> +} + +// CHECK-LABEL: func.func @resize_with_linear_mode +// CHECK-SAME: (%[[ARG0:.*]]: tensor<1x3x64x64xui8>) -> tensor<1x3x128x128xui8> +// CHECK-DAG: %[[NONE:.*]] = "onnx.NoValue" +// CHECK-DAG: %[[RESIZE_SCALES:.*]] = onnx.Constant dense<[1.000000e+00, 1.000000e+00, 2.000000e+00, 2.000000e+00]> : tensor<4xf32> +// CHECK-DAG: %[[QDQ_SCALE:.*]] = onnx.Constant dense<1.000000e-01> : tensor<1xf32> +// CHECK-DAG: %[[ZP:.*]] = onnx.Constant dense<0> : tensor<1xui8> +// CHECK: %[[DEQUANT:.*]] = "onnx.DequantizeLinear"(%[[ARG0]], %[[QDQ_SCALE]], %[[ZP]]) +// CHECK: %[[RESIZE:.*]] = "onnx.Resize"(%[[DEQUANT]], %[[NONE]], %[[RESIZE_SCALES]], %[[NONE]]) +// CHECK: mode = "linear" +// CHECK: %[[QUANT:.*]] = "onnx.QuantizeLinear"(%[[RESIZE]], %[[QDQ_SCALE]], %[[ZP]]) +// CHECK: return %[[QUANT]] \ No newline at end of file From 5d71b6b61e9fc690473d24553faa5e7e54cb9faa Mon Sep 17 00:00:00 2001 From: sushmita Date: Fri, 14 Nov 2025 20:05:24 +0530 Subject: [PATCH 2/2] removeQDQArndOP update with const check --- .../ONNX/Transforms/QDQAroundOpOpt.cpp | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/Dialect/ONNX/Transforms/QDQAroundOpOpt.cpp b/src/Dialect/ONNX/Transforms/QDQAroundOpOpt.cpp index f2745e20ec..fd18500730 100644 --- a/src/Dialect/ONNX/Transforms/QDQAroundOpOpt.cpp +++ b/src/Dialect/ONNX/Transforms/QDQAroundOpOpt.cpp @@ -24,22 +24,22 @@ using namespace onnx_mlir; static bool isConstantOrInitializer(Value val) { if (!val) return false; - + // Return false for NoValue (which has NoneType) if (mlir::isa(val.getType())) { return false; } - + Operation *definingOp = val.getDefiningOp(); if (!definingOp) { return false; } - + // Check if it's a constant op if (llvm::isa(definingOp)) { return true; } - + // Recursively check if all operands are initializers // If all operands are 
constants, the result is effectively constant for (Value operand : definingOp->getOperands()) { @@ -98,10 +98,10 @@ class RemoveQDQAroundOpPattern : public OpRewritePattern { if (!isConstantOrInitializer(resizeOp.getRoi()) || !isConstantOrInitializer(resizeOp.getScales()) || !isConstantOrInitializer(resizeOp.getSizes())) { - return failure(); + return failure(); } } - + // Unsqueeze requires axes to be a constant if (llvm::isa(op)) { auto unsqueezeOp = llvm::cast(op); @@ -109,7 +109,7 @@ class RemoveQDQAroundOpPattern : public OpRewritePattern { return failure(); } } - + // Squeeze requires axes to be a constant if (llvm::isa(op)) { auto squeezeOp = llvm::cast(op); @@ -117,7 +117,7 @@ class RemoveQDQAroundOpPattern : public OpRewritePattern { return failure(); } } - + // Reshape requires shape to be a constant if (llvm::isa(op)) { auto reshapeOp = llvm::cast(op); @@ -125,7 +125,7 @@ class RemoveQDQAroundOpPattern : public OpRewritePattern { return failure(); } } - + // Gather requires indices to be a constant if (llvm::isa(op)) { auto gatherOp = llvm::cast(op); @@ -133,7 +133,7 @@ class RemoveQDQAroundOpPattern : public OpRewritePattern { return failure(); } } - + // Slice requires all control parameters to be constants if (llvm::isa(op)) { auto sliceOp = llvm::cast(op); @@ -144,7 +144,7 @@ class RemoveQDQAroundOpPattern : public OpRewritePattern { return failure(); } } - + InputAndOutput opIO = getDataInputOutput(op); auto dqOp = opIO.input.getDefiningOp();