Commit 9e5531e

[ONNX] Add support for Onnx.QLinearConcat op (#4116)
This commit adds the Onnx->Torch lowering for the [Onnx.QLinearConcat](https://github.com/microsoft/onnxruntime/blob/main/docs/ContribOperators.md#com.microsoft.QLinearConcat) op. The op concatenates per-tensor quantized tensors; the lowering dequantizes each input, concatenates the dequantized tensors, and requantizes the result with the output scale and zero-point.
1 parent cccb0c0 commit 9e5531e
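
Since the lowering hinges on per-tensor affine quantization, here is a minimal scalar sketch of that arithmetic (illustrative only, not part of the commit; the helper names and the uint8 range are assumptions matching the new test):

// QLinearConcat(y_scale, y_zp, x0, s0, z0, x1, s1, z1, ...) is equivalent to
// quantize(concat(dequantize(x0, s0, z0), dequantize(x1, s1, z1), ...),
//          y_scale, y_zp).
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// real = (quant - zero_point) * scale
std::vector<float> dequantize(const std::vector<uint8_t> &x, float scale,
                              uint8_t zp) {
  std::vector<float> out(x.size());
  for (size_t i = 0; i < x.size(); ++i)
    out[i] = (static_cast<float>(x[i]) - zp) * scale;
  return out;
}

// quant = clamp(round(real / scale) + zero_point, 0, 255)
std::vector<uint8_t> quantize(const std::vector<float> &v, float scale,
                              uint8_t zp) {
  std::vector<uint8_t> out(v.size());
  for (size_t i = 0; i < v.size(); ++i) {
    float q = std::nearbyint(v[i] / scale) + static_cast<float>(zp);
    out[i] = static_cast<uint8_t>(std::clamp(q, 0.0f, 255.0f));
  }
  return out;
}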

2 files changed: +123 -0

lib/Conversion/TorchOnnxToTorch/ComMicrosoftDomain.cpp

Lines changed: 103 additions & 0 deletions
@@ -645,4 +645,107 @@ void mlir::torch::onnx_c::populateComMicrosoftDomain(
             y);
         return success();
       });
+  patterns.onOp(
+      "QLinearConcat", 1,
+      [](OpBinder binder, ConversionPatternRewriter &rewriter) {
+        Location loc = binder.getLoc();
+        Torch::ValueTensorType resultType;
+        SmallVector<Value> operands;
+        int64_t axis;
+        if (binder.tensorOperandsList(operands) ||
+            binder.s64IntegerAttr(axis, "axis") ||
+            binder.tensorResultType(resultType))
+          return failure();
+
+        SmallVector<Value> inputs, inputScales, inputZeroPoints;
+        for (unsigned i = 2; i < operands.size(); i = i + 3) {
+          inputs.push_back(operands[i]);
+          inputScales.push_back(operands[i + 1]);
+          inputZeroPoints.push_back(operands[i + 2]);
+        }
+
+        unsigned numInputs = (operands.size() - 2) / 3;
+        if (!(llvm::all_equal({inputs.size(), inputScales.size(),
+                               inputZeroPoints.size()}) &&
+              inputs.size() == numInputs))
+          return rewriter.notifyMatchFailure(
+              binder.op, "Incompatible number of input operands, scales "
+                         "and/or zero-points");
+
+        auto makePerTensor = [&rewriter, &binder](Value v, Value scale,
+                                                  Value zp) -> Value {
+          auto ty = cast<Torch::ValueTensorType>(v.getType());
+          auto newTy = getQTorchTypeFromTorchIntType(ty);
+          return rewriter.create<Torch::Aten_MakePerTensorQuantizedTensorOp>(
+              binder.getLoc(), newTy, v, scale, zp);
+        };
+
+        // Preparing the quantized inputs.
+        SmallVector<Value> quantizedInputs;
+        for (unsigned i = 0; i < numInputs; i++) {
+          Value scale, zeroPoint;
+          if (failed(extractPerTensorQuantizationArguments(
+                  rewriter, loc, /*scale=*/inputScales[i],
+                  /*zero_point=*/inputZeroPoints[i], scale, zeroPoint)))
+            return rewriter.notifyMatchFailure(
+                binder.op, "Incompatible scale and zero-point arguments "
+                           "for per-tensor quantization");
+
+          quantizedInputs.push_back(makePerTensor(inputs[i], scale, zeroPoint));
+        }
+
+        // Dequantizing the inputs.
+        SmallVector<Value> dequantizedInputs;
+        for (unsigned i = 0; i < numInputs; i++) {
+          Torch::ValueTensorType inputTy =
+              dyn_cast<Torch::ValueTensorType>(quantizedInputs[i].getType());
+          if (!inputTy || !inputTy.hasSizes())
+            return rewriter.notifyMatchFailure(
+                binder.op, "Expected the input tensors to be concatenated "
+                           "to have sizes");
+
+          inputTy = rewriter.getType<Torch::ValueTensorType>(
+              inputTy.getOptionalSizes(), rewriter.getF32Type());
+          dequantizedInputs.push_back(
+              rewriter.create<Torch::AtenDequantizeSelfOp>(loc, inputTy,
+                                                           quantizedInputs[i]));
+        }
+
+        // Concatenating the inputs.
+        Type listElemType =
+            cast<Torch::BaseTensorType>(dequantizedInputs[0].getType())
+                .getWithSizesAndDtype(/*optionalSizes=*/std::nullopt,
+                                      /*optionalDtype=*/nullptr);
+        Type listType = Torch::ListType::get(listElemType);
+        Value tensorList = rewriter.create<Torch::PrimListConstructOp>(
+            binder.op->getLoc(), listType, dequantizedInputs);
+        Value cstAxis = rewriter.create<Torch::ConstantIntOp>(
+            loc, rewriter.getI64IntegerAttr(axis));
+        auto concatTy = rewriter.getType<Torch::ValueTensorType>(
+            resultType.getOptionalSizes(), rewriter.getF32Type());
+        Value concat = rewriter.create<Torch::AtenCatOp>(loc, concatTy,
+                                                         tensorList, cstAxis);
+
+        // Quantizing the result of concatenated inputs.
+        Value yScale, yZp;
+        if (failed(extractPerTensorQuantizationArguments(
+                rewriter, loc, /*scale=*/operands[0],
+                /*zero_point=*/operands[1], yScale, yZp)))
+          return rewriter.notifyMatchFailure(
+              binder.op, "Incompatible scale and zero-point arguments "
+                         "for per-tensor quantization");
+        Torch::ValueTensorType yTy = dyn_cast<Torch::ValueTensorType>(
+            getQTorchTypeFromTorchIntType(resultType));
+        Value dtyVal = rewriter.create<Torch::ConstantIntOp>(
+            loc, rewriter.getType<Torch::IntType>(),
+            rewriter.getIntegerAttr(
+                rewriter.getIntegerType(64),
+                static_cast<int64_t>(
+                    Torch::getScalarTypeForType(yTy.getDtype()))));
+        Value result = rewriter.create<Torch::AtenQuantizePerTensorOp>(
+            loc, yTy, concat, yScale, yZp, dtyVal);
+        rewriter.replaceOpWithNewOp<Torch::AtenIntReprOp>(binder.op,
+                                                          resultType, result);
+        return success();
+      });
 }
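
For reference, the operand loop above starts at index 2 because the contrib-op signature puts Y_scale and Y_zero_point first, followed by repeating (input, scale, zero_point) triples. A standalone sketch of that unpacking arithmetic (illustrative, not from the commit):

#include <cassert>
#include <cstdio>

int main() {
  // Eight operands, as in the test below: Y_scale, Y_zero_point, two triples.
  const unsigned numOperands = 8;
  assert((numOperands - 2) % 3 == 0 && "malformed operand list");
  const unsigned numInputs = (numOperands - 2) / 3; // (8 - 2) / 3 == 2
  for (unsigned i = 2; i + 2 < numOperands; i += 3)
    std::printf("input at %u, scale at %u, zero_point at %u\n", i, i + 1,
                i + 2);
  std::printf("numInputs = %u\n", numInputs);
  return 0;
}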

test/Conversion/TorchOnnxToTorch/simple_ops_q_to_z.mlir

Lines changed: 20 additions & 0 deletions
@@ -3743,3 +3743,23 @@ func.func @test_qlinearleakyrelu(%arg0: !torch.vtensor<[?,32,?,?],ui8>, %arg1: !
 // CHECK: return %[[OUT]]
   return %0 : !torch.vtensor<[?,32,?,?],ui8>
 }
+
+// -----
+
+// CHECK-LABEL: @test_qlinearconcat(
+func.func @test_qlinearconcat(%arg0: !torch.vtensor<[],f32>, %arg1: !torch.vtensor<[],ui8>, %arg2: !torch.vtensor<[?,?,?,?],ui8>, %arg3: !torch.vtensor<[],f32>, %arg4: !torch.vtensor<[],ui8>, %arg5: !torch.vtensor<[?,?,?,?],ui8>, %arg6: !torch.vtensor<[],f32>, %arg7: !torch.vtensor<[],ui8>) -> !torch.vtensor<[?,?,?,?],ui8> attributes {torch.onnx_meta.ir_version = 5 : si64, torch.onnx_meta.opset_version = 10 : si64} {
+  %0 = torch.operator "onnx.QLinearConcat"(%arg0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7) {torch.onnx.axis = 1 : si64} : (!torch.vtensor<[],f32>, !torch.vtensor<[],ui8>, !torch.vtensor<[?,?,?,?],ui8>, !torch.vtensor<[],f32>, !torch.vtensor<[],ui8>, !torch.vtensor<[?,?,?,?],ui8>, !torch.vtensor<[],f32>, !torch.vtensor<[],ui8>) -> !torch.vtensor<[?,?,?,?],ui8>
+  // CHECK-DAG: %[[EMPTY:.+]] = torch.prim.ListConstruct : () -> !torch.list<int>
+  // CHECK-DAG: %[[QUANT_INPUT_1:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg2, %{{.+}}, %{{.+}} : !torch.vtensor<[?,?,?,?],ui8>, !torch.float, !torch.int -> !torch.vtensor<[?,?,?,?],!torch.quint8>
+  // CHECK-DAG: %[[QUANT_INPUT_2:.+]] = torch.aten._make_per_tensor_quantized_tensor %arg5, %{{.+}}, %{{.+}} : !torch.vtensor<[?,?,?,?],ui8>, !torch.float, !torch.int -> !torch.vtensor<[?,?,?,?],!torch.quint8>
+  // CHECK: %[[DEQUANT_INPUT_1:.+]] = torch.aten.dequantize.self %[[QUANT_INPUT_1]] : !torch.vtensor<[?,?,?,?],!torch.quint8> -> !torch.vtensor<[?,?,?,?],f32>
+  // CHECK: %[[DEQUANT_INPUT_2:.+]] = torch.aten.dequantize.self %[[QUANT_INPUT_2]] : !torch.vtensor<[?,?,?,?],!torch.quint8> -> !torch.vtensor<[?,?,?,?],f32>
+  // CHECK-DAG: %[[CONCAT_LIST:.+]] = torch.prim.ListConstruct %[[DEQUANT_INPUT_1]], %[[DEQUANT_INPUT_2]] : (!torch.vtensor<[?,?,?,?],f32>, !torch.vtensor<[?,?,?,?],f32>) -> !torch.list<vtensor>
+  // CHECK: %[[AXIS:.+]] = torch.constant.int 1
+  // CHECK: %[[CONCAT:.+]] = torch.aten.cat %[[CONCAT_LIST]], %[[AXIS]] : !torch.list<vtensor>, !torch.int -> !torch.vtensor<[?,?,?,?],f32>
+  // CHECK: %[[DTY:.+]] = torch.constant.int 13
+  // CHECK: %[[QO:.+]] = torch.aten.quantize_per_tensor %[[CONCAT]], %{{.+}}, %{{.+}}, %[[DTY]] : !torch.vtensor<[?,?,?,?],f32>, !torch.float, !torch.int, !torch.int -> !torch.vtensor<[?,?,?,?],!torch.quint8>
+  // CHECK: %[[OUT:.+]] = torch.aten.int_repr %[[QO]] : !torch.vtensor<[?,?,?,?],!torch.quint8> -> !torch.vtensor<[?,?,?,?],ui8>
+  // CHECK: return %[[OUT]]
+  return %0 : !torch.vtensor<[?,?,?,?],ui8>
+}
