Initial implementation of AtenOuterOp

root · amemov · commit c0d65beeb0f9 · 2025-09-03T15:35:41.000-07:00
- Defined the op in Linear.cpp

TODO:
- Testing, and perhaps add some test(-s) inside torch-mlir?
diff --git a/lib/Conversion/TorchToLinalg/Linear.cpp b/lib/Conversion/TorchToLinalg/Linear.cpp
@@ -1759,6 +1759,139 @@ struct ConvertAtenFftRfftOp final : OpConversionPattern<AtenFftRfftOp> {
 
 } // namespace
 
+namespace {
+  class ConvertAtenOuterOp : public OpConversionPattern<AtenOuterOp> {
+  public:
+    using OpConversionPattern::OpConversionPattern;
+    LogicalResult
+    matchAndRewrite(AtenOuterOp op, OpAdaptor adaptor,
+                    ConversionPatternRewriter &rewriter) const override {
+
+      Location loc = op->getLoc();
+      Value lhs = adaptor.getSelf();
+      Value rhs = op->getOperand(1);
+  
+      if (failed(verifyLinalgCompatibleTypes(op, rewriter))) {
+        return failure();
+      }
+      auto lhsType = cast<RankedTensorType>(lhs.getType());
+      auto rhsType = cast<RankedTensorType>(rhs.getType());
+  
+      auto lhsTorchType = cast<ValueTensorType>(op.getSelf().getType());
+      auto rhsTorchType = cast<ValueTensorType>(op.getOperand(1).getType());
+  
+      // Get the rank of both matrix.
+      unsigned lhsRank = lhsType.getRank();
+      unsigned rhsRank = rhsType.getRank();
+  
+      Value lhsZeroPoint, rhsZeroPoint;
+      getZeroPoint(op.getSelf(), lhsZeroPoint);
+      getZeroPoint(op.getOperand(1), rhsZeroPoint);
+  
+      if (static_cast<bool>(lhsZeroPoint) != static_cast<bool>(rhsZeroPoint)) {
+        return rewriter.notifyMatchFailure(
+            op, "unsupported: aten.outer with mixed quantization");
+      }
+  
+      bool isUnsigned = torch_to_linalg::isUnsignedTorchType(lhsTorchType);
+      bool isUnsignedR = torch_to_linalg::isUnsignedTorchType(rhsTorchType);
+  
+      if (!lhsZeroPoint && lhsTorchType.getDtype() != rhsTorchType.getDtype()) {
+        // Allows quantized types to mismatch
+        return rewriter.notifyMatchFailure(
+            op, "unsupported: aten.outer with different input element types");
+      }
+  
+      Type newResultType = getTypeConverter()->convertType(op.getType());
+      auto resultType = cast<RankedTensorType>(newResultType);
+      Type elementType = resultType.getElementType();
+      
+      // Quantized case
+      if (lhsZeroPoint) {
+        // get each zero point ready to pass to a quantized_matmul
+        lhsZeroPoint = typeConverter->materializeTargetConversion(
+            rewriter, loc,
+            getTypeConverter()->convertType(lhsZeroPoint.getType()),
+            lhsZeroPoint);
+        rhsZeroPoint = typeConverter->materializeTargetConversion(
+            rewriter, loc,
+            getTypeConverter()->convertType(rhsZeroPoint.getType()),
+            rhsZeroPoint);
+        lhsZeroPoint = rewriter.create<arith::TruncIOp>(
+            loc, rewriter.getI32Type(), lhsZeroPoint);
+        rhsZeroPoint = rewriter.create<arith::TruncIOp>(
+            loc, rewriter.getI32Type(), rhsZeroPoint);
+  
+        // change uint8 quantization -> int8 quantization
+        int64_t numBits =
+            cast<mlir::IntegerType>(lhsType.getElementType()).getWidth();
+        signShift(rewriter, loc, lhs, lhsZeroPoint, isUnsigned, numBits);
+        numBits = cast<mlir::IntegerType>(rhsType.getElementType()).getWidth();
+        signShift(rewriter, loc, rhs, rhsZeroPoint, isUnsignedR, numBits);
+  
+        if (lhsRank == 1 && rhsRank == 1) {
+          int64_t lhsDim = lhsType.getShape()[0];
+          int64_t rhsDim = rhsType.getShape()[0];
+
+          // Unsqueeze: lhs: [n] -> [n, 1] and rhs: [m] -> [1, m]
+          auto lhsUnsqueezeType = RankedTensorType::get({lhsDim, 1}, lhsType.getElementType());
+          auto rhsUnsqueezeType = RankedTensorType::get({1, rhsDim}, rhsType.getElementType());
+          SmallVector<ReassociationIndices> reassociation = {{0, 1}};
+          lhs = rewriter.create<tensor::ExpandShapeOp>(loc, lhsUnsqueezeType, lhs, reassociation);
+          rhs = rewriter.create<tensor::ExpandShapeOp>(loc, rhsUnsqueezeType, rhs, reassociation);
+
+          // Create a zero tensor with shape [lhsDim, rhsDim] for the accumulator.
+          Value lhsDimVal = rewriter.create<tensor::DimOp>(loc, lhs, 0);
+          Value rhsDimVal = rewriter.create<tensor::DimOp>(loc, rhs, 1);
+          Value zeroTensor = createZeroInitTensor(rewriter, loc,
+                                                  ValueRange{lhsDimVal, rhsDimVal},
+                                                  elementType);
+
+          // Use the quantized version of matmul.
+          Value outerProd = rewriter.create<linalg::QuantizedMatmulOp>(
+              loc, zeroTensor.getType(),
+              ValueRange{lhs, rhs, lhsZeroPoint, rhsZeroPoint},
+              zeroTensor).getResult(0);
+
+          rewriter.replaceOpWithNewOp<tensor::CastOp>(op, newResultType, outerProd);
+          return success();
+        }
+        return rewriter.notifyMatchFailure(op, "unsupported: quantized aten.outer op case");
+      }
+  
+  
+      // Non Quantized Outter Product
+      if (lhsRank == 1 && rhsRank == 1) {
+        int64_t lhsDim = lhsType.getShape()[0];
+        int64_t rhsDim = rhsType.getShape()[0];
+
+        // Unsqueeze: lhs from [n] -> [n, 1] and rhs from [m] -> [1, m]
+        auto lhsUnsqueezeType = RankedTensorType::get({lhsDim, 1}, lhsType.getElementType());
+        auto rhsUnsqueezeType = RankedTensorType::get({1, rhsDim}, rhsType.getElementType());
+        SmallVector<ReassociationIndices> reassociation = {{0, 1}};
+        lhs = rewriter.create<tensor::ExpandShapeOp>(loc, lhsUnsqueezeType, lhs, reassociation);
+        rhs = rewriter.create<tensor::ExpandShapeOp>(loc, rhsUnsqueezeType, rhs, reassociation);
+
+        // Create a zero-initialized tensor with shape [lhsDim, rhsDim]
+        Value lhsDimVal = rewriter.create<tensor::DimOp>(loc, lhs, 0);
+        Value rhsDimVal = rewriter.create<tensor::DimOp>(loc, rhs, 1);
+        Value zeroTensor = createZeroInitTensor(rewriter, loc,
+                                                ValueRange{lhsDimVal, rhsDimVal},
+                                                elementType);
+
+        // Use linalg::MatmulOp to compute the outer product.
+        Value outerProd = rewriter.create<linalg::MatmulOp>(
+            loc, zeroTensor.getType(), ValueRange{lhs, rhs}, zeroTensor).getResult(0);
+
+        rewriter.replaceOpWithNewOp<tensor::CastOp>(op, newResultType, outerProd);
+        return success();
+      }
+    
+      return failure();
+    }
+  };
+} // namespace
+
 void mlir::torch::torch_to_linalg::populateLinearPatternsAndLegality(
     TypeConverter &typeConverter, RewritePatternSet &patterns,
     ConversionTarget &target) {
@@ -1775,4 +1908,6 @@ void mlir::torch::torch_to_linalg::populateLinearPatternsAndLegality(
   patterns.add<ConvertAtenConvolutionOp>(typeConverter, context);
   target.addIllegalOp<AtenFftRfftOp>();
   patterns.add<ConvertAtenFftRfftOp>(typeConverter, context);
+  target.addIllegalOp<AtenOuterOp>();
+  patterns.add<ConvertAtenOuterOp>(typeConverter, context);
 }