Commit 061bbc5
[torch] Update torch.bmm to use accumulator type (llvm#3924)
Batch matmul was using the result element type as its accumulator type. Updated to use the preferred accumulator type for the input element type instead.
1 parent e68560d commit 061bbc5
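In other words, the lowering now allocates the linalg.batch_matmul init tensor in the accumulator type returned by getDefaultAccType (typically a wider type, e.g. f32 when the element type is f16) and converts back to the result element type afterwards. Below is a minimal PyTorch sketch of that numeric pattern, for illustration only: the tensor names and explicit casts are assumptions made for this example, not part of the torch-mlir change itself.

import torch

# Illustrative sketch (not torch-mlir code): the numeric pattern this commit
# applies when lowering torch.bmm. Tensor names here are made up.
lhs = torch.rand(3, 4, 5, dtype=torch.float16)
rhs = torch.rand(3, 5, 4, dtype=torch.float16)

# Accumulate in the wider f32 type (analogous to the init tensor now being
# created with accumulatorDType), then convert back to the f16 result type
# (analogous to the convertTensorToElementType call in the diff below).
acc = torch.bmm(lhs.to(torch.float32), rhs.to(torch.float32))
result = acc.to(torch.float16)
print(result.dtype)  # torch.float16, but the sums were formed in float32

Accumulating the reduction directly in f16 loses precision and can overflow as the inner dimension grows; accumulating in f32 and truncating once at the end typically matches what PyTorch backends do for half-precision matmuls.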

File tree

2 files changed: +31 / -2 lines changed
  • lib/Conversion/TorchToLinalg/Linear.cpp
  • projects/pt1/python/torch_mlir_e2e_test/test_suite/basic.py
lib/Conversion/TorchToLinalg/Linear.cpp

Lines changed: 8 additions & 2 deletions
@@ -727,15 +727,21 @@ class ConvertAtenBmmOp : public OpConversionPattern<AtenBmmOp> {
     // Check the matrixs shapes are valid for mulplication.
     checkDimEqualHelper(rewriter, loc, lhsDim2, rhsDim1);
 
+    Type accumulatorDType = getDefaultAccType(rewriter, resultElementType);
     Value initTensor0 = createZeroInitTensor(
-        rewriter, loc, ValueRange{lhsDim0, lhsDim1, rhsDim2},
-        resultElementType);
+        rewriter, loc, ValueRange{lhsDim0, lhsDim1, rhsDim2}, accumulatorDType);
 
     Value bmm =
         rewriter
             .create<linalg::BatchMatmulOp>(loc, initTensor0.getType(),
                                            ValueRange{lhs, rhs}, initTensor0)
             .getResult(0);
+
+    if (accumulatorDType != resultElementType) {
+      bmm = torch_to_linalg::convertTensorToElementType(rewriter, loc, bmm,
+                                                        resultElementType);
+    }
+
     rewriter.replaceOpWithNewOp<tensor::CastOp>(op, newResultType, bmm);
     return success();
   }

projects/pt1/python/torch_mlir_e2e_test/test_suite/basic.py

Lines changed: 23 additions & 0 deletions
@@ -87,6 +87,29 @@ def BmmFloatModule_basic(module, tu: TestUtils):
     module.forward(tu.rand(3, 4, 5), tu.rand(3, 5, 4))
 
 
+class BmmFloat16Module(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    @export
+    @annotate_args(
+        [
+            None,
+            ([-1, -1, -1], torch.float16, True),
+            ([-1, -1, -1], torch.float16, True),
+        ]
+    )
+    def forward(self, lhs, rhs):
+        return torch.bmm(lhs, rhs)
+
+
+@register_test_case(module_factory=lambda: BmmFloat16Module())
+def BmmFloat16Module_basic(module, tu: TestUtils):
+    module.forward(
+        tu.rand(3, 4, 5).to(torch.float16), tu.rand(3, 5, 4).to(torch.float16)
+    )
+
+
 class BmmIntModule(torch.nn.Module):
     def __init__(self):
         super().__init__()