Commits (32 total; changes shown from 29 commits)
c76a8cc  Make fusion work on any LinalgOp (srcarroll, Jun 19, 2025)
fa60fa5  Merge branch 'main' into generalize-fusion (srcarroll, Jun 19, 2025)
20b25f3  format and add test (srcarroll, Jun 19, 2025)
8e471a7  fix typo in test (srcarroll, Jun 19, 2025)
d723913  add same test for other fusion pass -linalg-fuse-elementwise-ops (srcarroll, Jun 19, 2025)
5280b87  fix bug with no output bb args and add test (srcarroll, Jun 19, 2025)
c2f52bc  add linalg.elementwise test (srcarroll, Jun 19, 2025)
8d2e8e0  fix formatting (srcarroll, Jun 19, 2025)
58582bf  use getElementTypeOrSelf for cleanup (srcarroll, Jun 19, 2025)
cf67ab6  switch elementwise test to broadcast version (srcarroll, Jun 19, 2025)
7d402c1  remove block args that were added (hacky) (srcarroll, Jun 20, 2025)
b1d15b2  add requested tests (srcarroll, Jun 21, 2025)
459bcb4  add checks for nontrivial map cases (srcarroll, Jun 21, 2025)
f7e164b  revert unintended change (srcarroll, Jun 21, 2025)
9acb5d0  Merge branch 'main' into generalize-fusion (srcarroll, Jul 31, 2025)
8a375bf  fix weird lit failure (srcarroll, Aug 1, 2025)
5c3bd5f  Merge branch 'main' into generalize-fusion (srcarroll, Aug 1, 2025)
a12b417  Merge branch 'main' into generalize-fusion (srcarroll, Oct 30, 2025)
ce33596  remove hack for linalg.map and update tests (srcarroll, Oct 30, 2025)
1eb3461  fix tests again (srcarroll, Oct 30, 2025)
389a9c4  add a couple tests with `linalg.matmul` (srcarroll, Nov 2, 2025)
55cca22  Merge branch 'main' into generalize-fusion (srcarroll, Nov 2, 2025)
b914037  Merge branch 'main' into generalize-fusion (srcarroll, Nov 3, 2025)
8c906a0  "fix" mysterious check-mlir error (srcarroll, Nov 3, 2025)
047266b  use proper getBlock interface methods (srcarroll, Nov 5, 2025)
880d9c0  Merge branch 'main' into generalize-fusion (srcarroll, Nov 5, 2025)
5588c86  Merge branch 'main' into generalize-fusion (srcarroll, Nov 5, 2025)
95635df  Merge branch 'main' into generalize-fusion (srcarroll, Nov 5, 2025)
b7247b9  update docstrings (srcarroll, Nov 6, 2025)
b639a2f  Merge branch 'main' into generalize-fusion (srcarroll, Nov 6, 2025)
ac0eb5b  Merge branch 'main' into generalize-fusion (srcarroll, Nov 6, 2025)
a3bba1a  define separate `fuseElementwiseLinalgOps` and `fuseElementwiseGeneri… (srcarroll, Nov 7, 2025)
14 changes: 9 additions & 5 deletions mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -554,9 +554,11 @@ FailureOr<DropUnitDimsResult> dropUnitDims(RewriterBase &rewriter,
GenericOp genericOp,
const ControlDropUnitDims &options);

/// Fuse two `linalg.generic` operations that have a producer-consumer
/// Fuse two linalg operations that have a producer-consumer
/// relationship captured through `fusedOperand`. The method expects
/// that `areElementwiseOpsFusable` returns true for the given `fusedOperand`.
/// The resulting fused operation is always a `linalg.generic`.
/// TODO: Support fusing to named ops when possible.
struct ElementwiseOpFusionResult {
Operation *fusedOp;
llvm::DenseMap<Value, Value> replacements;
@@ -569,8 +571,8 @@ fuseElementwiseOps(RewriterBase &rewriter, OpOperand *fusedOperand);
/// * There is a chance that the implementation of the transformation does not
/// agree with the result of this method. This function gives a prediction based
/// on an optimized fusion.
llvm::SmallDenseSet<int> getPreservedProducerResults(GenericOp producer,
GenericOp consumer,
llvm::SmallDenseSet<int> getPreservedProducerResults(LinalgOp producer,
LinalgOp consumer,
OpOperand *fusedOperand);

/// Try to peel and canonicalize loop `op` and return the new result.
@@ -1921,8 +1923,10 @@ using ControlFusionFn = std::function<bool(OpOperand *fusedOperand)>;

/// Patterns for fusing linalg operation on tensors.

/// Pattern to fuse `linalg.generic` -> `linalg.generic` operations
/// when both operations are fusable elementwise operations.
/// Pattern to fuse two linalg operations when they are fusable.
/// The producer must always be an elementwise operation, and
/// operations are opted into fusion via `controlElementwiseOpFusion`.
void populateElementwiseOpsFusionPatterns(
RewritePatternSet &patterns,
const ControlFusionFn &controlElementwiseOpFusion);
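The header changes above widen the fusion entry points from GenericOp to LinalgOp while keeping the same pattern-population API. As a point of reference, here is a minimal C++ sketch of how a downstream pass might wire these patterns up with a control callback; the helper name runFusionOnFunc and the single-use policy are illustrative assumptions, not part of this PR, and older MLIR releases spell the greedy driver entry point applyPatternsAndFoldGreedily.

#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;

// Illustrative helper: run elementwise fusion over a function-like op.
static LogicalResult runFusionOnFunc(Operation *func) {
  RewritePatternSet patterns(func->getContext());

  // With this PR the producer/consumer no longer have to be linalg.generic;
  // any LinalgOp pair accepted by areElementwiseOpsFusable may be fused.
  linalg::ControlFusionFn controlFn = [](OpOperand *fusedOperand) {
    Operation *producer = fusedOperand->get().getDefiningOp();
    // Example policy (an assumption): only fuse single-use producers.
    return producer && producer->hasOneUse();
  };
  linalg::populateElementwiseOpsFusionPatterns(patterns, controlFn);

  // Named applyPatternsAndFoldGreedily in older MLIR releases.
  return applyPatternsGreedily(func, std::move(patterns));
}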
61 changes: 30 additions & 31 deletions mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp
@@ -77,11 +77,11 @@ static AffineMap getIndexingMapOfProducerOperandsInCoordinatesOfFusedOp(
// of the fused producer & consumer after the fusion can still compute the
// bounds of the op.
static bool isOpOperandCanBeDroppedAfterFusedLinalgs(
GenericOp producer, GenericOp consumer,
LinalgOp producer, LinalgOp consumer,
ArrayRef<OpOperand *> opOperandsToIgnore) {
SmallVector<AffineMap> indexingMaps;

SmallVector<GenericOp> ops = {producer, consumer};
SmallVector<LinalgOp> ops = {producer, consumer};
for (auto &op : ops) {
for (auto &opOperand : op->getOpOperands()) {
if (llvm::is_contained(opOperandsToIgnore, &opOperand)) {
@@ -109,8 +109,9 @@ static bool isOpOperandCanBeDroppedAfterFusedLinalgs(
/// * There is a chance that the implementation of the transformation does not
/// agree with the result of this method. This function gives a prediction based
/// on an optimized fusion.
llvm::SmallDenseSet<int> mlir::linalg::getPreservedProducerResults(
GenericOp producer, GenericOp consumer, OpOperand *fusedOperand) {
llvm::SmallDenseSet<int>
mlir::linalg::getPreservedProducerResults(LinalgOp producer, LinalgOp consumer,
OpOperand *fusedOperand) {
llvm::SmallDenseSet<int> preservedProducerResults;
llvm::SmallVector<OpOperand *> opOperandsToIgnore;

@@ -135,15 +136,15 @@
return preservedProducerResults;
}

/// Conditions for elementwise fusion of generic operations.
/// Conditions for elementwise fusion of linalg operations.
bool mlir::linalg::areElementwiseOpsFusable(OpOperand *fusedOperand) {
if (!fusedOperand)
return false;

auto producer = fusedOperand->get().getDefiningOp<GenericOp>();
auto consumer = dyn_cast<GenericOp>(fusedOperand->getOwner());
auto producer = fusedOperand->get().getDefiningOp<LinalgOp>();
auto consumer = dyn_cast<LinalgOp>(fusedOperand->getOwner());

// Check producer and consumer are generic ops.
// Check producer and consumer are linalg ops.
if (!producer || !consumer)
return false;

@@ -179,7 +180,7 @@ bool mlir::linalg::areElementwiseOpsFusable(OpOperand *fusedOperand) {
return false;

// Ensure that the fusion does not remove size information required to
// get the loop bounds. For non-reduction generics, this is trivially the
// get the loop bounds. For non-reduction ops, this is trivially the
// case due to the output operand. For reductions, we need to check that after
// the fusion, each loop dimension has at least one input that defines it.
if ((consumer.getNumReductionLoops())) {
@@ -219,13 +220,14 @@ static void generateFusedElementwiseOpRegion(
RewriterBase &rewriter, GenericOp fusedOp,
AffineMap consumerToProducerLoopsMap, OpOperand *fusedOperand,
unsigned nloops, llvm::SmallDenseSet<int> &preservedProducerResults) {
auto producer = cast<GenericOp>(fusedOperand->get().getDefiningOp());
auto consumer = cast<GenericOp>(fusedOperand->getOwner());
auto producer = cast<LinalgOp>(fusedOperand->get().getDefiningOp());
auto consumer = cast<LinalgOp>(fusedOperand->getOwner());
// Build the region of the fused op.
Block &producerBlock = producer->getRegion(0).front();
Block &consumerBlock = consumer->getRegion(0).front();

Block &producerBlock = *producer.getBlock();
Block &consumerBlock = *consumer.getBlock();
OpBuilder::InsertionGuard guard(rewriter);
Block *fusedBlock = rewriter.createBlock(&fusedOp.getRegion());
Block *fusedBlock = rewriter.createBlock(&fusedOp->getRegion(0));
Review thread on the line above:

Groverkss (Member), Nov 5, 2025:
This is wrong. Please use interface methods to do it correctly:
    /*methodName=*/"getRegionBuilder",
or

srcarroll (Contributor, Author):
getRegionBuilder is wrong since that just gets a function ref, but I will use getBlock. Thanks for pointing it out.

srcarroll (Contributor, Author):
I'm actually unsure how to apply this suggestion. The other one above makes sense, since I'm just getting the blocks that have already been created. However, this one is creating a block in the op's region, and I don't see an interface method for getting a region. If region 0 isn't guaranteed, shouldn't the interface have a method for that?

Anyway, fusedOp doesn't have to be a LinalgOp for these initial changes, since the function that calls this one explicitly creates a GenericOp (see https://github.com/llvm/llvm-project/pull/144922/files#diff-a7543973103a3f3abb605911ca6d141dc4ffd4782b2bc0ad57890d11ab72e2c1R422). So it's probably better to just declare fusedOp as GenericOp for this function and revert this line. Any thoughts?

@rengolin and I had a discussion back when this PR first went up about generating named ops post fusion when possible. I think it was agreed that it makes sense to leave this as a TODO (see #144922 (comment)). So when we get there we can revisit how to do this, unless there's an obvious solution now.

Groverkss (Member):
> Anyway, fusedOp doesn't have to be a LinalgOp for these initial changes [...] Any thoughts?

Probably better to declare it as a GenericOp, yes, since the transformation always (today) returns a GenericOp anyway.

> @rengolin and I had a discussion back when this PR first went up [...] unless there's an obvious solution now.

I think that is a different problem. My main concern is expecting something from an interface when the interface doesn't guarantee it.

srcarroll (Contributor, Author):
It is a separate problem. I just meant that when we make the changes to fuse to named ops (when possible), LinalgOp might be re-introduced here and we would run into this issue again. But it is possible that more refactoring would need to happen anyway, so we wouldn't necessarily run back into this issue again here specifically.

IRMapping mapper;

// 2. Add an index operation for every fused loop dimension and use the
@@ -331,7 +333,7 @@ static void generateFusedElementwiseOpRegion(
YieldOp::create(rewriter, fusedOp.getLoc(), fusedYieldValues);

// Sanity checks.
assert(fusedBlock->getNumArguments() == fusedOp.getNumOperands() &&
assert(fusedBlock->getNumArguments() == fusedOp->getNumOperands() &&
"Ill-formed GenericOp region");
}

@@ -341,8 +343,8 @@
assert(areElementwiseOpsFusable(fusedOperand) &&
"expected elementwise operation pre-conditions to pass");
auto producerResult = cast<OpResult>(fusedOperand->get());
auto producer = cast<GenericOp>(producerResult.getOwner());
auto consumer = cast<GenericOp>(fusedOperand->getOwner());
auto producer = cast<LinalgOp>(producerResult.getOwner());
auto consumer = cast<LinalgOp>(fusedOperand->getOwner());
// TODO: allow fusing the producer of an output operand.
assert(consumer.isDpsInput(fusedOperand) &&
"expected producer of input operand");
@@ -419,10 +421,7 @@ mlir::linalg::fuseElementwiseOps(RewriterBase &rewriter,
// Generate the fused op.
auto fusedOp = GenericOp::create(
rewriter, consumer.getLoc(), fusedResultTypes, fusedInputOperands,
fusedOutputOperands, rewriter.getAffineMapArrayAttr(fusedIndexMaps),
consumer.getIteratorTypes(),
/*doc=*/nullptr,
/*library_call=*/nullptr);
fusedOutputOperands, fusedIndexMaps, consumer.getIteratorTypesArray());
if (!fusedOp.getShapesToLoopsMap()) {
// Fused op has invalid indexing maps. Typically this means something is off
// in the input, but going ahead here would result in verification errors.
@@ -460,18 +459,18 @@ mlir::linalg::fuseElementwiseOps(RewriterBase &rewriter,
}

namespace {
/// Patterns to fuse a generic op, with the producer of its operands.
class FuseElementwiseOps : public OpRewritePattern<GenericOp> {
/// Patterns to fuse a linalg op, with the producer of its operands.
class FuseElementwiseOps : public OpInterfaceRewritePattern<LinalgOp> {
public:
FuseElementwiseOps(MLIRContext *context, ControlFusionFn fun,
PatternBenefit benefit = 1)
: OpRewritePattern<GenericOp>(context, benefit),
: OpInterfaceRewritePattern<LinalgOp>(context, benefit),
controlFn(std::move(fun)) {}

LogicalResult matchAndRewrite(GenericOp genericOp,
LogicalResult matchAndRewrite(LinalgOp linalgOp,
PatternRewriter &rewriter) const override {
// Find the first operand that is defined by another generic op on tensors.
for (OpOperand &opOperand : genericOp->getOpOperands()) {
// Find the first operand that is defined by another linalg op on tensors.
for (OpOperand &opOperand : linalgOp->getOpOperands()) {
if (!areElementwiseOpsFusable(&opOperand))
continue;
if (!controlFn(&opOperand))
@@ -483,7 +482,7 @@ class FuseElementwiseOps : public OpRewritePattern<GenericOp> {
FailureOr<ElementwiseOpFusionResult> fusionResult =
fuseElementwiseOps(rewriter, &opOperand);
if (failed(fusionResult))
return rewriter.notifyMatchFailure(genericOp, "fusion failed");
return rewriter.notifyMatchFailure(linalgOp, "fusion failed");

// Perform the fusion.
for (auto [origVal, replacement] : fusionResult->replacements) {
@@ -492,10 +491,10 @@ class FuseElementwiseOps : public OpRewritePattern<GenericOp> {
return use.get().getDefiningOp() != producer;
});
}
rewriter.eraseOp(genericOp);
rewriter.eraseOp(linalgOp);
return success();
}
return failure();
return rewriter.notifyMatchFailure(linalgOp, "no fusable operands");
}

private:
@@ -2279,7 +2278,7 @@ void mlir::linalg::populateCollapseDimensions(

namespace {

/// Pass that fuses generic ops on tensors. Used only for testing.
/// Pass that fuses linalg ops on tensors. Used only for testing.
// TODO(ravishankarm): This pass is to be deprecated. The efficacy of the
// patterns added here heavily depends on the cost function used. Having an
// opinionated pass of this form is not recommended. Deprecate this pass in
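The implementation changes above rework FuseElementwiseOps into an OpInterfaceRewritePattern over LinalgOp, but the public entry point fuseElementwiseOps can also be driven by hand. Below is a minimal, hedged sketch of doing that for a single operand; tryFuseProducerInto and its cleanup policy (erasing the consumer and a now-unused producer directly rather than deferring to DCE) are assumptions for illustration, not code introduced by this PR.

#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Linalg/Transforms/Transforms.h"
#include "mlir/IR/PatternMatch.h"

using namespace mlir;

// Illustrative helper: fuse the producer of `fusedOperand` into its consumer.
static LogicalResult tryFuseProducerInto(RewriterBase &rewriter,
                                         OpOperand *fusedOperand) {
  // The precondition check now accepts any LinalgOp producer/consumer pair.
  if (!linalg::areElementwiseOpsFusable(fusedOperand))
    return failure();

  Operation *producer = fusedOperand->get().getDefiningOp();
  Operation *consumer = fusedOperand->getOwner();

  FailureOr<linalg::ElementwiseOpFusionResult> result =
      linalg::fuseElementwiseOps(rewriter, fusedOperand);
  if (failed(result))
    return failure();

  // Redirect uses of the original results to the fused linalg.generic,
  // a simplified version of what the FuseElementwiseOps pattern does.
  for (auto [origVal, replacement] : result->replacements)
    rewriter.replaceAllUsesWith(origVal, replacement);

  rewriter.eraseOp(consumer);
  if (producer->use_empty())
    rewriter.eraseOp(producer);
  return success();
}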
70 changes: 68 additions & 2 deletions mlir/test/Dialect/Linalg/fusion-elementwise-ops.mlir
@@ -1017,9 +1017,75 @@ module {

// -----

func.func @map_ops(%in1: tensor<8xf32>, %in2: tensor<8xf32>) -> tensor<8xf32> {
%fill = tensor.empty() : tensor<8xf32>
%add = linalg.map {arith.addf} ins(%in1, %in2: tensor<8xf32>, tensor<8xf32>) outs(%fill: tensor<8xf32>)
%mapped_65 = linalg.map { math.sqrt } ins(%add : tensor<8xf32>) outs(%fill : tensor<8xf32>)
return %mapped_65 : tensor<8xf32>
}

// CHECK-LABEL: func @map_ops
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<8xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<8xf32>
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8xf32>
// CHECK: %[[FUSED_OP:.+]] = linalg.generic
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : {{.*}}) outs(%[[EMPTY]] :
// CHECK-NEXT: ^bb0(%[[IN0:.*]]: f32, %[[IN1:.*]]: f32, %[[OUT:.*]]: f32):
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[IN0]], %[[IN1]]
// CHECK-NEXT: %[[SQRT:.*]] = math.sqrt %[[ADD]]
// CHECK-NEXT: linalg.yield %[[SQRT]]
// CHECK-NOT: linalg.generic

// -----

func.func @map_matmul(%in1: tensor<8x10xf32>, %in2: tensor<10x12xf32>) -> tensor<8x12xf32> {
Review thread on this test:

srcarroll (Contributor, Author):
@rengolin Per your request I have added this and the test below. Please let me know if there's more you would like to see.

rengolin (Member):
Yes! That's the kind of thing we need to be testing.

If the matmul has a transpose/broadcast/reduction map on %exp then it shouldn't be fused.

This also applies to contract and elementwise.

srcarroll (Contributor, Author):
> If the matmul has a transpose/broadcast/reduction map on %exp then it shouldn't be fused.

Ah yes, I forgot matmul was extended to allow different indexing maps. Will add more cases involving this. Will also check with contract.

srcarroll (Contributor, Author):
> If the matmul has a transpose/broadcast/reduction map on %exp then it shouldn't be fused.

@rengolin Actually, why not? I think this is only conditionally true. One case I'm thinking of that involves transpose is valid for fusion, for example:

func.func @map_matmul_transpose_a(%in1: tensor<10x8xf32>, %in2: tensor<10x12xf32>) -> tensor<8x12xf32> {
    %fill0 = tensor.empty() : tensor<10x8xf32>
    %exp = linalg.map {math.exp} ins(%in1 : tensor<10x8xf32>) outs(%fill0: tensor<10x8xf32>)
    %fill1 = tensor.empty() : tensor<8x12xf32>
    %matmul = linalg.matmul indexing_maps = [
                       affine_map<(d0, d1, d2) -> (d2, d0)>,
                       affine_map<(d0, d1, d2) -> (d2, d1)>,
                       affine_map<(d0, d1, d2) -> (d0, d1)>
                     ] ins(%exp, %in2 : tensor<10x8xf32>, tensor<10x12xf32>) outs(%fill1 : tensor<8x12xf32>) -> tensor<8x12xf32>
    return %matmul : tensor<8x12xf32>
}

would fuse to

#map = affine_map<(d0, d1, d2) -> (d2, d0)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
module {
  func.func @map_matmul_transpose_a(%arg0: tensor<10x8xf32>, %arg1: tensor<10x12xf32>) -> tensor<8x12xf32> {
    %0 = tensor.empty() : tensor<8x12xf32>
    %1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<10x8xf32>, tensor<10x12xf32>) outs(%0 : tensor<8x12xf32>) {
    ^bb0(%in: f32, %in_0: f32, %out: f32):
      %2 = math.exp %in : f32
      %3 = arith.mulf %2, %in_0 : f32
      %4 = arith.addf %out, %3 : f32
      linalg.yield %4 : f32
    } -> tensor<8x12xf32>
    return %1 : tensor<8x12xf32>
  }
}

broadcast cases can also be valid, for example

func.func @map_matmul_bcast(%in1: tensor<10xf32>, %in2: tensor<10x12xf32>) -> tensor<8x12xf32> {
  %fill0 = tensor.empty() : tensor<10xf32>
  %exp = linalg.map {math.exp} ins(%in1 : tensor<10xf32>) outs(%fill0: tensor<10xf32>)
  %fill1 = tensor.empty() : tensor<8x12xf32>
  %matmul = linalg.matmul indexing_maps = [
                     affine_map<(d0, d1, d2) -> (d2)>,
                     affine_map<(d0, d1, d2) -> (d2, d1)>,
                     affine_map<(d0, d1, d2) -> (d0, d1)>
                   ] ins(%exp, %in2 : tensor<10xf32>, tensor<10x12xf32>) outs(%fill1 : tensor<8x12xf32>) -> tensor<8x12xf32>
  return %matmul : tensor<8x12xf32>
}

fuses to

#map = affine_map<(d0, d1, d2) -> (d2)>
#map1 = affine_map<(d0, d1, d2) -> (d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d0, d1)>
module {
  func.func @map_matmul_bcast(%arg0: tensor<10xf32>, %arg1: tensor<10x12xf32>) -> tensor<8x12xf32> {
    %0 = tensor.empty() : tensor<8x12xf32>
    %1 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg0, %arg1 : tensor<10xf32>, tensor<10x12xf32>) outs(%0 : tensor<8x12xf32>) {
    ^bb0(%in: f32, %in_0: f32, %out: f32):
      %2 = math.exp %in : f32
      %3 = arith.mulf %2, %in_0 : f32
      %4 = arith.addf %out, %3 : f32
      linalg.yield %4 : f32
    } -> tensor<8x12xf32>
    return %1 : tensor<8x12xf32>
  }
}

I'll still need to think about cases, with valid input IR, that should NOT result in fusion for the elementwise + matmul case, but I think I will need a more complex case than this to show that. I just wanted to make sure it is agreed that the above cases are valid fusion cases. And again, if you have a specific test case in mind that I'm not thinking of, I will certainly investigate/add it.

srcarroll (Contributor, Author):
The above were produced with the builtin linalg-fuse-elementwise-ops pass.

rengolin (Member):
You're right, I should have said "may not be fused" instead.

srcarroll (Contributor, Author), Nov 6, 2025:
Cool, thanks.

> And again, if you have a specific test case in mind that I'm not thinking of, I will certainly investigate/add it.

By the way, it is not my intention to burden you with coming up with test cases for these changes; I completely accept that responsibility myself. So I just mean that if you already have something in mind, please share. This is more about hoping for help than expecting it. I will continue adding more cases that I think are illustrative enough of the scope of the changes here.

%fill0 = tensor.empty() : tensor<8x10xf32>
%exp = linalg.map {math.exp} ins(%in1 : tensor<8x10xf32>) outs(%fill0: tensor<8x10xf32>)
%fill1 = tensor.empty() : tensor<8x12xf32>
%matmul = linalg.matmul ins(%exp, %in2 : tensor<8x10xf32>, tensor<10x12xf32>) outs(%fill1 : tensor<8x12xf32>) -> tensor<8x12xf32>
return %matmul : tensor<8x12xf32>
}

// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0, d1, d2) -> (d0, d2)>
// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0, d1, d2) -> (d2, d1)>
// CHECK-DAG: #[[$MAP2:.+]] = affine_map<(d0, d1, d2) -> (d0, d1)>
// CHECK-LABEL: func @map_matmul
// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor<8x10xf32>
// CHECK-SAME: %[[ARG1:[a-zA-Z0-9]+]]: tensor<10x12xf32>
// CHECK: %[[EMPTY:.+]] = tensor.empty() : tensor<8x12xf32>
// CHECK: %[[FUSED_OP:.+]] = linalg.generic
// CHECK-SAME: indexing_maps = [#[[$MAP0]], #[[$MAP1]], #[[$MAP2]]]
// CHECK-SAME: iterator_types = ["parallel", "parallel", "reduction"]
// CHECK-SAME: ins(%[[ARG0]], %[[ARG1]] : {{.*}}) outs(%[[EMPTY]] :
// CHECK-NEXT: ^bb0(%[[IN0:.*]]: f32, %[[IN1:.*]]: f32, %[[OUT:.*]]: f32):
// CHECK-NEXT: %[[EXP:.*]] = math.exp %[[IN0]]
// CHECK-NEXT: %[[MUL:.*]] = arith.mulf %[[EXP]], %[[IN1]]
// CHECK-NEXT: %[[ADD:.*]] = arith.addf %[[OUT]], %[[MUL]]
// CHECK-NEXT: linalg.yield %[[ADD]]
// CHECK-NOT: linalg.generic

// -----

func.func @matmul_map(%in1: tensor<8x10xf32>, %in2: tensor<10x12xf32>) -> tensor<8x12xf32> {
%fill1 = tensor.empty() : tensor<8x12xf32>
%matmul = linalg.matmul ins(%in1, %in2 : tensor<8x10xf32>, tensor<10x12xf32>) outs(%fill1 : tensor<8x12xf32>) -> tensor<8x12xf32>
%exp = linalg.map {math.exp} ins(%matmul : tensor<8x12xf32>) outs(%fill1: tensor<8x12xf32>)

return %exp : tensor<8x12xf32>
}

// Should not fuse
// CHECK-LABEL: func @matmul_map
// CHECK-NEXT: tensor.empty
// CHECK-NEXT: linalg.matmul
// CHECK-NEXT: linalg.map
// CHECK-NEXT: return

// -----

// In this test we expect the first two linalg.generic operations to be fused into one, but the third one (the matmul) to remain separate.
// The reason is that when the pattern is applied the 1st time, the fusion of the first two operations produces a fused operation with
// an additional result and ana dditional output indexing map that is not a permutation / not invertible.
// an additional result and an additional output indexing map that is not a permutation / not invertible.
// The fused op will still produce also the original result (and its output indexing map), which is preserved because the new indexing map
// is not invertible. Thus the fused op will have 2 results, but only the 2nd one will be used by the following matmul op as an input argument.
// When trying to apply the fusion pattern again, the matmul op won't be fused because the operand to fuse was not produced with an invertible indexing map.
@@ -1079,4 +1145,4 @@ module {
// CHECK-NOT: linalg.generic
// CHECK: tensor.expand_shape
// CHECK: linalg.generic {{.*}}, iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel", "parallel", "reduction"]}
// CHECK-SAME: ins(%[[ARG0]], %[[FUSED]]#1 : tensor<1x1x2x1xf32>, tensor<4x1x1x1xf32>)
// CHECK-SAME: ins(%[[ARG0]], %[[FUSED]]#1 : tensor<1x1x2x1xf32>, tensor<4x1x1x1xf32>)