
Commit b65f967

[mlir][linalg] revise based on review comments

1 parent a38ba01

5 files changed: +138 −85 lines

mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.td

Lines changed: 24 additions & 0 deletions

@@ -243,6 +243,18 @@ def LinalgStructuredInterface
                                    utils::IteratorType::parallel);
       }]
     >,
+    InterfaceMethod<
+      /*desc=*/[{
+        Return true if all loops are parallel.
+      }],
+      /*retTy=*/"bool",
+      /*methodName=*/"isAllParallelLoops",
+      /*args=*/(ins),
+      /*methodBody=*/"",
+      /*defaultImplementation=*/[{
+        return getNumParallelLoops() == getNumLoops();
+      }]
+    >,
     InterfaceMethod<
       /*desc=*/[{
         Return the dims that are parallel loops.
@@ -327,6 +339,18 @@ def LinalgStructuredInterface
        return !getBlock()->getArgument(bbArgNumber).use_empty();
       }]
     >,
+    InterfaceMethod<
+      /*desc=*/[{
+        Returns true only if linalgOp takes one input and produces one result.
+      }],
+      /*retTy=*/"bool",
+      /*methodName=*/"isSingleInputOutput",
+      /*args=*/(ins),
+      /*methodBody=*/"",
+      /*defaultImplementation=*/[{
+        return $_op.getNumDpsInputs() == 1 && $_op.getNumDpsInits() == 1;
+      }]
+    >,
     InterfaceMethod<
       /*desc=*/[{
         Return true if `opOperand` is an init tensor. This is true when it is
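
For context, a minimal sketch of how the two new defaulted interface methods read at a call site; the helper and its name are illustrative only, not part of the commit:

    #include "mlir/Dialect/Linalg/IR/Linalg.h"

    // Any op implementing LinalgStructuredInterface can answer both queries
    // directly, since the methods above ship with default implementations.
    static bool hasCopyLikeStructure(mlir::linalg::LinalgOp op) {
      return op.isAllParallelLoops() && op.isSingleInputOutput();
    }

This mirrors the file-local static helpers that the commit removes from LinalgInterfaces.cpp below.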

mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td

Lines changed: 18 additions & 0 deletions

@@ -210,6 +210,24 @@ def GenericOp : LinalgStructuredBase_Op<"generic", [
     }

     MutableOperandRange getDpsInitsMutable() { return getOutputsMutable(); }
+
+    // Return true only if GenericOp has a single input and single
+    // output, and the body is a single yieldOp that yields the input.
+    // This check is useful when trying to determine if the op is
+    // essentially a transpose, broadcast, copy or something like that.
+    bool isSingleYieldOp() {
+      if (!isSingleInputOutput())
+        return false;
+      Block *body = getBody();
+      if (body->getOperations().size() != 1)
+        return false;
+
+      auto yieldOp = dyn_cast<linalg::YieldOp>(body->back());
+      if (!yieldOp || yieldOp.getNumOperands() != 1 ||
+          yieldOp->getOperand(0) != body->getArgument(0))
+        return false;
+      return true;
+    }
   }];

   let hasCanonicalizer = 1;
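
Likewise, a hypothetical matcher combining isSingleYieldOp with the interface methods above (a sketch; the function name is invented for illustration):

    #include "mlir/Dialect/Linalg/IR/Linalg.h"

    // A generic op with all-parallel loops, one input, one init, and a body
    // that only yields the input block argument moves data without computing;
    // this is the structural gate the fill/broadcast/transpose matchers use.
    static bool isPureDataMovement(mlir::linalg::GenericOp genericOp) {
      return genericOp.isAllParallelLoops() &&
             genericOp.isSingleInputOutput() && genericOp.isSingleYieldOp();
    }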

mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp

Lines changed: 67 additions & 83 deletions

@@ -50,66 +50,40 @@ bool linalg::detail::canOpOperandsBeDroppedImpl(
   return inversePermutation(concatAffineMaps(indexingMaps)) != AffineMap();
 }

-// Returns true if all loops of the linalgOp are parallel
-static bool isAllParallel(LinalgOp op) {
-  return op.getNumParallelLoops() == op.getNumLoops();
-}
-
-// Returns true if and only if linalgOp takes one input and one init.
-static bool isSingleInputOutput(LinalgOp op) {
-  return op.getNumDpsInputs() == 1 && op.getNumDpsInits() == 1;
-}
-// Returns true if genericOp body is just a yieldOp that yields
-// input operand as result.
-static bool isSingleYieldOp(GenericOp op) {
-  if (op.getNumDpsInputs() != 1 || op.getNumDpsInits() != 1)
-    return false;
-
-  Block *body = op.getBody();
-  if (body->getOperations().size() != 1)
-    return false;
-
-  auto yieldOp = dyn_cast<linalg::YieldOp>(body->back());
-  if (!yieldOp || yieldOp.getNumOperands() != 1 ||
-      yieldOp->getOperand(0) != body->getArgument(0))
-    return false;
-  return true;
-}
-
 //===----------------------------------------------------------------------===//
 // CopyOpInterface implementation
 //===----------------------------------------------------------------------===//

-bool linalg::isaCopyOpInterface(LinalgOp linalgOp) {
-  // Structural and operands
-  if (!isAllParallel(linalgOp) || !isSingleInputOutput(linalgOp))
+bool linalg::isaCopyOpInterface(LinalgOp op) {
+  // Check all loops are parallel and the op has a single input and output.
+  if (!op.isAllParallelLoops() || !op.isSingleInputOutput())
     return false;

-  auto mapRange = linalgOp.getIndexingMapsArray();
+  auto mapRange = op.getIndexingMapsArray();
   if (mapRange.size() != 2 || !mapRange.front().isIdentity() ||
       !mapRange.back().isIdentity()) {
     return false;
   }
   // Region.
-  return llvm::hasSingleElement(linalgOp.getBlock()->getOperations());
+  return llvm::hasSingleElement(op.getBlock()->getOperations());
 }

 //===----------------------------------------------------------------------===//
 // FillOpInterface implementation
 //===----------------------------------------------------------------------===//
-std::optional<Value> linalg::isaFillOpInterface(GenericOp genericOp) {
+std::optional<Value> linalg::isaFillOpInterface(GenericOp op) {
   // Structural.
-  if (!isAllParallel(genericOp) || !isSingleInputOutput(genericOp) ||
-      !isSingleYieldOp(genericOp))
+  if (!op.isAllParallelLoops() || !op.isSingleInputOutput() ||
+      !op.isSingleYieldOp())
     return std::nullopt;

   // Input should be referenced and init should not.
-  if (!genericOp.payloadUsesValueFromOperand(genericOp.getDpsInputOperand(0)) ||
-      genericOp.payloadUsesValueFromOperand(genericOp.getDpsInitOperand(0)))
+  if (!op.payloadUsesValueFromOperand(op.getDpsInputOperand(0)) ||
+      op.payloadUsesValueFromOperand(op.getDpsInitOperand(0)))
     return std::nullopt;

-  OpOperand *value = genericOp.getDpsInputOperand(0);
-  if (!genericOp.isScalar(value))
+  OpOperand *value = op.getDpsInputOperand(0);
+  if (!op.isScalar(value))
     return std::nullopt;
   return value->get();
 }

@@ -118,27 +92,30 @@ std::optional<Value> linalg::isaFillOpInterface(GenericOp genericOp) {
 // BroadcastOpInterface implementation
 //===----------------------------------------------------------------------===//
 std::optional<SmallVector<int64_t>>
-linalg::isaBroadcastOpInterface(GenericOp genericOp) {
+linalg::isaBroadcastOpInterface(GenericOp op) {
   // Structural.
-  if (!isAllParallel(genericOp) || !isSingleInputOutput(genericOp) ||
-      !isSingleYieldOp(genericOp))
+  if (!op.isAllParallelLoops() || !op.isSingleInputOutput() ||
+      !op.isSingleYieldOp())
     return std::nullopt;

-  auto t0 = genericOp.getDpsInputOperand(0)->get().getType();
-  auto t1 = genericOp.getDpsInitOperand(0)->get().getType();
-  if (!isa<MemRefType, RankedTensorType>(t0) ||
-      !isa<MemRefType, RankedTensorType>(t1))
+  auto srcTy = op.getDpsInputOperand(0)->get().getType();
+  auto dstTy = op.getDpsInitOperand(0)->get().getType();
+  if (!isa<MemRefType, RankedTensorType>(srcTy) ||
+      !isa<MemRefType, RankedTensorType>(dstTy))
     return std::nullopt;

-  // Check output is identity map. Injective function could also be
-  // a permutation of indices and expressible in linalg.generic but
-  // is not expressible for named broadcast op.
-  auto dstMap = genericOp.getIndexingMapsArray()[1];
+  // Check output is identity map. Broadcast could additionally employ
+  // a permutation of indices, which would be expressible in linalg.generic
+  // but is not expressible for the named broadcast op.
+  auto dstMap = op.getIndexingMapsArray()[1];
   if (!dstMap.isIdentity())
     return std::nullopt;

   SmallVector<int64_t> position;
-  auto srcMap = genericOp.getIndexingMapsArray()[0];
+  auto srcMap = op.getIndexingMapsArray()[0];
+
+  if (srcMap.getResults().size() >= dstMap.getResults().size())
+    return std::nullopt;

   // Check input map is monotonically increasing DimIds.
   for (unsigned i = 0; i < srcMap.getNumResults(); ++i) {
@@ -153,6 +130,7 @@ linalg::isaBroadcastOpInterface(GenericOp genericOp) {

   SmallVector<int64_t> broadcastedDims;
   auto numDims = srcMap.getNumDims();
+  // This is quadratic, but the number of items is generally small.
   for (auto dim : llvm::seq<int64_t>(0, numDims)) {
     if (!llvm::is_contained(position, dim))
       broadcastedDims.push_back(dim);
@@ -164,86 +142,92 @@ linalg::isaBroadcastOpInterface(GenericOp genericOp) {
 // TransposeOpInterface implementation
 //===----------------------------------------------------------------------===//
 std::optional<SmallVector<int64_t>>
-linalg::isaTransposeOpInterface(GenericOp genericOp) {
-  // Structural.
-  if (!isAllParallel(genericOp) || !isSingleInputOutput(genericOp) ||
-      !isSingleYieldOp(genericOp))
+linalg::isaTransposeOpInterface(GenericOp op) {
+  // To specialize as a transpose op, the genericOp must have all parallel
+  // loops, a single input, a single output, and a body that is just a
+  // yield op, yielding the input unchanged (no compute).
+  if (!op.isAllParallelLoops() || !op.isSingleInputOutput() ||
+      !op.isSingleYieldOp())
     return std::nullopt;

-  // mapping checks.
-  auto mapRange = genericOp.getIndexingMapsArray();
-  if (mapRange.size() != 2 || !mapRange.back().isIdentity() ||
-      !mapRange.front().isPermutation())
+  auto mapRange = op.getIndexingMapsArray();
+  if (mapRange.size() != 2)
     return std::nullopt;

-  SmallVector<int64_t> permutation;
-  auto map = mapRange.front();
-  for (unsigned i = 0; i < map.getNumResults(); ++i) {
-    auto expr = llvm::cast<AffineDimExpr>(map.getResults()[i]);
-    permutation.push_back(expr.getPosition());
+  auto mapOfInput = mapRange.front();
+  auto mapOfResult = mapRange.back();
+
+  // linalg.transpose permutes the dimensions of the input using this
+  // rule: dim(result, i) = dim(input, permutation[i])
+  if (!mapOfResult.isIdentity() || !mapOfInput.isPermutation())
+    return std::nullopt;
+
+  SmallVector<int64_t> permutation(mapOfInput.getNumDims());
+  for (unsigned i = 0; i < mapOfInput.getNumDims(); ++i) {
+    auto expr = llvm::cast<AffineDimExpr>(mapOfInput.getResults()[i]);
+    permutation[expr.getPosition()] = i;
   }
   return permutation;
 }

 //===----------------------------------------------------------------------===//
 // Elementwise Single Unary/Binary-OpInterface implementation
 //===----------------------------------------------------------------------===//
-static bool
-isaElemwiseSingleUnaryOrBinaryOpInterface(linalg::GenericOp genericOp,
-                                          unsigned arity) {
+static bool isaElemwiseSingleUnaryOrBinaryOpInterface(linalg::GenericOp op,
+                                                      unsigned arity) {
   // Check all loops are parallel.
-  if (!isAllParallel(genericOp) || genericOp.getNumLoops() < 1)
+  if (!op.isAllParallelLoops() || op.getNumLoops() < 1)
     return false;

   // Check there are `arity` inputs, one output, and all maps are identity.
-  if (genericOp.getNumDpsInputs() != arity || genericOp.getNumDpsInits() != 1 ||
-      !llvm::all_of(genericOp.getIndexingMapsArray(),
+  if (op.getNumDpsInputs() != arity || op.getNumDpsInits() != 1 ||
+      !llvm::all_of(op.getIndexingMapsArray(),
                     [](AffineMap map) { return map.isIdentity(); }))
     return false;

   // Init should not be referenced for elementwise operations.
-  if (genericOp.payloadUsesValueFromOperand(genericOp.getDpsInitOperand(0)))
+  if (op.payloadUsesValueFromOperand(op.getDpsInitOperand(0)))
     return false;

   // A linalg.generic could be a series of elementwise ops, e.g. exp(neg(x)),
   // such as resulting from producer-consumer fusion. Here, we restrict to two
   // ops in the body, where the first is the elementwise single op and the
   // second a yield.
-  Block *body = genericOp.getBody();
+  Block *body = op.getBody();
   if (body->getOperations().size() != 2)
     return false;

-  Operation *op = &body->front();
-  if (op->getNumOperands() != arity || op->getNumResults() != 1)
+  Operation *oper = &body->front();
+  if (oper->getNumOperands() != arity || oper->getNumResults() != 1)
     return false;

   auto yieldOp = dyn_cast<linalg::YieldOp>(body->back());
   if (!yieldOp || yieldOp.getNumOperands() != 1 ||
-      yieldOp->getOperand(0).getDefiningOp() != op)
+      yieldOp->getOperand(0).getDefiningOp() != oper)
     return false;
   return true;
 }

-bool linalg::isaElemwiseSingleUnaryOpInterface(linalg::GenericOp genericOp) {
+bool linalg::isaElemwiseSingleUnaryOpInterface(linalg::GenericOp op) {
   // All basic elemwise checks.
-  if (!isaElemwiseSingleUnaryOrBinaryOpInterface(genericOp, 1))
+  if (!isaElemwiseSingleUnaryOrBinaryOpInterface(op, 1))
     return false;

   // Check input is actually used.
-  if (!genericOp.payloadUsesValueFromOperand(genericOp.getDpsInputOperand(0)))
+  if (!op.payloadUsesValueFromOperand(op.getDpsInputOperand(0)))
     return false;
   return true;
 }

-bool linalg::isaElemwiseSingleBinaryOpInterface(linalg::GenericOp genericOp) {
-  if (!isaElemwiseSingleUnaryOrBinaryOpInterface(genericOp, 2))
+bool linalg::isaElemwiseSingleBinaryOpInterface(linalg::GenericOp op) {
+  if (!isaElemwiseSingleUnaryOrBinaryOpInterface(op, 2))
     return false;

   // Check both inputs are used (elementwise).
-  OpOperand *inputOpOperand0 = genericOp.getDpsInputOperand(0);
-  OpOperand *inputOpOperand1 = genericOp.getDpsInputOperand(1);
-  if (!genericOp.payloadUsesValueFromOperand(inputOpOperand0) ||
-      !genericOp.payloadUsesValueFromOperand(inputOpOperand1))
+  OpOperand *inputOpOperand0 = op.getDpsInputOperand(0);
+  OpOperand *inputOpOperand1 = op.getDpsInputOperand(1);
+  if (!op.payloadUsesValueFromOperand(inputOpOperand0) ||
+      !op.payloadUsesValueFromOperand(inputOpOperand1))
     return false;
   return true;
 }
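
The inversion in isaTransposeOpInterface (writing permutation[expr.getPosition()] = i rather than appending expr.getPosition()) follows from the quoted rule dim(result, i) = dim(input, permutation[i]). A self-contained sketch, with plain ints standing in for AffineDimExpr positions and values matching the transpose3D test below:

    #include <cassert>
    #include <vector>

    int main() {
      // Input indexing map (d0, d1, d2) -> (d1, d2, d0): result slot i of
      // the input map reads loop dim pos[i]; the output map is identity.
      std::vector<int> pos = {1, 2, 0};

      // Input dim i is indexed by loop dim pos[i], and result dim pos[i] is
      // that same loop dim (identity output map), so dim(result, pos[i]) =
      // dim(input, i), i.e. permutation[pos[i]] = i.
      std::vector<int> permutation(pos.size());
      for (int i = 0; i < static_cast<int>(pos.size()); ++i)
        permutation[pos[i]] = i;

      assert((permutation == std::vector<int>{2, 0, 1}));
      return 0;
    }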
Lines changed: 13 additions & 2 deletions

@@ -1,11 +1,22 @@
 // RUN: mlir-opt %s -linalg-generalize-named-ops | mlir-opt --linalg-specialize-generic-ops | FileCheck %s

-// CHECK-LABEL: linalg_transpose
+// CHECK-LABEL: transpose2D
 // CHECK-SAME: %[[A:.+]]: tensor<16x64xf32>, %[[Out:.+]]: tensor<64x16xf32>
 // CHECK-NOT: linalg.generic
 // CHECK: %transposed = linalg.transpose ins(%[[A]] : tensor<16x64xf32>) outs(%[[Out]] : tensor<64x16xf32>) permutation = [1, 0]
 //
-func.func @linalg_transpose(%A: tensor<16x64xf32>, %Out: tensor<64x16xf32>) -> tensor<64x16xf32> {
+func.func @transpose2D(%A: tensor<16x64xf32>, %Out: tensor<64x16xf32>) -> tensor<64x16xf32> {
   %res = linalg.transpose ins(%A: tensor<16x64xf32>) outs(%Out: tensor<64x16xf32>) permutation = [1,0]
   return %res : tensor<64x16xf32>
 }
+
+
+// CHECK-LABEL: transpose3D
+// CHECK-SAME: %[[A:.+]]: tensor<7x8x9xf32>, %[[Out:.+]]: tensor<9x7x8xf32>
+// CHECK-NOT: linalg.generic
+// CHECK: %transposed = linalg.transpose ins(%[[A]] : tensor<7x8x9xf32>) outs(%[[Out]] : tensor<9x7x8xf32>) permutation = [2, 0, 1]
+//
+func.func @transpose3D(%arg0: tensor<7x8x9xf32>, %arg1: tensor<9x7x8xf32>) -> tensor<9x7x8xf32> {
+  %transposed = linalg.transpose ins(%arg0 : tensor<7x8x9xf32>) outs(%arg1 : tensor<9x7x8xf32>) permutation = [2, 0, 1]
+  return %transposed : tensor<9x7x8xf32>
+}
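
As a quick cross-check of the new 3D case, a sketch applying the dim(result, i) = dim(input, permutation[i]) rule to the shapes above (not part of the test suite):

    #include <array>
    #include <cassert>

    int main() {
      // transpose3D: input tensor<7x8x9xf32>, permutation = [2, 0, 1].
      std::array<int, 3> in = {7, 8, 9}, perm = {2, 0, 1}, out = {};
      for (int i = 0; i < 3; ++i)
        out[i] = in[perm[i]]; // dim(result, i) = dim(input, permutation[i])
      assert((out == std::array<int, 3>{9, 7, 8})); // tensor<9x7x8xf32>
      return 0;
    }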
Lines changed: 16 additions & 0 deletions

@@ -0,0 +1,16 @@
+// RUN: mlir-opt %s -split-input-file --linalg-specialize-generic-ops | FileCheck %s
+
+#map = affine_map<(d0, d1, d2) -> (d1, d0)>
+#map1 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
+// This test checks that linalg.generic does not get incorrectly specialized to transpose or broadcast.
+// CHECK-LABEL: @transpose_and_broadcast
+// CHECK: linalg.generic
+func.func @transpose_and_broadcast(%arg0: tensor<7x8xf32>, %arg1: tensor<8x7x9xf32>) -> tensor<8x7x9xf32> {
+  %0 = linalg.generic
+      {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel"]}
+      ins(%arg0 : tensor<7x8xf32>) outs(%arg1 : tensor<8x7x9xf32>) {
+  ^bb0(%in: f32, %out: f32):
+    linalg.yield %in : f32
+  } -> tensor<8x7x9xf32>
+  return %0 : tensor<8x7x9xf32>
+}
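
This op stays generic under both matchers above: the input map (d0, d1, d2) -> (d1, d0) is not a permutation of all three loop dims, so isaTransposeOpInterface rejects it, and its results are not monotonically increasing DimIds (d1 before d0), so isaBroadcastOpInterface rejects it as well.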
