Commit 7087972

[GlobalOpt] Generalize 1x1 group convolutions (#20480)
This allows 1x1 group convolutions to be generalized, since they are effectively loops around matmuls. On LLVMGPU, this lets them go down the contraction/matmul lowering path.
Parent: 5b55234
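For context, here is a rough sketch of the generalized form for the new test case added below (hypothetical function and map names; the literal pass output may differ in attribute details). With a 1x1 window and unit strides, the kh/kw window dims are unit-sized reductions, so the only meaningful reduction is over the input channel c, i.e. a grouped, spatially-batched matmul:

// Hand-written sketch, not the literal pass output.
#input_map  = affine_map<(n, oh, ow, g, f, kh, kw, c) -> (n, oh + kh, ow + kw, g, c)>
#filter_map = affine_map<(n, oh, ow, g, f, kh, kw, c) -> (g, f, kh, kw, c)>
#output_map = affine_map<(n, oh, ow, g, f, kh, kw, c) -> (n, oh, ow, g, f)>

util.func public @generalized_1x1_group_conv_sketch(%input: tensor<1x2x3x4x5xf32>,
    %filter: tensor<4x6x1x1x5xf32>) -> tensor<1x2x3x4x6xf32> {
  %init = tensor.empty() : tensor<1x2x3x4x6xf32>
  %result = linalg.generic {
      indexing_maps = [#input_map, #filter_map, #output_map],
      iterator_types = ["parallel", "parallel", "parallel", "parallel", "parallel",
                        "reduction", "reduction", "reduction"]}
      ins(%input, %filter : tensor<1x2x3x4x5xf32>, tensor<4x6x1x1x5xf32>)
      outs(%init : tensor<1x2x3x4x6xf32>) {
  ^bb0(%in: f32, %flt: f32, %acc: f32):
    // Multiply-accumulate over the (unit kh/kw and) input-channel reduction dims.
    %mul = arith.mulf %in, %flt : f32
    %acc_next = arith.addf %acc, %mul : f32
    linalg.yield %acc_next : f32
  } -> tensor<1x2x3x4x6xf32>
  util.return %result : tensor<1x2x3x4x6xf32>
}

Because the non-unit loop structure matches a contraction, backends such as LLVMGPU can route it to their matmul lowering.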

File tree: 2 files changed (+41, -7 lines)
compiler/src/iree/compiler/GlobalOptimization/GeneralizeLinalgNamedOps.cpp

Lines changed: 11 additions & 7 deletions

@@ -17,6 +17,10 @@
 #include "mlir/Dialect/Linalg/Transforms/Transforms.h"
 #include "mlir/Pass/Pass.h"
 
+#define DEBUG_TYPE "iree-global-opt-generalize-linalg-named-ops"
+#define DBGS() (llvm::dbgs() << "[" DEBUG_TYPE "]: ")
+#define LDBG(X) LLVM_DEBUG(DBGS() << X << "\n")
+
 namespace mlir::iree_compiler::GlobalOptimization {
 
 #define GEN_PASS_DEF_GENERALIZELINALGNAMEDOPSPASS

@@ -41,17 +45,15 @@ static bool isConvFoldableToContraction(linalg::LinalgOp linalgOp) {
 
   if (!llvm::all_of(convDims.strides,
                     [](int64_t element) { return element == 1; })) {
+    LDBG("conv not foldable: non-unit strides");
     return false;
   }
 
-  // Dont generalize depthwise convolutions.
-  if (!convDims.depth.empty()) {
-    return false;
-  }
-
-  // Dont generalize pooling operations. For pooling ops, the input/output
-  // channel size will be categorized as the additional batch dimension
+  // Dont generalize pooling operations or depthwise convolutions. For pooling
+  // ops, the input/output channel size will be categorized as the additional
+  // batch dimension.
   if (convDims.outputChannel.empty() || convDims.inputChannel.empty()) {
+    LDBG("conv not foldable: missing input or output channel dims");
     return false;
   }
 

@@ -60,6 +62,7 @@ static bool isConvFoldableToContraction(linalg::LinalgOp linalgOp) {
   auto filterShapeType = llvm::dyn_cast<RankedTensorType>(
       linalgOp.getDpsInputOperand(kFilterInputIdx)->get().getType());
   if (!filterShapeType) {
+    LDBG("conv not foldable: filter shape not ranked tensor");
     return false;
   }
   auto filterShape = filterShapeType.getShape();

@@ -68,6 +71,7 @@ static bool isConvFoldableToContraction(linalg::LinalgOp linalgOp) {
     std::optional<int64_t> maybeDim = filterMap.getResultPosition(
         getAffineDimExpr(filterLoop, filterMap.getContext()));
     if (!maybeDim || filterShape[*maybeDim] != 1) {
+      LDBG("conv not foldable: non-unit filter dim");
      return false;
     }
   }
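Note: the LDBG messages added above go through LLVM's LLVM_DEBUG machinery, so they are compiled in only for asserts-enabled builds; they should then be printable by passing --debug-only=iree-global-opt-generalize-linalg-named-ops (the DEBUG_TYPE defined in this file) to iree-opt.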

compiler/src/iree/compiler/GlobalOptimization/test/generalize_named_ops.mlir

Lines changed: 30 additions & 0 deletions

@@ -101,6 +101,21 @@ util.func public @generalize_1x1_conv_2d_dilations(%input: tensor<1x4x?x2xf32>,
 
 // -----
 
+util.func public @generalize_1x1_group_conv_2d(%input: tensor<1x2x3x4x5xf32>, %filter: tensor<4x6x1x1x5xf32>) -> tensor<1x2x3x4x6xf32> {
+  %0 = tensor.empty() : tensor<1x2x3x4x6xf32>
+  %1 = linalg.conv_2d_nhwgc_gfhwc {
+    dilations = dense<1> : tensor<2xi64>,
+    strides = dense<1> : tensor<2xi64>
+  } ins(%input, %filter : tensor<1x2x3x4x5xf32>, tensor<4x6x1x1x5xf32>) outs(%0 : tensor<1x2x3x4x6xf32>) -> tensor<1x2x3x4x6xf32>
+  util.return %1 : tensor<1x2x3x4x6xf32>
+}
+
+// CHECK-LABEL: @generalize_1x1_group_conv_2d
+// CHECK: %[[RESULT:.*]] = linalg.generic
+// CHECK: util.return %[[RESULT]]
+
+// -----
+
 util.func public @no_generalize_1x1_conv_2d_strides(%input: tensor<1x7x7x2xf32>, %filter: tensor<1x1x2x7xf32>) -> tensor<1x4x4x7xf32> {
   %0 = tensor.empty() : tensor<1x4x4x7xf32>
   %1 = linalg.conv_2d_nhwc_hwcf {

@@ -113,3 +128,18 @@ util.func public @no_generalize_1x1_conv_2d_strides(%input: tensor<1x7x7x2xf32>,
 // CHECK-LABEL: @no_generalize_1x1_conv_2d_strides
 // CHECK-NOT: linalg.generic
 // CHECK: util.return
+
+// -----
+
+util.func public @no_generalize_1x1_depthwise_conv(%input: tensor<1x2x3x4xf32>, %filter: tensor<1x1x4xf32>) -> tensor<1x2x3x4xf32> {
+  %0 = tensor.empty() : tensor<1x2x3x4xf32>
+  %1 = linalg.depthwise_conv_2d_nhwc_hwc {
+    dilations = dense<1> : tensor<2xi64>,
+    strides = dense<1> : tensor<2xi64>
+  } ins(%input, %filter : tensor<1x2x3x4xf32>, tensor<1x1x4xf32>) outs(%0 : tensor<1x2x3x4xf32>) -> tensor<1x2x3x4xf32>
+  util.return %1 : tensor<1x2x3x4xf32>
+}
+
+// CHECK-LABEL: @no_generalize_1x1_depthwise_conv
+// CHECK-NOT: linalg.generic
+// CHECK: util.return
