[Encoding] Implement matmul_k encoding propagation across reshapes. (iree-org#20367)

hanhanW · web-flow · commit 77243062f163 · 2025-04-14T10:08:27.000-07:00
The revision ports the sdxl propagation effort to the main branch.
Ideally, we should implement it using interfaces and data-flow analysis.

It is a first step of the propagation, and we will incrementally enhance
the encoding propagation pass.

Co-authored-by: MaheshRavishankar <mahesh.ravishankar@gmail.com>

---------

Signed-off-by: hanhanW <hanhan0912@gmail.com>
diff --git a/compiler/src/iree/compiler/DispatchCreation/BUILD.bazel b/compiler/src/iree/compiler/DispatchCreation/BUILD.bazel
@@ -35,6 +35,7 @@ iree_compiler_cc_library(
         "HoistEncodingOps.cpp",
         "MaterializeDefaultWorkgroupCountRegion.cpp",
         "Passes.cpp",
+        "PropagateEncodings.cpp",
         "SetEncoding.cpp",
         "SinkReshapes.cpp",
         "SplitReduction.cpp",
diff --git a/compiler/src/iree/compiler/DispatchCreation/CMakeLists.txt b/compiler/src/iree/compiler/DispatchCreation/CMakeLists.txt
@@ -37,6 +37,7 @@ iree_cc_library(
     "HoistEncodingOps.cpp"
     "MaterializeDefaultWorkgroupCountRegion.cpp"
     "Passes.cpp"
+    "PropagateEncodings.cpp"
     "SetEncoding.cpp"
     "SinkReshapes.cpp"
     "SplitReduction.cpp"
diff --git a/compiler/src/iree/compiler/DispatchCreation/Passes.cpp b/compiler/src/iree/compiler/DispatchCreation/Passes.cpp
@@ -253,6 +253,7 @@ addDispatchRegionCreationPasses(OpPassManager &passManager,
           return DispatchCreation::createHoistEncodingOpsPass(
               HoistEncodingOpsPassOptions{clHoistEncodingsForConstExpr});
         })
+        .addPass(DispatchCreation::createPropagateEncodingsPass)
         .addPass(
             DispatchCreation::createFuseEncodingOpsIntoDispatchRegionsPass);
   }
diff --git a/compiler/src/iree/compiler/DispatchCreation/Passes.td b/compiler/src/iree/compiler/DispatchCreation/Passes.td
@@ -297,6 +297,14 @@ def HoistEncodingOpsPass :
   ];
 }
 
+// Pass anchored on any op implementing `mlir::FunctionOpInterface` that
+// propagates encodings across other operations. The tensor and IREE encoding
+// dialects are listed as dependent dialects because the pass creates new
+// `tensor.collapse_shape` and `iree_encoding.set_encoding` ops while rewriting.
+def PropagateEncodingsPass :
+    InterfacePass<"iree-dispatch-creation-propagate-encodings", "mlir::FunctionOpInterface"> {
+  let summary = "Propagate encodings across other operations.";
+  let dependentDialects = [
+    "mlir::tensor::TensorDialect",
+    "IREE::Encoding::IREEEncodingDialect",
+  ];
+}
 
 def SetEncodingPass :
     InterfacePass<"iree-dispatch-creation-set-encoding", "mlir::FunctionOpInterface"> {
diff --git a/compiler/src/iree/compiler/DispatchCreation/PropagateEncodings.cpp b/compiler/src/iree/compiler/DispatchCreation/PropagateEncodings.cpp
@@ -0,0 +1,118 @@
+// Copyright 2025 The IREE Authors
+//
+// Licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Dialect/Encoding/IR/EncodingDialect.h"
+#include "iree/compiler/Dialect/Encoding/IR/EncodingOps.h"
+#include "iree/compiler/Dialect/Encoding/IR/EncodingTypes.h"
+#include "iree/compiler/Dialect/Flow/Transforms/RegionOpUtils.h"
+#include "iree/compiler/DispatchCreation/Passes.h"
+#include "llvm/ADT/STLExtras.h"
+#include "mlir/Dialect/Tensor/IR/Tensor.h"
+#include "mlir/IR/MLIRContext.h"
+#include "mlir/IR/PatternMatch.h"
+#include "mlir/Interfaces/FunctionInterfaces.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+#define DEBUG_TYPE "iree-dispatch-creation-propagate-encodings"
+
+namespace mlir::iree_compiler::DispatchCreation {
+
+#define GEN_PASS_DEF_PROPAGATEENCODINGSPASS
+#include "iree/compiler/DispatchCreation/Passes.h.inc"
+
+namespace {
+
+/// Pattern that swaps a `tensor.collapse_shape` -> `iree_encoding.set_encoding`
+/// chain into `iree_encoding.set_encoding` -> `tensor.collapse_shape`, so the
+/// encoding is applied to the collapse_shape source instead of its result.
+///
+/// The pattern is rooted on the set_encoding op and only applies when:
+///   - the result encoding is a `matmul_k` encoding,
+///   - the set_encoding source is produced by a `tensor.collapse_shape`,
+///   - both ops are outside of dispatch regions, and
+///   - the collapse_shape does not merge any dimensions into a k dimension.
+struct SwapEncodingOpWithTensorCollapseShapeOp
+    : public OpRewritePattern<IREE::Encoding::SetEncodingOp> {
+  using Base = OpRewritePattern<IREE::Encoding::SetEncodingOp>;
+  // Inherit the OpRewritePattern constructors.
+  using Base::Base;
+  LogicalResult matchAndRewrite(IREE::Encoding::SetEncodingOp encodingOp,
+                                PatternRewriter &rewriter) const override;
+};
+
+/// Function-level pass that propagates encodings by greedily applying rewrite
+/// patterns rooted on `iree_encoding.set_encoding` ops.
+///
+// TODO(#20179): Support the propagation through interfaces. It is supposed to
+// be done with data-flow analysis.
+struct PropagateEncodingsPass
+    : public DispatchCreation::impl::PropagateEncodingsPassBase<
+          PropagateEncodingsPass> {
+  void runOnOperation() override;
+};
+
+} // namespace
+
+/// Moves a `matmul_k` set_encoding op above its `tensor.collapse_shape`
+/// producer. I.e., it rewrites
+///
+///   %collapsed = tensor.collapse_shape %src ...
+///   %encoded = iree_encoding.set_encoding %collapsed
+///
+/// into
+///
+///   %encoded = iree_encoding.set_encoding %src
+///   %collapsed = tensor.collapse_shape %encoded ...
+///
+/// The k dimensions of the new encoding are remapped from the collapsed
+/// tensor to the collapse_shape source. The match fails if the encoding is not
+/// a matmul_k encoding, if the source is not produced by a collapse_shape, if
+/// either op is inside a dispatch, or if any k dimension is produced by
+/// collapsing more than one source dimension.
+LogicalResult SwapEncodingOpWithTensorCollapseShapeOp::matchAndRewrite(
+    IREE::Encoding::SetEncodingOp encodingOp, PatternRewriter &rewriter) const {
+  auto encoding = dyn_cast<IREE::Encoding::MatmulKAttr>(
+      encodingOp.getResultType().getEncoding());
+  if (!encoding) {
+    return rewriter.notifyMatchFailure(encodingOp, "only matmul_k is handled");
+  }
+  auto collapseOp =
+      encodingOp.getSource().getDefiningOp<tensor::CollapseShapeOp>();
+  if (!collapseOp) {
+    return rewriter.notifyMatchFailure(encodingOp,
+                                       "expected a collapse_shape producer");
+  }
+  if (!IREE::Flow::isNonNullAndOutsideDispatch(encodingOp) ||
+      !IREE::Flow::isNonNullAndOutsideDispatch(collapseOp)) {
+    return rewriter.notifyMatchFailure(
+        encodingOp, "expected that both operations are outside dispatch");
+  }
+
+  // The k dims index into the collapsed tensor; `reassociationMaps[k]` lists
+  // the source dimensions that are collapsed into dimension `k`.
+  ArrayRef<int32_t> kDims = encoding.getKDims().asArrayRef();
+  SmallVector<ReassociationIndices, 4> reassociationMaps =
+      collapseOp.getReassociationIndices();
+
+  // Map each k dimension to the matching source dimension, and bail out if it
+  // is not propagable.
+  // TODO: Relax the check to allow transforming innermost reduction dimensions.
+  // We need to revisit the matmul_k encoding semantics.
+  SmallVector<int32_t> newKDims;
+  for (int32_t k : kDims) {
+    // Guard the lookup below against k dims that do not name a dimension of
+    // the collapsed tensor.
+    if (k < 0 || static_cast<size_t>(k) >= reassociationMaps.size()) {
+      return rewriter.notifyMatchFailure(
+          encodingOp, "expected k dimensions to be within the tensor rank");
+    }
+    const ReassociationIndices &sourceDims = reassociationMaps[k];
+    if (sourceDims.size() != 1) {
+      return rewriter.notifyMatchFailure(
+          encodingOp,
+          "expected collapse_shape ops to not transform k dimensions");
+    }
+    newKDims.push_back(static_cast<int32_t>(sourceDims.front()));
+  }
+
+  // Create the new encoding op on the collapse_shape source, then collapse the
+  // encoded tensor into the original result type.
+  MLIRContext *ctx = rewriter.getContext();
+  auto newEncodingAttr = IREE::Encoding::MatmulKAttr::get(ctx, newKDims);
+  RankedTensorType newEncodingType =
+      collapseOp.getSrcType().cloneWithEncoding(newEncodingAttr);
+  Value newEncodingOp = rewriter.create<IREE::Encoding::SetEncodingOp>(
+      encodingOp.getLoc(), newEncodingType, collapseOp.getSrc());
+  Value newCollapseOp = rewriter.create<tensor::CollapseShapeOp>(
+      collapseOp.getLoc(), encodingOp.getResultType(), newEncodingOp,
+      reassociationMaps);
+  rewriter.replaceOp(encodingOp, newCollapseOp);
+  return success();
+}
+
+/// Greedily applies the encoding propagation patterns to the function and
+/// signals a pass failure if the greedy driver reports failure.
+void PropagateEncodingsPass::runOnOperation() {
+  MLIRContext *context = &getContext();
+
+  // The only propagation registered here swaps a set_encoding op with its
+  // collapse_shape producer.
+  RewritePatternSet patterns(context);
+  patterns.insert<SwapEncodingOpWithTensorCollapseShapeOp>(context);
+
+  // Keep folding enabled, but do not CSE constants during the rewrite.
+  GreedyRewriteConfig driverConfig;
+  driverConfig.cseConstants = false;
+  driverConfig.fold = true;
+
+  mlir::FunctionOpInterface funcOp = getOperation();
+  LogicalResult status =
+      applyPatternsGreedily(funcOp, std::move(patterns), driverConfig);
+  if (succeeded(status)) {
+    return;
+  }
+  funcOp.emitOpError("failed to propagate encodings");
+  signalPassFailure();
+}
+
+} // namespace mlir::iree_compiler::DispatchCreation
diff --git a/compiler/src/iree/compiler/DispatchCreation/test/BUILD.bazel b/compiler/src/iree/compiler/DispatchCreation/test/BUILD.bazel
@@ -43,6 +43,7 @@ iree_lit_test_suite(
             "pad_fusion_with_consumer.mlir",
             "pad_fusion_with_producer.mlir",
             "pipeline_tests.mlir",
+            "propagate_encodings.mlir",
             "set_encoding.mlir",
             "set_encoding_pipeline.mlir",
             "sink_reshapes.mlir",
diff --git a/compiler/src/iree/compiler/DispatchCreation/test/CMakeLists.txt b/compiler/src/iree/compiler/DispatchCreation/test/CMakeLists.txt
@@ -41,6 +41,7 @@ iree_lit_test_suite(
     "pad_fusion_with_consumer.mlir"
     "pad_fusion_with_producer.mlir"
     "pipeline_tests.mlir"
+    "propagate_encodings.mlir"
     "set_encoding.mlir"
     "set_encoding_pipeline.mlir"
     "sink_reshapes.mlir"
diff --git a/compiler/src/iree/compiler/DispatchCreation/test/propagate_encodings.mlir b/compiler/src/iree/compiler/DispatchCreation/test/propagate_encodings.mlir
@@ -0,0 +1,33 @@
+// RUN: iree-opt --pass-pipeline="builtin.module(util.func(iree-dispatch-creation-propagate-encodings))" --split-input-file %s | FileCheck %s
+
+// Verifies that set_encoding is moved above a collapse_shape that leaves the
+// K dimension untouched. The K dimension is dim 1 of the collapsed tensor and
+// maps to dim 2 of the source, so the propagated encoding uses k_dims = [2].
+#encoding = #iree_encoding.matmul_k<k_dims = [1]>
+util.func public @propagate_encoding_through_collapse_shape(%src: tensor<2x4096x640xf16>) -> tensor<8192x640xf16, #encoding> {
+  %collapsed = tensor.collapse_shape %src [[0, 1], [2]] : tensor<2x4096x640xf16> into tensor<8192x640xf16>
+  %0 = iree_encoding.set_encoding %collapsed : tensor<8192x640xf16> -> tensor<8192x640xf16, #encoding>
+  util.return %0 : tensor<8192x640xf16, #encoding>
+}
+// CHECK-DAG:   #[[$ENCODING0:.+]] = #iree_encoding.matmul_k<k_dims = [1]>
+// CHECK-DAG:   #[[$ENCODING1:.+]] = #iree_encoding.matmul_k<k_dims = [2]>
+// CHECK-LABEL: @propagate_encoding_through_collapse_shape(
+// CHECK-SAME:    %[[SRC:[a-zA-Z0-9]+]]
+// CHECK:         %[[SET_ENCODING:.+]] = iree_encoding.set_encoding %[[SRC]] : tensor<2x4096x640xf16> -> tensor<2x4096x640xf16, #[[$ENCODING1]]>
+// CHECK:         %[[COLLAPSED:.+]] = tensor.collapse_shape %[[SET_ENCODING]] {{\[}}[0, 1], [2]] : tensor<2x4096x640xf16, #[[$ENCODING1]]> into tensor<8192x640xf16, #[[$ENCODING0]]>
+// CHECK:         util.return %[[COLLAPSED]]
+
+// -----
+
+// Verifies that propagation stops at a collapse_shape that merges dimensions
+// into K. The encoding is moved above the outer collapse_shape
+// ([[0, 1], [2]]), but stays below the inner one ([[0], [1], [2, 3]]) because
+// that op collapses two source dimensions into the K dimension.
+#encoding = #iree_encoding.matmul_k<k_dims = [1]>
+util.func public @propagate_encoding_through_collapse_shape_chain(%src: tensor<2x4096x64x10xf16>) -> tensor<8192x640xf16, #encoding> {
+  %collapsed = tensor.collapse_shape %src [[0], [1], [2, 3]] : tensor<2x4096x64x10xf16> into tensor<2x4096x640xf16>
+  %collapsed_0 = tensor.collapse_shape %collapsed [[0, 1], [2]] : tensor<2x4096x640xf16> into tensor<8192x640xf16>
+  %0 = iree_encoding.set_encoding %collapsed_0 : tensor<8192x640xf16> -> tensor<8192x640xf16, #encoding>
+  util.return %0 : tensor<8192x640xf16, #encoding>
+}
+// CHECK-DAG:   #[[$ENCODING0:.+]] = #iree_encoding.matmul_k<k_dims = [1]>
+// CHECK-DAG:   #[[$ENCODING1:.+]] = #iree_encoding.matmul_k<k_dims = [2]>
+// CHECK-LABEL: @propagate_encoding_through_collapse_shape_chain(
+// CHECK-SAME:    %[[SRC:[a-zA-Z0-9]+]]
+// CHECK:         %[[COLLAPSED_0:.+]] = tensor.collapse_shape %[[SRC]] {{\[}}[0], [1], [2, 3]] : tensor<2x4096x64x10xf16> into tensor<2x4096x640xf16>
+// CHECK:         %[[SET_ENCODING:.+]] = iree_encoding.set_encoding %[[COLLAPSED_0]] : tensor<2x4096x640xf16> -> tensor<2x4096x640xf16, #[[$ENCODING1]]>
+// CHECK:         %[[COLLAPSED_1:.+]] = tensor.collapse_shape %[[SET_ENCODING]] {{\[}}[0, 1], [2]] : tensor<2x4096x640xf16, #[[$ENCODING1]]> into tensor<8192x640xf16, #[[$ENCODING0]]>
+// CHECK:         util.return %[[COLLAPSED_1]]

Original file line number	Diff line number	Diff line change
`@@ -253,6 +253,7 @@ addDispatchRegionCreationPasses(OpPassManager &passManager,`
`253`	`253`	`return DispatchCreation::createHoistEncodingOpsPass(`
`254`	`254`	`HoistEncodingOpsPassOptions{clHoistEncodingsForConstExpr});`
`255`	`255`	`})`
	`256`	`+ .addPass(DispatchCreation::createPropagateEncodingsPass)`
`256`	`257`	`.addPass(`
`257`	`258`	`DispatchCreation::createFuseEncodingOpsIntoDispatchRegionsPass);`
`258`	`259`	`}`