Commit 660cf92

[CodeGen] Clean up the IRs within the MaterializeEncoding pass. (#20625)
The pass already populates some canonicalization patterns, but the CPU tests still require running the canonicalizer. This revision explicitly populates the needed patterns and prevents redundant op creation in the pass. Since the pass already cleans up the IR, it makes sense to run CSE within it as well. The revision slightly improves compilation time because it does not create redundant copy operations in the first place.

Signed-off-by: hanhanW <[email protected]>
1 parent 8069d7b commit 660cf92
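
For orientation, below is a minimal, self-contained sketch of the approach described in the commit message: explicitly populate the canonicalization patterns the pass needs and run CSE directly on the function, instead of scheduling separate canonicalize and cse passes afterwards. This is not the actual IREE source; the helper name cleanUpAfterMaterialization is hypothetical and the pattern list is only a representative subset of what the pass registers.

// Sketch only (hypothetical helper, representative pattern subset); it mirrors
// the shape of the change shown in the MaterializeEncoding.cpp hunk below.
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Interfaces/FunctionInterfaces.h"
#include "mlir/Transforms/CSE.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

using namespace mlir;

// Run the pass-local cleanup on a function: fold with targeted
// canonicalization patterns, then eliminate common sub-expressions in place.
static LogicalResult cleanUpAfterMaterialization(FunctionOpInterface funcOp) {
  MLIRContext *ctx = funcOp.getContext();

  RewritePatternSet patterns(ctx);
  // Only the patterns the pass actually needs, rather than relying on a full
  // canonicalize pass afterwards (subset shown here).
  linalg::PackOp::getCanonicalizationPatterns(patterns, ctx);
  linalg::UnPackOp::getCanonicalizationPatterns(patterns, ctx);
  tensor::CastOp::getCanonicalizationPatterns(patterns, ctx);
  if (failed(applyPatternsGreedily(funcOp, std::move(patterns))))
    return funcOp.emitOpError("folding patterns failed");

  // Running CSE here removes the need for a trailing cse pass in the pipeline.
  IRRewriter rewriter(ctx);
  DominanceInfo domInfo;
  mlir::eliminateCommonSubExpressions(rewriter, domInfo, funcOp);
  return success();
}

In the commit itself, this cleanup happens at the end of materializeFuncOpEncodings, as the first diff hunk below shows.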

File tree

3 files changed: +14 -4 lines changed

compiler/src/iree/compiler/Codegen/Common/MaterializeEncoding.cpp

Lines changed: 9 additions & 2 deletions
@@ -24,6 +24,7 @@
 #include "mlir/Dialect/Tensor/IR/Tensor.h"
 #include "mlir/Interfaces/FunctionInterfaces.h"
 #include "mlir/Pass/PassManager.h"
+#include "mlir/Transforms/CSE.h"
 #include "mlir/Transforms/DialectConversion.h"
 #include "mlir/Transforms/GreedyPatternRewriteDriver.h"

@@ -132,20 +133,26 @@ materializeFuncOpEncodings(FunctionOpInterface funcOp,
     }
   }

-  // Add patterns to fold pack/unpack ops with pad/extract_slice ops and
-  // resolve dims ops.
+  // Run patterns to fold pack/unpack ops with pad/extract_slice ops, resolve
+  // dims ops, and eliminate common sub-expressions.
   {
     RewritePatternSet patterns(ctx);
     populateReshapeToInterfaceTensorPatterns(patterns);
     tensor::CastOp::getCanonicalizationPatterns(patterns, ctx);
     tensor::populateFoldTensorEmptyPatterns(patterns);
     linalg::FillOp::getCanonicalizationPatterns(patterns, ctx);
+    linalg::PackOp::getCanonicalizationPatterns(patterns, ctx);
+    linalg::UnPackOp::getCanonicalizationPatterns(patterns, ctx);
     linalg::populateFoldIntoPackAndUnpackPatterns(patterns);
     memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns);
     if (failed(applyPatternsGreedily(funcOp, std::move(patterns)))) {
       funcOp.emitOpError("folding patterns failed");
       return failure();
     }
+
+    IRRewriter rewriter(ctx);
+    DominanceInfo domInfo;
+    mlir::eliminateCommonSubExpressions(rewriter, domInfo, funcOp);
   }

   return success();

compiler/src/iree/compiler/Codegen/Common/test/llvmcpu_materialize_encoding.mlir

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-materialize-device-encoding),canonicalize,cse)" --split-input-file %s | FileCheck %s
+// RUN: iree-opt --pass-pipeline="builtin.module(func.func(iree-codegen-materialize-device-encoding))" --split-input-file %s | FileCheck %s

 #pipeline_layout = #hal.pipeline.layout<bindings = [
   #hal.pipeline.binding<storage_buffer>,

compiler/src/iree/compiler/Codegen/ExternalInterfaces/CPUEncodingExternalModels.cpp

Lines changed: 4 additions & 1 deletion
@@ -97,10 +97,13 @@ getExpandedType(RankedTensorType type, bool isBatched, bool isTransposed,

 /// Given an input Value and a desired output element type, create and return
 /// an element-wise linalg::GenericOp that extends the input Value to the
-/// output element type.
+/// output element type. Returns `input` if casting is not needed.
 static Value createElementWiseExtUIOp(OpBuilder &builder, Value input,
                                       Location loc, Type outElemType) {
   auto inputType = cast<RankedTensorType>(input.getType());
+  if (inputType.getElementType() == outElemType) {
+    return input;
+  }
   SmallVector<AffineMap> maps(
       2, builder.getMultiDimIdentityMap(inputType.getRank()));
   SmallVector<utils::IteratorType> iteratorTypes(inputType.getRank(),
