[CPU] Propagate cache tiling sizes in lowering config propagation. (iree-org#21410)

hanhanW · keshavvinayak01 · commit cc0e843ee4c5 · 2025-09-04T18:04:03.000Z
Signed-off-by: hanhanW &lt;hanhan0912@gmail.com&gt;
Signed-off-by: keshavvinayak01 &lt;keshavvinayakjha@gmail.com&gt;
diff --git a/compiler/src/iree/compiler/Codegen/Common/TileSizeSelection.cpp b/compiler/src/iree/compiler/Codegen/Common/TileSizeSelection.cpp
@@ -113,6 +113,11 @@ TilingConfig::getTilingLevelInfo() {
   return result;
 }
 
+bool TilingConfig::isValidLevel(IREE::CPU::TilingLevel level) {
+  return tilingLevelToActualLevelMap[static_cast<int64_t>(level)] !=
+         IREE::CPU::TilingLevel::InvalidLevel;
+}
+
 /// Returns the tiling level that contains the vector dim at `dimPos` (which is
 /// an index into the result of `getVectorTileSizes()`).
 std::optional<unsigned>
diff --git a/compiler/src/iree/compiler/Codegen/Common/TileSizeSelection.h b/compiler/src/iree/compiler/Codegen/Common/TileSizeSelection.h
@@ -112,6 +112,9 @@ class TilingConfig {
     return result;
   }
 
+  /// Returns true if the `level` is available in TilingConfig.
+  bool isValidLevel(TilingLevel level);
+
   /// Returns the tiling level for cache parallel dimensions.
   unsigned getDistributionLevel() {
     return getActualLevel(TilingLevel::DistributionTiles);
@@ -151,6 +154,10 @@ class TilingConfig {
     return getTileSizesForLevel(getActualLevel(TilingLevel::DistributionTiles));
   }
 
+  SmallVector<int64_t> getCacheParallelSizes() {
+    return getTileSizesForLevel(getCacheParallelLevel());
+  }
+
   SmallVector<int64_t> getCacheReductionSizes() {
     return getTileSizesForLevel(getCacheReductionLevel());
   }
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp b/compiler/src/iree/compiler/Codegen/LLVMCPU/KernelDispatch.cpp
@@ -3154,6 +3154,18 @@ setLoweringConfigForComputeOps(mlir::FunctionOpInterface entryPointFn,
       SmallVector<bool> falseVec(numLoops, 0);
       updateOrAddTilingLevelInfo(newTilingInfo, IREE::CPU::DistributionTiles,
                                  distTileSizes, falseVec);
+      // The cache level tiling sizes are not adjusted, so we use the
+      // config from the rootOp directly.
+      if (tilingConfig->isValidLevel(IREE::CPU::CacheParallelTiles)) {
+        updateOrAddTilingLevelInfo(newTilingInfo, IREE::CPU::CacheParallelTiles,
+                                   tilingConfig->getCacheParallelSizes(),
+                                   falseVec);
+      }
+      if (tilingConfig->isValidLevel(IREE::CPU::CacheReductionTiles)) {
+        updateOrAddTilingLevelInfo(
+            newTilingInfo, IREE::CPU::CacheReductionTiles,
+            tilingConfig->getCacheReductionSizes(), falseVec);
+      }
       updateOrAddTilingLevelInfo(
           newTilingInfo, IREE::CPU::VectorCommonParallelTiles,
           commonVecTileSizes, commonVecScalableTileFlags);
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_lowering_strategy_without_distribution.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_lowering_strategy_without_distribution.mlir
@@ -20,7 +20,7 @@ func.func @matmul_static() attributes {hal.executable.target = #executable_targe
   return
 }
 
-//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0], [1, 1], [0, 0], [0, 0]]>
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0], [0, 0], [0, 0], [1, 1], [0, 0], [0, 0]]>
 //  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0, 0], [0, 0, 0], [0, 0, 0], [1, 1, 0], [0, 0, 4], [0, 0, 0]]>
 //  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<pipeline = CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
 //      CHECK: func.func @matmul_static()
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_riscv_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_riscv_lowering_strategy.mlir
@@ -11,7 +11,7 @@ func.func @matmul_riscv(%lhs: tensor<384x512xf32>, %rhs: tensor<512x128xf32>) ->
   %2 = linalg.matmul ins(%lhs, %rhs : tensor<384x512xf32>, tensor<512x128xf32>) outs(%1 : tensor<384x128xf32>) -> tensor<384x128xf32>
   return %2 : tensor<384x128xf32>
 }
-//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[48, 64], [8, 32], [0, 0], [0, 0]]>
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[48, 64], [48, 64], [0, 0], [8, 32], [0, 0], [0, 0]]>
 //  CHECK-DAG: #[[CONFIG2:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[48, 64, 0], [48, 64, 0], [0, 0, 0], [8, 32, 0], [0, 0, 1], [0, 0, 0]]>
 //  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<pipeline = CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
 //      CHECK: func.func @matmul_riscv(
@@ -31,7 +31,7 @@ func.func @matmul_gemm_riscv_vl512(%lhs: tensor<384x512xf32>, %rhs: tensor<512x1
   %res = linalg.matmul ins(%lhs, %rhs : tensor<384x512xf32>, tensor<512x128xf32>) outs(%fill : tensor<384x128xf32>) -> tensor<384x128xf32>
   return %res : tensor<384x128xf32>
 }
-//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64], [7, 64], [0, 0], [0, 0]]>
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64], [64, 64], [0, 0], [7, 64], [0, 0], [0, 0]]>
 //  CHECK-DAG: #[[CONFIG2:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 64, 0], [64, 64, 0], [0, 0, 0], [7, 64, 0], [0, 0, 1], [0, 0, 0]]>
 //  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<pipeline = CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
 //      CHECK: func.func @matmul_gemm_riscv_vl512(
@@ -51,15 +51,15 @@ func.func @matmul_gemm_riscv_vl1024(%lhs: tensor<384x512xf32>, %rhs: tensor<512x
   %res = linalg.matmul ins(%lhs, %rhs : tensor<384x512xf32>, tensor<512x256xf32>) outs(%fill : tensor<384x256xf32>) -> tensor<384x256xf32>
   return %res : tensor<384x256xf32>
 }
-//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 128], [7, 128], [0, 0], [0, 0]]>
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 128], [64, 128], [0, 0], [7, 128], [0, 0], [0, 0]]>
 //  CHECK-DAG: #[[CONFIG2:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[64, 128, 0], [64, 128, 0], [0, 0, 0], [7, 128, 0], [0, 0, 1], [0, 0, 0]]>
 //  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<pipeline = CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
 //      CHECK: func.func @matmul_gemm_riscv_vl1024(
 // CHECK-SAME:     translation_info = #[[TRANSLATION]]
 //      CHECK: linalg.matmul
 // CHECK-SAME:     lowering_config = #[[CONFIG2]]
 
-//  CHECK-AGGRESSIVE-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[32, 256], [7, 128], [0, 0], [0, 0]]>
+//  CHECK-AGGRESSIVE-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[32, 256], [32, 256], [0, 0], [7, 128], [0, 0], [0, 0]]>
 //  CHECK-AGGRESSIVE-DAG: #[[CONFIG2:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[32, 256, 0], [32, 256, 0], [0, 0, 0], [7, 128, 0], [0, 0, 1], [0, 0, 0]]>
 //  CHECK-AGGRESSIVE-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<pipeline = CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
 //      CHECK-AGGRESSIVE: func.func @matmul_gemm_riscv_vl1024(
@@ -79,7 +79,7 @@ func.func @matmul_gemv_riscv_vl512(%lhs: tensor<1x512xf32>, %rhs: tensor<512x128
   %res = linalg.matmul ins(%lhs, %rhs : tensor<1x512xf32>, tensor<512x128xf32>) outs(%fill : tensor<1x128xf32>) -> tensor<1x128xf32>
   return %res : tensor<1x128xf32>
 }
-//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 128], [1, 128], [0, 0], [0, 0]]>
+//  CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 128], [0, 128], [0, 0], [1, 128], [0, 0], [0, 0]]>
 //  CHECK-DAG: #[[CONFIG2:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 128, 0], [0, 128, 0], [0, 0, 0], [1, 128, 0], [0, 0, 1], [0, 0, 0]]>
 //  CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<pipeline = CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>
 //      CHECK: func.func @matmul_gemv_riscv_vl512(
diff --git a/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir b/compiler/src/iree/compiler/Codegen/LLVMCPU/test/select_x86_64_lowering_strategy.mlir
@@ -1941,7 +1941,7 @@ func.func @custom_op(%arg0 : tensor<384x512xf32>, %arg1 : tensor<512x128xf32>,
   return %1 : tensor<384x128xf32>
 }
 //  CHECK-DAG: #[[CONFIG0:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[48, 64]]>
-//  CHECK-DAG: #[[CONFIG1:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[48, 64], [8, 32], [0, 0], [0, 0]]>
+//  CHECK-DAG: #[[CONFIG1:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[48, 64], [48, 64], [0, 0], [8, 32], [0, 0], [0, 0]]>
 //  CHECK-DAG: #[[CONFIG2:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[48, 64, 0], [48, 64, 0], [0, 0, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>
 //  CHECK-DAG: #[[TRANSLATION_INFO:.+]] = #iree_codegen.translation_info<pipeline = CPUDoubleTilingExpert, {enable_loop_peeling}>
 //      CHECK: func @custom_op(

Original file line number	Diff line number	Diff line change
`@@ -112,6 +112,9 @@ class TilingConfig {`
`112`	`112`	`return result;`
`113`	`113`	`}`
`114`	`114`
	`115`	+ /// Returns true if the `level` is available in TilingConfig.
	`116`	`+ bool isValidLevel(TilingLevel level);`
	`117`	`+`
`115`	`118`	`/// Returns the tiling level for cache parallel dimensions.`
`116`	`119`	`unsigned getDistributionLevel() {`
`117`	`120`	`return getActualLevel(TilingLevel::DistributionTiles);`
`@@ -151,6 +154,10 @@ class TilingConfig {`
`151`	`154`	`return getTileSizesForLevel(getActualLevel(TilingLevel::DistributionTiles));`
`152`	`155`	`}`
`153`	`156`
	`157`	`+ SmallVector<int64_t> getCacheParallelSizes() {`
	`158`	`+ return getTileSizesForLevel(getCacheParallelLevel());`
	`159`	`+ }`
	`160`	`+`
`154`	`161`	`SmallVector<int64_t> getCacheReductionSizes() {`
`155`	`162`	`return getTileSizesForLevel(getCacheReductionLevel());`
`156`	`163`	`}`
Original file line number	Diff line number	Diff line change
`@@ -20,7 +20,7 @@ func.func @matmul_static() attributes {hal.executable.target = #executable_targe`
`20`	`20`	`return`
`21`	`21`	`}`
`22`	`22`
`23`		`-// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0], [1, 1], [0, 0], [0, 0]]>`
	`23`	`+// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0], [0, 0], [0, 0], [1, 1], [0, 0], [0, 0]]>`
`24`	`24`	`// CHECK-DAG: #[[CONFIG:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[0, 0, 0], [0, 0, 0], [0, 0, 0], [1, 1, 0], [0, 0, 4], [0, 0, 0]]>`
`25`	`25`	`// CHECK-DAG: #[[TRANSLATION:.+]] = #iree_codegen.translation_info<pipeline = CPUDoubleTilingExpert, {{\{}}enable_loop_peeling}>`
`26`	`26`	`// CHECK: func.func @matmul_static()`
Original file line number	Diff line number	Diff line change
`@@ -1941,7 +1941,7 @@ func.func @custom_op(%arg0 : tensor<384x512xf32>, %arg1 : tensor<512x128xf32>,`
`1941`	`1941`	`return %1 : tensor<384x128xf32>`
`1942`	`1942`	`}`
`1943`	`1943`	`// CHECK-DAG: #[[CONFIG0:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[48, 64]]>`
`1944`		`-// CHECK-DAG: #[[CONFIG1:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[48, 64], [8, 32], [0, 0], [0, 0]]>`
	`1944`	`+// CHECK-DAG: #[[CONFIG1:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[48, 64], [48, 64], [0, 0], [8, 32], [0, 0], [0, 0]]>`
`1945`	`1945`	`// CHECK-DAG: #[[CONFIG2:.+]] = #iree_codegen.lowering_config<tile_sizes = {{\[}}[48, 64, 0], [48, 64, 0], [0, 0, 0], [8, 32, 0], [0, 0, 16], [0, 0, 0]]>`
`1946`	`1946`	`// CHECK-DAG: #[[TRANSLATION_INFO:.+]] = #iree_codegen.translation_info<pipeline = CPUDoubleTilingExpert, {enable_loop_peeling}>`
`1947`	`1947`	`// CHECK: func @custom_op(`