iree-org
diff --git a/‎compiler/plugins/target/LLVMCPU/BUILD.bazel‎
Lines changed: 1 addition & 0 deletions b/‎compiler/plugins/target/LLVMCPU/BUILD.bazel‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎compiler/plugins/target/LLVMCPU/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎compiler/plugins/target/LLVMCPU/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp‎
Lines changed: 2 additions & 0 deletions b/‎compiler/plugins/target/LLVMCPU/LLVMCPUTarget.cpp‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎compiler/plugins/target/VMVX/BUILD.bazel‎
Lines changed: 1 addition & 0 deletions b/‎compiler/plugins/target/VMVX/BUILD.bazel‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎compiler/plugins/target/VMVX/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎compiler/plugins/target/VMVX/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎compiler/plugins/target/VMVX/VMVXTarget.cpp‎
Lines changed: 7 additions & 5 deletions b/‎compiler/plugins/target/VMVX/VMVXTarget.cpp‎
Lines changed: 7 additions & 5 deletions
diff --git a/‎compiler/src/iree/compiler/Codegen/Common/CPU/BUILD.bazel‎
Lines changed: 1 addition & 0 deletions b/‎compiler/src/iree/compiler/Codegen/Common/CPU/BUILD.bazel‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎compiler/src/iree/compiler/Codegen/Common/CPU/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions b/‎compiler/src/iree/compiler/Codegen/Common/CPU/CMakeLists.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp‎
Lines changed: 20 additions & 151 deletions b/‎compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp‎
Lines changed: 20 additions & 151 deletions
diff --git a/‎compiler/src/iree/compiler/Codegen/Dialect/CPU/BUILD.bazel‎
Lines changed: 11 additions & 0 deletions b/‎compiler/src/iree/compiler/Codegen/Dialect/CPU/BUILD.bazel‎
Lines changed: 11 additions & 0 deletions
@@ -33,6 +33,7 @@ iree_compiler_cc_library(
         ":StaticLibraryGenerator",
         "//compiler/plugins/target/LLVMCPU/Builtins",
         "//compiler/src/iree/compiler/Codegen/Common",
+        "//compiler/src/iree/compiler/Codegen/Dialect/CPU/IR:IREECPUDialect",
         "//compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR:IREECodegenDialect",
         "//compiler/src/iree/compiler/Codegen/LLVMCPU",
         "//compiler/src/iree/compiler/Codegen/Utils",
 
@@ -54,6 +54,7 @@ iree_cc_library(
     MLIRTargetLLVMIRExport
     MLIRTransformDialect
     iree::compiler::Codegen::Common
+    iree::compiler::Codegen::Dialect::CPU::IR::IREECPUDialect
     iree::compiler::Codegen::Dialect::Codegen::IR::IREECodegenDialect
     iree::compiler::Codegen::LLVMCPU
     iree::compiler::Codegen::Utils
 
@@ -15,6 +15,7 @@
 #include "compiler/plugins/target/LLVMCPU/LibraryBuilder.h"
 #include "compiler/plugins/target/LLVMCPU/LinkerTool.h"
 #include "compiler/plugins/target/LLVMCPU/StaticLibraryGenerator.h"
+#include "iree/compiler/Codegen/Dialect/CPU/IR/IREECPUDialect.h"
 #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenDialect.h"
 #include "iree/compiler/Codegen/LLVMCPU/Passes.h"
 #include "iree/compiler/Codegen/LLVMCPU/Utils.h"
@@ -218,6 +219,7 @@ class LLVMCPUTargetBackend final : public TargetBackend {
     // TODO: make inclusion of ArmNeon conditional?
     // clang-format off
     registry.insert<IREE::Codegen::IREECodegenDialect,
+                    IREE::CPU::IREECPUDialect,
                     IREE::LinalgExt::IREELinalgExtDialect,
                     mlir::transform::TransformDialect,
                     pdl::PDLDialect,
 
@@ -23,6 +23,7 @@ iree_compiler_cc_library(
         "VMVXTarget.cpp",
     ],
     deps = [
+        "//compiler/src/iree/compiler/Codegen/Dialect/CPU/IR:IREECPUDialect",
         "//compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR:IREECodegenDialect",
         "//compiler/src/iree/compiler/Codegen/VMVX",
         "//compiler/src/iree/compiler/Dialect/HAL/Target",
 
@@ -27,6 +27,7 @@ iree_cc_library(
     MLIRIR
     MLIRPass
     MLIRSupport
+    iree::compiler::Codegen::Dialect::CPU::IR::IREECPUDialect
     iree::compiler::Codegen::Dialect::Codegen::IR::IREECodegenDialect
     iree::compiler::Codegen::VMVX
     iree::compiler::Dialect::HAL::Target
 
@@ -4,6 +4,7 @@
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+#include "iree/compiler/Codegen/Dialect/CPU/IR/IREECPUDialect.h"
 #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenDialect.h"
 #include "iree/compiler/Codegen/VMVX/Passes.h"
 #include "iree/compiler/Dialect/HAL/Target/Devices/LocalDevice.h"
@@ -77,9 +78,10 @@ class VMVXTargetBackend final : public TargetBackend {
   }
 
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<IREE::Codegen::IREECodegenDialect, IREE::VM::VMDialect,
-                    IREE::VMVX::VMVXDialect,
-                    IREE::LinalgExt::IREELinalgExtDialect>();
+    registry
+        .insert<IREE::Codegen::IREECodegenDialect, IREE::CPU::IREECPUDialect,
+                IREE::VM::VMDialect, IREE::VMVX::VMVXDialect,
+                IREE::LinalgExt::IREELinalgExtDialect>();
   }
 
   IREE::VM::TargetOptions
@@ -232,8 +234,8 @@ class VMVXInlineTargetBackend final : public TargetBackend {
   }
 
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry
-        .insert<IREE::Codegen::IREECodegenDialect, IREE::VMVX::VMVXDialect>();
+    registry.insert<IREE::Codegen::IREECodegenDialect,
+                    IREE::CPU::IREECPUDialect, IREE::VMVX::VMVXDialect>();
   }
 
   void
 
@@ -56,6 +56,7 @@ iree_compiler_cc_library(
         ":PassHeaders",
         ":PassesIncGen",
         "//compiler/src/iree/compiler/Codegen/Common",
+        "//compiler/src/iree/compiler/Codegen/Dialect/CPU/IR:IREECPUDialect",
         "//compiler/src/iree/compiler/Codegen/Dialect/Codegen/IR:IREECodegenDialect",
         "//compiler/src/iree/compiler/Codegen/Dialect/Codegen/Utils",
         "//compiler/src/iree/compiler/Codegen/Interfaces:UKernelOpInterface",
 
@@ -78,6 +78,7 @@ iree_cc_library(
     MLIRVectorTransforms
     iree::builtins::ukernel::exported_bits
     iree::compiler::Codegen::Common
+    iree::compiler::Codegen::Dialect::CPU::IR::IREECPUDialect
     iree::compiler::Codegen::Dialect::Codegen::IR::IREECodegenDialect
     iree::compiler::Codegen::Dialect::Codegen::Utils
     iree::compiler::Codegen::Interfaces::UKernelOpInterface
 
@@ -6,6 +6,8 @@
 
 #include "iree/compiler/Codegen/Common/CPU/Passes.h"
 #include "iree/compiler/Codegen/Common/EncodingUtils.h"
+#include "iree/compiler/Codegen/Dialect/CPU/IR/IREECPUDialect.h"
+#include "iree/compiler/Codegen/Dialect/CPU/IR/IREECPUTypes.h"
 #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenDialect.h"
 #include "iree/compiler/Codegen/Dialect/Codegen/IR/IREECodegenOps.h"
 #include "iree/compiler/Codegen/Dialect/Codegen/Utils/Utils.h"
@@ -38,39 +40,9 @@ using IREE::Codegen::TileMxNxK;
 #define GEN_PASS_DEF_CPUMATERIALIZEHOSTENCODINGPASS
 #include "iree/compiler/Codegen/Common/CPU/Passes.h.inc"
 
-// Enumerate tile sizes to choose from when no specific architecture is
-// targeted. For narrow-{M,N} cases, this only enumerates on narrow M. The
-// narrow-N cases are handled by transposition in chooseMatmulTile.
-static SmallVector<TileMxNxK>
-enumerateMatmulTilesVMVX(linalg::ContractionDimensions cDims,
-                         IREE::Encoding::EncodingAttr encoding,
-                         IREE::HAL::ExecutableTargetAttr target) {
-  bool hasUkernelSupport = hasUkernel(target);
-
-  // TODO(hanchung): The ukernel path does not support 3d
-  // codegen.query_tile_sizes op, so we disable dynamic tile shapes for
-  // batch_matmul. Also, they are not set up for narrow M/N matmul, so it is
-  // disabled when it is the case.
-  if (!cDims.batch.empty() || getMatmulNarrowDim(encoding)) {
-    hasUkernelSupport = false;
-  }
-  if (hasUkernelSupport) {
-    // VMVX+ukernel uses dynamic tile shapes.
-    return {TileMxNxK{ShapedType::kDynamic, ShapedType::kDynamic,
-                      ShapedType::kDynamic}};
-  }
-
-  return {
-      TileMxNxK{8, 8, 4}, // Some vaguely reasonable tile shape.
-      TileMxNxK{4, 8, 4}, // Truncation of the above.
-      TileMxNxK{2, 8, 4}, // Truncation of the above.
-      TileMxNxK{1, 8, 4}, // Truncation of the above.
-  };
-}
-
 // Enumerate tile sizes to choose from on riscv32.
 // For narrow-{M,N} cases, this only enumerates on narrow M. The narrow-N cases
-// are handled by transposition in chooseMatmulTile.
+// are handled by transposition in IREE::Codegen::chooseMatmulTile.
 static SmallVector<TileMxNxK>
 enumerateMatmulTileRiscv32(IREE::HAL::ExecutableTargetAttr target) {
   if (hasUkernel(target)) {
@@ -87,7 +59,7 @@ enumerateMatmulTileRiscv32(IREE::HAL::ExecutableTargetAttr target) {
 
 // Enumerate tile sizes to choose from on arm64.
 // For narrow-{M,N} cases, this only enumerates on narrow M. The narrow-N cases
-// are handled by transposition in chooseMatmulTile.
+// are handled by transposition in IREE::Codegen::chooseMatmulTile.
 static SmallVector<TileMxNxK>
 enumerateMatmulTileArm64(TypeRange elementTypes,
                          IREE::HAL::ExecutableTargetAttr target) {
@@ -178,7 +150,7 @@ enumerateMatmulTileArm64(TypeRange elementTypes,
 
 // Enumerate tile sizes to choose from on x86-64.
 // For narrow-{M,N} cases, this only enumerates on narrow M. The narrow-N cases
-// are handled by transposition in chooseMatmulTile.
+// are handled by transposition in IREE::Codegen::chooseMatmulTile.
 static SmallVector<TileMxNxK>
 enumerateMatmulTileX86_64(TypeRange elementTypes,
                           IREE::HAL::ExecutableTargetAttr target) {
@@ -291,114 +263,6 @@ enumerateMatmulTileX86_64(TypeRange elementTypes,
   return {};
 }
 
-/// Returns the best TileMxNxK from `enumeratedTiles` pool. If the
-/// `hostDefinedUpperBound` is not empty, the chosen tile sizes can not be
-/// greater than the values.
-/// TODO(#16933): Remove `hostDefinedUpperBound` once we can propagate such
-/// information to host. For now, they are defined by host.
-static TileMxNxK
-chooseMatmulTile(ArrayRef<TileMxNxK> enumeratedTiles,
-                 IREE::Encoding::MatmulNarrowDim narrowDim,
-                 ArrayRef<int64_t> hostDefinedUpperBound = {}) {
-  assert((hostDefinedUpperBound.empty() || hostDefinedUpperBound.size() >= 3) &&
-         "expected hostDefinedUpperBound is empty or has upper bound for {M, "
-         "N, K}");
-  // Handle narrow-N by transposing to reduce to narrow-M. Note: the
-  // enumeratedTiles currently only enumerate narrow-M cases.
-  if (narrowDim.isN()) {
-    SmallVector<int64_t> newHostDefinedUpperBound(hostDefinedUpperBound);
-    std::swap(newHostDefinedUpperBound[0], newHostDefinedUpperBound[1]);
-    narrowDim.dim = IREE::Encoding::MatmulNarrowDim::Dim::M;
-    TileMxNxK tile =
-        chooseMatmulTile(enumeratedTiles, narrowDim, newHostDefinedUpperBound);
-    std::swap(tile.M, tile.N);
-    return tile;
-  }
-  // Handle kDynamic: currently this is only used with VMVX, where there is only
-  // one enumerated tile and it has all three M/N/K dimensions dynamic, so for
-  // now we only support that. Generalize that as needed when more dynamic tile
-  // sizes are used outside of VMVX, e.g. perhaps some day with Arm SVE. Decide
-  // how to incorporate the handling of kDynamic in the cost-model evaluation
-  // below to decide when to prefer a dynamic vs a static tile shape.
-  for (auto tile : enumeratedTiles) {
-    if (ShapedType::isDynamic(tile.M) || ShapedType::isDynamic(tile.N) ||
-        ShapedType::isDynamic(tile.K)) {
-      assert(enumeratedTiles.size() == 1);
-      assert(ShapedType::isDynamic(tile.M) && ShapedType::isDynamic(tile.N) &&
-             ShapedType::isDynamic(tile.K));
-      return tile;
-    }
-  }
-  // We're going to "rate" the enumerated tiles.
-  struct RatedTileMxNxK : TileMxNxK {
-    RatedTileMxNxK() {}
-    RatedTileMxNxK(TileMxNxK tile) : TileMxNxK(tile) {}
-    // Penalize tiles that are wider in the M dimension than matmulNarrowM.
-    int64_t paddingPenalty = 0;
-    // Favor larger tiles, as long as they still minimize paddingPenalty.
-    int64_t productMxNxK = 0;
-  };
-  SmallVector<RatedTileMxNxK> ratedTiles;
-  ratedTiles.reserve(enumeratedTiles.size());
-  int64_t bestPaddingPenalty = INT64_MAX;
-  int64_t mUB = INT64_MAX;
-  int64_t nUB = INT64_MAX;
-  int64_t kUB = INT64_MAX;
-  if (!hostDefinedUpperBound.empty()) {
-    mUB = hostDefinedUpperBound[0];
-    nUB = hostDefinedUpperBound[1];
-    kUB = hostDefinedUpperBound[2];
-  }
-  for (auto tile : enumeratedTiles) {
-    if (tile.M > mUB || tile.N > nUB || tile.K > kUB) {
-      LLVM_DEBUG(llvm::dbgs() << "[" << DEBUG_TYPE << "]: tile (";
-                 llvm::interleaveComma(
-                     ArrayRef<int64_t>{tile.M, tile.N, tile.K}, llvm::dbgs());
-                 llvm::dbgs()
-                 << ") is skipped because it is not valid for upper_bound (";
-                 llvm::interleaveComma(ArrayRef<int64_t>{mUB, nUB, kUB},
-                                       llvm::dbgs());
-                 llvm::dbgs() << ")\n");
-      continue;
-    }
-    RatedTileMxNxK ratedTile(tile);
-    ratedTile.paddingPenalty = 0;
-    // If we are choosing a tile for a narrow-M case, we want to minimize
-    // padding along the M dimension.
-    // The PowerOf2Ceil is so that we are OK with padding up to the next
-    // power of two, we just try to avoid padding beyond that. For example,
-    // if matmulNarrowM==7 and we have enumerated tiles with M=8,4,2,1, we
-    // are OK with the tile that has M==8 even though it requires some padding.
-    // Otherwise, we would be penalizing the tiles with M==8,4,2 and we would
-    // end up selecting the vecmat tile (M==1) for that case!
-    if (narrowDim) {
-      ratedTile.paddingPenalty =
-          std::max<int64_t>(tile.M - llvm::PowerOf2Ceil(narrowDim.size), 0);
-    }
-    ratedTile.productMxNxK = tile.M * tile.N * tile.K;
-    ratedTiles.push_back(ratedTile);
-
-    LLVM_DEBUG(llvm::dbgs() << "candidate: "; llvm::interleaveComma(
-                   ArrayRef<int64_t>{tile.M, tile.N, tile.K}, llvm::dbgs());
-               llvm::dbgs() << " penalty:" << ratedTile.paddingPenalty << "\n");
-
-    bestPaddingPenalty = std::min(bestPaddingPenalty, ratedTile.paddingPenalty);
-  }
-  RatedTileMxNxK bestRatedTile;
-  for (auto ratedTile : ratedTiles) {
-    // Choose only among tiles that minimize paddingPenalty. Among those,
-    // maximize productMxNxK.
-    if (ratedTile.paddingPenalty == bestPaddingPenalty &&
-        bestRatedTile.productMxNxK < ratedTile.productMxNxK) {
-      bestRatedTile = ratedTile;
-    }
-  }
-  // Sanity check. This assert can only fail if there's a programming mistake
-  // locally here.
-  assert(bestRatedTile.paddingPenalty == bestPaddingPenalty);
-  return bestRatedTile;
-}
-
 static SmallVector<TileMxNxK>
 enumerateMatmulTileMxNxK(IREE::Encoding::EncodingAttr encoding,
                          IREE::HAL::ExecutableTargetAttr target) {
@@ -410,9 +274,6 @@ enumerateMatmulTileMxNxK(IREE::Encoding::EncodingAttr encoding,
   }
   // Enumerate available tile shapes for the given encoding and target.
   SmallVector<Type> elementTypes = encoding.getElementTypesArray();
-  if (isVMVXBackend(target)) {
-    return enumerateMatmulTilesVMVX(*cDims, encoding, target);
-  }
   if (isAArch64(target)) {
     return enumerateMatmulTileArm64(elementTypes, target);
   }
@@ -442,8 +303,8 @@ materializeEncodingForTarget(RankedTensorType tensorType,
   auto narrowDim = IREE::Encoding::getMatmulNarrowDim(encoding);
   // Choose a final matmul TileMxNxK from the above-enumerated tile shapes,
   // taking narrow dimensions into account.
-  TileMxNxK chosenTileMxNxK = chooseMatmulTile(enumeratedTileMxNxK, narrowDim,
-                                               encoding.getRoundDimsToArray());
+  TileMxNxK chosenTileMxNxK = IREE::Codegen::chooseMatmulTile(
+      enumeratedTileMxNxK, narrowDim, encoding.getRoundDimsToArray());
 
   // Map the matmul TileMxNxK to an actual tile shape for the tensor at hand,
   // based on its operand index in the matmul.
@@ -481,9 +342,15 @@ materializeFuncOpEncodings(FunctionOpInterface funcOp,
   // 2. We use ukernels, and this allows writing 2x fewer narrow ukernels.
   // 3. Heuristics for cache-friendly dispatch tiling can get complex on CPU,
   //    so it is nice that they have fewer narrow cases to consider.
+  IREE::Codegen::LayoutAttrInterface layoutAttr;
+  if (isVMVXBackend(targetAttr)) {
+    layoutAttr = cast<IREE::Codegen::LayoutAttrInterface>(
+        IREE::CPU::VMVXEncodingLayoutAttr::get(ctx,
+                                               targetAttr.getConfiguration()));
+  }
   MaterializeEncodingTypeConverter typeConverter(
       materializeEncodingForTarget, targetAttr, /*transposeNarrowN=*/true,
-      /*layoutAttr=*/{});
+      layoutAttr);
   MaterializeEncodingConversionTarget target(*ctx);
   auto materializeEncodingValueFn = getMaterializeEncodingValueFn(targetAttr);
   populateMaterializeEncodingIntoPackUnPackPatterns(
@@ -547,8 +414,9 @@ struct CPUMaterializeHostEncodingPass
     : public impl::CPUMaterializeHostEncodingPassBase<
           CPUMaterializeHostEncodingPass> {
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<arith::ArithDialect, tensor::TensorDialect,
-                    IREE::Codegen::IREECodegenDialect>();
+    registry
+        .insert<arith::ArithDialect, tensor::TensorDialect,
+                IREE::Codegen::IREECodegenDialect, IREE::CPU::IREECPUDialect>();
   }
 
   void runOnOperation() override {
@@ -607,8 +475,9 @@ struct CPUMaterializeDeviceEncodingPass
     : public impl::CPUMaterializeDeviceEncodingPassBase<
           CPUMaterializeDeviceEncodingPass> {
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<arith::ArithDialect, tensor::TensorDialect,
-                    IREE::Codegen::IREECodegenDialect>();
+    registry
+        .insert<arith::ArithDialect, tensor::TensorDialect,
+                IREE::Codegen::IREECodegenDialect, IREE::CPU::IREECPUDialect>();
   }
 
   void runOnOperation() override {
 
@@ -0,0 +1,11 @@
+# Copyright 2024 The IREE Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+package(
+    default_visibility = ["//visibility:public"],
+    features = ["layering_check"],
+    licenses = ["notice"],  # Apache 2.0
+)