Add cache attributes to xetile.prefetch_tile (#749)

Hardcode84 · web-flow · commit 22867ffc50d5 · 2024-05-17T21:16:23.000+02:00
diff --git a/include/imex/Dialect/XeTile/IR/XeTileAttrs.td b/include/imex/Dialect/XeTile/IR/XeTileAttrs.td
@@ -108,4 +108,29 @@ def XeTile_AtomicRMWKindAttr : I64EnumAttr<
   let cppNamespace = "::imex::xetile";
 }
 
+//===----------------------------------------------------------------------===//
+// XeTile cache policy enum and the cache_hint attribute that wraps it.
+//===----------------------------------------------------------------------===//
+def XeTile_CachePolicyCached:        I32EnumAttrCase<"CACHED", 0, "cached">;                    // valid for read and write
+def XeTile_CachePolicyUncached:      I32EnumAttrCase<"UNCACHED", 1, "uncached">;                // valid for read and write
+def XeTile_CachePolicyStreaming:     I32EnumAttrCase<"STREAMING", 2, "streaming">;              // valid for read only
+def XeTile_CachePolicyInvalid:       I32EnumAttrCase<"READ_INVALIDATE", 3, "read_invalidate">;  // valid for read only
+def XeTile_CachePolicyWriteBack:     I32EnumAttrCase<"WRITE_BACK", 4, "write_back">;            // valid for write only
+def XeTile_CachePolicyWriteThrough:  I32EnumAttrCase<"WRITE_THROUGH", 5, "write_through">;      // valid for write only
+
+def XeTile_CachePolicyEnums : I32EnumAttr<"CachePolicy", "Cache policy",
+  [XeTile_CachePolicyCached, XeTile_CachePolicyUncached,
+   XeTile_CachePolicyStreaming, XeTile_CachePolicyInvalid,
+   XeTile_CachePolicyWriteBack, XeTile_CachePolicyWriteThrough]> {
+  let genSpecializedAttr = 0; // attribute is declared below via EnumAttr
+  let cppNamespace = "::imex::xetile";
+}
+
+def XeTile_CacheHintAttr
+  : EnumAttr<XeTile_Dialect, XeTile_CachePolicyEnums, "cache_hint"> {
+    let summary = [{Describe the cache settings for prefetch/load/store operators}];
+    let assemblyFormat = "`<` $value `>`"; // e.g. #xetile.cache_hint<uncached>
+}
+
+
 #endif // _XETILE_ATTR_DEF_TD_INCLUDED_
diff --git a/include/imex/Dialect/XeTile/IR/XeTileOps.td b/include/imex/Dialect/XeTile/IR/XeTileOps.td
@@ -340,10 +340,13 @@ def XeTile_PrefetchTileOp : XeTile_Op<"prefetch_tile", []> {
 
     }];
 
-    let arguments = (ins XeTile:$tile);
+    let arguments = (ins XeTile:$tile,
+                         OptionalAttr<XeTile_CacheHintAttr>: $l1_hint,
+                         OptionalAttr<XeTile_CacheHintAttr>: $l2_hint,
+                         OptionalAttr<XeTile_CacheHintAttr>: $l3_hint);
 
     let assemblyFormat = [{
-        $tile attr-dict `:`  qualified(type($tile))
+        $tile attr-dict `:` qualified(type($tile))
     }];
 }
 
diff --git a/lib/Conversion/XeTileToXeGPU/XeTileOpConversion.cpp b/lib/Conversion/XeTileToXeGPU/XeTileOpConversion.cpp
@@ -451,6 +451,28 @@ class SgInitTileOpPattern
   }
 };
 
+static mlir::xegpu::CachePolicy // XeTile cache hint -> XeGPU cache policy
+translateCachePolicy(imex::xetile::CachePolicyAttr val) {
+  if (!val) // null attribute, e.g. an omitted optional hint: use CACHED
+    return mlir::xegpu::CachePolicy::CACHED;
+
+  switch (val.getValue()) { // each XeTile case maps to its XeGPU namesake
+  case imex::xetile::CachePolicy::CACHED:
+    return mlir::xegpu::CachePolicy::CACHED;
+  case imex::xetile::CachePolicy::UNCACHED:
+    return mlir::xegpu::CachePolicy::UNCACHED;
+  case imex::xetile::CachePolicy::STREAMING:
+    return mlir::xegpu::CachePolicy::STREAMING;
+  case imex::xetile::CachePolicy::READ_INVALIDATE:
+    return mlir::xegpu::CachePolicy::READ_INVALIDATE;
+  case imex::xetile::CachePolicy::WRITE_BACK:
+    return mlir::xegpu::CachePolicy::WRITE_BACK;
+  case imex::xetile::CachePolicy::WRITE_THROUGH:
+    return mlir::xegpu::CachePolicy::WRITE_THROUGH;
+  }
+  llvm_unreachable("Invalid CachePolicy value"); // all cases above return
+}
+
 // It lowers a XeTile::prefetch_tile into one or more mlir::xegpu::prefetch_2d.
 // The adaptor will provide the set of xegpu.create_nd_desc lowered for
 // its input tile.
@@ -481,12 +503,14 @@ struct SgPrefetchTileOpPattern
       return mlir::failure();
     }
 
-    auto L1 = mlir::xegpu::CachePolicyAttr::get(
-        op.getContext(), mlir::xegpu::CachePolicy::CACHED);
-    auto L2 = mlir::xegpu::CachePolicyAttr::get(
-        op.getContext(), mlir::xegpu::CachePolicy::CACHED);
-    auto L3 = mlir::xegpu::CachePolicyAttr::get(
-        op.getContext(), mlir::xegpu::CachePolicy::CACHED);
+    auto getCachePolicy = [&](imex::xetile::CachePolicyAttr val) {
+      return mlir::xegpu::CachePolicyAttr::get(op.getContext(),
+                                               translateCachePolicy(val));
+    };
+
+    auto L1 = getCachePolicy(op.getL1HintAttr());
+    auto L2 = getCachePolicy(op.getL2HintAttr());
+    auto L3 = getCachePolicy(op.getL3HintAttr());
 
     for (auto tile : tiles) {
       rewriter.create<mlir::xegpu::PrefetchNdOp>(op.getLoc(), tile, L1, L2, L3);
diff --git a/test/Conversion/XeTileToXeGPU/prefetch.mlir b/test/Conversion/XeTileToXeGPU/prefetch.mlir
@@ -0,0 +1,29 @@
+// RUN: imex-opt --split-input-file --convert-xetile-to-xegpu %s -verify-diagnostics -o -| FileCheck %s
+
+// CHECK-LABEL: test_prefetch
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<cached>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<cached>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<cached>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<cached>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<cached>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<cached>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<cached>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<cached>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<uncached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<streaming>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<uncached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<streaming>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<uncached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<streaming>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<uncached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<streaming>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<uncached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<streaming>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<uncached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<streaming>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<uncached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<streaming>}>
+//       CHECK:  xegpu.prefetch_nd %{{.*}} <{l1_hint = #xegpu.cache_hint<uncached>, l2_hint = #xegpu.cache_hint<cached>, l3_hint = #xegpu.cache_hint<streaming>}>
+//       CHECK:  gpu.return
+gpu.module @test_kernel {
+gpu.func @test_prefetch(%a: memref<2x64xf16>) {
+  %c0 = arith.constant 0 : index
+  %0 = xetile.init_tile %a[%c0, %c0] : memref<2x64xf16> -> !xetile.tile<2x64xf16, #xetile.tile_attr<inner_blocks = [1, 16]>>
+  xetile.prefetch_tile %0 : !xetile.tile<2x64xf16, #xetile.tile_attr<inner_blocks = [1, 16]>> // no hints: the first eight expected ops are cached at all three levels
+  xetile.prefetch_tile %0 {l1_hint = #xetile.cache_hint<uncached>, l3_hint = #xetile.cache_hint<streaming>} : !xetile.tile<2x64xf16, #xetile.tile_attr<inner_blocks = [1, 16]>> // l2_hint omitted: the last eight expected ops keep l2 cached
+  gpu.return
+}
+}