Commit 16d1980: fix the format
Parent: f8dda5c

3 files changed, 40 insertions(+), 22 deletions(-)

mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp (15 additions, 13 deletions)

@@ -13,6 +13,8 @@
 #include "llvm/ADT/TypeSwitch.h"
 #include <numeric>
 
+using std::optional;
+
 namespace mlir {
 namespace xegpu {
 
@@ -37,18 +39,18 @@ bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
                                          xegpu::LayoutAttr attr) {
   assert(attr && "Layout attribute is missing.");
 
-  // Checks whether the given shape can be evenly distributed using the specified
-  // layout and data attributes. If successful, it returns the work size for each
-  // compute unit; otherwise, it returns `std::nullopt`. The work size per compute
-  // unit is calculated as follows:
+  // Checks whether the given shape can be evenly distributed using the
+  // specified layout and data attributes. If successful, it returns the work
+  // size for each compute unit; otherwise, it returns `std::nullopt`. The work
+  // size per compute unit is calculated as follows:
   //   - If `data` is null: newShape[i] = shape[i] / layout[i]
   //   - If `data` is not null: newShape[i] = data[i]
-  // When round-robin distribution (`use_rr`) is enabled, `shape[i]` can be smaller
-  // than `layout[i] * data[i]`, allowing multiple compute units to share the data.
-  auto tryDistribute =
-      [&](llvm::ArrayRef<int64_t> shape, DenseI32ArrayAttr layout,
-          DenseI32ArrayAttr data,
-          bool use_rr = true) -> std::optional<SmallVector<int64_t>> {
+  // When round-robin distribution (`rr`) is enabled, `shape[i]` can be
+  // smaller than `layout[i] * data[i]`, allowing multiple compute units to
+  // share the data.
+  auto tryDistribute = [&](llvm::ArrayRef<int64_t> shape,
+                           DenseI32ArrayAttr layout, DenseI32ArrayAttr data,
+                           bool rr = true) -> optional<SmallVector<int64_t>> {
     llvm::SmallVector<int64_t> newShape(shape);
     if (layout) {
       auto vec = llvm::to_vector_of<int64_t>(layout.asArrayRef());
@@ -65,7 +67,7 @@ bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
     if (vec.size() != shape.size())
       return std::nullopt;
     auto ratio = computeShapeRatio(newShape, vec);
-    if (!ratio.has_value() && use_rr)
+    if (!ratio.has_value() && rr)
       ratio = computeShapeRatio(vec, newShape);
     if (!ratio.has_value())
       return std::nullopt;
@@ -91,8 +93,8 @@ bool XeGPUDialect::isEvenlyDistributable(llvm::ArrayRef<int64_t> shape,
   auto instShape = maybeInstShape.value();
 
   // check LaneLayout and LaneData
-  auto maybeLaneShape = tryDistribute(instShape, attr.getLaneLayout(),
-                                      attr.getLaneData(), false);
+  auto maybeLaneShape =
+      tryDistribute(instShape, attr.getLaneLayout(), attr.getLaneData(), false);
   return maybeLaneShape.has_value();
 }
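The rewrapped comment above is the whole contract of tryDistribute, so a compact illustration may help. Below is a minimal, self-contained C++ sketch of the layout-only path (it deliberately omits the `data` handling), assuming a simplified local computeShapeRatio in place of mlir::computeShapeRatio and std::vector in place of llvm::SmallVector:

// Standalone sketch, not the MLIR implementation.
#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

// Per-dimension shape[i] / tile[i]; std::nullopt if any dimension does not
// divide evenly. Simplified stand-in for mlir::computeShapeRatio.
std::optional<std::vector<int64_t>>
computeShapeRatio(const std::vector<int64_t> &shape,
                  const std::vector<int64_t> &tile) {
  if (shape.size() != tile.size())
    return std::nullopt;
  std::vector<int64_t> ratio;
  for (std::size_t i = 0, e = shape.size(); i != e; ++i) {
    if (tile[i] == 0 || shape[i] % tile[i] != 0)
      return std::nullopt;
    ratio.push_back(shape[i] / tile[i]);
  }
  return ratio;
}

// Mirrors the layout-only case of the tryDistribute lambda: each compute
// unit gets shape[i] / layout[i]. When round-robin (rr) is enabled and the
// shape is smaller than the layout, the ratio is retried in the opposite
// direction so multiple compute units can share the data.
std::optional<std::vector<int64_t>>
tryDistribute(const std::vector<int64_t> &shape,
              const std::vector<int64_t> &layout, bool rr = true) {
  auto ratio = computeShapeRatio(shape, layout);
  if (!ratio && rr)
    ratio = computeShapeRatio(layout, shape);
  return ratio;
}

Under these assumptions, tryDistribute({8, 16}, {1, 16}, /*rr=*/false) yields {8, 1}: the per-lane work size [8, 1] that the diagnostics in the test file below refer to.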

mlir/test/Dialect/XeGPU/invalid.mlir (23 additions, 7 deletions)

@@ -98,6 +98,17 @@ func.func @test_load_nd_vc_3(%src: memref<8x16xf16>) {
   return
 }
 
+// -----
+func.func @test_load_nd_vc_4(%src: memref<24x32xf32>) {
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
+    !xegpu.tensor_desc<8x16xf32>
+  // expected-error@+1 {{Result shape [8, 1] is not consistent with tensor descriptor}}
+  %2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint<cached>,
+                          l2_hint = #xegpu.cache_hint<uncached>}>
+    : !xegpu.tensor_desc<8x16xf32> -> vector<8x1xf32>
+  return
+}
+
 // -----
 func.func @test_load_nd_layout(%src: memref<24x32xf32>) {
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<16xf32>
@@ -108,13 +119,10 @@ func.func @test_load_nd_layout(%src: memref<24x32xf32>) {
 }
 
 // -----
-func.func @test_load_nd_vc_6(%src: memref<24x32xf32>) {
-  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
-    !xegpu.tensor_desc<8x16xf32>
-  // expected-error@+1 {{Result shape [8, 1] is not consistent with tensor descriptor}}
-  %2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint<cached>,
-                          l2_hint = #xegpu.cache_hint<uncached>}>
-    : !xegpu.tensor_desc<8x16xf32> -> vector<8x1xf32>
+func.func @test_load_nd_simt(%src: memref<24x32xf32>) {
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+  // expected-error@+1 {{TensorDesc doesn't need LayoutAttr for SIMT code}}
+  %2 = xegpu.load_nd %1 : !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<8xf32>
   return
 }
 
@@ -156,6 +164,14 @@ func.func @test_store_nd_simt(%dst: memref<24x32xf32>, %data: vector<3xf32>) {
   return
 }
 
+// -----
+func.func @test_store_nd_simt(%src: memref<24x32xf32>, %data: vector<8xf32>) {
+  %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+  // expected-error@+1 {{TensorDesc doesn't need LayoutAttr for SIMT code}}
+  xegpu.store_nd %data, %1 : vector<8xf32>, !xegpu.tensor_desc<8x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
+  return
+}
+
 // -----
 func.func @test_store_nd_vc_5(%dst: memref<24x32xf32>, %data: vector<8x1xf32>) {
   %1 = xegpu.create_nd_tdesc %dst[0, 0] : memref<24x32xf32> ->

mlir/test/Dialect/XeGPU/ops.mlir (2 additions, 2 deletions)

@@ -148,8 +148,8 @@ gpu.func @test_load_nd_vc(%src: memref<8x16xf16>) {
 gpu.func @test_load_nd_simt(%src: memref<8x16xf16>) {
   // CHECK: %[[R0:.*]] = xegpu.create_nd_tdesc %arg0[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
   %1 = xegpu.create_nd_tdesc %src[0, 0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
-  // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, packed}> : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
-  %2 = xegpu.load_nd %1 <{packed, l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
+  // CHECK: %[[R1:.*]] = xegpu.load_nd %[[R0]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
+  %2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}>
     : !xegpu.tensor_desc<8x16xf16> -> vector<8xf16>
   gpu.return
 }
