Skip to content

Commit ab59c46

Browse files
committed
save work
1 parent 4b5cffb commit ab59c46

File tree

3 files changed

+51
-64
lines changed

3 files changed

+51
-64
lines changed

mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/TypeSwitch.h"
1515
#include "llvm/Support/Casting.h"
1616
#include "llvm/Support/LogicalResult.h"
17+
#include <cassert>
1718

1819
namespace mlir {
1920
namespace xegpu {
@@ -281,11 +282,11 @@ LogicalResult TensorDescType::verify(
281282
// Validate subgroup mapping rules for scattered tensors.
282283
// A work-item's slice of the tensor with shape [sg_size] or
283284
// [sg_size, chunk_size] will be [1] or [1, 32/element_ty_bit_width]
284-
// respectively, the mapping should reflect that.
285+
// respectively, the mapping should reflect that. This is because each
286+
// work item accesses data in 32-bit granularity.
285287
if (wiData[0] != 1)
286288
return emitError()
287289
<< "cannot map over non-contiguous scattered row elements";
288-
289290
if (wiData[1] != (32 / elementType.getIntOrFloatBitWidth()))
290291
return emitError() << "work item data mapping must match the number of "
291292
"contiguous elements";
@@ -351,14 +352,13 @@ FailureOr<VectorType> TensorDescType::getDistributedVectorType() {
351352
}
352353

353354
// Case 1: regular loads/stores
354-
auto scatterAttr =
355-
llvm::dyn_cast_if_present<ScatterTensorDescAttr>(getEncoding());
355+
auto scatterAttr = getEncodingAsScatterTensorDescAttr();
356356
if (scatterAttr) {
357357
auto chunkSize = scatterAttr.getChunkSize().getInt();
358-
// Check if the first dimension of the tensor descriptor shape is
358+
// Verify that the first dimension of the tensor descriptor shape is
359359
// distributable.
360-
if (tdescShape[0] % (wiLayout[0]) != 0)
361-
return failure();
360+
assert(tdescShape[0] % (wiLayout[0]) == 0 &&
361+
"tensor descriptor shape is not distributable");
362362
if (chunkSize > 1)
363363
return VectorType::get({chunkSize / wiDataSize, wiDataSize},
364364
getElementType());
@@ -369,17 +369,17 @@ FailureOr<VectorType> TensorDescType::getDistributedVectorType() {
369369
// Tensor descriptor shape can be 1D. For the 1D case, outer dims of wiData
370370
// and wiLayout must be 1.
371371
if (tdescShape.size() == 1) {
372-
if (wiData[0] != 1 || wiLayout[0] != 1)
373-
return failure();
372+
assert((wiData[0] == 1 && wiLayout[0] == 1) &&
373+
"wi_data[0] and wi_layout[0] must be 1 for 1D tensor descriptor");
374374
wiData = {wiData[1]};
375375
wiLayout = {wiLayout[1]};
376376
}
377377
// Check if the tensor descriptor shape is distributable.
378378
int64_t tensorSize = 1;
379379
for (auto [tdescDim, wiDim, wiDataDim] :
380380
llvm::zip_equal(tdescShape, wiLayout, wiData)) {
381-
if (tdescDim % (wiDim * wiDataDim) != 0)
382-
return failure();
381+
assert((tdescDim % (wiDim * wiDataDim) == 0) &&
382+
"tensor descriptor shape is not distributable");
383383
tensorSize *= tdescDim;
384384
}
385385
// tensorSize must be adjusted for array_length.

mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp

Lines changed: 38 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "mlir/Dialect/XeGPU/IR/XeGPU.h"
1212
#include "mlir/IR/Builders.h"
1313
#include "mlir/IR/BuiltinTypes.h"
14+
#include "mlir/IR/Diagnostics.h"
1415
#include "mlir/IR/TypeUtilities.h"
1516
#include "mlir/Support/LLVM.h"
1617

@@ -76,6 +77,39 @@ static bool isWriteHintOrNone(const CachePolicyAttr &attr) {
7677
kind == CachePolicy::WRITE_BACK || kind == CachePolicy::WRITE_THROUGH;
7778
}
7879

80+
// Helper to validate value shape of LoadNd and StoreNd ops.
81+
static LogicalResult
82+
isArgShapesValid(TensorDescType tdescTy, VectorType valueTy,
83+
ArrayRef<int64_t> adjustedTdescShape,
84+
function_ref<InFlightDiagnostic()> emitError) {
85+
auto sgMap = tdescTy.getSGMapAttr();
86+
auto valueShape = valueTy.getShape();
87+
// sg_map not present means IR is in SIMD mode. In this case value shape must
88+
// match adjusted tensor descriptor shape.
89+
if (!sgMap)
90+
return valueShape == adjustedTdescShape
91+
? success()
92+
: emitError()
93+
<< "Value shape " << makeString(valueShape)
94+
<< " is not consistent with tensor descriptor " << tdescTy;
95+
96+
// sg_map present means IR is in SIMT mode. In this case sg_map determines the
97+
// value shape.
98+
auto expectedValueShapeOrFailure = tdescTy.getDistributedVectorType();
99+
if (failed(expectedValueShapeOrFailure))
100+
return emitError() << "Failed to compute distributed vector shape for "
101+
"tensor descriptor "
102+
<< tdescTy;
103+
104+
return valueTy == expectedValueShapeOrFailure.value()
105+
? success()
106+
: emitError()
107+
<< "Result shape " << makeString(valueShape)
108+
<< " is not consistent with distributed vector shape "
109+
<< makeString(expectedValueShapeOrFailure.value().getShape())
110+
<< " for tensor descriptor " << tdescTy;
111+
}
112+
79113
//===----------------------------------------------------------------------===//
80114
// XeGPU_CreateNdDescOp
81115
//===----------------------------------------------------------------------===//
@@ -282,31 +316,8 @@ LogicalResult LoadNdOp::verify() {
282316
adjustedTdescShape.insert(it, array_len);
283317
}
284318

285-
auto sgMap = tdescTy.getSGMapAttr();
286-
// sg_map not present means IR is in SIMD mode. In this case value shape must
287-
// match adjusted tensor descriptor shape.
288-
if (!sgMap)
289-
return valueShape == adjustedTdescShape
290-
? success()
291-
: emitOpError()
292-
<< "Result shape " << makeString(valueShape)
293-
<< " is not consistent with tensor descriptor " << tdescTy;
294-
295-
// sg_map present means IR is in SIMT mode. In this case sg_map determines the
296-
// value shape.
297-
auto expectedValueShapeOrFailure = tdescTy.getDistributedVectorType();
298-
if (failed(expectedValueShapeOrFailure))
299-
return emitOpError() << "Failed to compute distributed vector shape for "
300-
"tensor descriptor "
301-
<< tdescTy;
302-
303-
return valueTy == expectedValueShapeOrFailure.value()
304-
? success()
305-
: emitOpError()
306-
<< "Result shape " << makeString(valueShape)
307-
<< " is not consistent with distributed vector shape "
308-
<< makeString(expectedValueShapeOrFailure.value().getShape())
309-
<< " for tensor descriptor " << tdescTy;
319+
return isArgShapesValid(tdescTy, valueTy, adjustedTdescShape,
320+
[&]() { return emitOpError(); });
310321
}
311322

312323
//===----------------------------------------------------------------------===//
@@ -337,32 +348,8 @@ LogicalResult StoreNdOp::verify() {
337348
auto tdescShape = getShapeOf(dstTy);
338349
auto valueShape = getShapeOf(valTy);
339350

340-
auto sgMap = dstTy.getSGMapAttr();
341-
// sg_map not present means IR is in SIMD mode. In this case value shape must
342-
// match adjusted tensor descriptor shape.
343-
if (!sgMap)
344-
return valueShape == tdescShape
345-
? success()
346-
: emitOpError()
347-
<< "Result shape " << makeString(valueShape)
348-
<< " is not consistent with tensor descriptor shape "
349-
<< makeString(tdescShape);
350-
351-
// sg_map present means IR is in SIMT mode. In this case sg_map determines the
352-
// value shape.
353-
auto expectedValueShapeOrFailure = dstTy.getDistributedVectorType();
354-
if (failed(expectedValueShapeOrFailure))
355-
return emitOpError() << "Failed to compute distributed vector shape for "
356-
"tensor descriptor "
357-
<< dstTy;
358-
359-
return valTy == expectedValueShapeOrFailure.value()
360-
? success()
361-
: emitOpError()
362-
<< "Result shape " << makeString(valueShape)
363-
<< " is not consistent with distributed vector shape "
364-
<< makeString(expectedValueShapeOrFailure.value().getShape())
365-
<< " for tensor descriptor " << dstTy;
351+
return isArgShapesValid(dstTy, valTy, tdescShape,
352+
[&]() { return emitOpError(); });
366353
}
367354

368355
//===----------------------------------------------------------------------===//

mlir/test/Dialect/XeGPU/invalid.mlir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ func.func @test_load_nd_sg_map(%src: memref<24x32xf32>) {
105105
func.func @test_load_nd_vc_6(%src: memref<24x32xf32>) {
106106
%1 = xegpu.create_nd_tdesc %src[0, 0] : memref<24x32xf32> ->
107107
!xegpu.tensor_desc<8x16xf32>
108-
// expected-error@+1 {{Result shape [8, 1] is not consistent with tensor descriptor}}
108+
// expected-error@+1 {{Value shape [8, 1] is not consistent with tensor descriptor}}
109109
%2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint<cached>,
110110
l2_hint = #xegpu.cache_hint<uncached>}>
111111
: !xegpu.tensor_desc<8x16xf32> -> vector<8x1xf32>
@@ -157,7 +157,7 @@ func.func @test_store_nd_sg_map(%dst: memref<24x32xf32>, %data: vector<2xf32>) {
157157
func.func @test_store_nd_vc_5(%dst: memref<24x32xf32>, %data: vector<8x1xf32>) {
158158
%1 = xegpu.create_nd_tdesc %dst[0, 0] : memref<24x32xf32> ->
159159
!xegpu.tensor_desc<8x16xf32>
160-
// expected-error@+1 {{Result shape [8, 1] is not consistent with tensor descriptor shape [8, 16]}}
160+
// expected-error@+1 {{Value shape [8, 1] is not consistent with tensor descriptor}}
161161
xegpu.store_nd %data, %1 : vector<8x1xf32>, !xegpu.tensor_desc<8x16xf32>
162162
return
163163
}

0 commit comments

Comments
 (0)