Refine the XeGPU verifiers for LoadGatherOp and StoreScatterOp

chencha3 · chencha3 · commit 775d039bb7a5 · 2025-04-15T18:46:55.000Z
diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUOps.cpp
@@ -547,38 +547,27 @@ LogicalResult LoadGatherOp::verify() {
     return emitOpError("dim-0 of the Mask and TensorDesc should be the same.");
 
   auto chunkSize = tdescTy.getChunkSize();
-  // for SIMT code, the value should be 1D vector with size of chunkSize.
-  if (valueTy.getRank() == 1 && valueTy.getNumElements() != tdescShape[0]) {
-    if (valueTy.getNumElements() != chunkSize) {
+
+  // SIMT case: the value is a 1D vector with exactly chunkSize elements.
+  if (valueTy.getRank() == 1 && valueTy.getNumElements() == chunkSize) {
+    if (tdescTy.getLayoutAttr())
       return emitOpError()
-             << "Result shape " << makeString(valueShape)
-             << " is not a valid distribution for tensor descriptor "
-             << tdescTy;
-    } else { // valid SIMT code doesn't need LayoutAttr and TransposeAttr.
-      if (tdescTy.getLayoutAttr())
-        return emitOpError()
-               << "TensorDesc doesn't need LayoutAttr for SIMT code";
-      if (getTransposeAttr())
-        return emitOpError() << "doesn't need TransposeAttr for SIMT code";
-    }
-    return success();
-  } else if (valueTy.getRank() == 1 && tdescShape[0] == chunkSize) {
-    // for 1D vector and valueTy.getNumElements() == tdescShape[0] case,
-    // it is a valid SIMT code if chunkSize happens to be the same as
-    // subgroup size, e.g., tensor_desc<16x16xf16, chunkSize = 16>
+             << "TensorDesc doesn't need LayoutAttr for SIMT code";
+    if (getTransposeAttr())
+      return emitOpError() << "doesn't need TransposeAttr for SIMT code";
     return success();
   }
 
-  // For SIMD code verification.
-  if (tdescTy.getRank() == 2) {
+  if (tdescTy.getRank() == 2 && valueTy.getRank() == 2) {
     if (!getTransposeAttr())
       return emitOpError("load of rank-2 tensor has to be transposed.");
     transpose({1, 0}, tdescShape);
   }
 
   if (tdescShape != valueShape)
     return emitOpError() << "Result shape " << makeString(valueShape)
-                         << " is not consistent with tensor descriptor "
+                         << " is neither a valid distribution for SIMT nor "
+                            "consistent with the tensor descriptor for SIMD "
                          << tdescTy;
   return success();
 }
@@ -613,38 +602,27 @@ LogicalResult StoreScatterOp::verify() {
     return emitOpError("dim-0 of the Mask and TensorDesc should be the same.");
 
   auto chunkSize = tdescTy.getChunkSize();
-  // for SIMT code, the value should be 1D vector with size of chunkSize.
-  if (valueTy.getRank() == 1 && valueTy.getNumElements() != tdescShape[0]) {
-    if (valueTy.getNumElements() != chunkSize) {
+
+  // SIMT case: the value is a 1D vector with exactly chunkSize elements.
+  if (valueTy.getRank() == 1 && valueTy.getNumElements() == chunkSize) {
+    if (tdescTy.getLayoutAttr())
       return emitOpError()
-             << "Value shape " << makeString(valueShape)
-             << " is not a valid distribution for tensor descriptor "
-             << tdescTy;
-    } else { // valid SIMT code doesn't need LayoutAttr and TransposeAttr.
-      if (tdescTy.getLayoutAttr())
-        return emitOpError()
-               << "TensorDesc doesn't need LayoutAttr for SIMT code";
-      if (getTransposeAttr())
-        return emitOpError() << "doesn't need TransposeAttr for SIMT code";
-    }
-    return success();
-  } else if (valueTy.getRank() == 1 && tdescShape[0] == chunkSize) {
-    // for 1D vector and valueTy.getNumElements() == tdescShape[0] case,
-    // it is a valid SIMT code if chunkSize happens to be the same as
-    // subgroup size, e.g., tensor_desc<16x16xf16, chunkSize = 16>
+             << "TensorDesc doesn't need LayoutAttr for SIMT code";
+    if (getTransposeAttr())
+      return emitOpError() << "doesn't need TransposeAttr for SIMT code";
     return success();
   }
 
-  // for SIMD code verification.
-  if (tdescTy.getRank() == 2) {
+  if (tdescTy.getRank() == 2 && valueTy.getRank() == 2) {
     if (!getTransposeAttr())
       return emitOpError("Store of a rank-2 tensor has to be transposed.");
     transpose({1, 0}, tdescShape);
   }
 
   if (tdescShape != valueShape)
     return emitOpError() << "Value shape " << makeString(valueShape)
-                         << " is not consistent with tensor descriptor "
+                         << " is neither a valid distribution for SIMT nor "
+                            "consistent with the tensor descriptor for SIMD "
                          << tdescTy;
 
   return success();
diff --git a/mlir/test/Dialect/XeGPU/invalid.mlir b/mlir/test/Dialect/XeGPU/invalid.mlir
@@ -255,7 +255,7 @@ func.func @test_load_gather_simt_1(%src: ui64) {
   %0 = arith.constant dense<1>: vector<4xi1>
   %cst = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   %1 = xegpu.create_tdesc %src, %cst : ui64, vector<4xindex> -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2>>
-  // expected-error@+1 {{Result shape [6] is not a valid distribution for tensor descriptor}}
+  // expected-error@+1 {{Result shape [6] is neither a valid distribution for SIMT nor consistent with the tensor descriptor for SIMD}}
   %2 = xegpu.load %1, %0 <{l1_hint = #xegpu.cache_hint<cached>}> : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2>>, vector<4xi1> -> vector<6xf32>
   return
 }
@@ -266,7 +266,7 @@ func.func @test_store_scatter_simt_1(%src: ui64) {
   %cst = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
   %val = arith.constant dense<2.9>: vector<6xf32>
   %1 = xegpu.create_tdesc %src, %cst : ui64, vector<4xindex> -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2>>
-  // expected-error@+1 {{Value shape [6] is not a valid distribution for tensor descriptor}}
+  // expected-error@+1 {{Value shape [6] is neither a valid distribution for SIMT nor consistent with the tensor descriptor for SIMD}}
   xegpu.store %val, %1, %0 <{l1_hint = #xegpu.cache_hint<cached>}> : vector<6xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2>>, vector<4xi1>
   return
 }