Add SliceAttr layout check to WgToSgVectorShapeCastOp

nbpatel · nbpatel · commit 77f32611477d · 2025-09-09T04:04:31.000Z
diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp
@@ -1000,6 +1000,23 @@ struct WgToSgVectorShapeCastOp
     if (!onlyUnitDims(srcType.getShape(), sgShape))
       return failure();
 
+    // Verify the layouts: when the cast expands dims, the source operand's
+    // layout must be a SliceAttr; when it reduces dims, the result's layout
+    // must be a SliceAttr.
+    int srcRank = srcType.getRank();
+    int dstRank = sgShape.size();
+    if (dstRank > srcRank) {
+      // Expanding dims: input operand's layout must be a SliceAttr
+      auto srcLayout = xegpu::getDistributeLayoutAttr(op.getSource());
+      if (!srcLayout || !isa<xegpu::SliceAttr>(srcLayout))
+        return failure();
+    } else if (dstRank < srcRank) {
+      // Reducing dims: result's layout must be a SliceAttr
+      auto resLayout = xegpu::getDistributeLayoutAttr(op.getResult());
+      if (!resLayout || !isa<xegpu::SliceAttr>(resLayout))
+        return failure();
+    }
+
     SmallVector<Value> newShapeCastOps;
     for (auto src : adaptor.getSource()) {
       auto newShapeCast =
diff --git a/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir b/mlir/test/Dialect/XeGPU/xegpu-wg-to-sg-unify-ops.mlir
@@ -408,23 +408,20 @@ gpu.module @test_distribution {
   }
 
   // CHECK-LABEL: vector_shape_cast
-  gpu.func @vector_shape_cast(%src: memref<256x128xf32>) {
-    %tdesc = xegpu.create_nd_tdesc %src : memref<256x128xf32>
-      -> !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
-    %load =  xegpu.load_nd %tdesc[0, 0]
-      : !xegpu.tensor_desc<256x128xf32, #xegpu.layout<sg_layout = [8, 4], sg_data = [32, 32], lane_layout = [1, 16], lane_data = [1, 1]>>
-      -> vector<256x128xf32>
-    //CHECK: vector.shape_cast {{.*}} : vector<32x32xf32> to vector<32x1x32x1xf32>
-    %cast = vector.shape_cast %load {layout_result_0 = #xegpu.layout<sg_layout = [8, 1, 4, 1], sg_data = [32, 1, 32, 1]>} : vector<256x128xf32> to vector<256x1x128x1xf32>
+  gpu.func @vector_shape_cast() {
+    %cst = arith.constant {layout_result_0 = #xegpu.slice<#xegpu.layout<sg_layout = [8, 1, 1, 4], sg_data = [1, 1, 1, 32]>, dims = [0, 1, 2]>} dense<10> : vector<128xindex>
+    %step = vector.step {layout_result_0 = #xegpu.slice<#xegpu.layout<sg_layout = [8, 1, 1, 4], sg_data = [1, 1, 1, 32]>, dims = [0, 1, 2]>} : vector<128xindex>
+    %muli = arith.muli %cst, %step {layout_result_0 = #xegpu.slice<#xegpu.layout<sg_layout = [8, 1, 1, 4], sg_data = [1, 1, 1, 32]>, dims = [0, 1, 2]>} : vector<128xindex>
+    //CHECK: vector.shape_cast {{.*}} : vector<32xindex> to vector<1x1x1x32xindex>
+    %shape_cast = vector.shape_cast %muli {layout_result_0 = #xegpu.layout<sg_layout = [8, 1, 1, 4], sg_data = [1, 1, 1, 32]>} : vector<128xindex> to vector<1x1x1x128xindex>
     gpu.return
   }
 
-  // CHECK-LABEL: broadcast
-  // CHECK-SAME: %[[ARG_0:.*]]: index, %[[ARG_1:.*]]: index
-  gpu.func @broadcast(%arg0: index, %arg1: index) {
-      %muli = arith.muli %arg0, %arg1 : index
-      // CHECK: vector.broadcast {{.*}} : index to vector<1x1x1x32xindex>
-      %broadcast = vector.broadcast %muli {layout_result_0 = #xegpu.layout<sg_layout = [4, 2, 6, 1], sg_data = [1, 1, 1, 32]>} : index to vector<4x2x6x32xindex>
-      gpu.return
-   }
+  // CHECK-LABEL: vector_broadcast
+  gpu.func @vector_broadcast(%arg0: index, %arg1: index) {
+    %muli = arith.muli %arg0, %arg1 : index
+    // CHECK: vector.broadcast {{.*}} : index to vector<1x1x1x32xindex>
+    %broadcast = vector.broadcast %muli {layout_result_0 = #xegpu.layout<sg_layout = [4, 2, 6, 1], sg_data = [1, 1, 1, 32]>} : index to vector<4x2x6x32xindex>
+    gpu.return
+  }
 }