Skip to content

Commit 5820c15

Browse files
committed
save work
1 parent 088a00c commit 5820c15

File tree

2 files changed

+63
-71
lines changed

2 files changed

+63
-71
lines changed

mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp

Lines changed: 21 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "mlir/Dialect/XeGPU/Transforms/Passes.h"
1717
#include "mlir/Dialect/XeGPU/Transforms/Transforms.h"
1818
#include "mlir/IR/Builders.h"
19+
#include "mlir/Support/LLVM.h"
1920
#include "llvm/Support/raw_ostream.h"
2021

2122
namespace mlir {
@@ -242,10 +243,6 @@ class SGMapPropagation : public SparseBackwardDataFlowAnalysis<SGMapLattice> {
242243
ArrayRef<SGMapLattice *> operands,
243244
ArrayRef<const SGMapLattice *> results);
244245

245-
void visitCreateNdDescOp(xegpu::CreateNdDescOp createNdDesc,
246-
ArrayRef<SGMapLattice *> operands,
247-
ArrayRef<const SGMapLattice *> results);
248-
249246
void visitCreateDescOp(xegpu::CreateDescOp createDesc,
250247
ArrayRef<SGMapLattice *> operands,
251248
ArrayRef<const SGMapLattice *> results);
@@ -296,8 +293,6 @@ SGMapPropagation::visitOperation(Operation *op,
296293
visitVectorBitcastOp(bitcast, operands, results);
297294
else if (auto loadGather = dyn_cast<xegpu::LoadGatherOp>(op))
298295
visitLoadGatherOp(loadGather, operands, results);
299-
else if (auto createNdDesc = dyn_cast<xegpu::CreateNdDescOp>(op))
300-
visitCreateNdDescOp(createNdDesc, operands, results);
301296
else if (auto createDesc = dyn_cast<xegpu::CreateDescOp>(op))
302297
visitCreateDescOp(createDesc, operands, results);
303298
else if (auto storeScatter = dyn_cast<xegpu::StoreScatterOp>(op))
@@ -306,6 +301,10 @@ SGMapPropagation::visitOperation(Operation *op,
306301
visitUpdateNdOffsetOp(updateNdOffset, operands, results);
307302
else if (auto reduction = dyn_cast<vector::MultiDimReductionOp>(op))
308303
visitVectorMultiReductionOp(reduction, operands, results);
304+
/// No need to propagate the layout to operands in CreateNdDescOp because they
305+
/// are scalars (offsets, sizes, etc.).
306+
else if (auto createNdDesc = dyn_cast<xegpu::CreateNdDescOp>(op))
307+
return success();
309308
/// All other ops
310309
else {
311310
for (const SGMapLattice *r : results) {
@@ -355,11 +354,6 @@ void SGMapPropagation::visitUpdateNdOffsetOp(
355354
return;
356355
/// Propagate the layout to the source operand.
357356
propagateIfChanged(operands[0], operands[0]->meet(resultLayout));
358-
/// For all other operands use 1D default layout.
359-
SGMap layout = getDefaultSgMap(1);
360-
for (size_t i = 1; i < operands.size(); ++i) {
361-
propagateIfChanged(operands[i], operands[i]->meet(layout));
362-
}
363357
}
364358

365359
/// Set the layouts for DPAS A, B, and C operands.
@@ -403,7 +397,8 @@ void SGMapPropagation::visitLoadNdOp(xegpu::LoadNdOp load,
403397
/// LoadNdOp has the transpose effect. However, at the stage of this analysis
404398
/// this effect is not expected and should be abstracted away. Emit a warning.
405399
if (auto transpose = load.getTranspose()) {
406-
load.emitWarning("Transpose effect is not expected for LoadNdOp");
400+
load.emitWarning("Transpose effect is not expected for LoadNdOp at "
401+
"SGMapPropagation stage.");
407402
tensorDescLayout = valueLayout.getTransposedLayout(transpose.value());
408403
}
409404
/// Propagate the new layout to the tensor descriptor operand.
@@ -476,7 +471,8 @@ void SGMapPropagation::visitLoadGatherOp(
476471
/// LoadGatherOp has the transpose effect. However, at the stage of this
477472
/// analysis this effect is not expected and should be abstracted away. Emit
478473
/// a warning.
479-
load.emitWarning("Transpose effect is not expected for LoadGatherOp");
474+
load.emitWarning("Transpose effect is not expected for LoadGatherOp at "
475+
"SGMapPropagation stage.");
480476
tensorDescLayout = valueLayout.getTransposedLayout({1, 0});
481477
} else
482478
tensorDescLayout = valueLayout;
@@ -488,24 +484,7 @@ void SGMapPropagation::visitLoadGatherOp(
488484
propagateIfChanged(operands[1], operands[1]->meet(maskLayout));
489485
}
490486

491-
/// Propagate the layout of the descriptor to the operands in CreateNdDescOp.
492-
void SGMapPropagation::visitCreateNdDescOp(
493-
xegpu::CreateNdDescOp createNdDesc, ArrayRef<SGMapLattice *> operands,
494-
ArrayRef<const SGMapLattice *> results) {
495-
auto descLayout = results[0]->getValue();
496-
/// Need the layout of the descriptor to propagate to the operands.
497-
if (!descLayout.isAssigned())
498-
return;
499-
/// Propagate the layout to the source operand.
500-
propagateIfChanged(operands[0], operands[0]->meet(descLayout));
501-
/// For all other operands use 1D default layout.
502-
SGMap layout = getDefaultSgMap(1);
503-
for (size_t i = 1; i < operands.size(); ++i) {
504-
propagateIfChanged(operands[i], operands[i]->meet(layout));
505-
}
506-
}
507-
508-
/// Propagate the layout of the descriptor to the source and offset operands in
487+
/// Propagate the layout of the descriptor to the vector offset operand in
509488
/// CreateDescOp.
510489
void SGMapPropagation::visitCreateDescOp(
511490
xegpu::CreateDescOp createDesc, ArrayRef<SGMapLattice *> operands,
@@ -514,8 +493,6 @@ void SGMapPropagation::visitCreateDescOp(
514493
/// Need the layout of the descriptor to propagate to the operands.
515494
if (!descLayout.isAssigned())
516495
return;
517-
/// Propagate the layout to the source operand.
518-
propagateIfChanged(operands[0], operands[0]->meet(descLayout));
519496
/// For offset operand propagate 1D default layout.
520497
SGMap layout = getDefaultSgMap(1);
521498
propagateIfChanged(operands[1], operands[1]->meet(layout));
@@ -526,14 +503,23 @@ void SGMapPropagation::visitCreateDescOp(
526503
void SGMapPropagation::visitStoreScatterOp(
527504
xegpu::StoreScatterOp storeScatter, ArrayRef<SGMapLattice *> operands,
528505
ArrayRef<const SGMapLattice *> results) {
506+
/// Currently, for 2D StoreScatterOp we expect that the height dimension of
507+
/// the tensor descriptor is evenly divisible by the subgroup size.
508+
/// TODO: Add support for other 2D shapes.
509+
auto tdescShape = storeScatter.getTensorDescType().getShape();
510+
if (tdescShape.size() > 1 && tdescShape[0] % subgroupSize != 0) {
511+
storeScatter.emitError("Height dimension of the tensor descriptor should "
512+
"be evenly divisible by the subgroup size.");
513+
return;
514+
}
529515
auto valueLayout = getDefaultSgMap(storeScatter.getValueType());
530516
SGMap storeScatterLayout;
531517
if (storeScatter.getTranspose()) {
532518
/// StoreScatterOp allows transpose effect. However, at the stage of this
533519
/// analysis this effect is not expected and should be abstracted away. Emit
534520
/// a warning.
535-
storeScatter.emitWarning(
536-
"Transpose effect is not expected for StoreScatterOp");
521+
storeScatter.emitWarning("Transpose effect is not expected for "
522+
"StoreScatterOp at SGMapPropagation stage.");
537523
storeScatterLayout = valueLayout.getTransposedLayout({1, 0});
538524
} else
539525
storeScatterLayout = valueLayout;

mlir/test/Dialect/XeGPU/subgroup-map-propagation.mlir

Lines changed: 42 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
// RUN: mlir-opt -xegpu-subgroup-distribute='print-analysis-only=true' -split-input-file %s | FileCheck %s
22

3-
// CHECK: function: test_dpas_op_1:
4-
// CHECK: op : %{{.*}} = arith.constant 0 : index
5-
// CHECK-NEXT: sg_map for result #0: wi_layout: [16], wi_data: [1]
3+
// CHECK: function: test_dpas_f16:
4+
// CHECK-NEXT: argument: <block argument> of type 'memref<8x16xf16>' at index: 0
5+
// CHECK-NEXT: sg_map : Not assigned.
6+
// CHECK-NEXT: argument: <block argument> of type 'memref<16x16xf16>' at index: 1
7+
// CHECK-NEXT: sg_map : Not assigned.
8+
// CHECK-NEXT: argument: <block argument> of type 'memref<8x16xf32>' at index: 2
9+
// CHECK-NEXT: sg_map : Not assigned.
10+
// CHECK-NEXT: op : %{{.*}} = arith.constant 0 : index
11+
// CHECK-NEXT: sg_map for result #0: Not assigned.
612
// CHECK-NEXT: op : %{{.*}} = arith.constant dense<0.000000e+00> : vector<8x16xf32>
713
// CHECK-NEXT: sg_map for result #0: wi_layout: [1, 16], wi_data: [1, 1]
814
// CHECK-NEXT: op : %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -17,7 +23,7 @@
1723
// CHECK-NEXT: sg_map for result #0: wi_layout: [1, 16], wi_data: [1, 1]
1824
// CHECK-NEXT: op : %{{.*}} = xegpu.create_nd_tdesc %{{.*}} : memref<8x16xf32> -> !xegpu.tensor_desc<8x16xf32>
1925
// CHECK-NEXT: sg_map for result #0: wi_layout: [1, 16], wi_data: [1, 1]
20-
func.func @test_dpas_op_1(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<8x16xf32>) {
26+
func.func @test_dpas_f16(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %arg2: memref<8x16xf32>) {
2127
%c0 = arith.constant 0 : index
2228
%cst = arith.constant dense<0.000000e+00> : vector<8x16xf32>
2329
%0 = xegpu.create_nd_tdesc %arg0[%c0, %c0] : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -32,20 +38,20 @@ func.func @test_dpas_op_1(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>, %ar
3238

3339

3440
// -----
35-
// CHECK: function: test_dpas_op_2:
41+
// CHECK: function: test_dpas_i8:
3642
// CHECK-NEXT: argument: <block argument> of type 'vector<8x32xi8>' at index: 0
3743
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [1, 2]
3844
// CHECK-NEXT: argument: <block argument> of type 'vector<32x16xi8>' at index: 1
3945
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [4, 1]
4046
// CHECK-NEXT: argument: <block argument> of type 'memref<8x16xi32>' at index: 2
41-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [1, 1]
47+
// CHECK-NEXT: sg_map : Not assigned.
4248
// CHECK-NEXT: op : %{{.*}} = arith.constant 0 : index
43-
// CHECK-NEXT: sg_map for result #0: wi_layout: [16], wi_data: [1]
49+
// CHECK-NEXT: sg_map for result #0: Not assigned.
4450
// CHECK-NEXT: op : %[[T0:.*]] = xegpu.dpas %{{.*}} : vector<8x32xi8>, vector<32x16xi8> -> vector<8x16xi32>
4551
// CHECK-NEXT: sg_map for result #0: wi_layout: [1, 16], wi_data: [1, 1]
4652
// CHECK-NEXT: op : %[[T1:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<8x16xi32> -> !xegpu.tensor_desc<8x16xi32>
4753
// CHECK-NEXT: sg_map for result #0: wi_layout: [1, 16], wi_data: [1, 1]
48-
func.func @test_dpas_op_2(%arg0: vector<8x32xi8>, %arg1: vector<32x16xi8>, %arg2: memref<8x16xi32>) {
54+
func.func @test_dpas_i8(%arg0: vector<8x32xi8>, %arg1: vector<32x16xi8>, %arg2: memref<8x16xi32>) {
4955
%c0 = arith.constant 0 : index
5056
%0 = xegpu.dpas %arg0, %arg1 : vector<8x32xi8>, vector<32x16xi8> -> vector<8x16xi32>
5157
%1 = xegpu.create_nd_tdesc %arg2[%c0, %c0] : memref<8x16xi32> -> !xegpu.tensor_desc<8x16xi32>
@@ -56,13 +62,13 @@ func.func @test_dpas_op_2(%arg0: vector<8x32xi8>, %arg1: vector<32x16xi8>, %arg2
5662
// -----
5763
// CHECK: function: test_transpose_op_1:
5864
// CHECK-NEXT: argument: <block argument> of type 'memref<8x16xf16>' at index: 0
59-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [1, 1]
65+
// CHECK-NEXT: sg_map : Not assigned.
6066
// CHECK-NEXT: argument: <block argument> of type 'memref<16x16xf16>' at index: 1
61-
// CHECK-NEXT: sg_map : wi_layout: [16, 1], wi_data: [1, 2]
67+
// CHECK-NEXT: sg_map : Not assigned.
6268
// CHECK-NEXT: argument: <block argument> of type 'memref<8x16xf32>' at index: 2
63-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [1, 1]
69+
// CHECK-NEXT: sg_map : Not assigned.
6470
// CHECK-NEXT: op : %{{.*}} = arith.constant 0 : index
65-
// CHECK-NEXT: sg_map for result #0: wi_layout: [16], wi_data: [1]
71+
// CHECK-NEXT: sg_map for result #0: Not assigned.
6672
// CHECK-NEXT: op : %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<8x16xf32>
6773
// CHECK-NEXT: sg_map for result #0: wi_layout: [1, 16], wi_data: [1, 1]
6874
// CHECK-NEXT: op : %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -92,13 +98,13 @@ func.func @test_transpose_op_1(%arg0: memref<8x16xf16>, %arg1: memref<16x16xf16>
9298

9399
// -----
94100
// CHECK: argument: <block argument> of type 'memref<8x16xf16>' at index: 0
95-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [1, 1]
101+
// CHECK-NEXT: sg_map : Not assigned.
96102
// CHECK-NEXT: argument: <block argument> of type 'memref<16x16xf16>' at index: 1
97-
// CHECK-NEXT: sg_map : wi_layout: [16, 1], wi_data: [1, 2]
103+
// CHECK-NEXT: sg_map : Not assigned.
98104
// CHECK-NEXT: argument: <block argument> of type 'memref<8x16xf32>' at index: 2
99-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [1, 1]
105+
// CHECK-NEXT: sg_map : Not assigned.
100106
// CHECK-NEXT: op : %{{.*}} = arith.constant 0 : index
101-
// CHECK-NEXT: sg_map for result #0: wi_layout: [16], wi_data: [1]
107+
// CHECK-NEXT: sg_map for result #0: Not assigned.
102108
// CHECK-NEXT: op : %[[CST:.*]] = arith.constant dense<0.000000e+00> : vector<8x16xf32>
103109
// CHECK-NEXT: sg_map for result #0: wi_layout: [1, 16], wi_data: [1, 1]
104110
// CHECK-NEXT: op : %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -156,13 +162,13 @@ func.func @test_extf_truncf_op(%arg0: !xegpu.tensor_desc<8x16xf16>, %arg1: !xegp
156162
// -----
157163
// CHECK: function: test_load_gather_op_1:
158164
// CHECK-NEXT: argument: <block argument> of type 'memref<8x16xf16>' at index: 0
159-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [1, 1]
165+
// CHECK-NEXT: sg_map : Not assigned.
160166
// CHECK-NEXT: argument: <block argument> of type 'memref<256xf16>' at index: 1
161-
// CHECK-NEXT: sg_map : wi_layout: [16, 1], wi_data: [1, 2]
167+
// CHECK-NEXT: sg_map : Not assigned.
162168
// CHECK-NEXT: argument: <block argument> of type 'memref<8x16xf32>' at index: 2
163-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [1, 1]
169+
// CHECK-NEXT: sg_map : Not assigned.
164170
// CHECK-NEXT: op : %{{.*}} = arith.constant 0 : index
165-
// CHECK-NEXT: sg_map for result #0: wi_layout: [16], wi_data: [1]
171+
// CHECK-NEXT: sg_map for result #0: Not assigned.
166172
// CHECK-NEXT: op : %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<8x16xf16> -> !xegpu.tensor_desc<8x16xf16>
167173
// CHECK-NEXT: sg_map for result #0: wi_layout: [1, 16], wi_data: [1, 1]
168174
// CHECK-NEXT: op : %[[T1:.*]] = xegpu.load_nd %[[T0]] : !xegpu.tensor_desc<8x16xf16> -> vector<8x16xf16>
@@ -195,7 +201,7 @@ func.func @test_load_gather_op_1(%arg0: memref<8x16xf16>, %arg1: memref<256xf16>
195201

196202
// -----
197203
// CHECK: argument: <block argument> of type 'memref<256xf32>' at index: 0
198-
// CHECK-NEXT: sg_map : wi_layout: [16], wi_data: [1]
204+
// CHECK-NEXT: sg_map : Not assigned.
199205
// CHECK-NEXT: argument: <block argument> of type '!xegpu.tensor_desc<16xf32>' at index: 1
200206
// CHECK-NEXT: sg_map : wi_layout: [16], wi_data: [1]
201207
// CHECK-NEXT: op : %[[CST:.*]] = arith.constant dense<[0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240]> : vector<16xindex>
@@ -217,7 +223,7 @@ func.func @test_load_gather_op_2(%arg0: memref<256xf32>, %arg1: !xegpu.tensor_de
217223

218224
// -----
219225
// CHECK: argument: <block argument> of type 'memref<128xf32>' at index: 0
220-
// CHECK-NEXT: sg_map : wi_layout: [16, 1], wi_data: [1, 1]
226+
// CHECK-NEXT: sg_map : Not assigned.
221227
// CHECK-NEXT: op : %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<8x16xf32>
222228
// CHECK-NEXT: sg_map for result #0: wi_layout: [1, 16], wi_data: [1, 1]
223229
// CHECK-NEXT: op : %[[CST0:.*]] = arith.constant dense<true> : vector<16xi1>
@@ -239,7 +245,7 @@ func.func @test_store_scatter_op_1(%arg0: memref<128xf32>) {
239245
// CHECK: argument: <block argument> of type 'vector<16xf32>' at index: 0
240246
// CHECK-NEXT: sg_map : wi_layout: [16], wi_data: [1]
241247
// CHECK-NEXT: argument: <block argument> of type 'memref<256xf32>' at index: 1
242-
// CHECK-NEXT: sg_map : wi_layout: [16], wi_data: [1]
248+
// CHECK-NEXT: sg_map : Not assigned.
243249
// CHECK-NEXT: op : %[[CST:.*]] = arith.constant dense<[0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240]> : vector<16xindex>
244250
// CHECK-NEXT: sg_map for result #0: wi_layout: [16], wi_data: [1]
245251
// CHECK-NEXT: op : %[[CST1:.*]] = arith.constant dense<true> : vector<16xi1>
@@ -256,13 +262,13 @@ func.func @test_store_scatter_op_2(%arg0: vector<16xf32>, %arg1: memref<256xf32>
256262

257263
// -----
258264
// CHECK: argument: <block argument> of type 'memref<8x16xi16>' at index: 0
259-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [1, 1]
265+
// CHECK-NEXT: sg_map : Not assigned.
260266
// CHECK-NEXT: argument: <block argument> of type 'memref<32x16xi8>' at index: 1
261-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [4, 1]
267+
// CHECK-NEXT: sg_map : Not assigned.
262268
// CHECK-NEXT: argument: <block argument> of type 'memref<8x16xi32>' at index: 2
263-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [1, 1]
269+
// CHECK-NEXT: sg_map : Not assigned.
264270
// CHECK-NEXT: op : %{{.*}} = arith.constant 0 : index
265-
// CHECK-NEXT: sg_map for result #0: wi_layout: [16], wi_data: [1]
271+
// CHECK-NEXT: sg_map for result #0: Not assigned.
266272
// CHECK-NEXT: op : %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<8x16xi16> -> !xegpu.tensor_desc<8x16xi16>
267273
// CHECK-NEXT: sg_map for result #0: wi_layout: [1, 16], wi_data: [1, 1]
268274
// CHECK-NEXT: op : %[[T1:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<32x16xi8> -> !xegpu.tensor_desc<32x16xi8>
@@ -292,13 +298,13 @@ func.func @test_vector_bitcast_op_1(%arg0: memref<8x16xi16>, %arg1: memref<32x16
292298

293299
// -----
294300
// CHECK: argument: <block argument> of type 'memref<8x32xi8>' at index: 0
295-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [1, 2]
301+
// CHECK-NEXT: sg_map : Not assigned.
296302
// CHECK-NEXT: argument: <block argument> of type 'memref<16x32xi8>' at index: 1
297-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [4, 1]
303+
// CHECK-NEXT: sg_map : Not assigned.
298304
// CHECK-NEXT: argument: <block argument> of type 'memref<8x16xf32>' at index: 2
299-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [1, 1]
305+
// CHECK-NEXT: sg_map : Not assigned.
300306
// CHECK-NEXT: op : %{{.*}} = arith.constant 0 : index
301-
// CHECK-NEXT: sg_map for result #0: wi_layout: [16], wi_data: [1]
307+
// CHECK-NEXT: sg_map for result #0: Not assigned.
302308
// CHECK-NEXT: op : %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<8x32xi8> -> !xegpu.tensor_desc<8x32xi8>
303309
// CHECK-NEXT: sg_map for result #0: wi_layout: [1, 16], wi_data: [1, 2]
304310
// CHECK-NEXT: op : %[[T1:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<16x32xi8> -> !xegpu.tensor_desc<16x32xi8>
@@ -388,17 +394,17 @@ func.func @test_binary_op_2(%arg0: !xegpu.tensor_desc<8x16xf16>, %arg1: !xegpu.t
388394

389395
// -----
390396
// CHECK: argument: <block argument> of type 'memref<8x128xf16>' at index: 0
391-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [1, 1]
397+
// CHECK-NEXT: sg_map : Not assigned.
392398
// CHECK-NEXT: argument: <block argument> of type 'memref<128x16xf16>' at index: 1
393-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [2, 1]
399+
// CHECK-NEXT: sg_map : Not assigned.
394400
// CHECK-NEXT: argument: <block argument> of type 'memref<8x16xf32>' at index: 2
395-
// CHECK-NEXT: sg_map : wi_layout: [1, 16], wi_data: [1, 1]
401+
// CHECK-NEXT: sg_map : Not assigned.
396402
// CHECK-NEXT: op : %{{.*}} = arith.constant 0 : index
397-
// CHECK-NEXT: sg_map for result #0: wi_layout: [16], wi_data: [1]
403+
// CHECK-NEXT: sg_map for result #0: Not assigned.
398404
// CHECK-NEXT: op : %{{.*}} = arith.constant 128 : index
399405
// CHECK-NEXT: sg_map for result #0: Not assigned.
400406
// CHECK-NEXT: op : %{{.*}} = arith.constant 16 : index
401-
// CHECK-NEXT: sg_map for result #0: wi_layout: [16], wi_data: [1]
407+
// CHECK-NEXT: sg_map for result #0: Not assigned.
402408
// CHECK-NEXT: op : %[[T0:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<8x128xf16> -> !xegpu.tensor_desc<8x16xf16>
403409
// CHECK-NEXT: sg_map for result #0: wi_layout: [1, 16], wi_data: [1, 1]
404410
// CHECK-NEXT: op : %[[T1:.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<128x16xf16> -> !xegpu.tensor_desc<16x16xf16>

0 commit comments

Comments
 (0)