
Commit 2a43ee6

only set layouts for anchor ops

Signed-off-by: dchigarev <[email protected]>
1 parent 3afe5d5

3 files changed: 22 additions, 84 deletions

mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp

Lines changed: 14 additions & 20 deletions
@@ -389,27 +389,22 @@ static Value computeOffsets(PatternRewriter &rewriter, OpType gatScatOp,
         arith::AddIOp::create(rewriter, loc, baseOffset, offsetContrib);
   }
   Value indices = gatScatOp.getIndices();
-  // Extract indices layout and propagate it to all 'vector' ops created here
-  auto indicesLayout = xegpu::getDistributeLayoutAttr(indices);
   VectorType vecType = cast<VectorType>(indices.getType());
 
-  auto strideVector =
-      vector::BroadcastOp::create(rewriter, loc, vecType, strides.back());
-  xegpu::setDistributeLayoutAttr(strideVector->getOpResult(0), indicesLayout);
-
-  auto stridedIndices =
-      arith::MulIOp::create(rewriter, loc, strideVector.getResult(), indices);
-  xegpu::setDistributeLayoutAttr(stridedIndices->getOpResult(0), indicesLayout);
-
-  auto baseVector = vector::BroadcastOp::create(
-      rewriter, loc,
-      VectorType::get(vecType.getShape(), rewriter.getIndexType()), baseOffset);
-  xegpu::setDistributeLayoutAttr(baseVector->getOpResult(0), indicesLayout);
-
-  auto result = arith::AddIOp::create(rewriter, loc, baseVector.getResult(),
-                                      stridedIndices.getResult());
-  xegpu::setDistributeLayoutAttr(result->getOpResult(0), indicesLayout);
-  return result.getResult();
+  Value strideVector =
+      vector::BroadcastOp::create(rewriter, loc, vecType, strides.back())
+          .getResult();
+  Value stridedIndices =
+      arith::MulIOp::create(rewriter, loc, strideVector, indices).getResult();
+
+  Value baseVector =
+      vector::BroadcastOp::create(
+          rewriter, loc,
+          VectorType::get(vecType.getShape(), rewriter.getIndexType()),
+          baseOffset)
+          .getResult();
+  return arith::AddIOp::create(rewriter, loc, baseVector, stridedIndices)
+      .getResult();
 }
 
 template <
@@ -659,7 +654,6 @@ struct GatherLowering : public OpRewritePattern<vector::GatherOp> {
         arith::SelectOp::create(rewriter, loc, gatherOp.getMask(),
                                 xeGatherOp.getResult(), gatherOp.getPassThru());
     xegpu::setDistributeLayoutAttr(selectOp.getConditionMutable(), layoutMask);
-    xegpu::setDistributeLayoutAttr(selectOp.getTrueValueMutable(), layoutRes);
     xegpu::setDistributeLayoutAttr(selectOp.getFalseValueMutable(),
                                    layoutPassThru);
     xegpu::setDistributeLayoutAttr(selectOp->getOpResult(0), layoutRes);
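
Read outside diff form, the change is: the broadcast/muli/addi chain that linearizes the gather/scatter indices is now built without any layout attributes, and layouts are attached only to anchor ops (the xegpu.load/xegpu.store and the final arith.select), leaving intermediate ops to layout propagation. A minimal sketch of the new index computation, assuming the MLIR builder APIs used in the patch; the helper name buildLinearIndices and the include set are illustrative, not part of the commit:

```cpp
// Illustrative sketch (hypothetical helper): compute base + stride * indices
// as plain vector/arith ops, with no xegpu::setDistributeLayoutAttr calls on
// the intermediate results.
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/Dialect/XeGPU/Utils/XeGPUUtils.h"
#include "mlir/IR/PatternMatch.h"

using namespace mlir;

static Value buildLinearIndices(PatternRewriter &rewriter, Location loc,
                                Value indices, Value stride, Value base) {
  auto vecType = cast<VectorType>(indices.getType());
  // Splat the innermost stride and scale the per-element indices by it.
  Value strideVec =
      vector::BroadcastOp::create(rewriter, loc, vecType, stride).getResult();
  Value scaled =
      arith::MulIOp::create(rewriter, loc, strideVec, indices).getResult();
  // Splat the scalar base offset to the same vector shape and add it in.
  Value baseVec =
      vector::BroadcastOp::create(
          rewriter, loc,
          VectorType::get(vecType.getShape(), rewriter.getIndexType()), base)
          .getResult();
  return arith::AddIOp::create(rewriter, loc, baseVec, scaled).getResult();
}
```

Only the anchor op consuming this value is then tagged, e.g. xegpu::setDistributeLayoutAttr(anchorOp->getOpResult(0), layoutRes). Dropping the annotation on the select's true value appears consistent with this: that operand is the xegpu.load result, whose layout is already recorded as layout_result_0 on the load itself.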

mlir/test/Conversion/VectorToXeGPU/gather-to-xegpu.mlir

Lines changed: 4 additions & 32 deletions
@@ -268,13 +268,7 @@ gpu.func @load_dynamic_layout_operands(%source: memref<?x?xf32>,
   gpu.return %res : vector<8x16xf32>
 }
 // CHECK-LABEL: @load_dynamic_layout_operands(
-// CHECK-SAME:  %[[SRC:.+]]: memref<?x?xf32>,
-// CHECK-SAME:  %[[OFF1:.+]]: index, %[[OFF2:.+]]: index,
-// CHECK-SAME:  %[[INDICES:.+]]: vector<8x16xindex>, %[[MASK:.+]]: vector<8x16xi1>, %[[PASS:.+]]: vector<8x16xf32>) -> vector<8x16xf32> {
-// %indices producer doesn't have a layout, so as 'broadcast/add' ops computing linear index.
-// CHECK: %[[SPLAT:.+]] = vector.broadcast {{.*}} : index to vector<8x16xindex>
-// CHECK: %[[LIN_IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} : vector<8x16xindex>
-// CHECK: %[[VEC:.+]] = xegpu.load %[[BASE_I64:.+]]{{\[}}%[[LIN_IDX]]{{\]}}, %[[MASK]]
+// CHECK: %[[VEC:.+]] = xegpu.load {{[^{]*}}
 // CHECK-SAME: {layout_operand_1 = #xegpu.layout<sg_layout = [1]>, layout_operand_2 = #xegpu.layout<sg_layout = [2]>,
 // CHECK-SAME: layout_result_0 = #xegpu.layout<sg_layout = [0]>}
 // CHECK: %[[RES:.+]] = arith.select {{[^{]*}}
@@ -305,14 +299,7 @@ gpu.func @load_dynamic_layout_mixed(%source: memref<?x?x?xf32>,
   gpu.return %res2 : vector<8x16xf32>
 }
 // CHECK-LABEL: @load_dynamic_layout_mixed(
-// CHECK-SAME:  %[[SRC:.+]]: memref<?x?x?xf32>,
-// CHECK-SAME:  %[[OFF1:.+]]: index, %[[OFF2:.+]]: index, %[[OFF3:.+]]: index,
-// CHECK-SAME:  %[[MASK:.+]]: vector<8x16xi1>) -> vector<8x16xf32> {
-// CHECK: %[[PASS_THRU:.+]] = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [0]>} dense<0.000000e+00> : vector<8x16xf32>
-// Verify that linear-indices computation uses layout from the 'indices' producer op (%2).
-// CHECK: %[[SPLAT:.+]] = vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : index to vector<8x16xindex>
-// CHECK: %[[LIN_IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : vector<8x16xindex>
-// CHECK: %[[VEC:.+]] = xegpu.load %[[BASE_I64:.+]]{{\[}}%[[LIN_IDX]]{{\]}}, %[[MASK]]
+// CHECK: %[[VEC:.+]] = xegpu.load {{[^{]*}}
 // CHECK-SAME: {{{[^}]*}}layout_operand_2 = #xegpu.layout<sg_layout = [7]>
 // CHECK-SAME: {{[^}]*}}layout_result_0 = #xegpu.layout<sg_layout = [6]>}
 // CHECK: %[[RES:.+]] = arith.select {{[^{]*}}
@@ -343,14 +330,7 @@ gpu.func @load_static_layout_mixed(%source: memref<8x16x32xf32>,
   gpu.return %res2 : vector<8x16xf32>
 }
 // CHECK-LABEL: @load_static_layout_mixed(
-// CHECK-SAME:  %[[SRC:.+]]: memref<8x16x32xf32>,
-// CHECK-SAME:  %[[OFF1:.+]]: index, %[[OFF2:.+]]: index, %[[OFF3:.+]]: index,
-// CHECK-SAME:  %[[MASK:.+]]: vector<8x16xi1>) -> vector<8x16xf32> {
-// CHECK: %[[PASS_THRU:.+]] = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [0]>} dense<0.000000e+00> : vector<8x16xf32>
-// Verify that linear-indices computation uses layout from the 'indices' producer op (%2).
-// CHECK: %[[SPLAT:.+]] = vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : index to vector<8x16xindex>
-// CHECK: %[[LIN_IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : vector<8x16xindex>
-// CHECK: %[[VEC:.+]] = xegpu.load %[[BASE_I64:.+]]{{\[}}%[[LIN_IDX]]{{\]}}, %[[MASK]]
+// CHECK: %[[VEC:.+]] = xegpu.load {{[^{]*}}
 // CHECK-SAME: {{{[^}]*}}layout_operand_2 = #xegpu.layout<sg_layout = [7]>
 // CHECK-SAME: {{[^}]*}}layout_result_0 = #xegpu.layout<sg_layout = [6]>}
 // CHECK: %[[RES:.+]] = arith.select {{[^{]*}}
@@ -381,15 +361,7 @@ gpu.func @load_dynamic_layout_mixed_override(%source: memref<?x?x?xf32>,
   gpu.return %res2 : vector<8x16xf32>
 }
 // CHECK-LABEL: @load_dynamic_layout_mixed_override(
-// CHECK-SAME:  %[[SRC:.+]]: memref<?x?x?xf32>,
-// CHECK-SAME:  %[[OFF1:.+]]: index, %[[OFF2:.+]]: index, %[[OFF3:.+]]: index,
-// CHECK-SAME:  %[[MASK:.+]]: vector<8x16xi1>) -> vector<8x16xf32> {
-// CHECK: %[[PASS_THRU:.+]] = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [0]>} dense<0.000000e+00> : vector<8x16xf32>
-// Verify that linear-indices computation uses layout from the 'indices' producer op (%2)
-// and not it's overriden version from the scatter_op (sg_layout = [99])
-// CHECK: %[[SPLAT:.+]] = vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : index to vector<8x16xindex>
-// CHECK: %[[LIN_IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : vector<8x16xindex>
-// CHECK: %[[VEC:.+]] = xegpu.load %[[BASE_I64:.+]]{{\[}}%[[LIN_IDX]]{{\]}}, %[[MASK]]
+// CHECK: %[[VEC:.+]] = xegpu.load {{[^{]*}}
 // CHECK-SAME: {layout_operand_1 = #xegpu.layout<sg_layout = [99]>, layout_operand_2 = #xegpu.layout<sg_layout = [7]>
 // CHECK-SAME: {{[^}]*}}layout_result_0 = #xegpu.layout<sg_layout = [6]>}
 // CHECK: %[[RES:.+]] = arith.select {{[^{]*}}

mlir/test/Conversion/VectorToXeGPU/scatter-to-xegpu.mlir

Lines changed: 4 additions & 32 deletions
@@ -219,13 +219,7 @@ gpu.func @store_dynamic_layout_operands(%vec: vector<8x16xf32>, %source: memref<
   gpu.return
 }
 // CHECK-LABEL: @store_dynamic_layout_operands(
-// CHECK-SAME:  %[[VEC:.+]]: vector<8x16xf32>, %[[SRC:.+]]: memref<?x?xf32>,
-// CHECK-SAME:  %[[OFF1:.+]]: index, %[[OFF2:.+]]: index,
-// CHECK-SAME:  %[[INDICES:.+]]: vector<8x16xindex>, %[[MASK:.+]]: vector<8x16xi1>) {
-// %indices producer doesn't have a layout, so as 'broadcast/add' ops computing linear index.
-// CHECK: %[[SPLAT:.+]] = vector.broadcast {{.*}} : index to vector<8x16xindex>
-// CHECK: %[[LIN_IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} : vector<8x16xindex>
-// CHECK: xegpu.store %[[VEC]], %[[BASE_I64:.+]]{{\[}}%[[LIN_IDX]]{{\]}}, %[[MASK]]
+// CHECK: xegpu.store {{[^{]*}}
 // CHECK-SAME: {layout_operand_0 = #xegpu.layout<sg_layout = [2]>, layout_operand_2 = #xegpu.layout<sg_layout = [0]>, layout_operand_3 = #xegpu.layout<sg_layout = [1]>}
 }
 
@@ -248,14 +242,7 @@ gpu.func @store_dynamic_layout_mixed(%source: memref<?x?x?xf32>,
   gpu.return
 }
 // CHECK-LABEL: @store_dynamic_layout_mixed(
-// CHECK-SAME:  %[[SRC:.+]]: memref<?x?x?xf32>,
-// CHECK-SAME:  %[[OFF1:.+]]: index, %[[OFF2:.+]]: index, %[[OFF3:.+]]: index,
-// CHECK-SAME:  %[[MASK:.+]]: vector<8x16xi1>) {
-// CHECK: %[[VEC:.+]] = arith.constant {layout_operand_0 = #xegpu.layout<sg_layout = [0]>} dense<1.000000e+00> : vector<8x16xf32>
-// Verify that linear-indices computation uses layout from the 'indices' producer op (%2).
-// CHECK: %[[SPLAT:.+]] = vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : index to vector<8x16xindex>
-// CHECK: %[[LIN_IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : vector<8x16xindex>
-// CHECK: xegpu.store %[[VEC]], %[[BASE_I64:.+]]{{\[}}%[[LIN_IDX]]{{\]}}, %[[MASK]]
+// CHECK: xegpu.store {{[^{]*}}
 // CHECK-SAME: {{[^}]*}}layout_operand_3 = #xegpu.layout<sg_layout = [6]>}
 }
 
@@ -278,14 +265,7 @@ gpu.func @store_static_layout_mixed(%source: memref<8x16x32xf32>,
   gpu.return
 }
 // CHECK-LABEL: @store_static_layout_mixed(
-// CHECK-SAME:  %[[SRC:.+]]: memref<8x16x32xf32>,
-// CHECK-SAME:  %[[OFF1:.+]]: index, %[[OFF2:.+]]: index, %[[OFF3:.+]]: index,
-// CHECK-SAME:  %[[MASK:.+]]: vector<8x16xi1>) {
-// CHECK: %[[VEC:.+]] = arith.constant {layout_operand_0 = #xegpu.layout<sg_layout = [0]>} dense<1.000000e+00> : vector<8x16xf32>
-// Verify that linear-indices computation uses layout from the 'indices' producer op (%2).
-// CHECK: %[[SPLAT:.+]] = vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : index to vector<8x16xindex>
-// CHECK: %[[LIN_IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : vector<8x16xindex>
-// CHECK: xegpu.store %[[VEC]], %[[BASE_I64:.+]]{{\[}}%[[LIN_IDX]]{{\]}}, %[[MASK]]
+// CHECK: xegpu.store {{[^{]*}}
 // CHECK-SAME: {{[^}]*}}layout_operand_3 = #xegpu.layout<sg_layout = [6]>}
 }
 
@@ -309,15 +289,7 @@ gpu.func @store_dynamic_layout_mixed_override(%source: memref<?x?x?xf32>,
   gpu.return
 }
 // CHECK-LABEL: @store_dynamic_layout_mixed_override(
-// CHECK-SAME:  %[[SRC:.+]]: memref<?x?x?xf32>,
-// CHECK-SAME:  %[[OFF1:.+]]: index, %[[OFF2:.+]]: index, %[[OFF3:.+]]: index,
-// CHECK-SAME:  %[[MASK:.+]]: vector<8x16xi1>) {
-// CHECK: %[[VEC:.+]] = arith.constant {layout_operand_0 = #xegpu.layout<sg_layout = [0]>} dense<1.000000e+00> : vector<8x16xf32>
-// Verify that linear-indices computation uses layout from the 'indices' producer op (%2)
-// and not it's overriden version from the scatter_op (sg_layout = [99])
-// CHECK: %[[SPLAT:.+]] = vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : index to vector<8x16xindex>
-// CHECK: %[[LIN_IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : vector<8x16xindex>
-// CHECK: xegpu.store %[[VEC]], %[[BASE_I64:.+]]{{\[}}%[[LIN_IDX]]{{\]}}, %[[MASK]]
+// CHECK: xegpu.store {{[^{]*}}
 // CHECK-SAME: {{[^}]*}}layout_operand_2 = #xegpu.layout<sg_layout = [99]>,
 // CHECK-SAME: {{[^}]*}}layout_operand_3 = #xegpu.layout<sg_layout = [6]>}
 }
