@@ -268,13 +268,7 @@ gpu.func @load_dynamic_layout_operands(%source: memref<?x?xf32>,
268268 gpu.return %res : vector <8 x16 xf32 >
269269}
270270// CHECK-LABEL: @load_dynamic_layout_operands(
271- // CHECK-SAME: %[[SRC:.+]]: memref<?x?xf32>,
272- // CHECK-SAME: %[[OFF1:.+]]: index, %[[OFF2:.+]]: index,
273- // CHECK-SAME: %[[INDICES:.+]]: vector<8x16xindex>, %[[MASK:.+]]: vector<8x16xi1>, %[[PASS:.+]]: vector<8x16xf32>) -> vector<8x16xf32> {
274- // The %indices producer doesn't have a layout, and neither do the 'broadcast/add' ops computing the linear index.
275- // CHECK: %[[SPLAT:.+]] = vector.broadcast {{.*}} : index to vector<8x16xindex>
276- // CHECK: %[[LIN_IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} : vector<8x16xindex>
277- // CHECK: %[[VEC:.+]] = xegpu.load %[[BASE_I64:.+]]{{\[}}%[[LIN_IDX]]{{\]}}, %[[MASK]]
271+ // CHECK: %[[VEC:.+]] = xegpu.load {{[^{]*}}
278272// CHECK-SAME: {layout_operand_1 = #xegpu.layout<sg_layout = [1]>, layout_operand_2 = #xegpu.layout<sg_layout = [2]>,
279273// CHECK-SAME: layout_result_0 = #xegpu.layout<sg_layout = [0]>}
280274// CHECK: %[[RES:.+]] = arith.select {{[^{]*}}
@@ -305,14 +299,7 @@ gpu.func @load_dynamic_layout_mixed(%source: memref<?x?x?xf32>,
305299 gpu.return %res2 : vector <8 x16 xf32 >
306300}
307301// CHECK-LABEL: @load_dynamic_layout_mixed(
308- // CHECK-SAME: %[[SRC:.+]]: memref<?x?x?xf32>,
309- // CHECK-SAME: %[[OFF1:.+]]: index, %[[OFF2:.+]]: index, %[[OFF3:.+]]: index,
310- // CHECK-SAME: %[[MASK:.+]]: vector<8x16xi1>) -> vector<8x16xf32> {
311- // CHECK: %[[PASS_THRU:.+]] = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [0]>} dense<0.000000e+00> : vector<8x16xf32>
312- // Verify that linear-indices computation uses layout from the 'indices' producer op (%2).
313- // CHECK: %[[SPLAT:.+]] = vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : index to vector<8x16xindex>
314- // CHECK: %[[LIN_IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : vector<8x16xindex>
315- // CHECK: %[[VEC:.+]] = xegpu.load %[[BASE_I64:.+]]{{\[}}%[[LIN_IDX]]{{\]}}, %[[MASK]]
302+ // CHECK: %[[VEC:.+]] = xegpu.load {{[^{]*}}
316303// CHECK-SAME: {{{[^}]*}}layout_operand_2 = #xegpu.layout<sg_layout = [7]>
317304// CHECK-SAME: {{[^}]*}}layout_result_0 = #xegpu.layout<sg_layout = [6]>}
318305// CHECK: %[[RES:.+]] = arith.select {{[^{]*}}
@@ -343,14 +330,7 @@ gpu.func @load_static_layout_mixed(%source: memref<8x16x32xf32>,
343330 gpu.return %res2 : vector <8 x16 xf32 >
344331}
345332// CHECK-LABEL: @load_static_layout_mixed(
346- // CHECK-SAME: %[[SRC:.+]]: memref<8x16x32xf32>,
347- // CHECK-SAME: %[[OFF1:.+]]: index, %[[OFF2:.+]]: index, %[[OFF3:.+]]: index,
348- // CHECK-SAME: %[[MASK:.+]]: vector<8x16xi1>) -> vector<8x16xf32> {
349- // CHECK: %[[PASS_THRU:.+]] = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [0]>} dense<0.000000e+00> : vector<8x16xf32>
350- // Verify that linear-indices computation uses layout from the 'indices' producer op (%2).
351- // CHECK: %[[SPLAT:.+]] = vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : index to vector<8x16xindex>
352- // CHECK: %[[LIN_IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : vector<8x16xindex>
353- // CHECK: %[[VEC:.+]] = xegpu.load %[[BASE_I64:.+]]{{\[}}%[[LIN_IDX]]{{\]}}, %[[MASK]]
333+ // CHECK: %[[VEC:.+]] = xegpu.load {{[^{]*}}
354334// CHECK-SAME: {{{[^}]*}}layout_operand_2 = #xegpu.layout<sg_layout = [7]>
355335// CHECK-SAME: {{[^}]*}}layout_result_0 = #xegpu.layout<sg_layout = [6]>}
356336// CHECK: %[[RES:.+]] = arith.select {{[^{]*}}
@@ -381,15 +361,7 @@ gpu.func @load_dynamic_layout_mixed_override(%source: memref<?x?x?xf32>,
381361 gpu.return %res2 : vector <8 x16 xf32 >
382362}
383363// CHECK-LABEL: @load_dynamic_layout_mixed_override(
384- // CHECK-SAME: %[[SRC:.+]]: memref<?x?x?xf32>,
385- // CHECK-SAME: %[[OFF1:.+]]: index, %[[OFF2:.+]]: index, %[[OFF3:.+]]: index,
386- // CHECK-SAME: %[[MASK:.+]]: vector<8x16xi1>) -> vector<8x16xf32> {
387- // CHECK: %[[PASS_THRU:.+]] = arith.constant {layout_result_0 = #xegpu.layout<sg_layout = [0]>} dense<0.000000e+00> : vector<8x16xf32>
388- // Verify that linear-indices computation uses layout from the 'indices' producer op (%2)
389- // and not its overridden version from the scatter_op (sg_layout = [99])
390- // CHECK: %[[SPLAT:.+]] = vector.broadcast {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : index to vector<8x16xindex>
391- // CHECK: %[[LIN_IDX:.+]] = arith.addi %[[SPLAT]], {{.*}} {layout_result_0 = #xegpu.layout<sg_layout = [5]>} : vector<8x16xindex>
392- // CHECK: %[[VEC:.+]] = xegpu.load %[[BASE_I64:.+]]{{\[}}%[[LIN_IDX]]{{\]}}, %[[MASK]]
364+ // CHECK: %[[VEC:.+]] = xegpu.load {{[^{]*}}
393365// CHECK-SAME: {layout_operand_1 = #xegpu.layout<sg_layout = [99]>, layout_operand_2 = #xegpu.layout<sg_layout = [7]>
394366// CHECK-SAME: {{[^}]*}}layout_result_0 = #xegpu.layout<sg_layout = [6]>}
395367// CHECK: %[[RES:.+]] = arith.select {{[^{]*}}
0 commit comments