@@ -221,13 +221,35 @@ gpu.module @test {
221221// CHECK: %[[OFFSETS:.*]] = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<12> : vector<16xindex>
222222// CHECK: %[[LOAD_VEC:.*]] = xegpu.load %[[ARG0]][%[[OFFSETS]]], %[[MASK]]
223223// CHECK-SAME: {layout_result_0 = #xegpu.layout<lane_layout = [8], lane_data = [1]>} : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16xf16>
224- // CHECK: xegpu.store %[[LOAD_VEC]], %[[ARG0]][%[[OFFSETS]]], %[[MASK]]
224+ // CHECK: %[[ADD_RES:.*]] = arith.addf %[[LOAD_VEC]], %[[LOAD_VEC]] {layout_result_0 = #xegpu.layout<lane_layout = [8], lane_data = [1]>} : vector<16xf16>
225+ // CHECK: xegpu.store %[[ADD_RES]], %[[ARG0]][%[[OFFSETS]]], %[[MASK]]
225226// CHECK-SAME: <{layout = #xegpu.layout<lane_layout = [8], lane_data = [1]>}> : vector<16xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1>
226227func.func @scatter_ops_custom_perm_layout (%src: memref <256 xf16 >) {
227228 %1 = arith.constant dense <1 >: vector <16 xi1 >
228229 %offset = arith.constant dense <12 > : vector <16 xindex >
229230 %3 = xegpu.load %src [%offset ], %1 : memref <256 xf16 >, vector <16 xindex >, vector <16 xi1 > -> vector <16 xf16 >
230- xegpu.store %3 , %src [%offset ], %1 <{layout = #xegpu.layout <lane_layout = [8 ], lane_data = [1 ]>}> : vector <16 xf16 >, memref <256 xf16 >, vector <16 xindex >, vector <16 xi1 >
231+ %4 = arith.addf %3 , %3 : vector <16 xf16 >
232+ xegpu.store %4 , %src [%offset ], %1 <{layout = #xegpu.layout <lane_layout = [8 ], lane_data = [1 ]>}> : vector <16 xf16 >, memref <256 xf16 >, vector <16 xindex >, vector <16 xi1 >
233+ return
234+ }
235+ }
236+ // -----
gpu.module @test {
// Scatter load/store case where both memory ops carry an explicit layout
// property: the load is written with lane_layout = [16] and the store with
// lane_layout = [8]. The expectations below require that the load keeps its
// own [16] layout (both as the property and as its result layout), while the
// arith.addf feeding the store is annotated with the store's [8] layout.
//
// CHECK-LABEL: func.func @scatter_ops_preserve_load_perm_layout(
// CHECK-SAME: %[[ARG0:[0-9a-zA-Z]+]]: memref<256xf16>) {
// CHECK: %[[MASK:.*]] = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<true> : vector<16xi1>
// CHECK: %[[OFFSETS:.*]] = arith.constant {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} dense<12> : vector<16xindex>
// CHECK: %[[LOAD_VEC:.*]] = xegpu.load %[[ARG0]][%[[OFFSETS]]], %[[MASK]] <{layout = #xegpu.layout<lane_layout = [16], lane_data = [1]>}>
// CHECK-SAME: {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>} : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16xf16>
// CHECK: %[[ADD_RES:.*]] = arith.addf %[[LOAD_VEC]], %[[LOAD_VEC]] {layout_result_0 = #xegpu.layout<lane_layout = [8], lane_data = [1]>} : vector<16xf16>
// CHECK: xegpu.store %[[ADD_RES]], %[[ARG0]][%[[OFFSETS]]], %[[MASK]]
// CHECK-SAME: <{layout = #xegpu.layout<lane_layout = [8], lane_data = [1]>}> : vector<16xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1>
func.func @scatter_ops_preserve_load_perm_layout(%src: memref<256xf16>) {
  // All-true mask and a splat offset of 12 for the 16-element gather/scatter.
  %1 = arith.constant dense<1>: vector<16xi1>
  %offset = arith.constant dense<12> : vector<16xindex>
  // Load with a user-specified [16]-lane layout that differs from the store's.
  %3 = xegpu.load %src[%offset], %1 <{layout = #xegpu.layout<lane_layout = [16], lane_data = [1]>}> : memref<256xf16>, vector<16xindex>, vector<16xi1> -> vector<16xf16>
  // Intermediate op between the load and the store; it has no layout of its own in the input.
  %4 = arith.addf %3, %3 : vector<16xf16>
  // Store with a user-specified [8]-lane layout.
  xegpu.store %4, %src[%offset], %1 <{layout = #xegpu.layout<lane_layout = [8], lane_data = [1]>}> : vector<16xf16>, memref<256xf16>, vector<16xindex>, vector<16xi1>
  return
}
}
0 commit comments