@@ -271,8 +271,8 @@ gpu.module @xevm_module{
271271// CHECK: %[[LAYOUT_X:.*]] = arith.constant 8 : index
272272// CHECK: %[[LAYOUT_Y:.*]] = arith.constant 2 : index
273273// CHECK: %[[LANE_ID:.*]] = gpu.lane_id
274- // CHECK: %[[DELINEARIZED_LANE_Y:.*]] = affine.apply #{{.*}}()[%0 ]
275- // CHECK: %[[DELINEARIZED_LANE_X:.*]] = affine.apply #{{.*}}()[%0 ]
274+ // CHECK: %[[DELINEARIZED_LANE_Y:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]] ]
275+ // CHECK: %[[DELINEARIZED_LANE_X:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]] ]
276276// CHECK: %[[LANE_Y_OFFSET:.*]] = index.remu %[[DELINEARIZED_LANE_Y]], %[[LAYOUT_Y]]
277277// CHECK: %[[LANE_X_OFFSET:.*]] = index.remu %[[DELINEARIZED_LANE_X]], %[[LAYOUT_X]]
278278// CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : !xegpu.mem_desc<32x32xf32>, index, index -> vector<1x1xf32>
@@ -285,3 +285,42 @@ gpu.module @xevm_module{
285285 gpu.return
286286 }
287287}
288+
289+ // -----
290+ // CHECK-LABEL: gpu.func @load_store_matrix_2({{.*}}) {
291+ // CHECK: %[[DIST_UNIT_HEIGHT_X:.*]] = arith.constant 4 : index
292+ // CHECK: %[[DIST_UNIT_HEIGHT_Y:.*]] = arith.constant 8 : index
293+ // CHECK: %[[LANE_DATA_Y:.*]] = arith.constant 2 : index
294+ // CHECK: %[[LANE_ID:.*]] = gpu.lane_id
295+ // CHECK: %[[DELINEARIZED_LANE_Y:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]]]
296+ // CHECK: %[[DELINEARIZED_LANE_X:.*]] = affine.apply #{{.*}}()[%[[LANE_ID]]]
297+ // CHECK: %[[LANE_Y_OFFSET_1:.*]] = index.mul %[[DELINEARIZED_LANE_Y]], %[[LANE_DATA_Y]]
298+ // CHECK: %[[LANE_Y_OFFSET:.*]] = index.remu %[[LANE_Y_OFFSET_1]], %[[DIST_UNIT_HEIGHT_Y]]
299+ // CHECK: %[[LANE_X_OFFSET:.*]] = index.remu %[[DELINEARIZED_LANE_X]], %[[DIST_UNIT_HEIGHT_X]]
300+ // CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : !xegpu.mem_desc<32x32xf32>, index, index -> vector<2x1xf32>
301+ // CHECK: xegpu.store_matrix %[[MAT]], %arg0[%[[LANE_Y_OFFSET]], %[[LANE_X_OFFSET]]] : vector<2x1xf32>, !xegpu.mem_desc<32x32xf32>, index, index
// Input IR for the load_store_matrix_2 case. The function loads a value from
// the 32x32xf32 mem_desc argument at offsets (%c0, %c0) and stores that same
// value back to the same offsets.
// Both ops carry the same layout attribute: lane_layout = [4, 4],
// lane_data = [2, 1]. The FileCheck directives preceding this module expect
// the load/store in the output to use vector<2x1xf32> in place of the
// vector<8x4xf32> written here, with offsets computed from gpu.lane_id.
302+ gpu.module @xevm_module {
303+ gpu.func @load_store_matrix_2 (%arg0: !xegpu.mem_desc <32 x32 xf32 >) {
// Zero offset used for both dimensions of the load and the store.
304+ %c0 = arith.constant 0 : index
305+ %1 = xegpu.load_matrix %arg0 [%c0 , %c0 ] <{layout = #xegpu.layout <lane_layout = [4 , 4 ], lane_data = [2 , 1 ]>}> : !xegpu.mem_desc <32 x32 xf32 >, index , index -> vector <8 x4 xf32 >
// Store the loaded value back with the identical layout and offsets.
306+ xegpu.store_matrix %1 , %arg0 [%c0 , %c0 ] <{layout = #xegpu.layout <lane_layout = [4 , 4 ], lane_data = [2 , 1 ]>}> : vector <8 x4 xf32 >, !xegpu.mem_desc <32 x32 xf32 >, index , index
307+ gpu.return
308+ }
309+ }
310+
311+ // -----
312+ // CHECK-LABEL: gpu.func @load_store_matrix_3({{.*}}) {
313+ // CHECK: %[[MAT:.*]] = xegpu.load_matrix %arg0[%{{.*}}, %{{.*}}] <{subgroup_block_io}>:
314+ // CHECK-SAME: !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<block = [16, 16], stride = [1, 32]>>, index, index -> vector<2x1xf32>
315+ // CHECK: xegpu.store_matrix %[[MAT]], %arg0[%{{.*}}, %{{.*}}] <{subgroup_block_io}>:
316+ // CHECK-SAME: vector<2x1xf32>, !xegpu.mem_desc<32x32xf32, #xegpu.mem_layout<block = [16, 16], stride = [1, 32]>>, index, index
// Input IR for the load_store_matrix_3 case. The mem_desc argument carries a
// mem_layout attribute written as stride = [1, 32], block = [16, 16]; the
// FileCheck directives preceding this module expect it printed with `block`
// before `stride`.
// Both ops carry the subgroup_block_io attribute together with a layout of
// lane_layout = [1, 16], lane_data = [2, 1]. The expected output keeps
// subgroup_block_io on both ops and uses vector<2x1xf32> in place of the
// vector<2x16xf32> written here.
317+ gpu.module @xevm_module {
318+ gpu.func @load_store_matrix_3 (%arg0: !xegpu.mem_desc <32 x32 xf32 , #xegpu.mem_layout <stride = [1 , 32 ], block = [16 , 16 ]>>) {
// Zero offset used for both dimensions of the load and the store.
319+ %c0 = arith.constant 0 : index
// The op's type signature continues on the following line.
320+ %1 = xegpu.load_matrix %arg0 [%c0 , %c0 ] {subgroup_block_io , layout = #xegpu.layout <lane_layout = [1 , 16 ], lane_data = [2 , 1 ]>} :
321+ !xegpu.mem_desc <32 x32 xf32 , #xegpu.mem_layout <stride = [1 , 32 ], block = [16 , 16 ]>>, index , index -> vector <2 x16 xf32 >
// Store the loaded value back with the identical attributes and offsets.
322+ xegpu.store_matrix %1 , %arg0 [%c0 , %c0 ] {subgroup_block_io , layout = #xegpu.layout <lane_layout = [1 , 16 ], lane_data = [2 , 1 ]>} :
323+ vector <2 x16 xf32 >, !xegpu.mem_desc <32 x32 xf32 , #xegpu.mem_layout <stride = [1 , 32 ], block = [16 , 16 ]>>, index , index
324+ gpu.return
325+ }
326+ }
0 commit comments