@@ -21,6 +21,33 @@ gpu.func @test_create_nd_tdesc_with_sg_map(%src: memref<24x32xf32>) {
2121 gpu.return
2222}
2323
// Round-trip test for a packed xegpu.load_nd on a descriptor that carries an sg_map.
// It creates a descriptor for a 32x16xi8 tile at offset [0, 0] of the 32x32xi8 memref
// argument, with sg_map<wi_layout = [1, 16], wi_data = [4, 1]>, and loads it with the
// `packed` attribute plus L1 = cached / L2 = uncached hints into vector<8x1x4xi8>.
// The input writes `packed` first in the attribute dictionary, whereas the expected
// output below lists it after the two cache hints.
// NOTE(review): the leading `NN+` markers and the spaces inside types (for example
// `memref <32 x32 xi8 >`) look like artifacts of how this diff was captured - confirm
// against the original test file before relying on this text verbatim.
24+ // CHECK: gpu.func @test_load_nd_tdesc_with_sg_map(%[[arg0:.*]]: memref<32x32xi8>) {
25+ gpu.func @test_load_nd_tdesc_with_sg_map (%src: memref <32 x32 xi8 >) {
26+ // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<32x32xi8> -> !xegpu.tensor_desc<32x16xi8, #xegpu.sg_map<wi_layout = [1, 16], wi_data = [4, 1]>>
27+ %1 = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <32 x32 xi8 > -> !xegpu.tensor_desc <32 x16 xi8 , #xegpu.sg_map <wi_layout = [1 , 16 ], wi_data = [4 , 1 ]>>
28+ // CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[REG]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, packed}> : !xegpu.tensor_desc<32x16xi8, #xegpu.sg_map<wi_layout = [1, 16], wi_data = [4, 1]>> -> vector<8x1x4xi8>
29+ %2 = xegpu.load_nd %1 <{packed , l1_hint = #xegpu.cache_hint <cached >, l2_hint = #xegpu.cache_hint <uncached >}>: !xegpu.tensor_desc <32 x16 xi8 , #xegpu.sg_map <wi_layout = [1 , 16 ], wi_data = [4 , 1 ]>> -> vector <8 x1 x4 xi8 >
30+ gpu.return
31+ }
32+
// Round-trip test for a plain xegpu.load_nd (no `packed`, no `transpose`) on a
// descriptor that carries an sg_map.
// It creates a descriptor for an 8x16xf32 tile at offset [0, 0] of the 24x32xf32 memref
// argument, with sg_map<wi_layout = [1, 16], wi_data = [1, 1]>, and loads it with
// L1 = cached / L2 = uncached hints into vector<8x1xf32>.
// NOTE(review): the leading `NN+` markers and the spaces inside types (for example
// `memref <24 x32 xf32 >`) look like artifacts of how this diff was captured - confirm
// against the original test file before relying on this text verbatim.
33+ // CHECK: gpu.func @test_load_nd_tdesc_with_sg_map_2(%[[arg0:.*]]: memref<24x32xf32>) {
34+ gpu.func @test_load_nd_tdesc_with_sg_map_2 (%src: memref <24 x32 xf32 >) {
35+ // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32, #xegpu.sg_map<wi_layout = [1, 16], wi_data = [1, 1]>>
36+ %1 = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <24 x32 xf32 > -> !xegpu.tensor_desc <8 x16 xf32 , #xegpu.sg_map <wi_layout = [1 , 16 ], wi_data = [1 , 1 ]>>
37+ // CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[REG]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<8x16xf32, #xegpu.sg_map<wi_layout = [1, 16], wi_data = [1, 1]>> -> vector<8x1xf32>
38+ %2 = xegpu.load_nd %1 <{l1_hint = #xegpu.cache_hint <cached >, l2_hint = #xegpu.cache_hint <uncached >}> : !xegpu.tensor_desc <8 x16 xf32 , #xegpu.sg_map <wi_layout = [1 , 16 ], wi_data = [1 , 1 ]>> -> vector <8 x1 xf32 >
39+ gpu.return
40+ }
41+
// Round-trip test for a transposing xegpu.load_nd on a descriptor that carries an sg_map.
// It creates a descriptor for a 16x8xf32 tile at offset [0, 0] of the 32x32xf32 memref
// argument, with sg_map<wi_layout = [16, 1], wi_data = [1, 1]>, and loads it with
// transpose = array<i64: 1, 0> plus L1 = cached / L2 = uncached hints into vector<8x1xf32>.
// The input writes `transpose` first in the attribute dictionary, whereas the expected
// output below lists it after the two cache hints.
// NOTE(review): the leading `NN+` markers and the spaces inside types (for example
// `memref <32 x32 xf32 >`) look like artifacts of how this diff was captured - confirm
// against the original test file before relying on this text verbatim.
42+ // CHECK: gpu.func @test_load_nd_tdesc_with_sg_map_3(%[[arg0:.*]]: memref<32x32xf32>) {
43+ gpu.func @test_load_nd_tdesc_with_sg_map_3 (%src: memref <32 x32 xf32 >) {
44+ // CHECK: %[[REG:.*]] = xegpu.create_nd_tdesc %[[arg0]][0, 0] : memref<32x32xf32> -> !xegpu.tensor_desc<16x8xf32, #xegpu.sg_map<wi_layout = [16, 1], wi_data = [1, 1]>>
45+ %1 = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <32 x32 xf32 > -> !xegpu.tensor_desc <16 x8 xf32 , #xegpu.sg_map <wi_layout = [16 , 1 ], wi_data = [1 , 1 ]>>
46+ // CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[REG]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, transpose = array<i64: 1, 0>}> : !xegpu.tensor_desc<16x8xf32, #xegpu.sg_map<wi_layout = [16, 1], wi_data = [1, 1]>> -> vector<8x1xf32>
47+ %2 = xegpu.load_nd %1 <{transpose = array<i64 : 1 , 0 >, l1_hint = #xegpu.cache_hint <cached >, l2_hint = #xegpu.cache_hint <uncached >}> : !xegpu.tensor_desc <16 x8 xf32 , #xegpu.sg_map <wi_layout = [16 , 1 ], wi_data = [1 , 1 ]>> -> vector <8 x1 xf32 >
48+ gpu.return
49+ }
50+
2451// CHECK: gpu.func @test_create_nd_tdesc_vc_2(%[[arg0:.*]]: ui64, %[[arg1:.*]]: index, %[[arg2:.*]]: index, %[[arg3:.*]]: index, %[[arg4:.*]]: index) {
2552gpu.func @test_create_nd_tdesc_vc_2 (%src: ui64 , %w : index , %h : index , %x : index , %y : index ) {
2653 //CHECK: %[[C:.*]] = arith.constant 1 : index
0 commit comments