@@ -163,11 +163,69 @@ gpu.func @test_create_tdesc_vc_1(%src: memref<?xf32, 3>) {
// test_create_tdesc_vc_with_sg_map: creates a 4x2xf32 tensor descriptor from a
// ui64 source and a vector<4xindex> constant, carrying a scatter_tdesc_attr
// (chunk_size = 2) and an sg_map with wi_layout = [4, 1].
// This hunk shows both sides of a change: the removed pair of lines uses
// wi_data = [1, 1]; the added pair uses wi_data = [1, 2], in the op and in its
// expected-output pattern alike.
163163gpu.func @test_create_tdesc_vc_with_sg_map (%src: ui64 ) {
164164 //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
165165 %0 = arith.constant dense <[0 , 8 , 16 , 24 ]> : vector <4 xindex >
166- //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]], %[[cst]] : ui64, vector<4xindex> -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 1 ]>>
167- %1 = xegpu.create_tdesc %src , %0 : ui64 , vector <4 xindex > -> !xegpu.tensor_desc <4 x2 xf32 , #xegpu.scatter_tdesc_attr <chunk_size = 2 >, #xegpu.sg_map <wi_layout = [4 , 1 ], wi_data = [1 , 1 ]>>
166+ //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]], %[[cst]] : ui64, vector<4xindex> -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 2 ]>>
167+ %1 = xegpu.create_tdesc %src , %0 : ui64 , vector <4 xindex > -> !xegpu.tensor_desc <4 x2 xf32 , #xegpu.scatter_tdesc_attr <chunk_size = 2 >, #xegpu.sg_map <wi_layout = [4 , 1 ], wi_data = [1 , 2 ]>>
168168 gpu.return
169169}
170170
// test_load_with_sg_map: builds a 4x2xf32 tensor descriptor (scatter_tdesc_attr
// with chunk_size = 2; sg_map wi_layout = [4, 1], wi_data = [1, 2]) from a ui64
// source and a vector<4xindex> constant, then issues xegpu.load through it with
// the `transpose` attribute set. The declared result type is vector<2x1xf32>.
// The expected output spells the input's dense<1> on vector<4xi1> as
// dense<true>, and chunk_size = 2 as chunk_size = 2 : i64.
// NOTE(review): the vector<4xi1> operand is presumably the load mask - confirm
// against the XeGPU op definition.
171+ // CHECK: gpu.func @test_load_with_sg_map(%[[arg0:.*]]: ui64) {
172+ gpu.func @test_load_with_sg_map (%src: ui64 ) {
173+ //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
174+ %0 = arith.constant dense <[0 , 8 , 16 , 24 ]> : vector <4 xindex >
175+ //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
176+ %1 = arith.constant dense <1 >: vector <4 xi1 >
177+ //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]], %[[cst]] : ui64, vector<4xindex> -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 2]>>
178+ %2 = xegpu.create_tdesc %src , %0 : ui64 , vector <4 xindex > -> !xegpu.tensor_desc <4 x2 xf32 , #xegpu.scatter_tdesc_attr <chunk_size = 2 >, #xegpu.sg_map <wi_layout = [4 , 1 ], wi_data = [1 , 2 ]>>
179+ //CHECK: %[[R1:.*]] = xegpu.load %[[R0]], %[[cst1]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>, transpose}> : !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 2]>>, vector<4xi1> -> vector<2x1xf32>
180+ %3 = xegpu.load %2 , %1 <{l1_hint = #xegpu.cache_hint <cached >, l2_hint = #xegpu.cache_hint <uncached >, transpose }> : !xegpu.tensor_desc <4 x2 xf32 , #xegpu.scatter_tdesc_attr <chunk_size = 2 >, #xegpu.sg_map <wi_layout = [4 , 1 ], wi_data = [1 , 2 ]>>, vector <4 xi1 > -> vector <2 x1 xf32 >
181+ gpu.return
182+ }
183+
// test_load_with_sg_map_2: builds a 1-D 4xf32 tensor descriptor with an empty
// scatter_tdesc_attr<> and sg_map wi_layout = [1, 4], wi_data = [1, 1] from a
// ui64 source and a vector<4xindex> constant, then issues xegpu.load through it.
// No `transpose` attribute is present on this load; the declared result type is
// vector<1xf32>. The expected output spells the input's dense<1> on
// vector<4xi1> as dense<true>.
184+ // CHECK: gpu.func @test_load_with_sg_map_2(%[[arg0:.*]]: ui64) {
185+ gpu.func @test_load_with_sg_map_2 (%src: ui64 ) {
186+ //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
187+ %0 = arith.constant dense <[0 , 8 , 16 , 24 ]> : vector <4 xindex >
188+ //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
189+ %1 = arith.constant dense <1 >: vector <4 xi1 >
190+ //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]], %[[cst]] : ui64, vector<4xindex> -> !xegpu.tensor_desc<4xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.sg_map<wi_layout = [1, 4], wi_data = [1, 1]>>
191+ %2 = xegpu.create_tdesc %src , %0 : ui64 , vector <4 xindex > -> !xegpu.tensor_desc <4 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.sg_map <wi_layout = [1 , 4 ], wi_data = [1 , 1 ]>>
192+ //CHECK: %[[R1:.*]] = xegpu.load %[[R0]], %[[cst1]] <{l1_hint = #xegpu.cache_hint<cached>, l2_hint = #xegpu.cache_hint<uncached>}> : !xegpu.tensor_desc<4xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.sg_map<wi_layout = [1, 4], wi_data = [1, 1]>>, vector<4xi1> -> vector<1xf32>
193+ %3 = xegpu.load %2 , %1 <{l1_hint = #xegpu.cache_hint <cached >, l2_hint = #xegpu.cache_hint <uncached >}> : !xegpu.tensor_desc <4 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.sg_map <wi_layout = [1 , 4 ], wi_data = [1 , 1 ]>>, vector <4 xi1 > -> vector <1 xf32 >
194+ gpu.return
195+ }
196+
// test_store_with_sg_map: builds a 4x2xf32 tensor descriptor (scatter_tdesc_attr
// with chunk_size = 2; sg_map wi_layout = [4, 1], wi_data = [1, 2]) from a ui64
// source and a vector<4xindex> constant, then issues xegpu.store of a
// vector<2x1xf32> constant through it with the `transpose` attribute set and
// cache hints l1 = write_back, l2 = uncached.
// The expected output spells dense<1> on vector<4xi1> as dense<true>,
// dense<2.9> as dense<2.900000e+00>, and chunk_size = 2 as chunk_size = 2 : i64.
// NOTE(review): the vector<4xi1> operand is presumably the store mask - confirm
// against the XeGPU op definition.
197+ // CHECK: gpu.func @test_store_with_sg_map(%[[arg0:.*]]: ui64) {
198+ gpu.func @test_store_with_sg_map (%src: ui64 ) {
199+ //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
200+ %0 = arith.constant dense <[0 , 8 , 16 , 24 ]> : vector <4 xindex >
201+ //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
202+ %1 = arith.constant dense <1 >: vector <4 xi1 >
203+ //CHECK: %[[cst2:.*]] = arith.constant dense<2.900000e+00> : vector<2x1xf32>
204+ %2 = arith.constant dense <2.9 >: vector <2 x1 xf32 >
205+ //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]], %[[cst]] : ui64, vector<4xindex> -> !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 2]>>
206+ %3 = xegpu.create_tdesc %src , %0 : ui64 , vector <4 xindex > -> !xegpu.tensor_desc <4 x2 xf32 , #xegpu.scatter_tdesc_attr <chunk_size = 2 >, #xegpu.sg_map <wi_layout = [4 , 1 ], wi_data = [1 , 2 ]>>
207+ //CHECK: xegpu.store %[[cst2]], %[[R0]], %[[cst1]] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>, transpose}> : vector<2x1xf32>, !xegpu.tensor_desc<4x2xf32, #xegpu.scatter_tdesc_attr<chunk_size = 2 : i64>, #xegpu.sg_map<wi_layout = [4, 1], wi_data = [1, 2]>>, vector<4xi1>
208+ xegpu.store %2 , %3 , %1 <{l1_hint = #xegpu.cache_hint <write_back >, l2_hint = #xegpu.cache_hint <uncached >, transpose }> : vector <2 x1 xf32 >, !xegpu.tensor_desc <4 x2 xf32 , #xegpu.scatter_tdesc_attr <chunk_size = 2 >, #xegpu.sg_map <wi_layout = [4 , 1 ], wi_data = [1 , 2 ]>>, vector <4 xi1 >
209+ gpu.return
210+ }
211+
// test_store_with_sg_map_2: builds a 1-D 4xf32 tensor descriptor with an empty
// scatter_tdesc_attr<> and sg_map wi_layout = [1, 4], wi_data = [1, 1] from a
// ui64 source and a vector<4xindex> constant, then issues xegpu.store of a
// vector<1xf32> constant through it with cache hints l1 = write_back,
// l2 = uncached. No `transpose` attribute is present on this store.
// The expected output spells dense<1> on vector<4xi1> as dense<true> and
// dense<2.9> as dense<2.900000e+00>.
212+ // CHECK: gpu.func @test_store_with_sg_map_2(%[[arg0:.*]]: ui64) {
213+ gpu.func @test_store_with_sg_map_2 (%src: ui64 ) {
214+ //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
215+ %0 = arith.constant dense <[0 , 8 , 16 , 24 ]> : vector <4 xindex >
216+ //CHECK: %[[cst1:.*]] = arith.constant dense<true> : vector<4xi1>
217+ %1 = arith.constant dense <1 >: vector <4 xi1 >
218+ //CHECK: %[[cst2:.*]] = arith.constant dense<2.900000e+00> : vector<1xf32>
219+ %2 = arith.constant dense <2.9 >: vector <1 xf32 >
220+ //CHECK: %[[R0:.*]] = xegpu.create_tdesc %[[arg0]], %[[cst]] : ui64, vector<4xindex> -> !xegpu.tensor_desc<4xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.sg_map<wi_layout = [1, 4], wi_data = [1, 1]>>
221+ %3 = xegpu.create_tdesc %src , %0 : ui64 , vector <4 xindex > -> !xegpu.tensor_desc <4 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.sg_map <wi_layout = [1 , 4 ], wi_data = [1 , 1 ]>>
222+ //CHECK: xegpu.store %[[cst2]], %[[R0]], %[[cst1]] <{l1_hint = #xegpu.cache_hint<write_back>, l2_hint = #xegpu.cache_hint<uncached>}> : vector<1xf32>, !xegpu.tensor_desc<4xf32, #xegpu.scatter_tdesc_attr<>, #xegpu.sg_map<wi_layout = [1, 4], wi_data = [1, 1]>>, vector<4xi1>
223+ xegpu.store %2 , %3 , %1 <{l1_hint = #xegpu.cache_hint <write_back >, l2_hint = #xegpu.cache_hint <uncached >}> : vector <1 xf32 >, !xegpu.tensor_desc <4 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.sg_map <wi_layout = [1 , 4 ], wi_data = [1 , 1 ]>>, vector <4 xi1 >
224+ gpu.return
225+ }
226+
227+
228+
171229// CHECK: gpu.func @test_prefetch_vc(%[[arg0:.*]]: ui64) {
172230gpu.func @test_prefetch_vc (%src: ui64 ) {
173231 //CHECK: %[[cst:.*]] = arith.constant dense<[0, 8, 16, 24]> : vector<4xindex>
0 commit comments