@@ -158,4 +158,147 @@ gpu.module @test {
158158 %c = xegpu.dpas %a , %b : vector <32 x32 xf16 >, vector <32 x32 xf16 > -> vector <32 x32 xf32 >
159159 gpu.return %c : vector <32 x32 xf32 >
160160 }
161+
162+ //-----
163+
// Test case: create_tdesc fed by a literal offset vector.
// The function builds a scattered descriptor of 32 f32 elements from a ui64
// base and the constant offsets 0, 8, ..., 248; the descriptor type carries
// a layout with inst_data = [16].
// The directives below require the output to contain two create_tdesc ops on
// the same ui64 argument, each taking a vector<16xindex> and yielding a
// 16xf32 scattered descriptor whose type no longer carries a layout.
164+ // CHECK-LABEL: test_create_tdesc_vec
165+ // CHECK-SAME: [[arg0:%.+]]: ui64
166+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
167+ gpu.func @test_create_tdesc_vec (%src: ui64 ) -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>> {
168+ %cst = arith.constant dense <[
169+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
170+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
171+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
172+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
173+ ]> : vector <32 xindex >
174+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
175+ gpu.return %tdesc : !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
176+ }
177+
178+ //-----
179+
// Test case: create_tdesc fed by a computed offset vector.
// Same descriptor type as the literal-offset variant, but the 32 offsets are
// produced at runtime as vector.step multiplied element-wise by a splat of 8
// instead of being written out as a dense constant.
// The directives below require two create_tdesc ops on the ui64 argument,
// each with a vector<16xindex> operand and a layout-free 16xf32 scattered
// descriptor result.
180+ // CHECK-LABEL: test_create_tdesc_step
181+ // CHECK-SAME: [[arg0:%.+]]: ui64
182+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
183+ gpu.func @test_create_tdesc_step (%src: ui64 ) -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>> {
184+ %step = arith.constant dense <8 > : vector <32 xindex >
185+ %seq = vector.step : vector <32 xindex >
186+ %cst = arith.muli %seq , %step : vector <32 xindex >
187+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
188+ gpu.return %tdesc : !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
189+ }
190+
191+ //-----
192+
// Test case: masked load through a scattered descriptor.
// The function creates a 32xf32 scattered descriptor (offsets 0, 8, ..., 248,
// layout inst_data = [16]) and loads a vector<32xf32> through it under a
// 32-lane mask built by vector.create_mask with bound 17.
// NOTE(review): bound 17 presumably makes the mask differ between the two
// 16-lane halves -- confirm against the author's intent.
// The directives below require two create_tdesc ops followed by two load
// ops, each load using a layout-free 16xf32 descriptor, a vector<16xi1>
// mask and returning vector<16xf32>.
193+ // CHECK-LABEL: test_load
194+ // CHECK-SAME: [[arg0:%.+]]: ui64
195+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
196+ // CHECK-COUNT-2: xegpu.load {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1> -> vector<16xf32>
197+ gpu.func @test_load (%src: ui64 ) -> vector <32 xf32 > {
198+ %cst = arith.constant dense <[
199+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
200+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
201+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
202+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
203+ ]> : vector <32 xindex >
204+
205+ %c17 = arith.constant 17 : index
206+ %mask = vector.create_mask %c17: vector <32 xi1 >
207+
208+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
209+ %ld = xegpu.load %tdesc , %mask: !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>, vector <32 xi1 > -> vector <32 xf32 >
210+
211+ gpu.return %ld : vector <32 xf32 >
212+ }
213+
214+ //-----
215+
// Test case: prefetch through a scattered descriptor.
// The function creates a 32xf32 scattered descriptor (offsets 0, 8, ..., 248,
// layout inst_data = [16]) and issues a single prefetch on it; nothing is
// returned.
// The directives below require two create_tdesc ops followed by two prefetch
// ops, each on a layout-free 16xf32 scattered descriptor.
216+ // CHECK-LABEL: test_prefetch
217+ // CHECK-SAME: [[arg0:%.+]]: ui64
218+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
219+ // CHECK-COUNT-2: xegpu.prefetch {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
220+ gpu.func @test_prefetch (%src: ui64 ) {
221+
222+ %cst = arith.constant dense <[
223+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
224+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
225+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
226+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
227+ ]> : vector <32 xindex >
228+
229+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
230+
231+ xegpu.prefetch %tdesc: !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
232+ gpu.return
233+ }
234+
235+ //-----
236+
// Test case: masked store through a scattered descriptor.
// The function stores a splat constant (1023.0) of type vector<32xf32>
// through a 32xf32 scattered descriptor (offsets 0, 8, ..., 248, layout
// inst_data = [16]) under a 32-lane mask built by vector.create_mask with
// bound 17.
// The directives below require two create_tdesc ops followed by two store
// ops, each storing a vector<16xf32> through a layout-free 16xf32 descriptor
// with a vector<16xi1> mask.
237+ // CHECK-LABEL: test_store
238+ // CHECK-SAME: [[arg0:%.+]]: ui64
239+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
240+ // CHECK-COUNT-2: xegpu.store {{.*}} : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1>
241+ gpu.func @test_store (%src: ui64 ) {
242+ %cst = arith.constant dense <[
243+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
244+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
245+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
246+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
247+ ]> : vector <32 xindex >
248+
249+ %c17 = arith.constant 17 : index
250+ %mask = vector.create_mask %c17: vector <32 xi1 >
251+
252+ %st_vec = arith.constant dense <1023. >: vector <32 xf32 >
253+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
254+ xegpu.store %st_vec , %tdesc , %mask: vector <32 xf32 >, !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>, vector <32 xi1 >
255+
256+ gpu.return
257+ }
258+
259+ //-----
260+
// Test case: prefetch, update_offset, masked load and masked store combined.
// Sequence in the function body:
//   1. create a 32xf32 scattered descriptor (offsets 0, 8, ..., 248, layout
//      inst_data = [16]);
//   2. prefetch through it;
//   3. derive %new_tdesc with update_offset, using a per-lane delta vector
//      that is not uniform (32 x15, 64, 128 x15, 256);
//   4. load vector<32xf32> through %new_tdesc under a mask with bound 17;
//   5. add the loaded vector to itself and store the sum under the same mask.
// The directives below require, in this order, two each of create_tdesc,
// prefetch, update_offset, load and store, all on 16-element operands with
// layout-free descriptor types.
261+ // CHECK-LABEL: test_prefetch_load_store_update
262+ // CHECK-SAME: [[arg0:%.+]]: ui64
263+ // CHECK-COUNT-2: xegpu.create_tdesc [[arg0]], {{.*}} : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
264+ // CHECK-COUNT-2: xegpu.prefetch {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>
265+ // CHECK-COUNT-2: xegpu.update_offset {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xindex>
266+ // CHECK-COUNT-2: xegpu.load {{.*}} : !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1> -> vector<16xf32>
267+ // CHECK-COUNT-2: xegpu.store {{.*}} : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scatter_tdesc_attr<>>, vector<16xi1>
268+
269+ gpu.func @test_prefetch_load_store_update (%src: ui64 ) {
270+
271+ %cst = arith.constant dense <[
272+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
273+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
274+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
275+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
276+ ]> : vector <32 xindex >
277+
278+ %tdesc = xegpu.create_tdesc %src , %cst : ui64 , vector <32 xindex > -> !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
279+
280+ xegpu.prefetch %tdesc: !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>
281+
282+ %delta = arith.constant dense <[
283+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 32 ,
284+ 32 , 32 , 32 , 32 , 32 , 32 , 32 , 64 ,
285+ 128 , 128 , 128 , 128 , 128 , 128 , 128 , 128 ,
286+ 128 , 128 , 128 , 128 , 128 , 128 , 128 , 256
287+ ]> : vector <32 xindex >
288+ %new_tdesc = xegpu.update_offset %tdesc , %delta
289+ : !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>, vector <32 xindex >
290+
291+ %c17 = arith.constant 17 : index
292+ %mask = vector.create_mask %c17: vector <32 xi1 >
293+
294+ %ld_vec = xegpu.load %new_tdesc , %mask: !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>, vector <32 xi1 > -> vector <32 xf32 >
295+
296+ %st_vec = arith.addf %ld_vec , %ld_vec : vector <32 xf32 >
// NOTE(review): the store goes through the original %tdesc, not %new_tdesc
// that the load used -- confirm this asymmetry is intentional.
297+ xegpu.store %st_vec , %tdesc , %mask:
298+ vector <32 xf32 >,
299+ !xegpu.tensor_desc <32 xf32 , #xegpu.scatter_tdesc_attr <>, #xegpu.layout <inst_data = [16 ]>>,
300+ vector <32 xi1 >
301+
302+ gpu.return
303+ }
161304}
0 commit comments