@@ -210,6 +210,27 @@ gpu.module @test {
210
210
gpu.return %ld : vector <32 xf32 >
211
211
}
212
212
213
+ //-----
214
+
215
+
216
+ // CHECK-LABEL: load_with_offsets
217
+ // CHECK-SAME: [[arg0:%.+]]: ui64
218
+ // CHECK-COUNT-2: xegpu.load {{.*}}[{{.*}}], {{.*}} <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint<cached>}> : ui64, vector<16xindex>, vector<16xi1> -> vector<16xf32>
219
// Test input: a masked xegpu.load of vector<32xf32> from the ui64 value %src,
// addressed by a vector of 32 per-lane offsets. The op carries
// layout_result_0 with inst_data = [16]; the CHECK-COUNT-2 directive preceding
// this function expects two xegpu.load ops on 16-element vectors in the output.
+ gpu.func @load_with_offsets (%src: ui64 ) -> vector <32 xf32 > {
220
// 32 constant offsets: 0, 8, ..., 248 (stride 8).
+ %cst = arith.constant dense <[
221
+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
222
+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
223
+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
224
+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
225
+ ]> : vector <32 xindex >
226
+
227
// The mask is built from the constant 17, which is not a multiple of the
// 16-lane inst_data shape used below.
+ %c17 = arith.constant 17 : index
228
+ %mask = vector.create_mask %c17: vector <32 xi1 >
229
// chunk_size = 1: the result type has one f32 element per offset lane.
+ %ld = xegpu.load %src [%cst ], %mask {chunk_size = 1 , layout_result_0 = #xegpu.layout <inst_data = [16 ]>, l1_hint = #xegpu.cache_hint <cached >} : ui64 , vector <32 xindex >, vector <32 xi1 > -> vector <32 xf32 >
230
+
231
+ gpu.return %ld : vector <32 xf32 >
232
+ }
233
+
213
234
//-----
214
235
215
236
// CHECK-LABEL: prefetch
@@ -254,6 +275,28 @@ gpu.module @test {
254
275
255
276
gpu.return
256
277
}
278
+
279
+ //-----
280
+
281
+ // CHECK-LABEL: store_with_offsets
282
+ // CHECK-SAME: [[arg0:%.+]]: ui64
283
+ // CHECK-COUNT-2: xegpu.store {{.*}}[{{.*}}], {{.*}} <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint<cached>}> : vector<16xf32>, ui64, vector<16xindex>, vector<16xi1>
284
// Test input: a masked xegpu.store of a constant vector<32xf32> to the ui64
// value %src, addressed by a vector of 32 per-lane offsets. The op carries
// layout with inst_data = [16]; the CHECK-COUNT-2 directive preceding this
// function expects two xegpu.store ops on 16-element vectors in the output.
// NOTE(review): the argument is named %src although it is used as the store
// destination — presumably copied from the load test; confirm before renaming.
+ gpu.func @store_with_offsets (%src: ui64 ) {
285
// 32 constant offsets: 0, 8, ..., 248 (stride 8).
+ %cst = arith.constant dense <[
286
+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
287
+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
288
+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
289
+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
290
+ ]> : vector <32 xindex >
291
+
292
// The mask is built from the constant 17, which is not a multiple of the
// 16-lane inst_data shape used below.
+ %c17 = arith.constant 17 : index
293
+ %mask = vector.create_mask %c17: vector <32 xi1 >
294
+
295
// Splat value to store (1023.0 in every lane).
+ %st_vec = arith.constant dense <1023.0 >: vector <32 xf32 >
296
+ xegpu.store %st_vec , %src [%cst ], %mask {chunk_size = 1 , layout = #xegpu.layout <inst_data = [16 ]>, l1_hint = #xegpu.cache_hint <cached >} : vector <32 xf32 >, ui64 , vector <32 xindex >, vector <32 xi1 >
297
+
298
+ gpu.return
299
+ }
257
300
258
301
//-----
259
302
// CHECK-LABEL: create_tdesc_step_chunk
@@ -319,6 +362,29 @@ gpu.module @test {
319
362
gpu.return %ld : vector <32 x4 xf32 >
320
363
}
321
364
365
+ //-----
366
+ // CHECK-LABEL: load_with_offsets_chunk
367
+ // CHECK-SAME: [[arg0:%.+]]: ui64
368
+ // CHECK: [[cst:%.+]] = arith.constant dense<0.000000e+00> : vector<32x4xf32>
369
+ // CHECK: [[cst0:%.+]] = arith.constant dense<[130, 138, 146, 154, 162, 170, 178, 186, 194, 202, 210, 218, 226, 234, 242, 250]> : vector<16xindex>
370
+ // CHECK: [[cst1:%.+]] = arith.constant dense<[2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122]> : vector<16xindex>
371
+ // CHECK: [[cst2:%.+]] = arith.constant dense<[128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248]> : vector<16xindex>
372
+ // CHECK: [[cst3:%.+]] = arith.constant dense<[0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120]> : vector<16xindex>
373
+ // CHECK-COUNT-4: xegpu.load {{.*}}[{{.*}}], {{.*}} <{chunk_size = 2 : i64, l1_hint = #xegpu.cache_hint<cached>}> : ui64, vector<16xindex>, vector<16xi1> -> vector<16x2xf32>
374
// Test input: a masked xegpu.load with chunk_size = 4 producing
// vector<32x4xf32> from the ui64 value %src. The op carries layout_result_0
// with inst_data = [16, 2]. The CHECK directives preceding this function
// expect four xegpu.load ops with chunk_size = 2 yielding vector<16x2xf32>,
// plus four 16-element offset constants: the original offsets split into two
// halves (0..120 and 128..248) and the same halves shifted by 2 (2..122 and
// 130..250).
+ gpu.func @load_with_offsets_chunk (%src: ui64 ) -> vector <32 x4 xf32 > {
375
// 32 constant offsets: 0, 8, ..., 248 (stride 8).
+ %cst = arith.constant dense <[
376
+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
377
+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
378
+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
379
+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
380
+ ]> : vector <32 xindex >
381
+
382
// The mask is built from the constant 17, which is not a multiple of the
// 16-lane leading dimension of inst_data.
+ %c17 = arith.constant 17 : index
383
+ %mask = vector.create_mask %c17: vector <32 xi1 >
384
+ %ld = xegpu.load %src [%cst ], %mask {chunk_size = 4 , layout_result_0 = #xegpu.layout <inst_data = [16 , 2 ]>, l1_hint = #xegpu.cache_hint <cached >} : ui64 , vector <32 xindex >, vector <32 xi1 > -> vector <32 x4 xf32 >
385
+ gpu.return %ld : vector <32 x4 xf32 >
386
+ }
387
+
322
388
//-----
323
389
// CHECK-LABEL: store_chunk
324
390
// CHECK-SAME: [[arg0:%.+]]: ui64
@@ -342,6 +408,31 @@ gpu.module @test {
342
408
gpu.return
343
409
}
344
410
411
+ //-----
412
+ // CHECK-LABEL: store_with_offsets_chunk
413
+ // CHECK-SAME: [[arg0:%.+]]: ui64
414
+ // CHECK: [[cst:%.+]] = arith.constant dense<1.023000e+03> : vector<16x2xf32>
415
+ // CHECK: [[cst0:%.+]] = arith.constant dense<[130, 138, 146, 154, 162, 170, 178, 186, 194, 202, 210, 218, 226, 234, 242, 250]> : vector<16xindex>
416
+ // CHECK: [[cst1:%.+]] = arith.constant dense<[2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122]> : vector<16xindex>
417
+ // CHECK: [[cst2:%.+]] = arith.constant dense<[128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248]> : vector<16xindex>
418
+ // CHECK: [[cst3:%.+]] = arith.constant dense<[0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120]> : vector<16xindex>
419
+ // CHECK-COUNT-4: xegpu.store {{.*}}[{{.*}}], {{.*}} <{chunk_size = 2 : i64, l1_hint = #xegpu.cache_hint<cached>}> : vector<16x2xf32>, ui64, vector<16xindex>, vector<16xi1>
420
// Test input: a masked xegpu.store with chunk_size = 4 writing a constant
// vector<32x4xf32> to the ui64 value %src. The op carries layout with
// inst_data = [16, 2]. The CHECK directives preceding this function expect
// four xegpu.store ops with chunk_size = 2 on vector<16x2xf32>, plus four
// 16-element offset constants: the original offsets split into two halves
// (0..120 and 128..248) and the same halves shifted by 2 (2..122 and 130..250).
// NOTE(review): the argument is named %src although it is used as the store
// destination — presumably copied from the load test; confirm before renaming.
+ gpu.func @store_with_offsets_chunk (%src: ui64 ) {
421
// 32 constant offsets: 0, 8, ..., 248 (stride 8).
+ %cst = arith.constant dense <[
422
+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
423
+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
424
+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
425
+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
426
+ ]> : vector <32 xindex >
427
+
428
// The mask is built from the constant 17, which is not a multiple of the
// 16-lane leading dimension of inst_data.
+ %c17 = arith.constant 17 : index
429
+ %mask = vector.create_mask %c17: vector <32 xi1 >
430
+
431
// Splat value to store (1023.0 in every element).
+ %st_vec = arith.constant dense <1023. >: vector <32 x4 xf32 >
432
+ xegpu.store %st_vec , %src [%cst ], %mask {chunk_size = 4 , layout = #xegpu.layout <inst_data = [16 , 2 ]>, l1_hint = #xegpu.cache_hint <cached >} : vector <32 x4 xf32 >, ui64 , vector <32 xindex >, vector <32 xi1 >
433
+ gpu.return
434
+ }
435
+
345
436
//-----
346
437
// CHECK-LABEL: prefetch_chunk
347
438
// CHECK-SAME: [[arg0:%.+]]: ui64
0 commit comments