@@ -210,6 +210,27 @@ gpu.module @test {
210210 gpu.return %ld : vector <32 xf32 >
211211 }
212212
213+ //-----
214+
215+
216+ // CHECK-LABEL: load_with_offsets
217+ // CHECK-SAME: [[arg0:%.+]]: ui64
218+ // CHECK-COUNT-2: xegpu.load {{.*}}[{{.*}}], {{.*}} <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint<cached>}> : ui64, vector<16xindex>, vector<16xi1> -> vector<16xf32>
// Test input: a masked 32-lane xegpu.load addressed by a raw ui64 base plus a
// per-lane offset vector (no tensor descriptor). The FileCheck directives
// above this function expect it to be rewritten into two 16-lane loads,
// matching the inst_data = [16] layout attribute on the op.
219+ gpu.func @load_with_offsets (%src: ui64 ) -> vector <32 xf32 > {
// 32 per-lane offsets: 0, 8, ..., 248 (constant stride of 8).
220+ %cst = arith.constant dense <[
221+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
222+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
223+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
224+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
225+ ]> : vector <32 xindex >
226+
// vector.create_mask enables the first 17 of the 32 lanes.
// NOTE(review): 17 presumably chosen so the mask straddles the 16-lane split
// boundary -- confirm intent.
227+ %c17 = arith.constant 17 : index
228+ %mask = vector.create_mask %c17: vector <32 xi1 >
// chunk_size = 1: one f32 element per offset, giving a vector<32xf32> result.
229+ %ld = xegpu.load %src [%cst ], %mask {chunk_size = 1 , layout_result_0 = #xegpu.layout <inst_data = [16 ]>, l1_hint = #xegpu.cache_hint <cached >} : ui64 , vector <32 xindex >, vector <32 xi1 > -> vector <32 xf32 >
230+
231+ gpu.return %ld : vector <32 xf32 >
232+ }
233+
213234//-----
214235
215236 // CHECK-LABEL: prefetch
@@ -254,6 +275,28 @@ gpu.module @test {
254275
255276 gpu.return
256277 }
278+
279+ //-----
280+
281+ // CHECK-LABEL: store_with_offsets
282+ // CHECK-SAME: [[arg0:%.+]]: ui64
283+ // CHECK-COUNT-2: xegpu.store {{.*}}[{{.*}}], {{.*}} <{chunk_size = 1 : i64, l1_hint = #xegpu.cache_hint<cached>}> : vector<16xf32>, ui64, vector<16xindex>, vector<16xi1>
// Test input: a masked 32-lane xegpu.store addressed by a raw ui64 base plus a
// per-lane offset vector (no tensor descriptor). The FileCheck directives
// above this function expect it to be rewritten into two 16-lane stores,
// matching the inst_data = [16] layout attribute on the op.
284+ gpu.func @store_with_offsets (%src: ui64 ) {
// 32 per-lane offsets: 0, 8, ..., 248 (constant stride of 8).
285+ %cst = arith.constant dense <[
286+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
287+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
288+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
289+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
290+ ]> : vector <32 xindex >
291+
// vector.create_mask enables the first 17 of the 32 lanes.
// NOTE(review): 17 presumably chosen so the mask straddles the 16-lane split
// boundary -- confirm intent.
292+ %c17 = arith.constant 17 : index
293+ %mask = vector.create_mask %c17: vector <32 xi1 >
294+
// Splat value to store; every lane holds 1023.0.
295+ %st_vec = arith.constant dense <1023.0 >: vector <32 xf32 >
// chunk_size = 1: one f32 element is written per offset.
296+ xegpu.store %st_vec , %src [%cst ], %mask {chunk_size = 1 , layout = #xegpu.layout <inst_data = [16 ]>, l1_hint = #xegpu.cache_hint <cached >} : vector <32 xf32 >, ui64 , vector <32 xindex >, vector <32 xi1 >
297+
298+ gpu.return
299+ }
257300
258301//-----
259302 // CHECK-LABEL: create_tdesc_step_chunk
@@ -319,6 +362,29 @@ gpu.module @test {
319362 gpu.return %ld : vector <32 x4 xf32 >
320363 }
321364
365+ //-----
366+ // CHECK-LABEL: load_with_offsets_chunk
367+ // CHECK-SAME: [[arg0:%.+]]: ui64
368+ // CHECK: [[cst:%.+]] = arith.constant dense<0.000000e+00> : vector<32x4xf32>
369+ // CHECK: [[cst0:%.+]] = arith.constant dense<[130, 138, 146, 154, 162, 170, 178, 186, 194, 202, 210, 218, 226, 234, 242, 250]> : vector<16xindex>
370+ // CHECK: [[cst1:%.+]] = arith.constant dense<[2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122]> : vector<16xindex>
371+ // CHECK: [[cst2:%.+]] = arith.constant dense<[128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248]> : vector<16xindex>
372+ // CHECK: [[cst3:%.+]] = arith.constant dense<[0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120]> : vector<16xindex>
373+ // CHECK-COUNT-4: xegpu.load {{.*}}[{{.*}}], {{.*}} <{chunk_size = 2 : i64, l1_hint = #xegpu.cache_hint<cached>}> : ui64, vector<16xindex>, vector<16xi1> -> vector<16x2xf32>
// Test input: a masked, chunked xegpu.load (chunk_size = 4) addressed by a raw
// ui64 base plus 32 per-lane offsets, producing vector<32x4xf32>. With
// inst_data = [16, 2] the FileCheck directives above this function expect four
// loads of vector<16x2xf32> with chunk_size = 2, using offset constants that
// are the original offsets and the original offsets plus 2.
374+ gpu.func @load_with_offsets_chunk (%src: ui64 ) -> vector <32 x4 xf32 > {
// 32 per-lane offsets: 0, 8, ..., 248 (constant stride of 8).
375+ %cst = arith.constant dense <[
376+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
377+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
378+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
379+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
380+ ]> : vector <32 xindex >
381+
// vector.create_mask enables the first 17 of the 32 lanes.
// NOTE(review): 17 presumably chosen so the mask straddles the 16-lane split
// boundary -- confirm intent.
382+ %c17 = arith.constant 17 : index
383+ %mask = vector.create_mask %c17: vector <32 xi1 >
// chunk_size = 4: four f32 elements per offset, hence the 32x4 result shape.
384+ %ld = xegpu.load %src [%cst ], %mask {chunk_size = 4 , layout_result_0 = #xegpu.layout <inst_data = [16 , 2 ]>, l1_hint = #xegpu.cache_hint <cached >} : ui64 , vector <32 xindex >, vector <32 xi1 > -> vector <32 x4 xf32 >
385+ gpu.return %ld : vector <32 x4 xf32 >
386+ }
387+
322388//-----
323389 // CHECK-LABEL: store_chunk
324390 // CHECK-SAME: [[arg0:%.+]]: ui64
@@ -342,6 +408,31 @@ gpu.module @test {
342408 gpu.return
343409 }
344410
411+ //-----
412+ // CHECK-LABEL: store_with_offsets_chunk
413+ // CHECK-SAME: [[arg0:%.+]]: ui64
414+ // CHECK: [[cst:%.+]] = arith.constant dense<1.023000e+03> : vector<16x2xf32>
415+ // CHECK: [[cst0:%.+]] = arith.constant dense<[130, 138, 146, 154, 162, 170, 178, 186, 194, 202, 210, 218, 226, 234, 242, 250]> : vector<16xindex>
416+ // CHECK: [[cst1:%.+]] = arith.constant dense<[2, 10, 18, 26, 34, 42, 50, 58, 66, 74, 82, 90, 98, 106, 114, 122]> : vector<16xindex>
417+ // CHECK: [[cst2:%.+]] = arith.constant dense<[128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248]> : vector<16xindex>
418+ // CHECK: [[cst3:%.+]] = arith.constant dense<[0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120]> : vector<16xindex>
419+ // CHECK-COUNT-4: xegpu.store {{.*}}[{{.*}}], {{.*}} <{chunk_size = 2 : i64, l1_hint = #xegpu.cache_hint<cached>}> : vector<16x2xf32>, ui64, vector<16xindex>, vector<16xi1>
// Test input: a masked, chunked xegpu.store (chunk_size = 4) of a
// vector<32x4xf32> value, addressed by a raw ui64 base plus 32 per-lane
// offsets. With inst_data = [16, 2] the FileCheck directives above this
// function expect four stores of vector<16x2xf32> with chunk_size = 2, using
// offset constants that are the original offsets and the original offsets
// plus 2.
420+ gpu.func @store_with_offsets_chunk (%src: ui64 ) {
// 32 per-lane offsets: 0, 8, ..., 248 (constant stride of 8).
421+ %cst = arith.constant dense <[
422+ 0 , 8 , 16 , 24 , 32 , 40 , 48 , 56 ,
423+ 64 , 72 , 80 , 88 , 96 , 104 , 112 , 120 ,
424+ 128 , 136 , 144 , 152 , 160 , 168 , 176 , 184 ,
425+ 192 , 200 , 208 , 216 , 224 , 232 , 240 , 248
426+ ]> : vector <32 xindex >
427+
// vector.create_mask enables the first 17 of the 32 lanes.
// NOTE(review): 17 presumably chosen so the mask straddles the 16-lane split
// boundary -- confirm intent.
428+ %c17 = arith.constant 17 : index
429+ %mask = vector.create_mask %c17: vector <32 xi1 >
430+
// Splat value to store; every element holds 1023.0.
431+ %st_vec = arith.constant dense <1023. >: vector <32 x4 xf32 >
// chunk_size = 4: four f32 elements are written per offset.
432+ xegpu.store %st_vec , %src [%cst ], %mask {chunk_size = 4 , layout = #xegpu.layout <inst_data = [16 , 2 ]>, l1_hint = #xegpu.cache_hint <cached >} : vector <32 x4 xf32 >, ui64 , vector <32 xindex >, vector <32 xi1 >
433+ gpu.return
434+ }
435+
345436//-----
346437 // CHECK-LABEL: prefetch_chunk
347438 // CHECK-SAME: [[arg0:%.+]]: ui64
0 commit comments