@@ -27,11 +27,11 @@ gpu.module @test_kernel [#xevm.target<chip = "pvc">] {
2727 }
2828
2929 //CHECK-LABEL: load_store_matrix_plain_2d_input
30- gpu.func @load_store_matrix_plain (%arg0: memref <8192 xi8 , 3 >) -> f32 {
31-
32- %view = memref.view %arg0 [0 ][]: memref <8192 xi8 , 3 > to memref <64 x32 xf32 , 3 >
30+ gpu.func @load_store_matrix_plain_2d_input (%arg0: memref <8192 xi8 , 3 >) -> f32 {
31+ %c0 = arith.constant 0 : index
32+ %view = memref.view %arg0 [%c0 ][]: memref <8192 xi8 , 3 > to memref <64 x32 xf32 , 3 >
3333
34- %subview = memref.subview %view [64 , 0 ] [64 , 128 ] [1 , 1 ] : memref <64 x32 xf32 , 3 > to memref <32 x32 xf32 , strided <[32 , 1 ], offset : 1024 >, 3 >
34+ %subview = memref.subview %view [32 , 0 ] [32 , 32 ] [1 , 1 ] : memref <64 x32 xf32 , 3 > to memref <32 x32 xf32 , strided <[32 , 1 ], offset : 1024 >, 3 >
3535
3636 %0 = xegpu.create_mem_desc %subview : memref <32 x32 xf32 , strided <[32 , 1 ], offset : 1024 >, 3 > -> !xegpu.mem_desc <32 x32 xf32 >
3737
@@ -43,7 +43,7 @@ gpu.module @test_kernel [#xevm.target<chip = "pvc">] {
4343 //CHECK: llvm.load {{.*}} : !llvm.ptr<3> -> f32
4444
4545 %tid_x = gpu.thread_id x
46- %c0 = arith.constant 0 : index
46+
4747 %1 = xegpu.load_matrix %0 [%c0 , %tid_x ]: !xegpu.mem_desc <32 x32 xf32 >, index , index -> f32
4848
4949 //CHECK: llvm.store {{.*}}, {{.*}} : f32, !llvm.ptr<3>
@@ -59,15 +59,15 @@ gpu.module @test_kernel [#xevm.target<chip = "pvc">] {
5959 //CHECK-LABEL: load_store_matrix_blocked_strided
6060 gpu.func @load_store_matrix_blocked_strided (%arg0: memref <4096 xi8 , 3 >) -> f16 {
6161 %0 = xegpu.create_mem_desc %arg0 : memref <4096 xi8 , 3 > -> !xegpu.mem_desc <32 x64 xf16 , #xegpu.mem_layout <stride = [1 , 32 ], block = [16 , 16 ]>>
62- //CHECK: %[[c0:.*]] = arith.constant 0 : index
62+
6363 //CHECK: %[[tid_x:.*]] = gpu.thread_id x
6464 //CHECK: %[[c13:.*]] = arith.constant 13 : index
6565 //CHECK: %[[c16:.*]] = arith.constant 16 : index
6666 //CHECK: %[[offsetx_0:.*]] = arith.divsi %[[c13]], %[[c16]] : index
6767 //CHECK: %[[offsetx_1:.*]] = arith.remsi %[[c13]], %[[c16]] : index
6868 //CHECK: %[[offsety_0:.*]] = arith.divsi %[[tid_x]], %[[c16]] : index
6969 //CHECK: %[[offsety_1:.*]] = arith.remsi %[[tid_x]], %[[c16]] : index
70-
70+ //CHECK: %[[c0:.*]] = arith.constant 0 : index
7171 //CHECK: %[[c256:.*]] = arith.constant 256 : index
7272 //CHECK: %[[mul0:.*]] = arith.muli %[[offsetx_0]], %[[c256]] : index
7373 //CHECK: %[[add0:.*]] = arith.addi %[[mul0]], %[[c0]] : index
@@ -98,22 +98,22 @@ gpu.module @test_kernel [#xevm.target<chip = "pvc">] {
9898 // its memory layout tuple (blocked shape, strides) is ([2,4,16,16],[1024,256,16,1])
9999 //CHECK-LABEL: load_store_matrix_blocked_nostride
100100 gpu.func @load_store_matrix_blocked_nostride (%arg0: memref <4096 xi8 , 3 >) -> f16 {
101- //CHECK: %[[c0:.*]] = arith.constant 0 : index
102- //CHECK: %[[view:.*]] = memref.view %arg0[%[[c0]]][] : memref<4096xi8, 3> to memref<2048xf16, 3>
101+
102+ //CHECK: %[[intptr:.*]] = memref.extract_aligned_pointer_as_index %arg0 : memref<4096xi8, 3> -> index
103+ //CHECK: %[[basePtrI64:.*]] = arith.index_castui %[[intptr]] : index to i32
103104 %0 = xegpu.create_mem_desc %arg0 : memref <4096 xi8 , 3 > -> !xegpu.mem_desc <32 x64 xf16 , #xegpu.mem_layout <block = [16 , 16 ]>>
104105
105106 //CHECK: %[[tid_x:.*]] = gpu.thread_id x
106107 //CHECK: %[[c19:.*]] = arith.constant 19 : index
107108 %tid_x = gpu.thread_id x
108109 %c19 = arith.constant 19 : index
109110
110- //CHECK: %[[intptr:.*]] = memref.extract_aligned_pointer_as_index %[[view]] : memref<2048xf16, 3> -> index
111- //CHECK: %[[basePtrI64:.*]] = arith.index_castui %[[intptr]] : index to i32
112111 //CHECK: %[[c16:.*]] = arith.constant 16 : index
113112 //CHECK: %[[offsetx_0:.*]] = arith.divsi %[[c19]], %[[c16]] : index
114113 //CHECK: %[[offsetx_1:.*]] = arith.remsi %[[c19]], %[[c16]] : index
115114 //CHECK: %[[offsety_0:.*]] = arith.divsi %[[tid_x]], %[[c16]] : index
116115 //CHECK: %[[offsety_1:.*]] = arith.remsi %[[tid_x]], %[[c16]] : index
116+ //CHECK: %[[c0:.*]] = arith.constant 0 : index
117117 //CHECK: %[[c1024:.*]] = arith.constant 1024 : index
118118 //CHECK: %[[mul0:.*]] = arith.muli %[[offsetx_0]], %[[c1024]] : index
119119 //CHECK: %[[add0:.*]] = arith.addi %[[mul0]], %[[c0]] : index
@@ -125,7 +125,6 @@ gpu.module @test_kernel [#xevm.target<chip = "pvc">] {
125125 //CHECK: %[[c1:.*]] = arith.constant 1 : index
126126 //CHECK: %[[mul3:.*]] = arith.muli %[[offsety_1]], %[[c1]] : index
127127 //CHECK: %[[add3:.*]] = arith.addi %[[mul3]], %[[add2]] : index
128-
129128 //CHECK: %[[loaded:.*]] = llvm.load {{.*}} : !llvm.ptr<3> -> f16
130129 %1 = xegpu.load_matrix %0 [%c19 , %tid_x ]: !xegpu.mem_desc <32 x64 xf16 , #xegpu.mem_layout <block = [16 , 16 ]>>, index , index -> f16
131130
@@ -142,15 +141,13 @@ gpu.module @test_kernel [#xevm.target<chip = "pvc">] {
142141 gpu.func @load_store_matrix_blocked_strided_return_vector (%arg0: memref <4096 xi8 , 3 >) -> vector <8 xf16 > {
143142 %0 = xegpu.create_mem_desc %arg0 : memref <4096 xi8 , 3 > -> !xegpu.mem_desc <32 x64 xf16 , #xegpu.mem_layout <stride = [1 , 32 ], block = [16 , 16 ]>>
144143
145- //CHECK: %[[c0:.*]] = arith.constant 0 : index
146144 //CHECK: %[[tid_x:.*]] = gpu.thread_id x
147-
148145 //CHECK: %[[c16:.*]] = arith.constant 16 : index
149146 //CHECK: %[[offsetx_0:.*]] = arith.divsi %[[c16]], %[[c16]] : index
150147 //CHECK: %[[offsetx_1:.*]] = arith.remsi %[[c16]], %[[c16]] : index
151148 //CHECK: %[[offsety_0:.*]] = arith.divsi %[[tid_x]], %[[c16]] : index
152149 //CHECK: %[[offsety_1:.*]] = arith.remsi %[[tid_x]], %[[c16]] : index
153-
150+ //CHECK: %[[c0:.*]] = arith.constant 0 : index
154151 //CHECK: %[[c256:.*]] = arith.constant 256 : index
155152 //CHECK: %[[mul0:.*]] = arith.muli %[[offsetx_0]], %[[c256]] : index
156153 //CHECK: %[[add0:.*]] = arith.addi %[[mul0]], %[[c0]] : index
@@ -180,23 +177,22 @@ gpu.module @test_kernel [#xevm.target<chip = "pvc">] {
180177 // its memory layout tuple (blocked shape, strides) is ([2,4,16,16],[1024,256,16,1])
181178 //CHECK-LABEL: load_store_matrix_blocked_subgroupblockio
182179 gpu.func @load_store_matrix_blocked_subgroupblockio (%arg0: memref <4096 xi8 , 3 >) -> vector <8 xf16 > {
183- //CHECK: %[[c0:.*]] = arith.constant 0 : index
184- //CHECK: %[[view:.*]] = memref.view %arg0[%[[c0]]][] : memref<4096xi8, 3> to memref<2048xf16, 3>
185-
186- %0 = xegpu.create_mem_desc %arg0 : memref <4096 xi8 , 3 > -> !xegpu.mem_desc <32 x64 xf16 , #xegpu.mem_layout <block = [16 , 16 ]>>
187-
180+
181+ //CHECK: %[[intptr:.*]] = memref.extract_aligned_pointer_as_index %arg0 : memref<4096xi8, 3> -> index
182+ //CHECK: %[[basePtrI64:.*]] = arith.index_castui %[[intptr]] : index to i32
183+ %0 = xegpu.create_mem_desc %arg0 : memref <4096 xi8 , 3 > -> !xegpu.mem_desc <32 x64 xf16 , #xegpu.mem_layout <block = [16 , 16 ]>>
184+
185+
188186 //CHECK: %[[c16:.*]] = arith.constant 16 : index
189187 //CHECK: %[[c48:.*]] = arith.constant 48 : index
190-
191188 %c16 = arith.constant 16 : index
192189 %c48 = arith.constant 48 : index
193190
194- //CHECK: %[[intptr:.*]] = memref.extract_aligned_pointer_as_index %[[view]] : memref<2048xf16, 3> -> index
195- //CHECK: %[[basePtrI64:.*]] = arith.index_castui %[[intptr]] : index to i32
196191 //CHECK: %[[offset0:.*]] = arith.divsi %[[c16]], %[[c16]] : index
197192 //CHECK: %[[offset1:.*]] = arith.remsi %[[c16]], %[[c16]] : index
198193 //CHECK: %[[offset2:.*]] = arith.divsi %[[c48]], %[[c16]] : index
199194 //CHECK: %[[offset3:.*]] = arith.remsi %[[c48]], %[[c16]] : index
195+ //CHECK: %[[c0:.*]] = arith.constant 0 : index
200196 //CHECK: %[[c1024:.*]] = arith.constant 1024 : index
201197 //CHECK: %[[mul0:.*]] = arith.muli %[[offset0]], %[[c1024]] : index
202198 //CHECK: %[[add0:.*]] = arith.addi %[[mul0]], %[[c0]] : index
0 commit comments