@@ -24,22 +24,22 @@ gpu.module @test_distribution {
2424
2525 // CHECK-LABEL: load_nd_tdesc_with_offset
2626 gpu.func @load_nd_tdesc_with_offset (%src: memref <256 x128 xf32 >) {
27- //CHECK: %[[SGID :.*]] = gpu.subgroup_id : index
28- //CHECK: %[[C4 :.*]] = arith.constant 4 : index
29- //CHECK: %[[SGIDX :.*]] = index.remu %[[SGID]], %[[C4]]
30- //CHECK: %[[SGIDY_TMP :.*]] = index.divu %[[SGID]], %[[C4]]
31- //CHECK: %[[C8 :.*]] = arith.constant 8 : index
32- //CHECK: %[[SGIDY :.*]] = index.remu %[[SGIDY_TMP]], %[[C8]]
33- //CHECK: %[[C32 :.*]] = arith.constant 32 : index
34- //CHECK: %[[L_OFF_Y :.*]] = index.mul %[[SGIDY]], %[[C32]]
35- //CHECK: %[[L_OFF_X :.*]] = index.mul %[[SGIDX ]], %[[C32]]
36- //CHECK: %[[C256 :.*]] = arith.constant 256 : index
37- //CHECK: %[[OFF_Y :.*]] = index.remu %[[L_OFF_Y]], %[[C256]]
38- //CHECK: %[[C128 :.*]] = arith.constant 128 : index
39- //CHECK: %[[OFF_X :.*]] = index.remu %[[L_OFF_X]], %[[C128]]
40- //CHECK: %[[TDESC :.*]] = xegpu.create_nd_tdesc %{{.*}}[%[[OFF_Y ]], %[[OFF_X]]] : memref<256x128xf32> -> !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
41- //CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[TDESC]][{{%.*}}, {{%.*}}] : !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<32x32xf32>
42- %tdesc = xegpu.create_nd_tdesc %src [ 0 , 0 ] : memref <256 x128 xf32 >
27+ //CHECK: %[[TDESC :.*]] = xegpu.create_nd_tdesc %{{.*}} : memref<256x128xf32> -> !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
28+ //CHECK-DAG : %[[SGID :.*]] = gpu.subgroup_id : index
29+ //CHECK-DAG : %[[C4 :.*]] = arith.constant 4 : index
30+ //CHECK-DAG : %[[SGIDX :.*]] = index.remu %[[SGID]], %[[C4]]
31+ //CHECK-DAG : %[[SGIDY_TMP :.*]] = index.divu %[[SGID]], %[[C4]]
32+ //CHECK-DAG : %[[C8 :.*]] = arith.constant 8 : index
33+ //CHECK-DAG : %[[SGIDY :.*]] = index.remu %[[SGIDY_TMP]], %[[C8]]
34+ //CHECK-DAG : %[[C32 :.*]] = arith.constant 32 : index
35+ //CHECK-DAG : %[[L_OFF_Y :.*]] = index.mul %[[SGIDY ]], %[[C32]]
36+ //CHECK-DAG : %[[L_OFF_X :.*]] = index.mul %[[SGIDX]], %[[C32]]
37+ //CHECK-DAG : %[[C256 :.*]] = arith.constant 256 : index
38+ //CHECK-DAG : %[[OFF_Y :.*]] = index.remu %[[L_OFF_Y]], %[[C256]]
39+ //CHECK-DAG : %[[C128 :.*]] = arith.constant 128 : index
40+ //CHECK-DAG : %[[OFF_X :.*]] = index.remu %[[L_OFF_X ]], %[[C128]]
41+ //CHECK-DAG : %[[LOAD:.*]] = xegpu.load_nd %[[TDESC]][{{%.*}}, {{%.*}}] : !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>> -> vector<32x32xf32>
42+ %tdesc = xegpu.create_nd_tdesc %src : memref <256 x128 xf32 >
4343 -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [32 , 32 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
4444 %load = xegpu.load_nd %tdesc [0 , 0 ]
4545 : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [32 , 32 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
0 commit comments