@@ -5,24 +5,24 @@ gpu.module @test_round_robin_assignment {
55 // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
66 gpu.func @create_nd_tdesc (%src: memref <256 x128 xf32 >) {
77 // CHECK-COUNT-4: xegpu.create_nd_tdesc %[[ARG_0]][%{{.*}}, %{{.*}}] : memref<256x128xf32>
8- // CHECK-SAME: -> !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [8, 4 ], lane_data = [1, 1]>>
8+ // CHECK-SAME: -> !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
99 // CHECK-NOT: xegpu.create_nd_tdesc
1010 %tdesc = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <256 x128 xf32 >
11- -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
11+ -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
1212 gpu.return
1313 }
1414
1515 // CHECK-LABEL: load_nd_tdesc
1616 // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
1717 gpu.func @load_nd_tdesc (%src: memref <256 x128 xf32 >) {
1818 %tdesc = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <256 x128 xf32 >
19- -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
19+ -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
2020 // CHECK-COUNT-4: xegpu.load_nd %{{.*}}
21- // CHECK-SAME-COUNT-4: : !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [8, 4 ], lane_data = [1, 1]>>
21+ // CHECK-SAME-COUNT-4: : !xegpu.tensor_desc<2x2xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
2222 // CHECK-SAME-COUNT-4: -> vector<16x16xf32>
2323 // CHECK-NOT: xegpu.load_nd
2424 %load = xegpu.load_nd %tdesc
25- : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
25+ : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
2626 -> vector <256 x128 xf32 >
2727 gpu.return
2828 }
@@ -31,36 +31,36 @@ gpu.module @test_round_robin_assignment {
3131 // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
3232 gpu.func @store_nd (%src: memref <256 x128 xf32 >) {
3333 %tdesc = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <256 x128 xf32 >
34- -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
34+ -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
3535 // CHECK-COUNT-4: xegpu.store_nd %{{.*}}, %{{.*}}
36- // CHECK-SAME-COUNT-4: : vector<16x16xf32>, !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [8, 4 ], lane_data = [1, 1]>>
36+ // CHECK-SAME-COUNT-4: : vector<16x16xf32>, !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
3737 // CHECK-NOT: xegpu.store_nd
3838 %load = xegpu.load_nd %tdesc
39- : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
39+ : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
4040 -> vector <256 x128 xf32 >
4141 xegpu.store_nd %load , %tdesc
42- : vector <256 x128 xf32 >, !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
42+ : vector <256 x128 xf32 >, !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
4343 gpu.return
4444 }
4545
4646 // CHECK-LABEL: update_nd
4747 // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
4848 gpu.func @update_nd (%src: memref <256 x128 xf32 >){
4949 %tdesc = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <256 x128 xf32 >
50- -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
50+ -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
5151 // CHECK-COUNT-4: xegpu.update_nd_offset %{{.*}}, [0, 16]
52- // CHECK-SAME-COUNT-4: : !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [8, 4 ], lane_data = [1, 1]>>>
52+ // CHECK-SAME-COUNT-4: : !xegpu.tensor_desc<16x16xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
5353 // CHECK-NOT: xegpu.update_nd_offset
5454 %update = xegpu.update_nd_offset %tdesc , [0 , 16 ]
55- : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
55+ : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
5656 gpu.return
5757 }
5858
5959 // CHECK-LABEL: dpas
6060 // CHECK-SAME: (%[[ARG_0:.*]]: memref<256x128xf16>, %[[ARG_1:.*]]: memref<128x256xf16>)
6161 gpu.func @dpas (%a: memref <256 x128 xf16 >, %b: memref <128 x256 xf16 >) {
6262 // CHECK-COUNT-4: xegpu.create_nd_tdesc %[[ARG_0]][%{{.*}}, %{{.*}}] : memref<256x128xf16>
63- // CHECK-SAME-COUNT-4: -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [8, 4 ], lane_data = [1, 1]>>
63+ // CHECK-SAME-COUNT-4: -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
6464 // CHECK-NOT: xegpu.create_nd_tdesc
6565 // CHECK-COUNT-4: xegpu.create_nd_tdesc %[[ARG_1]][%{{.*}}, %{{.*}}] : memref<128x256xf16>
6666 // CHECK-SAME-COUNT-4: -> !xegpu.tensor_desc<16x16xf16, #xegpu.layout<lane_layout = [4, 8], lane_data = [1, 1]>>
@@ -89,12 +89,12 @@ gpu.module @test_round_robin_assignment {
8989 // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
9090 gpu.func @prefetch_nd_tdesc (%src: memref <256 x128 xf32 >) {
9191 // CHECK-COUNT-4: xegpu.prefetch_nd %{{.*}}
92- // CHECK-SAME-COUNT-4: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<lane_layout = [8, 4 ], lane_data = [1, 1]>>
92+ // CHECK-SAME-COUNT-4: !xegpu.tensor_desc<256x128xf32, #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>>
9393 // CHECK-NOT: xegpu.prefetch_nd
9494 %tdesc = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <256 x128 xf32 >
95- -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
95+ -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
9696 xegpu.prefetch_nd %tdesc
97- : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [8 , 4 ], lane_data = [1 , 1 ]>>
97+ : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [16 , 16 ], lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
9898 gpu.return
9999 }
100100
0 commit comments