@@ -370,8 +370,8 @@ gpu.module @test_distribution {
370370 // CHECK-SAME: %[[ARG_0:.*]]: memref<256x128xf32>
371371 gpu.func @vector_reduce (%src: memref <256 x128 xf32 >) {
372372 // CHECK: %[[CST:.*]] = arith.constant dense<1.000000e+00> : vector<32xf32>
373- // CHECK: %[[TDESC:.*]] = xegpu.create_nd_tdesc %[[ARG_0]][{{%.*}}, {{%.*}}] : memref<256x128xf32> -> !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [8, 4], lane_data = [1, 1]> >
374- // CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[TDESC]] : !xegpu.tensor_desc<32x32xf32, #xegpu.layout<lane_layout = [8, 4], lane_data = [1, 1]>> -> vector<32x32xf32 >
373+ // CHECK: %[[TDESC:.*]] = xegpu.create_nd_tdesc %[[ARG_0]][{{%.*}}, {{%.*}}] : memref<256x128xf32> -> !xegpu.tensor_desc<32x32xf32>
374+ // CHECK: %[[LOAD:.*]] = xegpu.load_nd %[[TDESC]] : !xegpu.tensor_desc<32x32xf32>
375375 // CHECK: %[[REDUCE:.*]] = vector.multi_reduction <add>, {{%.*}}, %[[CST]] [0] : vector<32x32xf32> to vector<32xf32>
376376 // CHECK: %[[SHAPECAST:.*]] = vector.shape_cast %[[REDUCE]] : vector<32xf32> to vector<1x32xf32>
377377 // CHECK: %[[ALLOCA:.*]] = memref.alloca() : memref<4096xi8, 3>
@@ -396,9 +396,9 @@ gpu.module @test_distribution {
396396 // CHECK: %[[REDUCE:.*]] = vector.multi_reduction <add>, %[[LOAD]], %[[CST]] [0] : vector<8x4xf32> to vector<4xf32>
397397 %cst = arith.constant {layout_result_0 = #xegpu.slice <#xegpu.layout <sg_layout = [8 , 4 ], sg_data = [32 , 32 ]>, dims = [0 ]>} dense <1.0 > : vector <128 xf32 >
398398 %tdesc = xegpu.create_nd_tdesc %src [0 , 0 ] : memref <256 x128 xf32 >
399- -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [32 , 32 ], lane_layout = [ 8 , 4 ], lane_data = [ 1 , 1 ] >>
399+ -> !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [32 , 32 ]>>
400400 %load = xegpu.load_nd %tdesc
401- : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [32 , 32 ], lane_layout = [ 8 , 4 ], lane_data = [ 1 , 1 ] >>
401+ : !xegpu.tensor_desc <256 x128 xf32 , #xegpu.layout <sg_layout = [8 , 4 ], sg_data = [32 , 32 ]>>
402402 -> vector <256 x128 xf32 >
403403 %reduce = vector.multi_reduction <add >, %load , %cst {layout_result_0 = #xegpu.slice <#xegpu.layout <sg_layout = [8 , 4 ], sg_data = [32 , 32 ]>, dims = [0 ]>} [0 ]
404404 : vector <256 x128 xf32 > to vector <128 xf32 >
0 commit comments