@@ -341,8 +341,8 @@ gpu.module @test {
341341gpu.func @vector_multi_reduction_dim1_distributed_dim0_reduction () {
342342 %0 = " some_def" () : () -> !xegpu.tensor_desc <1 x32 xf32 , #xegpu.layout <lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>>
343343 %src = " some_def" () {layout_result_0 = #xegpu.layout <lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>} : () -> (vector <16 x32 xf32 >)
344- %acc = arith.constant {layout_result_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>} dense <0.0 > : vector <32 xf32 >
345- %1 = vector.multi_reduction <add >, %src , %acc {layout_result_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>} [0 ]
344+ %acc = arith.constant {layout_result_0 = #xegpu.slice < #xegpu. layout <lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>, dims = [ 0 ]>} dense <0.0 > : vector <32 xf32 >
345+ %1 = vector.multi_reduction <add >, %src , %acc {layout_result_0 = #xegpu.slice < #xegpu. layout <lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>, dims = [ 0 ]>} [0 ]
346346 : vector <16 x32 xf32 > to vector <32 xf32 >
347347 %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout <lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>}
348348 : vector <32 xf32 > to vector <1 x32 xf32 >
@@ -394,10 +394,10 @@ gpu.module @test {
394394gpu.func @vector_multi_reduction_dim0_distributed_dim1_reduction () {
395395 %0 = " some_def" () : () -> !xegpu.tensor_desc <32 x1 xf32 , #xegpu.layout <lane_layout = [16 , 1 ], lane_data = [1 , 1 ]>>
396396 %src = " some_def" () {layout_result_0 = #xegpu.layout <lane_layout = [16 , 1 ], lane_data = [1 , 1 ]>} : () -> (vector <32 x16 xf32 >)
397- %acc = arith.constant {layout_result_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>} dense <0.0 > : vector <32 xf32 >
398- %1 = vector.multi_reduction <add >, %src , %acc {layout_result_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>} [1 ]
397+ %acc = arith.constant {layout_result_0 = #xegpu.slice < #xegpu. layout <lane_layout = [16 , 1 ], lane_data = [ 1 , 1 ]>, dims = [1 ]>} dense <0.0 > : vector <32 xf32 >
398+ %1 = vector.multi_reduction <add >, %src , %acc {layout_result_0 = #xegpu.slice < #xegpu. layout <lane_layout = [16 , 1 ], lane_data = [ 1 , 1 ]>, dims = [1 ]>} [1 ]
399399 : vector <32 x16 xf32 > to vector <32 xf32 >
400- %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout <lane_layout = [1 , 16 ], lane_data = [1 , 1 ]>}
400+ %3 = vector.shape_cast %1 {layout_result_0 = #xegpu.layout <lane_layout = [16 , 1 ], lane_data = [1 , 1 ]>}
401401 : vector <32 xf32 > to vector <32 x1 xf32 >
402402 xegpu.store_nd %3 , %0 : vector <32 x1 xf32 >, !xegpu.tensor_desc <32 x1 xf32 , #xegpu.layout <lane_layout = [16 , 1 ], lane_data = [1 , 1 ]>>
403403 gpu.return
0 commit comments