@@ -530,7 +530,7 @@ gpu.module @xevm_module{
 // CHECK-NEXT: }
 // CHECK-NEXT: %[[T1:.*]] = vector.transpose %[[W]]#1, [1, 0] : vector<1x2xf32> to vector<2x1xf32>
 gpu.module @xevm_module {
-  gpu.func @vector_transpose(%arg0: memref<2x16xf32>, %laneid: index) {
+  gpu.func @vector_transpose(%laneid: index) {
     %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<2x1xf32>) {
       %cst = "some_op"()
         {layout_result_0 = #xegpu.layout<lane_layout = [16, 1], lane_data = [1, 1]>}
@@ -556,7 +556,7 @@ gpu.module @xevm_module{
 // CHECK: }
 // CHECK: vector.bitcast %[[W]]#1 : vector<4x2xi8> to vector<4x1xi16>
 gpu.module @xevm_module {
-  gpu.func @vector_bitcast(%arg0: memref<4x16xi16>, %laneid: index) {
+  gpu.func @vector_bitcast(%laneid: index) {
     %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<4x1xi16>) {
       %cst = "some_op"()
         {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 2]>}
@@ -573,3 +573,82 @@ gpu.module @xevm_module{
     gpu.return
   }
 }
+
+// -----
+// CHECK-LABEL: gpu.func @vector_shapecast_rank_increasing
+// CHECK: %{{.*}}:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<1x1xf32>, vector<1xf32>) {
+// CHECK: gpu.yield %{{.*}} : vector<1x16xf32>, vector<16xf32>
+// CHECK: }
+// CHECK: %{{.*}} = vector.shape_cast %{{.*}}#1 : vector<1xf32> to vector<1x1xf32>
+gpu.module @xevm_module {
+  gpu.func @vector_shapecast_rank_increasing(%laneid: index) {
+    %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<1x1xf32>) {
+      %cst = "some_op"()
+        {layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>}
+        : () -> (vector<16xf32>)
+      %cast = vector.shape_cast %cst
+        {
+          layout_operand_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>,
+          layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
+        }
+        : vector<16xf32> to vector<1x16xf32>
+      gpu.yield %cast : vector<1x16xf32>
+    }
+    "some_user_op"(%r) : (vector<1x1xf32>) -> ()
+    gpu.return
+  }
+}
+
+// -----
+// CHECK-LABEL: gpu.func @vector_shapecast_rank_reducing(
+// CHECK: %{{.*}}:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<1xf32>, vector<1x1xf32>) {
+// CHECK: gpu.yield %{{.*}} : vector<16xf32>, vector<1x16xf32>
+// CHECK: }
+// CHECK: %{{.*}} = vector.shape_cast %{{.*}}#1 : vector<1x1xf32> to vector<1xf32>
+gpu.module @xevm_module {
+  gpu.func @vector_shapecast_rank_reducing(%laneid: index) {
+    %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<1xf32>) {
+      %cst = "some_op"()
+        {layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>}
+        : () -> (vector<1x16xf32>)
+      %cast = vector.shape_cast %cst
+        {
+          layout_operand_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>,
+          layout_result_0 = #xegpu.slice<#xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>, dims = [0]>
+        }
+        : vector<1x16xf32> to vector<16xf32>
+      gpu.yield %cast : vector<16xf32>
+    }
+    "some_user_op"(%r) : (vector<1xf32>) -> ()
+    gpu.return
+  }
+}
+
+// -----
+// NOTE: The layouts are valid, but distribution requires a slice layout on the
+// shape_cast operand. Without one, the shape_cast is not distributed and stays
+// inside the warp op.
+//
+// CHECK-LABEL: gpu.func @vector_shapecast_unsupported
+// CHECK: %[[W:.*]] = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<1x1xf32>) {
+// CHECK: %[[T1:.*]] = vector.shape_cast %{{.*}} : vector<16xf32> to vector<1x16xf32>
+// CHECK: gpu.yield %[[T1]] : vector<1x16xf32>
+// CHECK: }
+// CHECK: "some_user_op"(%[[W]]) : (vector<1x1xf32>) -> ()
+// CHECK: gpu.return
+gpu.module @xevm_module {
+  gpu.func @vector_shapecast_unsupported(%laneid: index) {
+    %r = gpu.warp_execute_on_lane_0(%laneid)[16] -> (vector<1x1xf32>) {
+      %cst = "some_op"()
+        {layout_result_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>}
+        : () -> (vector<16xf32>)
+      %cast = vector.shape_cast %cst
+        {
+          layout_operand_0 = #xegpu.layout<lane_layout = [16], lane_data = [1]>,
+          layout_result_0 = #xegpu.layout<lane_layout = [1, 16], lane_data = [1, 1]>
+        }
+        : vector<16xf32> to vector<1x16xf32>
+      gpu.yield %cast : vector<1x16xf32>
+    }
+    "some_user_op"(%r) : (vector<1x1xf32>) -> ()
+    gpu.return
+  }
+}