@@ -651,7 +651,6 @@ gpu.func @vector_extract_strided_slice_inner_distributed(%laneid: index) {
651651 gpu.return
652652}
653653
654-
655654// CHECK-LABEL: gpu.func @vector_extract_strided_slice_1d
656655// CHECK: %[[W:.*]]:2 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<2xf32>, vector<4xf32>) {
657656// CHECK: %[[S:.*]] = "some_def"() : () -> vector<64xf32>
@@ -674,6 +673,42 @@ gpu.func @vector_extract_strided_slice_1d(%laneid: index) {
674673 gpu.return
675674}
676675
// Negative test for vector.extract_strided_slice inside gpu.warp_execute_on_lane_0.
// The slice starts at offset 3 of a vector<64xf32> while both layout attributes use
// lane_layout = [16]; 3 is not a multiple of 16, which is presumably what the test
// name means by an unsupported offset (confirm against the distribution pattern).
// The directives below expect the warp op to keep its single vector<2xf32> result and
// expect no vector.extract_strided_slice result after the `}` that follows it.
// NOTE(review): "unsopported" looks like a typo for "unsupported" (the insert tests in
// this file spell it that way); a rename must change the label directive and the
// gpu.func name together.
676+ // CHECK-LABEL: gpu.func @vector_extract_strided_slice_unsopported_offset
677+ // CHECK: %{{.*}} = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<2xf32>) {
678+ // CHECK: }
679+ // CHECK-NOT: %{{.*}} = vector.extract_strided_slice
680+ gpu.func @vector_extract_strided_slice_unsopported_offset (%laneid: index ) {
681+ %r = gpu.warp_execute_on_lane_0 (%laneid )[16 ] -> (vector <2 xf32 >) {
682+ %0 = " some_def" () : () -> (vector <64 xf32 >)
683+ %1 = vector.extract_strided_slice %0 { offsets = [3 ], sizes = [32 ], strides = [1 ],
684+ layout_operand_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>,
685+ layout_result_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>
686+ }
687+ : vector <64 xf32 > to vector <32 xf32 >
688+ gpu.yield %1 : vector <32 xf32 >
689+ }
690+ " some_use" (%r ) : (vector <2 xf32 >) -> ()
691+ gpu.return
692+ }
693+
// Negative test for vector.extract_strided_slice inside gpu.warp_execute_on_lane_0.
// The slice itself is offset 0 / size 32, but the source is vector<54xf32> and the
// layout attributes use lane_layout = [16]; 54 is not a multiple of 16, which is
// presumably why the source counts as unsupported (confirm against the pattern).
// The directives below expect the warp op to keep its single vector<2xf32> result and
// expect no vector.extract_strided_slice result after the `}` that follows it.
// NOTE(review): "unsopported" looks like a typo for "unsupported"; a rename must change
// the label directive and the gpu.func name together.
694+ // CHECK-LABEL: gpu.func @vector_extract_strided_slice_unsopported_source
695+ // CHECK: %{{.*}} = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<2xf32>) {
696+ // CHECK: }
697+ // CHECK-NOT: %{{.*}} = vector.extract_strided_slice
698+ gpu.func @vector_extract_strided_slice_unsopported_source (%laneid: index ) {
699+ %r = gpu.warp_execute_on_lane_0 (%laneid )[16 ] -> (vector <2 xf32 >) {
700+ %0 = " some_def" () : () -> (vector <54 xf32 >)
701+ %1 = vector.extract_strided_slice %0 { offsets = [0 ], sizes = [32 ], strides = [1 ],
702+ layout_operand_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>,
703+ layout_result_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>
704+ }
705+ : vector <54 xf32 > to vector <32 xf32 >
706+ gpu.yield %1 : vector <32 xf32 >
707+ }
708+ " some_use" (%r ) : (vector <2 xf32 >) -> ()
709+ gpu.return
710+ }
711+
677712// CHECK-LABEL: gpu.func @vector_insert_strided_slice_inner_distributed
678713// CHECK: %[[W:.*]]:3 = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<64x2xf32>, vector<16x1xf32>, vector<64x2xf32>) {
679714// CHECK-NEXT: %[[S:.*]] = "some_def"() : () -> vector<16x16xf32>
@@ -749,39 +784,43 @@ gpu.func @vector_insert_strided_slice_1d(%laneid: index) {
749784 gpu.return
750785}
751786
752- // CHECK-LABEL: gpu.func @vector_extract_strided_slice_unsopported_offset
753- // CHECK: %{{.*}} = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<2xf32>) {
754- // CHECK: }
755- // CHECK-NOT: %{{.*}} = vector.extract_strided_slice
756- gpu.func @vector_extract_strided_slice_unsopported_offset (%laneid: index ) {
757- %r = gpu.warp_execute_on_lane_0 (%laneid )[16 ] -> (vector <2 xf32 >) {
758- %0 = " some_def" () : () -> (vector <64 xf32 >)
759- %1 = vector.extract_strided_slice %0 { offsets = [3 ], sizes = [32 ], strides = [1 ],
760- layout_operand_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>,
761- layout_result_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>
762- }
763- : vector <64 xf32 > to vector <32 xf32 >
764- gpu.yield %1 : vector <32 xf32 >
// Negative test for vector.insert_strided_slice inside gpu.warp_execute_on_lane_0.
// A vector<8xf32> source is inserted at offset 16 into a vector<48xf32> destination
// while all layout attributes use lane_layout = [16]; the 8-element source is smaller
// than 16, which is presumably why it counts as unsupported (confirm against the
// distribution pattern).
// The directives below expect the warp op to keep its single vector<3xf32> result and
// expect no vector.insert_strided_slice result after the `}` that follows it.
787+ // CHECK-LABEL: gpu.func @vector_insert_strided_slice_unsupported_source
788+ // CHECK: %{{.*}} = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<3xf32>) {
789+ // CHECK: }
790+ // CHECK-NOT: %{{.*}} = vector.insert_strided_slice
791+ gpu.func @vector_insert_strided_slice_unsupported_source (%laneid: index ) {
792+ %r = gpu.warp_execute_on_lane_0 (%laneid )[16 ] -> (vector <3 xf32 >) {
793+ %0 = " some_def" () : () -> (vector <8 xf32 >)
794+ %1 = " some_def" () : () -> (vector <48 xf32 >)
795+ %2 = vector.insert_strided_slice %0 , %1 { offsets = [16 ], strides = [1 ],
796+ layout_operand_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>,
797+ layout_operand_1 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>,
798+ layout_result_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>
799+ }
800+ : vector <8 xf32 > into vector <48 xf32 >
801+ gpu.yield %2 : vector <48 xf32 >
765802 }
766- " some_use" (%r ) : (vector <2 x f32 >) -> ()
803+ " some_use" (%r ) : (vector <3 x f32 >) -> ()
767804 gpu.return
768805}
769806
770- // CHECK-LABEL: gpu.func @vector_extract_strided_slice_unsopported_source
771- // CHECK: %{{.*}} = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<2xf32>) {
772- // CHECK: }
773- // CHECK-NOT: %{{.*}} = vector.extract_strided_slice
774- gpu.func @vector_extract_strided_slice_unsopported_source (%laneid: index ) {
775- %r = gpu.warp_execute_on_lane_0 (%laneid )[16 ] -> (vector <2 xf32 >) {
776- %0 = " some_def" () : () -> (vector <54 xf32 >)
777- %1 = vector.extract_strided_slice %0 { offsets = [0 ], sizes = [32 ], strides = [1 ],
778- layout_operand_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>,
779- layout_result_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>
780- }
781- : vector <54 xf32 > to vector <32 xf32 >
782- gpu.yield %1 : vector <32 xf32 >
// Negative test for vector.insert_strided_slice inside gpu.warp_execute_on_lane_0.
// A vector<16xf32> source is inserted at offset 3 into a vector<48xf32> destination
// while all layout attributes use lane_layout = [16]; 3 is not a multiple of 16, which
// is presumably what makes the offset unsupported (confirm against the distribution
// pattern).
// The directives below expect the warp op to keep its single vector<3xf32> result and
// expect no vector.insert_strided_slice result after the `}` that follows it.
807+ // CHECK-LABEL: gpu.func @vector_insert_strided_slice_unsupported_offset
808+ // CHECK: %{{.*}} = gpu.warp_execute_on_lane_0(%{{.*}})[16] -> (vector<3xf32>) {
809+ // CHECK: }
810+ // CHECK-NOT: %{{.*}} = vector.insert_strided_slice
811+ gpu.func @vector_insert_strided_slice_unsupported_offset (%laneid: index ) {
812+ %r = gpu.warp_execute_on_lane_0 (%laneid )[16 ] -> (vector <3 xf32 >) {
813+ %0 = " some_def" () : () -> (vector <16 xf32 >)
814+ %1 = " some_def" () : () -> (vector <48 xf32 >)
815+ %2 = vector.insert_strided_slice %0 , %1 { offsets = [3 ], strides = [1 ],
816+ layout_operand_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>,
817+ layout_operand_1 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>,
818+ layout_result_0 = #xegpu.layout <lane_layout = [16 ], lane_data = [1 ]>
819+ }
820+ : vector <16 xf32 > into vector <48 xf32 >
821+ gpu.yield %2 : vector <48 xf32 >
783822 }
784- " some_use" (%r ) : (vector <2 x f32 >) -> ()
823+ " some_use" (%r ) : (vector <3 x f32 >) -> ()
785824 gpu.return
786825}
787826
0 commit comments