@@ -775,6 +775,38 @@ define <32 x i8> @combine_pshufb_pshufb_or_pshufb(<32 x i8> %a0) {
775775 ret <32 x i8 > %4
776776}
777777
778+ ; TODO: Not beneficial to concatenate both inputs just to create a 256-bit vpaddb: both 128-bit adds share %a0, yet the codegen checked below uses two vinserti128 to feed one ymm add.
779+ define <32 x i8 > @concat_add_unnecessary (<16 x i8 > %a0 , <16 x i8 > noundef %a1 , <16 x i8 > %a2 ) nounwind {
780+ ; CHECK-LABEL: concat_add_unnecessary:
781+ ; CHECK: # %bb.0:
782+ ; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
783+ ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
784+ ; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
785+ ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
786+ ; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0
787+ ; CHECK-NEXT: ret{{[l|q]}}
788+ %lo = add <16 x i8 > %a0 , %a1 ; result elements 0-15: %a0 + %a1
789+ %hi = add <16 x i8 > %a0 , %a2 ; result elements 16-31: %a0 + %a2 (same %a0 operand as %lo)
790+ %res = shufflevector <16 x i8 > %lo , <16 x i8 > %hi , <32 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 > ; identity mask 0..31: concatenates %lo and %hi into one 32-element vector
791+ ret <32 x i8 > %res
792+ }
793+
794+ ; TODO: Not beneficial to concatenate both inputs just to create a 256-bit vpmullw: both 128-bit multiplies share %a0, yet the codegen checked below uses two vinserti128 to feed one ymm multiply. NOTE(review): %a1 is not noundef here, unlike in concat_add_unnecessary - confirm this is intentional.
795+ define <16 x i16 > @concat_mul_unnecessary (<8 x i16 > %a0 , <8 x i16 > %a1 , <8 x i16 > %a2 ) nounwind {
796+ ; CHECK-LABEL: concat_mul_unnecessary:
797+ ; CHECK: # %bb.0:
798+ ; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
799+ ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
800+ ; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
801+ ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
802+ ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
803+ ; CHECK-NEXT: ret{{[l|q]}}
804+ %lo = mul <8 x i16 > %a0 , %a1 ; result elements 0-7: %a0 * %a1
805+ %hi = mul <8 x i16 > %a0 , %a2 ; result elements 8-15: %a0 * %a2 (same %a0 operand as %lo)
806+ %res = shufflevector <8 x i16 > %lo , <8 x i16 > %hi , <16 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 > ; identity mask 0..15: concatenates %lo and %hi into one 16-element vector
807+ ret <16 x i16 > %res
808+ }
809+
778810; Not beneficial to concatenate both inputs just to create a 256-bit palignr
779811define <32 x i8 > @concat_alignr_unnecessary (<16 x i8 > %a0 , <16 x i8 > noundef %a1 , <16 x i8 > %a2 ) nounwind {
780812; CHECK-LABEL: concat_alignr_unnecessary:
0 commit comments