@@ -775,31 +775,27 @@ define <32 x i8> @combine_pshufb_pshufb_or_pshufb(<32 x i8> %a0) {
775775 ret <32 x i8 > %4
776776}
777777
778- ; TODO: Not beneficial to concatenate both inputs just to create a 256-bit vpaddb
778+ ; Not beneficial to concatenate both inputs just to create a 256-bit vpaddb
779779define <32 x i8 > @concat_add_unnecessary (<16 x i8 > %a0 , <16 x i8 > noundef %a1 , <16 x i8 > %a2 ) nounwind {
780780; CHECK-LABEL: concat_add_unnecessary:
781781; CHECK: # %bb.0:
782- ; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
783- ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
784- ; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
785- ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
786- ; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0
782+ ; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm1
783+ ; CHECK-NEXT: vpaddb %xmm2, %xmm0, %xmm0
784+ ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
787785; CHECK-NEXT: ret{{[l|q]}}
; %lo and %hi are two 128-bit (<16 x i8>) adds that both use %a0 as an operand.
; The expected output keeps them as two xmm vpaddb instructions followed by a
; single vinserti128, instead of two vinserti128 feeding one ymm vpaddb.
788786 %lo = add <16 x i8 > %a0 , %a1
789787 %hi = add <16 x i8 > %a0 , %a2
; Mask 0..31 takes all 16 lanes of %lo followed by all 16 lanes of %hi, i.e. a
; plain concatenation of the two halves into the <32 x i8> result.
790788 %res = shufflevector <16 x i8 > %lo , <16 x i8 > %hi , <32 x i32 > <i32 0 , i32 1 , i32 2 , i32 3 , i32 4 , i32 5 , i32 6 , i32 7 , i32 8 , i32 9 , i32 10 , i32 11 , i32 12 , i32 13 , i32 14 , i32 15 , i32 16 , i32 17 , i32 18 , i32 19 , i32 20 , i32 21 , i32 22 , i32 23 , i32 24 , i32 25 , i32 26 , i32 27 , i32 28 , i32 29 , i32 30 , i32 31 >
791789 ret <32 x i8 > %res
792790}
793791
794- ; TODO: Not beneficial to concatenate both inputs just to create a 256-bit vpmullw
792+ ; Not beneficial to concatenate both inputs just to create a 256-bit vpmullw
795793define <16 x i16 > @concat_mul_unnecessary (<8 x i16 > %a0 , <8 x i16 > %a1 , <8 x i16 > %a2 ) nounwind {
796794; CHECK-LABEL: concat_mul_unnecessary:
797795; CHECK: # %bb.0:
798- ; CHECK-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
799- ; CHECK-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
800- ; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
801- ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
802- ; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
796+ ; CHECK-NEXT: vpmullw %xmm1, %xmm0, %xmm1
797+ ; CHECK-NEXT: vpmullw %xmm2, %xmm0, %xmm0
798+ ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
803799; CHECK-NEXT: ret{{[l|q]}}
804800 %lo = mul <8 x i16 > %a0 , %a1
805801 %hi = mul <8 x i16 > %a0 , %a2
0 commit comments