@@ -1754,10 +1754,9 @@ define void @vec256_i16_widen_to_i64_factor4_broadcast_to_v4i64_factor4(ptr %in.
17541754; AVX512BW: # %bb.0:
17551755; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
17561756; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
1757- ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1758- ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm2 = [16,1,2,3,16,5,6,7,16,9,10,11,16,13,14,15]
1759- ; AVX512BW-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
1760- ; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
1757+ ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,17,18,19,0,21,22,23,0,25,26,27,0,29,30,31]
1758+ ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
1759+ ; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
17611760; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
17621761; AVX512BW-NEXT: vzeroupper
17631762; AVX512BW-NEXT: retq
@@ -1870,10 +1869,9 @@ define void @vec256_i16_widen_to_i128_factor8_broadcast_to_v2i128_factor2(ptr %i
18701869; AVX512BW: # %bb.0:
18711870; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
18721871; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
1873- ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1874- ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm2 = [16,1,2,3,4,5,6,7,16,9,10,11,12,13,14,15]
1875- ; AVX512BW-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
1876- ; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
1872+ ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,17,18,19,20,21,22,23,0,25,26,27,28,29,30,31]
1873+ ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
1874+ ; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
18771875; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
18781876; AVX512BW-NEXT: vzeroupper
18791877; AVX512BW-NEXT: retq
@@ -3776,12 +3774,11 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
37763774; AVX512BW: # %bb.0:
37773775; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
37783776; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
3779- ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
3780- ; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [16,9,10,11,16,13,14,15,16,9,10,11,16,13,14,15]
3781- ; AVX512BW-NEXT: # ymm2 = mem[0,1,0,1]
3782- ; AVX512BW-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
3777+ ; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,25,26,27,0,29,30,31,0,25,26,27,0,29,30,31]
3778+ ; AVX512BW-NEXT: # ymm1 = mem[0,1,0,1]
3779+ ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm1
37833780; AVX512BW-NEXT: vpbroadcastw %xmm0, %ymm0
3784- ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
3781+ ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
37853782; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
37863783; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
37873784; AVX512BW-NEXT: vzeroupper
@@ -3911,11 +3908,10 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
39113908; AVX512BW: # %bb.0:
39123909; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
39133910; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
3914- ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
3915- ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm2 = [16,9,10,11,12,13,16,15,0,0,0,0,16,0,0,0]
3916- ; AVX512BW-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
3911+ ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,25,26,27,28,29,0,31,0,0,0,0,0,0,0,0]
3912+ ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm1
39173913; AVX512BW-NEXT: vpbroadcastw %xmm0, %ymm0
3918- ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
3914+ ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
39193915; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
39203916; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
39213917; AVX512BW-NEXT: vzeroupper
@@ -4037,11 +4033,10 @@ define void @vec384_i16_widen_to_i128_factor8_broadcast_to_v3i128_factor3(ptr %i
40374033; AVX512BW: # %bb.0:
40384034; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
40394035; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
4040- ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
4041- ; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = [16,9,10,11,12,13,14,15,16,9,10,11,12,13,14,15]
4042- ; AVX512BW-NEXT: # ymm2 = mem[0,1,0,1]
4043- ; AVX512BW-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
4044- ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
4036+ ; AVX512BW-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [0,25,26,27,28,29,30,31,0,25,26,27,28,29,30,31]
4037+ ; AVX512BW-NEXT: # ymm1 = mem[0,1,0,1]
4038+ ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm1
4039+ ; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
40454040; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
40464041; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
40474042; AVX512BW-NEXT: vzeroupper
@@ -4151,10 +4146,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
41514146; AVX512BW: # %bb.0:
41524147; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
41534148; AVX512BW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
4154- ; AVX512BW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
4155- ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm2 = [16,9,10,11,12,13,14,15,0,0,0,0,16,0,0,0]
4156- ; AVX512BW-NEXT: vpermi2w %ymm0, %ymm1, %ymm2
4157- ; AVX512BW-NEXT: vpaddb (%rdx), %zmm2, %zmm0
4149+ ; AVX512BW-NEXT: vpmovsxbw {{.*#+}} ymm1 = [0,25,26,27,28,29,30,31,0,0,0,0,0,0,0,0]
4150+ ; AVX512BW-NEXT: vpermw %zmm0, %zmm1, %zmm0
4151+ ; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
41584152; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
41594153; AVX512BW-NEXT: vzeroupper
41604154; AVX512BW-NEXT: retq
0 commit comments