@@ -1952,7 +1952,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
1952
1952
; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
1953
1953
; AVX2-NEXT: vmovdqa (%rdi), %xmm1
1954
1954
; AVX2-NEXT: vpaddb (%rsi), %xmm1, %xmm1
1955
- ; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
1955
+ ; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
1956
1956
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1957
1957
; AVX2-NEXT: vpaddb (%rdx), %ymm0, %ymm0
1958
1958
; AVX2-NEXT: vmovdqa %ymm0, (%rcx)
@@ -1965,7 +1965,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
1965
1965
; AVX512F-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
1966
1966
; AVX512F-SLOW-NEXT: vmovdqa (%rdi), %xmm1
1967
1967
; AVX512F-SLOW-NEXT: vpaddb (%rsi), %xmm1, %xmm1
1968
- ; AVX512F-SLOW-NEXT: vpbroadcastq %xmm1, %ymm1
1968
+ ; AVX512F-SLOW-NEXT: vpbroadcastd %xmm1, %ymm1
1969
1969
; AVX512F-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1970
1970
; AVX512F-SLOW-NEXT: vpaddb (%rdx), %ymm0, %ymm0
1971
1971
; AVX512F-SLOW-NEXT: vmovdqa %ymm0, (%rcx)
@@ -1991,7 +1991,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
1991
1991
; AVX512DQ-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
1992
1992
; AVX512DQ-SLOW-NEXT: vmovdqa (%rdi), %xmm1
1993
1993
; AVX512DQ-SLOW-NEXT: vpaddb (%rsi), %xmm1, %xmm1
1994
- ; AVX512DQ-SLOW-NEXT: vpbroadcastq %xmm1, %ymm1
1994
+ ; AVX512DQ-SLOW-NEXT: vpbroadcastd %xmm1, %ymm1
1995
1995
; AVX512DQ-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
1996
1996
; AVX512DQ-SLOW-NEXT: vpaddb (%rdx), %ymm0, %ymm0
1997
1997
; AVX512DQ-SLOW-NEXT: vmovdqa %ymm0, (%rcx)
@@ -2016,7 +2016,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
2016
2016
; AVX512BW-SLOW-NEXT: vmovdqa64 (%rdi), %zmm0
2017
2017
; AVX512BW-SLOW-NEXT: vpaddb (%rsi), %zmm0, %zmm0
2018
2018
; AVX512BW-SLOW-NEXT: vextracti64x4 $1, %zmm0, %ymm1
2019
- ; AVX512BW-SLOW-NEXT: vpbroadcastq %xmm0, %ymm0
2019
+ ; AVX512BW-SLOW-NEXT: vpbroadcastd %xmm0, %ymm0
2020
2020
; AVX512BW-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
2021
2021
; AVX512BW-SLOW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
2022
2022
; AVX512BW-SLOW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -4649,11 +4649,10 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
4649
4649
; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm1
4650
4650
; AVX2-SLOW-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
4651
4651
; AVX2-SLOW-NEXT: vpaddb (%rsi), %xmm0, %xmm0
4652
- ; AVX2-SLOW-NEXT: vpbroadcastq %xmm0, %ymm2
4653
- ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2 [0],ymm1[1],ymm2 [2],ymm1[3],ymm2 [4,5,6,7]
4652
+ ; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %ymm0
4653
+ ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm0 [0],ymm1[1],ymm0 [2],ymm1[3],ymm0 [4,5,6,7]
4654
4654
; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
4655
4655
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
4656
- ; AVX2-SLOW-NEXT: vpbroadcastd %xmm0, %xmm0
4657
4656
; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
4658
4657
; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
4659
4658
; AVX2-SLOW-NEXT: vpaddb (%rdx), %ymm1, %ymm1
@@ -4669,7 +4668,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
4669
4668
; AVX2-FAST-PERLANE-NEXT: vmovdqa 48(%rdi), %xmm1
4670
4669
; AVX2-FAST-PERLANE-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
4671
4670
; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %xmm0, %xmm0
4672
- ; AVX2-FAST-PERLANE-NEXT: vpbroadcastq %xmm0, %ymm2
4671
+ ; AVX2-FAST-PERLANE-NEXT: vpbroadcastd %xmm0, %ymm2
4673
4672
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
4674
4673
; AVX2-FAST-PERLANE-NEXT: vpxor %xmm2, %xmm2, %xmm2
4675
4674
; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
@@ -4687,7 +4686,7 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
4687
4686
; AVX2-FAST-NEXT: vmovdqa 48(%rdi), %xmm1
4688
4687
; AVX2-FAST-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
4689
4688
; AVX2-FAST-NEXT: vpaddb (%rsi), %xmm0, %xmm0
4690
- ; AVX2-FAST-NEXT: vpbroadcastq %xmm0, %ymm2
4689
+ ; AVX2-FAST-NEXT: vpbroadcastd %xmm0, %ymm2
4691
4690
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
4692
4691
; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
4693
4692
; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
@@ -6582,7 +6581,7 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
6582
6581
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
6583
6582
; AVX2-NEXT: vmovdqa (%rdi), %xmm1
6584
6583
; AVX2-NEXT: vpaddb (%rsi), %xmm1, %xmm1
6585
- ; AVX2-NEXT: vpbroadcastq %xmm1, %ymm1
6584
+ ; AVX2-NEXT: vpbroadcastd %xmm1, %ymm1
6586
6585
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
6587
6586
; AVX2-NEXT: vpaddb 32(%rdx), %ymm0, %ymm1
6588
6587
; AVX2-NEXT: vpaddb (%rdx), %ymm0, %ymm0
0 commit comments