@@ -2837,8 +2837,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28372837; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28382838; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28392839; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
2840- ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2841- ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2840+ ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
28422841; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
28432842; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
28442843; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2855,8 +2854,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28552854; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28562855; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28572856; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
2858- ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2859- ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2857+ ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
28602858; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
28612859; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
28622860; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2872,7 +2870,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28722870; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28732871; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28742872; AVX512BW-NEXT: vpbroadcastb %xmm0, %ymm0
2875- ; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
2873+ ; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
28762874; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
28772875; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
28782876; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -3100,8 +3098,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
31003098; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
31013099; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
31023100; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
3103- ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3104- ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3101+ ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
31053102; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
31063103; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
31073104; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3118,8 +3115,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
31183115; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
31193116; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
31203117; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
3121- ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3122- ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3118+ ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
31233119; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
31243120; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
31253121; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3135,7 +3131,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
31353131; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
31363132; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
31373133; AVX512BW-NEXT: vpbroadcastb %xmm0, %ymm0
3138- ; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3134+ ; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
31393135; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
31403136; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
31413137; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -3868,12 +3864,11 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
38683864; AVX512F: # %bb.0:
38693865; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
38703866; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
3867+ ; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38713868; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
38723869; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
3873- ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3874- ; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38753870; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
3876- ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3871+ ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
38773872; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
38783873; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
38793874; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3885,12 +3880,11 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
38853880; AVX512DQ: # %bb.0:
38863881; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
38873882; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
3883+ ; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38883884; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
38893885; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
3890- ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3891- ; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38923886; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
3893- ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
3887+ ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
38943888; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
38953889; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
38963890; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
0 commit comments