@@ -2837,7 +2837,8 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28372837; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28382838; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28392839; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
2840- ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2840+ ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2841+ ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
28412842; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
28422843; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
28432844; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2854,7 +2855,8 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28542855; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28552856; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28562857; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
2857- ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2858+ ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
2859+ ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
28582860; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
28592861; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
28602862; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -2870,7 +2872,7 @@ define void @vec384_i8_widen_to_i48_factor6_broadcast_to_v8i48_factor8(ptr %in.v
28702872; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
28712873; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,15,6,7,8,9,10,15,12,13,14]
28722874; AVX512BW-NEXT: vpbroadcastb %xmm0, %ymm0
2873- ; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
2875+ ; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
28742876; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
28752877; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
28762878; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -3098,7 +3100,8 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
30983100; AVX512F-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
30993101; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
31003102; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0
3101- ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3103+ ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3104+ ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
31023105; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
31033106; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
31043107; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3115,7 +3118,8 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
31153118; AVX512DQ-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
31163119; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
31173120; AVX512DQ-NEXT: vpbroadcastb %xmm0, %ymm0
3118- ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3121+ ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3122+ ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
31193123; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
31203124; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
31213125; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3131,7 +3135,7 @@ define void @vec384_i8_widen_to_i96_factor12_broadcast_to_v4i96_factor4(ptr %in.
31313135; AVX512BW-NEXT: vpalignr {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
31323136; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[15,0,1,2,3,4,5,6,7,8,9,10,15,12,13,14]
31333137; AVX512BW-NEXT: vpbroadcastb %xmm0, %ymm0
3134- ; AVX512BW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3138+ ; AVX512BW-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
31353139; AVX512BW-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
31363140; AVX512BW-NEXT: vpaddb (%rdx), %zmm0, %zmm0
31373141; AVX512BW-NEXT: vmovdqa64 %zmm0, (%rcx)
@@ -3864,11 +3868,12 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
38643868; AVX512F: # %bb.0:
38653869; AVX512F-NEXT: vmovdqa (%rdi), %xmm0
38663870; AVX512F-NEXT: vmovdqa 48(%rdi), %xmm1
3867- ; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38683871; AVX512F-NEXT: vpaddb (%rsi), %xmm0, %xmm0
38693872; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0
3873+ ; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3874+ ; AVX512F-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38703875; AVX512F-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
3871- ; AVX512F-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3876+ ; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
38723877; AVX512F-NEXT: vpaddb (%rdx), %ymm1, %ymm1
38733878; AVX512F-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
38743879; AVX512F-NEXT: vmovdqa %ymm0, 32(%rcx)
@@ -3880,11 +3885,12 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
38803885; AVX512DQ: # %bb.0:
38813886; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
38823887; AVX512DQ-NEXT: vmovdqa 48(%rdi), %xmm1
3883- ; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38843888; AVX512DQ-NEXT: vpaddb (%rsi), %xmm0, %xmm0
38853889; AVX512DQ-NEXT: vpbroadcastw %xmm0, %ymm0
3890+ ; AVX512DQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
3891+ ; AVX512DQ-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
38863892; AVX512DQ-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5],xmm0[6],xmm1[7]
3887- ; AVX512DQ-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
3893+ ; AVX512DQ-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm1
38883894; AVX512DQ-NEXT: vpaddb (%rdx), %ymm1, %ymm1
38893895; AVX512DQ-NEXT: vpaddb 32(%rdx), %ymm0, %ymm0
38903896; AVX512DQ-NEXT: vmovdqa %ymm0, 32(%rcx)
0 commit comments