@@ -3567,14 +3567,13 @@ define void @vec384_i16_widen_to_i32_factor2_broadcast_to_v12i32_factor12(ptr %i
35673567; AVX-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
35683568; AVX-NEXT: vpshuflw {{.*#+}} xmm2 = xmm0[0,0,0,0,4,5,6,7]
35693569; AVX-NEXT: vpunpcklwd {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
3570- ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
35713570; AVX-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
35723571; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
35733572; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
3574- ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), % xmm0, % xmm0
3573+ ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1],zero,zero,xmm0[0,1],zero,zero, xmm0[0,1],zero,zero, xmm0[0,1],zero,zero
35753574; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
3576- ; AVX-NEXT: vmovdqa %xmm1, (%rcx)
35773575; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
3576+ ; AVX-NEXT: vmovdqa %xmm1, (%rcx)
35783577; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
35793578; AVX-NEXT: retq
35803579;
@@ -3757,14 +3756,14 @@ define void @vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8(ptr %in.
37573756; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
37583757; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2],xmm0[3],xmm1[4,5],xmm0[6],xmm1[7]
37593758; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
3760- ; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm0[2],xmm2[3,4],xmm0[5],xmm2[6,7]
3759+ ; AVX-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0,1],xmm0[2],xmm2[3,4],xmm0[5],xmm2[6,7]
37613760; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
3762- ; AVX-NEXT: vpaddb 32(%rdx), %xmm2 , %xmm2
3763- ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), % xmm0, % xmm0
3761+ ; AVX-NEXT: vpaddb 32(%rdx), %xmm3 , %xmm3
3762+ ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1],xmm2[2,3], xmm0[4],xmm2[5,6], xmm0[7]
37643763; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
37653764; AVX-NEXT: vmovdqa %xmm1, (%rcx)
37663765; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
3767- ; AVX-NEXT: vmovdqa %xmm2 , 32(%rcx)
3766+ ; AVX-NEXT: vmovdqa %xmm3 , 32(%rcx)
37683767; AVX-NEXT: retq
37693768;
37703769; AVX2-LABEL: vec384_i16_widen_to_i48_factor3_broadcast_to_v8i48_factor8:
@@ -3955,10 +3954,9 @@ define void @vec384_i16_widen_to_i64_factor4_broadcast_to_v6i64_factor6(ptr %in.
39553954; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
39563955; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
39573956; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
3958- ; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
3957+ ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7]
39593958; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
3960- ; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
3961- ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
3959+ ; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
39623960; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
39633961; AVX-NEXT: vmovdqa %xmm1, (%rcx)
39643962; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
@@ -4181,17 +4179,16 @@ define void @vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4(ptr %in.
41814179; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
41824180; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,0,0,0]
41834181; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5],xmm2[6],xmm1[7]
4184- ; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,1,0,1]
4185- ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
4182+ ; AVX-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[0,0,1,1]
41864183; AVX-NEXT: vpxor %xmm3, %xmm3, %xmm3
4187- ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm3[0,1],xmm0 [2],xmm3[3,4,5,6,7]
4184+ ; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2 [2],xmm3[3,4,5,6,7]
41884185; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
4189- ; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm0
4190- ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2
4191- ; AVX-NEXT: vpaddb 16(%rdx), %xmm2, %xmm2
4186+ ; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
4187+ ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1],zero,zero,zero,zero,zero,zero
4188+ ; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
4189+ ; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
41924190; AVX-NEXT: vmovdqa %xmm1, (%rcx)
4193- ; AVX-NEXT: vmovdqa %xmm2, 16(%rcx)
4194- ; AVX-NEXT: vmovdqa %xmm0, 32(%rcx)
4191+ ; AVX-NEXT: vmovdqa %xmm2, 32(%rcx)
41954192; AVX-NEXT: retq
41964193;
41974194; AVX2-LABEL: vec384_i16_widen_to_i96_factor6_broadcast_to_v4i96_factor4:
@@ -4379,10 +4376,9 @@ define void @vec384_i16_widen_to_i128_factor8_broadcast_to_v3i128_factor3(ptr %i
43794376; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
43804377; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
43814378; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
4382- ; AVX-NEXT: vpblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3,4,5,6,7]
4379+ ; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3,4,5,6,7]
43834380; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
4384- ; AVX-NEXT: vpaddb 32(%rdx), %xmm2, %xmm2
4385- ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm0, %xmm0
4381+ ; AVX-NEXT: vpaddb 32(%rdx), %xmm0, %xmm2
43864382; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
43874383; AVX-NEXT: vmovdqa %xmm1, (%rcx)
43884384; AVX-NEXT: vmovdqa %xmm0, 16(%rcx)
@@ -4517,10 +4513,9 @@ define void @vec384_i16_widen_to_i192_factor12_broadcast_to_v2i192_factor2(ptr %
45174513; AVX-NEXT: vpaddb (%rsi), %xmm0, %xmm0
45184514; AVX-NEXT: vpaddb 48(%rsi), %xmm1, %xmm1
45194515; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3,4,5,6,7]
4520- ; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
45214516; AVX-NEXT: vmovaps 32(%rdx), %ymm2
45224517; AVX-NEXT: vpaddb (%rdx), %xmm1, %xmm1
4523- ; AVX-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), % xmm0, % xmm0
4518+ ; AVX-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero, xmm0[0,1],zero,zero,zero,zero,zero,zero
45244519; AVX-NEXT: vpaddb 16(%rdx), %xmm0, %xmm0
45254520; AVX-NEXT: vmovaps %ymm2, 32(%rcx)
45264521; AVX-NEXT: vmovdqa %xmm1, (%rcx)
0 commit comments