@@ -1560,8 +1560,7 @@ define void @vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4(ptr %in.
15601560;
15611561; AVX2-LABEL: vec256_i32_widen_to_i64_factor2_broadcast_to_v4i64_factor4:
15621562; AVX2: # %bb.0:
1563- ; AVX2-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
1564- ; AVX2-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
1563+ ; AVX2-NEXT: vpbroadcastd (%rdi), %ymm0
15651564; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],mem[1],ymm0[2],mem[3],ymm0[4],mem[5],ymm0[6],mem[7]
15661565; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
15671566; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
@@ -3723,53 +3722,49 @@ define void @vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6(ptr %in.
37233722;
37243723; AVX2-SLOW-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
37253724; AVX2-SLOW: # %bb.0:
3726- ; AVX2-SLOW-NEXT: vpbroadcastd (%rdi), %xmm0
3727- ; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
3728- ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
3729- ; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm1
3730- ; AVX2-SLOW-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
3731- ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
3732- ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
3725+ ; AVX2-SLOW-NEXT: vmovdqa 48(%rdi), %xmm0
3726+ ; AVX2-SLOW-NEXT: vpbroadcastd (%rdi), %ymm1
3727+ ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4,5,6,7]
37333728; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
3734- ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
3735- ; AVX2-SLOW-NEXT: vpaddb (%rsi), %ymm1, %ymm1
3736- ; AVX2-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
3737- ; AVX2-SLOW-NEXT: vmovdqa %ymm0, 32(%rdx)
3738- ; AVX2-SLOW-NEXT: vmovdqa %ymm1, (%rdx)
3729+ ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm2[5],ymm0[6],ymm2[7]
3730+ ; AVX2-SLOW-NEXT: vpxor %xmm2, %xmm2, %xmm2
3731+ ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
3732+ ; AVX2-SLOW-NEXT: vpaddb 32(%rsi), %ymm1, %ymm1
3733+ ; AVX2-SLOW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3734+ ; AVX2-SLOW-NEXT: vmovdqa %ymm0, (%rdx)
3735+ ; AVX2-SLOW-NEXT: vmovdqa %ymm1, 32(%rdx)
37393736; AVX2-SLOW-NEXT: vzeroupper
37403737; AVX2-SLOW-NEXT: retq
37413738;
37423739; AVX2-FAST-PERLANE-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
37433740; AVX2-FAST-PERLANE: # %bb.0:
37443741; AVX2-FAST-PERLANE-NEXT: vmovdqa (%rdi), %xmm0
37453742; AVX2-FAST-PERLANE-NEXT: vmovdqa 48(%rdi), %xmm1
3746- ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
3747- ; AVX2-FAST-PERLANE-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
3748- ; AVX2-FAST-PERLANE-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
3749- ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
3750- ; AVX2-FAST-PERLANE-NEXT: vpxor %xmm2, %xmm2, %xmm2
3751- ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
3752- ; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %ymm1, %ymm1
3753- ; AVX2-FAST-PERLANE-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
3754- ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, 32(%rdx)
3755- ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, (%rdx)
3743+ ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
3744+ ; AVX2-FAST-PERLANE-NEXT: vpbroadcastd %xmm0, %ymm0
3745+ ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4,5,6,7]
3746+ ; AVX2-FAST-PERLANE-NEXT: vpxor %xmm1, %xmm1, %xmm1
3747+ ; AVX2-FAST-PERLANE-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6],ymm1[7]
3748+ ; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3749+ ; AVX2-FAST-PERLANE-NEXT: vpaddb 32(%rsi), %ymm2, %ymm1
3750+ ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, 32(%rdx)
3751+ ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, (%rdx)
37563752; AVX2-FAST-PERLANE-NEXT: vzeroupper
37573753; AVX2-FAST-PERLANE-NEXT: retq
37583754;
37593755; AVX2-FAST-LABEL: vec384_i32_widen_to_i64_factor2_broadcast_to_v6i64_factor6:
37603756; AVX2-FAST: # %bb.0:
37613757; AVX2-FAST-NEXT: vmovdqa (%rdi), %xmm0
37623758; AVX2-FAST-NEXT: vmovdqa 48(%rdi), %xmm1
3763- ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
3764- ; AVX2-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm2 = mem[0,1,0,1]
3765- ; AVX2-FAST-NEXT: vpshufd {{.*#+}} ymm2 = ymm2[0,1,0,1,4,5,4,5]
3766- ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm2[0],ymm1[1],ymm2[2],ymm1[3],ymm2[4,5,6,7]
3767- ; AVX2-FAST-NEXT: vpxor %xmm2, %xmm2, %xmm2
3768- ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm2[5],ymm1[6],ymm2[7]
3769- ; AVX2-FAST-NEXT: vpaddb (%rsi), %ymm1, %ymm1
3770- ; AVX2-FAST-NEXT: vpaddb 32(%rsi), %ymm0, %ymm0
3771- ; AVX2-FAST-NEXT: vmovdqa %ymm0, 32(%rdx)
3772- ; AVX2-FAST-NEXT: vmovdqa %ymm1, (%rdx)
3759+ ; AVX2-FAST-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[0,1,2,3],zero,zero,zero,zero,xmm0[0,1,2,3],zero,zero,zero,zero
3760+ ; AVX2-FAST-NEXT: vpbroadcastd %xmm0, %ymm0
3761+ ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4,5,6,7]
3762+ ; AVX2-FAST-NEXT: vpxor %xmm1, %xmm1, %xmm1
3763+ ; AVX2-FAST-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4],ymm1[5],ymm0[6],ymm1[7]
3764+ ; AVX2-FAST-NEXT: vpaddb (%rsi), %ymm0, %ymm0
3765+ ; AVX2-FAST-NEXT: vpaddb 32(%rsi), %ymm2, %ymm1
3766+ ; AVX2-FAST-NEXT: vmovdqa %ymm1, 32(%rdx)
3767+ ; AVX2-FAST-NEXT: vmovdqa %ymm0, (%rdx)
37733768; AVX2-FAST-NEXT: vzeroupper
37743769; AVX2-FAST-NEXT: retq
37753770;
@@ -5317,40 +5312,17 @@ define void @vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8(ptr %in.
53175312; AVX-NEXT: vzeroupper
53185313; AVX-NEXT: retq
53195314;
5320- ; AVX2-SLOW-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
5321- ; AVX2-SLOW: # %bb.0:
5322- ; AVX2-SLOW-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
5323- ; AVX2-SLOW-NEXT: vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
5324- ; AVX2-SLOW-NEXT: vpxor %xmm1, %xmm1, %xmm1
5325- ; AVX2-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
5326- ; AVX2-SLOW-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
5327- ; AVX2-SLOW-NEXT: vpaddb (%rsi), %ymm0, %ymm0
5328- ; AVX2-SLOW-NEXT: vmovdqa %ymm0, (%rdx)
5329- ; AVX2-SLOW-NEXT: vmovdqa %ymm1, 32(%rdx)
5330- ; AVX2-SLOW-NEXT: vzeroupper
5331- ; AVX2-SLOW-NEXT: retq
5332- ;
5333- ; AVX2-FAST-PERLANE-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
5334- ; AVX2-FAST-PERLANE: # %bb.0:
5335- ; AVX2-FAST-PERLANE-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
5336- ; AVX2-FAST-PERLANE-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero
5337- ; AVX2-FAST-PERLANE-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
5338- ; AVX2-FAST-PERLANE-NEXT: vpaddb (%rsi), %ymm0, %ymm0
5339- ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm0, (%rdx)
5340- ; AVX2-FAST-PERLANE-NEXT: vmovdqa %ymm1, 32(%rdx)
5341- ; AVX2-FAST-PERLANE-NEXT: vzeroupper
5342- ; AVX2-FAST-PERLANE-NEXT: retq
5343- ;
5344- ; AVX2-FAST-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
5345- ; AVX2-FAST: # %bb.0:
5346- ; AVX2-FAST-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
5347- ; AVX2-FAST-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero,ymm0[16,17,18,19],zero,zero,zero,zero
5348- ; AVX2-FAST-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
5349- ; AVX2-FAST-NEXT: vpaddb (%rsi), %ymm0, %ymm0
5350- ; AVX2-FAST-NEXT: vmovdqa %ymm0, (%rdx)
5351- ; AVX2-FAST-NEXT: vmovdqa %ymm1, 32(%rdx)
5352- ; AVX2-FAST-NEXT: vzeroupper
5353- ; AVX2-FAST-NEXT: retq
5315+ ; AVX2-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
5316+ ; AVX2: # %bb.0:
5317+ ; AVX2-NEXT: vpbroadcastd (%rdi), %ymm0
5318+ ; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
5319+ ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
5320+ ; AVX2-NEXT: vpaddb 32(%rsi), %ymm0, %ymm1
5321+ ; AVX2-NEXT: vpaddb (%rsi), %ymm0, %ymm0
5322+ ; AVX2-NEXT: vmovdqa %ymm0, (%rdx)
5323+ ; AVX2-NEXT: vmovdqa %ymm1, 32(%rdx)
5324+ ; AVX2-NEXT: vzeroupper
5325+ ; AVX2-NEXT: retq
53545326;
53555327; AVX512F-LABEL: vec512_i32_widen_to_i64_factor2_broadcast_to_v8i64_factor8:
53565328; AVX512F: # %bb.0:
0 commit comments