@@ -12867,46 +12867,25 @@ define void @mask_replication_factor8_vf8(ptr %in.maskvec, ptr %in.vec, ptr %out
1286712867; AVX512DQ-NEXT: vzeroupper
1286812868; AVX512DQ-NEXT: retq
1286912869;
12870- ; AVX512BW-ONLY-LABEL: mask_replication_factor8_vf8:
12871- ; AVX512BW-ONLY: # %bb.0:
12872- ; AVX512BW-ONLY-NEXT: kmovq (%rdi), %k0
12873- ; AVX512BW-ONLY-NEXT: vpmovm2b %k0, %zmm0
12874- ; AVX512BW-ONLY-NEXT: vpbroadcastq %xmm0, %zmm0
12875- ; AVX512BW-ONLY-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19,36,36,36,36,36,36,36,36,37,37,37,37,37,37,37,37,54,54,54,54,54,54,54,54,55,55,55,55,55,55,55,55]
12876- ; AVX512BW-ONLY-NEXT: vpmovb2m %zmm0, %k1
12877- ; AVX512BW-ONLY-NEXT: kshiftrq $16, %k1, %k2
12878- ; AVX512BW-ONLY-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k2} {z}
12879- ; AVX512BW-ONLY-NEXT: vmovdqa32 (%rsi), %zmm1 {%k1} {z}
12880- ; AVX512BW-ONLY-NEXT: kshiftrq $48, %k1, %k2
12881- ; AVX512BW-ONLY-NEXT: vmovdqa32 192(%rsi), %zmm2 {%k2} {z}
12882- ; AVX512BW-ONLY-NEXT: kshiftrq $32, %k1, %k1
12883- ; AVX512BW-ONLY-NEXT: vmovdqa32 128(%rsi), %zmm3 {%k1} {z}
12884- ; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm3, 128(%rdx)
12885- ; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm2, 192(%rdx)
12886- ; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm1, (%rdx)
12887- ; AVX512BW-ONLY-NEXT: vmovdqa64 %zmm0, 64(%rdx)
12888- ; AVX512BW-ONLY-NEXT: vzeroupper
12889- ; AVX512BW-ONLY-NEXT: retq
12890- ;
12891- ; AVX512VBMI-ONLY-LABEL: mask_replication_factor8_vf8:
12892- ; AVX512VBMI-ONLY: # %bb.0:
12893- ; AVX512VBMI-ONLY-NEXT: kmovq (%rdi), %k0
12894- ; AVX512VBMI-ONLY-NEXT: vpmovm2b %k0, %zmm0
12895- ; AVX512VBMI-ONLY-NEXT: vpmovsxbq %xmm0, %zmm0
12896- ; AVX512VBMI-ONLY-NEXT: vpmovb2m %zmm0, %k1
12897- ; AVX512VBMI-ONLY-NEXT: kshiftrq $16, %k1, %k2
12898- ; AVX512VBMI-ONLY-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k2} {z}
12899- ; AVX512VBMI-ONLY-NEXT: vmovdqa32 (%rsi), %zmm1 {%k1} {z}
12900- ; AVX512VBMI-ONLY-NEXT: kshiftrq $48, %k1, %k2
12901- ; AVX512VBMI-ONLY-NEXT: vmovdqa32 192(%rsi), %zmm2 {%k2} {z}
12902- ; AVX512VBMI-ONLY-NEXT: kshiftrq $32, %k1, %k1
12903- ; AVX512VBMI-ONLY-NEXT: vmovdqa32 128(%rsi), %zmm3 {%k1} {z}
12904- ; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm3, 128(%rdx)
12905- ; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm2, 192(%rdx)
12906- ; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm1, (%rdx)
12907- ; AVX512VBMI-ONLY-NEXT: vmovdqa64 %zmm0, 64(%rdx)
12908- ; AVX512VBMI-ONLY-NEXT: vzeroupper
12909- ; AVX512VBMI-ONLY-NEXT: retq
12870+ ; AVX512BW-LABEL: mask_replication_factor8_vf8:
12871+ ; AVX512BW: # %bb.0:
12872+ ; AVX512BW-NEXT: kmovq (%rdi), %k0
12873+ ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
12874+ ; AVX512BW-NEXT: vpmovsxbq %xmm0, %zmm0
12875+ ; AVX512BW-NEXT: vpmovb2m %zmm0, %k1
12876+ ; AVX512BW-NEXT: kshiftrq $16, %k1, %k2
12877+ ; AVX512BW-NEXT: vmovdqa32 64(%rsi), %zmm0 {%k2} {z}
12878+ ; AVX512BW-NEXT: vmovdqa32 (%rsi), %zmm1 {%k1} {z}
12879+ ; AVX512BW-NEXT: kshiftrq $48, %k1, %k2
12880+ ; AVX512BW-NEXT: vmovdqa32 192(%rsi), %zmm2 {%k2} {z}
12881+ ; AVX512BW-NEXT: kshiftrq $32, %k1, %k1
12882+ ; AVX512BW-NEXT: vmovdqa32 128(%rsi), %zmm3 {%k1} {z}
12883+ ; AVX512BW-NEXT: vmovdqa64 %zmm3, 128(%rdx)
12884+ ; AVX512BW-NEXT: vmovdqa64 %zmm2, 192(%rdx)
12885+ ; AVX512BW-NEXT: vmovdqa64 %zmm1, (%rdx)
12886+ ; AVX512BW-NEXT: vmovdqa64 %zmm0, 64(%rdx)
12887+ ; AVX512BW-NEXT: vzeroupper
12888+ ; AVX512BW-NEXT: retq
1291012889 %src.mask.padded = load <64 x i1>, ptr %in.maskvec, align 64
1291112890 %src.mask = shufflevector <64 x i1> %src.mask.padded, <64 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1291212891 %tgt.mask = shufflevector <8 x i1> %src.mask, <8 x i1> poison, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
0 commit comments