@@ -402,28 +402,36 @@ entry:
402402define <32 x i1 > @i64_mask_extract_32 (i64 %mask ) {
403403; X64-AVX512-LABEL: i64_mask_extract_32:
404404; X64-AVX512: # %bb.0: # %entry
405- ; X64-AVX512-NEXT: kmovq %rdi, %k0
406- ; X64-AVX512-NEXT: kshiftrd $8, %k0, %k1
407- ; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k1
408- ; X64-AVX512-NEXT: kshiftrq $24, %k0, %k2
409- ; X64-AVX512-NEXT: kshiftrd $16, %k0, %k0
410- ; X64-AVX512-NEXT: kunpckbw %k0, %k2, %k0
411- ; X64-AVX512-NEXT: kunpckwd %k1, %k0, %k0
405+ ; X64-AVX512-NEXT: movq %rdi, %rax
406+ ; X64-AVX512-NEXT: kmovd %eax, %k0
407+ ; X64-AVX512-NEXT: movzbl %ah, %ecx
408+ ; X64-AVX512-NEXT: kmovd %ecx, %k1
409+ ; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k0
410+ ; X64-AVX512-NEXT: movl %eax, %ecx
411+ ; X64-AVX512-NEXT: shrl $24, %ecx
412+ ; X64-AVX512-NEXT: kmovd %ecx, %k1
413+ ; X64-AVX512-NEXT: shrl $16, %eax
414+ ; X64-AVX512-NEXT: movzbl %al, %eax
415+ ; X64-AVX512-NEXT: kmovd %eax, %k2
416+ ; X64-AVX512-NEXT: kunpckbw %k2, %k1, %k1
417+ ; X64-AVX512-NEXT: kunpckwd %k0, %k1, %k0
412418; X64-AVX512-NEXT: vpmovm2b %k0, %ymm0
413419; X64-AVX512-NEXT: retq
414420;
415421; X64-KNL-LABEL: i64_mask_extract_32:
416422; X64-KNL: # %bb.0: # %entry
417423; X64-KNL-NEXT: movq %rdi, %rax
418- ; X64-KNL-NEXT: shrq $24 , %rax
424+ ; X64-KNL-NEXT: movl %eax , %ecx
419425; X64-KNL-NEXT: kmovw %eax, %k0
420- ; X64-KNL-NEXT: movl %edi, %eax
421- ; X64-KNL-NEXT: shrl $16, %eax
426+ ; X64-KNL-NEXT: movzbl %ah, %edx
427+ ; X64-KNL-NEXT: # kill: def $eax killed $eax killed $rax
428+ ; X64-KNL-NEXT: shrl $24, %eax
422429; X64-KNL-NEXT: kmovw %eax, %k1
423- ; X64-KNL-NEXT: kunpckbw %k1, %k0, %k1
424- ; X64-KNL-NEXT: kmovw %edi, %k0
425- ; X64-KNL-NEXT: shrl $8, %edi
426- ; X64-KNL-NEXT: kmovw %edi, %k2
430+ ; X64-KNL-NEXT: shrl $16, %ecx
431+ ; X64-KNL-NEXT: movzbl %cl, %eax
432+ ; X64-KNL-NEXT: kmovw %eax, %k2
433+ ; X64-KNL-NEXT: kunpckbw %k2, %k1, %k1
434+ ; X64-KNL-NEXT: kmovw %edx, %k2
427435; X64-KNL-NEXT: kunpckbw %k0, %k2, %k2
428436; X64-KNL-NEXT: vpternlogd {{.*#+}} zmm0 {%k2} {z} = -1
429437; X64-KNL-NEXT: vpmovdb %zmm0, %xmm0
@@ -490,56 +498,82 @@ entry:
490498define <64 x i1 > @i64_mask_extract_64 (i64 %mask ) {
491499; X64-AVX512-LABEL: i64_mask_extract_64:
492500; X64-AVX512: # %bb.0: # %entry
493- ; X64-AVX512-NEXT: kmovq %rdi, %k0
494- ; X64-AVX512-NEXT: kshiftrq $32, %k0, %k1
495- ; X64-AVX512-NEXT: kshiftrq $40, %k0, %k2
501+ ; X64-AVX512-NEXT: movq %rdi, %rax
502+ ; X64-AVX512-NEXT: kmovd %eax, %k0
503+ ; X64-AVX512-NEXT: movzbl %ah, %ecx
504+ ; X64-AVX512-NEXT: kmovd %ecx, %k1
505+ ; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k0
506+ ; X64-AVX512-NEXT: movl %eax, %ecx
507+ ; X64-AVX512-NEXT: shrl $24, %ecx
508+ ; X64-AVX512-NEXT: kmovd %ecx, %k1
509+ ; X64-AVX512-NEXT: movl %eax, %ecx
510+ ; X64-AVX512-NEXT: shrl $16, %ecx
511+ ; X64-AVX512-NEXT: movzbl %cl, %ecx
512+ ; X64-AVX512-NEXT: kmovd %ecx, %k2
513+ ; X64-AVX512-NEXT: kunpckbw %k2, %k1, %k1
514+ ; X64-AVX512-NEXT: kunpckwd %k0, %k1, %k0
515+ ; X64-AVX512-NEXT: movq %rdi, %rcx
516+ ; X64-AVX512-NEXT: shrq $32, %rcx
517+ ; X64-AVX512-NEXT: movzbl %cl, %ecx
518+ ; X64-AVX512-NEXT: kmovd %ecx, %k1
519+ ; X64-AVX512-NEXT: movq %rdi, %rcx
520+ ; X64-AVX512-NEXT: shrq $40, %rcx
521+ ; X64-AVX512-NEXT: movzbl %cl, %ecx
522+ ; X64-AVX512-NEXT: kmovd %ecx, %k2
496523; X64-AVX512-NEXT: kunpckbw %k1, %k2, %k1
497- ; X64-AVX512-NEXT: kshiftrq $48, %k0, %k2
498- ; X64-AVX512-NEXT: kshiftrq $56, %k0, %k3
499- ; X64-AVX512-NEXT: kunpckbw %k2, %k3, %k2
524+ ; X64-AVX512-NEXT: movq %rdi, %rcx
525+ ; X64-AVX512-NEXT: shrq $56, %rcx
526+ ; X64-AVX512-NEXT: kmovd %ecx, %k2
527+ ; X64-AVX512-NEXT: shrq $48, %rax
528+ ; X64-AVX512-NEXT: movzbl %al, %eax
529+ ; X64-AVX512-NEXT: kmovd %eax, %k3
530+ ; X64-AVX512-NEXT: kunpckbw %k3, %k2, %k2
500531; X64-AVX512-NEXT: kunpckwd %k1, %k2, %k1
501- ; X64-AVX512-NEXT: kshiftrd $8, %k0, %k2
502- ; X64-AVX512-NEXT: kunpckbw %k0, %k2, %k2
503- ; X64-AVX512-NEXT: kshiftrq $24, %k0, %k3
504- ; X64-AVX512-NEXT: kshiftrd $16, %k0, %k0
505- ; X64-AVX512-NEXT: kunpckbw %k0, %k3, %k0
506- ; X64-AVX512-NEXT: kunpckwd %k2, %k0, %k0
507532; X64-AVX512-NEXT: kunpckdq %k0, %k1, %k0
508533; X64-AVX512-NEXT: vpmovm2b %k0, %zmm0
509534; X64-AVX512-NEXT: retq
510535;
511536; X64-KNL-LABEL: i64_mask_extract_64:
512537; X64-KNL: # %bb.0: # %entry
513- ; X64-KNL-NEXT: movq %rdi, %rax
514- ; X64-KNL-NEXT: kmovw %esi, %k0
515- ; X64-KNL-NEXT: movl %esi, %ecx
516- ; X64-KNL-NEXT: shrl $8, %ecx
517- ; X64-KNL-NEXT: kmovw %ecx, %k1
518- ; X64-KNL-NEXT: kunpckbw %k0, %k1, %k0
538+ ; X64-KNL-NEXT: pushq %rbx
539+ ; X64-KNL-NEXT: .cfi_def_cfa_offset 16
540+ ; X64-KNL-NEXT: .cfi_offset %rbx, -16
519541; X64-KNL-NEXT: movq %rsi, %rcx
520- ; X64-KNL-NEXT: shrq $24, %rcx
542+ ; X64-KNL-NEXT: movq %rdi, %rax
543+ ; X64-KNL-NEXT: movl %ecx, %edx
544+ ; X64-KNL-NEXT: movq %rsi, %rdi
545+ ; X64-KNL-NEXT: movq %rsi, %r8
546+ ; X64-KNL-NEXT: movq %rsi, %r9
547+ ; X64-KNL-NEXT: kmovw %ecx, %k0
548+ ; X64-KNL-NEXT: movzbl %ch, %ebx
549+ ; X64-KNL-NEXT: # kill: def $ecx killed $ecx killed $rcx
550+ ; X64-KNL-NEXT: shrl $24, %ecx
521551; X64-KNL-NEXT: kmovw %ecx, %k1
522- ; X64-KNL-NEXT: movl %esi , %ecx
523- ; X64-KNL-NEXT: shrl $16 , %ecx
552+ ; X64-KNL-NEXT: shrl $16 , %edx
553+ ; X64-KNL-NEXT: movzbl %dl , %ecx
524554; X64-KNL-NEXT: kmovw %ecx, %k2
525- ; X64-KNL-NEXT: kunpckbw %k2, %k1, %k1
526- ; X64-KNL-NEXT: movq %rsi, %rcx
527- ; X64-KNL-NEXT: shrq $32, %rcx
528- ; X64-KNL-NEXT: kmovw %ecx, %k2
529- ; X64-KNL-NEXT: movq %rsi, %rcx
530- ; X64-KNL-NEXT: shrq $40, %rcx
555+ ; X64-KNL-NEXT: shrq $32, %rsi
556+ ; X64-KNL-NEXT: movzbl %sil, %ecx
531557; X64-KNL-NEXT: kmovw %ecx, %k3
532- ; X64-KNL-NEXT: kunpckbw %k2 , %k3, %k2
533- ; X64-KNL-NEXT: movq %rsi , %rcx
534- ; X64-KNL-NEXT: shrq $48 , %rcx
535- ; X64-KNL-NEXT: kmovw %ecx , %k3
536- ; X64-KNL-NEXT: shrq $56, %rsi
537- ; X64-KNL-NEXT: kmovw %esi , %k4
558+ ; X64-KNL-NEXT: shrq $40 , %rdi
559+ ; X64-KNL-NEXT: movzbl %dil , %ecx
560+ ; X64-KNL-NEXT: kmovw %ecx , %k4
561+ ; X64-KNL-NEXT: kunpckbw %k2 , %k1, %k1
562+ ; X64-KNL-NEXT: shrq $56, %r8
563+ ; X64-KNL-NEXT: kmovw %r8d , %k2
538564; X64-KNL-NEXT: kunpckbw %k3, %k4, %k3
539- ; X64-KNL-NEXT: kmovw %k3, 6(%rdi)
540- ; X64-KNL-NEXT: kmovw %k2, 4(%rdi)
541- ; X64-KNL-NEXT: kmovw %k1, 2(%rdi)
542- ; X64-KNL-NEXT: kmovw %k0, (%rdi)
565+ ; X64-KNL-NEXT: shrq $48, %r9
566+ ; X64-KNL-NEXT: movzbl %r9b, %ecx
567+ ; X64-KNL-NEXT: kmovw %ecx, %k4
568+ ; X64-KNL-NEXT: kunpckbw %k4, %k2, %k2
569+ ; X64-KNL-NEXT: kmovw %ebx, %k4
570+ ; X64-KNL-NEXT: kunpckbw %k0, %k4, %k0
571+ ; X64-KNL-NEXT: kmovw %k0, (%rax)
572+ ; X64-KNL-NEXT: kmovw %k2, 6(%rax)
573+ ; X64-KNL-NEXT: kmovw %k3, 4(%rax)
574+ ; X64-KNL-NEXT: kmovw %k1, 2(%rax)
575+ ; X64-KNL-NEXT: popq %rbx
576+ ; X64-KNL-NEXT: .cfi_def_cfa_offset 8
543577; X64-KNL-NEXT: retq
544578entry:
545579 %.splatinsert = insertelement <64 x i64 > poison, i64 %mask , i64 0
0 commit comments