@@ -386,36 +386,28 @@ define <32 x i1> @invert_i32_mask_extract_32(i32 %mask) {
386386define <32 x i1 > @i64_mask_extract_32 (i64 %mask ) {
387387; X64-AVX512-LABEL: i64_mask_extract_32:
388388; X64-AVX512: # %bb.0:
389- ; X64-AVX512-NEXT: movq %rdi, %rax
390- ; X64-AVX512-NEXT: kmovd %eax, %k0
391- ; X64-AVX512-NEXT: movzbl %ah, %ecx
392- ; X64-AVX512-NEXT: kmovd %ecx, %k1
393- ; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k0
394- ; X64-AVX512-NEXT: movl %eax, %ecx
395- ; X64-AVX512-NEXT: shrl $24, %ecx
396- ; X64-AVX512-NEXT: kmovd %ecx, %k1
397- ; X64-AVX512-NEXT: shrl $16, %eax
398- ; X64-AVX512-NEXT: movzbl %al, %eax
399- ; X64-AVX512-NEXT: kmovd %eax, %k2
400- ; X64-AVX512-NEXT: kunpckbw %k2, %k1, %k1
401- ; X64-AVX512-NEXT: kunpckwd %k0, %k1, %k0
389+ ; X64-AVX512-NEXT: kmovq %rdi, %k0
390+ ; X64-AVX512-NEXT: kshiftrd $8, %k0, %k1
391+ ; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k1
392+ ; X64-AVX512-NEXT: kshiftrd $16, %k0, %k2
393+ ; X64-AVX512-NEXT: kshiftrd $24, %k0, %k0
394+ ; X64-AVX512-NEXT: kunpckbw %k2, %k0, %k0
395+ ; X64-AVX512-NEXT: kunpckwd %k1, %k0, %k0
402396; X64-AVX512-NEXT: vpmovm2b %k0, %ymm0
403397; X64-AVX512-NEXT: retq
404398;
405399; X64-KNL-LABEL: i64_mask_extract_32:
406400; X64-KNL: # %bb.0:
407- ; X64-KNL-NEXT: movq %rdi , %rax
408- ; X64-KNL-NEXT: movl %eax , %ecx
401+ ; X64-KNL-NEXT: movl %edi , %eax
402+ ; X64-KNL-NEXT: shrl $16 , %eax
409403; X64-KNL-NEXT: kmovw %eax, %k0
410- ; X64-KNL-NEXT: movzbl %ah, %edx
411- ; X64-KNL-NEXT: # kill: def $eax killed $eax killed $rax
404+ ; X64-KNL-NEXT: movl %edi, %eax
412405; X64-KNL-NEXT: shrl $24, %eax
413406; X64-KNL-NEXT: kmovw %eax, %k1
414- ; X64-KNL-NEXT: shrl $16, %ecx
415- ; X64-KNL-NEXT: movzbl %cl, %eax
416- ; X64-KNL-NEXT: kmovw %eax, %k2
417- ; X64-KNL-NEXT: kunpckbw %k2, %k1, %k1
418- ; X64-KNL-NEXT: kmovw %edx, %k2
407+ ; X64-KNL-NEXT: kunpckbw %k0, %k1, %k1
408+ ; X64-KNL-NEXT: kmovw %edi, %k0
409+ ; X64-KNL-NEXT: shrl $8, %edi
410+ ; X64-KNL-NEXT: kmovw %edi, %k2
419411; X64-KNL-NEXT: kunpckbw %k0, %k2, %k2
420412; X64-KNL-NEXT: vpternlogd {{.*#+}} zmm0 {%k2} {z} = -1
421413; X64-KNL-NEXT: vpmovdb %zmm0, %xmm0
@@ -480,82 +472,56 @@ define <32 x i1> @invert_i64_mask_extract_32(i64 %mask) {
480472define <64 x i1 > @i64_mask_extract_64 (i64 %mask ) {
481473; X64-AVX512-LABEL: i64_mask_extract_64:
482474; X64-AVX512: # %bb.0:
483- ; X64-AVX512-NEXT: movq %rdi, %rax
484- ; X64-AVX512-NEXT: kmovd %eax, %k0
485- ; X64-AVX512-NEXT: movzbl %ah, %ecx
486- ; X64-AVX512-NEXT: kmovd %ecx, %k1
487- ; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k0
488- ; X64-AVX512-NEXT: movl %eax, %ecx
489- ; X64-AVX512-NEXT: shrl $24, %ecx
490- ; X64-AVX512-NEXT: kmovd %ecx, %k1
491- ; X64-AVX512-NEXT: movl %eax, %ecx
492- ; X64-AVX512-NEXT: shrl $16, %ecx
493- ; X64-AVX512-NEXT: movzbl %cl, %ecx
494- ; X64-AVX512-NEXT: kmovd %ecx, %k2
495- ; X64-AVX512-NEXT: kunpckbw %k2, %k1, %k1
496- ; X64-AVX512-NEXT: kunpckwd %k0, %k1, %k0
497- ; X64-AVX512-NEXT: movq %rdi, %rcx
498- ; X64-AVX512-NEXT: shrq $32, %rcx
499- ; X64-AVX512-NEXT: movzbl %cl, %ecx
500- ; X64-AVX512-NEXT: kmovd %ecx, %k1
501- ; X64-AVX512-NEXT: movq %rdi, %rcx
502- ; X64-AVX512-NEXT: shrq $40, %rcx
503- ; X64-AVX512-NEXT: movzbl %cl, %ecx
504- ; X64-AVX512-NEXT: kmovd %ecx, %k2
475+ ; X64-AVX512-NEXT: kmovq %rdi, %k0
476+ ; X64-AVX512-NEXT: kshiftrq $32, %k0, %k1
477+ ; X64-AVX512-NEXT: kshiftrq $40, %k0, %k2
505478; X64-AVX512-NEXT: kunpckbw %k1, %k2, %k1
506- ; X64-AVX512-NEXT: movq %rdi, %rcx
507- ; X64-AVX512-NEXT: shrq $56, %rcx
508- ; X64-AVX512-NEXT: kmovd %ecx, %k2
509- ; X64-AVX512-NEXT: shrq $48, %rax
510- ; X64-AVX512-NEXT: movzbl %al, %eax
511- ; X64-AVX512-NEXT: kmovd %eax, %k3
512- ; X64-AVX512-NEXT: kunpckbw %k3, %k2, %k2
479+ ; X64-AVX512-NEXT: kshiftrq $48, %k0, %k2
480+ ; X64-AVX512-NEXT: kshiftrq $56, %k0, %k3
481+ ; X64-AVX512-NEXT: kunpckbw %k2, %k3, %k2
513482; X64-AVX512-NEXT: kunpckwd %k1, %k2, %k1
483+ ; X64-AVX512-NEXT: kshiftrd $8, %k0, %k2
484+ ; X64-AVX512-NEXT: kunpckbw %k0, %k2, %k2
485+ ; X64-AVX512-NEXT: kshiftrd $16, %k0, %k3
486+ ; X64-AVX512-NEXT: kshiftrd $24, %k0, %k0
487+ ; X64-AVX512-NEXT: kunpckbw %k3, %k0, %k0
488+ ; X64-AVX512-NEXT: kunpckwd %k2, %k0, %k0
514489; X64-AVX512-NEXT: kunpckdq %k0, %k1, %k0
515490; X64-AVX512-NEXT: vpmovm2b %k0, %zmm0
516491; X64-AVX512-NEXT: retq
517492;
518493; X64-KNL-LABEL: i64_mask_extract_64:
519494; X64-KNL: # %bb.0:
520- ; X64-KNL-NEXT: pushq %rbx
521- ; X64-KNL-NEXT: .cfi_def_cfa_offset 16
522- ; X64-KNL-NEXT: .cfi_offset %rbx, -16
523- ; X64-KNL-NEXT: movq %rsi, %rcx
524495; X64-KNL-NEXT: movq %rdi, %rax
525- ; X64-KNL-NEXT: movl %ecx, %edx
526- ; X64-KNL-NEXT: movq %rsi, %rdi
527- ; X64-KNL-NEXT: movq %rsi, %r8
528- ; X64-KNL-NEXT: movq %rsi, %r9
529- ; X64-KNL-NEXT: kmovw %ecx, %k0
530- ; X64-KNL-NEXT: movzbl %ch, %ebx
531- ; X64-KNL-NEXT: # kill: def $ecx killed $ecx killed $rcx
532- ; X64-KNL-NEXT: shrl $24, %ecx
496+ ; X64-KNL-NEXT: kmovw %esi, %k0
497+ ; X64-KNL-NEXT: movl %esi, %ecx
498+ ; X64-KNL-NEXT: shrl $8, %ecx
499+ ; X64-KNL-NEXT: kmovw %ecx, %k1
500+ ; X64-KNL-NEXT: kunpckbw %k0, %k1, %k0
501+ ; X64-KNL-NEXT: movl %esi, %ecx
502+ ; X64-KNL-NEXT: shrl $16, %ecx
533503; X64-KNL-NEXT: kmovw %ecx, %k1
534- ; X64-KNL-NEXT: shrl $16 , %edx
535- ; X64-KNL-NEXT: movzbl %dl , %ecx
504+ ; X64-KNL-NEXT: movl %esi , %ecx
505+ ; X64-KNL-NEXT: shrl $24 , %ecx
536506; X64-KNL-NEXT: kmovw %ecx, %k2
537- ; X64-KNL-NEXT: shrq $32, %rsi
538- ; X64-KNL-NEXT: movzbl %sil, %ecx
507+ ; X64-KNL-NEXT: kunpckbw %k1, %k2, %k1
508+ ; X64-KNL-NEXT: movq %rsi, %rcx
509+ ; X64-KNL-NEXT: shrq $32, %rcx
510+ ; X64-KNL-NEXT: kmovw %ecx, %k2
511+ ; X64-KNL-NEXT: movq %rsi, %rcx
512+ ; X64-KNL-NEXT: shrq $40, %rcx
513+ ; X64-KNL-NEXT: kmovw %ecx, %k3
514+ ; X64-KNL-NEXT: kunpckbw %k2, %k3, %k2
515+ ; X64-KNL-NEXT: movq %rsi, %rcx
516+ ; X64-KNL-NEXT: shrq $48, %rcx
539517; X64-KNL-NEXT: kmovw %ecx, %k3
540- ; X64-KNL-NEXT: shrq $40, %rdi
541- ; X64-KNL-NEXT: movzbl %dil, %ecx
542- ; X64-KNL-NEXT: kmovw %ecx, %k4
543- ; X64-KNL-NEXT: kunpckbw %k2, %k1, %k1
544- ; X64-KNL-NEXT: shrq $56, %r8
545- ; X64-KNL-NEXT: kmovw %r8d, %k2
518+ ; X64-KNL-NEXT: shrq $56, %rsi
519+ ; X64-KNL-NEXT: kmovw %esi, %k4
546520; X64-KNL-NEXT: kunpckbw %k3, %k4, %k3
547- ; X64-KNL-NEXT: shrq $48, %r9
548- ; X64-KNL-NEXT: movzbl %r9b, %ecx
549- ; X64-KNL-NEXT: kmovw %ecx, %k4
550- ; X64-KNL-NEXT: kunpckbw %k4, %k2, %k2
551- ; X64-KNL-NEXT: kmovw %ebx, %k4
552- ; X64-KNL-NEXT: kunpckbw %k0, %k4, %k0
553- ; X64-KNL-NEXT: kmovw %k0, (%rax)
554- ; X64-KNL-NEXT: kmovw %k2, 6(%rax)
555- ; X64-KNL-NEXT: kmovw %k3, 4(%rax)
556- ; X64-KNL-NEXT: kmovw %k1, 2(%rax)
557- ; X64-KNL-NEXT: popq %rbx
558- ; X64-KNL-NEXT: .cfi_def_cfa_offset 8
521+ ; X64-KNL-NEXT: kmovw %k3, 6(%rdi)
522+ ; X64-KNL-NEXT: kmovw %k2, 4(%rdi)
523+ ; X64-KNL-NEXT: kmovw %k1, 2(%rdi)
524+ ; X64-KNL-NEXT: kmovw %k0, (%rdi)
559525; X64-KNL-NEXT: retq
560526 %.splatinsert = insertelement <64 x i64 > poison, i64 %mask , i64 0
561527 %.splat = shufflevector <64 x i64 > %.splatinsert , <64 x i64 > poison, <64 x i32 > zeroinitializer
0 commit comments