Skip to content

Commit 7925a9e

Browse files
authored
[X86] combineConcatVectorOps - add handling for vXi1 concat(logicop(),logicop()) patterns. (#169998)
1 parent 3e16aef commit 7925a9e

File tree

2 files changed

Lines changed: 26 additions & 32 deletions

2 files changed

Lines changed: 26 additions & 32 deletions

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59323,7 +59323,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5932359323
case X86ISD::ANDNP:
5932459324
// TODO: AVX512 targets should only use CombineSubOperand like AVX1/2.
5932559325
if (!IsSplat && (VT.is256BitVector() ||
59326-
(VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
59326+
(VT.is512BitVector() && Subtarget.useAVX512Regs()) ||
59327+
(EltSizeInBits == 1 && TLI.isTypeLegal(VT)))) {
5932759328
// Don't concatenate root AVX1 NOT patterns.
5932859329
// TODO: Allow NOT folding if Concat0 succeeds.
5932959330
if (Opcode == ISD::XOR && Depth == 0 && !Subtarget.hasInt256() &&
@@ -59333,7 +59334,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5933359334
break;
5933459335
SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
5933559336
SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
59336-
if (Concat0 || Concat1 || Subtarget.useAVX512Regs())
59337+
if (Concat0 || Concat1 ||
59338+
(EltSizeInBits != 1 && Subtarget.useAVX512Regs()))
5933759339
return DAG.getNode(Opcode, DL, VT,
5933859340
Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
5933959341
Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
@@ -59727,6 +59729,14 @@ static SDValue combineCONCAT_VECTORS(SDNode *N, SelectionDAG &DAG,
5972759729
}
5972859730
}
5972959731

59732+
// Attempt to merge logic ops if the type is legal.
59733+
if (TLI.isTypeLegal(VT) && all_of(Ops, [](SDValue Op) {
59734+
return ISD::isBitwiseLogicOp(Op.getOpcode());
59735+
}))
59736+
if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops,
59737+
DAG, Subtarget))
59738+
return R;
59739+
5973059740
// Don't do anything else for i1 vectors.
5973159741
return SDValue();
5973259742
}

llvm/test/CodeGen/X86/kmov.ll

Lines changed: 14 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -477,16 +477,13 @@ define <32 x i1> @invert_i64_mask_extract_32(i64 %mask) {
477477
; X64-AVX512-LABEL: invert_i64_mask_extract_32:
478478
; X64-AVX512: # %bb.0:
479479
; X64-AVX512-NEXT: kmovq %rdi, %k0
480-
; X64-AVX512-NEXT: knotb %k0, %k1
481-
; X64-AVX512-NEXT: kshiftrd $8, %k0, %k2
482-
; X64-AVX512-NEXT: knotb %k2, %k2
483-
; X64-AVX512-NEXT: kunpckbw %k1, %k2, %k1
480+
; X64-AVX512-NEXT: kshiftrd $8, %k0, %k1
481+
; X64-AVX512-NEXT: kunpckbw %k0, %k1, %k1
484482
; X64-AVX512-NEXT: kshiftrd $16, %k0, %k2
485-
; X64-AVX512-NEXT: knotb %k2, %k2
486483
; X64-AVX512-NEXT: kshiftrd $24, %k0, %k0
487-
; X64-AVX512-NEXT: knotb %k0, %k0
488484
; X64-AVX512-NEXT: kunpckbw %k2, %k0, %k0
489485
; X64-AVX512-NEXT: kunpckwd %k1, %k0, %k0
486+
; X64-AVX512-NEXT: knotd %k0, %k0
490487
; X64-AVX512-NEXT: vpmovm2b %k0, %ymm0
491488
; X64-AVX512-NEXT: retq
492489
;
@@ -495,18 +492,16 @@ define <32 x i1> @invert_i64_mask_extract_32(i64 %mask) {
495492
; X64-KNL-NEXT: movl %edi, %eax
496493
; X64-KNL-NEXT: shrl $16, %eax
497494
; X64-KNL-NEXT: kmovw %eax, %k0
498-
; X64-KNL-NEXT: knotw %k0, %k0
499495
; X64-KNL-NEXT: movl %edi, %eax
500496
; X64-KNL-NEXT: shrl $24, %eax
501497
; X64-KNL-NEXT: kmovw %eax, %k1
502-
; X64-KNL-NEXT: knotw %k1, %k1
503-
; X64-KNL-NEXT: kunpckbw %k0, %k1, %k1
498+
; X64-KNL-NEXT: kunpckbw %k0, %k1, %k0
499+
; X64-KNL-NEXT: knotw %k0, %k1
504500
; X64-KNL-NEXT: kmovw %edi, %k0
505-
; X64-KNL-NEXT: knotw %k0, %k0
506501
; X64-KNL-NEXT: shrl $8, %edi
507502
; X64-KNL-NEXT: kmovw %edi, %k2
508-
; X64-KNL-NEXT: knotw %k2, %k2
509-
; X64-KNL-NEXT: kunpckbw %k0, %k2, %k2
503+
; X64-KNL-NEXT: kunpckbw %k0, %k2, %k0
504+
; X64-KNL-NEXT: knotw %k0, %k2
510505
; X64-KNL-NEXT: vpternlogd {{.*#+}} zmm0 {%k2} {z} = -1
511506
; X64-KNL-NEXT: vpmovdb %zmm0, %xmm0
512507
; X64-KNL-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
@@ -586,66 +581,55 @@ define <64 x i1> @invert_i64_mask_extract_64(i64 %mask) {
586581
; X64-AVX512: # %bb.0:
587582
; X64-AVX512-NEXT: kmovq %rdi, %k0
588583
; X64-AVX512-NEXT: kshiftrq $32, %k0, %k1
589-
; X64-AVX512-NEXT: knotb %k1, %k1
590584
; X64-AVX512-NEXT: kshiftrq $40, %k0, %k2
591-
; X64-AVX512-NEXT: knotb %k2, %k2
592585
; X64-AVX512-NEXT: kunpckbw %k1, %k2, %k1
593586
; X64-AVX512-NEXT: kshiftrq $48, %k0, %k2
594-
; X64-AVX512-NEXT: knotb %k2, %k2
595587
; X64-AVX512-NEXT: kshiftrq $56, %k0, %k3
596-
; X64-AVX512-NEXT: knotb %k3, %k3
597588
; X64-AVX512-NEXT: kunpckbw %k2, %k3, %k2
598589
; X64-AVX512-NEXT: kunpckwd %k1, %k2, %k1
599-
; X64-AVX512-NEXT: knotb %k0, %k2
600-
; X64-AVX512-NEXT: kshiftrd $8, %k0, %k3
601-
; X64-AVX512-NEXT: knotb %k3, %k3
602-
; X64-AVX512-NEXT: kunpckbw %k2, %k3, %k2
590+
; X64-AVX512-NEXT: kshiftrd $8, %k0, %k2
591+
; X64-AVX512-NEXT: kunpckbw %k0, %k2, %k2
603592
; X64-AVX512-NEXT: kshiftrd $16, %k0, %k3
604-
; X64-AVX512-NEXT: knotb %k3, %k3
605593
; X64-AVX512-NEXT: kshiftrd $24, %k0, %k0
606-
; X64-AVX512-NEXT: knotb %k0, %k0
607594
; X64-AVX512-NEXT: kunpckbw %k3, %k0, %k0
608595
; X64-AVX512-NEXT: kunpckwd %k2, %k0, %k0
609596
; X64-AVX512-NEXT: kunpckdq %k0, %k1, %k0
597+
; X64-AVX512-NEXT: knotq %k0, %k0
610598
; X64-AVX512-NEXT: vpmovm2b %k0, %zmm0
611599
; X64-AVX512-NEXT: retq
612600
;
613601
; X64-KNL-LABEL: invert_i64_mask_extract_64:
614602
; X64-KNL: # %bb.0:
615603
; X64-KNL-NEXT: movq %rdi, %rax
616604
; X64-KNL-NEXT: kmovw %esi, %k0
617-
; X64-KNL-NEXT: knotw %k0, %k0
618605
; X64-KNL-NEXT: movl %esi, %ecx
619606
; X64-KNL-NEXT: shrl $8, %ecx
620607
; X64-KNL-NEXT: kmovw %ecx, %k1
621-
; X64-KNL-NEXT: knotw %k1, %k1
622608
; X64-KNL-NEXT: kunpckbw %k0, %k1, %k0
609+
; X64-KNL-NEXT: knotw %k0, %k0
623610
; X64-KNL-NEXT: movl %esi, %ecx
624611
; X64-KNL-NEXT: shrl $16, %ecx
625612
; X64-KNL-NEXT: kmovw %ecx, %k1
626-
; X64-KNL-NEXT: knotw %k1, %k1
627613
; X64-KNL-NEXT: movl %esi, %ecx
628614
; X64-KNL-NEXT: shrl $24, %ecx
629615
; X64-KNL-NEXT: kmovw %ecx, %k2
630-
; X64-KNL-NEXT: knotw %k2, %k2
631616
; X64-KNL-NEXT: kunpckbw %k1, %k2, %k1
617+
; X64-KNL-NEXT: knotw %k1, %k1
632618
; X64-KNL-NEXT: movq %rsi, %rcx
633619
; X64-KNL-NEXT: shrq $32, %rcx
634620
; X64-KNL-NEXT: kmovw %ecx, %k2
635-
; X64-KNL-NEXT: knotw %k2, %k2
636621
; X64-KNL-NEXT: movq %rsi, %rcx
637622
; X64-KNL-NEXT: shrq $40, %rcx
638623
; X64-KNL-NEXT: kmovw %ecx, %k3
639-
; X64-KNL-NEXT: knotw %k3, %k3
640624
; X64-KNL-NEXT: kunpckbw %k2, %k3, %k2
625+
; X64-KNL-NEXT: knotw %k2, %k2
641626
; X64-KNL-NEXT: movq %rsi, %rcx
642627
; X64-KNL-NEXT: shrq $48, %rcx
643628
; X64-KNL-NEXT: kmovw %ecx, %k3
644-
; X64-KNL-NEXT: knotw %k3, %k3
645629
; X64-KNL-NEXT: shrq $56, %rsi
646630
; X64-KNL-NEXT: kmovw %esi, %k4
647-
; X64-KNL-NEXT: knotw %k4, %k4
648631
; X64-KNL-NEXT: kunpckbw %k3, %k4, %k3
632+
; X64-KNL-NEXT: knotw %k3, %k3
649633
; X64-KNL-NEXT: kmovw %k3, 6(%rdi)
650634
; X64-KNL-NEXT: kmovw %k2, 4(%rdi)
651635
; X64-KNL-NEXT: kmovw %k1, 2(%rdi)

0 commit comments

Comments (0)