[X86] combineConcatVectorOps - add handling for vXi1 concat(logicop(),logicop()) patterns. #169998

RKSimon · 2025-11-29T13:59:47Z

No description provided.

[X86] combineConcatVectorOps - add handling for vXi1 concat(logicop(),logicop()) patterns.

llvmbot · 2025-11-29T14:00:16Z

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/169998.diff

2 Files Affected:

(modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+12-2)
(modified) llvm/test/CodeGen/X86/kmov.ll (+14-30)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 50df19b3e6e47..1b0bf6823e390 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -59323,7 +59323,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
     case X86ISD::ANDNP:
       // TODO: AVX512 targets should only use CombineSubOperand like AVX1/2.
       if (!IsSplat && (VT.is256BitVector() ||
-                       (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
+                       (VT.is512BitVector() && Subtarget.useAVX512Regs()) ||
+                       (EltSizeInBits == 1 && TLI.isTypeLegal(VT)))) {
         // Don't concatenate root AVX1 NOT patterns.
         // TODO: Allow NOT folding if Concat0 succeeds.
         if (Opcode == ISD::XOR && Depth == 0 && !Subtarget.hasInt256() &&
@@ -59333,7 +59334,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
           break;
         SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
         SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
-        if (Concat0 || Concat1 || Subtarget.useAVX512Regs())
+        if (Concat0 || Concat1 ||
+            (EltSizeInBits != 1 && Subtarget.useAVX512Regs()))
           return DAG.getNode(Opcode, DL, VT,
                              Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
                              Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
@@ -59727,6 +59729,14 @@ static SDValue combineCONCAT_VECTORS(SDNode *N, SelectionDAG &DAG,
       }
     }
 
+    // Attempt to merge logic ops if the type is legal.
+    if (TLI.isTypeLegal(VT) && all_of(Ops, [](SDValue Op) {
+          return ISD::isBitwiseLogicOp(Op.getOpcode());
+        }))
+      if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops,
+                                             DAG, Subtarget))
+        return R;
+
     // Don't do anything else for i1 vectors.
     return SDValue();
   }
diff --git a/llvm/test/CodeGen/X86/kmov.ll b/llvm/test/CodeGen/X86/kmov.ll
index 8b1e69a97d545..5d216a218cf9b 100644
--- a/llvm/test/CodeGen/X86/kmov.ll
+++ b/llvm/test/CodeGen/X86/kmov.ll
@@ -477,16 +477,13 @@ define <32 x i1> @invert_i64_mask_extract_32(i64 %mask) {
 ; X64-AVX512-LABEL: invert_i64_mask_extract_32:
 ; X64-AVX512:       # %bb.0:
 ; X64-AVX512-NEXT:    kmovq %rdi, %k0
-; X64-AVX512-NEXT:    knotb %k0, %k1
-; X64-AVX512-NEXT:    kshiftrd $8, %k0, %k2
-; X64-AVX512-NEXT:    knotb %k2, %k2
-; X64-AVX512-NEXT:    kunpckbw %k1, %k2, %k1
+; X64-AVX512-NEXT:    kshiftrd $8, %k0, %k1
+; X64-AVX512-NEXT:    kunpckbw %k0, %k1, %k1
 ; X64-AVX512-NEXT:    kshiftrd $16, %k0, %k2
-; X64-AVX512-NEXT:    knotb %k2, %k2
 ; X64-AVX512-NEXT:    kshiftrd $24, %k0, %k0
-; X64-AVX512-NEXT:    knotb %k0, %k0
 ; X64-AVX512-NEXT:    kunpckbw %k2, %k0, %k0
 ; X64-AVX512-NEXT:    kunpckwd %k1, %k0, %k0
+; X64-AVX512-NEXT:    knotd %k0, %k0
 ; X64-AVX512-NEXT:    vpmovm2b %k0, %ymm0
 ; X64-AVX512-NEXT:    retq
 ;
@@ -495,18 +492,16 @@ define <32 x i1> @invert_i64_mask_extract_32(i64 %mask) {
 ; X64-KNL-NEXT:    movl %edi, %eax
 ; X64-KNL-NEXT:    shrl $16, %eax
 ; X64-KNL-NEXT:    kmovw %eax, %k0
-; X64-KNL-NEXT:    knotw %k0, %k0
 ; X64-KNL-NEXT:    movl %edi, %eax
 ; X64-KNL-NEXT:    shrl $24, %eax
 ; X64-KNL-NEXT:    kmovw %eax, %k1
-; X64-KNL-NEXT:    knotw %k1, %k1
-; X64-KNL-NEXT:    kunpckbw %k0, %k1, %k1
+; X64-KNL-NEXT:    kunpckbw %k0, %k1, %k0
+; X64-KNL-NEXT:    knotw %k0, %k1
 ; X64-KNL-NEXT:    kmovw %edi, %k0
-; X64-KNL-NEXT:    knotw %k0, %k0
 ; X64-KNL-NEXT:    shrl $8, %edi
 ; X64-KNL-NEXT:    kmovw %edi, %k2
-; X64-KNL-NEXT:    knotw %k2, %k2
-; X64-KNL-NEXT:    kunpckbw %k0, %k2, %k2
+; X64-KNL-NEXT:    kunpckbw %k0, %k2, %k0
+; X64-KNL-NEXT:    knotw %k0, %k2
 ; X64-KNL-NEXT:    vpternlogd {{.*#+}} zmm0 {%k2} {z} = -1
 ; X64-KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; X64-KNL-NEXT:    vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
@@ -586,27 +581,20 @@ define <64 x i1> @invert_i64_mask_extract_64(i64 %mask) {
 ; X64-AVX512:       # %bb.0:
 ; X64-AVX512-NEXT:    kmovq %rdi, %k0
 ; X64-AVX512-NEXT:    kshiftrq $32, %k0, %k1
-; X64-AVX512-NEXT:    knotb %k1, %k1
 ; X64-AVX512-NEXT:    kshiftrq $40, %k0, %k2
-; X64-AVX512-NEXT:    knotb %k2, %k2
 ; X64-AVX512-NEXT:    kunpckbw %k1, %k2, %k1
 ; X64-AVX512-NEXT:    kshiftrq $48, %k0, %k2
-; X64-AVX512-NEXT:    knotb %k2, %k2
 ; X64-AVX512-NEXT:    kshiftrq $56, %k0, %k3
-; X64-AVX512-NEXT:    knotb %k3, %k3
 ; X64-AVX512-NEXT:    kunpckbw %k2, %k3, %k2
 ; X64-AVX512-NEXT:    kunpckwd %k1, %k2, %k1
-; X64-AVX512-NEXT:    knotb %k0, %k2
-; X64-AVX512-NEXT:    kshiftrd $8, %k0, %k3
-; X64-AVX512-NEXT:    knotb %k3, %k3
-; X64-AVX512-NEXT:    kunpckbw %k2, %k3, %k2
+; X64-AVX512-NEXT:    kshiftrd $8, %k0, %k2
+; X64-AVX512-NEXT:    kunpckbw %k0, %k2, %k2
 ; X64-AVX512-NEXT:    kshiftrd $16, %k0, %k3
-; X64-AVX512-NEXT:    knotb %k3, %k3
 ; X64-AVX512-NEXT:    kshiftrd $24, %k0, %k0
-; X64-AVX512-NEXT:    knotb %k0, %k0
 ; X64-AVX512-NEXT:    kunpckbw %k3, %k0, %k0
 ; X64-AVX512-NEXT:    kunpckwd %k2, %k0, %k0
 ; X64-AVX512-NEXT:    kunpckdq %k0, %k1, %k0
+; X64-AVX512-NEXT:    knotq %k0, %k0
 ; X64-AVX512-NEXT:    vpmovm2b %k0, %zmm0
 ; X64-AVX512-NEXT:    retq
 ;
@@ -614,38 +602,34 @@ define <64 x i1> @invert_i64_mask_extract_64(i64 %mask) {
 ; X64-KNL:       # %bb.0:
 ; X64-KNL-NEXT:    movq %rdi, %rax
 ; X64-KNL-NEXT:    kmovw %esi, %k0
-; X64-KNL-NEXT:    knotw %k0, %k0
 ; X64-KNL-NEXT:    movl %esi, %ecx
 ; X64-KNL-NEXT:    shrl $8, %ecx
 ; X64-KNL-NEXT:    kmovw %ecx, %k1
-; X64-KNL-NEXT:    knotw %k1, %k1
 ; X64-KNL-NEXT:    kunpckbw %k0, %k1, %k0
+; X64-KNL-NEXT:    knotw %k0, %k0
 ; X64-KNL-NEXT:    movl %esi, %ecx
 ; X64-KNL-NEXT:    shrl $16, %ecx
 ; X64-KNL-NEXT:    kmovw %ecx, %k1
-; X64-KNL-NEXT:    knotw %k1, %k1
 ; X64-KNL-NEXT:    movl %esi, %ecx
 ; X64-KNL-NEXT:    shrl $24, %ecx
 ; X64-KNL-NEXT:    kmovw %ecx, %k2
-; X64-KNL-NEXT:    knotw %k2, %k2
 ; X64-KNL-NEXT:    kunpckbw %k1, %k2, %k1
+; X64-KNL-NEXT:    knotw %k1, %k1
 ; X64-KNL-NEXT:    movq %rsi, %rcx
 ; X64-KNL-NEXT:    shrq $32, %rcx
 ; X64-KNL-NEXT:    kmovw %ecx, %k2
-; X64-KNL-NEXT:    knotw %k2, %k2
 ; X64-KNL-NEXT:    movq %rsi, %rcx
 ; X64-KNL-NEXT:    shrq $40, %rcx
 ; X64-KNL-NEXT:    kmovw %ecx, %k3
-; X64-KNL-NEXT:    knotw %k3, %k3
 ; X64-KNL-NEXT:    kunpckbw %k2, %k3, %k2
+; X64-KNL-NEXT:    knotw %k2, %k2
 ; X64-KNL-NEXT:    movq %rsi, %rcx
 ; X64-KNL-NEXT:    shrq $48, %rcx
 ; X64-KNL-NEXT:    kmovw %ecx, %k3
-; X64-KNL-NEXT:    knotw %k3, %k3
 ; X64-KNL-NEXT:    shrq $56, %rsi
 ; X64-KNL-NEXT:    kmovw %esi, %k4
-; X64-KNL-NEXT:    knotw %k4, %k4
 ; X64-KNL-NEXT:    kunpckbw %k3, %k4, %k3
+; X64-KNL-NEXT:    knotw %k3, %k3
 ; X64-KNL-NEXT:    kmovw %k3, 6(%rdi)
 ; X64-KNL-NEXT:    kmovw %k2, 4(%rdi)
 ; X64-KNL-NEXT:    kmovw %k1, 2(%rdi)

…,logicop()) patterns. (llvm#169998)

[X86] combineConcatVectorOps - add handling for vXi1 concat(logicop(),logicop()) patterns.

0f16755

llvmbot added the backend:X86 label Nov 29, 2025

RKSimon enabled auto-merge (squash) November 29, 2025 13:59

RKSimon merged commit 7925a9e into llvm:main Nov 29, 2025
11 of 12 checks passed

RKSimon deleted the x86-vXi1-concat-ops branch November 29, 2025 14:38

aahrun pushed a commit to aahrun/llvm-project that referenced this pull request Dec 1, 2025

[X86] combineConcatVectorOps - add handling for vXi1 concat(logicop(),logicop()) patterns. (llvm#169998)

55db881

augusto2112 pushed a commit to augusto2112/llvm-project that referenced this pull request Dec 3, 2025

[X86] combineConcatVectorOps - add handling for vXi1 concat(logicop(),logicop()) patterns. (llvm#169998)

c7aefbc

kcloudy0717 pushed a commit to kcloudy0717/llvm-project that referenced this pull request Dec 4, 2025

[X86] combineConcatVectorOps - add handling for vXi1 concat(logicop(),logicop()) patterns. (llvm#169998)

80c4cb8

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[X86] combineConcatVectorOps - add handling for vXi1 concat(logicop(),logicop()) patterns. #169998

[X86] combineConcatVectorOps - add handling for vXi1 concat(logicop(),logicop()) patterns. #169998

Uh oh!

RKSimon commented Nov 29, 2025

Uh oh!

llvmbot commented Nov 29, 2025

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants

[X86] combineConcatVectorOps - add handling for vXi1 concat(logicop(),logicop()) patterns. #169998

[X86] combineConcatVectorOps - add handling for vXi1 concat(logicop(),logicop()) patterns. #169998

Uh oh!

Conversation

RKSimon commented Nov 29, 2025

Uh oh!

llvmbot commented Nov 29, 2025

Uh oh!

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

2 participants