Skip to content

Conversation

@RKSimon
Copy link
Collaborator

@RKSimon RKSimon commented Nov 29, 2025

No description provided.

@RKSimon RKSimon enabled auto-merge (squash) November 29, 2025 13:59
@llvmbot
Copy link
Member

llvmbot commented Nov 29, 2025

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/169998.diff

2 Files Affected:

  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+12-2)
  • (modified) llvm/test/CodeGen/X86/kmov.ll (+14-30)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 50df19b3e6e47..1b0bf6823e390 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -59323,7 +59323,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
     case X86ISD::ANDNP:
       // TODO: AVX512 targets should only use CombineSubOperand like AVX1/2.
       if (!IsSplat && (VT.is256BitVector() ||
-                       (VT.is512BitVector() && Subtarget.useAVX512Regs()))) {
+                       (VT.is512BitVector() && Subtarget.useAVX512Regs()) ||
+                       (EltSizeInBits == 1 && TLI.isTypeLegal(VT)))) {
         // Don't concatenate root AVX1 NOT patterns.
         // TODO: Allow NOT folding if Concat0 succeeds.
         if (Opcode == ISD::XOR && Depth == 0 && !Subtarget.hasInt256() &&
@@ -59333,7 +59334,8 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
           break;
         SDValue Concat0 = CombineSubOperand(VT, Ops, 0);
         SDValue Concat1 = CombineSubOperand(VT, Ops, 1);
-        if (Concat0 || Concat1 || Subtarget.useAVX512Regs())
+        if (Concat0 || Concat1 ||
+            (EltSizeInBits != 1 && Subtarget.useAVX512Regs()))
           return DAG.getNode(Opcode, DL, VT,
                              Concat0 ? Concat0 : ConcatSubOperand(VT, Ops, 0),
                              Concat1 ? Concat1 : ConcatSubOperand(VT, Ops, 1));
@@ -59727,6 +59729,14 @@ static SDValue combineCONCAT_VECTORS(SDNode *N, SelectionDAG &DAG,
       }
     }
 
+    // Attempt to merge logic ops if the type is legal.
+    if (TLI.isTypeLegal(VT) && all_of(Ops, [](SDValue Op) {
+          return ISD::isBitwiseLogicOp(Op.getOpcode());
+        }))
+      if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops,
+                                             DAG, Subtarget))
+        return R;
+
     // Don't do anything else for i1 vectors.
     return SDValue();
   }
diff --git a/llvm/test/CodeGen/X86/kmov.ll b/llvm/test/CodeGen/X86/kmov.ll
index 8b1e69a97d545..5d216a218cf9b 100644
--- a/llvm/test/CodeGen/X86/kmov.ll
+++ b/llvm/test/CodeGen/X86/kmov.ll
@@ -477,16 +477,13 @@ define <32 x i1> @invert_i64_mask_extract_32(i64 %mask) {
 ; X64-AVX512-LABEL: invert_i64_mask_extract_32:
 ; X64-AVX512:       # %bb.0:
 ; X64-AVX512-NEXT:    kmovq %rdi, %k0
-; X64-AVX512-NEXT:    knotb %k0, %k1
-; X64-AVX512-NEXT:    kshiftrd $8, %k0, %k2
-; X64-AVX512-NEXT:    knotb %k2, %k2
-; X64-AVX512-NEXT:    kunpckbw %k1, %k2, %k1
+; X64-AVX512-NEXT:    kshiftrd $8, %k0, %k1
+; X64-AVX512-NEXT:    kunpckbw %k0, %k1, %k1
 ; X64-AVX512-NEXT:    kshiftrd $16, %k0, %k2
-; X64-AVX512-NEXT:    knotb %k2, %k2
 ; X64-AVX512-NEXT:    kshiftrd $24, %k0, %k0
-; X64-AVX512-NEXT:    knotb %k0, %k0
 ; X64-AVX512-NEXT:    kunpckbw %k2, %k0, %k0
 ; X64-AVX512-NEXT:    kunpckwd %k1, %k0, %k0
+; X64-AVX512-NEXT:    knotd %k0, %k0
 ; X64-AVX512-NEXT:    vpmovm2b %k0, %ymm0
 ; X64-AVX512-NEXT:    retq
 ;
@@ -495,18 +492,16 @@ define <32 x i1> @invert_i64_mask_extract_32(i64 %mask) {
 ; X64-KNL-NEXT:    movl %edi, %eax
 ; X64-KNL-NEXT:    shrl $16, %eax
 ; X64-KNL-NEXT:    kmovw %eax, %k0
-; X64-KNL-NEXT:    knotw %k0, %k0
 ; X64-KNL-NEXT:    movl %edi, %eax
 ; X64-KNL-NEXT:    shrl $24, %eax
 ; X64-KNL-NEXT:    kmovw %eax, %k1
-; X64-KNL-NEXT:    knotw %k1, %k1
-; X64-KNL-NEXT:    kunpckbw %k0, %k1, %k1
+; X64-KNL-NEXT:    kunpckbw %k0, %k1, %k0
+; X64-KNL-NEXT:    knotw %k0, %k1
 ; X64-KNL-NEXT:    kmovw %edi, %k0
-; X64-KNL-NEXT:    knotw %k0, %k0
 ; X64-KNL-NEXT:    shrl $8, %edi
 ; X64-KNL-NEXT:    kmovw %edi, %k2
-; X64-KNL-NEXT:    knotw %k2, %k2
-; X64-KNL-NEXT:    kunpckbw %k0, %k2, %k2
+; X64-KNL-NEXT:    kunpckbw %k0, %k2, %k0
+; X64-KNL-NEXT:    knotw %k0, %k2
 ; X64-KNL-NEXT:    vpternlogd {{.*#+}} zmm0 {%k2} {z} = -1
 ; X64-KNL-NEXT:    vpmovdb %zmm0, %xmm0
 ; X64-KNL-NEXT:    vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
@@ -586,27 +581,20 @@ define <64 x i1> @invert_i64_mask_extract_64(i64 %mask) {
 ; X64-AVX512:       # %bb.0:
 ; X64-AVX512-NEXT:    kmovq %rdi, %k0
 ; X64-AVX512-NEXT:    kshiftrq $32, %k0, %k1
-; X64-AVX512-NEXT:    knotb %k1, %k1
 ; X64-AVX512-NEXT:    kshiftrq $40, %k0, %k2
-; X64-AVX512-NEXT:    knotb %k2, %k2
 ; X64-AVX512-NEXT:    kunpckbw %k1, %k2, %k1
 ; X64-AVX512-NEXT:    kshiftrq $48, %k0, %k2
-; X64-AVX512-NEXT:    knotb %k2, %k2
 ; X64-AVX512-NEXT:    kshiftrq $56, %k0, %k3
-; X64-AVX512-NEXT:    knotb %k3, %k3
 ; X64-AVX512-NEXT:    kunpckbw %k2, %k3, %k2
 ; X64-AVX512-NEXT:    kunpckwd %k1, %k2, %k1
-; X64-AVX512-NEXT:    knotb %k0, %k2
-; X64-AVX512-NEXT:    kshiftrd $8, %k0, %k3
-; X64-AVX512-NEXT:    knotb %k3, %k3
-; X64-AVX512-NEXT:    kunpckbw %k2, %k3, %k2
+; X64-AVX512-NEXT:    kshiftrd $8, %k0, %k2
+; X64-AVX512-NEXT:    kunpckbw %k0, %k2, %k2
 ; X64-AVX512-NEXT:    kshiftrd $16, %k0, %k3
-; X64-AVX512-NEXT:    knotb %k3, %k3
 ; X64-AVX512-NEXT:    kshiftrd $24, %k0, %k0
-; X64-AVX512-NEXT:    knotb %k0, %k0
 ; X64-AVX512-NEXT:    kunpckbw %k3, %k0, %k0
 ; X64-AVX512-NEXT:    kunpckwd %k2, %k0, %k0
 ; X64-AVX512-NEXT:    kunpckdq %k0, %k1, %k0
+; X64-AVX512-NEXT:    knotq %k0, %k0
 ; X64-AVX512-NEXT:    vpmovm2b %k0, %zmm0
 ; X64-AVX512-NEXT:    retq
 ;
@@ -614,38 +602,34 @@ define <64 x i1> @invert_i64_mask_extract_64(i64 %mask) {
 ; X64-KNL:       # %bb.0:
 ; X64-KNL-NEXT:    movq %rdi, %rax
 ; X64-KNL-NEXT:    kmovw %esi, %k0
-; X64-KNL-NEXT:    knotw %k0, %k0
 ; X64-KNL-NEXT:    movl %esi, %ecx
 ; X64-KNL-NEXT:    shrl $8, %ecx
 ; X64-KNL-NEXT:    kmovw %ecx, %k1
-; X64-KNL-NEXT:    knotw %k1, %k1
 ; X64-KNL-NEXT:    kunpckbw %k0, %k1, %k0
+; X64-KNL-NEXT:    knotw %k0, %k0
 ; X64-KNL-NEXT:    movl %esi, %ecx
 ; X64-KNL-NEXT:    shrl $16, %ecx
 ; X64-KNL-NEXT:    kmovw %ecx, %k1
-; X64-KNL-NEXT:    knotw %k1, %k1
 ; X64-KNL-NEXT:    movl %esi, %ecx
 ; X64-KNL-NEXT:    shrl $24, %ecx
 ; X64-KNL-NEXT:    kmovw %ecx, %k2
-; X64-KNL-NEXT:    knotw %k2, %k2
 ; X64-KNL-NEXT:    kunpckbw %k1, %k2, %k1
+; X64-KNL-NEXT:    knotw %k1, %k1
 ; X64-KNL-NEXT:    movq %rsi, %rcx
 ; X64-KNL-NEXT:    shrq $32, %rcx
 ; X64-KNL-NEXT:    kmovw %ecx, %k2
-; X64-KNL-NEXT:    knotw %k2, %k2
 ; X64-KNL-NEXT:    movq %rsi, %rcx
 ; X64-KNL-NEXT:    shrq $40, %rcx
 ; X64-KNL-NEXT:    kmovw %ecx, %k3
-; X64-KNL-NEXT:    knotw %k3, %k3
 ; X64-KNL-NEXT:    kunpckbw %k2, %k3, %k2
+; X64-KNL-NEXT:    knotw %k2, %k2
 ; X64-KNL-NEXT:    movq %rsi, %rcx
 ; X64-KNL-NEXT:    shrq $48, %rcx
 ; X64-KNL-NEXT:    kmovw %ecx, %k3
-; X64-KNL-NEXT:    knotw %k3, %k3
 ; X64-KNL-NEXT:    shrq $56, %rsi
 ; X64-KNL-NEXT:    kmovw %esi, %k4
-; X64-KNL-NEXT:    knotw %k4, %k4
 ; X64-KNL-NEXT:    kunpckbw %k3, %k4, %k3
+; X64-KNL-NEXT:    knotw %k3, %k3
 ; X64-KNL-NEXT:    kmovw %k3, 6(%rdi)
 ; X64-KNL-NEXT:    kmovw %k2, 4(%rdi)
 ; X64-KNL-NEXT:    kmovw %k1, 2(%rdi)

@RKSimon RKSimon merged commit 7925a9e into llvm:main Nov 29, 2025
11 of 12 checks passed
@RKSimon RKSimon deleted the x86-vXi1-concat-ops branch November 29, 2025 14:38
aahrun pushed a commit to aahrun/llvm-project that referenced this pull request Dec 1, 2025
augusto2112 pushed a commit to augusto2112/llvm-project that referenced this pull request Dec 3, 2025
kcloudy0717 pushed a commit to kcloudy0717/llvm-project that referenced this pull request Dec 4, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants