From 1a92b33f53d8a7d94cd9b1dca458606d283da10c Mon Sep 17 00:00:00 2001 From: Brandon Date: Mon, 20 Oct 2025 14:14:19 -0500 Subject: [PATCH 1/5] [X86] Add new baseline tests for combineX86AddSub --- llvm/test/CodeGen/X86/combine-adc.ll | 48 ++++++++++++++ llvm/test/CodeGen/X86/combine-sbb.ll | 95 ++++++++++++++++++++++++++++ 2 files changed, 143 insertions(+) diff --git a/llvm/test/CodeGen/X86/combine-adc.ll b/llvm/test/CodeGen/X86/combine-adc.ll index 22417363f1093..0e46f2956a77e 100644 --- a/llvm/test/CodeGen/X86/combine-adc.ll +++ b/llvm/test/CodeGen/X86/combine-adc.ll @@ -89,4 +89,52 @@ define i32 @adc_merge_constants(i32 %a0) nounwind { ret i32 %sum } +define i32 @adc_merge_sub(i32 %a0) nounwind { +; X86-LABEL: adc_merge_sub: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl %esi, %edi +; X86-NEXT: addl $42, %edi +; X86-NEXT: setb %al +; X86-NEXT: pushl %eax +; X86-NEXT: calll use@PLT +; X86-NEXT: addl $4, %esp +; X86-NEXT: movl $-42, %eax +; X86-NEXT: subl %esi, %eax +; X86-NEXT: xorl %edi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: adc_merge_sub: +; X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax +; X64-NEXT: movl %edi, %ebx +; X64-NEXT: xorl %edi, %edi +; X64-NEXT: movl %ebx, %ebp +; X64-NEXT: addl $42, %ebp +; X64-NEXT: setb %dil +; X64-NEXT: callq use@PLT +; X64-NEXT: movl $-42, %eax +; X64-NEXT: subl %ebx, %eax +; X64-NEXT: xorl %ebp, %eax +; X64-NEXT: addq $8, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %rbp +; X64-NEXT: retq + %adc = tail call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %a0, i32 42) + %carry = extractvalue { i8, i32 } %adc, 0 + call void @use(i8 %carry) + %sum = extractvalue { i8, i32 } %adc, 1 + %sub = sub i32 -42, %a0 + %result = xor i32 %sum, %sub + ret i32 %result +} + declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32) +declare void @use(i8) 
diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll index 89aee965a2c1f..ee74d97c3b690 100644 --- a/llvm/test/CodeGen/X86/combine-sbb.ll +++ b/llvm/test/CodeGen/X86/combine-sbb.ll @@ -333,4 +333,99 @@ define i32 @PR40483_sub6(ptr, i32) nounwind { ret i32 %10 } +define i32 @sbb_merge_add1(i32 %a0) nounwind { +; X86-LABEL: sbb_merge_add1: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: movl %esi, %edi +; X86-NEXT: subl $42, %edi +; X86-NEXT: setb %al +; X86-NEXT: pushl %eax +; X86-NEXT: calll use@PLT +; X86-NEXT: addl $4, %esp +; X86-NEXT: addl $-42, %esi +; X86-NEXT: xorl %edi, %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: sbb_merge_add1: +; X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax +; X64-NEXT: movl %edi, %ebx +; X64-NEXT: xorl %edi, %edi +; X64-NEXT: movl %ebx, %ebp +; X64-NEXT: subl $42, %ebp +; X64-NEXT: setb %dil +; X64-NEXT: callq use@PLT +; X64-NEXT: addl $-42, %ebx +; X64-NEXT: xorl %ebp, %ebx +; X64-NEXT: movl %ebx, %eax +; X64-NEXT: addq $8, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %rbp +; X64-NEXT: retq + %sbb = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %a0, i32 42) + %borrow = extractvalue { i8, i32 } %sbb, 0 + call void @use(i8 %borrow) + %diff = extractvalue { i8, i32 } %sbb, 1 + %add = add i32 %a0, -42 + %result = xor i32 %diff, %add + ret i32 %result +} + +define i32 @sbb_merge_add2(i32 %a0) nounwind { +; X86-LABEL: sbb_merge_add2: +; X86: # %bb.0: +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl $42, %edi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: subl %esi, %edi +; X86-NEXT: setb %al +; X86-NEXT: pushl %eax +; X86-NEXT: calll use@PLT +; X86-NEXT: addl $4, %esp +; X86-NEXT: addl $-42, %esi +; X86-NEXT: xorl %edi, %esi +; X86-NEXT: 
movl %esi, %eax +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: retl +; +; X64-LABEL: sbb_merge_add2: +; X64: # %bb.0: +; X64-NEXT: pushq %rbp +; X64-NEXT: pushq %rbx +; X64-NEXT: pushq %rax +; X64-NEXT: movl %edi, %ebx +; X64-NEXT: movl $42, %ebp +; X64-NEXT: xorl %edi, %edi +; X64-NEXT: subl %ebx, %ebp +; X64-NEXT: setb %dil +; X64-NEXT: callq use@PLT +; X64-NEXT: addl $-42, %ebx +; X64-NEXT: xorl %ebp, %ebx +; X64-NEXT: movl %ebx, %eax +; X64-NEXT: addq $8, %rsp +; X64-NEXT: popq %rbx +; X64-NEXT: popq %rbp +; X64-NEXT: retq + %sbb = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 42, i32 %a0) + %borrow = extractvalue { i8, i32 } %sbb, 0 + call void @use(i8 %borrow) + %diff = extractvalue { i8, i32 } %sbb, 1 + %add = add i32 %a0, -42 + %result = xor i32 %diff, %add + ret i32 %result +} + declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32) +declare void @use(i8) From b9f1d0f054a15c59af40adcfa5fad782e9b35d42 Mon Sep 17 00:00:00 2001 From: Brandon Date: Mon, 20 Oct 2025 14:22:36 -0500 Subject: [PATCH 2/5] [X86] Fold generic ADD/SUB with constants to X86ISD::SUB/ADD --- llvm/lib/Target/X86/X86ISelLowering.cpp | 25 +++- llvm/test/CodeGen/X86/combine-adc.ll | 20 +-- llvm/test/CodeGen/X86/combine-sbb.ll | 48 +++---- .../CodeGen/X86/dag-update-nodetomatch.ll | 129 +++++++++--------- 4 files changed, 114 insertions(+), 108 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b5f8ee50cba3d..74a7d83aadfd9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -57616,10 +57616,10 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, } // Fold any similar generic ADD/SUB opcodes to reuse this node. 
- auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) { + auto MatchGeneric = [&](unsigned Opc, SDValue N0, SDValue N1, bool Negate) { SDValue Ops[] = {N0, N1}; SDVTList VTs = DAG.getVTList(N->getValueType(0)); - if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) { + if (SDNode *GenericAddSub = DAG.getNodeIfExists(Opc, VTs, Ops)) { SDValue Op(N, 0); if (Negate) { // Bail if this is only used by a user of the x86 add/sub. @@ -57631,8 +57631,25 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, DCI.CombineTo(GenericAddSub, Op); } }; - MatchGeneric(LHS, RHS, false); - MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode()); + MatchGeneric(GenericOpc, LHS, RHS, false); + MatchGeneric(GenericOpc, RHS, LHS, X86ISD::SUB == N->getOpcode()); + + if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(RHS)) { + SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT); + if (X86ISD::SUB == N->getOpcode()) { + // With LHS - C, fold LHS + (-C) + MatchGeneric(ISD::ADD, LHS, NegC, false); + } else { + // With -(LHS + C), fold (-C) - LHS + MatchGeneric(ISD::SUB, NegC, LHS, true); + } + } else if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(LHS)) { + SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT); + if (X86ISD::SUB == N->getOpcode()) { + // With -(C - RHS), fold RHS + (-C) + MatchGeneric(ISD::ADD, RHS, NegC, true); + } + } // TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the // EFLAGS result doesn't change.
diff --git a/llvm/test/CodeGen/X86/combine-adc.ll b/llvm/test/CodeGen/X86/combine-adc.ll index 0e46f2956a77e..a2aaea31aa6ff 100644 --- a/llvm/test/CodeGen/X86/combine-adc.ll +++ b/llvm/test/CodeGen/X86/combine-adc.ll @@ -94,17 +94,17 @@ define i32 @adc_merge_sub(i32 %a0) nounwind { ; X86: # %bb.0: ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: movl %esi, %edi ; X86-NEXT: addl $42, %edi ; X86-NEXT: setb %al +; X86-NEXT: movl %edi, %esi +; X86-NEXT: negl %esi ; X86-NEXT: pushl %eax ; X86-NEXT: calll use@PLT ; X86-NEXT: addl $4, %esp -; X86-NEXT: movl $-42, %eax -; X86-NEXT: subl %esi, %eax -; X86-NEXT: xorl %edi, %eax +; X86-NEXT: xorl %edi, %esi +; X86-NEXT: movl %esi, %eax ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl @@ -116,13 +116,13 @@ define i32 @adc_merge_sub(i32 %a0) nounwind { ; X64-NEXT: pushq %rax ; X64-NEXT: movl %edi, %ebx ; X64-NEXT: xorl %edi, %edi -; X64-NEXT: movl %ebx, %ebp -; X64-NEXT: addl $42, %ebp +; X64-NEXT: addl $42, %ebx ; X64-NEXT: setb %dil +; X64-NEXT: movl %ebx, %ebp +; X64-NEXT: negl %ebp ; X64-NEXT: callq use@PLT -; X64-NEXT: movl $-42, %eax -; X64-NEXT: subl %ebx, %eax -; X64-NEXT: xorl %ebp, %eax +; X64-NEXT: xorl %ebx, %ebp +; X64-NEXT: movl %ebp, %eax ; X64-NEXT: addq $8, %rsp ; X64-NEXT: popq %rbx ; X64-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll index ee74d97c3b690..62744d4f3050a 100644 --- a/llvm/test/CodeGen/X86/combine-sbb.ll +++ b/llvm/test/CodeGen/X86/combine-sbb.ll @@ -336,40 +336,25 @@ define i32 @PR40483_sub6(ptr, i32) nounwind { define i32 @sbb_merge_add1(i32 %a0) nounwind { ; X86-LABEL: sbb_merge_add1: ; X86: # %bb.0: -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: movl %esi, %edi -; X86-NEXT: subl $42, %edi +; X86-NEXT: cmpl $42, 
{{[0-9]+}}(%esp) ; X86-NEXT: setb %al ; X86-NEXT: pushl %eax ; X86-NEXT: calll use@PLT ; X86-NEXT: addl $4, %esp -; X86-NEXT: addl $-42, %esi -; X86-NEXT: xorl %edi, %esi -; X86-NEXT: movl %esi, %eax -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi +; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl ; ; X64-LABEL: sbb_merge_add1: ; X64: # %bb.0: -; X64-NEXT: pushq %rbp -; X64-NEXT: pushq %rbx ; X64-NEXT: pushq %rax -; X64-NEXT: movl %edi, %ebx -; X64-NEXT: xorl %edi, %edi -; X64-NEXT: movl %ebx, %ebp -; X64-NEXT: subl $42, %ebp -; X64-NEXT: setb %dil +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: cmpl $42, %edi +; X64-NEXT: setb %al +; X64-NEXT: movl %eax, %edi ; X64-NEXT: callq use@PLT -; X64-NEXT: addl $-42, %ebx -; X64-NEXT: xorl %ebp, %ebx -; X64-NEXT: movl %ebx, %eax -; X64-NEXT: addq $8, %rsp -; X64-NEXT: popq %rbx -; X64-NEXT: popq %rbp +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: popq %rcx ; X64-NEXT: retq %sbb = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %a0, i32 42) %borrow = extractvalue { i8, i32 } %sbb, 0 @@ -385,15 +370,15 @@ define i32 @sbb_merge_add2(i32 %a0) nounwind { ; X86: # %bb.0: ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl $42, %edi ; X86-NEXT: xorl %eax, %eax -; X86-NEXT: subl %esi, %edi +; X86-NEXT: subl {{[0-9]+}}(%esp), %edi ; X86-NEXT: setb %al +; X86-NEXT: movl %edi, %esi +; X86-NEXT: negl %esi ; X86-NEXT: pushl %eax ; X86-NEXT: calll use@PLT ; X86-NEXT: addl $4, %esp -; X86-NEXT: addl $-42, %esi ; X86-NEXT: xorl %edi, %esi ; X86-NEXT: movl %esi, %eax ; X86-NEXT: popl %esi @@ -405,13 +390,14 @@ define i32 @sbb_merge_add2(i32 %a0) nounwind { ; X64-NEXT: pushq %rbp ; X64-NEXT: pushq %rbx ; X64-NEXT: pushq %rax -; X64-NEXT: movl %edi, %ebx ; X64-NEXT: movl $42, %ebp -; X64-NEXT: xorl %edi, %edi -; X64-NEXT: subl %ebx, %ebp -; X64-NEXT: setb %dil +; X64-NEXT: xorl %eax, %eax +; X64-NEXT: subl %edi, %ebp +; X64-NEXT: setb %al +; X64-NEXT: movl %ebp, %ebx +; X64-NEXT: negl %ebx +; 
X64-NEXT: movl %eax, %edi ; X64-NEXT: callq use@PLT -; X64-NEXT: addl $-42, %ebx ; X64-NEXT: xorl %ebp, %ebx ; X64-NEXT: movl %ebx, %eax ; X64-NEXT: addq $8, %rsp diff --git a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll index b428ce457ff40..71ad598abe683 100644 --- a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll +++ b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll @@ -96,6 +96,17 @@ entry: define void @_Z2x6v() local_unnamed_addr { ; CHECK-LABEL: _Z2x6v: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax +; CHECK-NEXT: movl (%rax), %edx +; CHECK-NEXT: andl $511, %edx # imm = 0x1FF +; CHECK-NEXT: leaq 1(%rdx), %rax +; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx +; CHECK-NEXT: movl %eax, (%rcx) +; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx +; CHECK-NEXT: movl (%rcx), %ecx +; CHECK-NEXT: testl %ecx, %ecx +; CHECK-NEXT: je .LBB1_18 +; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: pushq %r15 @@ -114,58 +125,47 @@ define void @_Z2x6v() local_unnamed_addr { ; CHECK-NEXT: .cfi_offset %r14, -32 ; CHECK-NEXT: .cfi_offset %r15, -24 ; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax -; CHECK-NEXT: movl (%rax), %ebx -; CHECK-NEXT: andl $511, %ebx # imm = 0x1FF -; CHECK-NEXT: leaq 1(%rbx), %rax -; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx -; CHECK-NEXT: movl %eax, (%rcx) -; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx -; CHECK-NEXT: movl (%rcx), %ecx -; CHECK-NEXT: testl %ecx, %ecx -; CHECK-NEXT: je .LBB1_18 -; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph -; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rdx -; CHECK-NEXT: movq (%rdx), %rsi -; CHECK-NEXT: movl %ecx, %edx -; CHECK-NEXT: notl %edx -; CHECK-NEXT: leaq 8(,%rdx,8), %rdi +; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rsi +; CHECK-NEXT: movq (%rsi), %rsi +; CHECK-NEXT: movl %ecx, %edi +; CHECK-NEXT: notl %edi +; CHECK-NEXT: leaq 8(,%rdi,8), %rdi 
; CHECK-NEXT: imulq %rax, %rdi ; CHECK-NEXT: addq %rsi, %rdi ; CHECK-NEXT: movq x2@GOTPCREL(%rip), %r8 -; CHECK-NEXT: movl (%r8), %edx -; CHECK-NEXT: leal 8(,%rbx,8), %eax +; CHECK-NEXT: movl (%r8), %r9d +; CHECK-NEXT: leal 8(,%rdx,8), %eax ; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: leaq 32(%rsi), %r11 -; CHECK-NEXT: leaq 8(,%rbx,8), %rbx -; CHECK-NEXT: xorl %r14d, %r14d -; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r15 -; CHECK-NEXT: movq %rsi, %r12 +; CHECK-NEXT: leaq 32(%rsi), %rbx +; CHECK-NEXT: leaq 8(,%rdx,8), %r14 +; CHECK-NEXT: xorl %r15d, %r15d +; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r12 +; CHECK-NEXT: movq %rsi, %r13 ; CHECK-NEXT: jmp .LBB1_2 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_15: # %for.cond1.for.inc3_crit_edge ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: movl %edx, (%r8) +; CHECK-NEXT: movl %r9d, (%r8) ; CHECK-NEXT: .LBB1_16: # %for.inc3 ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: addq %rbx, %r12 -; CHECK-NEXT: incq %r14 -; CHECK-NEXT: addq %rbx, %r11 +; CHECK-NEXT: addq %r14, %r13 +; CHECK-NEXT: incq %r15 +; CHECK-NEXT: addq %r14, %rbx ; CHECK-NEXT: incl %ecx ; CHECK-NEXT: je .LBB1_17 ; CHECK-NEXT: .LBB1_2: # %for.cond1thread-pre-split ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB1_12 Depth 2 ; CHECK-NEXT: # Child Loop BB1_14 Depth 2 -; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: testl %r9d, %r9d ; CHECK-NEXT: jns .LBB1_16 ; CHECK-NEXT: # %bb.3: # %for.body2.preheader ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: movslq %edx, %r13 -; CHECK-NEXT: testq %r13, %r13 +; CHECK-NEXT: movslq %r9d, %r9 +; CHECK-NEXT: testq %r9, %r9 ; CHECK-NEXT: movq $-1, %rbp -; CHECK-NEXT: cmovnsq %r13, %rbp -; CHECK-NEXT: subq %r13, %rbp +; CHECK-NEXT: cmovnsq %r9, %rbp +; CHECK-NEXT: subq %r9, %rbp ; CHECK-NEXT: incq %rbp ; CHECK-NEXT: cmpq $4, %rbp ; CHECK-NEXT: jb .LBB1_14 @@ -177,20 +177,20 @@ define void @_Z2x6v() local_unnamed_addr { ; CHECK-NEXT: # 
%bb.5: # %vector.memcheck ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; CHECK-NEXT: imulq %r14, %rax -; CHECK-NEXT: leaq (%rsi,%rax), %r10 -; CHECK-NEXT: leaq (%r10,%r13,8), %r9 -; CHECK-NEXT: testq %r13, %r13 -; CHECK-NEXT: movq $-1, %r10 -; CHECK-NEXT: cmovnsq %r13, %r10 -; CHECK-NEXT: cmpq %r15, %r9 +; CHECK-NEXT: imulq %r15, %rax +; CHECK-NEXT: leaq (%rsi,%rax), %r11 +; CHECK-NEXT: leaq (%r11,%r9,8), %r10 +; CHECK-NEXT: testq %r9, %r9 +; CHECK-NEXT: movq $-1, %r11 +; CHECK-NEXT: cmovnsq %r9, %r11 +; CHECK-NEXT: cmpq %r12, %r10 ; CHECK-NEXT: jae .LBB1_7 ; CHECK-NEXT: # %bb.6: # %vector.memcheck ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: leaq 8(%rsi), %r9 -; CHECK-NEXT: addq %r9, %rax -; CHECK-NEXT: leaq (%rax,%r10,8), %rax -; CHECK-NEXT: cmpq %r15, %rax +; CHECK-NEXT: leaq 8(%rsi), %r10 +; CHECK-NEXT: addq %r10, %rax +; CHECK-NEXT: leaq (%rax,%r11,8), %rax +; CHECK-NEXT: cmpq %r12, %rax ; CHECK-NEXT: ja .LBB1_14 ; CHECK-NEXT: .LBB1_7: # %vector.body.preheader ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 @@ -201,50 +201,47 @@ define void @_Z2x6v() local_unnamed_addr { ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 ; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] -; CHECK-NEXT: movdqu %xmm0, (%r12,%r13,8) -; CHECK-NEXT: movdqu %xmm0, 16(%r12,%r13,8) -; CHECK-NEXT: movl $4, %r10d +; CHECK-NEXT: movdqu %xmm0, (%r13,%r9,8) +; CHECK-NEXT: movdqu %xmm0, 16(%r13,%r9,8) +; CHECK-NEXT: movl $4, %r11d ; CHECK-NEXT: shrq $2, %rax ; CHECK-NEXT: jne .LBB1_11 ; CHECK-NEXT: jmp .LBB1_13 ; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: xorl %r10d, %r10d +; CHECK-NEXT: xorl %r11d, %r11d ; CHECK-NEXT: shrq $2, %rax ; CHECK-NEXT: je .LBB1_13 ; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 ; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = 
xmm0[0,1,0,1] -; CHECK-NEXT: movq %r10, %rax +; CHECK-NEXT: movq %r11, %rax ; CHECK-NEXT: subq %rdx, %rax -; CHECK-NEXT: addq %r13, %r10 -; CHECK-NEXT: leaq (%r11,%r10,8), %r10 +; CHECK-NEXT: addq %r9, %r11 +; CHECK-NEXT: leaq (%rbx,%r11,8), %r11 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_12: # %vector.body ; CHECK-NEXT: # Parent Loop BB1_2 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: movdqu %xmm0, -32(%r10) -; CHECK-NEXT: movdqu %xmm0, -16(%r10) -; CHECK-NEXT: movdqu %xmm0, (%r10) -; CHECK-NEXT: movdqu %xmm0, 16(%r10) -; CHECK-NEXT: addq $64, %r10 +; CHECK-NEXT: movdqu %xmm0, -32(%r11) +; CHECK-NEXT: movdqu %xmm0, -16(%r11) +; CHECK-NEXT: movdqu %xmm0, (%r11) +; CHECK-NEXT: movdqu %xmm0, 16(%r11) +; CHECK-NEXT: addq $64, %r11 ; CHECK-NEXT: addq $8, %rax ; CHECK-NEXT: jne .LBB1_12 ; CHECK-NEXT: .LBB1_13: # %middle.block ; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1 -; CHECK-NEXT: addq %rdx, %r13 +; CHECK-NEXT: addq %rdx, %r9 ; CHECK-NEXT: cmpq %rdx, %rbp -; CHECK-NEXT: movq %r13, %rdx ; CHECK-NEXT: je .LBB1_15 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_14: # %for.body2 ; CHECK-NEXT: # Parent Loop BB1_2 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: movq (%r15), %rax -; CHECK-NEXT: movq %rax, (%r12,%r13,8) -; CHECK-NEXT: leaq 1(%r13), %rdx -; CHECK-NEXT: cmpq $-1, %r13 -; CHECK-NEXT: movq %rdx, %r13 +; CHECK-NEXT: movq (%r12), %rax +; CHECK-NEXT: movq %rax, (%r13,%r9,8) +; CHECK-NEXT: incq %r9 ; CHECK-NEXT: jl .LBB1_14 ; CHECK-NEXT: jmp .LBB1_15 ; CHECK-NEXT: .LBB1_17: # %for.cond.for.end5_crit_edge @@ -252,7 +249,6 @@ define void @_Z2x6v() local_unnamed_addr { ; CHECK-NEXT: movq %rdi, (%rax) ; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rax ; CHECK-NEXT: movl $0, (%rax) -; CHECK-NEXT: .LBB1_18: # %for.end5 ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: popq %r12 @@ -265,6 +261,13 @@ define void @_Z2x6v() local_unnamed_addr { ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: popq %rbp 
; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_restore %rbx +; CHECK-NEXT: .cfi_restore %r12 +; CHECK-NEXT: .cfi_restore %r13 +; CHECK-NEXT: .cfi_restore %r14 +; CHECK-NEXT: .cfi_restore %r15 +; CHECK-NEXT: .cfi_restore %rbp +; CHECK-NEXT: .LBB1_18: # %for.end5 ; CHECK-NEXT: retq entry: %0 = load i32, ptr @x1, align 4 From 576b93ebfc595871afd19348081bb4063e8dc8b7 Mon Sep 17 00:00:00 2001 From: Brandon Date: Wed, 22 Oct 2025 21:48:32 -0500 Subject: [PATCH 3/5] [X86] Fix misleading comments in combineX86AddSub --- llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 74a7d83aadfd9..84dbaec297eda 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -57637,16 +57637,16 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(RHS)) { SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT); if (X86ISD::SUB == N->getOpcode()) { - // With LHS - C, fold LHS + (-C) + // Fold generic add(LHS, -C) to X86ISD::SUB(LHS, C). MatchGeneric(ISD::ADD, LHS, NegC, false); } else { - // With -(LHS + C), fold (-C) - LHS + // Negate X86ISD::ADD(LHS, C) and replace generic sub(-C, LHS). MatchGeneric(ISD::SUB, NegC, LHS, true); } } else if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(LHS)) { SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT); if (X86ISD::SUB == N->getOpcode()) { - // With -(C - RHS), fold RHS + (-C) + // Negate X86ISD::SUB(C, RHS) and replace generic add(RHS, -C).
MatchGeneric(ISD::ADD, RHS, NegC, true); } } From 4662b9e556bbe812d9a42f1da4cb5c9588ff19e2 Mon Sep 17 00:00:00 2001 From: Brandon Date: Thu, 23 Oct 2025 10:01:56 -0500 Subject: [PATCH 4/5] [X86] Move constant acquisition inside the `if` block to avoid unnecessary DAG node creation --- llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 84dbaec297eda..bf904fbf2620c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -57644,8 +57644,8 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, MatchGeneric(ISD::SUB, NegC, LHS, true); } } else if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(LHS)) { - SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT); if (X86ISD::SUB == N->getOpcode()) { + SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT); // Negate X86ISD::SUB(C, RHS) and replace generic add(RHS, -C).
MatchGeneric(ISD::ADD, RHS, NegC, true); } From 64521e9f7f31a2f4160902e644131ab26f551b66 Mon Sep 17 00:00:00 2001 From: Brandon Date: Thu, 23 Oct 2025 10:09:41 -0500 Subject: [PATCH 5/5] [X86] Use `auto` with `dyn_cast` for style consistency --- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index bf904fbf2620c..6eceb3a728bfe 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -57634,7 +57634,7 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, MatchGeneric(GenericOpc, LHS, RHS, false); MatchGeneric(GenericOpc, RHS, LHS, X86ISD::SUB == N->getOpcode()); - if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(RHS)) { + if (auto *Const = dyn_cast<ConstantSDNode>(RHS)) { SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT); if (X86ISD::SUB == N->getOpcode()) { // Fold generic add(LHS, -C) to X86ISD::SUB(LHS, C). @@ -57643,7 +57643,7 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG, // Negate X86ISD::ADD(LHS, C) and replace generic sub(-C, LHS). MatchGeneric(ISD::SUB, NegC, LHS, true); } - } else if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(LHS)) { + } else if (auto *Const = dyn_cast<ConstantSDNode>(LHS)) { if (X86ISD::SUB == N->getOpcode()) { SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT); // Negate X86ISD::SUB(C, RHS) and replace generic add(RHS, -C).