diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 2072e48914ae6..aa6905e638bb9 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -25486,25 +25486,31 @@ static SDValue performCSELCombine(SDNode *N, } } - // CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) if cc doesn't - // use overflow flags, to avoid the comparison with zero. In case of success, - // this also replaces the original SUB(x,y) with the newly created SUBS(x,y). - // NOTE: Perhaps in the future use performFlagSettingCombine to replace SUB - // nodes with their SUBS equivalent as is already done for other flag-setting + // CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) or // CSEL a, + // b, cc, SUBS(ADD(x,y), 0) -> CSEL a, b, cc, ADDS(x,y) if cc doesn't use + // overflow flags, to avoid the comparison with zero. In case of success, this + // also replaces the original SUB(x,y) with the newly created SUBS(x,y). NOTE: + // Perhaps in the future use performFlagSettingCombine to replace SUB nodes + // with their SUBS equivalent as is already done for other flag-setting // operators, in which case doing the replacement here becomes redundant. if (Cond.getOpcode() == AArch64ISD::SUBS && Cond->hasNUsesOfValue(1, 1) && isNullConstant(Cond.getOperand(1))) { - SDValue Sub = Cond.getOperand(0); + SDValue SubOrAdd = Cond.getOperand(0); AArch64CC::CondCode CC = static_cast(N->getConstantOperandVal(2)); - if (Sub.getOpcode() == ISD::SUB && - (CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI || - CC == AArch64CC::PL)) { + if ((SubOrAdd.getOpcode() == ISD::SUB || + SubOrAdd.getOpcode() == ISD::ADD) && + ((CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI || + CC == AArch64CC::PL))) { SDLoc DL(N); - SDValue Subs = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(), - Sub.getOperand(0), Sub.getOperand(1)); - DCI.CombineTo(Sub.getNode(), Subs); - DCI.CombineTo(Cond.getNode(), Subs, Subs.getValue(1)); + + SDValue Result = + DAG.getNode((SubOrAdd.getOpcode() == ISD::SUB ? AArch64ISD::SUBS + : AArch64ISD::ADDS), + DL, Cond->getVTList(), SubOrAdd.getOperand(0), + SubOrAdd.getOperand(1)); + DCI.CombineTo(SubOrAdd.getNode(), Result); + DCI.CombineTo(Cond.getNode(), Result, Result.getValue(1)); return SDValue(N, 0); } } diff --git a/llvm/test/CodeGen/AArch64/peephole-and-tst.ll b/llvm/test/CodeGen/AArch64/peephole-and-tst.ll index 3caac1d13495d..057c373967257 100644 --- a/llvm/test/CodeGen/AArch64/peephole-and-tst.ll +++ b/llvm/test/CodeGen/AArch64/peephole-and-tst.ll @@ -295,19 +295,33 @@ define i64 @test_and_4(i64 %x, i64 %y) { } define i64 @test_add(i64 %x, i64 %y) { -; CHECK-LABEL: test_add: -; CHECK: // %bb.0: -; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: add x19, x0, #3 -; CHECK-NEXT: mov x0, xzr -; CHECK-NEXT: bl callee -; CHECK-NEXT: cmp x19, #0 -; CHECK-NEXT: csel x0, x19, x0, eq -; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: ret +; CHECK-SD-LABEL: test_add: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-SD-NEXT: .cfi_def_cfa_offset 16 +; CHECK-SD-NEXT: .cfi_offset w19, -8 +; CHECK-SD-NEXT: .cfi_offset w30, -16 +; CHECK-SD-NEXT: mov x19, x0 +; CHECK-SD-NEXT: mov x0, xzr +; CHECK-SD-NEXT: bl callee +; CHECK-SD-NEXT: adds x8, x19, #3 +; CHECK-SD-NEXT: csel x0, x8, x0, eq +; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: test_add: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-GI-NEXT: .cfi_def_cfa_offset 16 +; CHECK-GI-NEXT: .cfi_offset w19, -8 +; CHECK-GI-NEXT: .cfi_offset w30, -16 +; CHECK-GI-NEXT: add x19, x0, #3 +; CHECK-GI-NEXT: mov x0, xzr +; CHECK-GI-NEXT: bl callee +; CHECK-GI-NEXT: cmp x19, #0 +; CHECK-GI-NEXT: csel x0, x19, x0, eq +; CHECK-GI-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload +; CHECK-GI-NEXT: ret %a = add i64 %x, 3 %b = call i64 @callee(i64 0) %c = icmp eq i64 %a, 0