Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 19 additions & 13 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25486,25 +25486,31 @@ static SDValue performCSELCombine(SDNode *N,
}
}

// CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) if cc doesn't
// use overflow flags, to avoid the comparison with zero. In case of success,
// this also replaces the original SUB(x,y) with the newly created SUBS(x,y).
// NOTE: Perhaps in the future use performFlagSettingCombine to replace SUB
// nodes with their SUBS equivalent as is already done for other flag-setting
// CSEL a, b, cc, SUBS(SUB(x,y), 0) -> CSEL a, b, cc, SUBS(x,y) or // CSEL a,
// b, cc, SUBS(ADD(x,y), 0) -> CSEL a, b, cc, ADDS(x,y) if cc doesn't use
// overflow flags, to avoid the comparison with zero. In case of success, this
// also replaces the original SUB(x,y) with the newly created SUBS(x,y). NOTE:
// Perhaps in the future use performFlagSettingCombine to replace SUB nodes
// with their SUBS equivalent as is already done for other flag-setting
// operators, in which case doing the replacement here becomes redundant.
if (Cond.getOpcode() == AArch64ISD::SUBS && Cond->hasNUsesOfValue(1, 1) &&
isNullConstant(Cond.getOperand(1))) {
SDValue Sub = Cond.getOperand(0);
SDValue SubOrAdd = Cond.getOperand(0);
AArch64CC::CondCode CC =
static_cast<AArch64CC::CondCode>(N->getConstantOperandVal(2));
if (Sub.getOpcode() == ISD::SUB &&
(CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI ||
CC == AArch64CC::PL)) {
if ((SubOrAdd.getOpcode() == ISD::SUB ||
SubOrAdd.getOpcode() == ISD::ADD) &&
((CC == AArch64CC::EQ || CC == AArch64CC::NE || CC == AArch64CC::MI ||
CC == AArch64CC::PL))) {
SDLoc DL(N);
SDValue Subs = DAG.getNode(AArch64ISD::SUBS, DL, Cond->getVTList(),
Sub.getOperand(0), Sub.getOperand(1));
DCI.CombineTo(Sub.getNode(), Subs);
DCI.CombineTo(Cond.getNode(), Subs, Subs.getValue(1));

SDValue Result =
DAG.getNode((SubOrAdd.getOpcode() == ISD::SUB ? AArch64ISD::SUBS
: AArch64ISD::ADDS),
DL, Cond->getVTList(), SubOrAdd.getOperand(0),
SubOrAdd.getOperand(1));
DCI.CombineTo(SubOrAdd.getNode(), Result);
DCI.CombineTo(Cond.getNode(), Result, Result.getValue(1));
return SDValue(N, 0);
}
}
Expand Down
40 changes: 27 additions & 13 deletions llvm/test/CodeGen/AArch64/peephole-and-tst.ll
Original file line number Diff line number Diff line change
Expand Up @@ -295,19 +295,33 @@ define i64 @test_and_4(i64 %x, i64 %y) {
}

define i64 @test_add(i64 %x, i64 %y) {
; CHECK-LABEL: test_add:
; CHECK: // %bb.0:
; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: add x19, x0, #3
; CHECK-NEXT: mov x0, xzr
; CHECK-NEXT: bl callee
; CHECK-NEXT: cmp x19, #0
; CHECK-NEXT: csel x0, x19, x0, eq
; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
; CHECK-SD-LABEL: test_add:
; CHECK-SD: // %bb.0:
; CHECK-SD-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
; CHECK-SD-NEXT: .cfi_offset w19, -8
; CHECK-SD-NEXT: .cfi_offset w30, -16
; CHECK-SD-NEXT: mov x19, x0
; CHECK-SD-NEXT: mov x0, xzr
; CHECK-SD-NEXT: bl callee
; CHECK-SD-NEXT: adds x8, x19, #3
; CHECK-SD-NEXT: csel x0, x8, x0, eq
; CHECK-SD-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-SD-NEXT: ret
;
; CHECK-GI-LABEL: test_add:
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
; CHECK-GI-NEXT: .cfi_def_cfa_offset 16
; CHECK-GI-NEXT: .cfi_offset w19, -8
; CHECK-GI-NEXT: .cfi_offset w30, -16
; CHECK-GI-NEXT: add x19, x0, #3
; CHECK-GI-NEXT: mov x0, xzr
; CHECK-GI-NEXT: bl callee
; CHECK-GI-NEXT: cmp x19, #0
; CHECK-GI-NEXT: csel x0, x19, x0, eq
; CHECK-GI-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; CHECK-GI-NEXT: ret
%a = add i64 %x, 3
%b = call i64 @callee(i64 0)
%c = icmp eq i64 %a, 0
Expand Down