Skip to content

Commit 584982a

Browse files
committed
Only fold into TST directly if the AND has a single use
Otherwise the fold is too aggressive: when the AND has other users and it later comes time to split along edges, we can end up duplicating more than we bargained for.
1 parent 8c1aff4 commit 584982a

File tree

6 files changed

+110
-220
lines changed

6 files changed

+110
-220
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3745,19 +3745,13 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
37453745
Opcode = AArch64ISD::ADDS;
37463746
LHS = LHS.getOperand(1);
37473747
} else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
3748-
if (LHS.getOpcode() == ISD::AND) {
3748+
if (LHS.getOpcode() == ISD::AND && LHS.hasOneUse()) {
37493749
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
3750-
// (a.k.a. ANDS) except that the flags are only guaranteed to work for one
3751-
// of the signed comparisons.
3752-
const SDValue ANDSNode =
3753-
DAG.getNode(AArch64ISD::ANDS, DL, DAG.getVTList(VT, FlagsVT),
3754-
LHS.getOperand(0), LHS.getOperand(1));
3755-
// Replace all users of (and X, Y) with newly generated (ands X, Y)
3756-
DAG.ReplaceAllUsesWith(LHS, ANDSNode);
3757-
return ANDSNode.getValue(1);
3758-
} else if (LHS.getOpcode() == AArch64ISD::ANDS) {
3759-
// Use result of ANDS
3760-
return LHS.getValue(1);
3750+
// (a.k.a. ANDS) except that the flags are only guaranteed to work for
3751+
// signed comparisons.
3752+
Opcode = AArch64ISD::ANDS;
3753+
RHS = LHS.getOperand(1);
3754+
LHS = LHS.getOperand(0);
37613755
}
37623756
}
37633757

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5109,14 +5109,14 @@ MachineInstr *AArch64InstructionSelector::tryFoldIntegerCompare(
51095109
//
51105110
// tst x, y
51115111
if (!CmpInst::isUnsigned(P) && LHSDef &&
5112-
LHSDef->getOpcode() == TargetOpcode::G_AND) {
5112+
LHSDef->getOpcode() == TargetOpcode::G_AND &&
5113+
MRI.hasOneNonDBGUse(LHS.getReg())) {
51135114
// Make sure that the RHS is 0.
51145115
auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS.getReg(), MRI);
51155116
if (!ValAndVReg || ValAndVReg->Value != 0)
51165117
return nullptr;
51175118

5118-
return emitTST(LHSDef->getOperand(1),
5119-
LHSDef->getOperand(2), MIRBuilder);
5119+
return emitTST(LHSDef->getOperand(1), LHSDef->getOperand(2), MIRBuilder);
51205120
}
51215121

51225122
return nullptr;

llvm/test/CodeGen/AArch64/arm64-ccmp.ll

Lines changed: 7 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,23 +1050,19 @@ define i32 @deep_or2(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
10501050
define i32 @multiccmp(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %x, i32 %y) #0 {
10511051
; CHECK-SD-LABEL: multiccmp:
10521052
; CHECK-SD: ; %bb.0: ; %entry
1053-
; CHECK-SD-NEXT: stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill
1054-
; CHECK-SD-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill
1055-
; CHECK-SD-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
1053+
; CHECK-SD-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
1054+
; CHECK-SD-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
10561055
; CHECK-SD-NEXT: mov x19, x5
10571056
; CHECK-SD-NEXT: cmp w0, w1
1058-
; CHECK-SD-NEXT: cset w20, gt
1059-
; CHECK-SD-NEXT: cmp w2, w3
1060-
; CHECK-SD-NEXT: cset w21, ne
1061-
; CHECK-SD-NEXT: tst w20, w21
1057+
; CHECK-SD-NEXT: ccmp w2, w3, #4, gt
1058+
; CHECK-SD-NEXT: cset w20, ne
10621059
; CHECK-SD-NEXT: csel w0, w5, w4, ne
10631060
; CHECK-SD-NEXT: bl _callee
1064-
; CHECK-SD-NEXT: tst w20, w21
1061+
; CHECK-SD-NEXT: cmp w20, #0
10651062
; CHECK-SD-NEXT: csel w0, w0, w19, ne
10661063
; CHECK-SD-NEXT: bl _callee
1067-
; CHECK-SD-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
1068-
; CHECK-SD-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
1069-
; CHECK-SD-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload
1064+
; CHECK-SD-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
1065+
; CHECK-SD-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
10701066
; CHECK-SD-NEXT: ret
10711067
;
10721068
; CHECK-GI-LABEL: multiccmp:

llvm/test/CodeGen/AArch64/fcmp-fp128.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -180,12 +180,10 @@ define double @one(fp128 %a, fp128 %b, double %d, double %e) {
180180
; CHECK-SD-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill
181181
; CHECK-SD-NEXT: bl __eqtf2
182182
; CHECK-SD-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload
183-
; CHECK-SD-NEXT: cmp w0, #0
184-
; CHECK-SD-NEXT: cset w19, ne
183+
; CHECK-SD-NEXT: mov w19, w0
185184
; CHECK-SD-NEXT: bl __unordtf2
186185
; CHECK-SD-NEXT: cmp w0, #0
187-
; CHECK-SD-NEXT: cset w8, eq
188-
; CHECK-SD-NEXT: tst w8, w19
186+
; CHECK-SD-NEXT: ccmp w19, #0, #4, eq
189187
; CHECK-SD-NEXT: ldp x30, x19, [sp, #48] // 16-byte Folded Reload
190188
; CHECK-SD-NEXT: fcsel d0, d9, d8, ne
191189
; CHECK-SD-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload

0 commit comments

Comments
 (0)