Skip to content

Commit 1e6bd14

Browse files
committed
Allow commuting cmn
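Commuting the operands of a CMN/CCMN requires swapping the output condition code (OutCC), so emitComparison and emitConditionalComparison now take OutCC by reference and update it when they commute the operands.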
1 parent 3acdcf8 commit 1e6bd14

File tree

5 files changed

+95
-111
lines changed

5 files changed

+95
-111
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 23 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3540,7 +3540,8 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL,
35403540
}
35413541

35423542
static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3543-
const SDLoc &DL, SelectionDAG &DAG) {
3543+
AArch64CC::CondCode &OutCC, const SDLoc &DL,
3544+
SelectionDAG &DAG) {
35443545
EVT VT = LHS.getValueType();
35453546
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
35463547

@@ -3563,12 +3564,12 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
35633564
// Can we combine a (CMP op1, (sub 0, op2) into a CMN instruction ?
35643565
Opcode = AArch64ISD::ADDS;
35653566
RHS = RHS.getOperand(1);
3566-
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3567-
isIntEqualitySetCC(CC)) {
3567+
} else if (isCMN(LHS, CC, DAG)) {
35683568
// As we are looking for EQ/NE compares, the operands can be commuted ; can
35693569
// we combine a (CMP (sub 0, op1), op2) into a CMN instruction ?
35703570
Opcode = AArch64ISD::ADDS;
35713571
LHS = LHS.getOperand(1);
3572+
OutCC = getSwappedCondition(OutCC);
35723573
} else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
35733574
if (LHS.getOpcode() == ISD::AND) {
35743575
// Similarly, (CMP (and X, Y), 0) can be implemented with a TST
@@ -3646,7 +3647,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
36463647
static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
36473648
ISD::CondCode CC, SDValue CCOp,
36483649
AArch64CC::CondCode Predicate,
3649-
AArch64CC::CondCode OutCC,
3650+
AArch64CC::CondCode &OutCC,
36503651
const SDLoc &DL, SelectionDAG &DAG) {
36513652
unsigned Opcode = 0;
36523653
const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
@@ -3668,12 +3669,12 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
36683669
} else if (isCMN(RHS, CC, DAG)) {
36693670
Opcode = AArch64ISD::CCMN;
36703671
RHS = RHS.getOperand(1);
3671-
} else if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
3672-
isIntEqualitySetCC(CC)) {
3672+
} else if (isCMN(LHS, CC, DAG)) {
36733673
// As we are looking for EQ/NE compares, the operands can be commuted ; can
36743674
// we combine a (CCMP (sub 0, op1), op2) into a CCMN instruction ?
36753675
Opcode = AArch64ISD::CCMN;
36763676
LHS = LHS.getOperand(1);
3677+
OutCC = getSwappedCondition(OutCC);
36773678
}
36783679
if (Opcode == 0)
36793680
Opcode = AArch64ISD::CCMP;
@@ -3786,7 +3787,7 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
37863787
if (ExtraCC != AArch64CC::AL) {
37873788
SDValue ExtraCmp;
37883789
if (!CCOp.getNode())
3789-
ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
3790+
ExtraCmp = emitComparison(LHS, RHS, CC, ExtraCC, DL, DAG);
37903791
else
37913792
ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
37923793
ExtraCC, DL, DAG);
@@ -3797,7 +3798,7 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
37973798

37983799
// Produce a normal comparison if we are first in the chain
37993800
if (!CCOp)
3800-
return emitComparison(LHS, RHS, CC, DL, DAG);
3801+
return emitComparison(LHS, RHS, CC, OutCC, DL, DAG);
38013802
// Otherwise produce a ccmp.
38023803
return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
38033804
DAG);
@@ -4014,13 +4015,11 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
40144015
// can be turned into:
40154016
// cmp w12, w11, lsl #1
40164017
if (!isa<ConstantSDNode>(RHS) || !isLegalCmpImmed(RHS->getAsAPIntVal())) {
4017-
bool LHSIsCMN = isCMN(LHS, CC, DAG);
4018-
bool RHSIsCMN = isCMN(RHS, CC, DAG);
4019-
SDValue TheLHS = LHSIsCMN ? LHS.getOperand(1) : LHS;
4020-
SDValue TheRHS = RHSIsCMN ? RHS.getOperand(1) : RHS;
4018+
SDValue TheLHS = isCMN(LHS, CC, DAG) ? LHS.getOperand(1) : LHS;
4019+
SDValue TheRHS = isCMN(RHS, CC, DAG) ? RHS.getOperand(1) : RHS;
40214020

4022-
if (getCmpOperandFoldingProfit(TheLHS) + (LHSIsCMN ? 1 : 0) >
4023-
getCmpOperandFoldingProfit(TheRHS) + (RHSIsCMN ? 1 : 0)) {
4021+
if (getCmpOperandFoldingProfit(TheLHS) >
4022+
getCmpOperandFoldingProfit(TheRHS)) {
40244023
std::swap(LHS, RHS);
40254024
CC = ISD::getSetCCSwappedOperands(CC);
40264025
}
@@ -4056,10 +4055,11 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
40564055
SDValue SExt =
40574056
DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(), LHS,
40584057
DAG.getValueType(MVT::i16));
4058+
4059+
AArch64CC = changeIntCCToAArch64CC(CC);
40594060
Cmp = emitComparison(
40604061
SExt, DAG.getSignedConstant(ValueofRHS, DL, RHS.getValueType()), CC,
4061-
DL, DAG);
4062-
AArch64CC = changeIntCCToAArch64CC(CC);
4062+
AArch64CC, DL, DAG);
40634063
}
40644064
}
40654065

@@ -4072,8 +4072,8 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
40724072
}
40734073

40744074
if (!Cmp) {
4075-
Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
40764075
AArch64CC = changeIntCCToAArch64CC(CC);
4076+
Cmp = emitComparison(LHS, RHS, CC, AArch64CC, DL, DAG);
40774077
}
40784078
AArch64cc = DAG.getConstant(AArch64CC, DL, MVT_CC);
40794079
return Cmp;
@@ -10664,8 +10664,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
1066410664

1066510665
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
1066610666
// clean. Some of them require two branches to implement.
10667-
SDValue Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
10668-
AArch64CC::CondCode CC1, CC2;
10667+
AArch64CC::CondCode CC1 = AArch64CC::AL, CC2;
10668+
SDValue Cmp = emitComparison(LHS, RHS, CC, CC1, DL, DAG);
1066910669
changeFPCCToAArch64CC(CC, CC1, CC2);
1067010670
SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32);
1067110671
SDValue BR1 =
@@ -11149,12 +11149,12 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
1114911149
// If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
1115011150
// and do the comparison.
1115111151
SDValue Cmp;
11152+
AArch64CC::CondCode CC1 = AArch64CC::AL, CC2;
1115211153
if (IsStrict)
1115311154
Cmp = emitStrictFPComparison(LHS, RHS, DL, DAG, Chain, IsSignaling);
1115411155
else
11155-
Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
11156+
Cmp = emitComparison(LHS, RHS, CC, CC1, DL, DAG);
1115611157

11157-
AArch64CC::CondCode CC1, CC2;
1115811158
changeFPCCToAArch64CC(CC, CC1, CC2);
1115911159
SDValue Res;
1116011160
if (CC2 == AArch64CC::AL) {
@@ -11550,12 +11550,11 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
1155011550
if (VectorCmp)
1155111551
return VectorCmp;
1155211552
}
11553-
11554-
SDValue Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
11553+
AArch64CC::CondCode CC1 = AArch64CC::AL, CC2;
11554+
SDValue Cmp = emitComparison(LHS, RHS, CC, CC1, DL, DAG);
1155511555

1155611556
// Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
1155711557
// clean. Some of them require two CSELs to implement.
11558-
AArch64CC::CondCode CC1, CC2;
1155911558
changeFPCCToAArch64CC(CC, CC1, CC2);
1156011559

1156111560
if (Flags.hasNoSignedZeros()) {

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ class AArch64InstructionSelector : public InstructionSelector {
352352
MachineInstr *emitConditionalComparison(Register LHS, Register RHS,
353353
CmpInst::Predicate CC,
354354
AArch64CC::CondCode Predicate,
355-
AArch64CC::CondCode OutCC,
355+
AArch64CC::CondCode &OutCC,
356356
MachineIRBuilder &MIB) const;
357357
MachineInstr *emitConjunctionRec(Register Val, AArch64CC::CondCode &OutCC,
358358
bool Negate, Register CCOp,
@@ -4869,7 +4869,7 @@ static bool canEmitConjunction(Register Val, bool &CanNegate, bool &MustBeFirst,
48694869

48704870
MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
48714871
Register LHS, Register RHS, CmpInst::Predicate CC,
4872-
AArch64CC::CondCode Predicate, AArch64CC::CondCode OutCC,
4872+
AArch64CC::CondCode Predicate, AArch64CC::CondCode &OutCC,
48734873
MachineIRBuilder &MIB) const {
48744874
auto &MRI = *MIB.getMRI();
48754875
LLT OpTy = MRI.getType(LHS);
@@ -4878,7 +4878,25 @@ MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
48784878
if (CmpInst::isIntPredicate(CC)) {
48794879
assert(OpTy.getSizeInBits() == 32 || OpTy.getSizeInBits() == 64);
48804880
C = getIConstantVRegValWithLookThrough(RHS, MRI);
4881-
if (!C || C->Value.sgt(31) || C->Value.slt(-31))
4881+
if (!C) {
4882+
MachineInstr *Def = getDefIgnoringCopies(RHS, MRI);
4883+
if (isCMN(Def, CC, MRI)) {
4884+
RHS = Def->getOperand(2).getReg();
4885+
CCmpOpc =
4886+
OpTy.getSizeInBits() == 32 ? AArch64::CCMNWr : AArch64::CCMNXr;
4887+
} else {
4888+
Def = getDefIgnoringCopies(LHS, MRI);
4889+
if (isCMN(Def, CC, MRI)) {
4890+
LHS = Def->getOperand(2).getReg();
4891+
OutCC = getSwappedCondition(OutCC);
4892+
CCmpOpc =
4893+
OpTy.getSizeInBits() == 32 ? AArch64::CCMNWr : AArch64::CCMNXr;
4894+
} else {
4895+
CCmpOpc =
4896+
OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
4897+
}
4898+
}
4899+
} else if (C->Value.sgt(31) || C->Value.slt(-31))
48824900
CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWr : AArch64::CCMPXr;
48834901
else if (C->Value.ule(31))
48844902
CCmpOpc = OpTy.getSizeInBits() == 32 ? AArch64::CCMPWi : AArch64::CCMPXi;
@@ -4904,8 +4922,7 @@ MachineInstr *AArch64InstructionSelector::emitConditionalComparison(
49044922
}
49054923
AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
49064924
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
4907-
auto CCmp =
4908-
MIB.buildInstr(CCmpOpc, {}, {LHS});
4925+
auto CCmp = MIB.buildInstr(CCmpOpc, {}, {LHS});
49094926
if (CCmpOpc == AArch64::CCMPWi || CCmpOpc == AArch64::CCMPXi)
49104927
CCmp.addImm(C->Value.getZExtValue());
49114928
else if (CCmpOpc == AArch64::CCMNWi || CCmpOpc == AArch64::CCMNXi)

llvm/test/CodeGen/AArch64/cmp-chains.ll

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -268,16 +268,15 @@ define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
268268
; CHECK-SD-NEXT: csel w0, w1, w0, gt
269269
; CHECK-SD-NEXT: ret
270270
;
271-
; CHECK-GI-LABEL: neg_range_int_comp:
272-
; CHECK-GI: // %bb.0:
273-
; CHECK-GI-NEXT: orr w8, w3, #0x1
274-
; CHECK-GI-NEXT: cmp w0, w2
275-
; CHECK-GI-NEXT: neg w8, w8
276-
; CHECK-GI-NEXT: ccmp w1, w8, #4, lt
277-
; CHECK-GI-NEXT: csel w0, w1, w0, gt
278-
; CHECK-GI-NEXT: ret
271+
; GISEL-LABEL: neg_range_int_comp:
272+
; GISEL: // %bb.0:
273+
; GISEL-NEXT: cmp w0, w2
274+
; GISEL-NEXT: orr w8, w3, #0x1
275+
; GISEL-NEXT: ccmn w1, w8, #4, lt
276+
; GISEL-NEXT: csel w0, w1, w0, gt
277+
; GISEL-NEXT: ret
279278
%dor = or i32 %d, 1
280-
%negd = sub i32 0, %dor
279+
%negd = sub nsw i32 0, %dor
281280
%cmp = icmp sgt i32 %b, %negd
282281
%cmp1 = icmp slt i32 %a, %c
283282
%or.cond = and i1 %cmp, %cmp1
@@ -371,16 +370,15 @@ define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) {
371370
; CHECK-SD-NEXT: csel w0, w1, w0, lt
372371
; CHECK-SD-NEXT: ret
373372
;
374-
; CHECK-GI-LABEL: neg_range_int_comp2:
375-
; CHECK-GI: // %bb.0:
376-
; CHECK-GI-NEXT: orr w8, w3, #0x1
377-
; CHECK-GI-NEXT: cmp w0, w2
378-
; CHECK-GI-NEXT: neg w8, w8
379-
; CHECK-GI-NEXT: ccmp w1, w8, #0, ge
380-
; CHECK-GI-NEXT: csel w0, w1, w0, lt
381-
; CHECK-GI-NEXT: ret
373+
; GISEL-LABEL: neg_range_int_comp2:
374+
; GISEL: // %bb.0:
375+
; GISEL-NEXT: cmp w0, w2
376+
; GISEL-NEXT: orr w8, w3, #0x1
377+
; GISEL-NEXT: ccmn w1, w8, #0, ge
378+
; GISEL-NEXT: csel w0, w1, w0, lt
379+
; GISEL-NEXT: ret
382380
%dor = or i32 %d, 1
383-
%negd = sub i32 0, %dor
381+
%negd = sub nsw i32 0, %dor
384382
%cmp = icmp slt i32 %b, %negd
385383
%cmp1 = icmp sge i32 %a, %c
386384
%or.cond = and i1 %cmp, %cmp1
@@ -407,7 +405,7 @@ define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) {
407405
; CHECK-GI-NEXT: csel w0, w1, w0, lo
408406
; CHECK-GI-NEXT: ret
409407
%dor = or i32 %d, 1
410-
%negd = sub i32 0, %dor
408+
%negd = sub nsw i32 0, %dor
411409
%cmp = icmp ult i32 %b, %negd
412410
%cmp1 = icmp sgt i32 %a, %c
413411
%or.cond = and i1 %cmp, %cmp1

llvm/test/CodeGen/AArch64/cmp-select-sign.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -261,7 +261,7 @@ define i32 @or_neg(i32 %x, i32 %y) {
261261
; CHECK-LABEL: or_neg:
262262
; CHECK: // %bb.0:
263263
; CHECK-NEXT: orr w8, w0, #0x1
264-
; CHECK-NEXT: cmn w1, w8
264+
; CHECK-NEXT: cmn w8, w1
265265
; CHECK-NEXT: cset w0, lt
266266
; CHECK-NEXT: ret
267267
%3 = or i32 %x, 1
@@ -275,7 +275,7 @@ define i32 @or_neg_ugt(i32 %x, i32 %y) {
275275
; CHECK-LABEL: or_neg_ugt:
276276
; CHECK: // %bb.0:
277277
; CHECK-NEXT: orr w8, w0, #0x1
278-
; CHECK-NEXT: cmn w1, w8
278+
; CHECK-NEXT: cmn w8, w1
279279
; CHECK-NEXT: cset w0, lo
280280
; CHECK-NEXT: ret
281281
%3 = or i32 %x, 1
@@ -319,7 +319,7 @@ define i32 @or_neg_no_smin_but_zero(i32 %x, i32 %y) {
319319
; CHECK-LABEL: or_neg_no_smin_but_zero:
320320
; CHECK: // %bb.0:
321321
; CHECK-NEXT: bic w8, w0, w0, asr #31
322-
; CHECK-NEXT: cmn w1, w8
322+
; CHECK-NEXT: cmn w8, w1
323323
; CHECK-NEXT: cset w0, lt
324324
; CHECK-NEXT: ret
325325
%3 = call i32 @llvm.smax.i32(i32 %x, i32 0)
@@ -350,7 +350,7 @@ define i32 @or_neg2(i32 %x, i32 %y) {
350350
; CHECK-LABEL: or_neg2:
351351
; CHECK: // %bb.0:
352352
; CHECK-NEXT: orr w8, w0, #0x1
353-
; CHECK-NEXT: cmn w1, w8
353+
; CHECK-NEXT: cmn w8, w1
354354
; CHECK-NEXT: cset w0, le
355355
; CHECK-NEXT: ret
356356
%3 = or i32 %x, 1
@@ -364,7 +364,7 @@ define i32 @or_neg3(i32 %x, i32 %y) {
364364
; CHECK-LABEL: or_neg3:
365365
; CHECK: // %bb.0:
366366
; CHECK-NEXT: orr w8, w0, #0x1
367-
; CHECK-NEXT: cmn w1, w8
367+
; CHECK-NEXT: cmn w8, w1
368368
; CHECK-NEXT: cset w0, gt
369369
; CHECK-NEXT: ret
370370
%3 = or i32 %x, 1
@@ -378,7 +378,7 @@ define i32 @or_neg4(i32 %x, i32 %y) {
378378
; CHECK-LABEL: or_neg4:
379379
; CHECK: // %bb.0:
380380
; CHECK-NEXT: orr w8, w0, #0x1
381-
; CHECK-NEXT: cmn w1, w8
381+
; CHECK-NEXT: cmn w8, w1
382382
; CHECK-NEXT: cset w0, ge
383383
; CHECK-NEXT: ret
384384
%3 = or i32 %x, 1
@@ -392,7 +392,7 @@ define i32 @or_neg_ult(i32 %x, i32 %y) {
392392
; CHECK-LABEL: or_neg_ult:
393393
; CHECK: // %bb.0:
394394
; CHECK-NEXT: orr w8, w0, #0x1
395-
; CHECK-NEXT: cmn w1, w8
395+
; CHECK-NEXT: cmn w8, w1
396396
; CHECK-NEXT: cset w0, lo
397397
; CHECK-NEXT: ret
398398
%3 = or i32 %x, 1
@@ -434,7 +434,7 @@ define i32 @or_neg_no_smin_but_zero2(i32 %x, i32 %y) {
434434
; CHECK-LABEL: or_neg_no_smin_but_zero2:
435435
; CHECK: // %bb.0:
436436
; CHECK-NEXT: bic w8, w0, w0, asr #31
437-
; CHECK-NEXT: cmn w1, w8
437+
; CHECK-NEXT: cmn w8, w1
438438
; CHECK-NEXT: cset w0, ge
439439
; CHECK-NEXT: ret
440440
%3 = call i32 @llvm.smax.i32(i32 %x, i32 0)

0 commit comments

Comments
 (0)