Skip to content

Commit f4669f2

Browse files
committed
[Global-ISel][AArch64] Port ShouldBeAdjustedToZero and allow negative cmp immediates in global-isel
To undo the mitigations, shouldBeAdjustedToZero also includes one-use checks for cmn 1 to cmp 0 because of the pl/mi favoritism. This was also brought to SelectionDAG.
1 parent 4bf3395 commit f4669f2

File tree

10 files changed

+194
-268
lines changed

10 files changed

+194
-268
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3961,20 +3961,24 @@ static unsigned getCmpOperandFoldingProfit(SDValue Op) {
39613961
}
39623962

39633963
// emitComparison() converts comparison with one or negative one to comparison
3964-
// with 0. Note that this only works for signed comparisons because of how ANDS
3965-
// works.
3964+
// with 0.
39663965
static bool shouldBeAdjustedToZero(SDValue LHS, APInt C, ISD::CondCode &CC) {
3967-
// Only works for ANDS and AND.
3968-
if (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != AArch64ISD::ANDS)
3966+
// TODO: Is this too restrictive? This is just to prevent CSE with other
3967+
// comparisons.
3968+
if (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != AArch64ISD::ANDS &&
3969+
!LHS.hasOneUse())
39693970
return false;
39703971

3971-
if (C.isOne() && (CC == ISD::SETLT || CC == ISD::SETGE)) {
3972-
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3972+
if (C.isAllOnes() && (CC == ISD::SETLE || CC == ISD::SETGT)) {
3973+
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
39733974
return true;
39743975
}
39753976

3976-
if (C.isAllOnes() && (CC == ISD::SETLE || CC == ISD::SETGT)) {
3977-
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3977+
if (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != AArch64ISD::ANDS)
3978+
return false;
3979+
3980+
if (C.isOne() && (CC == ISD::SETLT || CC == ISD::SETGE)) {
3981+
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
39783982
return true;
39793983
}
39803984

@@ -4035,13 +4039,12 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
40354039
break;
40364040
case ISD::SETULE:
40374041
case ISD::SETUGT: {
4038-
if (!C.isAllOnes()) {
4039-
APInt CPlusOne = C + 1;
4040-
if (isLegalCmpImmed(CPlusOne) ||
4041-
(NumImmForC > numberOfInstrToLoadImm(CPlusOne))) {
4042-
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4043-
RHS = DAG.getConstant(CPlusOne, DL, VT);
4044-
}
4042+
assert(!C.isAllOnes() && "C should not be -1 here");
4043+
APInt CPlusOne = C + 1;
4044+
if (isLegalCmpImmed(CPlusOne) ||
4045+
(NumImmForC > numberOfInstrToLoadImm(CPlusOne))) {
4046+
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4047+
RHS = DAG.getConstant(CPlusOne, DL, VT);
40454048
}
40464049
break;
40474050
}

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp

Lines changed: 66 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -561,6 +561,46 @@ void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
561561
MI.eraseFromParent();
562562
}
563563

564+
bool isLegalCmpImmed(APInt C) {
565+
// Works for negative immediates too, as it can be written as an ADDS
566+
// instruction with a negated immediate.
567+
return isLegalArithImmed(C.abs().getZExtValue());
568+
}
569+
570+
/// Check if a comparison with 1 or -1 should be adjusted to compare with 0.
571+
/// This only works for signed comparisons because of how ANDS works.
572+
///
573+
/// \param LHS - The left-hand side register of the comparison
574+
/// \param C - The constant value (1 or -1)
575+
/// \param P - The predicate to potentially adjust
576+
/// \param MRI - Machine register info for looking up definitions
577+
/// \returns true if the comparison should be adjusted to compare with 0
578+
static bool shouldBeAdjustedToZero(Register LHS, APInt C, CmpInst::Predicate &P,
579+
const MachineRegisterInfo &MRI) {
580+
// Only works for AND operations
581+
MachineInstr *LHSDef = getDefIgnoringCopies(LHS, MRI);
582+
583+
// TODO: Too restrictive?
584+
if (!LHSDef ||
585+
(LHSDef->getOpcode() != TargetOpcode::G_AND && !MRI.hasOneUse(LHS)))
586+
return false;
587+
588+
if (C.isAllOnes() && (P == CmpInst::ICMP_SLE || P == CmpInst::ICMP_SGT)) {
589+
P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
590+
return true;
591+
}
592+
593+
if (LHSDef->getOpcode() != TargetOpcode::G_AND)
594+
return false;
595+
596+
if (C.isOne() && (P == CmpInst::ICMP_SLT || P == CmpInst::ICMP_SGE)) {
597+
P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
598+
return true;
599+
}
600+
601+
return false;
602+
}
603+
564604
/// Determine if it is possible to modify the \p RHS and predicate \p P of a
565605
/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
566606
///
@@ -569,7 +609,7 @@ void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
569609
///
570610
/// \note This assumes that the comparison has been legalized.
571611
std::optional<std::pair<uint64_t, CmpInst::Predicate>>
572-
tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
612+
tryAdjustICmpImmAndPred(Register LHS, Register RHS, CmpInst::Predicate P,
573613
const MachineRegisterInfo &MRI) {
574614
const auto &Ty = MRI.getType(RHS);
575615
if (Ty.isVector())
@@ -582,11 +622,18 @@ tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
582622
auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
583623
if (!ValAndVReg)
584624
return std::nullopt;
585-
uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
586-
uint64_t C = OriginalC;
587-
if (isLegalArithImmed(C))
625+
626+
APInt C = ValAndVReg->Value;
627+
628+
// Check if this is a comparison with 1 or -1 that should be adjusted to 0
629+
if (shouldBeAdjustedToZero(LHS, C, P, MRI))
630+
return {{0, P}};
631+
632+
if (isLegalCmpImmed(C))
588633
return std::nullopt;
589634

635+
uint64_t OriginalC = C.getZExtValue();
636+
590637
// We have a non-arithmetic immediate. Check if adjusting the immediate and
591638
// adjusting the predicate will result in a legal arithmetic immediate.
592639
switch (P) {
@@ -599,9 +646,7 @@ tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
599646
// x slt c => x sle c - 1
600647
// x sge c => x sgt c - 1
601648
//
602-
// When c is not the smallest possible negative number.
603-
if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
604-
(Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
649+
if (C.isMinSignedValue())
605650
return std::nullopt;
606651
P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
607652
C -= 1;
@@ -614,9 +659,9 @@ tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
614659
// x uge c => x ugt c - 1
615660
//
616661
// When c is not zero.
617-
assert(C != 0 && "C should not be zero here!");
662+
assert(!C.isZero() && "C should not be zero here!");
618663
P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
619-
C -= 1;
664+
C = C - 1;
620665
break;
621666
case CmpInst::ICMP_SLE:
622667
case CmpInst::ICMP_SGT:
@@ -626,11 +671,10 @@ tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
626671
// x sgt c => s sge c + 1
627672
//
628673
// When c is not the largest possible signed integer.
629-
if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
630-
(Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
674+
if (C.isMaxSignedValue())
631675
return std::nullopt;
632676
P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
633-
C += 1;
677+
C = C + 1;
634678
break;
635679
case CmpInst::ICMP_ULE:
636680
case CmpInst::ICMP_UGT:
@@ -640,29 +684,27 @@ tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
640684
// x ugt c => s uge c + 1
641685
//
642686
// When c is not the largest possible unsigned integer.
643-
if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
644-
(Size == 64 && C == UINT64_MAX))
645-
return std::nullopt;
687+
assert(!C.isMaxValue() &&
688+
"C should not be -1 here, as it is a valid legal immediate!");
646689
P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
647-
C += 1;
690+
C = C + 1;
648691
break;
649692
}
650693

651694
// Check if the new constant is valid, and return the updated constant and
652695
// predicate if it is.
653-
if (Size == 32)
654-
C = static_cast<uint32_t>(C);
655-
if (isLegalArithImmed(C))
656-
return {{C, P}};
696+
uint64_t NewC = C.getZExtValue();
697+
if (isLegalCmpImmed(C))
698+
return {{NewC, P}};
657699

658700
auto NumberOfInstrToLoadImm = [=](uint64_t Imm) {
659701
SmallVector<AArch64_IMM::ImmInsnModel> Insn;
660702
AArch64_IMM::expandMOVImm(Imm, 32, Insn);
661703
return Insn.size();
662704
};
663705

664-
if (NumberOfInstrToLoadImm(OriginalC) > NumberOfInstrToLoadImm(C))
665-
return {{C, P}};
706+
if (NumberOfInstrToLoadImm(OriginalC) > NumberOfInstrToLoadImm(NewC))
707+
return {{NewC, P}};
666708

667709
return std::nullopt;
668710
}
@@ -679,9 +721,10 @@ bool matchAdjustICmpImmAndPred(
679721
MachineInstr &MI, const MachineRegisterInfo &MRI,
680722
std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
681723
assert(MI.getOpcode() == TargetOpcode::G_ICMP);
724+
Register LHS = MI.getOperand(2).getReg();
682725
Register RHS = MI.getOperand(3).getReg();
683726
auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
684-
if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
727+
if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(LHS, RHS, Pred, MRI)) {
685728
MatchInfo = *MaybeNewImmAndPred;
686729
return true;
687730
}

llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-lowering-adjust-icmp-imm.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -649,8 +649,8 @@ body: |
649649
; LOWER-NEXT: {{ $}}
650650
; LOWER-NEXT: %reg0:_(s32) = COPY $w0
651651
; LOWER-NEXT: %reg1:_(s32) = COPY $w1
652-
; LOWER-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
653-
; LOWER-NEXT: %cmp:_(s32) = G_ICMP intpred(slt), %reg0(s32), [[C]]
652+
; LOWER-NEXT: %cst:_(s32) = G_CONSTANT i32 -1
653+
; LOWER-NEXT: %cmp:_(s32) = G_ICMP intpred(sle), %reg0(s32), %cst
654654
; LOWER-NEXT: %select:_(s32) = G_SELECT %cmp(s32), %reg0, %reg1
655655
; LOWER-NEXT: $w0 = COPY %select(s32)
656656
; LOWER-NEXT: RET_ReallyLR implicit $w0
@@ -660,8 +660,8 @@ body: |
660660
; SELECT-NEXT: {{ $}}
661661
; SELECT-NEXT: %reg0:gpr32common = COPY $w0
662662
; SELECT-NEXT: %reg1:gpr32 = COPY $w1
663-
; SELECT-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri %reg0, 0, 0, implicit-def $nzcv
664-
; SELECT-NEXT: %select:gpr32 = CSELWr %reg0, %reg1, 4, implicit $nzcv
663+
; SELECT-NEXT: [[ADDSWri:%[0-9]+]]:gpr32 = ADDSWri %reg0, 1, 0, implicit-def $nzcv
664+
; SELECT-NEXT: %select:gpr32 = CSELWr %reg0, %reg1, 13, implicit $nzcv
665665
; SELECT-NEXT: $w0 = COPY %select
666666
; SELECT-NEXT: RET_ReallyLR implicit $w0
667667
%reg0:_(s32) = COPY $w0

llvm/test/CodeGen/AArch64/check-sign-bit-before-extension.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ define i32 @f_i8_sign_extend_inreg(i8 %in, i32 %a, i32 %b) nounwind {
1414
; CHECK-LABEL: f_i8_sign_extend_inreg:
1515
; CHECK: // %bb.0: // %entry
1616
; CHECK-NEXT: sxtb w8, w0
17-
; CHECK-NEXT: cmn w8, #1
18-
; CHECK-NEXT: csel w8, w1, w2, gt
17+
; CHECK-NEXT: cmp w8, #0
18+
; CHECK-NEXT: csel w8, w1, w2, pl
1919
; CHECK-NEXT: add w0, w8, w0, uxtb
2020
; CHECK-NEXT: ret
2121
entry:
@@ -36,8 +36,8 @@ define i32 @f_i16_sign_extend_inreg(i16 %in, i32 %a, i32 %b) nounwind {
3636
; CHECK-LABEL: f_i16_sign_extend_inreg:
3737
; CHECK: // %bb.0: // %entry
3838
; CHECK-NEXT: sxth w8, w0
39-
; CHECK-NEXT: cmn w8, #1
40-
; CHECK-NEXT: csel w8, w1, w2, gt
39+
; CHECK-NEXT: cmp w8, #0
40+
; CHECK-NEXT: csel w8, w1, w2, pl
4141
; CHECK-NEXT: add w0, w8, w0, uxth
4242
; CHECK-NEXT: ret
4343
entry:
@@ -145,8 +145,8 @@ define i64 @f_i32_sign_extend_i64(i32 %in, i64 %a, i64 %b) nounwind {
145145
; CHECK: // %bb.0: // %entry
146146
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
147147
; CHECK-NEXT: sxtw x8, w0
148-
; CHECK-NEXT: cmn x8, #1
149-
; CHECK-NEXT: csel x8, x1, x2, gt
148+
; CHECK-NEXT: cmp x8, #0
149+
; CHECK-NEXT: csel x8, x1, x2, pl
150150
; CHECK-NEXT: add x0, x8, w0, uxtw
151151
; CHECK-NEXT: ret
152152
entry:

0 commit comments

Comments
 (0)