Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 165 additions & 1 deletion llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,9 @@ class AArch64AsmPrinter : public AsmPrinter {
void emitAttributes(unsigned Flags, uint64_t PAuthABIPlatform,
uint64_t PAuthABIVersion, AArch64TargetStreamer *TS);

// Emit expansion of Compare-and-branch pseudo instructions
void emitCBPseudoExpansion(const MachineInstr *MI);

void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
void EmitToStreamer(const MCInst &Inst) {
EmitToStreamer(*OutStreamer, Inst);
Expand Down Expand Up @@ -2589,6 +2592,160 @@ AArch64AsmPrinter::lowerBlockAddressConstant(const BlockAddress &BA) {
return BAE;
}

// Expand a compare-and-branch pseudo instruction (CBWPrr, CBXPrr, CBWPri or
// CBXPri) into one concrete CB<cc> MCInst and emit it to OutStreamer.
//
// Pseudo operand layout, as read below:
//   operand 0 - AArch64CC condition code (immediate)
//   operand 1 - left-hand comparison operand
//   operand 2 - right-hand comparison operand (register or immediate)
//   operand 3 - branch target
//
// Not every condition code gets an opcode of its own in both the
// register-register and register-immediate forms. Where the switch below picks
// an opcode for a different condition, it compensates by either swapping the
// two register operands or by adjusting the immediate by one.
void AArch64AsmPrinter::emitCBPseudoExpansion(const MachineInstr *MI) {
bool IsImm = false;
bool Is32Bit = false;

// Classify the pseudo: register vs. immediate right-hand side, and 32-bit (W)
// vs. 64-bit (X) operands.
switch (MI->getOpcode()) {
default:
llvm_unreachable("This is not a CB pseudo instruction");
case AArch64::CBWPrr:
IsImm = false;
Is32Bit = true;
break;
case AArch64::CBXPrr:
IsImm = false;
Is32Bit = false;
break;
case AArch64::CBWPri:
IsImm = true;
Is32Bit = true;
break;
case AArch64::CBXPri:
IsImm = true;
Is32Bit = false;
break;
}

AArch64CC::CondCode CC =
static_cast<AArch64CC::CondCode>(MI->getOperand(0).getImm());
bool NeedsRegSwap = false;
bool NeedsImmDec = false;
bool NeedsImmInc = false;

// Decide if we need to either swap register operands or increment/decrement
// immediate operands
unsigned MCOpC;
switch (CC) {
default:
llvm_unreachable("Invalid CB condition code");
// EQ: used as-is in both forms.
case AArch64CC::EQ:
MCOpC = IsImm ? (Is32Bit ? AArch64::CBEQWri : AArch64::CBEQXri)
: (Is32Bit ? AArch64::CBEQWrr : AArch64::CBEQXrr);
NeedsRegSwap = false;
NeedsImmDec = false;
NeedsImmInc = false;
break;
// NE: used as-is in both forms.
case AArch64CC::NE:
MCOpC = IsImm ? (Is32Bit ? AArch64::CBNEWri : AArch64::CBNEXri)
: (Is32Bit ? AArch64::CBNEWrr : AArch64::CBNEXrr);
NeedsRegSwap = false;
NeedsImmDec = false;
NeedsImmInc = false;
break;
// HS: the immediate form is emitted as HI against (imm - 1); the register
// form is used as-is.
case AArch64CC::HS:
MCOpC = IsImm ? (Is32Bit ? AArch64::CBHIWri : AArch64::CBHIXri)
: (Is32Bit ? AArch64::CBHSWrr : AArch64::CBHSXrr);
NeedsRegSwap = false;
NeedsImmDec = IsImm;
NeedsImmInc = false;
break;
// LO: the immediate form is used as-is; the register form is emitted as HI
// with the two registers swapped.
case AArch64CC::LO:
MCOpC = IsImm ? (Is32Bit ? AArch64::CBLOWri : AArch64::CBLOXri)
: (Is32Bit ? AArch64::CBHIWrr : AArch64::CBHIXrr);
NeedsRegSwap = !IsImm;
NeedsImmDec = false;
NeedsImmInc = false;
break;
// HI: used as-is in both forms.
case AArch64CC::HI:
MCOpC = IsImm ? (Is32Bit ? AArch64::CBHIWri : AArch64::CBHIXri)
: (Is32Bit ? AArch64::CBHIWrr : AArch64::CBHIXrr);
NeedsRegSwap = false;
NeedsImmDec = false;
NeedsImmInc = false;
break;
// LS: the immediate form is emitted as LO against (imm + 1); the register
// form is emitted as HS with the two registers swapped.
case AArch64CC::LS:
MCOpC = IsImm ? (Is32Bit ? AArch64::CBLOWri : AArch64::CBLOXri)
: (Is32Bit ? AArch64::CBHSWrr : AArch64::CBHSXrr);
NeedsRegSwap = !IsImm;
NeedsImmDec = false;
NeedsImmInc = IsImm;
break;
// GE: the immediate form is emitted as GT against (imm - 1); the register
// form is used as-is.
case AArch64CC::GE:
MCOpC = IsImm ? (Is32Bit ? AArch64::CBGTWri : AArch64::CBGTXri)
: (Is32Bit ? AArch64::CBGEWrr : AArch64::CBGEXrr);
NeedsRegSwap = false;
NeedsImmDec = IsImm;
NeedsImmInc = false;
break;
// LT: the immediate form is used as-is; the register form is emitted as GT
// with the two registers swapped.
case AArch64CC::LT:
MCOpC = IsImm ? (Is32Bit ? AArch64::CBLTWri : AArch64::CBLTXri)
: (Is32Bit ? AArch64::CBGTWrr : AArch64::CBGTXrr);
NeedsRegSwap = !IsImm;
NeedsImmDec = false;
NeedsImmInc = false;
break;
// GT: used as-is in both forms.
case AArch64CC::GT:
MCOpC = IsImm ? (Is32Bit ? AArch64::CBGTWri : AArch64::CBGTXri)
: (Is32Bit ? AArch64::CBGTWrr : AArch64::CBGTXrr);
NeedsRegSwap = false;
NeedsImmDec = false;
NeedsImmInc = false;
break;
// LE: the immediate form is emitted as LT against (imm + 1); the register
// form is emitted as GE with the two registers swapped.
case AArch64CC::LE:
MCOpC = IsImm ? (Is32Bit ? AArch64::CBLTWri : AArch64::CBLTXri)
: (Is32Bit ? AArch64::CBGEWrr : AArch64::CBGEXrr);
NeedsRegSwap = !IsImm;
NeedsImmDec = false;
NeedsImmInc = IsImm;
break;
}

// No case above sets both NeedsImmDec and NeedsImmInc.
assert(!(NeedsImmDec && NeedsImmInc) &&
"Cannot require increment and decrement of CB immediate operand at "
"the same time");
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like we can determine statically that this will never fire - so maybe it is worth removing.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed.


MCInst Inst;
Inst.setOpcode(MCOpC);

// Lower the two comparison operands and the branch target to MC operands.
MCOperand Lhs, Rhs, Trgt;
lowerOperand(MI->getOperand(1), Lhs);
lowerOperand(MI->getOperand(2), Rhs);
lowerOperand(MI->getOperand(3), Trgt);

// Now swap, increment or decrement
if (NeedsRegSwap) {
assert(
!IsImm &&
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly as above around here, though don't let me stop you from keeping them if you prefer.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed.

"Unexpected register swap for CB instruction with immediate operand");
assert(Lhs.isReg() && "Expected register operand for CB");
assert(Rhs.isReg() && "Expected register operand for CB");
// Swapped order: the right-hand register becomes the first operand.
Inst.addOperand(Rhs);
Inst.addOperand(Lhs);
} else if (NeedsImmDec) {
assert(IsImm && "Unexpected immediate decrement for CB instruction with "
"reg-reg operands");
Rhs.setImm(Rhs.getImm() - 1);
Inst.addOperand(Lhs);
Inst.addOperand(Rhs);
} else if (NeedsImmInc) {
assert(IsImm && "Unexpected immediate increment for CB instruction with "
"reg-reg operands");
Rhs.setImm(Rhs.getImm() + 1);
Inst.addOperand(Lhs);
Inst.addOperand(Rhs);
} else {
// The opcode matches the condition directly; keep the operands unchanged.
Inst.addOperand(Lhs);
Inst.addOperand(Rhs);
}

// For the immediate forms, check that the (possibly adjusted) immediate lies
// in [0, 64). The check is skipped for the register forms.
assert((!IsImm || (Rhs.getImm() >= 0 && Rhs.getImm() < 64)) &&
"CB immediate operand out-of-bounds");

// The branch target is always the last operand.
Inst.addOperand(Trgt);
EmitToStreamer(*OutStreamer, Inst);
}

// Simple pseudo-instructions have their lowering (with expansion to real
// instructions) auto-generated.
#include "AArch64GenMCPseudoLowering.inc"
Expand Down Expand Up @@ -3155,13 +3312,20 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
return;

case AArch64::BLR:
case AArch64::BR:
case AArch64::BR: {
recordIfImportCall(MI);
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
return;
}
case AArch64::CBWPri:
case AArch64::CBXPri:
case AArch64::CBWPrr:
case AArch64::CBXPrr:
emitCBPseudoExpansion(MI);
return;
}

// Finally, do the automated lowerings for everything else.
MCInst TmpInst;
Expand Down
52 changes: 52 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2983,6 +2983,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::CTTZ_ELTS)
MAKE_CASE(AArch64ISD::CALL_ARM64EC_TO_X64)
MAKE_CASE(AArch64ISD::URSHR_I_PRED)
MAKE_CASE(AArch64ISD::CBRR)
MAKE_CASE(AArch64ISD::CBRI)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I forget if this was asked before but can we drop AArch64ISD::CBRI and just rely on AArch64ISD::CBRR, creating a CBWPri if the operand is an immediate in the right range? It just helps reduce the number of nodes a bit.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just checking if the immediate is in the right range depends on the condition code, so I'm not sure if that's really worth it because you complicate the matching patterns a lot: You'd need individual patterns for every condition code or at least check every code individually when matching the condition.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it would need WantsRoot (or WantParent?) on a ComplexPattern, so that it can check the Imm is in range whilst checking the condition on the CB.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh right, yeah, that works! Thanks!

Done.

}
#undef MAKE_CASE
return nullptr;
Expand Down Expand Up @@ -10593,6 +10595,56 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
}

// Try to emit Armv9.6 CB instructions. We prefer tb{n}z/cb{n}z due to their
// larger branch displacement but do prefer CB over cmp + br.
if (Subtarget->hasCMPBR() &&
AArch64CC::isValidCBCond(changeIntCCToAArch64CC(CC)) &&
ProduceNonFlagSettingCondBr) {
AArch64CC::CondCode ACC = changeIntCCToAArch64CC(CC);
unsigned Opc = AArch64ISD::CBRR;
if (auto *Imm = dyn_cast<ConstantSDNode>(RHS)) {
// Check conservatively if the immediate fits the valid range [0, 64).
// Immediate variants for GE and HS definitely need to be decremented
// when lowering the pseudos later, so an immediate of 1 would become 0.
// For the inverse conditions LT and LO we don't know for sure if they
// will need a decrement but should the decision be made to reverse the
// branch condition, we again end up with the need to decrement.
// The same argument holds for LE, LS, GT and HI and possibly
// incremented immediates. This can lead to slightly less optimal
// codegen, e.g. we never codegen the legal case
// cblt w0, #63, A
// because we could end up with the illegal case
// cbge w0, #64, B
// should the decision to reverse the branch direction be made. For the
// lower bound cases this is no problem since we can express comparisons
// against 0 with either tbz/tbnz or using wzr/xzr.
uint64_t LowerBound = 0, UpperBound = 64;
switch (ACC) {
case AArch64CC::GE:
case AArch64CC::HS:
case AArch64CC::LT:
case AArch64CC::LO:
LowerBound = 1;
break;
case AArch64CC::LE:
case AArch64CC::LS:
case AArch64CC::GT:
case AArch64CC::HI:
UpperBound = 63;
break;
default:
break;
}

if (Imm->getAPIntValue().uge(LowerBound) &&
Imm->getAPIntValue().ult(UpperBound))
Opc = AArch64ISD::CBRI;
}

SDValue Cond = DAG.getTargetConstant(ACC, dl, MVT::i32);
return DAG.getNode(Opc, dl, MVT::Other, Chain, Cond, LHS, RHS, Dest);
}

SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,10 @@ enum NodeType : unsigned {
// SME ZA loads and stores
SME_ZA_LDR,
SME_ZA_STR,

// Compare-and-branch
CBRR,
CBRI,
};

} // end namespace AArch64ISD
Expand Down
15 changes: 15 additions & 0 deletions llvm/lib/Target/AArch64/AArch64InstrFormats.td
Original file line number Diff line number Diff line change
Expand Up @@ -13232,6 +13232,21 @@ multiclass CmpBranchRegisterAlias<string mnemonic, string insn> {
def : InstAlias<mnemonic # "\t$Rt, $Rm, $target",
(!cast<Instruction>(insn # "Xrr") GPR64:$Rm, GPR64:$Rt, am_brcmpcond:$target), 0>;
}

// Pseudo instruction for a register-register compare-and-branch.
//
// It has no outputs. Its inputs are, in order: a condition code ($Cond), two
// registers of class `regtype` ($Rt, $Rm) and a branch target ($Target). The
// selection pattern list is empty, so this class does not itself attach an
// ISel pattern. The instruction is scheduled as WriteBr and is marked as a
// branch and a block terminator.
// NOTE(review): presumably instantiated for GPR32/GPR64 and expanded to a real
// CB<cc> instruction later - confirm against the instantiating defs.
class CmpBranchRegisterPseudo<RegisterClass regtype>
: Pseudo<(outs), (ins ccode:$Cond, regtype:$Rt, regtype:$Rm, am_brcmpcond:$Target), []>,
Sched<[WriteBr]> {
let isBranch = 1;
let isTerminator = 1;
}

// Pseudo instruction for a register-immediate compare-and-branch.
//
// It has no outputs. Its inputs are, in order: a condition code ($Cond), a
// register of class `regtype` ($Rt), an immediate of ImmLeaf type `imtype`
// ($Imm) and a branch target ($Target). The selection pattern list is empty,
// so this class does not itself attach an ISel pattern. The instruction is
// scheduled as WriteBr and is marked as a branch and a block terminator.
// NOTE(review): the valid immediate range is determined by `imtype`, which is
// supplied by the instantiating def - confirm there.
class CmpBranchImmediatePseudo<RegisterClass regtype, ImmLeaf imtype>
: Pseudo<(outs), (ins ccode:$Cond, regtype:$Rt, imtype:$Imm, am_brcmpcond:$Target), []>,
Sched<[WriteBr]> {
let isBranch = 1;
let isTerminator = 1;
}

//----------------------------------------------------------------------------
// Allow the size specifier tokens to be upper case, not just lower.
def : TokenAlias<".4B", ".4b">; // Add dot product
Expand Down
Loading