-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AArch64][FEAT_CMPBR] Codegen for Armv9.6-a compare-and-branch #116465
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
55a4f0f
563ae0f
df6e831
0139ebe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -208,6 +208,9 @@ class AArch64AsmPrinter : public AsmPrinter { | |
| void emitAttributes(unsigned Flags, uint64_t PAuthABIPlatform, | ||
| uint64_t PAuthABIVersion, AArch64TargetStreamer *TS); | ||
|
|
||
| // Emit expansion of Compare-and-branch pseudo instructions | ||
| void emitCBPseudoExpansion(const MachineInstr *MI); | ||
|
|
||
| void EmitToStreamer(MCStreamer &S, const MCInst &Inst); | ||
| void EmitToStreamer(const MCInst &Inst) { | ||
| EmitToStreamer(*OutStreamer, Inst); | ||
|
|
@@ -2589,6 +2592,160 @@ AArch64AsmPrinter::lowerBlockAddressConstant(const BlockAddress &BA) { | |
| return BAE; | ||
| } | ||
|
|
||
| void AArch64AsmPrinter::emitCBPseudoExpansion(const MachineInstr *MI) { | ||
| bool IsImm = false; | ||
| bool Is32Bit = false; | ||
|
|
||
| switch (MI->getOpcode()) { | ||
| default: | ||
| llvm_unreachable("This is not a CB pseudo instruction"); | ||
| case AArch64::CBWPrr: | ||
| IsImm = false; | ||
| Is32Bit = true; | ||
| break; | ||
| case AArch64::CBXPrr: | ||
| IsImm = false; | ||
| Is32Bit = false; | ||
| break; | ||
| case AArch64::CBWPri: | ||
| IsImm = true; | ||
| Is32Bit = true; | ||
| break; | ||
| case AArch64::CBXPri: | ||
| IsImm = true; | ||
| Is32Bit = false; | ||
| break; | ||
| } | ||
|
|
||
| AArch64CC::CondCode CC = | ||
| static_cast<AArch64CC::CondCode>(MI->getOperand(0).getImm()); | ||
| bool NeedsRegSwap = false; | ||
| bool NeedsImmDec = false; | ||
| bool NeedsImmInc = false; | ||
|
|
||
| // Decide if we need to either swap register operands or increment/decrement | ||
| // immediate operands | ||
| unsigned MCOpC; | ||
| switch (CC) { | ||
| default: | ||
| llvm_unreachable("Invalid CB condition code"); | ||
| case AArch64CC::EQ: | ||
| MCOpC = IsImm ? (Is32Bit ? AArch64::CBEQWri : AArch64::CBEQXri) | ||
| : (Is32Bit ? AArch64::CBEQWrr : AArch64::CBEQXrr); | ||
| NeedsRegSwap = false; | ||
| NeedsImmDec = false; | ||
| NeedsImmInc = false; | ||
| break; | ||
| case AArch64CC::NE: | ||
| MCOpC = IsImm ? (Is32Bit ? AArch64::CBNEWri : AArch64::CBNEXri) | ||
| : (Is32Bit ? AArch64::CBNEWrr : AArch64::CBNEXrr); | ||
| NeedsRegSwap = false; | ||
| NeedsImmDec = false; | ||
| NeedsImmInc = false; | ||
| break; | ||
| case AArch64CC::HS: | ||
| MCOpC = IsImm ? (Is32Bit ? AArch64::CBHIWri : AArch64::CBHIXri) | ||
| : (Is32Bit ? AArch64::CBHSWrr : AArch64::CBHSXrr); | ||
| NeedsRegSwap = false; | ||
| NeedsImmDec = IsImm; | ||
| NeedsImmInc = false; | ||
| break; | ||
| case AArch64CC::LO: | ||
| MCOpC = IsImm ? (Is32Bit ? AArch64::CBLOWri : AArch64::CBLOXri) | ||
| : (Is32Bit ? AArch64::CBHIWrr : AArch64::CBHIXrr); | ||
| NeedsRegSwap = !IsImm; | ||
| NeedsImmDec = false; | ||
| NeedsImmInc = false; | ||
| break; | ||
| case AArch64CC::HI: | ||
| MCOpC = IsImm ? (Is32Bit ? AArch64::CBHIWri : AArch64::CBHIXri) | ||
| : (Is32Bit ? AArch64::CBHIWrr : AArch64::CBHIXrr); | ||
| NeedsRegSwap = false; | ||
| NeedsImmDec = false; | ||
| NeedsImmInc = false; | ||
| break; | ||
| case AArch64CC::LS: | ||
| MCOpC = IsImm ? (Is32Bit ? AArch64::CBLOWri : AArch64::CBLOXri) | ||
| : (Is32Bit ? AArch64::CBHSWrr : AArch64::CBHSXrr); | ||
| NeedsRegSwap = !IsImm; | ||
| NeedsImmDec = false; | ||
| NeedsImmInc = IsImm; | ||
| break; | ||
| case AArch64CC::GE: | ||
| MCOpC = IsImm ? (Is32Bit ? AArch64::CBGTWri : AArch64::CBGTXri) | ||
| : (Is32Bit ? AArch64::CBGEWrr : AArch64::CBGEXrr); | ||
| NeedsRegSwap = false; | ||
| NeedsImmDec = IsImm; | ||
| NeedsImmInc = false; | ||
| break; | ||
| case AArch64CC::LT: | ||
| MCOpC = IsImm ? (Is32Bit ? AArch64::CBLTWri : AArch64::CBLTXri) | ||
| : (Is32Bit ? AArch64::CBGTWrr : AArch64::CBGTXrr); | ||
| NeedsRegSwap = !IsImm; | ||
| NeedsImmDec = false; | ||
| NeedsImmInc = false; | ||
| break; | ||
| case AArch64CC::GT: | ||
| MCOpC = IsImm ? (Is32Bit ? AArch64::CBGTWri : AArch64::CBGTXri) | ||
| : (Is32Bit ? AArch64::CBGTWrr : AArch64::CBGTXrr); | ||
| NeedsRegSwap = false; | ||
| NeedsImmDec = false; | ||
| NeedsImmInc = false; | ||
| break; | ||
| case AArch64CC::LE: | ||
| MCOpC = IsImm ? (Is32Bit ? AArch64::CBLTWri : AArch64::CBLTXri) | ||
| : (Is32Bit ? AArch64::CBGEWrr : AArch64::CBGEXrr); | ||
| NeedsRegSwap = !IsImm; | ||
| NeedsImmDec = false; | ||
| NeedsImmInc = IsImm; | ||
| break; | ||
| } | ||
|
|
||
| assert(!(NeedsImmDec && NeedsImmInc) && | ||
| "Cannot require increment and decrement of CB immediate operand at " | ||
| "the same time"); | ||
|
|
||
| MCInst Inst; | ||
| Inst.setOpcode(MCOpC); | ||
|
|
||
| MCOperand Lhs, Rhs, Trgt; | ||
| lowerOperand(MI->getOperand(1), Lhs); | ||
| lowerOperand(MI->getOperand(2), Rhs); | ||
| lowerOperand(MI->getOperand(3), Trgt); | ||
|
|
||
| // Now swap, increment or decrement | ||
| if (NeedsRegSwap) { | ||
| assert( | ||
| !IsImm && | ||
|
||
| "Unexpected register swap for CB instruction with immediate operand"); | ||
| assert(Lhs.isReg() && "Expected register operand for CB"); | ||
| assert(Rhs.isReg() && "Expected register operand for CB"); | ||
| Inst.addOperand(Rhs); | ||
| Inst.addOperand(Lhs); | ||
| } else if (NeedsImmDec) { | ||
| assert(IsImm && "Unexpected immediate decrement for CB instruction with " | ||
| "reg-reg operands"); | ||
| Rhs.setImm(Rhs.getImm() - 1); | ||
| Inst.addOperand(Lhs); | ||
| Inst.addOperand(Rhs); | ||
| } else if (NeedsImmInc) { | ||
| assert(IsImm && "Unexpected immediate increment for CB instruction with " | ||
| "reg-reg operands"); | ||
| Rhs.setImm(Rhs.getImm() + 1); | ||
| Inst.addOperand(Lhs); | ||
| Inst.addOperand(Rhs); | ||
| } else { | ||
| Inst.addOperand(Lhs); | ||
| Inst.addOperand(Rhs); | ||
| } | ||
|
|
||
| assert((!IsImm || (Rhs.getImm() >= 0 && Rhs.getImm() < 64)) && | ||
| "CB immediate operand out-of-bounds"); | ||
|
|
||
| Inst.addOperand(Trgt); | ||
| EmitToStreamer(*OutStreamer, Inst); | ||
| } | ||
|
|
||
| // Simple pseudo-instructions have their lowering (with expansion to real | ||
| // instructions) auto-generated. | ||
| #include "AArch64GenMCPseudoLowering.inc" | ||
|
|
@@ -3155,13 +3312,20 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) { | |
| return; | ||
|
|
||
| case AArch64::BLR: | ||
| case AArch64::BR: | ||
| case AArch64::BR: { | ||
| recordIfImportCall(MI); | ||
| MCInst TmpInst; | ||
| MCInstLowering.Lower(MI, TmpInst); | ||
| EmitToStreamer(*OutStreamer, TmpInst); | ||
| return; | ||
| } | ||
| case AArch64::CBWPri: | ||
| case AArch64::CBXPri: | ||
| case AArch64::CBWPrr: | ||
| case AArch64::CBXPrr: | ||
| emitCBPseudoExpansion(MI); | ||
| return; | ||
| } | ||
|
|
||
| // Finally, do the automated lowerings for everything else. | ||
| MCInst TmpInst; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2983,6 +2983,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { | |
| MAKE_CASE(AArch64ISD::CTTZ_ELTS) | ||
| MAKE_CASE(AArch64ISD::CALL_ARM64EC_TO_X64) | ||
| MAKE_CASE(AArch64ISD::URSHR_I_PRED) | ||
| MAKE_CASE(AArch64ISD::CBRR) | ||
| MAKE_CASE(AArch64ISD::CBRI) | ||
|
||
| } | ||
| #undef MAKE_CASE | ||
| return nullptr; | ||
|
|
@@ -10593,6 +10595,56 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { | |
| DAG.getConstant(SignBitPos, dl, MVT::i64), Dest); | ||
| } | ||
|
|
||
| // Try to emit Armv9.6 CB instructions. We prefer tb{n}z/cb{n}z due to their | ||
| // larger branch displacement but do prefer CB over cmp + br. | ||
| if (Subtarget->hasCMPBR() && | ||
| AArch64CC::isValidCBCond(changeIntCCToAArch64CC(CC)) && | ||
| ProduceNonFlagSettingCondBr) { | ||
| AArch64CC::CondCode ACC = changeIntCCToAArch64CC(CC); | ||
| unsigned Opc = AArch64ISD::CBRR; | ||
| if (auto *Imm = dyn_cast<ConstantSDNode>(RHS)) { | ||
| // Check conservatively if the immediate fits the valid range [0, 64). | ||
| // Immediate variants for GE and HS definitely need to be decremented | ||
| // when lowering the pseudos later, so an immediate of 1 would become 0. | ||
| // For the inverse conditions LT and LO we don't know for sure if they | ||
| // will need a decrement but should the decision be made to reverse the | ||
| // branch condition, we again end up with the need to decrement. | ||
| // The same argument holds for LE, LS, GT and HI and possibly | ||
| // incremented immediates. This can lead to slightly less optimal | ||
| // codegen, e.g. we never codegen the legal case | ||
| // cblt w0, #63, A | ||
| // because we could end up with the illegal case | ||
| // cbge w0, #64, B | ||
| // should the decision to reverse the branch direction be made. For the | ||
| // lower bound cases this is no problem since we can express comparisons | ||
| // against 0 with either tbz/tbnz or using wzr/xzr. | ||
| uint64_t LowerBound = 0, UpperBound = 64; | ||
| switch (ACC) { | ||
| case AArch64CC::GE: | ||
| case AArch64CC::HS: | ||
| case AArch64CC::LT: | ||
| case AArch64CC::LO: | ||
| LowerBound = 1; | ||
| break; | ||
| case AArch64CC::LE: | ||
| case AArch64CC::LS: | ||
| case AArch64CC::GT: | ||
| case AArch64CC::HI: | ||
| UpperBound = 63; | ||
| break; | ||
| default: | ||
| break; | ||
| } | ||
|
|
||
| if (Imm->getAPIntValue().uge(LowerBound) && | ||
| Imm->getAPIntValue().ult(UpperBound)) | ||
| Opc = AArch64ISD::CBRI; | ||
| } | ||
|
|
||
| SDValue Cond = DAG.getTargetConstant(ACC, dl, MVT::i32); | ||
| return DAG.getNode(Opc, dl, MVT::Other, Chain, Cond, LHS, RHS, Dest); | ||
| } | ||
|
|
||
| SDValue CCVal; | ||
| SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl); | ||
| return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like we can determine statically that this will never fire - so maybe it is worth removing.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Removed.