Skip to content

Commit 48beed5

Browse files
authored
Revert "[AArch64][SME] Port all SME routines to RuntimeLibcalls" (llvm#153392)
This introduced a 5% compile-time regression on AArch64; see https://llvm-compile-time-tracker.com/compare.php?from=b9138bde3562de5c28a239dbd303caf2406678c6&to=271688b87abe7cf45aceaff8266270a25eb7b436&stat=instructions:u for details. Reverts llvm#152505.
1 parent be3a7a6 commit 48beed5

14 files changed

+185
-217
lines changed

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3560,12 +3560,6 @@ class LLVM_ABI TargetLoweringBase {
35603560
return Libcalls.getLibcallImplName(Call);
35613561
}
35623562

3563-
/// Check if this is a valid libcall for the current module, otherwise
3564-
/// RTLIB::Unsupported.
3565-
RTLIB::LibcallImpl getSupportedLibcallImpl(StringRef FuncName) const {
3566-
return Libcalls.getSupportedLibcallImpl(FuncName);
3567-
}
3568-
35693563
const char *getMemcpyName() const { return Libcalls.getMemcpyName(); }
35703564

35713565
/// Get the comparison predicate that's to be used to test the result of the

llvm/include/llvm/IR/RuntimeLibcalls.td

Lines changed: 1 addition & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -406,17 +406,6 @@ multiclass LibmLongDoubleLibCall<string libcall_basename = !toupper(NAME),
406406
def SC_MEMCPY : RuntimeLibcall;
407407
def SC_MEMMOVE : RuntimeLibcall;
408408
def SC_MEMSET : RuntimeLibcall;
409-
def SC_MEMCHR: RuntimeLibcall;
410-
411-
// AArch64 SME ABI calls
412-
def SMEABI_SME_STATE : RuntimeLibcall;
413-
def SMEABI_TPIDR2_SAVE : RuntimeLibcall;
414-
def SMEABI_ZA_DISABLE : RuntimeLibcall;
415-
def SMEABI_TPIDR2_RESTORE : RuntimeLibcall;
416-
def SMEABI_GET_CURRENT_VG : RuntimeLibcall;
417-
def SMEABI_SME_STATE_SIZE : RuntimeLibcall;
418-
def SMEABI_SME_SAVE : RuntimeLibcall;
419-
def SMEABI_SME_RESTORE : RuntimeLibcall;
420409

421410
// ARM EABI calls
422411
def AEABI_MEMCPY4 : RuntimeLibcall; // Align 4
@@ -1234,35 +1223,8 @@ defset list<RuntimeLibcallImpl> AArch64LibcallImpls = {
12341223
def __arm_sc_memcpy : RuntimeLibcallImpl<SC_MEMCPY>;
12351224
def __arm_sc_memmove : RuntimeLibcallImpl<SC_MEMMOVE>;
12361225
def __arm_sc_memset : RuntimeLibcallImpl<SC_MEMSET>;
1237-
def __arm_sc_memchr : RuntimeLibcallImpl<SC_MEMCHR>;
12381226
} // End AArch64LibcallImpls
12391227

1240-
def __arm_sme_state : RuntimeLibcallImpl<SMEABI_SME_STATE>;
1241-
def __arm_tpidr2_save : RuntimeLibcallImpl<SMEABI_TPIDR2_SAVE>;
1242-
def __arm_za_disable : RuntimeLibcallImpl<SMEABI_ZA_DISABLE>;
1243-
def __arm_tpidr2_restore : RuntimeLibcallImpl<SMEABI_TPIDR2_RESTORE>;
1244-
def __arm_get_current_vg : RuntimeLibcallImpl<SMEABI_GET_CURRENT_VG>;
1245-
def __arm_sme_state_size : RuntimeLibcallImpl<SMEABI_SME_STATE_SIZE>;
1246-
def __arm_sme_save : RuntimeLibcallImpl<SMEABI_SME_SAVE>;
1247-
def __arm_sme_restore : RuntimeLibcallImpl<SMEABI_SME_RESTORE>;
1248-
1249-
def SMEABI_LibCalls_PreserveMost_From_X0 : LibcallsWithCC<(add
1250-
__arm_tpidr2_save,
1251-
__arm_za_disable,
1252-
__arm_tpidr2_restore),
1253-
SMEABI_PreserveMost_From_X0>;
1254-
1255-
def SMEABI_LibCalls_PreserveMost_From_X1 : LibcallsWithCC<(add
1256-
__arm_get_current_vg,
1257-
__arm_sme_state_size,
1258-
__arm_sme_save,
1259-
__arm_sme_restore),
1260-
SMEABI_PreserveMost_From_X1>;
1261-
1262-
def SMEABI_LibCalls_PreserveMost_From_X2 : LibcallsWithCC<(add
1263-
__arm_sme_state),
1264-
SMEABI_PreserveMost_From_X2>;
1265-
12661228
def isAArch64_ExceptArm64EC
12671229
: RuntimeLibcallPredicate<"(TT.isAArch64() && !TT.isWindowsArm64EC())">;
12681230
def isWindowsArm64EC : RuntimeLibcallPredicate<"TT.isWindowsArm64EC()">;
@@ -1282,10 +1244,7 @@ def AArch64SystemLibrary : SystemRuntimeLibrary<
12821244
LibmHasSinCosF32, LibmHasSinCosF64, LibmHasSinCosF128,
12831245
DefaultLibmExp10,
12841246
DefaultStackProtector,
1285-
SecurityCheckCookieIfWinMSVC,
1286-
SMEABI_LibCalls_PreserveMost_From_X0,
1287-
SMEABI_LibCalls_PreserveMost_From_X1,
1288-
SMEABI_LibCalls_PreserveMost_From_X2)
1247+
SecurityCheckCookieIfWinMSVC)
12891248
>;
12901249

12911250
// Prepend a # to every name

llvm/include/llvm/IR/RuntimeLibcallsImpl.td

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,6 @@ def ARM_AAPCS : LibcallCallingConv<[{CallingConv::ARM_AAPCS}]>;
3636
def ARM_AAPCS_VFP : LibcallCallingConv<[{CallingConv::ARM_AAPCS_VFP}]>;
3737
def X86_STDCALL : LibcallCallingConv<[{CallingConv::X86_StdCall}]>;
3838
def AVR_BUILTIN : LibcallCallingConv<[{CallingConv::AVR_BUILTIN}]>;
39-
def SMEABI_PreserveMost_From_X0 : LibcallCallingConv<[{CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0}]>;
40-
def SMEABI_PreserveMost_From_X1 : LibcallCallingConv<[{CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1}]>;
41-
def SMEABI_PreserveMost_From_X2 : LibcallCallingConv<[{CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2}]>;
4239

4340
/// Abstract definition for functionality the compiler may need to
4441
/// emit a call to. Emits the RTLIB::Libcall enum - This enum defines

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1487,11 +1487,8 @@ bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
14871487

14881488
if (Opc == AArch64::BL) {
14891489
auto Op1 = MBBI->getOperand(0);
1490-
auto &TLI =
1491-
*MBBI->getMF()->getSubtarget<AArch64Subtarget>().getTargetLowering();
1492-
char const *GetCurrentVG =
1493-
TLI.getLibcallName(RTLIB::SMEABI_GET_CURRENT_VG);
1494-
return Op1.isSymbol() && StringRef(Op1.getSymbolName()) == GetCurrentVG;
1490+
return Op1.isSymbol() &&
1491+
(StringRef(Op1.getSymbolName()) == "__arm_get_current_vg");
14951492
}
14961493
}
14971494

@@ -3471,7 +3468,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
34713468
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
34723469
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
34733470
MachineFunction &MF = *MBB.getParent();
3474-
auto &TLI = *MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
34753471
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
34763472
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
34773473
bool NeedsWinCFI = needsWinCFI(MF);
@@ -3585,11 +3581,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
35853581
.addReg(AArch64::X0, RegState::Implicit)
35863582
.setMIFlag(MachineInstr::FrameSetup);
35873583

3588-
RTLIB::Libcall LC = RTLIB::SMEABI_GET_CURRENT_VG;
3589-
const uint32_t *RegMask =
3590-
TRI->getCallPreservedMask(MF, TLI.getLibcallCallingConv(LC));
3584+
const uint32_t *RegMask = TRI->getCallPreservedMask(
3585+
MF,
3586+
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1);
35913587
BuildMI(MBB, MI, DL, TII.get(AArch64::BL))
3592-
.addExternalSymbol(TLI.getLibcallName(LC))
3588+
.addExternalSymbol("__arm_get_current_vg")
35933589
.addRegMask(RegMask)
35943590
.addReg(AArch64::X0, RegState::ImplicitDefine)
35953591
.setMIFlag(MachineInstr::FrameSetup);

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3083,12 +3083,13 @@ AArch64TargetLowering::EmitGetSMESaveSize(MachineInstr &MI,
30833083
AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
30843084
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
30853085
if (FuncInfo->isSMESaveBufferUsed()) {
3086-
RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
30873086
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
30883087
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::BL))
3089-
.addExternalSymbol(getLibcallName(LC))
3088+
.addExternalSymbol("__arm_sme_state_size")
30903089
.addReg(AArch64::X0, RegState::ImplicitDefine)
3091-
.addRegMask(TRI->getCallPreservedMask(*MF, getLibcallCallingConv(LC)));
3090+
.addRegMask(TRI->getCallPreservedMask(
3091+
*MF, CallingConv::
3092+
AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1));
30923093
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
30933094
MI.getOperand(0).getReg())
30943095
.addReg(AArch64::X0);
@@ -3108,12 +3109,13 @@ AArch64TargetLowering::EmitEntryPStateSM(MachineInstr &MI,
31083109
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
31093110
Register ResultReg = MI.getOperand(0).getReg();
31103111
if (FuncInfo->isPStateSMRegUsed()) {
3111-
RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
31123112
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
31133113
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::BL))
3114-
.addExternalSymbol(getLibcallName(LC))
3114+
.addExternalSymbol("__arm_sme_state")
31153115
.addReg(AArch64::X0, RegState::ImplicitDefine)
3116-
.addRegMask(TRI->getCallPreservedMask(*MF, getLibcallCallingConv(LC)));
3116+
.addRegMask(TRI->getCallPreservedMask(
3117+
*MF, CallingConv::
3118+
AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2));
31173119
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), ResultReg)
31183120
.addReg(AArch64::X0);
31193121
} else {
@@ -5737,15 +5739,15 @@ static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
57375739
SDValue AArch64TargetLowering::getRuntimePStateSM(SelectionDAG &DAG,
57385740
SDValue Chain, SDLoc DL,
57395741
EVT VT) const {
5740-
RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
5741-
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
5742+
SDValue Callee = DAG.getExternalSymbol("__arm_sme_state",
57425743
getPointerTy(DAG.getDataLayout()));
57435744
Type *Int64Ty = Type::getInt64Ty(*DAG.getContext());
57445745
Type *RetTy = StructType::get(Int64Ty, Int64Ty);
57455746
TargetLowering::CallLoweringInfo CLI(DAG);
57465747
ArgListTy Args;
57475748
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
5748-
getLibcallCallingConv(LC), RetTy, Callee, std::move(Args));
5749+
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2,
5750+
RetTy, Callee, std::move(Args));
57495751
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
57505752
SDValue Mask = DAG.getConstant(/*PSTATE.SM*/ 1, DL, MVT::i64);
57515753
return DAG.getNode(ISD::AND, DL, MVT::i64, CallResult.first.getOperand(0),
@@ -8598,12 +8600,12 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI,
85988600
}
85998601

86008602
static SMECallAttrs
8601-
getSMECallAttrs(const Function &Caller, const TargetLowering &TLI,
8603+
getSMECallAttrs(const Function &Caller,
86028604
const TargetLowering::CallLoweringInfo &CLI) {
86038605
if (CLI.CB)
8604-
return SMECallAttrs(*CLI.CB, &TLI);
8606+
return SMECallAttrs(*CLI.CB);
86058607
if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
8606-
return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol(), TLI));
8608+
return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol()));
86078609
return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(SMEAttrs::Normal));
86088610
}
86098611

@@ -8625,7 +8627,7 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
86258627

86268628
// SME Streaming functions are not eligible for TCO as they may require
86278629
// the streaming mode or ZA to be restored after returning from the call.
8628-
SMECallAttrs CallAttrs = getSMECallAttrs(CallerF, *this, CLI);
8630+
SMECallAttrs CallAttrs = getSMECallAttrs(CallerF, CLI);
86298631
if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
86308632
CallAttrs.requiresPreservingAllZAState() ||
86318633
CallAttrs.caller().hasStreamingBody())
@@ -8919,14 +8921,14 @@ static SDValue emitSMEStateSaveRestore(const AArch64TargetLowering &TLI,
89198921
DAG.getCopyFromReg(Chain, DL, Info->getSMESaveBufferAddr(), MVT::i64);
89208922
Args.push_back(Entry);
89218923

8922-
RTLIB::Libcall LC =
8923-
IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE;
8924-
SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
8925-
TLI.getPointerTy(DAG.getDataLayout()));
8924+
SDValue Callee =
8925+
DAG.getExternalSymbol(IsSave ? "__arm_sme_save" : "__arm_sme_restore",
8926+
TLI.getPointerTy(DAG.getDataLayout()));
89268927
auto *RetTy = Type::getVoidTy(*DAG.getContext());
89278928
TargetLowering::CallLoweringInfo CLI(DAG);
89288929
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
8929-
TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args));
8930+
CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1, RetTy,
8931+
Callee, std::move(Args));
89308932
return TLI.LowerCallTo(CLI).second;
89318933
}
89328934

@@ -9114,7 +9116,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
91149116
}
91159117

91169118
// Determine whether we need any streaming mode changes.
9117-
SMECallAttrs CallAttrs = getSMECallAttrs(MF.getFunction(), *this, CLI);
9119+
SMECallAttrs CallAttrs = getSMECallAttrs(MF.getFunction(), CLI);
91189120

91199121
auto DescribeCallsite =
91209122
[&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {
@@ -9691,12 +9693,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
96919693

96929694
if (RequiresLazySave) {
96939695
// Conditionally restore the lazy save using a pseudo node.
9694-
RTLIB::Libcall LC = RTLIB::SMEABI_TPIDR2_RESTORE;
96959696
TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
96969697
SDValue RegMask = DAG.getRegisterMask(
9697-
TRI->getCallPreservedMask(MF, getLibcallCallingConv(LC)));
9698+
TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
96989699
SDValue RestoreRoutine = DAG.getTargetExternalSymbol(
9699-
getLibcallName(LC), getPointerTy(DAG.getDataLayout()));
9700+
"__arm_tpidr2_restore", getPointerTy(DAG.getDataLayout()));
97009701
SDValue TPIDR2_EL0 = DAG.getNode(
97019702
ISD::INTRINSIC_W_CHAIN, DL, MVT::i64, Result,
97029703
DAG.getConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32));
@@ -29035,7 +29036,7 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
2903529036

2903629037
// Checks to allow the use of SME instructions
2903729038
if (auto *Base = dyn_cast<CallBase>(&Inst)) {
29038-
auto CallAttrs = SMECallAttrs(*Base, this);
29039+
auto CallAttrs = SMECallAttrs(*Base);
2903929040
if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
2904029041
CallAttrs.requiresPreservingZT0() ||
2904129042
CallAttrs.requiresPreservingAllZAState())

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -220,16 +220,20 @@ static cl::opt<bool> EnableFixedwidthAutovecInStreamingMode(
220220
static cl::opt<bool> EnableScalableAutovecInStreamingMode(
221221
"enable-scalable-autovec-in-streaming-mode", cl::init(false), cl::Hidden);
222222

223-
static bool isSMEABIRoutineCall(const CallInst &CI, const TargetLowering &TLI) {
223+
static bool isSMEABIRoutineCall(const CallInst &CI) {
224224
const auto *F = CI.getCalledFunction();
225-
return F && SMEAttrs(F->getName(), TLI).isSMEABIRoutine();
225+
return F && StringSwitch<bool>(F->getName())
226+
.Case("__arm_sme_state", true)
227+
.Case("__arm_tpidr2_save", true)
228+
.Case("__arm_tpidr2_restore", true)
229+
.Case("__arm_za_disable", true)
230+
.Default(false);
226231
}
227232

228233
/// Returns true if the function has explicit operations that can only be
229234
/// lowered using incompatible instructions for the selected mode. This also
230235
/// returns true if the function F may use or modify ZA state.
231-
static bool hasPossibleIncompatibleOps(const Function *F,
232-
const TargetLowering &TLI) {
236+
static bool hasPossibleIncompatibleOps(const Function *F) {
233237
for (const BasicBlock &BB : *F) {
234238
for (const Instruction &I : BB) {
235239
// Be conservative for now and assume that any call to inline asm or to
@@ -238,7 +242,7 @@ static bool hasPossibleIncompatibleOps(const Function *F,
238242
// all native LLVM instructions can be lowered to compatible instructions.
239243
if (isa<CallInst>(I) && !I.isDebugOrPseudoInst() &&
240244
(cast<CallInst>(I).isInlineAsm() || isa<IntrinsicInst>(I) ||
241-
isSMEABIRoutineCall(cast<CallInst>(I), TLI)))
245+
isSMEABIRoutineCall(cast<CallInst>(I))))
242246
return true;
243247
}
244248
}
@@ -286,7 +290,7 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
286290
if (CallAttrs.requiresLazySave() || CallAttrs.requiresSMChange() ||
287291
CallAttrs.requiresPreservingZT0() ||
288292
CallAttrs.requiresPreservingAllZAState()) {
289-
if (hasPossibleIncompatibleOps(Callee, *getTLI()))
293+
if (hasPossibleIncompatibleOps(Callee))
290294
return false;
291295
}
292296

@@ -353,7 +357,7 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
353357
// change only once and avoid inlining of G into F.
354358

355359
SMEAttrs FAttrs(*F);
356-
SMECallAttrs CallAttrs(Call, getTLI());
360+
SMECallAttrs CallAttrs(Call);
357361

358362
if (SMECallAttrs(FAttrs, CallAttrs.callee()).requiresSMChange()) {
359363
if (F == Call.getCaller()) // (1)

0 commit comments

Comments
 (0)