diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt index 19f26c92a0f94..2d213d95f333a 100644 --- a/compiler-rt/lib/builtins/README.txt +++ b/compiler-rt/lib/builtins/README.txt @@ -272,11 +272,6 @@ switch32 switch8 switchu8 -// This function generates a custom trampoline function with the specific -// realFunc and localsPtr values. -void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated, - const void* realFunc, void* localsPtr); - // There is no C interface to the *_vfp_d8_d15_regs functions. There are // called in the prolog and epilog of Thumb1 functions. When the C++ ABI use // SJLJ for exceptions, each function with a catch clause or destructors needs diff --git a/compiler-rt/lib/builtins/trampoline_setup.c b/compiler-rt/lib/builtins/trampoline_setup.c index 830e25e4c0303..844eb27944142 100644 --- a/compiler-rt/lib/builtins/trampoline_setup.c +++ b/compiler-rt/lib/builtins/trampoline_setup.c @@ -41,45 +41,3 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack, __clear_cache(trampOnStack, &trampOnStack[10]); } #endif // __powerpc__ && !defined(__powerpc64__) - -// The AArch64 compiler generates calls to __trampoline_setup() when creating -// trampoline functions on the stack for use with nested functions. -// This function creates a custom 36-byte trampoline function on the stack -// which loads x18 with a pointer to the outer function's locals -// and then jumps to the target nested function. -// Note: x18 is a reserved platform register on Windows and macOS. - -#if defined(__aarch64__) && defined(__ELF__) -COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack, - int trampSizeAllocated, - const void *realFunc, void *localsPtr) { - // This should never happen, but if compiler did not allocate - // enough space on stack for the trampoline, abort. - if (trampSizeAllocated < 36) - compilerrt_abort(); - - // create trampoline - // Load realFunc into x17. mov/movk 16 bits at a time. - trampOnStack[0] = - 0xd2800000u | ((((uint64_t)realFunc >> 0) & 0xffffu) << 5) | 0x11; - trampOnStack[1] = - 0xf2a00000u | ((((uint64_t)realFunc >> 16) & 0xffffu) << 5) | 0x11; - trampOnStack[2] = - 0xf2c00000u | ((((uint64_t)realFunc >> 32) & 0xffffu) << 5) | 0x11; - trampOnStack[3] = - 0xf2e00000u | ((((uint64_t)realFunc >> 48) & 0xffffu) << 5) | 0x11; - // Load localsPtr into x18 - trampOnStack[4] = - 0xd2800000u | ((((uint64_t)localsPtr >> 0) & 0xffffu) << 5) | 0x12; - trampOnStack[5] = - 0xf2a00000u | ((((uint64_t)localsPtr >> 16) & 0xffffu) << 5) | 0x12; - trampOnStack[6] = - 0xf2c00000u | ((((uint64_t)localsPtr >> 32) & 0xffffu) << 5) | 0x12; - trampOnStack[7] = - 0xf2e00000u | ((((uint64_t)localsPtr >> 48) & 0xffffu) << 5) | 0x12; - trampOnStack[8] = 0xd61f0220; // br x17 - - // Clear instruction cache. 
- __clear_cache(trampOnStack, &trampOnStack[9]); -} -#endif // defined(__aarch64__) && !defined(__APPLE__) && !defined(_WIN64) diff --git a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c index d51d35acaa02f..da115fe764271 100644 --- a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c +++ b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c @@ -7,7 +7,7 @@ /* * Tests nested functions - * The ppc and aarch64 compilers generates a call to __trampoline_setup + * The ppc compiler generates a call to __trampoline_setup * The i386 and x86_64 compilers generate a call to ___enable_execute_stack */ diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp index 82b11ad7db32a..69bdb48146a54 100644 --- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp +++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp @@ -274,12 +274,12 @@ class BoxedProcedurePass auto loc = embox.getLoc(); mlir::Type i8Ty = builder.getI8Type(); mlir::Type i8Ptr = builder.getRefType(i8Ty); - // For AArch64, PPC32 and PPC64, the thunk is populated by a call to + // For PPC32 and PPC64, the thunk is populated by a call to // __trampoline_setup, which is defined in // compiler-rt/lib/builtins/trampoline_setup.c and requires the - // thunk size greater than 32 bytes. For RISCV and x86_64, the - // thunk setup doesn't go through __trampoline_setup and fits in 32 - // bytes. + // thunk size greater than 32 bytes. For AArch64, RISCV and x86_64, + // the thunk setup doesn't go through __trampoline_setup and fits in + // 32 bytes. fir::SequenceType::Extent thunkSize = triple.getTrampolineSize(); mlir::Type buffTy = SequenceType::get({thunkSize}, i8Ty); auto buffer = builder.create(loc, buffTy); diff --git a/flang/test/Fir/boxproc.fir b/flang/test/Fir/boxproc.fir index 5d82522055adc..97d9b38ed6f40 100644 --- a/flang/test/Fir/boxproc.fir +++ b/flang/test/Fir/boxproc.fir @@ -3,7 +3,7 @@ // RUN: %if powerpc-registered-target %{tco --target=powerpc64le-unknown-linux-gnu %s | FileCheck %s --check-prefixes=CHECK,CHECK-PPC %} // CHECK-LABEL: define void @_QPtest_proc_dummy() -// CHECK-AARCH64: %[[VAL_3:.*]] = alloca [36 x i8], i64 1, align 1 +// CHECK-AARCH64: %[[VAL_3:.*]] = alloca [32 x i8], i64 1, align 1 // CHECK-X86: %[[VAL_3:.*]] = alloca [32 x i8], i64 1, align 1 // CHECK-PPC: %[[VAL_3:.*]] = alloca [4{{[0-8]+}} x i8], i64 1, align 1 // CHECK: %[[VAL_1:.*]] = alloca { ptr }, i64 1, align 8 @@ -63,7 +63,7 @@ func.func @_QPtest_proc_dummy_other(%arg0: !fir.boxproc<() -> ()>) { } // CHECK-LABEL: define void @_QPtest_proc_dummy_char() -// CHECK-AARCH64: %[[VAL_20:.*]] = alloca [36 x i8], i64 1, align 1 +// CHECK-AARCH64: %[[VAL_20:.*]] = alloca [32 x i8], i64 1, align 1 // CHECK-X86: %[[VAL_20:.*]] = alloca [32 x i8], i64 1, align 1 // CHECK-PPC: %[[VAL_20:.*]] = alloca [4{{[0-8]+}} x i8], i64 1, align 1 // CHECK: %[[VAL_2:.*]] = alloca { { ptr, i64 } }, i64 1, align 8 diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td index 920cc67273146..1b5a713bffdc9 100644 --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -28,6 +28,12 @@ class CCIfSubtarget //===----------------------------------------------------------------------===// defvar AArch64_Common = [ + // The 'nest' parameter, if any, is passed in X15. 
+  // The previous register used here (X18) is also defined to be unavailable
+  // for this purpose, while all of X9-X15 were defined to be free for LLVM to
+  // use for this, so use X15 (which LLVM often already clobbers anyway).
+  CCIfNest<CCAssignToReg<[X15]>>,
+
   CCIfType<[iPTR], CCBitConvertToType<i64>>,
   CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
   CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
@@ -117,13 +123,7 @@ defvar AArch64_Common = [
 ];
 
 let Entry = 1 in
-def CC_AArch64_AAPCS : CallingConv<!listconcat(
-    [CCIfType<[iPTR], CCIfNest<CCAssignToReg<[X18]>>>],
-    AArch64_Common
-)>;
+def CC_AArch64_AAPCS : CallingConv<AArch64_Common>;
 
 let Entry = 1 in
 def RetCC_AArch64_AAPCS : CallingConv<[
@@ -177,6 +177,8 @@ def CC_AArch64_Win64_VarArg : CallingConv<[
 // a stack layout compatible with the x64 calling convention.
 let Entry = 1 in
 def CC_AArch64_Arm64EC_VarArg : CallingConv<[
+  CCIfNest>,
+
   // Convert small floating-point values to integer.
   CCIfType<[f16, bf16], CCBitConvertToType<i16>>,
   CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -353,6 +355,8 @@ def RetCC_AArch64_Arm64EC_CFGuard_Check : CallingConv<[
 // + Stack slots are sized as needed rather than being at least 64-bit.
 let Entry = 1 in
 def CC_AArch64_DarwinPCS : CallingConv<[
+  CCIfNest<CCAssignToReg<[X15]>>,
+
   CCIfType<[iPTR], CCBitConvertToType<i64>>,
   CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
   CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
@@ -427,6 +431,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
 
 let Entry = 1 in
 def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
+  CCIfNest<CCAssignToReg<[X15]>>,
+
   CCIfType<[iPTR], CCBitConvertToType<i64>>,
   CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
   CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
@@ -450,6 +456,8 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
 // same as the normal Darwin VarArgs handling.
 let Entry = 1 in
 def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[
+  CCIfNest<CCAssignToReg<[X15]>>,
+
   CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
   CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
 
@@ -494,6 +502,8 @@ def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[
 
 let Entry = 1 in
 def CC_AArch64_GHC : CallingConv<[
+  CCIfNest<CCAssignToReg<[X15]>>,
+
   CCIfType<[iPTR], CCBitConvertToType<i64>>,
 
   // Handle all vector types as either f64 or v2f64.
@@ -522,6 +532,7 @@ def CC_AArch64_Preserve_None : CallingConv<[
   // We can pass arguments in all general registers, except:
   // - X8, used for sret
+  // - X15 (on Windows), used as a temporary register in the prologue when allocating call frames
   // - X16/X17, used by the linker as IP0/IP1
   // - X18, the platform register
   // - X19, the base pointer
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 3335ee04bb0e0..2650c621e19f6 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -331,7 +331,9 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
 static bool produceCompactUnwindFrame(MachineFunction &MF);
 static bool needsWinCFI(const MachineFunction &MF);
 static StackOffset getSVEStackSize(const MachineFunction &MF);
-static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
+static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB,
+                                                 bool HasCall = false);
+static bool requiresSaveVG(const MachineFunction &MF);
 
 /// Returns true if a homogeneous prolog or epilog code can be emitted
 /// for the size optimization. If possible, a frame helper call is injected.
@@ -1006,6 +1008,16 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, } } +static bool windowsRequiresStackProbe(const MachineFunction &MF, + uint64_t StackSizeInBytes) { + const AArch64Subtarget &Subtarget = MF.getSubtarget(); + const AArch64FunctionInfo &MFI = *MF.getInfo(); + // TODO: When implementing stack protectors, take that into account + // for the probe threshold. + return Subtarget.isTargetWindows() && MFI.hasStackProbing() && + StackSizeInBytes >= uint64_t(MFI.getStackProbeSize()); +} + static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) { const MachineFunction *MF = MBB.getParent(); @@ -1027,7 +1039,8 @@ static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs, // but we would then have to make sure that we were in fact saving at least one // callee-save register in the prologue, which is additional complexity that // doesn't seem worth the benefit. -static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) { +static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB, + bool HasCall) { MachineFunction *MF = MBB->getParent(); // If MBB is an entry block, use X9 as the scratch register @@ -1041,6 +1054,11 @@ static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) { const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo(); LivePhysRegs LiveRegs(TRI); getLiveRegsForEntryMBB(LiveRegs, *MBB); + if (HasCall) { + LiveRegs.addReg(AArch64::X16); + LiveRegs.addReg(AArch64::X17); + LiveRegs.addReg(AArch64::X18); + } // Prefer X9 since it was historically used for the prologue scratch reg. const MachineRegisterInfo &MRI = MF->getRegInfo(); @@ -1081,23 +1099,18 @@ bool AArch64FrameLowering::canUseAsPrologue( MBB.isLiveIn(AArch64::NZCV)) return false; - // Don't need a scratch register if we're not going to re-align the stack or - // emit stack probes. - if (!RegInfo->hasStackRealignment(*MF) && !TLI->hasInlineStackProbe(*MF)) - return true; - // Otherwise, we can use any block as long as it has a scratch register - // available. - return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister; -} + if (RegInfo->hasStackRealignment(*MF) || TLI->hasInlineStackProbe(*MF)) + if (findScratchNonCalleeSaveRegister(TmpMBB) == AArch64::NoRegister) + return false; -static bool windowsRequiresStackProbe(MachineFunction &MF, - uint64_t StackSizeInBytes) { - const AArch64Subtarget &Subtarget = MF.getSubtarget(); - const AArch64FunctionInfo &MFI = *MF.getInfo(); - // TODO: When implementing stack protectors, take that into account - // for the probe threshold. - return Subtarget.isTargetWindows() && MFI.hasStackProbing() && - StackSizeInBytes >= uint64_t(MFI.getStackProbeSize()); + // May need a scratch register (for return value) if require making a special + // call + if (requiresSaveVG(*MF) || + windowsRequiresStackProbe(*MF, std::numeric_limits::max())) + if (findScratchNonCalleeSaveRegister(TmpMBB, true) == AArch64::NoRegister) + return false; + + return true; } static bool needsWinCFI(const MachineFunction &MF) { @@ -1378,8 +1391,8 @@ bool requiresGetVGCall(MachineFunction &MF) { !MF.getSubtarget().hasSVE(); } -static bool requiresSaveVG(MachineFunction &MF) { - AArch64FunctionInfo *AFI = MF.getInfo(); +static bool requiresSaveVG(const MachineFunction &MF) { + const AArch64FunctionInfo *AFI = MF.getInfo(); // For Darwin platforms we don't save VG for non-SVE functions, even if SME // is enabled with streaming mode changes. 
if (!AFI->hasStreamingModeChanges()) @@ -2049,6 +2062,29 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, if (AFI->getSVECalleeSavedStackSize()) report_fatal_error( "SVE callee saves not yet supported with stack probing"); + + // Find an available register to spill the value of X15 to, if X15 is being + // used already for nest. + unsigned X15Scratch = AArch64::NoRegister; + const AArch64Subtarget &STI = MF.getSubtarget(); + if (llvm::any_of(MBB.liveins(), + [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) { + return STI.getRegisterInfo()->isSuperOrSubRegisterEq( + AArch64::X15, LiveIn.PhysReg); + })) { + X15Scratch = findScratchNonCalleeSaveRegister(&MBB, true); + assert(X15Scratch != AArch64::NoRegister && + (X15Scratch < AArch64::X15 || X15Scratch > AArch64::X17)); +#ifndef NDEBUG + LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it +#endif + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch) + .addReg(AArch64::XZR) + .addReg(AArch64::X15, RegState::Undef) + .addReg(AArch64::X15, RegState::Implicit) + .setMIFlag(MachineInstr::FrameSetup); + } + uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4; if (NeedsWinCFI) { HasWinCFI = true; @@ -2171,6 +2207,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // we've set a frame pointer and already finished the SEH prologue. assert(!NeedsWinCFI); } + if (X15Scratch != AArch64::NoRegister) { + BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15) + .addReg(AArch64::XZR) + .addReg(X15Scratch, RegState::Undef) + .addReg(X15Scratch, RegState::Implicit) + .setMIFlag(MachineInstr::FrameSetup); + } } StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize; @@ -3355,7 +3398,7 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters( unsigned X0Scratch = AArch64::NoRegister; if (Reg1 == AArch64::VG) { // Find an available register to store value of VG to. - Reg1 = findScratchNonCalleeSaveRegister(&MBB); + Reg1 = findScratchNonCalleeSaveRegister(&MBB, true); assert(Reg1 != AArch64::NoRegister); SMEAttrs Attrs = AFI->getSMEFnAttrs(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index caac00c5b2faa..2036678942c0e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7116,59 +7116,80 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { - // Note: x18 cannot be used for the Nest parameter on Windows and macOS. - if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows()) - report_fatal_error( - "ADJUST_TRAMPOLINE operation is only supported on Linux."); - return Op.getOperand(0); } SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const { - - // Note: x18 cannot be used for the Nest parameter on Windows and macOS. 
-  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
-    report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux.");
-
   SDValue Chain = Op.getOperand(0);
-  SDValue Trmp = Op.getOperand(1); // trampoline
+  SDValue Trmp = Op.getOperand(1); // trampoline, >=32 bytes
   SDValue FPtr = Op.getOperand(2); // nested function
   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
 
-  SDLoc dl(Op);
-  EVT PtrVT = getPointerTy(DAG.getDataLayout());
-  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
 
-  TargetLowering::ArgListTy Args;
-  TargetLowering::ArgListEntry Entry;
+  // ldr NestReg, .+16
+  // ldr x17, .+20
+  // br x17
+  // .word 0
+  // .nest: .qword nest
+  // .fptr: .qword fptr
+  SDValue OutChains[5];
 
-  Entry.Ty = IntPtrTy;
-  Entry.Node = Trmp;
-  Args.push_back(Entry);
+  const Function *Func =
+      cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
+  CallingConv::ID CC = Func->getCallingConv();
+  unsigned NestReg;
 
-  if (auto *FI = dyn_cast<FrameIndexSDNode>(Trmp.getNode())) {
-    MachineFunction &MF = DAG.getMachineFunction();
-    MachineFrameInfo &MFI = MF.getFrameInfo();
-    Entry.Node =
-        DAG.getConstant(MFI.getObjectSize(FI->getIndex()), dl, MVT::i64);
-  } else
-    Entry.Node = DAG.getConstant(36, dl, MVT::i64);
+  switch (CC) {
+  default:
+    NestReg = 0x0f; // X15
+    break;
+  case CallingConv::ARM64EC_Thunk_Native:
+  case CallingConv::ARM64EC_Thunk_X64:
+    // Must be kept in sync with AArch64CallingConv.td
+    NestReg = 0x04; // X4
+    break;
+  }
 
-  Args.push_back(Entry);
-  Entry.Node = FPtr;
-  Args.push_back(Entry);
-  Entry.Node = Nest;
-  Args.push_back(Entry);
+  const char FptrReg = 0x11; // X17
 
-  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
-  TargetLowering::CallLoweringInfo CLI(DAG);
-  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
-      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
-      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
+  SDValue Addr = Trmp;
 
-  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
-  return CallResult.second;
+  SDLoc dl(Op);
+  OutChains[0] = DAG.getStore(
+      Chain, dl, DAG.getConstant(0x58000080u | NestReg, dl, MVT::i32), Addr,
+      MachinePointerInfo(TrmpAddr));
+
+  Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                     DAG.getConstant(4, dl, MVT::i64));
+  OutChains[1] = DAG.getStore(
+      Chain, dl, DAG.getConstant(0x580000b0u | FptrReg, dl, MVT::i32), Addr,
+      MachinePointerInfo(TrmpAddr, 4));
+
+  Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                     DAG.getConstant(8, dl, MVT::i64));
+  OutChains[2] =
+      DAG.getStore(Chain, dl, DAG.getConstant(0xd61f0220u, dl, MVT::i32), Addr,
+                   MachinePointerInfo(TrmpAddr, 8));
+
+  Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                     DAG.getConstant(16, dl, MVT::i64));
+  OutChains[3] =
+      DAG.getStore(Chain, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 16));
+
+  Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                     DAG.getConstant(24, dl, MVT::i64));
+  OutChains[4] =
+      DAG.getStore(Chain, dl, FPtr, Addr, MachinePointerInfo(TrmpAddr, 24));
+
+  SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+
+  SDValue EndOfTrmp = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+                                  DAG.getConstant(12, dl, MVT::i64));
+
+  // Call clear cache on the trampoline instructions.
+ return DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken, Trmp, + EndOfTrmp); } SDValue AArch64TargetLowering::LowerOperation(SDValue Op, diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp index 6a559ff023caa..aa1251f3b9485 100644 --- a/llvm/lib/TargetParser/Triple.cpp +++ b/llvm/lib/TargetParser/Triple.cpp @@ -1732,8 +1732,6 @@ unsigned Triple::getTrampolineSize() const { if (isOSLinux()) return 48; break; - case Triple::aarch64: - return 36; } return 32; } diff --git a/llvm/test/CodeGen/AArch64/nest-register.ll b/llvm/test/CodeGen/AArch64/nest-register.ll index 1e1c1b044bab6..2e94dfba1fa52 100644 --- a/llvm/test/CodeGen/AArch64/nest-register.ll +++ b/llvm/test/CodeGen/AArch64/nest-register.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc -disable-post-ra -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s ; Tests that the 'nest' parameter attribute causes the relevant parameter to be @@ -5,18 +6,21 @@ define ptr @nest_receiver(ptr nest %arg) nounwind { ; CHECK-LABEL: nest_receiver: -; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov x0, x18 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: mov x0, x15 +; CHECK-NEXT: ret ret ptr %arg } define ptr @nest_caller(ptr %arg) nounwind { ; CHECK-LABEL: nest_caller: -; CHECK: mov x18, x0 -; CHECK-NEXT: bl nest_receiver -; CHECK: ret +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: mov x15, x0 +; CHECK-NEXT: bl nest_receiver +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret %result = call ptr @nest_receiver(ptr nest %arg) ret ptr %result diff --git a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll index 9619895c450ca..32c3eaeb9c876 100644 --- a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll +++ b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll @@ -207,7 +207,7 @@ define void @test_attributes(ptr byval(%struct2) %s) gc "statepoint-example" { ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: ldr x8, [sp, #64] ; CHECK-NEXT: ldr q0, [sp, #48] -; CHECK-NEXT: mov x18, xzr +; CHECK-NEXT: mov x15, xzr ; CHECK-NEXT: mov w0, #42 // =0x2a ; CHECK-NEXT: mov w1, #17 // =0x11 ; CHECK-NEXT: str x8, [sp, #16] diff --git a/llvm/test/CodeGen/AArch64/trampoline.ll b/llvm/test/CodeGen/AArch64/trampoline.ll index 30ac2aa283b3e..d9016b02a0f80 100644 --- a/llvm/test/CodeGen/AArch64/trampoline.ll +++ b/llvm/test/CodeGen/AArch64/trampoline.ll @@ -1,32 +1,265 @@ -; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK-LINUX +; RUN: llc -mtriple=aarch64-none-eabi < %s | FileCheck %s --check-prefixes=CHECK-LINUX +; RUN: llc -mtriple=aarch64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK-PC +; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck %s --check-prefixes=CHECK-APPLE @trampg = internal global [36 x i8] zeroinitializer, align 8 declare void @llvm.init.trampoline(ptr, ptr, ptr); declare ptr @llvm.adjust.trampoline(ptr); -define i64 @f(ptr nest %c, i64 %x, i64 %y) { - %sum = add i64 %x, %y - ret i64 %sum +define ptr @f(ptr nest %x, i64 %y) { +; CHECK-LINUX-LABEL: f: +; CHECK-LINUX: // %bb.0: +; CHECK-LINUX-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-LINUX-NEXT: sub sp, sp, #237, lsl #12 // =970752 +; CHECK-LINUX-NEXT: sub sp, sp, #3264 +; CHECK-LINUX-NEXT: .cfi_def_cfa_offset 974032 +; CHECK-LINUX-NEXT: .cfi_offset w29, -16 +; CHECK-LINUX-NEXT: add x0, x15, x0 +; CHECK-LINUX-NEXT: add sp, sp, #237, lsl #12 // =970752 +; CHECK-LINUX-NEXT: add sp, sp, #3264 +; CHECK-LINUX-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-LINUX-NEXT: ret +; +; CHECK-PC-LABEL: f: +; CHECK-PC: .seh_proc f +; CHECK-PC-NEXT: // %bb.0: +; CHECK-PC-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-PC-NEXT: .seh_save_fplr_x 16 +; CHECK-PC-NEXT: mov x9, x15 +; CHECK-PC-NEXT: mov x15, #60876 // =0xedcc +; CHECK-PC-NEXT: .seh_nop +; CHECK-PC-NEXT: bl __chkstk +; CHECK-PC-NEXT: .seh_nop +; CHECK-PC-NEXT: sub sp, sp, x15, lsl #4 +; CHECK-PC-NEXT: .seh_stackalloc 974016 +; CHECK-PC-NEXT: mov x15, x9 +; CHECK-PC-NEXT: .seh_endprologue +; CHECK-PC-NEXT: add x0, x15, x0 +; CHECK-PC-NEXT: .seh_startepilogue +; CHECK-PC-NEXT: add sp, sp, #237, lsl #12 // =970752 +; CHECK-PC-NEXT: .seh_stackalloc 970752 +; CHECK-PC-NEXT: add sp, sp, #3264 +; CHECK-PC-NEXT: .seh_stackalloc 3264 +; CHECK-PC-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-PC-NEXT: .seh_save_fplr_x 16 +; CHECK-PC-NEXT: .seh_endepilogue +; CHECK-PC-NEXT: ret +; CHECK-PC-NEXT: .seh_endfunclet +; CHECK-PC-NEXT: .seh_endproc +; +; CHECK-APPLE-LABEL: f: +; CHECK-APPLE: ; %bb.0: +; CHECK-APPLE-NEXT: stp x28, x27, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-APPLE-NEXT: sub sp, sp, #237, lsl #12 ; =970752 +; CHECK-APPLE-NEXT: sub sp, sp, #3264 +; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 974032 +; CHECK-APPLE-NEXT: .cfi_offset w27, -8 +; CHECK-APPLE-NEXT: .cfi_offset w28, -16 +; CHECK-APPLE-NEXT: add x0, x15, x0 +; CHECK-APPLE-NEXT: add sp, sp, #237, lsl #12 ; =970752 +; CHECK-APPLE-NEXT: add sp, sp, #3264 +; CHECK-APPLE-NEXT: ldp x28, x27, [sp], #16 ; 16-byte Folded Reload +; CHECK-APPLE-NEXT: ret + %chkstack = alloca [u0xedcba x i8] + %sum = getelementptr i8, ptr %x, i64 %y + ret ptr %sum } define i64 @func1() { +; CHECK-LINUX-LABEL: func1: +; CHECK-LINUX: // %bb.0: +; CHECK-LINUX-NEXT: sub sp, sp, #64 +; CHECK-LINUX-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-LINUX-NEXT: .cfi_def_cfa_offset 64 +; CHECK-LINUX-NEXT: .cfi_offset w30, -16 +; CHECK-LINUX-NEXT: adrp x8, :got:f +; CHECK-LINUX-NEXT: mov w9, #544 // =0x220 +; CHECK-LINUX-NEXT: add x0, sp, #8 +; CHECK-LINUX-NEXT: ldr x8, [x8, :got_lo12:f] +; CHECK-LINUX-NEXT: movk w9, #54815, lsl #16 +; CHECK-LINUX-NEXT: str w9, [sp, #16] +; CHECK-LINUX-NEXT: add x9, sp, #56 +; CHECK-LINUX-NEXT: stp x9, x8, [sp, #24] +; CHECK-LINUX-NEXT: mov x8, #132 // =0x84 +; CHECK-LINUX-NEXT: movk x8, #22528, lsl #16 +; CHECK-LINUX-NEXT: movk x8, #177, lsl #32 +; CHECK-LINUX-NEXT: movk x8, #22528, lsl #48 +; CHECK-LINUX-NEXT: str x8, [sp, #8] +; CHECK-LINUX-NEXT: add x8, sp, #8 +; CHECK-LINUX-NEXT: add x1, x8, #12 +; CHECK-LINUX-NEXT: bl __clear_cache +; CHECK-LINUX-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-LINUX-NEXT: mov x0, xzr +; CHECK-LINUX-NEXT: add sp, sp, #64 +; CHECK-LINUX-NEXT: ret +; +; CHECK-PC-LABEL: func1: +; CHECK-PC: .seh_proc func1 +; CHECK-PC-NEXT: // %bb.0: +; CHECK-PC-NEXT: sub sp, sp, #64 +; CHECK-PC-NEXT: .seh_stackalloc 64 +; CHECK-PC-NEXT: str x30, [sp, #48] // 8-byte Folded Spill +; CHECK-PC-NEXT: .seh_save_reg x30, 48 +; CHECK-PC-NEXT: .seh_endprologue +; CHECK-PC-NEXT: adrp x8, f +; CHECK-PC-NEXT: add x8, x8, :lo12:f +; CHECK-PC-NEXT: add x9, sp, #56 +; 
CHECK-PC-NEXT: stp x9, x8, [sp, #24] +; CHECK-PC-NEXT: mov w8, #544 // =0x220 +; CHECK-PC-NEXT: add x0, sp, #8 +; CHECK-PC-NEXT: movk w8, #54815, lsl #16 +; CHECK-PC-NEXT: str w8, [sp, #16] +; CHECK-PC-NEXT: mov x8, #132 // =0x84 +; CHECK-PC-NEXT: movk x8, #22528, lsl #16 +; CHECK-PC-NEXT: movk x8, #177, lsl #32 +; CHECK-PC-NEXT: movk x8, #22528, lsl #48 +; CHECK-PC-NEXT: str x8, [sp, #8] +; CHECK-PC-NEXT: add x8, sp, #8 +; CHECK-PC-NEXT: add x1, x8, #12 +; CHECK-PC-NEXT: bl __clear_cache +; CHECK-PC-NEXT: mov x0, xzr +; CHECK-PC-NEXT: .seh_startepilogue +; CHECK-PC-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-PC-NEXT: .seh_save_reg x30, 48 +; CHECK-PC-NEXT: add sp, sp, #64 +; CHECK-PC-NEXT: .seh_stackalloc 64 +; CHECK-PC-NEXT: .seh_endepilogue +; CHECK-PC-NEXT: ret +; CHECK-PC-NEXT: .seh_endfunclet +; CHECK-PC-NEXT: .seh_endproc +; +; CHECK-APPLE-LABEL: func1: +; CHECK-APPLE: ; %bb.0: +; CHECK-APPLE-NEXT: sub sp, sp, #64 +; CHECK-APPLE-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill +; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 64 +; CHECK-APPLE-NEXT: .cfi_offset w30, -8 +; CHECK-APPLE-NEXT: .cfi_offset w29, -16 +; CHECK-APPLE-NEXT: Lloh0: +; CHECK-APPLE-NEXT: adrp x8, _f@PAGE +; CHECK-APPLE-NEXT: Lloh1: +; CHECK-APPLE-NEXT: add x8, x8, _f@PAGEOFF +; CHECK-APPLE-NEXT: add x9, sp, #40 +; CHECK-APPLE-NEXT: stp x9, x8, [sp, #16] +; CHECK-APPLE-NEXT: mov w8, #544 ; =0x220 +; CHECK-APPLE-NEXT: mov x0, sp +; CHECK-APPLE-NEXT: movk w8, #54815, lsl #16 +; CHECK-APPLE-NEXT: str w8, [sp, #8] +; CHECK-APPLE-NEXT: mov x8, #132 ; =0x84 +; CHECK-APPLE-NEXT: movk x8, #22528, lsl #16 +; CHECK-APPLE-NEXT: movk x8, #177, lsl #32 +; CHECK-APPLE-NEXT: movk x8, #22528, lsl #48 +; CHECK-APPLE-NEXT: str x8, [sp] +; CHECK-APPLE-NEXT: mov x8, sp +; CHECK-APPLE-NEXT: add x1, x8, #12 +; CHECK-APPLE-NEXT: bl ___clear_cache +; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload +; CHECK-APPLE-NEXT: mov x0, xzr +; CHECK-APPLE-NEXT: add sp, sp, #64 +; CHECK-APPLE-NEXT: ret +; CHECK-APPLE-NEXT: .loh AdrpAdd Lloh0, Lloh1 %val = alloca i64 - %nval = bitcast ptr %val to ptr %tramp = alloca [36 x i8], align 8 - ; CHECK: mov w1, #36 - ; CHECK: bl __trampoline_setup - call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %nval) + call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %val) %fp = call ptr @llvm.adjust.trampoline(ptr %tramp) ret i64 0 } define i64 @func2() { +; CHECK-LINUX-LABEL: func2: +; CHECK-LINUX: // %bb.0: +; CHECK-LINUX-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-LINUX-NEXT: .cfi_def_cfa_offset 16 +; CHECK-LINUX-NEXT: .cfi_offset w30, -16 +; CHECK-LINUX-NEXT: adrp x8, :got:f +; CHECK-LINUX-NEXT: mov w9, #544 // =0x220 +; CHECK-LINUX-NEXT: adrp x0, trampg +; CHECK-LINUX-NEXT: add x0, x0, :lo12:trampg +; CHECK-LINUX-NEXT: ldr x8, [x8, :got_lo12:f] +; CHECK-LINUX-NEXT: movk w9, #54815, lsl #16 +; CHECK-LINUX-NEXT: str w9, [x0, #8] +; CHECK-LINUX-NEXT: add x9, sp, #8 +; CHECK-LINUX-NEXT: add x1, x0, #12 +; CHECK-LINUX-NEXT: stp x9, x8, [x0, #16] +; CHECK-LINUX-NEXT: mov x8, #132 // =0x84 +; CHECK-LINUX-NEXT: movk x8, #22528, lsl #16 +; CHECK-LINUX-NEXT: movk x8, #177, lsl #32 +; CHECK-LINUX-NEXT: movk x8, #22528, lsl #48 +; CHECK-LINUX-NEXT: str x8, [x0] +; CHECK-LINUX-NEXT: bl __clear_cache +; CHECK-LINUX-NEXT: mov x0, xzr +; CHECK-LINUX-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-LINUX-NEXT: ret +; +; CHECK-PC-LABEL: func2: +; CHECK-PC: .seh_proc func2 +; CHECK-PC-NEXT: // %bb.0: +; CHECK-PC-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-PC-NEXT: .seh_save_reg_x x30, 16 +; CHECK-PC-NEXT: .seh_endprologue +; CHECK-PC-NEXT: adrp x0, trampg +; CHECK-PC-NEXT: add x0, x0, :lo12:trampg +; CHECK-PC-NEXT: adrp x8, f +; CHECK-PC-NEXT: add x8, x8, :lo12:f +; CHECK-PC-NEXT: add x9, sp, #8 +; CHECK-PC-NEXT: add x1, x0, #12 +; CHECK-PC-NEXT: stp x9, x8, [x0, #16] +; CHECK-PC-NEXT: mov w8, #544 // =0x220 +; CHECK-PC-NEXT: movk w8, #54815, lsl #16 +; CHECK-PC-NEXT: str w8, [x0, #8] +; CHECK-PC-NEXT: mov x8, #132 // =0x84 +; CHECK-PC-NEXT: movk x8, #22528, lsl #16 +; CHECK-PC-NEXT: movk x8, #177, lsl #32 +; CHECK-PC-NEXT: movk x8, #22528, lsl #48 +; CHECK-PC-NEXT: str x8, [x0] +; CHECK-PC-NEXT: bl __clear_cache +; CHECK-PC-NEXT: mov x0, xzr +; CHECK-PC-NEXT: .seh_startepilogue +; CHECK-PC-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-PC-NEXT: .seh_save_reg_x x30, 16 +; CHECK-PC-NEXT: .seh_endepilogue +; CHECK-PC-NEXT: ret +; CHECK-PC-NEXT: .seh_endfunclet +; CHECK-PC-NEXT: .seh_endproc +; +; CHECK-APPLE-LABEL: func2: +; CHECK-APPLE: ; %bb.0: +; CHECK-APPLE-NEXT: sub sp, sp, #32 +; CHECK-APPLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 32 +; CHECK-APPLE-NEXT: .cfi_offset w30, -8 +; CHECK-APPLE-NEXT: .cfi_offset w29, -16 +; CHECK-APPLE-NEXT: Lloh2: +; CHECK-APPLE-NEXT: adrp x0, _trampg@PAGE +; CHECK-APPLE-NEXT: Lloh3: +; CHECK-APPLE-NEXT: add x0, x0, _trampg@PAGEOFF +; CHECK-APPLE-NEXT: Lloh4: +; CHECK-APPLE-NEXT: adrp x8, _f@PAGE +; CHECK-APPLE-NEXT: Lloh5: +; CHECK-APPLE-NEXT: add x8, x8, _f@PAGEOFF +; CHECK-APPLE-NEXT: add x9, sp, #8 +; CHECK-APPLE-NEXT: add x1, x0, #12 +; CHECK-APPLE-NEXT: stp x9, x8, [x0, #16] +; CHECK-APPLE-NEXT: mov w8, #544 ; =0x220 +; CHECK-APPLE-NEXT: movk w8, #54815, lsl #16 +; CHECK-APPLE-NEXT: str w8, [x0, #8] +; CHECK-APPLE-NEXT: mov x8, #132 ; =0x84 +; CHECK-APPLE-NEXT: movk x8, #22528, lsl #16 +; CHECK-APPLE-NEXT: movk x8, #177, lsl #32 +; CHECK-APPLE-NEXT: movk x8, #22528, lsl #48 +; CHECK-APPLE-NEXT: str x8, [x0] +; CHECK-APPLE-NEXT: bl ___clear_cache +; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-APPLE-NEXT: mov x0, xzr +; CHECK-APPLE-NEXT: add sp, sp, #32 +; CHECK-APPLE-NEXT: ret +; CHECK-APPLE-NEXT: .loh AdrpAdd Lloh4, Lloh5 +; CHECK-APPLE-NEXT: .loh AdrpAdd Lloh2, Lloh3 %val = alloca i64 - %nval = bitcast ptr %val to ptr - ; CHECK: mov w1, #36 - ; CHECK: bl __trampoline_setup - call void @llvm.init.trampoline(ptr @trampg, ptr @f, ptr %nval) + call void @llvm.init.trampoline(ptr @trampg, ptr @f, ptr %val) %fp = call ptr @llvm.adjust.trampoline(ptr @trampg) ret i64 0 } diff --git a/llvm/test/CodeGen/AArch64/win64cc-x18.ll b/llvm/test/CodeGen/AArch64/win64cc-x18.ll index b3e78cc9bbb81..4b45c300e9c1d 100644 --- a/llvm/test/CodeGen/AArch64/win64cc-x18.ll +++ b/llvm/test/CodeGen/AArch64/win64cc-x18.ll @@ -1,35 +1,26 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +;; Testing that nest uses x15 on all calling conventions (except Arm64EC) -;; Testing that x18 is not clobbered when passing pointers with the nest -;; attribute on windows - -; RUN: llc < %s -mtriple=aarch64-pc-windows-msvc | FileCheck %s --check-prefixes=CHECK,CHECK-NO-X18 -; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-X18 +; RUN: llc < %s -mtriple=aarch64-pc-windows-msvc | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-apple-darwin- | FileCheck %s define dso_local i64 @other(ptr nest %p) #0 
{ ; CHECK-LABEL: other: -; CHECK-X18: ldr x0, [x18] -; CHECK-NO-X18: ldr x0, [x0] +; CHECK: ldr x0, [x15] +; CHECK: ret %r = load i64, ptr %p -; CHECK: ret ret i64 %r } define dso_local void @func() #0 { ; CHECK-LABEL: func: - - +; CHECK: add x15, sp, #8 +; CHECK: bl {{_?other}} +; CHECK: ret entry: %p = alloca i64 -; CHECK: mov w8, #1 -; CHECK: stp x30, x8, [sp, #-16] -; CHECK-X18: add x18, sp, #8 store i64 1, ptr %p -; CHECK-NO-X18: add x0, sp, #8 -; CHECK: bl other call void @other(ptr nest %p) -; CHECK: ldr x30, [sp], #16 -; CHECK: ret ret void } diff --git a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll index 4799ea3bcd19f..986666e015e9e 100644 --- a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll +++ b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll @@ -93,7 +93,7 @@ define dso_local i32 @all_gpr_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c ; CHECK-NEXT: mov x5, #0 // =0x0 ; CHECK-NEXT: mov x6, #0 // =0x0 ; CHECK-NEXT: mov x7, #0 // =0x0 -; CHECK-NEXT: mov x18, #0 // =0x0 +; CHECK-NEXT: mov x15, #0 // =0x0 ; CHECK-NEXT: orr w0, w8, w2 ; CHECK-NEXT: mov x2, #0 // =0x0 ; CHECK-NEXT: mov x8, #0 // =0x0 @@ -146,7 +146,7 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo ; DEFAULT-NEXT: mov x5, #0 // =0x0 ; DEFAULT-NEXT: mov x6, #0 // =0x0 ; DEFAULT-NEXT: mov x7, #0 // =0x0 -; DEFAULT-NEXT: mov x18, #0 // =0x0 +; DEFAULT-NEXT: mov x15, #0 // =0x0 ; DEFAULT-NEXT: movi v0.2d, #0000000000000000 ; DEFAULT-NEXT: orr w0, w8, w2 ; DEFAULT-NEXT: mov x2, #0 // =0x0 @@ -169,7 +169,7 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo ; SVE-OR-SME-NEXT: mov x5, #0 // =0x0 ; SVE-OR-SME-NEXT: mov x6, #0 // =0x0 ; SVE-OR-SME-NEXT: mov x7, #0 // =0x0 -; SVE-OR-SME-NEXT: mov x18, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x15, #0 // =0x0 ; SVE-OR-SME-NEXT: mov z0.d, #0 // =0x0 ; SVE-OR-SME-NEXT: orr w0, w8, w2 ; SVE-OR-SME-NEXT: mov x2, #0 // =0x0 @@ -196,7 +196,7 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo ; STREAMING-COMPAT-NEXT: mov x5, #0 // =0x0 ; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0 ; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0 -; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x15, #0 // =0x0 ; STREAMING-COMPAT-NEXT: fmov d0, xzr ; STREAMING-COMPAT-NEXT: orr w0, w8, w2 ; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0 @@ -492,7 +492,7 @@ define dso_local double @all_gpr_arg_float(double noundef %a, float noundef %b) ; CHECK-NEXT: mov x6, #0 // =0x0 ; CHECK-NEXT: mov x7, #0 // =0x0 ; CHECK-NEXT: mov x8, #0 // =0x0 -; CHECK-NEXT: mov x18, #0 // =0x0 +; CHECK-NEXT: mov x15, #0 // =0x0 ; CHECK-NEXT: ret entry: @@ -547,7 +547,7 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca ; DEFAULT-NEXT: mov x6, #0 // =0x0 ; DEFAULT-NEXT: mov x7, #0 // =0x0 ; DEFAULT-NEXT: mov x8, #0 // =0x0 -; DEFAULT-NEXT: mov x18, #0 // =0x0 +; DEFAULT-NEXT: mov x15, #0 // =0x0 ; DEFAULT-NEXT: movi v1.2d, #0000000000000000 ; DEFAULT-NEXT: movi v2.2d, #0000000000000000 ; DEFAULT-NEXT: movi v3.2d, #0000000000000000 @@ -570,7 +570,7 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca ; SVE-OR-SME-NEXT: mov x6, #0 // =0x0 ; SVE-OR-SME-NEXT: mov x7, #0 // =0x0 ; SVE-OR-SME-NEXT: mov x8, #0 // =0x0 -; SVE-OR-SME-NEXT: mov x18, #0 // =0x0 +; SVE-OR-SME-NEXT: mov x15, #0 // =0x0 ; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0 ; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0 ; SVE-OR-SME-NEXT: mov z3.d, #0 // 
=0x0 @@ -597,7 +597,7 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca ; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0 ; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0 ; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0 -; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0 +; STREAMING-COMPAT-NEXT: mov x15, #0 // =0x0 ; STREAMING-COMPAT-NEXT: fmov d1, xzr ; STREAMING-COMPAT-NEXT: fmov d2, xzr ; STREAMING-COMPAT-NEXT: fmov d3, xzr
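
For reference, the 32-byte trampoline written by the new AArch64 LowerINIT_TRAMPOLINE above has the following layout. This is a minimal sketch derived from the constants stored by the lowering (0x58000080 | 15, 0x580000b0 | 17, 0xd61f0220) and from the offsets of the five stores; the C struct and its field names are illustrative only and are not part of the patch.

  #include <stdint.h>

  // Sketch of the 32-byte on-stack trampoline emitted for nested functions
  // on AArch64 (non-Arm64EC-thunk calling conventions use x15 for 'nest').
  struct aarch64_trampoline {
    uint32_t ldr_nest; // 0x5800008f: ldr x15, .+16  (loads the nest pointer)
    uint32_t ldr_fptr; // 0x580000b1: ldr x17, .+20  (loads the nested function)
    uint32_t br_fptr;  // 0xd61f0220: br  x17
    uint32_t pad;      // unused word, keeps the literal pool 8-byte aligned
    uint64_t nest;     // literal loaded into x15
    uint64_t fptr;     // literal loaded into x17
  };

Only the first 12 bytes are instructions, which is why the lowering emits ISD::CLEAR_CACHE over [Trmp, Trmp + 12) after the stores; the whole object fits in the generic 32-byte trampoline size now reported by Triple::getTrampolineSize, so no AArch64-specific size (and no call to __trampoline_setup) is needed.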