@@ -26250,28 +26250,57 @@ static unsigned getOpcodeForRetpoline(unsigned RPOpc) {
2625026250
2625126251static const char *getRetpolineSymbol(const X86Subtarget &Subtarget,
2625226252 unsigned Reg) {
26253+ if (Subtarget.useRetpolineExternalThunk()) {
26254+ // When using an external thunk for retpolines, we pick names that match the
26255+ // names GCC happens to use as well. This helps simplify the implementation
26256+ // of the thunks for kernels where they have no easy ability to create
26257+ // aliases and are doing non-trivial configuration of the thunk's body. For
26258+ // example, the Linux kernel will do boot-time hot patching of the thunk
26259+ // bodies and cannot easily export aliases of these to loaded modules.
26260+ //
26261+ // Note that at any point in the future, we may need to change the semantics
26262+ // of how we implement retpolines and at that time will likely change the
26263+ // name of the called thunk. Essentially, there is no hard guarantee that
26264+ // LLVM will generate calls to specific thunks, we merely make a best-effort
26265+ // attempt to help out kernels and other systems where duplicating the
26266+ // thunks is costly.
26267+ switch (Reg) {
26268+ case X86::EAX:
26269+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26270+ return "__x86_indirect_thunk_eax";
26271+ case X86::ECX:
26272+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26273+ return "__x86_indirect_thunk_ecx";
26274+ case X86::EDX:
26275+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26276+ return "__x86_indirect_thunk_edx";
26277+ case X86::EDI:
26278+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26279+ return "__x86_indirect_thunk_edi";
26280+ case X86::R11:
26281+ assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
26282+ return "__x86_indirect_thunk_r11";
26283+ }
26284+ llvm_unreachable("unexpected reg for retpoline");
26285+ }
26286+
26287+ // When targeting an internal COMDAT thunk use an LLVM-specific name.
2625326288 switch (Reg) {
26254- case 0:
26255- assert(!Subtarget.is64Bit() && "R11 should always be available on x64");
26256- return Subtarget.useRetpolineExternalThunk()
26257- ? "__llvm_external_retpoline_push"
26258- : "__llvm_retpoline_push";
2625926289 case X86::EAX:
26260- return Subtarget.useRetpolineExternalThunk()
26261- ? "__llvm_external_retpoline_eax"
26262- : "__llvm_retpoline_eax";
26290+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26291+ return "__llvm_retpoline_eax";
2626326292 case X86::ECX:
26264- return Subtarget.useRetpolineExternalThunk()
26265- ? "__llvm_external_retpoline_ecx"
26266- : "__llvm_retpoline_ecx";
26293+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26294+ return "__llvm_retpoline_ecx";
2626726295 case X86::EDX:
26268- return Subtarget.useRetpolineExternalThunk()
26269- ? "__llvm_external_retpoline_edx"
26270- : "__llvm_retpoline_edx";
26296+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26297+ return "__llvm_retpoline_edx";
26298+ case X86::EDI:
26299+ assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
26300+ return "__llvm_retpoline_edi";
2627126301 case X86::R11:
26272- return Subtarget.useRetpolineExternalThunk()
26273- ? "__llvm_external_retpoline_r11"
26274- : "__llvm_retpoline_r11";
26302+ assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
26303+ return "__llvm_retpoline_r11";
2627526304 }
2627626305 llvm_unreachable("unexpected reg for retpoline");
2627726306}
@@ -26290,15 +26319,13 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
2629026319 // just use R11, but we scan for uses anyway to ensure we don't generate
2629126320 // incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't
2629226321 // already a register use operand to the call to hold the callee. If none
26293- // are available, push the callee instead. This is less efficient, but is
26294- // necessary for functions using 3 regparms. Such function calls are
26295- // (currently) not eligible for tail call optimization, because there is no
26296- // scratch register available to hold the address of the callee.
26322+ // are available, use EDI instead. EDI is chosen because EBX is the PIC base
26323+ // register and ESI is the base pointer to realigned stack frames with VLAs.
2629726324 SmallVector<unsigned, 3> AvailableRegs;
2629826325 if (Subtarget.is64Bit())
2629926326 AvailableRegs.push_back(X86::R11);
2630026327 else
26301- AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX});
26328+ AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI });
2630226329
2630326330 // Zero out any registers that are already used.
2630426331 for (const auto &MO : MI.operands()) {
@@ -26316,30 +26343,18 @@ X86TargetLowering::EmitLoweredRetpoline(MachineInstr &MI,
2631626343 break;
2631726344 }
2631826345 }
26346+ if (!AvailableReg)
26347+ report_fatal_error("calling convention incompatible with retpoline, no "
26348+ "available registers");
2631926349
2632026350 const char *Symbol = getRetpolineSymbol(Subtarget, AvailableReg);
2632126351
26322- if (AvailableReg == 0) {
26323- // No register available. Use PUSH. This must not be a tailcall, and this
26324- // must not be x64.
26325- if (Subtarget.is64Bit())
26326- report_fatal_error(
26327- "Cannot make an indirect call on x86-64 using both retpoline and a "
26328- "calling convention that preservers r11");
26329- if (Opc != X86::CALLpcrel32)
26330- report_fatal_error("Cannot make an indirect tail call on x86 using "
26331- "retpoline without a preserved register");
26332- BuildMI(*BB, MI, DL, TII->get(X86::PUSH32r)).addReg(CalleeVReg);
26333- MI.getOperand(0).ChangeToES(Symbol);
26334- MI.setDesc(TII->get(Opc));
26335- } else {
26336- BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
26337- .addReg(CalleeVReg);
26338- MI.getOperand(0).ChangeToES(Symbol);
26339- MI.setDesc(TII->get(Opc));
26340- MachineInstrBuilder(*BB->getParent(), &MI)
26341- .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
26342- }
26352+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
26353+ .addReg(CalleeVReg);
26354+ MI.getOperand(0).ChangeToES(Symbol);
26355+ MI.setDesc(TII->get(Opc));
26356+ MachineInstrBuilder(*BB->getParent(), &MI)
26357+ .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
2634326358 return BB;
2634426359}
2634526360
0 commit comments