@@ -2213,6 +2213,9 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
22132213 OutStreamer->AddComment (" EVEX TO EVEX Compression " , false );
22142214 }
22152215
2216+ // We use this to suppress NOP padding for Windows EH.
2217+ bool IsTailJump = false ;
2218+
22162219 switch (MI->getOpcode ()) {
22172220 case TargetOpcode::DBG_VALUE:
22182221 llvm_unreachable (" Should be handled target independently" );
@@ -2271,6 +2274,7 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
22712274 case X86::TAILJMPm64_REX:
22722275 // Lower these as normal, but add some comments.
22732276 OutStreamer->AddComment (" TAILCALL" );
2277+ IsTailJump = true ;
22742278 break ;
22752279
22762280 case X86::TLS_addr32:
@@ -2482,8 +2486,151 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
24822486 SMShadowTracker.emitShadowPadding (*OutStreamer, getSubtargetInfo ());
24832487 // Then emit the call
24842488 OutStreamer->emitInstruction (TmpInst, getSubtargetInfo ());
2489+
2490+ // Since tail calls transfer control without leaving a stack frame, there is
2491+ // never a need for NOP padding tail calls.
2492+ if (!IsTailJump)
2493+ maybeEmitNopAfterCallForWindowsEH (MI);
24852494 return ;
24862495 }
24872496
24882497 EmitAndCountInstruction (TmpInst);
24892498}
2499+
2500+ // Determines whether a NOP is required after a CALL, so that Windows EH
2501+ // IP2State tables have the correct information.
2502+ //
2503+ // On most Windows platforms (AMD64, ARM64, ARM32, IA64, but *not* x86-32),
2504+ // exception handling works by looking up instruction pointers in lookup
2505+ // tables. These lookup tables are stored in .xdata sections in executables.
2506+ // One element of these lookup tables is the "IP2State" table (Instruction
2507+ // Pointer to State).
2508+ //
2509+ // If a function has any instructions that require cleanup during exception
2510+ // unwinding, then it will have an IP2State table. Each entry in the IP2State
2511+ // table describes a range of bytes in the function's instruction stream, and
2512+ // associates an "EH state number" with that range of instructions. A value of
2513+ // -1 means "the null state", which does not require any code to execute.
2514+ // A value other than -1 is an index into the State table.
2515+ //
2516+ // The entries in the IP2State table contain byte offsets within the instruction
2517+ // stream of the function. The Windows ABI requires that these offsets are
2518+ // aligned to instruction boundaries; they are not permitted to point to a byte
2519+ // that is not the first byte of an instruction.
2520+ //
2521+ // Unfortunately, CALL instructions present a problem during unwinding. CALL
2522+ // instructions push the address of the instruction after the CALL instruction,
2523+ // so that execution can resume after the CALL. If the CALL is the last
2524+ // instruction within an IP2State region, then the return address (on the stack)
2525+ // points to the *next* IP2State region. This means that the unwinder will
2526+ // use the wrong cleanup funclet during unwinding.
2527+ //
2528+ // To fix this problem, the Windows AMD64 ABI requires that CALL instructions
2529+ // are never placed at the end of an IP2State region. Stated equivalently, the
2530+ // end of a CALL instruction cannot be aligned to an IP2State boundary. If a
2531+ // CALL instruction would occur at the end of an IP2State region, then the
2532+ // compiler must insert a NOP instruction after the CALL. The NOP instruction
2533+ // is placed in the same EH region as the CALL instruction, so that the return
2534+ // address points to the NOP and the unwinder will locate the correct region.
2535+ //
2536+ // NOP padding is only necessary on Windows AMD64 targets. On ARM64 and ARM32,
2537+ // instructions have a fixed size so the unwinder knows how to "back up" by
2538+ // one instruction.
2539+ //
2540+ // Interaction with Import Call Optimization (ICO):
2541+ //
2542+ // Import Call Optimization (ICO) is a compiler + OS feature on Windows which
2543+ // improves the performance and security of DLL imports. ICO relies on using a
2544+ // specific CALL idiom that can be replaced by the OS DLL loader. This removes
2545+ // a load and indirect CALL and replaces it with a single direct CALL.
2546+ //
2547+ // To achieve this, ICO also inserts NOPs after the CALL instruction. If the
2548+ // end of the CALL is aligned with an EH state transition, we *also* insert
2549+ // a single-byte NOP. **Both forms of NOPs must be preserved.** They cannot
2550+ // be combined into a single larger NOP; nor can the second NOP be removed.
2551+ //
2552+ // This is necessary because, if ICO is active and the call site is modified
2553+ // by the loader, the loader will end up overwriting the NOPs that were inserted
2554+ // for ICO. That means that those NOPs cannot be used for the correct
2555+ // termination of the exception handling region (the IP2State transition),
2556+ // so we still need an additional NOP instruction. The NOPs cannot be combined
2557+ // into a longer NOP (which is ordinarily desirable) because then ICO would
2558+ // split one instruction, producing a malformed instruction after the ICO call.
2559+ void X86AsmPrinter::maybeEmitNopAfterCallForWindowsEH (const MachineInstr *MI) {
2560+ // We only need to insert NOPs after CALLs when targeting Windows on AMD64.
2561+ // Since this code is already restricted to X86, we just test for Win64.
2562+ const Triple &TT = TM.getTargetTriple ();
2563+ if (!TT.isOSWindows () || TT.getArch () != Triple::x86_64)
2564+ return ;
2565+
2566+ MachineBasicBlock::const_iterator MBBI (MI);
2567+ MachineBasicBlock::const_iterator MBBE = MI->getParent ()->end ();
2568+ ++MBBI; // Step over MI
2569+
2570+ // This loop iterates MBBs
2571+ for (;;) {
2572+
2573+ // This loop iterates instructions
2574+ for (; MBBI != MBBE; ++MBBI) {
2575+ // Check the instruction that follows this CALL.
2576+ const MachineInstr &NextMI = *MBBI;
2577+
2578+ // If there is an EH_LABEL after this CALL, then there is an EH state
2579+ // transition after this CALL. This is exactly the situation which
2580+ // requires NOP padding.
2581+ if (NextMI.isEHLabel ()) {
2582+ EmitAndCountInstruction (MCInstBuilder (X86::NOOP));
2583+ return ;
2584+ }
2585+
2586+ #if 0
2587+ // Somewhat similarly, if the CALL is the last instruction before the
2588+ // SEH prologue, then we also need a NOP. This is necessary because the
2589+ // Windows stack unwinder will not invoke a function's exception handler
2590+ // if the instruction pointer is in the function prologue or epilogue.
2591+ if (NextMI.getOpcode() == X86::SEH_BeginEpilogue) {
2592+ EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2593+ return;
2594+ }
2595+ #endif
2596+
2597+ if (!NextMI.isPseudo () && !NextMI.isMetaInstruction ()) {
2598+ // We found a real instruction. During the CALL, the return IP will
2599+ // point to this instruction. Since this instruction has the same EH
2600+ // state as the call itself (because there is no intervening EH_LABEL),
2601+ // the IP2State table will be accurate; there is no need to insert a
2602+ // NOP.
2603+ return ;
2604+ }
2605+
2606+ // The next instruction is a pseudo-op. Ignore it and keep searching.
2607+ // Because these instructions do not generate any machine code, they
2608+ // cannot prevent the IP2State table from pointing at the wrong
2609+ // instruction during a CALL.
2610+ }
2611+
2612+ // We've reached the end of this MBB. Find the next MBB in program order.
2613+ // MBB order should be finalized by this point, so falling across MBBs is
2614+ // expected.
2615+ MachineFunction::const_iterator MFI{MI->getParent ()};
2616+ MachineFunction::const_iterator MFE{MI->getParent ()->getParent ()->end ()};
2617+
2618+ if (MFI == MFE) {
2619+ if (MI->getParent ()->succ_empty ()) {
2620+ // If the CALL has no successors, then it is a noreturn function.
2621+ // Insert an INT3 instead of a NOP. This accomplishes the same purpose,
2622+ // but is more clear to reads. Also, analysis tools will understand
2623+ // that they should not continue disassembling after the CALL (unless
2624+ // there are other branches to that label).
2625+ EmitAndCountInstruction (MCInstBuilder (X86::INT3));
2626+ } else
2627+ EmitAndCountInstruction (MCInstBuilder (X86::NOOP));
2628+ return ;
2629+ }
2630+
2631+ // Set up iterator to scan the next basic block.
2632+ const MachineBasicBlock *NextMBB = &*MFI;
2633+ MBBI = NextMBB->instr_begin ();
2634+ MBBE = NextMBB->instr_end ();
2635+ }
2636+ }
0 commit comments