@@ -2213,6 +2213,9 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2213
2213
OutStreamer->AddComment (" EVEX TO EVEX Compression " , false );
2214
2214
}
2215
2215
2216
+ // We use this to suppress NOP padding for Windows EH.
2217
+ bool IsTailJump = false ;
2218
+
2216
2219
switch (MI->getOpcode ()) {
2217
2220
case TargetOpcode::DBG_VALUE:
2218
2221
llvm_unreachable (" Should be handled target independently" );
@@ -2271,6 +2274,7 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2271
2274
case X86::TAILJMPm64_REX:
2272
2275
// Lower these as normal, but add some comments.
2273
2276
OutStreamer->AddComment (" TAILCALL" );
2277
+ IsTailJump = true ;
2274
2278
break ;
2275
2279
2276
2280
case X86::TLS_addr32:
@@ -2482,8 +2486,151 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2482
2486
SMShadowTracker.emitShadowPadding (*OutStreamer, getSubtargetInfo ());
2483
2487
// Then emit the call
2484
2488
OutStreamer->emitInstruction (TmpInst, getSubtargetInfo ());
2489
+
2490
+ // Since tail calls transfer control without leaving a stack frame, there is
2491
+ // never a need for NOP padding tail calls.
2492
+ if (!IsTailJump)
2493
+ maybeEmitNopAfterCallForWindowsEH (MI);
2485
2494
return ;
2486
2495
}
2487
2496
2488
2497
EmitAndCountInstruction (TmpInst);
2489
2498
}
2499
+
2500
+ // Determines whether a NOP is required after a CALL, so that Windows EH
2501
+ // IP2State tables have the correct information.
2502
+ //
2503
+ // On most Windows platforms (AMD64, ARM64, ARM32, IA64, but *not* x86-32),
2504
+ // exception handling works by looking up instruction pointers in lookup
2505
+ // tables. These lookup tables are stored in .xdata sections in executables.
2506
+ // One element of these lookup tables is the "IP2State" table (Instruction
2507
+ // Pointer to State).
2508
+ //
2509
+ // If a function has any instructions that require cleanup during exception
2510
+ // unwinding, then it will have an IP2State table. Each entry in the IP2State
2511
+ // table describes a range of bytes in the function's instruction stream, and
2512
+ // associates an "EH state number" with that range of instructions. A value of
2513
+ // -1 means "the null state", which does not require any code to execute.
2514
+ // A value other than -1 is an index into the State table.
2515
+ //
2516
+ // The entries in the IP2State table contain byte offsets within the instruction
2517
+ // stream of the function. The Windows ABI requires that these offsets are
2518
+ // aligned to instruction boundaries; they are not permitted to point to a byte
2519
+ // that is not the first byte of an instruction.
2520
+ //
2521
+ // Unfortunately, CALL instructions present a problem during unwinding. CALL
2522
+ // instructions push the address of the instruction after the CALL instruction,
2523
+ // so that execution can resume after the CALL. If the CALL is the last
2524
+ // instruction within an IP2State region, then the return address (on the stack)
2525
+ // points to the *next* IP2State region. This means that the unwinder will
2526
+ // use the wrong cleanup funclet during unwinding.
2527
+ //
2528
+ // To fix this problem, the Windows AMD64 ABI requires that CALL instructions
2529
+ // are never placed at the end of an IP2State region. Stated equivalently, the
2530
+ // end of a CALL instruction cannot be aligned to an IP2State boundary. If a
2531
+ // CALL instruction would occur at the end of an IP2State region, then the
2532
+ // compiler must insert a NOP instruction after the CALL. The NOP instruction
2533
+ // is placed in the same EH region as the CALL instruction, so that the return
2534
+ // address points to the NOP and the unwinder will locate the correct region.
2535
+ //
2536
+ // NOP padding is only necessary on Windows AMD64 targets. On ARM64 and ARM32,
2537
+ // instructions have a fixed size so the unwinder knows how to "back up" by
2538
+ // one instruction.
2539
+ //
2540
+ // Interaction with Import Call Optimization (ICO):
2541
+ //
2542
+ // Import Call Optimization (ICO) is a compiler + OS feature on Windows which
2543
+ // improves the performance and security of DLL imports. ICO relies on using a
2544
+ // specific CALL idiom that can be replaced by the OS DLL loader. This removes
2545
+ // a load and indirect CALL and replaces it with a single direct CALL.
2546
+ //
2547
+ // To achieve this, ICO also inserts NOPs after the CALL instruction. If the
2548
+ // end of the CALL is aligned with an EH state transition, we *also* insert
2549
+ // a single-byte NOP. **Both forms of NOPs must be preserved.** They cannot
2550
+ // be combined into a single larger NOP; nor can the second NOP be removed.
2551
+ //
2552
+ // This is necessary because, if ICO is active and the call site is modified
2553
+ // by the loader, the loader will end up overwriting the NOPs that were inserted
2554
+ // for ICO. That means that those NOPs cannot be used for the correct
2555
+ // termination of the exception handling region (the IP2State transition),
2556
+ // so we still need an additional NOP instruction. The NOPs cannot be combined
2557
+ // into a longer NOP (which is ordinarily desirable) because then ICO would
2558
+ // split one instruction, producing a malformed instruction after the ICO call.
2559
+ void X86AsmPrinter::maybeEmitNopAfterCallForWindowsEH (const MachineInstr *MI) {
2560
+ // We only need to insert NOPs after CALLs when targeting Windows on AMD64.
2561
+ // Since this code is already restricted to X86, we just test for Win64.
2562
+ const Triple &TT = TM.getTargetTriple ();
2563
+ if (!TT.isOSWindows () || TT.getArch () != Triple::x86_64)
2564
+ return ;
2565
+
2566
+ MachineBasicBlock::const_iterator MBBI (MI);
2567
+ MachineBasicBlock::const_iterator MBBE = MI->getParent ()->end ();
2568
+ ++MBBI; // Step over MI
2569
+
2570
+ // This loop iterates MBBs
2571
+ for (;;) {
2572
+
2573
+ // This loop iterates instructions
2574
+ for (; MBBI != MBBE; ++MBBI) {
2575
+ // Check the instruction that follows this CALL.
2576
+ const MachineInstr &NextMI = *MBBI;
2577
+
2578
+ // If there is an EH_LABEL after this CALL, then there is an EH state
2579
+ // transition after this CALL. This is exactly the situation which
2580
+ // requires NOP padding.
2581
+ if (NextMI.isEHLabel ()) {
2582
+ EmitAndCountInstruction (MCInstBuilder (X86::NOOP));
2583
+ return ;
2584
+ }
2585
+
2586
+ #if 0
2587
+ // Somewhat similarly, if the CALL is the last instruction before the
2588
+ // SEH prologue, then we also need a NOP. This is necessary because the
2589
+ // Windows stack unwinder will not invoke a function's exception handler
2590
+ // if the instruction pointer is in the function prologue or epilogue.
2591
+ if (NextMI.getOpcode() == X86::SEH_BeginEpilogue) {
2592
+ EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2593
+ return;
2594
+ }
2595
+ #endif
2596
+
2597
+ if (!NextMI.isPseudo () && !NextMI.isMetaInstruction ()) {
2598
+ // We found a real instruction. During the CALL, the return IP will
2599
+ // point to this instruction. Since this instruction has the same EH
2600
+ // state as the call itself (because there is no intervening EH_LABEL),
2601
+ // the IP2State table will be accurate; there is no need to insert a
2602
+ // NOP.
2603
+ return ;
2604
+ }
2605
+
2606
+ // The next instruction is a pseudo-op. Ignore it and keep searching.
2607
+ // Because these instructions do not generate any machine code, they
2608
+ // cannot prevent the IP2State table from pointing at the wrong
2609
+ // instruction during a CALL.
2610
+ }
2611
+
2612
+ // We've reached the end of this MBB. Find the next MBB in program order.
2613
+ // MBB order should be finalized by this point, so falling across MBBs is
2614
+ // expected.
2615
+ MachineFunction::const_iterator MFI{MI->getParent ()};
2616
+ MachineFunction::const_iterator MFE{MI->getParent ()->getParent ()->end ()};
2617
+
2618
+ if (MFI == MFE) {
2619
+ if (MI->getParent ()->succ_empty ()) {
2620
+ // If the CALL has no successors, then it is a noreturn function.
2621
+ // Insert an INT3 instead of a NOP. This accomplishes the same purpose,
2622
+ // but is more clear to reads. Also, analysis tools will understand
2623
+ // that they should not continue disassembling after the CALL (unless
2624
+ // there are other branches to that label).
2625
+ EmitAndCountInstruction (MCInstBuilder (X86::INT3));
2626
+ } else
2627
+ EmitAndCountInstruction (MCInstBuilder (X86::NOOP));
2628
+ return ;
2629
+ }
2630
+
2631
+ // Set up iterator to scan the next basic block.
2632
+ const MachineBasicBlock *NextMBB = &*MFI;
2633
+ MBBI = NextMBB->instr_begin ();
2634
+ MBBE = NextMBB->instr_end ();
2635
+ }
2636
+ }
0 commit comments