@@ -2217,6 +2217,9 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2217
2217
OutStreamer->AddComment (" EVEX TO EVEX Compression " , false );
2218
2218
}
2219
2219
2220
+ // We use this to suppress NOP padding for Windows EH.
2221
+ bool IsTailJump = false ;
2222
+
2220
2223
switch (MI->getOpcode ()) {
2221
2224
case TargetOpcode::DBG_VALUE:
2222
2225
llvm_unreachable (" Should be handled target independently" );
@@ -2275,6 +2278,7 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2275
2278
case X86::TAILJMPm64_REX:
2276
2279
// Lower these as normal, but add some comments.
2277
2280
OutStreamer->AddComment (" TAILCALL" );
2281
+ IsTailJump = true ;
2278
2282
break ;
2279
2283
2280
2284
case X86::TLS_addr32:
@@ -2486,8 +2490,151 @@ void X86AsmPrinter::emitInstruction(const MachineInstr *MI) {
2486
2490
SMShadowTracker.emitShadowPadding (*OutStreamer, getSubtargetInfo ());
2487
2491
// Then emit the call
2488
2492
OutStreamer->emitInstruction (TmpInst, getSubtargetInfo ());
2493
+
2494
+ // Since tail calls transfer control without leaving a stack frame, there is
2495
+ // never a need for NOP padding tail calls.
2496
+ if (!IsTailJump)
2497
+ maybeEmitNopAfterCallForWindowsEH (MI);
2489
2498
return ;
2490
2499
}
2491
2500
2492
2501
EmitAndCountInstruction (TmpInst);
2493
2502
}
2503
+
2504
+ // Determines whether a NOP is required after a CALL, so that Windows EH
2505
+ // IP2State tables have the correct information.
2506
+ //
2507
+ // On most Windows platforms (AMD64, ARM64, ARM32, IA64, but *not* x86-32),
2508
+ // exception handling works by looking up instruction pointers in lookup
2509
+ // tables. These lookup tables are stored in .xdata sections in executables.
2510
+ // One element of the lookup tables is the "IP2State" table (Instruction
2511
+ // Pointer to State).
2512
+ //
2513
+ // If a function has any instructions that require cleanup during exception
2514
+ // unwinding, then it will have an IP2State table. Each entry in the IP2State
2515
+ // table describes a range of bytes in the function's instruction stream, and
2516
+ // associates an "EH state number" with that range of instructions. A value of
2517
+ // -1 means "the null state", which does not require any code to execute.
2518
+ // A value other than -1 is an index into the State table.
2519
+ //
2520
+ // The entries in the IP2State table contain byte offsets within the instruction
2521
+ // stream of the function. The Windows ABI requires that these offsets are
2522
+ // aligned to instruction boundaries; they are not permitted to point to a byte
2523
+ // that is not the first byte of an instruction.
2524
+ //
2525
+ // Unfortunately, CALL instructions present a problem during unwinding. CALL
2526
+ // instructions push the address of the instruction after the CALL instruction,
2527
+ // so that execution can resume after the CALL. If the CALL is the last
2528
+ // instruction within an IP2State region, then the return address (on the stack)
2529
+ // points to the *next* IP2State region. This means that the unwinder will
2530
+ // use the wrong cleanup funclet during unwinding.
2531
+ //
2532
+ // To fix this problem, the Windows AMD64 ABI requires that CALL instructions
2533
+ // are never placed at the end of an IP2State region. Stated equivalently, the
2534
+ // end of a CALL instruction cannot be aligned to an IP2State boundary. If a
2535
+ // CALL instruction would occur at the end of an IP2State region, then the
2536
+ // compiler must insert a NOP instruction after the CALL. The NOP instruction
2537
+ // is placed in the same EH region as the CALL instruction, so that the return
2538
+ // address points to the NOP and the unwinder will locate the correct region.
2539
+ //
2540
+ // NOP padding is only necessary on Windows AMD64 targets. On ARM64 and ARM32,
2541
+ // instructions have a fixed size so the unwinder knows how to "back up" by
2542
+ // one instruction.
2543
+ //
2544
+ // Interaction with Import Call Optimization (ICO):
2545
+ //
2546
+ // Import Call Optimization (ICO) is a compiler + OS feature on Windows which
2547
+ // improves the performance and security of DLL imports. ICO relies on using a
2548
+ // specific CALL idiom that can be replaced by the OS DLL loader. This removes
2549
+ // a load and indirect CALL and replaces it with a single direct CALL.
2550
+ //
2551
+ // To achieve this, ICO also inserts NOPs after the CALL instruction. If the
2552
+ // end of the CALL is aligned with an EH state transition, we *also* insert
2553
+ // a single-byte NOP. **Both forms of NOPs must be preserved.** They cannot
2554
+ // be combined into a single larger NOP; nor can the second NOP be removed.
2555
+ //
2556
+ // This is necessary because, if ICO is active and the call site is modified
2557
+ // by the loader, the loader will end up overwriting the NOPs that were inserted
2558
+ // for ICO. That means that those NOPs cannot be used for the correct
2559
+ // termination of the exception handling region (the IP2State transition),
2560
+ // so we still need an additional NOP instruction. The NOPs cannot be combined
2561
+ // into a longer NOP (which is ordinarily desirable) because then ICO would
2562
+ // split one instruction, producing a malformed instruction after the ICO call.
2563
+ void X86AsmPrinter::maybeEmitNopAfterCallForWindowsEH (const MachineInstr *MI) {
2564
+ // We only need to insert NOPs after CALLs when targeting Windows on AMD64.
2565
+ // Since this code is already restricted to X86, we just test for Win64.
2566
+ const Triple &TT = TM.getTargetTriple ();
2567
+ if (!TT.isOSWindows () || TT.getArch () != Triple::x86_64)
2568
+ return ;
2569
+
2570
+ MachineBasicBlock::const_iterator MBBI (MI);
2571
+ MachineBasicBlock::const_iterator MBBE = MI->getParent ()->end ();
2572
+ ++MBBI; // Step over MI
2573
+
2574
+ // This loop iterates MBBs
2575
+ for (;;) {
2576
+
2577
+ // This loop iterates instructions
2578
+ for (; MBBI != MBBE; ++MBBI) {
2579
+ // Check the instruction that follows this CALL.
2580
+ const MachineInstr &NextMI = *MBBI;
2581
+
2582
+ // If there is an EH_LABEL after this CALL, then there is an EH state
2583
+ // transition after this CALL. This is exactly the situation which
2584
+ // requires NOP padding.
2585
+ if (NextMI.isEHLabel ()) {
2586
+ EmitAndCountInstruction (MCInstBuilder (X86::NOOP));
2587
+ return ;
2588
+ }
2589
+
2590
+ #if 0
2591
+ // Somewhat similarly, if the CALL is the last instruction before the
2592
+ // SEH prologue, then we also need a NOP. This is necessary because the
2593
+ // Windows stack unwinder will not invoke a function's exception handler
2594
+ // if the instruction pointer is in the function prologue or epilogue.
2595
+ if (NextMI.getOpcode() == X86::SEH_BeginEpilogue) {
2596
+ EmitAndCountInstruction(MCInstBuilder(X86::NOOP));
2597
+ return;
2598
+ }
2599
+ #endif
2600
+
2601
+ if (!NextMI.isPseudo () && !NextMI.isMetaInstruction ()) {
2602
+ // We found a real instruction. During the CALL, the return IP will
2603
+ // point to this instruction. Since this instruction has the same EH
2604
+ // state as the call itself (because there is no intervening EH_LABEL),
2605
+ // the IP2State table will be accurate; there is no need to insert a
2606
+ // NOP.
2607
+ return ;
2608
+ }
2609
+
2610
+ // The next instruction is a pseudo-op. Ignore it and keep searching.
2611
+ // Because these instructions do not generate any machine code, they
2612
+ // cannot prevent the IP2State table from pointing at the wrong
2613
+ // instruction during a CALL.
2614
+ }
2615
+
2616
+ // We've reached the end of this MBB. Find the next MBB in program order.
2617
+ // MBB order should be finalized by this point, so falling across MBBs is
2618
+ // expected.
2619
+ MachineFunction::const_iterator MFI{MI->getParent ()};
2620
+ MachineFunction::const_iterator MFE{MI->getParent ()->getParent ()->end ()};
2621
+
2622
+ if (MFI == MFE) {
2623
+ if (MI->getParent ()->succ_empty ()) {
2624
+ // If the CALL has no successors, then it is a noreturn function.
2625
+ // Insert an INT3 instead of a NOP. This accomplishes the same purpose,
2626
+ // but is more clear to reads. Also, analysis tools will understand
2627
+ // that they should not continue disassembling after the CALL (unless
2628
+ // there are other branches to that label).
2629
+ EmitAndCountInstruction (MCInstBuilder (X86::INT3));
2630
+ } else
2631
+ EmitAndCountInstruction (MCInstBuilder (X86::NOOP));
2632
+ return ;
2633
+ }
2634
+
2635
+ // Set up iterator to scan the next basic block.
2636
+ const MachineBasicBlock *NextMBB = &*MFI;
2637
+ MBBI = NextMBB->instr_begin ();
2638
+ MBBE = NextMBB->instr_end ();
2639
+ }
2640
+ }
0 commit comments