Commit ba1509d

Recommit X86: support Swift Async context
This adds support to the X86 backend for the newly committed swiftasync function parameter. If such a (pointer) parameter is present, it gets stored into an augmented frame record (populated in IR, but generally containing enhanced backtrace information for coroutines that use lots of tail calls back and forth).

The frame record layout is identical to AArch64's, primarily so that unwinders etc. don't gain extra complexity. Specifically, the new frame record is [AsyncCtx, %rbp, ReturnAddr], and its presence is signalled by bit 60 of the stored %rbp being set to 1. %rbp still points to the saved frame pointer in memory for backwards compatibility (only partial on x86, but on the other hand the odd placement of AsyncCtx before the rest of the record is because of x86).

Recommitted with a fix for the unwind info when i386 pc-rel thunks are adjacent to a prologue.
1 parent 7d64493 commit ba1509d
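
The tag bit makes the extended record discoverable by any stack walker that knows the convention. Below is a minimal sketch of such a consumer, assuming a hypothetical readWord callback for target-memory reads; nothing in it is part of this commit, and the constants (bit 60 as the tag, the context slot 8 bytes below the saved frame pointer) come from the message above.

    #include <cstdint>

    // Bit 60 of the saved frame pointer marks an extended frame record.
    constexpr uint64_t SwiftAsyncTagBit = uint64_t(1) << 60;

    struct FrameRecord {
      uint64_t SavedFP;    // stored at FP; tagged if the frame is extended
      uint64_t ReturnAddr; // stored at FP + 8
      uint64_t AsyncCtx;   // stored at FP - 8 in extended frames, else 0
    };

    // readWord abstracts fetching 8 bytes of target memory at Addr.
    bool readFrame(uint64_t FP, uint64_t (*readWord)(uint64_t Addr),
                   FrameRecord &Out) {
      Out.SavedFP = readWord(FP);
      Out.ReturnAddr = readWord(FP + 8);
      if (Out.SavedFP & SwiftAsyncTagBit) {
        // Extended record: [AsyncCtx, %rbp, ReturnAddr].
        Out.AsyncCtx = readWord(FP - 8);
        Out.SavedFP &= ~SwiftAsyncTagBit; // untag before following the chain
      } else {
        Out.AsyncCtx = 0;
      }
      return Out.SavedFP != 0; // a null saved FP terminates the walk
    }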

7 files changed: +281 -9 lines changed

llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp

Lines changed: 8 additions & 0 deletions
@@ -1454,6 +1454,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
     unsigned StackAdjust = 0;
     unsigned StackSize = 0;
     unsigned NumDefCFAOffsets = 0;
+    int MinAbsOffset = std::numeric_limits<int>::max();
 
     for (unsigned i = 0, e = Instrs.size(); i != e; ++i) {
       const MCCFIInstruction &Inst = Instrs[i];
@@ -1482,6 +1483,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
         memset(SavedRegs, 0, sizeof(SavedRegs));
         StackAdjust = 0;
         SavedRegIdx = 0;
+        MinAbsOffset = std::numeric_limits<int>::max();
         InstrOffset += MoveInstrSize;
         break;
       }
@@ -1525,6 +1527,7 @@ class DarwinX86AsmBackend : public X86AsmBackend {
         unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
         SavedRegs[SavedRegIdx++] = Reg;
         StackAdjust += OffsetSize;
+        MinAbsOffset = std::min(MinAbsOffset, abs(Inst.getOffset()));
         InstrOffset += PushInstrSize(Reg);
         break;
       }
@@ -1538,6 +1541,11 @@ class DarwinX86AsmBackend : public X86AsmBackend {
         // Offset was too big for a compact unwind encoding.
         return CU::UNWIND_MODE_DWARF;
 
+      // We don't attempt to track a real StackAdjust, so if the saved registers
+      // aren't adjacent to rbp we can't cope.
+      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
+        return CU::UNWIND_MODE_DWARF;
+
       // Get the encoding of the saved registers when we have a frame pointer.
       uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
       if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;
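
For concreteness: OffsetSize is 8 bytes on x86-64, and in a conventional frame the callee-saved push closest to the frame record sits at CFA-24 (return address at CFA-8, saved %rbp at CFA-16, first push at CFA-24, i.e. 3 * OffsetSize). An extended async frame interposes the context slot and its alignment slot, moving the nearest save out to CFA-40, so such functions, like any other unusual layout, fail the check above and fall back to DWARF, since compact unwind cannot describe the layout.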

llvm/lib/Target/X86/X86FastISel.cpp

Lines changed: 1 addition & 0 deletions
@@ -3068,6 +3068,7 @@ bool X86FastISel::fastLowerArguments() {
         Arg.hasAttribute(Attribute::InReg) ||
         Arg.hasAttribute(Attribute::StructRet) ||
         Arg.hasAttribute(Attribute::SwiftSelf) ||
+        Arg.hasAttribute(Attribute::SwiftAsync) ||
         Arg.hasAttribute(Attribute::SwiftError) ||
         Arg.hasAttribute(Attribute::Nest))
       return false;

llvm/lib/Target/X86/X86FrameLowering.cpp

Lines changed: 94 additions & 8 deletions
@@ -409,7 +409,13 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
     return 0;
 
   PI = MBB.erase(PI);
-  if (PI != MBB.end() && PI->isCFIInstruction()) PI = MBB.erase(PI);
+  if (PI != MBB.end() && PI->isCFIInstruction()) {
+    auto CIs = MBB.getParent()->getFrameInstructions();
+    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
+    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
+        CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
+      PI = MBB.erase(PI);
+  }
   if (!doMergeWithPrevious)
     MBBI = skipDebugInstructionsForward(PI, MBB.end());
 
@@ -1356,6 +1362,14 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
       STI.getTargetLowering()->hasStackProbeSymbol(MF);
   unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF);
 
+  if (HasFP && X86FI->hasSwiftAsyncContext()) {
+    BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8),
+            MachineFramePtr)
+        .addUse(MachineFramePtr)
+        .addImm(60)
+        .setMIFlag(MachineInstr::FrameSetup);
+  }
+
   // Re-align the stack on 64-bit if the x86-interrupt calling convention is
   // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
   // stack alignment.
@@ -1470,11 +1484,44 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
 
   if (!IsWin64Prologue && !IsFunclet) {
     // Update EBP with the new base value.
-    BuildMI(MBB, MBBI, DL,
-            TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
-            FramePtr)
-        .addReg(StackPtr)
-        .setMIFlag(MachineInstr::FrameSetup);
+    if (!X86FI->hasSwiftAsyncContext()) {
+      BuildMI(MBB, MBBI, DL,
+              TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
+              FramePtr)
+          .addReg(StackPtr)
+          .setMIFlag(MachineInstr::FrameSetup);
+    } else {
+      // Before we update the live frame pointer we have to ensure there's a
+      // valid (or null) asynchronous context in its slot just before FP in
+      // the frame record, so store it now.
+      const auto &Attrs = MF.getFunction().getAttributes();
+
+      if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
+        // We have an initial context in r14, store it just before the frame
+        // pointer.
+        MBB.addLiveIn(X86::R14);
+        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
+            .addReg(X86::R14)
+            .setMIFlag(MachineInstr::FrameSetup);
+      } else {
+        // No initial context, store null so that there's no pointer that
+        // could be misused.
+        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i8))
+            .addImm(0)
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
+      BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
+          .addUse(X86::RSP)
+          .addImm(1)
+          .addUse(X86::NoRegister)
+          .addImm(8)
+          .addUse(X86::NoRegister)
+          .setMIFlag(MachineInstr::FrameSetup);
+      BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP)
+          .addUse(X86::RSP)
+          .addImm(8)
+          .setMIFlag(MachineInstr::FrameSetup);
+    }
 
     if (NeedsDwarfCFI) {
       // Mark effective beginning of when frame pointer becomes valid.
@@ -1979,10 +2026,26 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
   // AfterPop is the position to insert .cfi_restore.
   MachineBasicBlock::iterator AfterPop = MBBI;
   if (HasFP) {
+    if (X86FI->hasSwiftAsyncContext()) {
+      // Discard the context.
+      int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
+      emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true);
+    }
     // Pop EBP.
     BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
             MachineFramePtr)
         .setMIFlag(MachineInstr::FrameDestroy);
+
+    // We need to reset FP to its untagged state on return. Bit 60 is currently
+    // used to show the presence of an extended frame.
+    if (X86FI->hasSwiftAsyncContext()) {
+      BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8),
+              MachineFramePtr)
+          .addUse(MachineFramePtr)
+          .addImm(60)
+          .setMIFlag(MachineInstr::FrameDestroy);
+    }
+
     if (NeedsDwarfCFI) {
       unsigned DwarfStackPtr =
           TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
@@ -2007,7 +2070,9 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
 
     if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
       if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
-          (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)))
+          (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+          (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+          (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)))
         break;
       FirstCSPop = PI;
     }
@@ -2039,6 +2104,9 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
     uint64_t LEAAmount =
         IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;
 
+    if (X86FI->hasSwiftAsyncContext())
+      LEAAmount -= 16;
+
     // There are only two legal forms of epilogue:
     // - add SEHAllocationSize, %rsp
     // - lea SEHAllocationSize(%FramePtr), %rsp
@@ -2367,6 +2435,14 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
     SpillSlotOffset -= SlotSize;
     MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
 
+    // The async context lives directly before the frame pointer, and we
+    // allocate a second slot to preserve stack alignment.
+    if (X86FI->hasSwiftAsyncContext()) {
+      SpillSlotOffset -= SlotSize;
+      MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+      SpillSlotOffset -= SlotSize;
+    }
+
    // Since emitPrologue and emitEpilogue will handle spilling and restoring of
    // the frame register, we can delete it from CSI list and not have to worry
    // about avoiding it later.
@@ -3267,7 +3343,11 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
 bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
   assert(MBB.getParent() && "Block is not attached to a function!");
   const MachineFunction &MF = *MBB.getParent();
-  return !TRI->hasStackRealignment(MF) || !MBB.isLiveIn(X86::EFLAGS);
+  if (!MBB.isLiveIn(X86::EFLAGS))
+    return true;
+
+  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
 }
 
 bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
@@ -3280,6 +3360,12 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
   if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
     return false;
 
+  // Swift async context epilogue has a BTR instruction that clobbers parts of
+  // EFLAGS.
+  const MachineFunction &MF = *MBB.getParent();
+  if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
+    return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
+
   if (canUseLEAForSPInEpilogue(*MBB.getParent()))
     return true;
 
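
Net effect for a 64-bit frame with an async context (an instruction-level reading of the BuildMI calls above, not text from the commit): the prologue runs roughly as btsq $60, %rbp to tag the caller's frame pointer, pushq %rbp, then pushq %r14 (or pushq $0 when no context argument is live in) for the context slot, leaq 8(%rsp), %rbp so that %rbp still points at the saved frame pointer, and subq $8, %rsp for the alignment slot; the epilogue mirrors it with addq $16, %rsp, popq %rbp, and btrq $60, %rbp to untag before returning.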

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 35 additions & 1 deletion
@@ -3747,6 +3747,20 @@ SDValue X86TargetLowering::LowerFormalArguments(
   }
 
   for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
+    if (Ins[I].Flags.isSwiftAsync()) {
+      auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
+      if (Subtarget.is64Bit())
+        X86FI->setHasSwiftAsyncContext(true);
+      else {
+        int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
+        X86FI->setSwiftAsyncContextFrameIdx(FI);
+        SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
+                                  DAG.getFrameIndex(FI, MVT::i32),
+                                  MachinePointerInfo::getFixedStack(MF, FI));
+        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
+      }
+    }
+
     // Swift calling convention does not require we copy the sret argument
     // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
     if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
@@ -25856,7 +25870,27 @@ SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     }
     return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
   }
-
+  case Intrinsic::swift_async_context_addr: {
+    auto &MF = DAG.getMachineFunction();
+    auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
+    if (Subtarget.is64Bit()) {
+      MF.getFrameInfo().setFrameAddressIsTaken(true);
+      X86FI->setHasSwiftAsyncContext(true);
+      return SDValue(
+          DAG.getMachineNode(
+              X86::SUB64ri8, dl, MVT::i64,
+              DAG.getCopyFromReg(DAG.getEntryNode(), dl, X86::RBP, MVT::i64),
+              DAG.getTargetConstant(8, dl, MVT::i32)),
+          0);
+    } else {
+      // 32-bit so no special extended frame, create or reuse an existing stack
+      // slot.
+      if (!X86FI->getSwiftAsyncContextFrameIdx())
+        X86FI->setSwiftAsyncContextFrameIdx(
+            MF.getFrameInfo().CreateStackObject(4, Align(4), false));
+      return DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32);
+    }
+  }
   case Intrinsic::x86_avx512_vp2intersect_q_512:
   case Intrinsic::x86_avx512_vp2intersect_q_256:
   case Intrinsic::x86_avx512_vp2intersect_q_128:
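
In short, on 64-bit targets the intrinsic answers with %rbp minus 8 (materialized as a SUB64ri8 off a copy of RBP), which is exactly the AsyncCtx slot of the extended frame record; in 32-bit mode there is no extended record, so the intrinsic and the argument lowering above share one ordinary 4-byte stack slot through SwiftAsyncContextFrameIdx.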

llvm/lib/Target/X86/X86MachineFunctionInfo.h

Lines changed: 15 additions & 0 deletions
@@ -108,6 +108,13 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   /// True if this function has any preallocated calls.
   bool HasPreallocatedCall = false;
 
+  /// Whether this function has an extended frame record [Ctx, RBP, Return
+  /// addr]. If so, bit 60 of the in-memory frame pointer will be 1 to enable
+  /// other tools to detect the extended record.
+  bool HasSwiftAsyncContext = false;
+
+  Optional<int> SwiftAsyncContextFrameIdx;
+
   ValueMap<const Value *, size_t> PreallocatedIds;
   SmallVector<size_t, 0> PreallocatedStackSizes;
   SmallVector<SmallVector<size_t, 4>, 0> PreallocatedArgOffsets;
@@ -197,6 +204,14 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   bool hasPreallocatedCall() const { return HasPreallocatedCall; }
   void setHasPreallocatedCall(bool v) { HasPreallocatedCall = v; }
 
+  bool hasSwiftAsyncContext() const { return HasSwiftAsyncContext; }
+  void setHasSwiftAsyncContext(bool v) { HasSwiftAsyncContext = v; }
+
+  Optional<int> getSwiftAsyncContextFrameIdx() const {
+    return SwiftAsyncContextFrameIdx;
+  }
+  void setSwiftAsyncContextFrameIdx(int v) { SwiftAsyncContextFrameIdx = v; }
+
   size_t getPreallocatedIdForCallSite(const Value *CS) {
     auto Insert = PreallocatedIds.insert({CS, PreallocatedIds.size()});
     if (Insert.second) {

Lines changed: 17 additions & 0 deletions
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s
+; RUN: llc -mtriple=x86_64-apple-darwin %s -o - -fast-isel | FileCheck %s
+
+define i8* @argument(i8* swiftasync %in) {
+; CHECK-LABEL: argument:
+; CHECK: movq %r14, %rax
+
+  ret i8* %in
+}
+
+define void @call(i8* %in) {
+; CHECK-LABEL: call:
+; CHECK: movq %rdi, %r14
+
+  call i8* @argument(i8* swiftasync %in)
+  ret void
+}
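
The CHECK lines also pin down the register convention: the swiftasync argument travels in %r14, a callee-saved register in the SysV x86-64 ABI and the x86 counterpart of x22, which plays the same role on AArch64.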
