diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 6854c4cf6b97..c3c6890aeee6 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -595,14 +595,14 @@ enum CorInfoHelpFunc CORINFO_HELP_VALIDATE_INDIRECT_CALL, // CFG: Validate function pointer CORINFO_HELP_DISPATCH_INDIRECT_CALL, // CFG: Validate and dispatch to pointer - CORINFO_HELP_LLVM_GET_OR_INIT_SHADOW_STACK_TOP, CORINFO_HELP_LLVM_EH_CATCH, CORINFO_HELP_LLVM_EH_POP_UNWOUND_VIRTUAL_FRAMES, CORINFO_HELP_LLVM_EH_PUSH_VIRTUAL_UNWIND_FRAME, + CORINFO_HELP_LLVM_EH_REVERSE_PINVOKE_ENTER_AND_PUSH_VIRTUAL_UNWIND_FRAME, CORINFO_HELP_LLVM_EH_POP_VIRTUAL_UNWIND_FRAME, + CORINFO_HELP_LLVM_EH_REVERSE_PINVOKE_EXIT_AND_POP_VIRTUAL_UNWIND_FRAME, CORINFO_HELP_LLVM_EH_UNHANDLED_EXCEPTION, CORINFO_HELP_LLVM_RESOLVE_INTERFACE_CALL_TARGET, - CORINFO_HELP_LLVM_GET_EXTERNAL_CALL_TARGET, CORINFO_HELP_LLVM_STRESS_GC, CORINFO_HELP_COUNT, diff --git a/src/coreclr/inc/jithelpers.h b/src/coreclr/inc/jithelpers.h index e7ae43ac2b8e..27166b2c8046 100644 --- a/src/coreclr/inc/jithelpers.h +++ b/src/coreclr/inc/jithelpers.h @@ -343,14 +343,14 @@ JITHELPER(CORINFO_HELP_DISPATCH_INDIRECT_CALL, NULL, METHOD__NIL) #endif - JITHELPER(CORINFO_HELP_LLVM_GET_OR_INIT_SHADOW_STACK_TOP, NULL, METHOD__NIL) JITHELPER(CORINFO_HELP_LLVM_EH_CATCH, NULL, METHOD__NIL) JITHELPER(CORINFO_HELP_LLVM_EH_POP_UNWOUND_VIRTUAL_FRAMES, NULL, METHOD__NIL) JITHELPER(CORINFO_HELP_LLVM_EH_PUSH_VIRTUAL_UNWIND_FRAME, NULL, METHOD__NIL) + JITHELPER(CORINFO_HELP_LLVM_EH_REVERSE_PINVOKE_ENTER_AND_PUSH_VIRTUAL_UNWIND_FRAME, NULL, METHOD__NIL) JITHELPER(CORINFO_HELP_LLVM_EH_POP_VIRTUAL_UNWIND_FRAME, NULL, METHOD__NIL) + JITHELPER(CORINFO_HELP_LLVM_EH_REVERSE_PINVOKE_EXIT_AND_POP_VIRTUAL_UNWIND_FRAME, NULL, METHOD__NIL) JITHELPER(CORINFO_HELP_LLVM_EH_UNHANDLED_EXCEPTION, NULL, METHOD__NIL) JITHELPER(CORINFO_HELP_LLVM_RESOLVE_INTERFACE_CALL_TARGET, NULL, METHOD__NIL) - JITHELPER(CORINFO_HELP_LLVM_GET_EXTERNAL_CALL_TARGET, NULL, METHOD__NIL) 
JITHELPER(CORINFO_HELP_LLVM_STRESS_GC, JIT_StressGC, METHOD__NIL) #undef JITHELPER #undef DYNAMICJITHELPER diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index bb836a6f61ac..5a348d69c312 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -1740,6 +1740,7 @@ void Compiler::fgAddReversePInvokeEnterExit() LclVarDsc* varDsc = lvaGetDesc(lvaReversePInvokeFrameVar); lvaSetStruct(lvaReversePInvokeFrameVar, typGetBlkLayout(eeGetEEInfo()->sizeOfReversePInvokeFrame), false); +#ifndef TARGET_WASM // WASM RPI helpers have special ABI and are inserted in lowering. // Add enter pinvoke exit callout at the start of prolog GenTree* pInvokeFrameVar = gtNewLclVarAddrNode(lvaReversePInvokeFrameVar); @@ -1804,6 +1805,7 @@ void Compiler::fgAddReversePInvokeEnterExit() printf("\n"); } #endif +#endif // !TARGET_WASM } /***************************************************************************** diff --git a/src/coreclr/jit/llvm.cpp b/src/coreclr/jit/llvm.cpp index 7a2982ca4bcf..fe78d39da9d0 100644 --- a/src/coreclr/jit/llvm.cpp +++ b/src/coreclr/jit/llvm.cpp @@ -517,11 +517,11 @@ bool Llvm::helperCallMayVirtuallyUnwind(CorInfoHelpFunc helperFunc) const { FUNC(CORINFO_HELP_THROW_ENTRYPOINT_NOT_FOUND_EXCEPTION) }, // [R]PI helpers, implemented in "Runtime\thread.cpp". 
- { FUNC(CORINFO_HELP_JIT_PINVOKE_BEGIN) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR }, HFIF_SS_ARG | HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND }, - { FUNC(CORINFO_HELP_JIT_PINVOKE_END) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR }, HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND }, - { FUNC(CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR }, HFIF_SS_ARG }, + { FUNC(CORINFO_HELP_JIT_PINVOKE_BEGIN) CORINFO_TYPE_VOID, { }, HFIF_SS_ARG | HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND }, + { FUNC(CORINFO_HELP_JIT_PINVOKE_END) CORINFO_TYPE_VOID, { }, HFIF_SS_ARG | HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND }, + { FUNC(CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER) CORINFO_TYPE_PTR, { CORINFO_TYPE_NATIVEUINT }, HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND }, { FUNC(CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER_TRACK_TRANSITIONS) }, - { FUNC(CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR, CORINFO_TYPE_PTR }, HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND }, + { FUNC(CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR }, HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND }, { FUNC(CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT_TRACK_TRANSITIONS) }, // Implemented in "CoreLib\src\System\Runtime\TypeLoaderExports.cs". 
@@ -544,14 +544,14 @@ bool Llvm::helperCallMayVirtuallyUnwind(CorInfoHelpFunc helperFunc) const { FUNC(CORINFO_HELP_VALIDATE_INDIRECT_CALL) }, { FUNC(CORINFO_HELP_DISPATCH_INDIRECT_CALL) }, - { FUNC(CORINFO_HELP_LLVM_GET_OR_INIT_SHADOW_STACK_TOP) CORINFO_TYPE_PTR, { }, HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND }, { FUNC(CORINFO_HELP_LLVM_EH_CATCH) CORINFO_TYPE_CLASS, { CORINFO_TYPE_NATIVEUINT }, HFIF_SS_ARG }, { FUNC(CORINFO_HELP_LLVM_EH_POP_UNWOUND_VIRTUAL_FRAMES) CORINFO_TYPE_VOID, { }, HFIF_SS_ARG }, - { FUNC(CORINFO_HELP_LLVM_EH_PUSH_VIRTUAL_UNWIND_FRAME) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR, CORINFO_TYPE_PTR, CORINFO_TYPE_NATIVEUINT }, HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND}, - { FUNC(CORINFO_HELP_LLVM_EH_POP_VIRTUAL_UNWIND_FRAME) CORINFO_TYPE_VOID, { }, HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND}, + { FUNC(CORINFO_HELP_LLVM_EH_PUSH_VIRTUAL_UNWIND_FRAME) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR, CORINFO_TYPE_PTR, CORINFO_TYPE_NATIVEUINT }, HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND }, + { FUNC(CORINFO_HELP_LLVM_EH_REVERSE_PINVOKE_ENTER_AND_PUSH_VIRTUAL_UNWIND_FRAME) CORINFO_TYPE_PTR, { CORINFO_TYPE_NATIVEUINT, CORINFO_TYPE_PTR, CORINFO_TYPE_NATIVEUINT }, HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND }, + { FUNC(CORINFO_HELP_LLVM_EH_POP_VIRTUAL_UNWIND_FRAME) CORINFO_TYPE_VOID, { }, HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND }, + { FUNC(CORINFO_HELP_LLVM_EH_REVERSE_PINVOKE_EXIT_AND_POP_VIRTUAL_UNWIND_FRAME) CORINFO_TYPE_VOID, { CORINFO_TYPE_PTR }, HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND }, { FUNC(CORINFO_HELP_LLVM_EH_UNHANDLED_EXCEPTION) CORINFO_TYPE_VOID, { CORINFO_TYPE_CLASS }, HFIF_SS_ARG }, { FUNC(CORINFO_HELP_LLVM_RESOLVE_INTERFACE_CALL_TARGET) CORINFO_TYPE_PTR, { CORINFO_TYPE_CLASS, CORINFO_TYPE_PTR }, HFIF_SS_ARG }, - { FUNC(CORINFO_HELP_LLVM_GET_EXTERNAL_CALL_TARGET) CORINFO_TYPE_PTR, { }, HFIF_NO_RPI_OR_GC | HFIF_NO_VIRTUAL_UNWIND }, { FUNC(CORINFO_HELP_LLVM_STRESS_GC) CORINFO_TYPE_BYREF, { CORINFO_TYPE_BYREF, CORINFO_TYPE_PTR }, HFIF_SS_ARG }, }; // 
clang-format on diff --git a/src/coreclr/jit/llvm.h b/src/coreclr/jit/llvm.h index cd944165431e..8a43fd9f12cf 100644 --- a/src/coreclr/jit/llvm.h +++ b/src/coreclr/jit/llvm.h @@ -308,6 +308,7 @@ class Llvm // Shared between virtual unwind frame insertion and LSSA. unsigned m_initialUnwindIndex = UNWIND_INDEX_NONE; + CORINFO_GENERIC_HANDLE m_ehInfoSymbol = nullptr; // Shared between unwind index insertion and EH codegen. ArrayStack* m_unwindIndexMap = nullptr; diff --git a/src/coreclr/jit/llvmlower.cpp b/src/coreclr/jit/llvmlower.cpp index dbb15fad9f7c..daabed2fc0dc 100644 --- a/src/coreclr/jit/llvmlower.cpp +++ b/src/coreclr/jit/llvmlower.cpp @@ -243,6 +243,8 @@ void Llvm::lowerBlock(BasicBlock* block) void Llvm::lowerRange(BasicBlock* block, LIR::Range& range) { + BasicBlock* savedBlock = m_currentBlock; + LIR::Range* savedRange = m_currentRange; m_currentBlock = block; m_currentRange = ⦥ @@ -253,8 +255,8 @@ void Llvm::lowerRange(BasicBlock* block, LIR::Range& range) INDEBUG(range.CheckLIR(_compiler, /* checkUnusedValues */ true)); - m_currentBlock = nullptr; - m_currentRange = nullptr; + m_currentBlock = savedBlock; + m_currentRange = savedRange; } void Llvm::lowerNode(GenTree* node) @@ -795,21 +797,19 @@ void Llvm::lowerUnmanagedCall(GenTreeCall* callNode) // two or more consecutive PI calls. if (!callNode->IsSuppressGCTransition()) { + // TODO-LLVM-Upstream: don't allocate lvaInlinedPInvokeFrameVar (its size is zero). assert(_compiler->opts.ShouldUsePInvokeHelpers()); // No inline transition support yet. assert(_compiler->lvaInlinedPInvokeFrameVar != BAD_VAR_NUM); // Insert CORINFO_HELP_JIT_PINVOKE_BEGIN. 
- GenTreeLclFld* frameAddr = _compiler->gtNewLclVarAddrNode(_compiler->lvaInlinedPInvokeFrameVar); - GenTreeCall* helperCall = _compiler->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_BEGIN, TYP_VOID, frameAddr); - CurrentRange().InsertBefore(callNode, frameAddr, helperCall); - lowerNode(frameAddr); + GenTreeCall* helperCall = _compiler->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_BEGIN, TYP_VOID); + CurrentRange().InsertBefore(callNode, helperCall); lowerNode(helperCall); // Insert CORINFO_HELP_JIT_PINVOKE_END. No need to explicitly lower the call/local address as the // normal lowering loop will pick them up. - frameAddr = _compiler->gtNewLclVarAddrNode(_compiler->lvaInlinedPInvokeFrameVar); - helperCall = _compiler->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_END, TYP_VOID, frameAddr); - CurrentRange().InsertAfter(callNode, frameAddr, helperCall); + helperCall = _compiler->gtNewHelperCallNode(CORINFO_HELP_JIT_PINVOKE_END, TYP_VOID); + CurrentRange().InsertAfter(callNode, helperCall); } if (callNode->gtCallType != CT_INDIRECT) @@ -1598,20 +1598,27 @@ bool Llvm::addVirtualUnwindFrameForExceptionHandling() CORINFO_GENERIC_HANDLE ehInfoSymbol = m_llvm->GetSparseVirtualUnwindInfo(&clauses.BottomRef(), clauses.Height()); - GenTree* ehInfoNode = - m_compiler->gtNewIconHandleNode(reinterpret_cast(ehInfoSymbol), GTF_ICON_CONST_PTR); - GenTree* unwindFrameLclAddr = m_compiler->gtNewLclVarAddrNode(unwindFrameLclNum); - GenTreeIntCon* initialUnwindIndexNode = m_compiler->gtNewIconNode(m_initialIndexValue, TYP_I_IMPL); - GenTreeCall* initializeCall = - m_compiler->gtNewHelperCallNode(CORINFO_HELP_LLVM_EH_PUSH_VIRTUAL_UNWIND_FRAME, TYP_VOID, - unwindFrameLclAddr, ehInfoNode, initialUnwindIndexNode); - - LIR::Range initRange; - initRange.InsertAtEnd(unwindFrameLclAddr); - initRange.InsertAtEnd(ehInfoNode); - initRange.InsertAtEnd(initialUnwindIndexNode); - initRange.InsertAtEnd(initializeCall); - m_llvm->lowerAndInsertIntoFirstBlock(std::move(initRange)); + // For frames with 
an RPI transition, we will use RPI helpers that combine the transitions with unwind + // frame linking. + if (!m_compiler->opts.IsReversePInvoke()) + { + GenTree* ehInfoNode = + m_compiler->gtNewIconHandleNode(reinterpret_cast(ehInfoSymbol), GTF_ICON_CONST_PTR); + GenTree* unwindFrameLclAddr = m_compiler->gtNewLclVarAddrNode(unwindFrameLclNum); + GenTreeIntCon* initialUnwindIndexNode = m_compiler->gtNewIconNode(m_initialIndexValue, TYP_I_IMPL); + GenTreeCall* initializeCall = + m_compiler->gtNewHelperCallNode(CORINFO_HELP_LLVM_EH_PUSH_VIRTUAL_UNWIND_FRAME, TYP_VOID, + unwindFrameLclAddr, ehInfoNode, initialUnwindIndexNode); + + LIR::Range initRange; + initRange.InsertAtEnd(unwindFrameLclAddr); + initRange.InsertAtEnd(ehInfoNode); + initRange.InsertAtEnd(initialUnwindIndexNode); + initRange.InsertAtEnd(initializeCall); + m_llvm->lowerAndInsertIntoFirstBlock(std::move(initRange)); + } + + m_llvm->m_ehInfoSymbol = ehInfoSymbol; m_llvm->m_sparseVirtualUnwindFrameLclNum = unwindFrameLclNum; } @@ -1630,7 +1637,7 @@ bool Llvm::addVirtualUnwindFrameForExceptionHandling() } // Explicit pops are only needed for explicitly linked (via TLS) sparse frames. - if (m_llvm->m_sparseVirtualUnwindFrameLclNum != BAD_VAR_NUM) + if ((m_llvm->m_sparseVirtualUnwindFrameLclNum != BAD_VAR_NUM) && !m_compiler->opts.IsReversePInvoke()) { for (BasicBlock* block : m_compiler->Blocks()) { diff --git a/src/coreclr/jit/llvmlssa.cpp b/src/coreclr/jit/llvmlssa.cpp index 771353f2767c..5901d25a0c39 100644 --- a/src/coreclr/jit/llvmlssa.cpp +++ b/src/coreclr/jit/llvmlssa.cpp @@ -80,6 +80,14 @@ class ShadowStackAllocator private: void IdentifyCandidatesAndInitializeLocals() { + if (m_compiler->lvaReversePInvokeFrameVar != BAD_VAR_NUM) + { + // Expose this explicitly since we delay inserting the RPI helpers until after allocation. 
+ m_compiler->lvaSetVarAddrExposed( + m_compiler->lvaReversePInvokeFrameVar DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS)); + m_compiler->lvaGetDesc(m_compiler->lvaReversePInvokeFrameVar)->lvHasExplicitInit = true; + } + // Initialize independently promoted parameter field locals. // for (unsigned lclNum = 0; lclNum < m_compiler->lvaCount; lclNum++) @@ -163,6 +171,12 @@ class ShadowStackAllocator allocLocation = REG_STK_CANDIDATE_UNCONDITIONAL; INDEBUG(reason = "sparse virtual unwind frame"); } + // RPI frame being on the shadow stack allows us to combine it with the sparse virtual unwind frame. + else if (lclNum == m_compiler->lvaReversePInvokeFrameVar) + { + allocLocation = REG_STK_CANDIDATE_UNCONDITIONAL; + INDEBUG(reason = "RPI frame"); + } // Precise virtual unwind frames work by being at known offsets from each other on the shadow stack. else if (lclNum == m_llvm->m_preciseVirtualUnwindFrameLclNum) { @@ -1572,8 +1586,15 @@ class ShadowStackAllocator m_compiler->lvaGetDesc(preciseVirtualUnwindFrameLclNum)->SetRegNum(REG_STK); } - // The shadow frame must be allocated at a zero offset; the runtime uses its value as the original - // shadow frame parameter to filter funclets. + // As an optimization, the RPI frame is hardcoded to be at offset zero so that we don't + // need to pass its offset to the RPI helper. + if (m_compiler->lvaReversePInvokeFrameVar != BAD_VAR_NUM) + { + assignOffset(m_compiler->lvaGetDesc(m_compiler->lvaReversePInvokeFrameVar)); + } + + // As another optimization, the sparse virtual unwind frame is allocated right after the RPI frame + // so that we can use the RPI helpers which combine the transition itself with the EH frame push/pop.
if (m_llvm->m_sparseVirtualUnwindFrameLclNum != BAD_VAR_NUM) { assignOffset(m_compiler->lvaGetDesc(m_llvm->m_sparseVirtualUnwindFrameLclNum)); @@ -1635,7 +1656,7 @@ class ShadowStackAllocator m_llvm->m_currentBlock = m_compiler->fgFirstBB; m_llvm->m_currentRange = &initRange; - InitializeShadowStackValue(); + InitializeShadowStackValueAndInsertReversePInvokeTransitions(); m_llvm->initializePreciseVirtualUnwindFrame(); unsigned zeroingSize = m_prologZeroingSize; @@ -1664,8 +1685,11 @@ class ShadowStackAllocator m_llvm->m_prologEnd = zeroILOffsetNode; } - void InitializeShadowStackValue() + void InitializeShadowStackValueAndInsertReversePInvokeTransitions() { + unsigned alignment = m_shadowFrameAlignment; + bool explicitAlignNeeded = alignment != DEFAULT_SHADOW_STACK_ALIGNMENT; + unsigned lclNum = m_llvm->m_shadowStackLclNum; LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); // The liveness of our shadow stack local that has been computed before LSSA is not correct since we haven't @@ -1673,21 +1697,64 @@ class ShadowStackAllocator varDsc->lvTracked = 0; GenTreeLclVar* def = nullptr; - if (!varDsc->lvIsParam) + assert(!varDsc->lvIsParam == m_compiler->opts.IsReversePInvoke()); + if (m_compiler->opts.IsReversePInvoke()) { - GenTree* call = m_compiler->gtNewHelperCallNode(CORINFO_HELP_LLVM_GET_OR_INIT_SHADOW_STACK_TOP, TYP_I_IMPL); + assert(!m_compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TRACK_TRANSITIONS)); + + // We optimize the case where the transition can be combined with the virtual unwind frame push/pop. + GenTree* call; + GenTree* alignValueNode = m_compiler->gtNewIconNode(explicitAlignNeeded ? 
alignment : 0, TYP_I_IMPL); + m_llvm->CurrentRange().InsertAtEnd(alignValueNode); + if (m_llvm->m_sparseVirtualUnwindFrameLclNum != BAD_VAR_NUM) + { + GenTree* ehInfoNode = m_compiler->gtNewIconHandleNode( + reinterpret_cast(m_llvm->m_ehInfoSymbol), GTF_ICON_CONST_PTR); + m_llvm->CurrentRange().InsertAtEnd(ehInfoNode); + + GenTreeIntCon* initialUnwindIndexNode = + m_compiler->gtNewIconNode(m_llvm->m_initialUnwindIndex, TYP_I_IMPL); + m_llvm->CurrentRange().InsertAtEnd(initialUnwindIndexNode); + + call = m_compiler->gtNewHelperCallNode( + CORINFO_HELP_LLVM_EH_REVERSE_PINVOKE_ENTER_AND_PUSH_VIRTUAL_UNWIND_FRAME, + TYP_I_IMPL, alignValueNode, ehInfoNode, initialUnwindIndexNode); + } + else + { + call = m_compiler->gtNewHelperCallNode( + CORINFO_HELP_JIT_REVERSE_PINVOKE_ENTER, TYP_I_IMPL, alignValueNode); + } def = m_compiler->gtNewStoreLclVarNode(lclNum, call); m_llvm->CurrentRange().InsertAtEnd(call); m_llvm->CurrentRange().InsertAtEnd(def); varDsc->lvHasExplicitInit = 1; + explicitAlignNeeded = false; // The helper will align the shadow stack as necessary. JITDUMP("ReversePInvoke: initialized the shadow stack:\n"); DISPTREERANGE(m_llvm->CurrentRange(), def); + + for (BasicBlock* block : m_compiler->Blocks()) + { + if (block->KindIs(BBJ_RETURN)) + { + LIR::Range callRange; + CorInfoHelpFunc helperFunc = m_llvm->m_sparseVirtualUnwindFrameLclNum != BAD_VAR_NUM + ? 
CORINFO_HELP_LLVM_EH_REVERSE_PINVOKE_EXIT_AND_POP_VIRTUAL_UNWIND_FRAME + : CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT; + GenTree* addr = m_compiler->gtNewLclVarAddrNode(m_compiler->lvaReversePInvokeFrameVar); + GenTree* call = m_compiler->gtNewHelperCallNode(helperFunc, TYP_VOID, addr); + callRange.InsertAtEnd(addr); + callRange.InsertAtEnd(call); + + m_llvm->lowerRange(block, callRange); + LIR::InsertBeforeTerminator(block, std::move(callRange)); + } + } } m_llvm->m_shadowStackSsaNum = AddUntrackedSsaDef(def, lclNum); - unsigned alignment = m_shadowFrameAlignment; - if (alignment != DEFAULT_SHADOW_STACK_ALIGNMENT) + if (explicitAlignNeeded) { // Zero the padding that may be introduced by the code below. This serves two purposes: // 1. We don't leave "random" pointers on the shadow stack. @@ -1827,9 +1894,14 @@ class ShadowStackAllocator // Filters will be called by the first pass while live state still exists on shadow frames above (in the // traditional sense, where stacks grow down) them. For this reason, filters will access state from the // original frame via a dedicated shadow stack pointer, and use the actual shadow stack for calls. - regNumber shadowStackArgReg = - m_llvm->isBlockInFilter(m_llvm->CurrentBlock()) ? REG_ORIGINAL_SHADOW_STACK_ARG : REG_NA; + bool isFilter = m_llvm->isBlockInFilter(m_llvm->CurrentBlock()); + regNumber shadowStackArgReg = isFilter ? REG_ORIGINAL_SHADOW_STACK_ARG : REG_NA; unsigned lclOffset = lclBaseOffset + lclNode->GetLclOffs(); + if (isFilter && (m_llvm->m_sparseVirtualUnwindFrameLclNum != BAD_VAR_NUM)) + { + // In the sparse model, the original shadow stack pointer is the address of the virtual unwind frame. + lclOffset -= m_compiler->lvaGetDesc(m_llvm->m_sparseVirtualUnwindFrameLclNum)->GetStackOffset(); + } GenTree* lclAddress = lclAddress = InsertShadowStackAddr(lclNode, lclOffset, shadowStackArgReg); ClassLayout* layout = lclNode->TypeIs(TYP_STRUCT) ?
lclNode->GetLayout(m_compiler) : nullptr; @@ -1891,16 +1963,6 @@ class ShadowStackAllocator void RewriteCall(GenTreeCall* call) { - if (call->IsHelperCall(m_compiler, CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT)) - { - // The RPI exit call has an additional argument - the shadow stack top on entry to this RPI method. - GenTree* previousShadowStackTop = InsertShadowStackAddr(call, 0); - CallArg* callArg = - call->gtArgs.PushFront(m_compiler, NewCallArg::Primitive(previousShadowStackTop, CORINFO_TYPE_PTR)); - callArg->AbiInfo.IsPointer = true; - callArg->AbiInfo.ArgType = TYP_I_IMPL; - } - // Add in the shadow stack argument now that we know the shadow frame size. if (m_llvm->callHasManagedCallingConvention(call)) { diff --git a/src/coreclr/jit/utils.cpp b/src/coreclr/jit/utils.cpp index 3b16eec3c7cc..6de527fd9c01 100644 --- a/src/coreclr/jit/utils.cpp +++ b/src/coreclr/jit/utils.cpp @@ -1810,7 +1810,6 @@ void HelperCallProperties::init() // This is a debugging aid; it simply returns a constant address. case CORINFO_HELP_LOOP_CLONE_CHOICE_ADDR: - case CORINFO_HELP_LLVM_GET_EXTERNAL_CALL_TARGET: isPure = true; noThrow = true; break; @@ -1841,22 +1840,16 @@ void HelperCallProperties::init() mutatesHeap = true; // Conservatively. 
break; - case CORINFO_HELP_LLVM_GET_OR_INIT_SHADOW_STACK_TOP: + case CORINFO_HELP_LLVM_EH_REVERSE_PINVOKE_ENTER_AND_PUSH_VIRTUAL_UNWIND_FRAME: + case CORINFO_HELP_LLVM_EH_REVERSE_PINVOKE_EXIT_AND_POP_VIRTUAL_UNWIND_FRAME: + isNoGC = true; + FALLTHROUGH; case CORINFO_HELP_LLVM_EH_CATCH: case CORINFO_HELP_LLVM_EH_POP_UNWOUND_VIRTUAL_FRAMES: case CORINFO_HELP_LLVM_EH_PUSH_VIRTUAL_UNWIND_FRAME: case CORINFO_HELP_LLVM_EH_POP_VIRTUAL_UNWIND_FRAME: noThrow = true; mutatesHeap = true; - switch (helper) - { - case CORINFO_HELP_LLVM_GET_OR_INIT_SHADOW_STACK_TOP: - nonNullReturn = true; - break; - - default: - break; - } break; default: diff --git a/src/coreclr/nativeaot/Runtime/GCHelpers.cpp b/src/coreclr/nativeaot/Runtime/GCHelpers.cpp index c50d2c854c24..91805266d6ba 100644 --- a/src/coreclr/nativeaot/Runtime/GCHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/GCHelpers.cpp @@ -672,7 +672,7 @@ EXTERN_C void* F_CALL_CONV RhpGcAlloc(MethodTable* pEEType, uint32_t uFlags, uin ASSERT(pThread->IsHijacked()); pTransitionFrame->m_RIP = pThread->GetHijackedReturnAddress(); } -#else +#elif !defined(HOST_WASM) // NOTE: The x64 fixup above would not be sufficient on ARM64 and similar architectures since // m_RIP is used to restore LR in POP_COOP_PINVOKE_FRAME. diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 762cde5c9bfb..171b2e25c916 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -70,6 +70,7 @@ EXTERN_C CODE_LOCATION RhpRethrow2; #define FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY(msg) { ASSERT_UNCONDITIONALLY(msg); RhFailFast(); } #endif +#ifndef HOST_WASM // TODO-LLVM: consider excluding this whole file from the portable runtime build... 
StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PInvokeTransitionFrame* pInitialTransitionFrame) { STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ GC ]\n"); @@ -94,6 +95,7 @@ StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PInvokeTransition PrepareToYieldFrame(); } +#endif // !HOST_WASM StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx) { diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index 4451b9225d5f..16db0e813093 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -491,9 +491,11 @@ class Thread; #if defined(USE_PORTABLE_HELPERS) struct PInvokeTransitionFrame { +#ifndef HOST_WASM Thread* m_pThread; // Cached so that GetThread is only called once per method - uint32_t m_Flags; // PInvokeTransitionFrameFlags. TODO-LLVM-CQ: Remove. Only needed for Thread.Abort "support". - TgtPTR_Void m_RIP; // PInvokeTransitionFrameFlags. TODO-LLVM-CQ: Remove. + uint32_t m_Flags; // PInvokeTransitionFrameFlags. + TgtPTR_Void m_RIP; // PInvokeTransitionFrameFlags. +#endif // HOST_WASM }; #else // USE_PORTABLE_HELPERS struct PInvokeTransitionFrame diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index 3c471751f244..e738a32a8fdb 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -146,9 +146,7 @@ void Thread::ResetCachedTransitionFrame() void Thread::EnablePreemptiveMode() { ASSERT(ThreadStore::GetCurrentThread() == this); -#if !defined(HOST_WASM) ASSERT(m_pDeferredTransitionFrame != NULL); -#endif // set preemptive mode VolatileStoreWithoutBarrier(&m_pTransitionFrame, m_pDeferredTransitionFrame); @@ -317,6 +315,13 @@ void Thread::Construct() ASSERT(m_threadAbortException == NULL); +#ifdef HOST_WASM + // TODO-LLVM: make this configurable. E. g. dependent on native stack size. 
+ m_pShadowStackBottom = new (nothrow) uint8_t[1 * 1024 * 1024]; + if (m_pShadowStackBottom == nullptr) + RhFailFast(); +#endif // HOST_WASM + #ifdef FEATURE_SUSPEND_REDIRECTION ASSERT(m_redirectionContextBuffer == NULL); #endif //FEATURE_SUSPEND_REDIRECTION @@ -387,6 +392,13 @@ void Thread::Destroy() StressLog::ThreadDetach(ptsl); #endif // STRESS_LOG +#ifdef HOST_WASM + if (m_pShadowStackBottom != nullptr) + { + delete[] m_pShadowStackBottom; + } +#endif // HOST_WASM + #ifdef FEATURE_SUSPEND_REDIRECTION if (m_redirectionContextBuffer != NULL) { @@ -398,10 +410,12 @@ void Thread::Destroy() } #ifdef HOST_WASM -void Thread::GcScanWasmShadowStack(ScanFunc * pfnEnumCallback, ScanContext * pvCallbackData) +void Thread::GcScanRootsWorker_Wasm(ScanFunc * pfnEnumCallback, ScanContext * pvCallbackData) { // Wasm does not permit iteration of stack frames so is uses a shadow stack instead - EnumGcRefsInRegionConservatively((PTR_OBJECTREF)m_pShadowStackBottom, (PTR_OBJECTREF)m_pShadowStackTop, pfnEnumCallback, pvCallbackData); + PTR_OBJECTREF pShadowStackBottom = (PTR_OBJECTREF)GetShadowStackBottom(); + PTR_OBJECTREF pShadowStackTop = (PTR_OBJECTREF)GetShadowStackTop(GetTransitionFrame()); + EnumGcRefsInRegionConservatively(pShadowStackBottom, pShadowStackTop, pfnEnumCallback, pvCallbackData); // TODO-LLVM-Upstream: unify this method with the general "GcScanRootsWorker" below. 
for (GCFrameRegistration* pCurGCFrame = m_pGCFrameRegistrations; pCurGCFrame != NULL; pCurGCFrame = pCurGCFrame->m_pNext) @@ -422,7 +436,7 @@ void Thread::GcScanRoots(ScanFunc * pfnEnumCallback, ScanContext * pvCallbackDat this->CrossThreadUnhijack(); #ifdef HOST_WASM - GcScanWasmShadowStack(pfnEnumCallback, pvCallbackData); + GcScanRootsWorker_Wasm(pfnEnumCallback, pvCallbackData); #else StackFrameIterator frameIterator(this, GetTransitionFrame()); GcScanRootsWorker(pfnEnumCallback, pvCallbackData, frameIterator); @@ -1037,7 +1051,11 @@ EXTERN_C void FASTCALL RhpUnsuppressGcStress() // Standard calling convention variant and actual implementation for RhpWaitForGC EXTERN_C NOINLINE void FASTCALL RhpWaitForGC2(PInvokeTransitionFrame * pFrame) { - Thread * pThread = pFrame->m_pThread; +#ifdef HOST_WASM + Thread* pThread = ThreadStore::GetCurrentThread(); +#else + Thread* pThread = pFrame->m_pThread; +#endif if (pThread->IsDoNotTriggerGcSet()) return; @@ -1047,10 +1065,12 @@ EXTERN_C NOINLINE void FASTCALL RhpWaitForGC2(PInvokeTransitionFrame * pFrame) // Standard calling convention variant and actual implementation for RhpGcPoll EXTERN_C NOINLINE void FASTCALL RhpGcPoll2(PInvokeTransitionFrame* pFrame) { +#ifndef HOST_WASM ASSERT(!Thread::IsHijackTarget(pFrame->m_RIP)); Thread* pThread = ThreadStore::GetCurrentThread(); pFrame->m_pThread = pThread; +#endif // !HOST_WASM RhpWaitForGC2(pFrame); } @@ -1319,6 +1339,7 @@ FCIMPL0(uint64_t, RhCurrentOSThreadId) } FCIMPLEND +#ifndef HOST_WASM // Standard calling convention variant and actual implementation for RhpReversePInvokeAttachOrTrapThread EXTERN_C NOINLINE void FASTCALL RhpReversePInvokeAttachOrTrapThread2(ReversePInvokeFrame* pFrame) { @@ -1330,7 +1351,6 @@ EXTERN_C NOINLINE void FASTCALL RhpReversePInvokeAttachOrTrapThread2(ReversePInv // PInvoke // -#ifndef HOST_WASM FCIMPL1(void, RhpReversePInvoke, ReversePInvokeFrame * pFrame) { Thread * pCurThread = ThreadStore::RawGetCurrentThread(); diff --git 
a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h index 47e28c6321ac..02f244fb449c 100644 --- a/src/coreclr/nativeaot/Runtime/thread.h +++ b/src/coreclr/nativeaot/Runtime/thread.h @@ -166,19 +166,21 @@ struct RuntimeThreadLocals uint8_t* m_redirectionContextBuffer; // storage for redirection context, allocated on demand #endif //FEATURE_SUSPEND_REDIRECTION +#ifdef HOST_WASM + uint8_t* m_pShadowStackBottom; +#endif // HOST_WASM + #ifdef FEATURE_GC_STRESS uint32_t m_uRand; // current per-thread random number #endif // FEATURE_GC_STRESS -#ifdef HOST_WASM - void* m_pShadowStackBottom; - void* m_pShadowStackTop; -#endif // HOST_WASM }; struct ReversePInvokeFrame { PInvokeTransitionFrame* m_savedPInvokeTransitionFrame; +#ifndef HOST_WASM Thread* m_savedThread; +#endif // !HOST_WASM }; class Thread : private RuntimeThreadLocals @@ -249,8 +251,8 @@ class Thread : private RuntimeThreadLocals PInvokeTransitionFrame* GetTransitionFrame(); #ifdef HOST_WASM - void GcScanWasmShadowStack(ScanFunc* pfnEnumCallback, ScanContext* pvCallbackData); -#endif + void GcScanRootsWorker_Wasm(ScanFunc* pfnEnumCallback, ScanContext* pvCallbackData); +#endif // HOST_WASM void GcScanRootsWorker(ScanFunc* pfnEnumCallback, ScanContext* pvCallbackData, StackFrameIterator & sfIter); @@ -391,17 +393,17 @@ class Thread : private RuntimeThreadLocals pthread_t GetOSThreadHandle() { return m_hOSThread; } #endif +#ifdef HOST_WASM + void* GetShadowStackBottom(); + void* GetShadowStackTop(PInvokeTransitionFrame* pTransitionFrame); + void* InlineTryFastReversePInvoke_Wasm(size_t alignment); + void* ReversePInvokeAttachOrTrapThread_Wasm(size_t alignment); +#endif // HOST_WASM + #ifdef TARGET_X86 void SetPendingRedirect(PCODE eip); bool CheckPendingRedirect(PCODE eip); #endif - -#ifdef HOST_WASM - void* GetShadowStackBottom(); - void SetShadowStackBottom(void* pShadowStack); - void* GetShadowStackTop(); - void SetShadowStackTop(void* pShadowStack); -#endif }; #ifndef 
__GCENV_BASE_INCLUDED__ diff --git a/src/coreclr/nativeaot/Runtime/thread.inl b/src/coreclr/nativeaot/Runtime/thread.inl index 6b8892791405..cfd149898151 100644 --- a/src/coreclr/nativeaot/Runtime/thread.inl +++ b/src/coreclr/nativeaot/Runtime/thread.inl @@ -75,7 +75,9 @@ inline void Thread::SetDeferredTransitionFrame(PInvokeTransitionFrame* pTransiti { ASSERT(ThreadStore::GetCurrentThread() == this); ASSERT(Thread::IsCurrentThreadInCooperativeMode()); +#ifndef HOST_WASM ASSERT(!Thread::IsHijackTarget(pTransitionFrame->m_RIP)); +#endif // !HOST_WASM m_pDeferredTransitionFrame = pTransitionFrame; } @@ -169,7 +171,9 @@ FORCEINLINE void Thread::InlineReversePInvokeReturn(ReversePInvokeFrame* pFrame) FORCEINLINE void Thread::InlinePInvoke(PInvokeTransitionFrame* pFrame) { ASSERT(!IsDoNotTriggerGcSet() || ThreadStore::IsTrapThreadsRequested()); +#ifndef HOST_WASM pFrame->m_pThread = this; +#endif // set our mode to preemptive VolatileStoreWithoutBarrier(&m_pTransitionFrame, pFrame); } @@ -191,6 +195,7 @@ FORCEINLINE bool Thread::InlineTryFastReversePInvoke(ReversePInvokeFrame* pFrame // remember the current transition frame, so it will be restored when we return from reverse pinvoke pFrame->m_savedPInvokeTransitionFrame = m_pTransitionFrame; +#ifndef HOST_WASM // If the thread is already in cooperative mode, this is a bad transition that will be a fail fast unless we are in // a do not trigger mode. 
The exception to the rule allows us to have [UnmanagedCallersOnly] methods that are called via // the "restricted GC callouts" as well as from native, which is necessary because the methods are CCW vtable @@ -210,6 +215,7 @@ FORCEINLINE bool Thread::InlineTryFastReversePInvoke(ReversePInvokeFrame* pFrame if (IsCurrentThreadInCooperativeMode()) return false; // bad transition +#endif // !HOST_WASM // this is an ordinary transition to managed code // GC threads should not do that @@ -228,26 +234,3 @@ FORCEINLINE bool Thread::InlineTryFastReversePInvoke(ReversePInvokeFrame* pFrame return true; } - -#ifdef HOST_WASM -FORCEINLINE void* Thread::GetShadowStackBottom() -{ - return m_pShadowStackBottom; -} - -FORCEINLINE void Thread::SetShadowStackBottom(void *pShadowStack) -{ - ASSERT(m_pShadowStackBottom == nullptr); - m_pShadowStackBottom = pShadowStack; -} - -FORCEINLINE void* Thread::GetShadowStackTop() -{ - return m_pShadowStackTop; -} - -FORCEINLINE void Thread::SetShadowStackTop(void* pShadowStack) -{ - m_pShadowStackTop = pShadowStack; -} -#endif diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp index 33d882f340fe..83a04b847f22 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.cpp +++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp @@ -339,6 +339,7 @@ void ThreadStore::ResumeAllThreads(bool waitForGCEvent) } } // ResumeAllThreads +#ifndef HOST_WASM void ThreadStore::InitiateThreadAbort(Thread* targetThread, Object * threadAbortException, bool doRudeAbort) { SuspendAllThreads(/* waitForGCEvent = */ false); @@ -395,6 +396,7 @@ void ThreadStore::CancelThreadAbort(Thread* targetThread) ResumeAllThreads(/* waitForGCEvent = */ false); } +#endif // !HOST_WASM EXTERN_C void* QCALLTYPE RhpGetCurrentThread() { diff --git a/src/coreclr/nativeaot/Runtime/wasm/AllocFast.cpp b/src/coreclr/nativeaot/Runtime/wasm/AllocFast.cpp index ccf690d80d13..6f1cae1f9e8c 100644 --- a/src/coreclr/nativeaot/Runtime/wasm/AllocFast.cpp 
+++ b/src/coreclr/nativeaot/Runtime/wasm/AllocFast.cpp @@ -34,7 +34,7 @@ extern "C" void* RhpGcAlloc(MethodTable* pEEType, uint32_t uFlags, uintptr_t num extern "C" void RhExceptionHandling_FailedAllocation(void* pShadowStack, MethodTable* pEEType, bool isOverflow); // Automatic finalization. -extern "C" void RhpPInvoke(void* pShadowStack, PInvokeTransitionFrame* pFrame); +extern "C" void RhpPInvoke(PInvokeTransitionFrame* pFrame); extern "C" void RhpPInvokeReturn(PInvokeTransitionFrame* pFrame); extern bool g_FinalizationRequestPending; void FinalizeFinalizableObjects(); @@ -42,10 +42,9 @@ void FinalizeFinalizableObjects(); static Object* AllocateObject(void* pShadowStack, MethodTable* pEEType, uint32_t uFlags, uintptr_t numElements) { // Save the current shadow stack before calling into GC; we may need to scan it for live references. - PInvokeTransitionFrame frame; + PInvokeTransitionFrame* pFrame = (PInvokeTransitionFrame*)pShadowStack; Thread* pThread = ThreadStore::GetCurrentThread(); - pThread->SetShadowStackTop(pShadowStack); - Object* obj = (Object*)RhpGcAlloc(pEEType, uFlags, numElements, &frame); + Object* obj = (Object*)RhpGcAlloc(pEEType, uFlags, numElements, pFrame); #ifndef FEATURE_WASM_MANAGED_THREADS if (g_FinalizationRequestPending) @@ -61,9 +60,9 @@ static Object* AllocateObject(void* pShadowStack, MethodTable* pEEType, uint32_t } // "FinalizeFinalizableObjects" runs in preemptive mode. 
- RhpPInvoke(pShadowStack, &frame); + RhpPInvoke(pFrame); FinalizeFinalizableObjects(); - RhpPInvokeReturn(&frame); + RhpPInvokeReturn(pFrame); if (obj != nullptr) { diff --git a/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.cpp b/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.cpp index 5f16b68e02f9..189724d3e7ae 100644 --- a/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.cpp +++ b/src/coreclr/nativeaot/Runtime/wasm/ExceptionHandling/ExceptionHandling.cpp @@ -6,34 +6,22 @@ #include "../wasm.h" -struct SparseVirtualUnwindFrame -{ - SparseVirtualUnwindFrame* Prev; - void* UnwindTable; - size_t UnwindIndex; -}; - // This variable is defined here in native code because: // 1) Unmanaged thread locals are currently much more efficient than managed ones. // 2) Push/pop functions do not need the shadow stack argument. // -thread_local SparseVirtualUnwindFrame* t_pLastSparseVirtualUnwindFrame = nullptr; +// TODO-LLVM-Cleanup: replace with PLATFORM_THREAD_LOCAL after merge.
+__thread SparseVirtualUnwindFrame* t_pLastSparseVirtualUnwindFrame = nullptr; FCIMPL_NO_SS(void, RhpPushSparseVirtualUnwindFrame, SparseVirtualUnwindFrame* pFrame, void* pUnwindTable, size_t unwindIndex) { - ASSERT(t_pLastSparseVirtualUnwindFrame < pFrame); - pFrame->Prev = t_pLastSparseVirtualUnwindFrame; - pFrame->UnwindTable = pUnwindTable; - pFrame->UnwindIndex = unwindIndex; - - t_pLastSparseVirtualUnwindFrame = pFrame; + InlinePushSparseVirtualUnwindFrame(pFrame, pUnwindTable, unwindIndex); } FCIMPLEND FCIMPL_NO_SS(void, RhpPopSparseVirtualUnwindFrame) { - ASSERT(t_pLastSparseVirtualUnwindFrame != nullptr); - t_pLastSparseVirtualUnwindFrame = t_pLastSparseVirtualUnwindFrame->Prev; + InlinePopSparseVirtualUnwindFrame(t_pLastSparseVirtualUnwindFrame); } FCIMPLEND diff --git a/src/coreclr/nativeaot/Runtime/wasm/GcStress.cpp b/src/coreclr/nativeaot/Runtime/wasm/GcStress.cpp index 1581695bb3ef..16a065f8bde5 100644 --- a/src/coreclr/nativeaot/Runtime/wasm/GcStress.cpp +++ b/src/coreclr/nativeaot/Runtime/wasm/GcStress.cpp @@ -42,8 +42,22 @@ FCIMPL2(void*, RhpGcStressOnce, void* obj, uint8_t* pFlag) pThread->PushGCFrameRegistration(&gc); } - pThread->SetShadowStackTop(pShadowStack); + bool isCooperative = pThread->IsCurrentThreadInCooperativeMode(); + if (isCooperative) + { + pThread->SetDeferredTransitionFrame((PInvokeTransitionFrame*)pShadowStack); + } + else // We can be called in preemptive mode - on an exit from a PInvoke. 
+ { + ASSERT(obj == nullptr); + pThread->DeferTransitionFrame(); + pThread->DisablePreemptiveMode(); + } GCHeapUtilities::GetGCHeap()->GarbageCollect(); + if (!isCooperative) + { + pThread->EnablePreemptiveMode(); + } if (obj != nullptr) { diff --git a/src/coreclr/nativeaot/Runtime/wasm/PInvoke.cpp b/src/coreclr/nativeaot/Runtime/wasm/PInvoke.cpp index 3620ccd809a7..7de449d34189 100644 --- a/src/coreclr/nativeaot/Runtime/wasm/PInvoke.cpp +++ b/src/coreclr/nativeaot/Runtime/wasm/PInvoke.cpp @@ -4,9 +4,10 @@ #include #include "common.h" +#include "daccess.h" #include "CommonTypes.h" #include "CommonMacros.h" -#include "daccess.h" +#include "CommonMacros.inl" #include "PalRedhawkCommon.h" #include "PalRedhawk.h" #include "thread.h" @@ -16,61 +17,140 @@ #include "wasm.h" -FCIMPL_NO_SS(void*, RhpGetOrInitShadowStackTop) +void* Thread::GetShadowStackBottom() { - Thread* pCurThread = ThreadStore::RawGetCurrentThread(); + ASSERT(m_pShadowStackBottom != nullptr); + return m_pShadowStackBottom; +} - void* pShadowStack = pCurThread->GetShadowStackTop(); +void* Thread::GetShadowStackTop(PInvokeTransitionFrame* pTransitionFrame) +{ + void* pShadowStack; + if (pTransitionFrame == TOP_OF_STACK_MARKER) // TODO-LLVM: remove this check by replacing TOP_OF_STACK_MARKER with m_pShadowStackBottom. + { + pShadowStack = GetShadowStackBottom(); + } + else + { + pShadowStack = pTransitionFrame; + } + ASSERT(pShadowStack != nullptr); + return pShadowStack; +} - if (pShadowStack == nullptr) +FORCEINLINE static void* GetAlignedShadowStackTop(Thread* pThread, PInvokeTransitionFrame* pTransitionFrame, size_t alignment) +{ + void* pShadowStack = pThread->GetShadowStackTop(pTransitionFrame); + + // Note how this aligning means that the transition frame on exit (saved back into current Thread) may differ + // from its value on entry. This is ok since that value will only grow in a bounded manner, such that calling + // an RPI method in a loop will never lead to runaway shadow stack usage. 
+ if (alignment != 0) { - pShadowStack = malloc(1000000); // ~1MB. - if (pShadowStack == nullptr) - { - RhFailFast(); // Fatal OOM. - } + ASSERT(alignment == 8); + ZeroMemory(pShadowStack, 4); + pShadowStack = ALIGN_UP(pShadowStack, alignment); + } + return pShadowStack; +} + +FORCEINLINE static ReversePInvokeFrame* GetReversePInvokeFrame(void* pShadowStack) +{ + return (ReversePInvokeFrame*)pShadowStack; +} + +FORCEINLINE static SparseVirtualUnwindFrame* GetSparseVirtualUnwindFrame(ReversePInvokeFrame* pFrame) +{ + return (SparseVirtualUnwindFrame*)(pFrame + 1); +} - pCurThread->SetShadowStackBottom(pShadowStack); +FORCEINLINE void* Thread::InlineTryFastReversePInvoke_Wasm(size_t alignment) +{ + PInvokeTransitionFrame* pTransitionFrame = m_pTransitionFrame; + if (pTransitionFrame == nullptr) + return nullptr; // Uninitialized thread or illegal transition. Use the slow path. + + ASSERT(!IsCurrentThreadInCooperativeMode()); + void* pShadowStack = GetAlignedShadowStackTop(this, pTransitionFrame, alignment); + ReversePInvokeFrame* pFrame = GetReversePInvokeFrame(pShadowStack); + if (!InlineTryFastReversePInvoke(pFrame)) + return nullptr; // Need to suspend the thread. + + return pShadowStack; +} + +NOINLINE void* Thread::ReversePInvokeAttachOrTrapThread_Wasm(size_t alignment) +{ + // This check is necessary to support GC callouts, see "InlineTryFastReversePInvoke". + // We move it to the slow path since GC callouts should be very rare on WASM. + if (IsDoNotTriggerGcSet()) + { + // We expect this scenario only when EE is stopped. + ASSERT(ThreadStore::IsTrapThreadsRequested()); + return GetAlignedShadowStackTop(this, GetTransitionFrame(), alignment); // The suspender transition frame. } + // The shadow stack at this point may not have been allocated yet, so we need to use a local RPI frame. 
+ ReversePInvokeFrame localFrame; + ReversePInvokeAttachOrTrapThread(&localFrame); + + void* pShadowStack = GetAlignedShadowStackTop(this, localFrame.m_savedPInvokeTransitionFrame, alignment); + *GetReversePInvokeFrame(pShadowStack) = localFrame; + return pShadowStack; +} + +FCIMPL_NO_SS(void*, RhpReversePInvoke, size_t alignment) +{ + Thread* pThread = ThreadStore::RawGetCurrentThread(); + void* pShadowStack = pThread->InlineTryFastReversePInvoke_Wasm(alignment); + if (pShadowStack == nullptr) + pShadowStack = pThread->ReversePInvokeAttachOrTrapThread_Wasm(alignment); + return pShadowStack; } FCIMPLEND -EXTERN_C NOINLINE void FASTCALL RhpReversePInvokeAttachOrTrapThread2(ReversePInvokeFrame* pFrame); +FCIMPL_NO_SS(void*, RhpReversePInvokeAndPushSparseVirtualUnwindFrame, size_t alignment, void* pUnwindTable, size_t unwindIndex) +{ + Thread* pThread = ThreadStore::RawGetCurrentThread(); + void* pShadowStack = pThread->InlineTryFastReversePInvoke_Wasm(alignment); + if (pShadowStack == nullptr) + pShadowStack = pThread->ReversePInvokeAttachOrTrapThread_Wasm(alignment); + + SparseVirtualUnwindFrame* pSparseVirtualUnwindFrame = GetSparseVirtualUnwindFrame(GetReversePInvokeFrame(pShadowStack)); + InlinePushSparseVirtualUnwindFrame(pSparseVirtualUnwindFrame, pUnwindTable, unwindIndex); + return pShadowStack; +} +FCIMPLEND -FCIMPL1(void, RhpReversePInvoke, ReversePInvokeFrame* pFrame) +FCIMPL_NO_SS(void, RhpReversePInvokeReturn, ReversePInvokeFrame* pFrame) { - Thread* pCurThread = ThreadStore::RawGetCurrentThread(); - pFrame->m_savedThread = pCurThread; - if (pCurThread->InlineTryFastReversePInvoke(pFrame)) - return; - - // The slow path may invoke runtime initialization, which runs managed code. 
- pCurThread->SetShadowStackTop(pShadowStack); - RhpReversePInvokeAttachOrTrapThread2(pFrame); + ThreadStore::RawGetCurrentThread()->InlineReversePInvokeReturn(pFrame); } FCIMPLEND -FCIMPL_NO_SS(void, RhpReversePInvokeReturn, void* pPreviousShadowStackTop, ReversePInvokeFrame* pFrame) +FCIMPL_NO_SS(void, RhpReversePInvokeReturnAndPopSparseVirtualUnwindFrame, ReversePInvokeFrame* pFrame) { - pFrame->m_savedThread->InlineReversePInvokeReturn(pFrame); - pFrame->m_savedThread->SetShadowStackTop(pPreviousShadowStackTop); + InlinePopSparseVirtualUnwindFrame(GetSparseVirtualUnwindFrame(pFrame)); + ThreadStore::RawGetCurrentThread()->InlineReversePInvokeReturn(pFrame); } FCIMPLEND -FCIMPL1(void, RhpPInvoke, PInvokeTransitionFrame* pFrame) +FCIMPL0(void, RhpPInvoke) { - Thread* pCurThread = ThreadStore::RawGetCurrentThread(); - pCurThread->InlinePInvoke(pFrame); - pCurThread->SetShadowStackTop(pShadowStack); + PInvokeTransitionFrame* pFrame = (PInvokeTransitionFrame*)pShadowStack; + ThreadStore::RawGetCurrentThread()->InlinePInvoke(pFrame); } FCIMPLEND -FCIMPL_NO_SS(void, RhpPInvokeReturn, PInvokeTransitionFrame* pFrame) +FCIMPL0(void, RhpPInvokeReturn) { - //reenter cooperative mode - pFrame->m_pThread->InlinePInvokeReturn(pFrame); + // WASM TLS is cheap: + // 1. Without threading, it is free. + // 2. With threading, it costs a single additional load (of the TLS base global). + // So not caching the current thread in the PI frame doesn't cost us anything. + PInvokeTransitionFrame* pFrame = (PInvokeTransitionFrame*)pShadowStack; + ThreadStore::RawGetCurrentThread()->InlinePInvokeReturn(pFrame); } FCIMPLEND diff --git a/src/coreclr/nativeaot/Runtime/wasm/wasm.h b/src/coreclr/nativeaot/Runtime/wasm/wasm.h index bd86484b1b25..410d8510a4d6 100644 --- a/src/coreclr/nativeaot/Runtime/wasm/wasm.h +++ b/src/coreclr/nativeaot/Runtime/wasm/wasm.h @@ -6,3 +6,29 @@ // serves as simply a marker for such FCalls. // #define FCIMPL_NO_SS(_rettype, _name, ...) 
extern "C" _rettype _name(__VA_ARGS__) { + +struct SparseVirtualUnwindFrame +{ + SparseVirtualUnwindFrame* Prev; + void* UnwindTable; + size_t UnwindIndex; +}; + +// TODO-LLVM-Cleanup: replace with PLATFORM_THREAD_LOCAL after merge. +extern __thread SparseVirtualUnwindFrame* t_pLastSparseVirtualUnwindFrame; + +FORCEINLINE void InlinePushSparseVirtualUnwindFrame(SparseVirtualUnwindFrame* pFrame, void* pUnwindTable, size_t unwindIndex) +{ + ASSERT(t_pLastSparseVirtualUnwindFrame < pFrame); + pFrame->Prev = t_pLastSparseVirtualUnwindFrame; + pFrame->UnwindTable = pUnwindTable; + pFrame->UnwindIndex = unwindIndex; + + t_pLastSparseVirtualUnwindFrame = pFrame; +} + +FORCEINLINE void InlinePopSparseVirtualUnwindFrame(SparseVirtualUnwindFrame* pFrame) +{ + ASSERT(t_pLastSparseVirtualUnwindFrame != nullptr); + t_pLastSparseVirtualUnwindFrame = pFrame->Prev; +} diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs index d11a739ae0d1..5a7d6692e5b7 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoHelpFunc.cs @@ -285,14 +285,14 @@ which is the right helper to use to allocate an object of a given type.
*/ CORINFO_HELP_VALIDATE_INDIRECT_CALL, // CFG: Validate function pointer CORINFO_HELP_DISPATCH_INDIRECT_CALL, // CFG: Validate and dispatch to pointer - CORINFO_HELP_LLVM_GET_OR_INIT_SHADOW_STACK_TOP, CORINFO_HELP_LLVM_EH_CATCH, CORINFO_HELP_LLVM_EH_POP_UNWOUND_VIRTUAL_FRAMES, CORINFO_HELP_LLVM_EH_PUSH_VIRTUAL_UNWIND_FRAME, + CORINFO_HELP_LLVM_EH_REVERSE_PINVOKE_ENTER_AND_PUSH_VIRTUAL_UNWIND_FRAME, CORINFO_HELP_LLVM_EH_POP_VIRTUAL_UNWIND_FRAME, + CORINFO_HELP_LLVM_EH_REVERSE_PINVOKE_EXIT_AND_POP_VIRTUAL_UNWIND_FRAME, CORINFO_HELP_LLVM_EH_UNHANDLED_EXCEPTION, CORINFO_HELP_LLVM_RESOLVE_INTERFACE_CALL_TARGET, - CORINFO_HELP_LLVM_GET_EXTERNAL_CALL_TARGET, CORINFO_HELP_LLVM_STRESS_GC, CORINFO_HELP_COUNT, diff --git a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs index d89f39a66abd..7658d65a7644 100644 --- a/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs +++ b/src/coreclr/tools/aot/ILCompiler.RyuJit/JitInterface/CorInfoImpl.RyuJit.cs @@ -29,7 +29,7 @@ internal unsafe partial class CorInfoImpl private const CORINFO_RUNTIME_ABI TargetABI = CORINFO_RUNTIME_ABI.CORINFO_NATIVEAOT_ABI; private uint OffsetOfDelegateFirstTarget => (uint)(4 * PointerSize); // Delegate._functionPointer - private int SizeOfReversePInvokeTransitionFrame => 2 * PointerSize; + private int SizeOfReversePInvokeTransitionFrame => (_compilation.TypeSystemContext.Target.IsWasm ? 
1 : 2) * PointerSize; private RyuJitCompilation _compilation; private MethodDebugInformation _debugInfo; @@ -789,9 +789,6 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_DISPATCH_INDIRECT_CALL: return _compilation.NodeFactory.ExternIndirectFunctionSymbol("__guard_dispatch_icall_fptr"); - case CorInfoHelpFunc.CORINFO_HELP_LLVM_GET_OR_INIT_SHADOW_STACK_TOP: - mangledName = "RhpGetOrInitShadowStackTop"; - break; case CorInfoHelpFunc.CORINFO_HELP_LLVM_EH_CATCH: mangledName = "RhpHandleExceptionWasmCatch"; break; @@ -804,9 +801,15 @@ private ISymbolNode GetHelperFtnUncached(CorInfoHelpFunc ftnNum) case CorInfoHelpFunc.CORINFO_HELP_LLVM_EH_PUSH_VIRTUAL_UNWIND_FRAME: mangledName = "RhpPushSparseVirtualUnwindFrame"; break; + case CorInfoHelpFunc.CORINFO_HELP_LLVM_EH_REVERSE_PINVOKE_ENTER_AND_PUSH_VIRTUAL_UNWIND_FRAME: + mangledName = "RhpReversePInvokeAndPushSparseVirtualUnwindFrame"; + break; case CorInfoHelpFunc.CORINFO_HELP_LLVM_EH_POP_VIRTUAL_UNWIND_FRAME: mangledName = "RhpPopSparseVirtualUnwindFrame"; break; + case CorInfoHelpFunc.CORINFO_HELP_LLVM_EH_REVERSE_PINVOKE_EXIT_AND_POP_VIRTUAL_UNWIND_FRAME: + mangledName = "RhpReversePInvokeReturnAndPopSparseVirtualUnwindFrame"; + break; case CorInfoHelpFunc.CORINFO_HELP_LLVM_EH_UNHANDLED_EXCEPTION: mangledName = "RhpHandleUnhandledException"; break; @@ -2028,8 +2031,7 @@ private int SizeOfPInvokeTransitionFrame { if (_compilation.TypeSystemContext.Target.IsWasm) { - // Only m_pThread used. - return this.PointerSize; + return 0; // Shadow stack top pointer used as the transition frame. } // struct PInvokeTransitionFrame: