diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index e6fc86d38fd..8b6b3cc2f14 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -8178,7 +8178,7 @@ GenTree* Compiler::gtNewPhysRegNode(regNumber reg, var_types type) { #ifdef TARGET_ARM64 assert(genIsValidIntReg(reg) || (reg == REG_SPBASE) || (reg == REG_FFR)); -#else +#elif !defined(TARGET_WASM) assert(genIsValidIntReg(reg) || (reg == REG_SPBASE)); #endif GenTree* result = new (this, GT_PHYSREG) GenTreePhysReg(reg, type); diff --git a/src/coreclr/jit/llvm.h b/src/coreclr/jit/llvm.h index 34b62ab29b1..f73f5d2b6a2 100644 --- a/src/coreclr/jit/llvm.h +++ b/src/coreclr/jit/llvm.h @@ -285,7 +285,7 @@ class Llvm { private: static const unsigned SHADOW_STACK_ARG_INDEX = 0; - static const unsigned DEFAULT_SHADOW_STACK_ALIGNMENT = TARGET_POINTER_SIZE; + static const unsigned ORIGINAL_SHADOW_STACK_ARG_INDEX = 1; static const unsigned MIN_HEAP_OBJ_SIZE = TARGET_POINTER_SIZE * 2; void* const m_pEECorInfo; // TODO-LLVM: workaround for not changing the JIT/EE interface. @@ -301,7 +301,6 @@ class Llvm // Lowering members. LIR::Range* m_currentRange = nullptr; SideEffectSet m_scratchSideEffects; // Used for IsInvariantInRange. - bool m_anyFilterFunclets = false; // Optimization facts provided by lowering. BooleanFact m_anyVirtuallyUnwindableCalleesViaLowering = BooleanFact::Unknown; @@ -316,6 +315,7 @@ class Llvm // Shared between LSSA and codegen. bool m_anyAddressExposedOrPinnedShadowLocals = false; + GenTree* m_prologEnd = nullptr; // First 'user' node after the prolog. // Codegen members. llvm::IRBuilder<> _builder; @@ -329,8 +329,6 @@ class Llvm EHRegionInfo* m_EHRegionsInfo; Value* m_exceptionThrownAddressValue = nullptr; - Value* m_rootFunctionShadowStackValue = nullptr; - // Codegen emit context. 
unsigned m_currentLlvmFunctionIndex = ROOT_FUNC_IDX; unsigned m_currentProtectedRegionIndex = EHblkDsc::NO_ENCLOSING_INDEX; @@ -344,10 +342,9 @@ class Llvm unsigned m_lineNumberCount; CORINFO_LLVM_LINE_NUMBER_DEBUG_INFO* m_lineNumbers; - unsigned m_shadowFrameAlignment = DEFAULT_SHADOW_STACK_ALIGNMENT; unsigned _shadowStackLocalsSize = 0; - unsigned _originalShadowStackLclNum = BAD_VAR_NUM; - unsigned _shadowStackLclNum = BAD_VAR_NUM; + unsigned m_shadowStackSsaNum = SsaConfig::RESERVED_SSA_NUM; + unsigned m_shadowStackLclNum = BAD_VAR_NUM; unsigned m_sparseVirtualUnwindFrameLclNum = BAD_VAR_NUM; unsigned m_preciseVirtualUnwindFrameLclNum = BAD_VAR_NUM; unsigned _llvmArgCount = 0; @@ -473,7 +470,6 @@ class Llvm void lowerVirtualStubCall(GenTreeCall* callNode); void insertNullCheckForCall(GenTreeCall* callNode); void lowerDelegateInvoke(GenTreeCall* callNode); - void lowerReversePInvokeExit(GenTreeCall* callNode); void lowerUnmanagedCall(GenTreeCall* callNode); void lowerCallToShadowStack(GenTreeCall* callNode); void lowerCallReturn(GenTreeCall* callNode); @@ -485,7 +481,6 @@ class Llvm GenTree* normalizeStructUse(LIR::Use& use, ClassLayout* layout); unsigned representAsLclVar(LIR::Use& use); - GenTree* insertShadowStackAddr(GenTree* insertBefore, unsigned offset, unsigned shadowStackLclNum); GenTreeAddrMode* createAddrModeNode(GenTree* base, unsigned offset); void lowerCollectOptimizationFacts(GenTree* node); @@ -496,7 +491,7 @@ class Llvm void dissolvePromotedLocal(unsigned lclNum); bool isFirstBlockCanonical(); - GenTree* lowerAndInsertIntoFirstBlock(LIR::Range& range, GenTree* insertAfter = nullptr); + GenTree* lowerAndInsertIntoFirstBlock(LIR::Range&& range, GenTree* insertAfter = nullptr); public: PhaseStatus AddVirtualUnwindFrame(); @@ -546,8 +541,6 @@ class Llvm bool callIsInTry, bool callIsInFilter DEBUGARG(const char** pReasonWhyNot = nullptr)) const; bool isPotentialGcSafePoint(GenTree* node) const; bool isShadowFrameLocal(LclVarDsc* varDsc) const; - bool 
isShadowStackLocal(unsigned lclNum) const; - bool isFuncletParameter(unsigned lclNum) const; // ================================================================================================================ // | Codegen | @@ -561,14 +554,14 @@ class Llvm void initializeFunctions(); void annotateFunctions(); - void generateProlog(); - void initializeShadowStack(); + void generateEarlyProlog(); void initializeLocals(); void initializeBlocks(); + void generateLateProlog(); void generateUnwindBlocks(); void generateBlocks(); void generateBlock(BasicBlock* block); - void fillPhis(); + void generatePhis(); void generateAuxiliaryArtifacts(); void verifyGeneratedCode(); void displayGeneratedCode(); @@ -595,6 +588,7 @@ class Llvm void buildCmp(GenTreeOp* node); void buildCnsDouble(GenTreeDblCon* node); void buildIntegralConst(GenTreeIntConCommon* node); + void buildPhysReg(GenTreePhysReg* physReg); void buildCall(GenTreeCall* node); void buildInd(GenTreeIndir* indNode); void buildBlk(GenTreeBlk* blkNode); @@ -669,7 +663,6 @@ class Llvm llvm::Constant* getIntPtrConst(target_size_t value, Type* llvmType = nullptr); Value* getShadowStack(); Value* getShadowStackForCallee(bool isTailCall = false); - Value* getOriginalShadowStack(); void setCurrentEmitContextForBlock(BasicBlock* block); void setCurrentEmitContextBlocks(LlvmBlockRange* llvmBlocks); @@ -693,7 +686,7 @@ class Llvm LlvmBlockRange* getLlvmBlocksForBlock(BasicBlock* block); llvm::BasicBlock* getFirstLlvmBlockForBlock(BasicBlock* block); llvm::BasicBlock* getLastLlvmBlockForBlock(BasicBlock* block); - llvm::BasicBlock* getOrCreatePrologLlvmBlockForFunction(unsigned funcIdx); + llvm::IRBuilderBase::InsertPoint getOrCreateEarlyPrologForFunction(unsigned funcIdx); bool isReachable(BasicBlock* block) const; BasicBlock* getFirstBlockForFunction(unsigned funcIdx) const; diff --git a/src/coreclr/jit/llvmcodegen.cpp b/src/coreclr/jit/llvmcodegen.cpp index e809802602f..9736f88cda1 100644 --- a/src/coreclr/jit/llvmcodegen.cpp 
+++ b/src/coreclr/jit/llvmcodegen.cpp @@ -18,6 +18,7 @@ #define BBNAME(prefix, index) Twine(prefix) + ((index < 10) ? "0" : "") + Twine(index) using AllocaMap = JitHashTable, llvm::AllocaInst*>; +using InsertPoint = llvm::IRBuilderBase::InsertPoint; struct LlvmBlockRange { @@ -43,6 +44,7 @@ struct FunctionInfo llvm::AllocaInst** Allocas; // Dense "lclNum -> Alloca*" mapping used for the main function. AllocaMap* AllocaMap; // Sparse "lclNum -> Alloca*" mapping used for funclets. }; + Instruction* LastEarlyPrologInst; llvm::BasicBlock* ResumeLlvmBlock; llvm::BasicBlock* ExceptionThrownReturnLlvmBlock; }; @@ -59,10 +61,8 @@ void Llvm::Compile() JITDUMPEXEC(_compiler->fgDispHandlerTab()); initializeBlocks(); - generateProlog(); - generateUnwindBlocks(); generateBlocks(); - fillPhis(); + generatePhis(); finalizeDebugInfo(); generateAuxiliaryArtifacts(); @@ -196,7 +196,7 @@ void Llvm::annotateFunctions() } // Mark the shadow stack dereferenceable. - if ((funcIdx != ROOT_FUNC_IDX) || _compiler->lvaGetDesc(_shadowStackLclNum)->lvIsParam) + if ((funcIdx != ROOT_FUNC_IDX) || _compiler->lvaGetDesc(m_shadowStackLclNum)->lvIsParam) { unsigned derefSize = getShadowFrameSize(funcIdx); if (derefSize != 0) @@ -263,67 +263,19 @@ void Llvm::initializeBlocks() } } -void Llvm::generateProlog() +void Llvm::generateEarlyProlog() { - JITDUMP("\n=============== Generating prolog:\n"); - - LlvmBlockRange prologLlvmBlocks(getOrCreatePrologLlvmBlockForFunction(ROOT_FUNC_IDX)); - setCurrentEmitContext(ROOT_FUNC_IDX, EHblkDsc::NO_ENCLOSING_INDEX, EHblkDsc::NO_ENCLOSING_INDEX, &prologLlvmBlocks); + // "fgFirstBB" is guaranteed to not have any incoming flow, so we can reuse its LLVM blocks for the prolog. + assert(isFirstBlockCanonical()); + JITDUMP("=============== Generating early prolog:\n"); _builder.SetCurrentDebugLocation(nullptr); // By convention, prologs have no debug info. 
- initializeShadowStack(); initializeLocals(); - declareDebugVariables(); -} - -void Llvm::initializeShadowStack() -{ - Value* shadowStackValue; - if (_compiler->opts.IsReversePInvoke()) - { - shadowStackValue = emitHelperCall(CORINFO_HELP_LLVM_GET_OR_INIT_SHADOW_STACK_TOP); - JITDUMP("Setting V%02u's initial value to the recovered shadow stack\n", _shadowStackLclNum); - JITDUMPEXEC(displayValue(shadowStackValue)); - } - else - { - shadowStackValue = getRootLlvmFunction()->getArg(SHADOW_STACK_ARG_INDEX); - } - - unsigned alignment = m_shadowFrameAlignment; - if (alignment != DEFAULT_SHADOW_STACK_ALIGNMENT) - { - JITDUMP("Aligning the shadow frame to %u bytes:\n", alignment); - assert(isPow2(alignment)); - - // Zero the padding that may be introduced by the code below. This serves two purposes: - // 1. We don't leave "random" pointers on the shadow stack. - // 2. We allow precise virtual unwinding out of overaligned frames, by skipping the zeroed padding. - unsigned maxPaddingSize = alignment - DEFAULT_SHADOW_STACK_ALIGNMENT; - llvm::Align existingAlign = llvm::Align(DEFAULT_SHADOW_STACK_ALIGNMENT); - Value* memsetInst = _builder.CreateMemSet( - shadowStackValue, _builder.getInt8(0), _builder.getInt32(maxPaddingSize), existingAlign); - JITDUMPEXEC(displayValue(memsetInst)); - - // IR taken from what Clang generates for "__builtin_align_up". 
- Value* shadowStackIntValue = _builder.CreatePtrToInt(shadowStackValue, getIntPtrLlvmType()); - JITDUMPEXEC(displayValue(shadowStackIntValue)); - Value* alignedShadowStackIntValue = _builder.CreateAdd(shadowStackIntValue, getIntPtrConst(alignment - 1)); - JITDUMPEXEC(displayValue(alignedShadowStackIntValue)); - alignedShadowStackIntValue = _builder.CreateAnd(alignedShadowStackIntValue, getIntPtrConst(~(alignment - 1))); - JITDUMPEXEC(displayValue(alignedShadowStackIntValue)); - Value* alignOffset = _builder.CreateSub(alignedShadowStackIntValue, shadowStackIntValue); - JITDUMPEXEC(displayValue(alignOffset)); - shadowStackValue = _builder.CreateGEP(Type::getInt8Ty(m_context->Context), shadowStackValue, alignOffset); - JITDUMPEXEC(displayValue(shadowStackValue)); - - llvm::CallInst* alignAssume = - _builder.CreateAlignmentAssumption(m_context->Module.getDataLayout(), shadowStackValue, alignment); - JITDUMPEXEC(alignAssume); - } - - m_rootFunctionShadowStackValue = shadowStackValue; + Instruction* lastInst = + _builder.GetInsertPoint() == _builder.GetInsertBlock()->end() ? nullptr : &*_builder.GetInsertPoint(); + getLlvmFunctionInfoForIndex(ROOT_FUNC_IDX).LastEarlyPrologInst = lastInst; + JITDUMP("\n"); } void Llvm::initializeLocals() @@ -333,12 +285,6 @@ void Llvm::initializeLocals() { LclVarDsc* varDsc = _compiler->lvaGetDesc(lclNum); - if (isFuncletParameter(lclNum)) - { - // We model funclet parameters specially because it is not trivial to represent them in IR faithfully. - continue; - } - // Don't look at unreferenced temporaries. if (varDsc->lvRefCnt() == 0) { @@ -404,6 +350,15 @@ void Llvm::initializeLocals() getLlvmFunctionInfoForIndex(ROOT_FUNC_IDX).Allocas = allocas; } +void Llvm::generateLateProlog() +{ + // Now that we have the shadow stack set up by LSSA in its IR prolog, generate EH and debug declares. 
+ JITDUMP("\n=============== Generating late prolog:\n"); + declareDebugVariables(); + generateUnwindBlocks(); + JITDUMP("\n"); +} + void Llvm::generateUnwindBlocks() { if (!_compiler->ehHasCallableHandlers()) @@ -414,6 +369,7 @@ void Llvm::generateUnwindBlocks() // Generate the unwind blocks used to catch native exceptions during the second pass. // We generate these before the rest of the code because throwing calls need a certain // amount of pieces filled in (in particular, "catchswitch"es in the Wasm EH model). + BasicBlock* ambientEmitContextBlock = CurrentBlock(); CompAllocator alloc = _compiler->getAllocator(CMK_Codegen); m_EHRegionsInfo = new (alloc) EHRegionInfo[_compiler->compHndBBtabCount](); @@ -552,9 +508,7 @@ void Llvm::generateUnwindBlocks() llvm::AllocaInst* cppExcTupleAlloca = funcData.CppExcTupleAlloca; if ((model == CORINFO_LLVM_EH_CPP) && (cppExcTupleAlloca == nullptr)) { - llvm::BasicBlock* prologLlvmBlock = getOrCreatePrologLlvmBlockForFunction(funcIdx); - - _builder.SetInsertPoint(prologLlvmBlock->getTerminator()); + _builder.restoreIP(getOrCreateEarlyPrologForFunction(funcIdx)); cppExcTupleAlloca = _builder.CreateAlloca(cppExcTupleLlvmType); funcData.CppExcTupleAlloca = cppExcTupleAlloca; @@ -711,6 +665,8 @@ void Llvm::generateUnwindBlocks() funcData.InsertBeforeLlvmBlock = unwindLlvmBlocks.FirstBlock; } + + setCurrentEmitContextForBlock(ambientEmitContextBlock); } void Llvm::generateBlocks() @@ -756,8 +712,19 @@ void Llvm::generateBlock(BasicBlock* block) setCurrentEmitContextForBlock(block); + if (block == _compiler->fgFirstBB) + { + assert(m_prologEnd != nullptr); + generateEarlyProlog(); + } + for (GenTree* node : LIR::AsRange(block)) { + if (node == m_prologEnd) + { + generateLateProlog(); + } + visitNode(node); } @@ -788,7 +755,7 @@ void Llvm::generateBlock(BasicBlock* block) } } -void Llvm::fillPhis() +void Llvm::generatePhis() { // LLVM requires PHI inputs to match the list of predecessors exactly, which is different from IR in two 
ways: // @@ -1098,6 +1065,9 @@ void Llvm::visitNode(GenTree* node) case GT_CNS_LNG: buildIntegralConst(node->AsIntConCommon()); break; + case GT_PHYSREG: + buildPhysReg(node->AsPhysReg()); + break; case GT_IND: buildInd(node->AsIndir()); break; @@ -1107,6 +1077,9 @@ void Llvm::visitNode(GenTree* node) case GT_SWITCH: buildSwitch(node->AsUnOp()); break; + case GT_PHI: + buildEmptyPhi(node->AsPhi()); + break; case GT_LCL_FLD: buildLocalField(node->AsLclFld()); break; @@ -1167,9 +1140,6 @@ void Llvm::visitNode(GenTree* node) case GT_BLK: buildBlk(node->AsBlk()); break; - case GT_PHI: - buildEmptyPhi(node->AsPhi()); - break; case GT_PHI_ARG: break; case GT_CATCH_ARG: @@ -1234,18 +1204,7 @@ void Llvm::buildLocalVar(GenTreeLclVar* lclVar) unsigned int ssaNum = lclVar->GetSsaNum(); LclVarDsc* varDsc = _compiler->lvaGetDesc(lclVar); - // We model funclet parameters specially - it is simpler then representing them faithfully in IR. - if (lclNum == _shadowStackLclNum) - { - assert((ssaNum == SsaConfig::FIRST_SSA_NUM) || (ssaNum == SsaConfig::RESERVED_SSA_NUM)); - llvmRef = getShadowStack(); - } - else if (lclNum == _originalShadowStackLclNum) - { - assert((ssaNum == SsaConfig::FIRST_SSA_NUM) || (ssaNum == SsaConfig::RESERVED_SSA_NUM)); - llvmRef = getOriginalShadowStack(); - } - else if (lclVar->HasSsaName()) + if (lclVar->HasSsaName()) { llvmRef = _localsMap[{lclNum, ssaNum}]; } @@ -1852,6 +1811,30 @@ void Llvm::buildIntegralConst(GenTreeIntConCommon* node) mapGenTreeToValue(node, constValue); } +void Llvm::buildPhysReg(GenTreePhysReg* physReg) +{ + Value* regValue; + Function* llvmFunc = getCurrentLlvmFunction(); + switch (physReg->gtSrcReg) + { + case REG_SHADOW_STACK_ARG: + // The root function is expected to reference the shadow stack via "m_shadowStackLclNum". 
+ assert(getCurrentLlvmFunctionIndex() != ROOT_FUNC_IDX); + regValue = llvmFunc->getArg(SHADOW_STACK_ARG_INDEX); + break; + + case REG_ORIGINAL_SHADOW_STACK_ARG: + // Only filters have the original shadow stack parameter. + assert(isCurrentContextInFilter()); + regValue = llvmFunc->getArg(ORIGINAL_SHADOW_STACK_ARG_INDEX); + break; + + default: + unreached(); + } + mapGenTreeToValue(physReg, regValue); +} + void Llvm::buildCall(GenTreeCall* call) { ArrayStack argVec(_compiler->getAllocator(CMK_Codegen)); @@ -3255,11 +3238,11 @@ Value* Llvm::getShadowStack() { if (getCurrentLlvmFunctionIndex() == ROOT_FUNC_IDX) { - assert(m_rootFunctionShadowStackValue != nullptr); - return m_rootFunctionShadowStackValue; + Value* value = _localsMap[{m_shadowStackLclNum, m_shadowStackSsaNum}]; + return value; } - // Note that funclets have the shadow stack arg in the 0th position. + // Note that funclets also have the shadow stack arg in the 0th position. return getCurrentLlvmFunction()->getArg(SHADOW_STACK_ARG_INDEX); } @@ -3270,17 +3253,6 @@ Value* Llvm::getShadowStackForCallee(bool isTailCall) return gepOrAddrInBounds(getShadowStack(), calleeShadowStackOffset); } -Value* Llvm::getOriginalShadowStack() -{ - if (isCurrentContextInFilter()) - { - // The original shadow stack pointer is the second filter parameter. 
- return getCurrentLlvmFunction()->getArg(1); - } - - return getShadowStack(); -} - void Llvm::setCurrentEmitContextForBlock(BasicBlock* block) { unsigned funcIdx = getLlvmFunctionIndexForBlock(block); @@ -3472,23 +3444,31 @@ llvm::BasicBlock* Llvm::getLastLlvmBlockForBlock(BasicBlock* block) return getLlvmBlocksForBlock(block)->LastBlock; } -llvm::BasicBlock* Llvm::getOrCreatePrologLlvmBlockForFunction(unsigned funcIdx) +InsertPoint Llvm::getOrCreateEarlyPrologForFunction(unsigned funcIdx) { - const char* const PROLOG_BLOCK_NAME = "BB00"; - - BasicBlock* firstUserBlock = getFirstBlockForFunction(funcIdx); - llvm::BasicBlock* firstLlvmUserBlock = getFirstLlvmBlockForBlock(firstUserBlock); - llvm::BasicBlock* prologLlvmBlock = firstLlvmUserBlock->getPrevNode(); - if ((prologLlvmBlock == nullptr) || !prologLlvmBlock->getName().starts_with(PROLOG_BLOCK_NAME)) + FunctionInfo& funcInfo = getLlvmFunctionInfoForIndex(funcIdx); + Instruction* inst = funcInfo.LastEarlyPrologInst; + if (funcIdx == ROOT_FUNC_IDX) { - Function* llvmFunc = firstLlvmUserBlock->getParent(); - prologLlvmBlock = llvm::BasicBlock::Create(m_context->Context, PROLOG_BLOCK_NAME, llvmFunc, firstLlvmUserBlock); - - // Eagerly insert jump to the user block to simplify calling code. - llvm::BranchInst::Create(firstLlvmUserBlock, prologLlvmBlock); + // The root prolog is always created eagerly. 
+ if (inst == nullptr) + { + llvm::BasicBlock* llvmBlock = &funcInfo.LlvmFunction->getEntryBlock(); + return {llvmBlock, llvmBlock->begin()}; + } + return {inst->getParent(), inst->getNextNode()->getIterator()}; } + if (inst == nullptr) + { + Function* llvmFunc = funcInfo.LlvmFunction; + llvm::BasicBlock* llvmFuncBlock = &llvmFunc->getEntryBlock(); + llvm::BasicBlock* prologLlvmBlock = + llvm::BasicBlock::Create(m_context->Context, BBNAME("BB", 0), llvmFunc, llvmFuncBlock); - return prologLlvmBlock; + inst = llvm::BranchInst::Create(llvmFuncBlock, prologLlvmBlock); + funcInfo.LastEarlyPrologInst = inst; + } + return {inst->getParent(), inst->getIterator()}; } //------------------------------------------------------------------------ @@ -3573,10 +3553,13 @@ Value* Llvm::getOrCreateAllocaForLocalInFunclet(unsigned lclNum) llvm::AllocaInst* allocaInst; if (!allocaMap->Lookup(lclNum, &allocaInst)) { - llvm::BasicBlock* prologLlvmBlock = getOrCreatePrologLlvmBlockForFunction(funcIdx); - allocaInst = new llvm::AllocaInst(getLlvmTypeForLclVar(varDsc), 0, "", prologLlvmBlock->getTerminator()); + InsertPoint ambientIp = _builder.saveIP(); + _builder.restoreIP(getOrCreateEarlyPrologForFunction(funcIdx)); + allocaInst = _builder.CreateAlloca(getLlvmTypeForLclVar(varDsc)); allocaMap->Set(lclNum, allocaInst); + + _builder.restoreIP(ambientIp); } return allocaInst; diff --git a/src/coreclr/jit/llvmdebuginfo.cpp b/src/coreclr/jit/llvmdebuginfo.cpp index 60178894a15..f6c72a161e3 100644 --- a/src/coreclr/jit/llvmdebuginfo.cpp +++ b/src/coreclr/jit/llvmdebuginfo.cpp @@ -617,7 +617,7 @@ void Llvm::declareDebugVariables() } DILocation* debugLocation = getArtificialDebugLocation(); - Instruction* insertInst = _builder.GetInsertBlock()->getTerminator(); + llvm::BasicBlock* insertBlock = _builder.GetInsertBlock(); Value* spilledShadowStackAddr = nullptr; for (auto lcl : decltype(m_debugVariablesMap)::KeyValueIteration(&m_debugVariablesMap)) { @@ -664,7 +664,7 @@ void 
Llvm::declareDebugVariables() llvm::DILocalVariable* debugVariable = lcl->GetValue(); DIExpression* debugExpression = m_diBuilder->createExpression(AsRef(diExpression)); Instruction* debugInst = - m_diBuilder->insertDeclare(addressValue, debugVariable, debugExpression, debugLocation, insertInst); + m_diBuilder->insertDeclare(addressValue, debugVariable, debugExpression, debugLocation, insertBlock); JITDUMP("Declaring V%02u:\n", lclNum); JITDUMPEXEC(displayValue(debugInst)); } diff --git a/src/coreclr/jit/llvmlower.cpp b/src/coreclr/jit/llvmlower.cpp index 6c921bad8b2..dbb15fad9f7 100644 --- a/src/coreclr/jit/llvmlower.cpp +++ b/src/coreclr/jit/llvmlower.cpp @@ -135,8 +135,6 @@ void Llvm::initializeFunclets() FuncInfoDsc* funcInfo = _compiler->funGetFunc(ehDsc->ebdFilterFuncIndex); funcInfo->funKind = FUNC_FILTER; funcInfo->funEHIndex = static_cast(ehIndex); - - m_anyFilterFunclets = true; } if (ehDsc->HasFinallyHandler()) @@ -171,19 +169,11 @@ void Llvm::initializeFunclets() // void Llvm::initializeLlvmArgInfo() { - if (m_anyFilterFunclets) - { - _originalShadowStackLclNum = _compiler->lvaGrabTemp(true DEBUGARG("original shadowstack")); - LclVarDsc* originalShadowStackVarDsc = _compiler->lvaGetDesc(_originalShadowStackLclNum); - originalShadowStackVarDsc->lvType = TYP_I_IMPL; - originalShadowStackVarDsc->lvCorInfoType = CORINFO_TYPE_PTR; - } - unsigned nextLlvmArgNum = 0; bool isManagedAbi = !_compiler->opts.IsReversePInvoke(); - _shadowStackLclNum = _compiler->lvaGrabTempWithImplicitUse(true DEBUGARG("shadowstack")); - LclVarDsc* shadowStackVarDsc = _compiler->lvaGetDesc(_shadowStackLclNum); + m_shadowStackLclNum = _compiler->lvaGrabTempWithImplicitUse(true DEBUGARG("shadowstack")); + LclVarDsc* shadowStackVarDsc = _compiler->lvaGetDesc(m_shadowStackLclNum); shadowStackVarDsc->lvType = TYP_I_IMPL; shadowStackVarDsc->lvCorInfoType = CORINFO_TYPE_PTR; if (isManagedAbi) @@ -437,10 +427,6 @@ void Llvm::lowerCall(GenTreeCall* callNode) { lowerRethrow(callNode); } - 
else if (callNode->IsHelperCall(_compiler, CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT)) - { - lowerReversePInvokeExit(callNode); - } // "gtFoldExprConst" can attach a superflous argument to the overflow helper. Remove it. else if (callNode->IsHelperCall(_compiler, CORINFO_HELP_OVERFLOW) && !callNode->gtArgs.IsEmpty()) { @@ -801,13 +787,6 @@ void Llvm::lowerDelegateInvoke(GenTreeCall* callNode) lowerIndir(callTarget->AsIndir()); } -void Llvm::lowerReversePInvokeExit(GenTreeCall* callNode) -{ - // The RPI exit call has an additional argument - the shadow stack top on entry to this RPI method. - GenTree* previousShadowStackTop = insertShadowStackAddr(callNode, 0, _shadowStackLclNum); - callNode->gtArgs.PushFront(_compiler, NewCallArg::Primitive(previousShadowStackTop, CORINFO_TYPE_PTR)); -} - void Llvm::lowerUnmanagedCall(GenTreeCall* callNode) { assert(callNode->IsUnmanaged()); @@ -1162,26 +1141,6 @@ unsigned Llvm::representAsLclVar(LIR::Use& use) return use.ReplaceWithLclVar(_compiler); } -GenTree* Llvm::insertShadowStackAddr(GenTree* insertBefore, unsigned offset, unsigned shadowStackLclNum) -{ - assert(isShadowStackLocal(shadowStackLclNum)); - - GenTree* shadowStackLcl = _compiler->gtNewLclvNode(shadowStackLclNum, TYP_I_IMPL); - CurrentRange().InsertBefore(insertBefore, shadowStackLcl); - - if (offset == 0) - { - return shadowStackLcl; - } - - // Using an address mode node here explicitizes our assumption that the shadow stack does not overflow. - assert(offset <= getShadowFrameSize(ROOT_FUNC_IDX)); - GenTree* addrModeNode = createAddrModeNode(shadowStackLcl, offset); - CurrentRange().InsertBefore(insertBefore, addrModeNode); - - return addrModeNode; -} - //------------------------------------------------------------------------ // createAddrModeNode: Create an address mode node. 
// @@ -1306,7 +1265,7 @@ bool Llvm::isFirstBlockCanonical() return !block->hasTryIndex() && (block->bbPreds == nullptr); } -GenTree* Llvm::lowerAndInsertIntoFirstBlock(LIR::Range& range, GenTree* insertAfter) +GenTree* Llvm::lowerAndInsertIntoFirstBlock(LIR::Range&& range, GenTree* insertAfter) { assert(isFirstBlockCanonical()); lowerRange(_compiler->fgFirstBB, range); @@ -1652,7 +1611,7 @@ bool Llvm::addVirtualUnwindFrameForExceptionHandling() initRange.InsertAtEnd(ehInfoNode); initRange.InsertAtEnd(initialUnwindIndexNode); initRange.InsertAtEnd(initializeCall); - m_llvm->lowerAndInsertIntoFirstBlock(initRange); + m_llvm->lowerAndInsertIntoFirstBlock(std::move(initRange)); m_llvm->m_sparseVirtualUnwindFrameLclNum = unwindFrameLclNum; } @@ -2197,13 +2156,11 @@ bool Llvm::isBlockInFilter(BasicBlock* block) const { if (m_blocksInFilters == BitVecOps::UninitVal()) { - assert(!m_anyFilterFunclets); assert(!block->hasHndIndex() || !_compiler->ehGetBlockHndDsc(block)->InFilterRegionBBRange(block)); return false; } // Ideally, this would be a flag (BBF_*), but we make do with a bitset for now to avoid modifying the frontend. 
- assert(m_anyFilterFunclets); BitVecTraits bitVecTraits(_compiler->fgBBNumMax + 1, _compiler); return BitVecOps::IsMember(&bitVecTraits, m_blocksInFilters, block->bbNum); } diff --git a/src/coreclr/jit/llvmlssa.cpp b/src/coreclr/jit/llvmlssa.cpp index 3edb0e16b8d..771353f2767 100644 --- a/src/coreclr/jit/llvmlssa.cpp +++ b/src/coreclr/jit/llvmlssa.cpp @@ -39,6 +39,8 @@ // class ShadowStackAllocator { + static const unsigned DEFAULT_SHADOW_STACK_ALIGNMENT = TARGET_POINTER_SIZE; + Compiler* const m_compiler; Llvm* const m_llvm; @@ -48,9 +50,9 @@ class ShadowStackAllocator BitVecTraits m_candidateBitSetTraits = BitVecTraits(0, nullptr); BitVec m_explicitInitShadowSlots = BitVecOps::UninitVal(); + unsigned m_shadowFrameAlignment = DEFAULT_SHADOW_STACK_ALIGNMENT; unsigned m_prologZeroingOffset = 0; unsigned m_prologZeroingSize = 0; - GenTree* m_lastPrologNode = nullptr; #ifdef FEATURE_LSSA_ALLOCATION_RESULT class LssaAllocationResult; @@ -146,7 +148,7 @@ class ShadowStackAllocator continue; } - if ((varDsc->lvRefCnt() == 0) || m_llvm->isFuncletParameter(lclNum)) + if ((varDsc->lvRefCnt() == 0) || (lclNum == m_llvm->m_shadowStackLclNum)) { continue; } @@ -667,8 +669,8 @@ class ShadowStackAllocator if (defNode == nullptr) { VarSetOps::AddElemD(m_compiler, block->bbLiveIn, varDsc->lvVarIndex); - defBlockRange.InsertAfter(m_lssa->m_lastPrologNode, value, store); - m_lssa->m_lastPrologNode = store; + defBlockRange.InsertAfter(m_llvm->m_prologEnd, value, store); + m_llvm->m_prologEnd = store; } else if (defNode->IsPhiDefn()) { @@ -1552,7 +1554,7 @@ class ShadowStackAllocator if (varDsc->lvStructDoubleAlign) { alignment = 8; - m_llvm->m_shadowFrameAlignment = alignment; + m_shadowFrameAlignment = alignment; } #endif // !TARGET_64BIT @@ -1617,7 +1619,7 @@ class ShadowStackAllocator } } - m_llvm->_shadowStackLocalsSize = AlignUp(offset, Llvm::DEFAULT_SHADOW_STACK_ALIGNMENT); + m_llvm->_shadowStackLocalsSize = AlignUp(offset, DEFAULT_SHADOW_STACK_ALIGNMENT); 
m_compiler->compLclFrameSize = m_llvm->_shadowStackLocalsSize; m_compiler->lvaDoneFrameLayout = Compiler::TENTATIVE_FRAME_LAYOUT; @@ -1630,35 +1632,122 @@ class ShadowStackAllocator void FinalizeProlog() { LIR::Range initRange; + m_llvm->m_currentBlock = m_compiler->fgFirstBB; m_llvm->m_currentRange = &initRange; + + InitializeShadowStackValue(); m_llvm->initializePreciseVirtualUnwindFrame(); unsigned zeroingSize = m_prologZeroingSize; if (zeroingSize != 0) { unsigned offset = m_prologZeroingOffset; - GenTree* addr = m_llvm->insertShadowStackAddr(nullptr, offset, m_llvm->_shadowStackLclNum); - GenTree* zero = m_compiler->gtNewIconNode(0); - ClassLayout* layout = m_compiler->typGetBlkLayout(zeroingSize); - GenTree* store = m_compiler->gtNewStoreBlkNode(layout, addr, zero, GTF_IND_NONFAULTING); - initRange.InsertAfter(addr, zero, store); - + GenTree* store = InsertZeroShadowFrame(offset, zeroingSize); JITDUMP("Added zero-initialization for shadow locals at: [%i, %i]:\n", offset, offset + zeroingSize); DISPTREERANGE(initRange, store); - RecordAllocationActionZeroInit(m_compiler->fgFirstBB, offset, zeroingSize); + RecordAllocationActionZeroInit(m_llvm->CurrentBlock(), offset, zeroingSize); } - GenTree* lastInitNode = m_llvm->lowerAndInsertIntoFirstBlock(initRange); // Insert at the start. - if (m_lastPrologNode == nullptr) + // TODO-LLVM-Cleanup: this seems more complicated than it needs to be... + GenTree* lastInitNode = m_llvm->lowerAndInsertIntoFirstBlock(std::move(initRange)); // Insert at the start. + if (m_llvm->m_prologEnd == nullptr) { - m_lastPrologNode = lastInitNode; + m_llvm->m_prologEnd = lastInitNode; } // Insert a zero-offset ILOffset to notify codegen this is the start of user code. 
DebugInfo zeroILOffsetDi = DebugInfo(m_compiler->compInlineContext, ILLocation(0, /* isStackEmpty */ true, /* isCall */ false)); GenTree* zeroILOffsetNode = new (m_compiler, GT_IL_OFFSET) GenTreeILOffset(zeroILOffsetDi); - LIR::AsRange(m_compiler->fgFirstBB).InsertAfter(m_lastPrologNode, zeroILOffsetNode); + LIR::AsRange(m_compiler->fgFirstBB).InsertAfter(m_llvm->m_prologEnd, zeroILOffsetNode); + + m_llvm->m_prologEnd = zeroILOffsetNode; + } + + void InitializeShadowStackValue() + { + unsigned lclNum = m_llvm->m_shadowStackLclNum; + LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); + // The liveness of our shadow stack local that has been computed before LSSA is not correct since we haven't + // yet added all the uses. Since we don't use the liveness info for it anyway, just mark it untracked. + varDsc->lvTracked = 0; + + GenTreeLclVar* def = nullptr; + if (!varDsc->lvIsParam) + { + GenTree* call = m_compiler->gtNewHelperCallNode(CORINFO_HELP_LLVM_GET_OR_INIT_SHADOW_STACK_TOP, TYP_I_IMPL); + def = m_compiler->gtNewStoreLclVarNode(lclNum, call); + m_llvm->CurrentRange().InsertAtEnd(call); + m_llvm->CurrentRange().InsertAtEnd(def); + varDsc->lvHasExplicitInit = 1; + + JITDUMP("ReversePInvoke: initialized the shadow stack:\n"); + DISPTREERANGE(m_llvm->CurrentRange(), def); + } + m_llvm->m_shadowStackSsaNum = AddUntrackedSsaDef(def, lclNum); + + unsigned alignment = m_shadowFrameAlignment; + if (alignment != DEFAULT_SHADOW_STACK_ALIGNMENT) + { + // Zero the padding that may be introduced by the code below. This serves two purposes: + // 1. We don't leave "random" pointers on the shadow stack. + // 2. We allow precise virtual unwinding out of overaligned frames, by skipping the zeroed padding. + GenTreeIndir* store = InsertZeroShadowFrame(0, alignment - DEFAULT_SHADOW_STACK_ALIGNMENT); + + // Generate: "pShadowStack = (pShadowStack + 7) & ~7". 
+ GenTree* initialValue = InsertShadowStackAddr(nullptr, 0); + GenTree* addend = m_compiler->gtNewIconNode(alignment - 1, TYP_I_IMPL); + GenTree* valueWithAddend = m_compiler->gtNewOperNode(GT_ADD, TYP_I_IMPL, initialValue, addend); + m_llvm->CurrentRange().InsertAfter(initialValue, addend, valueWithAddend); + + GenTree* mask = m_compiler->gtNewIconNode(static_cast(~(alignment - 1)), TYP_I_IMPL); + GenTree* alignedValue = m_compiler->gtNewOperNode(GT_AND, TYP_I_IMPL, valueWithAddend, mask); + GenTreeLclVar* alignedValueDef = m_compiler->gtNewStoreLclVarNode(lclNum, alignedValue); + m_llvm->CurrentRange().InsertAfter(valueWithAddend, mask, alignedValue, alignedValueDef); + m_llvm->m_shadowStackSsaNum = AddUntrackedSsaDef(alignedValueDef, lclNum); + + JITDUMP("Aligning the shadow frame to %u bytes:\n", alignment); + DISPRANGE(LIR::ReadOnlyRange(store->Addr(), m_llvm->CurrentRange().LastNode())); + } + } + + unsigned AddUntrackedSsaDef(GenTreeLclVar* def, unsigned lclNum) + { + LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); + if (!m_compiler->lvaInSsa(lclNum)) + { + varDsc->lvInSsa = 1; + } + + // We don't initialize other fields of the LclSsaVarDsc like "HasGlobalUse", etc, since codegen + // currently doesn't need them. If/when that changes we'll need to faithfully set them... + unsigned ssaNum = (def == nullptr) ? 
SsaConfig::FIRST_SSA_NUM : SsaConfig::RESERVED_SSA_NUM; + if (!varDsc->lvPerSsaData.IsValidSsaNum(ssaNum)) + { + ssaNum = varDsc->lvPerSsaData.AllocSsaNum(m_compiler->getAllocator(CMK_SSA), m_llvm->CurrentBlock(), def); + } + else + { + LclSsaVarDsc* ssaDsc = varDsc->GetPerSsaData(ssaNum); + ssaDsc->SetBlock(m_llvm->CurrentBlock()); + ssaDsc->SetDefNode(def); + } + if (def != nullptr) + { + def->SetSsaNum(ssaNum); + } + return ssaNum; + } + + GenTreeIndir* InsertZeroShadowFrame(unsigned offset, unsigned size) + { + GenTree* addr = InsertShadowStackAddr(nullptr, offset); + GenTree* zero = m_compiler->gtNewIconNode(0); + ClassLayout* layout = m_compiler->typGetBlkLayout(size); + GenTreeIndir* store = m_compiler->gtNewStoreBlkNode(layout, addr, zero, GTF_IND_NONFAULTING); + + m_llvm->CurrentRange().InsertAfter(addr, zero, store); + return store; } GenTreeLclVar* InitializeLocalInProlog(unsigned lclNum, GenTree* value) @@ -1671,7 +1760,7 @@ class ShadowStackAllocator LIR::Range range; range.InsertAtEnd(value); range.InsertAtEnd(store); - m_lastPrologNode = m_llvm->lowerAndInsertIntoFirstBlock(range, m_lastPrologNode); + m_llvm->m_prologEnd = m_llvm->lowerAndInsertIntoFirstBlock(std::move(range), m_llvm->m_prologEnd); DISPTREERANGE(LIR::AsRange(m_compiler->fgFirstBB), store); return store; @@ -1738,11 +1827,10 @@ class ShadowStackAllocator // Filters will be called by the first pass while live state still exists on shadow frames above (in the // traditional sense, where stacks grow down) them. For this reason, filters will access state from the // original frame via a dedicated shadow stack pointer, and use the actual shadow stack for calls. - unsigned shadowStackLclNum = m_llvm->isBlockInFilter(m_llvm->CurrentBlock()) - ? m_llvm->_originalShadowStackLclNum - : m_llvm->_shadowStackLclNum; + regNumber shadowStackArgReg = + m_llvm->isBlockInFilter(m_llvm->CurrentBlock()) ? 
REG_ORIGINAL_SHADOW_STACK_ARG : REG_NA; unsigned lclOffset = lclBaseOffset + lclNode->GetLclOffs(); - GenTree* lclAddress = m_llvm->insertShadowStackAddr(lclNode, lclOffset, shadowStackLclNum); + GenTree* lclAddress = InsertShadowStackAddr(lclNode, lclOffset, shadowStackArgReg); ClassLayout* layout = lclNode->TypeIs(TYP_STRUCT) ? lclNode->GetLayout(m_compiler) : nullptr; GenTree* storedValue = nullptr; @@ -1803,6 +1891,16 @@ class ShadowStackAllocator void RewriteCall(GenTreeCall* call) { + if (call->IsHelperCall(m_compiler, CORINFO_HELP_JIT_REVERSE_PINVOKE_EXIT)) + { + // The RPI exit call has an additional argument - the shadow stack top on entry to this RPI method. + GenTree* previousShadowStackTop = InsertShadowStackAddr(call, 0); + CallArg* callArg = + call->gtArgs.PushFront(m_compiler, NewCallArg::Primitive(previousShadowStackTop, CORINFO_TYPE_PTR)); + callArg->AbiInfo.IsPointer = true; + callArg->AbiInfo.ArgType = TYP_I_IMPL; + } + + // Add in the shadow stack argument now that we know the shadow frame size. 
if (m_llvm->callHasManagedCallingConvention(call)) { @@ -1821,8 +1919,7 @@ class ShadowStackAllocator unsigned funcIdx = m_llvm->getLlvmFunctionIndexForBlock(block); unsigned calleeShadowStackOffset = m_llvm->getCalleeShadowStackOffset(funcIdx, isTailCall); - GenTree* calleeShadowStack = - m_llvm->insertShadowStackAddr(call, calleeShadowStackOffset, m_llvm->_shadowStackLclNum); + GenTree* calleeShadowStack = InsertShadowStackAddr(call, calleeShadowStackOffset); CallArg* calleeShadowStackArg = call->gtArgs.PushFront(m_compiler, NewCallArg::Primitive(calleeShadowStack, CORINFO_TYPE_PTR)); @@ -1837,6 +1934,41 @@ class ShadowStackAllocator } } + GenTree* InsertShadowStackAddr(GenTree* insertBefore, unsigned offset, regNumber shadowStackArgReg = REG_NA) + { + GenTree* shadowStack; + if ((shadowStackArgReg == REG_NA) && + (m_llvm->getLlvmFunctionIndexForBlock(m_llvm->CurrentBlock()) != Llvm::ROOT_FUNC_IDX)) + { + // Funclets also reference the shadow stack via PHYSREG for simplicity. + shadowStackArgReg = REG_SHADOW_STACK_ARG; + } + if (shadowStackArgReg == REG_NA) + { + assert(m_llvm->m_shadowStackSsaNum != SsaConfig::RESERVED_SSA_NUM); + GenTreeLclVar* shadowStackLcl = m_compiler->gtNewLclVarNode(m_llvm->m_shadowStackLclNum); + shadowStackLcl->SetSsaNum(m_llvm->m_shadowStackSsaNum); + shadowStack = shadowStackLcl; + } + else + { + shadowStack = m_compiler->gtNewPhysRegNode(shadowStackArgReg, TYP_I_IMPL); + } + m_llvm->CurrentRange().InsertBefore(insertBefore, shadowStack); + + if (offset == 0) + { + return shadowStack; + } + + // Using an address mode node here explicitizes our assumption that the shadow stack does not overflow. 
+ assert(offset <= m_llvm->getShadowFrameSize(Llvm::ROOT_FUNC_IDX)); + GenTree* addrModeNode = m_llvm->createAddrModeNode(shadowStack, offset); + m_llvm->CurrentRange().InsertBefore(insertBefore, addrModeNode); + + return addrModeNode; + } + bool CanShadowTailCall(BasicBlock* block, GenTreeCall* call DEBUGARG(const char** pReasonWhyNot = nullptr)) { if (!CanShadowTailCallInBlock(block DEBUGARG(pReasonWhyNot))) @@ -2530,13 +2662,3 @@ bool Llvm::isShadowFrameLocal(LclVarDsc* varDsc) const // not a great fit because of defaulting to "true" for new locals. return varDsc->GetRegNum() == REG_STK; } - -bool Llvm::isShadowStackLocal(unsigned lclNum) const -{ - return (lclNum == _shadowStackLclNum) || (lclNum == _originalShadowStackLclNum); -} - -bool Llvm::isFuncletParameter(unsigned lclNum) const -{ - return isShadowStackLocal(lclNum); -} diff --git a/src/coreclr/jit/register.h b/src/coreclr/jit/register.h index caf38f26ec3..7dc6baf8e31 100644 --- a/src/coreclr/jit/register.h +++ b/src/coreclr/jit/register.h @@ -345,13 +345,15 @@ REGDEF(STK, 8+KBASE, 0x0000, "STK" ) // (in particular, fixing the assumption that "this" is always enregistered), we will // pretend we have one register (both integer and FP, for simplicity). 
// -REGDEF(R0, 0, 0x01, "R0") -REGDEF(F0, 1, 0x02, "F0") -REGDEF(LLVM, 2, 0x04, "LLVM") -REGDEF(STK_CANDIDATE_UNCONDITIONAL, 3, 0x08, "SS_UNCONDITIONAL") -REGDEF(STK_CANDIDATE_TENTATIVE, 4, 0x10, "SS_TENTATIVE") -REGDEF(STK_CANDIDATE_COMMITED, 5, 0x20, "SS_COMMITED") -REGDEF(STK, 6, 0x40, "SS") +REGDEF(R0, 0, 0x001, "R0") +REGDEF(F0, 1, 0x002, "F0") +REGDEF(LLVM, 2, 0x004, "LLVM") +REGDEF(STK_CANDIDATE_UNCONDITIONAL, 3, 0x008, "SS_UNCONDITIONAL") +REGDEF(STK_CANDIDATE_TENTATIVE, 4, 0x010, "SS_TENTATIVE") +REGDEF(STK_CANDIDATE_COMMITED, 5, 0x020, "SS_COMMITED") +REGDEF(SHADOW_STACK_ARG, 6, 0x040, "SS_ARG") +REGDEF(ORIGINAL_SHADOW_STACK_ARG, 7, 0x080, "ORIGINAL_SS_ARG") +REGDEF(STK, 8, 0x100, "SS") #elif defined(TARGET_LOONGARCH64) #include "registerloongarch64.h"