@@ -41,6 +41,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
4141#include "ShaderCodeGen.hpp"
4242#include "common/allocator.h"
4343#include "common/debug/Dump.hpp"
44+ #include "common/debug/Dump.hpp"
4445#include "common/igc_regkeys.hpp"
4546#include "common/Stats.hpp"
4647#include "Compiler/CISACodeGen/helper.h"
@@ -450,10 +451,7 @@ bool EmitPass::runOnFunction(llvm::Function& F)
450451 if (hasStackCall)
451452 {
452453 m_encoder->InitFuncAttribute(&F, true);
453- CVariable* pStackBase = nullptr;
454- CVariable* pStackSize = nullptr;
455- m_currShader->InitKernelStack(pStackBase, pStackSize);
456- emitAddSP(m_currShader->GetSP(), pStackBase, pStackSize);
454+ InitializeKernelStack(&F);
457455 }
458456 m_currShader->AddPrologue();
459457 }
@@ -9380,6 +9378,80 @@ void EmitPass::emitReturn(llvm::ReturnInst* inst)
93809378 m_currShader->AddEpilogue(inst);
93819379}
93829380
9381+ /// Initializes the kernel for stack call by initializing the SP and FP
9382+ void EmitPass::InitializeKernelStack(Function* pKernel)
9383+ {
9384+ m_currShader->CreateSP();
9385+ auto pCtx = getAnalysis<CodeGenContextWrapper>().getCodeGenContext();
9386+ auto pModuleMetadata = pCtx->getModuleMetaData();
9387+
9388+ CVariable* pStackBufferBase = m_currShader->GetPrivateBase();
9389+
9390+ CVariable* pHWTID = m_currShader->GetHWTID();
9391+
9392+ CVariable* pSize = nullptr;
9393+
9394+ // Maximun private size in byte, per-workitem
9395+ // When there's stack call, we don't know the actual stack size being used,
9396+ // so set a conservative max stack size.
9397+ uint32_t MaxPrivateSize = m_currShader->GetMaxPrivateMem();
9398+ if (IGC_IS_FLAG_ENABLED(EnableRuntimeFuncAttributePatching))
9399+ {
9400+ // Experimental: Patch private memory size
9401+ std::string patchName = "INTEL_PATCH_PRIVATE_MEMORY_SIZE";
9402+ pSize = m_currShader->GetNewVariable(1, ISA_TYPE_UD, CVariable::getAlignment(getGRFSize()), true, CName(patchName));
9403+ m_encoder->AddVISASymbol(patchName, pSize);
9404+ }
9405+ else
9406+ {
9407+ // hard-code per-workitem private-memory size to max size
9408+ pSize = m_currShader->ImmToVariable(MaxPrivateSize * numLanes(m_currShader->m_dispatchSize), ISA_TYPE_UD);
9409+ }
9410+
9411+ CVariable* pThreadOffset = m_currShader->GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, 1, CName::NONE);
9412+ m_encoder->Mul(pThreadOffset, pHWTID, pSize);
9413+ m_encoder->Push();
9414+
9415+ unsigned totalAllocaSize = 0;
9416+
9417+ // reserve space for kernel FP
9418+ totalAllocaSize += SIZE_OWORD;
9419+
9420+ // reserve space for alloca
9421+ auto funcMDItr = pModuleMetadata->FuncMD.find(pKernel);
9422+ if (funcMDItr != pModuleMetadata->FuncMD.end())
9423+ {
9424+ if (funcMDItr->second.privateMemoryPerWI != 0)
9425+ {
9426+ totalAllocaSize += funcMDItr->second.privateMemoryPerWI * numLanes(m_currShader->m_dispatchSize);
9427+
9428+ if ((uint32_t)funcMDItr->second.privateMemoryPerWI > MaxPrivateSize)
9429+ {
9430+ pCtx->EmitError("Private memory allocation exceeds max allowed size");
9431+ IGC_ASSERT(0);
9432+ }
9433+ }
9434+ }
9435+
9436+ // Set the total alloca size for the entry function
9437+ m_encoder->SetFunctionAllocaStackSize(pKernel, totalAllocaSize);
9438+
9439+ if (!IGC_IS_FLAG_ENABLED(EnableRuntimeFuncAttributePatching))
9440+ {
9441+ // If we don't return per-function private memory size,
9442+ // modify private-memory size to a large setting.
9443+ // This will be reported through patch-tokens as per-kernel requirement.
9444+ pModuleMetadata->FuncMD[pKernel].privateMemoryPerWI = MaxPrivateSize;
9445+ }
9446+
9447+ // Initialize SP to per-thread kernel stack base
9448+ CVariable* pSP = m_currShader->GetSP();
9449+ emitAddSP(pSP, pStackBufferBase, pThreadOffset);
9450+
9451+ // Update FP and SP
9452+ emitPushToStack(m_currShader->ImmToVariable(totalAllocaSize, ISA_TYPE_UD), true);
9453+ }
9454+
93839455/// This function is NOT about the alignment-rule for storing argv into GRF!
93849456/// It is about the alignment-rule when we pack the arguments into a block for stack-call!
93859457uint EmitPass::stackCallArgumentAlignment(CVariable* argv)
@@ -9942,35 +10014,41 @@ void EmitPass::emitStackFuncEntry(Function* F)
994210014 }
994310015 }
994410016 }
9945- // save SP before allocation
9946- m_currShader->SaveSP();
10017+
10018+ unsigned totalAllocaSize = 0;
10019+
10020+ // reserve space to store caller's FP
10021+ totalAllocaSize += SIZE_OWORD;
994710022
994810023 // reserve space for all the alloca in the function subgroup
994910024 auto funcMDItr = m_currShader->m_ModuleMetadata->FuncMD.find(F);
995010025 if (funcMDItr != m_currShader->m_ModuleMetadata->FuncMD.end())
995110026 {
995210027 if (funcMDItr->second.privateMemoryPerWI != 0)
995310028 {
9954- CVariable* pSP = m_currShader->GetSP();
9955- unsigned totalAllocaSize = funcMDItr->second.privateMemoryPerWI * numLanes(m_currShader->m_dispatchSize);
9956- emitAddSP(pSP, pSP, m_currShader->ImmToVariable(totalAllocaSize, ISA_TYPE_UD));
9957-
9958- // Set the per-function private mem size
9959- m_encoder->SetFunctionAllocaStackSize(F, totalAllocaSize);
9960-
10029+ totalAllocaSize += funcMDItr->second.privateMemoryPerWI * numLanes(m_currShader->m_dispatchSize);
996110030 if ((uint32_t)funcMDItr->second.privateMemoryPerWI > m_currShader->GetMaxPrivateMem())
996210031 {
996310032 m_currShader->GetContext()->EmitError("Private memory allocation exceeds max allowed size");
996410033 IGC_ASSERT(0);
996510034 }
996610035 }
996710036 }
10037+
10038+ // save FP before allocation
10039+ m_currShader->SaveStackState();
10040+
10041+ // Update SP and FP
10042+ emitPushToStack(m_currShader->ImmToVariable(totalAllocaSize, ISA_TYPE_UD), false);
10043+
10044+ // Set the per-function private mem size
10045+ m_encoder->SetFunctionAllocaStackSize(F, totalAllocaSize);
996810046}
996910047
997010048void EmitPass::emitStackFuncExit(llvm::ReturnInst* inst)
997110049{
9972- // restore SP
9973- m_currShader->RestoreSP ();
10050+ // restore SP and FP
10051+ m_currShader->RestoreStackState ();
997410052
997510053 llvm::Function* F = inst->getParent()->getParent();
997610054 llvm::Type* RetTy = F->getReturnType();
@@ -15955,6 +16033,35 @@ void EmitPass::emitGenISACopy(GenIntrinsicInst* GenCopyInst)
1595516033 emitCopyAll(Dst, Src, Ty);
1595616034}
1595716035
16036+ // Puts FP on stack, update FP to SP, then update SP by pushOffset
16037+ // If isKernel, write special FP value instead to indicate base of the stack
16038+ void EmitPass::emitPushToStack(CVariable* pushOffset, bool isKernel)
16039+ {
16040+ CVariable* pFP = m_currShader->GetFP();
16041+ CVariable* pSP = m_currShader->GetSP();
16042+ if (isKernel)
16043+ {
16044+ // Put 0 into FP to indicate kernel stack base
16045+ m_encoder->Copy(pFP, m_currShader->ImmToVariable(0, ISA_TYPE_UQ));
16046+ m_encoder->Push();
16047+ }
16048+
16049+ // Store FP value into current SP
16050+ bool is64BitAddr = (pSP->GetSize() > 4);
16051+ if (is64BitAddr)
16052+ m_encoder->OWStoreA64(pFP, pSP, SIZE_OWORD, 0);
16053+ else
16054+ m_encoder->OWStore(pFP, ESURFACE_STATELESS, nullptr, pSP, SIZE_OWORD, 0);
16055+ m_encoder->Push();
16056+
16057+ // Set FP = SP
16058+ m_encoder->Copy(pFP, pSP);
16059+ m_encoder->Push();
16060+
16061+ // Update SP by pushOffset
16062+ emitAddSP(pSP, pSP, pushOffset);
16063+ }
16064+
1595816065void EmitPass::emitAddSP(CVariable* Dst, CVariable* Src, CVariable* offset)
1595916066{
1596016067 if (m_currShader->m_Platform->hasNoInt64Inst() &&
0 commit comments