Skip to content

Commit 9e146a0

Browse files
Kotynia, Piotr authored and
pszymich committed
Revert disable EnableWriteOldFPToStack
Revert disable EnableWriteOldFPToStack flag. (cherry picked from commit 14c0772)
1 parent 7e10ebb commit 9e146a0

File tree

9 files changed

+106
-14
lines changed

9 files changed

+106
-14
lines changed

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10388,6 +10388,14 @@ void EmitPass::emitStackAlloca(GenIntrinsicInst* GII)
1038810388
{
1038910389
// Static private mem access is done through the FP
1039010390
CVariable* pFP = m_currShader->GetFP();
10391+
if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
10392+
{
10393+
// If we have written the previous FP to the current frame's start, the start of
10394+
// private memory will be offset by 16 bytes
10395+
CVariable* tempFP = m_currShader->GetNewVariable(pFP);
10396+
emitAddPointer(tempFP, pFP, m_currShader->ImmToVariable(getFPOffset(), ISA_TYPE_UD));
10397+
pFP = tempFP;
10398+
}
1039110399
CVariable* pOffset = m_currShader->GetSymbol(GII->getOperand(0));
1039210400
emitAddPointer(m_destination, pFP, pOffset);
1039310401
}
@@ -18708,13 +18716,55 @@ void EmitPass::emitPushFrameToStack(unsigned& pushSize)
1870818716
m_encoder->Copy(pFP, pSP);
1870918717
m_encoder->Push();
1871018718

18719+
// Allocate 1 extra oword to store previous frame's FP
18720+
pushSize += IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack) ? SIZE_OWORD : 0;
18721+
1871118722
// Since we use unaligned oword writes, pushSize should be OW aligned address
1871218723
pushSize = int_cast<unsigned>(llvm::alignTo(pushSize, SIZE_OWORD));
1871318724

1871418725
if (pushSize != 0)
1871518726
{
1871618727
// Update SP by pushSize
1871718728
emitAddPointer(pSP, pSP, m_currShader->ImmToVariable(pushSize, ISA_TYPE_UD));
18729+
18730+
if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
18731+
{
18732+
// Store old FP value to current FP
18733+
CVariable* pOldFP = m_currShader->GetPrevFP();
18734+
// If previous FP is null (for kernel frame), we initialize it to 0
18735+
if (pOldFP == nullptr)
18736+
{
18737+
pOldFP = m_currShader->GetNewVariable(pFP);
18738+
m_encoder->Copy(pOldFP, m_currShader->ImmToVariable(0, pOldFP->GetType()));
18739+
m_encoder->Push();
18740+
}
18741+
18742+
pFP = ReAlignUniformVariable(pFP, EALIGN_GRF);
18743+
bool useA64 = (pFP->GetSize() == 8);
18744+
if (shouldGenerateLSC())
18745+
{
18746+
ResourceDescriptor resource;
18747+
resource.m_surfaceType = ESURFACE_STATELESS;
18748+
emitLSCStore(nullptr, pOldFP, pFP, 64, 1, 0, &resource, (useA64 ? LSC_ADDR_SIZE_64b : LSC_ADDR_SIZE_32b), LSC_DATA_ORDER_TRANSPOSE, 0, 1);
18749+
m_encoder->Push();
18750+
}
18751+
else
18752+
{
18753+
if (useA64)
18754+
m_encoder->OWStoreA64(pOldFP, pFP, SIZE_OWORD, 0);
18755+
else {
18756+
// FP is in units of BYTES, but OWStore requires units of OWORDS
18757+
CVariable* offsetShr = m_currShader->GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, "FPOffset_OW");
18758+
m_encoder->SetSimdSize(SIMDMode::SIMD1);
18759+
m_encoder->SetNoMask();
18760+
m_encoder->SetSrcRegion(0, 0, 1, 0);
18761+
m_encoder->Shr(offsetShr, pFP, m_currShader->ImmToVariable(4, ISA_TYPE_UD));
18762+
m_encoder->Push();
18763+
m_encoder->OWStore(pOldFP, ESURFACE_STATELESS, nullptr, offsetShr, SIZE_OWORD, 0);
18764+
}
18765+
m_encoder->Push();
18766+
}
18767+
}
1871818768
}
1871918769
}
1872018770

IGC/Compiler/CISACodeGen/EmitVISAPass.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ class EmitPass : public llvm::FunctionPass
257257
void emitFlushSamplerCache();
258258
void emitSurfaceInfo(llvm::GenIntrinsicInst* intrinsic);
259259

260+
// Size in bytes (one OWORD) of the slot at the start of a stack frame that
// holds the previous frame's FP; private memory starts this far past FP.
// NOTE(review): codegen applies this offset only when EnableWriteOldFPToStack
// is enabled, whereas the DWARF location builders add it unconditionally --
// confirm debug locations stay correct when the flag is turned off.
static uint64_t getFPOffset() { return SIZE_OWORD; }
260261
void emitStackAlloca(llvm::GenIntrinsicInst* intrinsic);
261262
void emitVLAStackAlloca(llvm::GenIntrinsicInst* intrinsic);
262263

IGC/Compiler/DebugInfo/ScalarVISAModule.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ llvm::StringRef ScalarVisaModule::GetVISAFuncName() const
135135
// matches that used by VISA.
136136
return getFunction()->getName();
137137
}
138+
// Exposes the codegen FP-to-private-memory offset to the debug-info emitter
// by forwarding to EmitPass::getFPOffset(), so both sides share one value.
uint64_t ScalarVisaModule::getFPOffset() const {
139+
return EmitPass::getFPOffset();
140+
}
138141

139142
bool ScalarVisaModule::usesSlot1ScratchSpill() const {
140143
return m_pShader->ProgramOutput()->getScratchSpaceUsageInSlot1();

IGC/Compiler/DebugInfo/ScalarVISAModule.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ class ScalarVisaModule final : public IGC::VISAModule {
102102
bool hasPTO() const override {
103103
return getPerThreadOffset() != nullptr;
104104
}
105+
uint64_t getFPOffset() const override;
105106
int getPTOReg() const override;
106107
int getFPReg() const override;
107108

IGC/Compiler/Optimizer/OpenCLPasses/PrivateMemory/PrivateMemoryResolution.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,9 @@ bool PrivateMemoryResolution::runOnModule(llvm::Module& M)
251251
return 0;
252252

253253
uint32_t currFuncPrivateMem = (uint32_t)(funcIt->second.privateMemoryPerWI);
254+
// Add 1 OWORD for FP stack write
255+
if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
256+
currFuncPrivateMem += uint32_t(EmitPass::getFPOffset());
254257

255258
CallGraphNode* Node = CG[F];
256259

@@ -796,6 +799,26 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
796799

797800
if (privateOnStack)
798801
{
802+
// If the private memory is on the stack there may be a situation where
803+
// some extra data is placed at the beginning of stack frame (e.g. prev FP).
804+
// In that case, allocas' alignment may not be satisfied. To prevent this,
805+
// a padding is added between that extra data and the private memory.
806+
unsigned int allocasExtraOffset = 0;
807+
unsigned int padding = 0;
808+
if IGC_IS_FLAG_ENABLED(EnableWriteOldFPToStack)
809+
{
810+
allocasExtraOffset += uint32_t(EmitPass::getFPOffset());
811+
}
812+
813+
if (allocasExtraOffset > 0)
814+
{
815+
alignment_t privateMemoryAlignment = m_ModAllocaInfo->getPrivateMemAlignment(m_currFunction);
816+
padding = iSTD::Align(allocasExtraOffset, size_t(privateMemoryAlignment)) - allocasExtraOffset;
817+
}
818+
819+
modMD->FuncMD[m_currFunction].privateMemoryPerWI += padding;
820+
modMD->privateMemoryPerWI += padding;//redundant ?
821+
799822
// Creates intrinsics that will be lowered in the CodeGen and will handle the stack-pointer
800823
Instruction* simdLaneId16 = entryBuilder.CreateCall(simdLaneIdFunc, llvm::None, VALUE_NAME("simdLaneId16"));
801824
Value* simdLaneId = entryBuilder.CreateIntCast(simdLaneId16, typeInt32, false, VALUE_NAME("simdLaneId"));
@@ -834,6 +857,10 @@ bool PrivateMemoryResolution::resolveAllocaInstructions(bool privateOnStack)
834857
Value* increment = isUniform ? builder.getInt32(0) : simdLaneId;
835858
Value* perLaneOffset = builder.CreateMul(increment, ConstantInt::get(typeInt32, bufferSize), VALUE_NAME("perLaneOffset"));
836859
Value* totalOffset = builder.CreateAdd(bufferOffset, perLaneOffset, VALUE_NAME(pAI->getName() + ".totalOffset"));
860+
if (padding > 0)
861+
{
862+
totalOffset = builder.CreateAdd(totalOffset, ConstantInt::get(typeInt32, padding), VALUE_NAME(pAI->getName() + ".totalOffsetWithPadding"));
863+
}
837864
Function* stackAllocaFunc = GenISAIntrinsic::getDeclaration(m_currFunction->getParent(), GenISAIntrinsic::GenISA_StackAlloca);
838865
Value* stackAlloca = builder.CreateCall(stackAllocaFunc, totalOffset, VALUE_NAME("stackAlloca"));
839866
privateBuffer = builder.CreatePointerCast(stackAlloca, pAI->getType(), VALUE_NAME(pAI->getName() + ".privateBuffer"));

IGC/DebugInfo/DwarfCompileUnit.cpp

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1276,13 +1276,12 @@ void CompileUnit::addSLMLocation(IGC::DIEBlock *Block, const DbgVariable &DV,
12761276
// There is a private value in the current stack frame.
12771277
// Location encoding is similar to a global variable except SIMD lane
12781278
// location encoding and storage size connected to this, because since it is
1279-
// uniform we assume this size to be 0.
1280-
// 1 DW_OP_regx <Frame Pointer reg encoded>
1281-
// 2 DW_OP_const1u <bit-offset to Frame Pointer reg>
1282-
// 3 DW_OP_const1u 64, i.e. size in bits
1283-
// 4 DW_OP_INTEL_push_bit_piece_stack
1284-
// 6 DW_OP_plus_uconst storageOffset
1285-
// MD: StorageOffset -> the offset
1279+
// uniform we assume this size to be 0. 1 DW_OP_regx <Frame Pointer reg
1280+
// encoded> 2 DW_OP_const1u <bit-offset to Frame Pointer reg> 3
1281+
// DW_OP_const1u 64 , i.e. size in bits 4 DW_OP_INTEL_push_bit_piece_stack
1282+
// 5 DW_OP_plus_uconst SIZE_OWORD // i.e. 0x10 taken from
1283+
// getFPOffset(); same as emitted in EmitPass::emitStackAlloca() 6
1284+
// DW_OP_plus_uconst storageOffset // MD: StorageOffset; the offset
12861285
// where each variable is stored in the current stack frame
12871286

12881287
const auto *VISAMod = Loc.GetVISAModule();
@@ -1323,6 +1322,11 @@ void CompileUnit::addSLMLocation(IGC::DIEBlock *Block, const DbgVariable &DV,
13231322
// Frame Pointer reg>
13241323
extractSubRegValue(Block, 64);
13251324

1325+
addUInt(Block, dwarf::DW_FORM_data1,
1326+
dwarf::DW_OP_plus_uconst); // 5 DW_OP_plus_uconst SIZE_OWORD (taken
1327+
// from getFPOffset())
1328+
addUInt(Block, dwarf::DW_FORM_udata, VISAMod->getFPOffset());
1329+
13261330
addUInt(Block, dwarf::DW_FORM_data1,
13271331
dwarf::DW_OP_plus_uconst); // 6 DW_OP_plus_uconst storageOffset
13281332
addUInt(Block, dwarf::DW_FORM_udata, storageOffset); // storageOffset
@@ -2825,13 +2829,11 @@ bool CompileUnit::buildFpBasedLoc(const DbgVariable &var, IGC::DIEBlock *Block,
28252829
// 2 DW_OP_const1u <bit-offset to Frame Pointer reg>
28262830
// 3 DW_OP_const1u 64 , i.e. size in bits
28272831
// 4 DW_OP_INTEL_push_bit_piece_stack
2828-
// 5 DW_OP_push_simd_lane
2829-
// 6 DW_OP_const1u/2u/4u/8u storageSize
2830-
// MD: StorageSize -> the size of the variable
2831-
// 7 DW_OP_mul
2832-
// 8 DW_OP_plus
2833-
// 9 DW_OP_plus_uconst storageOffset
2834-
// MD: StorageOffset -> the offset where
2832+
// 5 DW_OP_plus_uconst SIZE_OWORD // i.e. 0x10 taken from
2833+
// getFPOffset(); same as emitted in EmitPass::emitStackAlloca() 6
2834+
// DW_OP_push_simd_lane 7 DW_OP_const1u/2u/4u/8u storageSize // MD:
2835+
// StorageSize; the size of the variable 8 DW_OP_mul 9 DW_OP_plus 10
2836+
// DW_OP_plus_uconst storageOffset // MD: StorageOffset; the offset where
28352837
// each variable is stored in the current stack frame
28362838

28372839
auto regNumFP = VISAMod->getFPReg();
@@ -2859,6 +2861,11 @@ bool CompileUnit::buildFpBasedLoc(const DbgVariable &var, IGC::DIEBlock *Block,
28592861
bitOffsetToFPReg); // 2 DW_OP_const1u/2u <bit-offset to Frame Pointer reg>
28602862
extractSubRegValue(Block, 64);
28612863

2864+
addUInt(Block, dwarf::DW_FORM_data1,
2865+
dwarf::DW_OP_plus_uconst); // 5 DW_OP_plus_uconst SIZE_OWORD (taken
2866+
// from getFPOffset())
2867+
addUInt(Block, dwarf::DW_FORM_udata, VISAMod->getFPOffset());
2868+
28622869
addUInt(Block, dwarf::DW_FORM_data1,
28632870
DW_OP_INTEL_push_simd_lane); // 6 DW_OP_INTEL_push_simd_lane
28642871
addConstantUValue(Block, storageSize); // 7 DW_OP_const1u/2u/4u/8u storageSize

IGC/DebugInfo/VISAModule.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,7 @@ class VISAModule {
549549
virtual bool hasPTO() const = 0;
550550
virtual int getPTOReg() const = 0;
551551
virtual int getFPReg() const = 0;
552+
virtual uint64_t getFPOffset() const = 0;
552553
virtual bool usesSlot1ScratchSpill() const = 0;
553554

554555
virtual llvm::ArrayRef<char> getGenDebug() const = 0;

IGC/VectorCompiler/lib/GenXCodeGen/GenXDebugInfo.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1059,6 +1059,7 @@ class GenXFunction final : public IGC::VISAModule {
10591059
bool hasPTO() const override { return false; }
10601060
int getPTOReg() const override { return -1; }
10611061
int getFPReg() const override { return -1; }
1062+
// Fixed 16-byte FP offset. This module reports no FP register (getFPReg()
// returns -1). NOTE(review): the literal presumably mirrors SIZE_OWORD as
// returned by EmitPass::getFPOffset() -- confirm and consider sharing it.
uint64_t getFPOffset() const override { return 16; }
10621063

10631064
bool usesSlot1ScratchSpill() const override { return false; }
10641065

IGC/common/igc_flags.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,7 @@ DECLARE_IGC_REGKEY(bool, AvoidUsingR0R1, false, "Do not use r0 an
392392
DECLARE_IGC_REGKEY(bool, EnableGTLocationDebugging, true, "Setting this to 1 (true) enables GT location expression emission for GPU debugger", true)
393393
DECLARE_IGC_REGKEY(bool, UseOffsetInLocation, true, "Setting this to 1 (true) preserves private base and per thread offset and removes preservation of any other debug variables", true)
394394
DECLARE_IGC_REGKEY(bool, EnableRelocations, false, "Setting this to 1 (true) makes IGC emit relocatable ELF with debug info", true)
395+
DECLARE_IGC_REGKEY(bool, EnableWriteOldFPToStack, true, "Setting this to 1 (true) writes the caller frame's frame-pointer to the start of callee's frame on stack, to support stack walk", false)
395396
DECLARE_IGC_REGKEY(bool, ZeBinCompatibleDebugging, true, "Setting this to 1 (true) enables embed debug info in zeBinary", true)
396397
DECLARE_IGC_REGKEY(bool, DebugInfoEnforceAmd64EM, false, "Enforces elf file with the debug infomation to have eMachine set to AMD64", false)
397398
DECLARE_IGC_REGKEY(bool, DebugInfoValidation, false, "Enable optional (strict) checks to detect debug information inconsistencies", false)

0 commit comments

Comments
 (0)