Skip to content

Commit ec0ca0c

Browse files
dlei6gsys_zuul
authored and committed
Compiler outputs per-function attributes for function pointer linking support
Change-Id: I9d4170442b0aa02ec6d7a4c542152ba128cd527d
1 parent c7505e6 commit ec0ca0c

File tree

8 files changed

+130
-10
lines changed

8 files changed

+130
-10
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4725,6 +4725,42 @@ namespace IGC
47254725
assert(sizeof(vISA::GenRelocEntry) * tableEntries == bufferSize);
47264726
}
47274727

4728+
void CEncoder::CreateFuncAttributeTable(void*& buffer, unsigned& bufferSize, unsigned& tableEntries)
4729+
{
4730+
buffer = nullptr;
4731+
bufferSize = 0;
4732+
tableEntries = 0;
4733+
4734+
std::vector<vISA::GenFuncAttribEntry> attribTable;
4735+
for (auto it : funcAttributeMap)
4736+
{
4737+
vISA::GenFuncAttribEntry entry;
4738+
Function* F = it.first;
4739+
4740+
// Ignore internal functions
4741+
if (!isEntryFunc(m_program->GetContext()->getMetaDataUtils(), F) &&
4742+
!F->hasFnAttribute("IndirectlyCalled"))
4743+
continue;
4744+
4745+
assert(F->getName().size() <= vISA::MAX_SYMBOL_NAME_LENGTH);
4746+
strcpy_s(entry.f_name, vISA::MAX_SYMBOL_NAME_LENGTH, F->getName().str().c_str());
4747+
entry.f_isKernel = it.second.isKernel ? 1 : 0;
4748+
entry.f_hasBarrier = it.second.hasBarrier ? 1 : 0;
4749+
entry.f_privateMemPerThread = (uint32_t) (it.second.argumentStackSize + it.second.allocaStackSize);
4750+
4751+
attribTable.push_back(entry);
4752+
}
4753+
4754+
if (!attribTable.empty())
4755+
{
4756+
tableEntries = attribTable.size();
4757+
bufferSize = tableEntries * sizeof(vISA::GenFuncAttribEntry);
4758+
buffer = (void*)malloc(bufferSize);
4759+
assert(buffer && "Table cannot be allocated");
4760+
memcpy_s(buffer, bufferSize, attribTable.data(), bufferSize);
4761+
}
4762+
}
4763+
47284764
void CEncoder::Compile(bool hasSymbolTable)
47294765
{
47304766
CodeGenContext* context = m_program->GetContext();
@@ -5010,6 +5046,13 @@ namespace IGC
50105046
CreateRelocationTable(pOutput->m_funcRelocationTable,
50115047
pOutput->m_funcRelocationTableSize,
50125048
pOutput->m_funcRelocationTableEntries);
5049+
5050+
if (IGC_IS_FLAG_ENABLED(EnableRuntimeFuncAttributePatching))
5051+
{
5052+
CreateFuncAttributeTable(pOutput->m_funcAttributeTable,
5053+
pOutput->m_funcAttributeTableSize,
5054+
pOutput->m_funcAttributeTableEntries);
5055+
}
50135056
}
50145057

50155058
if (jitInfo->isSpill == true)

IGC/Compiler/CISACodeGen/CISABuilder.hpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -474,6 +474,7 @@ namespace IGC
474474

475475
void CreateSymbolTable(void*& buffer, unsigned& bufferSize, unsigned& tableEntries);
476476
void CreateRelocationTable(void*& buffer, unsigned& bufferSize, unsigned& tableEntries);
477+
void CreateFuncAttributeTable(void*& buffer, unsigned& bufferSize, unsigned& tableEntries);
477478

478479
uint32_t getGRFSize() const;
479480

@@ -573,6 +574,31 @@ namespace IGC
573574
VISA_SamplerVar* samplervar;
574575

575576
CShader* m_program;
577+
578+
// Keep a map between a function and its per-function attributes needed for function pointer support
579+
struct FuncAttrib
{
    // Set once the function has been marked as a kernel.
    bool isKernel{ false };
    // Set once the function has been marked as using a barrier.
    bool hasBarrier{ false };
    // Largest argument stack size recorded for this function.
    unsigned argumentStackSize{ 0 };
    // Alloca stack size recorded for this function.
    unsigned allocaStackSize{ 0 };
};
586+
llvm::SmallDenseMap<llvm::Function*, FuncAttrib> funcAttributeMap;
587+
588+
public:
589+
// Used by EmitVISAPass to set function attributes
590+
// Flags F as a kernel in the per-function attribute map.
void SetFunctionIsKernel(llvm::Function* F)
{
    FuncAttrib& attrib = funcAttributeMap[F];
    attrib.isKernel = true;
}
593+
// Flags F as using a barrier in the per-function attribute map.
void SetFunctionHasBarrier(llvm::Function* F)
{
    FuncAttrib& attrib = funcAttributeMap[F];
    attrib.hasBarrier = true;
}
596+
// Raises the argument stack size recorded for F to `size` when `size` is larger than
// the value already stored; smaller or equal values leave the record unchanged.
void SetFunctionMaxArgumentStackSize(llvm::Function* F, unsigned size)
{
    FuncAttrib& attrib = funcAttributeMap[F];
    if (size > attrib.argumentStackSize)
    {
        attrib.argumentStackSize = size;
    }
}
599+
// Overwrites the alloca stack size recorded for F with `size`.
void SetFunctionAllocaStackSize(llvm::Function* F, unsigned size)
{
    FuncAttrib& attrib = funcAttributeMap[F];
    attrib.allocaStackSize = size;
}
576602
};
577603

578604
inline void CEncoder::Jump(uint label)

IGC/Compiler/CISACodeGen/CShader.cpp

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -280,13 +280,26 @@ void CShader::InitKernelStack(CVariable*& stackBase, CVariable*& stackAllocSize,
280280
encoder.SetSrcSubReg(0, 5);
281281
encoder.And(pHWTID, GetR0(), ImmToVariable(0x1ff, ISA_TYPE_UD));
282282
encoder.Push();
283-
// hard-code per-workitem private-memory size to 8k
284-
CVariable* pSize = ImmToVariable(8 * 1024 * numLanes(m_dispatchSize), ISA_TYPE_UD);
283+
284+
CVariable* pSize = nullptr;
285+
if (IGC_IS_FLAG_ENABLED(EnableRuntimeFuncAttributePatching))
286+
{
287+
// Experimental: Patch private memory size
288+
pSize = GetNewVariable(1, ISA_TYPE_UD, CVariable::getAlignment(getGRFSize()), true);
289+
std::string patchName = "INTEL_PATCH_PRIVATE_MEMORY_SIZE";
290+
encoder.AddVISASymbol(patchName, pSize);
291+
}
292+
else
293+
{
294+
// hard-code per-workitem private-memory size to 8k
295+
pSize = ImmToVariable(8 * 1024 * numLanes(m_dispatchSize), ISA_TYPE_UD);
296+
}
297+
285298
CVariable* pTemp = GetNewVariable(1, ISA_TYPE_UD, EALIGN_DWORD, true, 1);
286299
encoder.Mul(pTemp, pHWTID, pSize);
287300
encoder.Push();
288-
// reserve space for alloca
289301

302+
// reserve space for alloca
290303
auto funcMDItr = m_ModuleMetadata->FuncMD.find(entry);
291304
if (funcMDItr != m_ModuleMetadata->FuncMD.end())
292305
{
@@ -295,8 +308,13 @@ void CShader::InitKernelStack(CVariable*& stackBase, CVariable*& stackAllocSize,
295308
unsigned totalAllocaSize = funcMDItr->second.privateMemoryPerWI * numLanes(m_dispatchSize);
296309
encoder.Add(pTemp, pTemp, ImmToVariable(totalAllocaSize, ISA_TYPE_UD));
297310
encoder.Push();
311+
312+
// Set the total alloca size for the entry function
313+
encoder.SetFunctionAllocaStackSize(entry, totalAllocaSize);
298314
}
299315
}
316+
// Indicate this is the kernel function
317+
encoder.SetFunctionIsKernel(entry);
300318

301319
// modify private-memory size to a large setting
302320
m_ModuleMetadata->FuncMD[entry].privateMemoryPerWI = 8192;

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9564,8 +9564,14 @@ void EmitPass::emitStackCall(llvm::CallInst* inst)
95649564
// end of reading return value from stack
95659565
}
95669566
}
9567-
// update stack pointer after the call
95689567

9568+
// Record the maximum stack size pushed in the parent function for this call's args
9569+
if (offsetS > 0)
9570+
{
9571+
m_encoder->SetFunctionMaxArgumentStackSize(inst->getParent()->getParent(), offsetS);
9572+
}
9573+
9574+
// update stack pointer after the call
95699575
CVariable* pSP = m_currShader->GetSP();
95709576
CVariable* pPopSize = m_currShader->ImmToVariable((uint64_t)(~offsetS + 1), ISA_TYPE_D);
95719577
emitAddSP(pSP, pSP, pPopSize);
@@ -9747,6 +9753,9 @@ void EmitPass::emitStackFuncEntry(Function* F, bool ptr64bits)
97479753
CVariable* pSP = m_currShader->GetSP();
97489754
unsigned totalAllocaSize = funcMDItr->second.privateMemoryPerWI * numLanes(m_currShader->m_dispatchSize);
97499755
emitAddSP(pSP, pSP, m_currShader->ImmToVariable(totalAllocaSize, ISA_TYPE_UD));
9756+
9757+
// Set the per-function private mem size
9758+
m_encoder->SetFunctionAllocaStackSize(F, totalAllocaSize);
97509759
}
97519760
}
97529761
}
@@ -13034,6 +13043,9 @@ void EmitPass::emitThreadGroupBarrier(llvm::Instruction* inst)
1303413043
m_currShader->SetHasBarrier();
1303513044
m_encoder->Barrier(BarrierKind);
1303613045
m_encoder->Push();
13046+
13047+
// Set if barrier was used for this function
13048+
m_encoder->SetFunctionHasBarrier(inst->getParent()->getParent());
1303713049
}
1303813050
}
1303913051

IGC/Compiler/CodeGenPublic.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ namespace IGC
110110
void* m_funcRelocationTable = nullptr;
111111
unsigned int m_funcRelocationTableSize = 0;
112112
unsigned int m_funcRelocationTableEntries = 0;
113+
void* m_funcAttributeTable = nullptr;
114+
unsigned int m_funcAttributeTableSize = 0;
115+
unsigned int m_funcAttributeTableEntries = 0;
113116
unsigned int m_offsetToSkipPerThreadDataLoad = 0;
114117
uint32_t m_offsetToSkipSetFFIDGP = 0;
115118
//true means we separate pvtmem and spillfill. pvtmem could go into stateless.

IGC/common/igc_flags.def

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -290,8 +290,9 @@ DECLARE_IGC_REGKEY(bool, UniformMemOptLimit, false, "Limit of uniform
290290
DECLARE_IGC_REGKEY(bool, EnableFunctionPointer, true, "Enables support for function pointers and indirect calls", false)
291291
DECLARE_IGC_REGKEY(bool, EnableIndirectCallOptimization, false, "Enables inlining indirect calls by comparing function addresses", false)
292292
DECLARE_IGC_REGKEY(bool, ForceFFIDOverwrite, false, "Force overwriting ffid in sr0.0", false)
293-
294293
DECLARE_IGC_REGKEY(bool, EnableReadGTPinInput, true, "Enables setting GTPin context flags by reading the input to the compiler adapters", false)
294+
DECLARE_IGC_REGKEY(bool, EnableRuntimeFuncAttributePatching, false, "Creates a relocation entry to let runtime calculate the max call depth and patch required scratch space usage", true)
295+
295296

296297
DECLARE_IGC_GROUP("Performance experiments")
297298
DECLARE_IGC_REGKEY(bool, ForceNonCoherentStatelessBTI, false, "Enable gneeration of non cache coherent stateless messages", false)

visa/TranslationInterface.cpp

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2386,7 +2386,16 @@ int IR_Builder::translateVISACFSymbolInst(const std::string& symbolName, G4_DstR
23862386
startTimer(TIMER_VISA_BUILDER_IR_CONSTRUCTION);
23872387
#endif
23882388

2389-
if (no64bitType())
2389+
if (symbolName.compare("INTEL_PATCH_PRIVATE_MEMORY_SIZE") == 0)
2390+
{
2391+
// Relocation for runtime-calculated private memory size
2392+
auto* privateMemPatch = createRelocImm(Type_UD);
2393+
dst->setType(Type_UD);
2394+
G4_INST* mov = createMov(1, dst, privateMemPatch, InstOpt_WriteEnable, true);
2395+
RelocationEntry relocEntry = RelocationEntry::createSymbolAddrReloc(mov, 0, symbolName, GenRelocType::R_SYM_ADDR_32);
2396+
kernel.addRelocation(relocEntry);
2397+
}
2398+
else if (no64bitType())
23902399
{
23912400
auto* funcAddrLow = createRelocImm(Type_UD);
23922401
auto* funcAddrHigh = createRelocImm(Type_UD);

visa/include/RelocationInfo.h

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,21 +44,21 @@ enum GenSymType {
4444
S_FUNC = 2, // The symbol is associated with a function
4545
S_GLOBAL_VAR = 3, // The symbol is associated with a global variable in global address space
4646
S_GLOBAL_VAR_CONST = 4, // The symbol is associated with a global variable in constant address space
47-
S_CONST_SAMPLER = 5 // The symbol is associated with a constant sampler
47+
S_CONST_SAMPLER = 5 // The symbol is associated with a constant sampler
4848
};
4949

5050
/// GenSymEntry - A symbol table entry
5151
typedef struct {
5252
uint32_t s_type; // The symbol's type
5353
uint32_t s_offset; // The binary offset of this symbol. This field is ignored if s_type is S_UNDEF
54-
uint32_t s_size;
54+
uint32_t s_size; // The size in bytes of the function binary
5555
char s_name[MAX_SYMBOL_NAME_LENGTH]; // The symbol's name
5656
} GenSymEntry;
5757

5858
/// GenRelocType - Specify the relocation's type
5959
enum GenRelocType {
60-
R_NONE = 0,
61-
R_SYM_ADDR = 1, //64-bit type
60+
R_NONE = 0,
61+
R_SYM_ADDR = 1, //64-bit type address
6262
R_SYM_ADDR_32 = 2, //lower 32-bit of 64-bit address.
6363
R_SYM_ADDR_32_HI = 3 //higher 32bits of 64-bit address
6464
};
@@ -70,6 +70,14 @@ typedef struct {
7070
char r_symbol[MAX_SYMBOL_NAME_LENGTH]; // The relocation target symbol's name
7171
} GenRelocEntry;
7272

73+
/// GenFuncAttribEntry - Per-function attribute entry.
/// One fixed-size record describing a single function.
// NOTE(review): this looks like a binary layout shared with whoever reads the function
// attribute table, so field order, field types and the size of f_name should be treated
// as part of that contract - confirm before changing any of them.
74+
typedef struct {
75+
uint8_t f_isKernel; // Is the function a kernel (nonzero means yes)
76+
uint8_t f_hasBarrier; // Does the function use barriers (nonzero means yes)
77+
uint32_t f_privateMemPerThread; // Total private memory (in bytes) used by this function per thread
78+
char f_name[MAX_SYMBOL_NAME_LENGTH]; // The function's name; presumably NUL-terminated - confirm against the producer
79+
} GenFuncAttribEntry;
80+
7381
}
7482

7583
#endif

0 commit comments

Comments
 (0)