Skip to content

Commit 0b5f9e1

Browse files
KanclerzPiotr and igcbot
authored and committed
Add has_printf_calls to zeinfo
zeinfo now contains information on whether a kernel/function has printf calls and function pointer (indirect) calls. This allows neo to create the printf_buffer only when it is actually used.
1 parent 8ca10e3 commit 0b5f9e1

File tree

12 files changed

+48
-9
lines changed

12 files changed

+48
-9
lines changed

IGC/AdaptorOCL/OCL/KernelAnnotations.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,8 @@ struct ExecutionEnvironment {
9090
DWORD StatelessWritesCount = 0;
9191
DWORD IndirectStatelessCount = 0;
9292
DWORD numThreads = 0;
93+
bool HasPrintfCalls = false;
94+
bool HasIndirectCalls = false;
9395
bool HasStackCalls = false;
9496
bool RequireDisableEUFusion = false;
9597
DWORD PerThreadSpillMemoryUsage = 0;

IGC/AdaptorOCL/OCL/sp/zebin_builder.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -471,11 +471,12 @@ void ZEBinaryBuilder::addKernelExecEnv(const SOpenCLKernelInfo &annotations, zeI
471471
CPlatform(mPlatform).hasScratchSurface() && IGC_IS_FLAG_ENABLED(SeparateSpillPvtScratchSpace);
472472
env.has_no_stateless_write = (annotations.m_executionEnvironment.StatelessWritesCount == 0);
473473
env.has_stack_calls = annotations.m_executionEnvironment.HasStackCalls;
474+
// env.has_printf_calls = annotations.m_executionEnvironment.HasPrintfCalls;
475+
// env.has_indirect_calls = annotations.m_executionEnvironment.HasIndirectCalls;
474476
env.require_disable_eufusion = annotations.m_executionEnvironment.RequireDisableEUFusion;
475477
env.indirect_stateless_count = annotations.m_executionEnvironment.IndirectStatelessCount;
476478
env.inline_data_payload_size = annotations.m_threadPayload.PassInlineDataSize;
477479
env.offset_to_skip_per_thread_data_load = annotations.m_threadPayload.OffsetToSkipPerThreadDataLoad;
478-
;
479480
env.offset_to_skip_set_ffid_gp = annotations.m_threadPayload.OffsetToSkipSetFFIDGP;
480481
env.generate_local_id = annotations.m_threadPayload.generateLocalID;
481482
env.has_lsc_stores_with_non_default_l1_cache_controls =
@@ -515,6 +516,8 @@ void ZEBinaryBuilder::addFunctionExecEnv(const SOpenCLKernelInfo &annotations,
515516
env.simd_size = annotations.m_executionEnvironment.CompiledSIMDSize;
516517
env.barrier_count = zeFuncAttr.f_BarrierCount;
517518
env.has_rtcalls = zeFuncAttr.f_hasRTCalls;
519+
// env.has_printf_calls = zeFuncAttr.f_hasPrintfCalls;
520+
// env.has_indirect_calls = zeFuncAttr.f_hasIndirectCalls;
518521
}
519522

520523
void ZEBinaryBuilder::addLocalIds(uint32_t simdSize, uint32_t grfSize, bool has_local_id_x, bool has_local_id_y,

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5138,9 +5138,11 @@ void CEncoder::CreateFuncAttributeTable(VISAKernel *pMainKernel, GenXFunctionGro
51385138
uint32_t spillMemPerThread =
51395139
getSpillMemSizeWithFG(*F, jitInfo->stats.spillMemUsed, pFga, jitInfo->numBytesScratchGtpin);
51405140
uint8_t hasRTCalls = (uint8_t)modMD->FuncMD[F].hasSyncRTCalls;
5141+
uint8_t hasPrintfCalls = (uint8_t)modMD->FuncMD[F].hasPrintfCalls;
5142+
uint8_t hasIndirectCalls = (uint8_t)modMD->FuncMD[F].hasIndirectCalls;
51415143

51425144
attrs.emplace_back((uint8_t)isKernel, isExternal, barrierCount, privateMemPerThread, spillMemPerThread,
5143-
F->getName().str(), hasRTCalls);
5145+
F->getName().str(), hasRTCalls, hasPrintfCalls, hasIndirectCalls);
51445146
}
51455147
}
51465148

IGC/Compiler/CISACodeGen/OpenCLKernelCodeGen.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1739,6 +1739,8 @@ void COpenCLKernel::FillKernel(SIMDMode simdMode) {
17391739
m_kernelInfo.m_executionEnvironment.CompiledSubGroupsNumber = funcMD.CompiledSubGroupsNumber;
17401740

17411741
m_kernelInfo.m_executionEnvironment.HasRTCalls = funcMD.hasSyncRTCalls;
1742+
m_kernelInfo.m_executionEnvironment.HasPrintfCalls = funcMD.hasPrintfCalls;
1743+
m_kernelInfo.m_executionEnvironment.HasIndirectCalls = funcMD.hasIndirectCalls;
17421744
}
17431745

17441746
m_kernelInfo.m_executionEnvironment.HasGlobalAtomics = GetHasGlobalAtomics();

IGC/Compiler/Optimizer/OpenCLPasses/OpenCLPrintf/OpenCLPrintfAnalysis.cpp

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,11 @@ bool OpenCLPrintfAnalysis::runOnModule(Module &M) {
6565
if (!func.isDeclaration() && m_hasPrintfs.find(&func) != m_hasPrintfs.end()) {
6666
addPrintfBufferArgs(func);
6767
changed = true;
68+
69+
if (m_modMD == nullptr) {
70+
m_modMD = getAnalysis<MetaDataUtilsWrapper>().getModuleMetaData();
71+
}
72+
m_modMD->FuncMD[&func].hasPrintfCalls = true;
6873
}
6974
}
7075
}
@@ -76,9 +81,16 @@ bool OpenCLPrintfAnalysis::runOnModule(Module &M) {
7681
return m_hasPrintfs.size();
7782
}
7883

79-
void OpenCLPrintfAnalysis::visitCallInst(llvm::CallInst &callInst) {
84+
void OpenCLPrintfAnalysis::visitCallInst(CallInst &callInst) {
8085
Function *pF = callInst.getParent()->getParent();
8186
if (!callInst.getCalledFunction() || m_hasPrintfs.find(pF) != m_hasPrintfs.end()) {
87+
88+
if( callInst.isIndirectCall()) {
89+
if (m_modMD == nullptr) {
90+
m_modMD = getAnalysis<MetaDataUtilsWrapper>().getModuleMetaData();
91+
}
92+
m_modMD->FuncMD[pF].hasIndirectCalls = true;
93+
}
8294
return;
8395
}
8496

IGC/Compiler/Optimizer/OpenCLPasses/OpenCLPrintf/OpenCLPrintfAnalysis.hpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,9 @@ class OpenCLPrintfAnalysis : public llvm::ModulePass, public llvm::InstVisitor<O
3333
/// @brief Provides name of pass
3434
virtual llvm::StringRef getPassName() const override { return "OpenCLPrintfAnalysis"; }
3535

36-
void getAnalysisUsage(llvm::AnalysisUsage &AU) const override { AU.addRequired<MetaDataUtilsWrapper>(); }
36+
void getAnalysisUsage(llvm::AnalysisUsage &AU) const override {
37+
AU.addRequired<MetaDataUtilsWrapper>();
38+
}
3739

3840
/// @brief Main entry point.
3941
/// @param M The destination module.
@@ -62,6 +64,7 @@ class OpenCLPrintfAnalysis : public llvm::ModulePass, public llvm::InstVisitor<O
6264

6365
std::unordered_set<llvm::Function *> m_hasPrintfs;
6466

67+
ModuleMetaData *m_modMD = nullptr;
6568
/// @brief MetaData utils used to generate LLVM metadata
6669
IGCMD::MetaDataUtils *m_pMDUtils = nullptr;
6770
};

IGC/ZEBinWriter/zebin/source/autogen/ZEInfo.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ struct zeInfoExecutionEnv
4747
{
4848
bool operator==(const zeInfoExecutionEnv& other) const
4949
{
50-
return barrier_count == other.barrier_count && disable_mid_thread_preemption == other.disable_mid_thread_preemption && grf_count == other.grf_count && has_4gb_buffers == other.has_4gb_buffers && has_device_enqueue == other.has_device_enqueue && has_dpas == other.has_dpas && has_fence_for_image_access == other.has_fence_for_image_access && has_global_atomics == other.has_global_atomics && has_multi_scratch_spaces == other.has_multi_scratch_spaces && has_no_stateless_write == other.has_no_stateless_write && has_stack_calls == other.has_stack_calls && require_disable_eufusion == other.require_disable_eufusion && indirect_stateless_count == other.indirect_stateless_count && inline_data_payload_size == other.inline_data_payload_size && offset_to_skip_per_thread_data_load == other.offset_to_skip_per_thread_data_load && offset_to_skip_set_ffid_gp == other.offset_to_skip_set_ffid_gp && required_sub_group_size == other.required_sub_group_size && required_work_group_size == other.required_work_group_size && simd_size == other.simd_size && slm_size == other.slm_size && private_size == other.private_size && spill_size == other.spill_size && subgroup_independent_forward_progress == other.subgroup_independent_forward_progress && thread_scheduling_mode == other.thread_scheduling_mode && work_group_walk_order_dimensions == other.work_group_walk_order_dimensions && eu_thread_count == other.eu_thread_count && has_sample == other.has_sample && has_rtcalls == other.has_rtcalls && generate_local_id == other.generate_local_id && has_lsc_stores_with_non_default_l1_cache_controls == other.has_lsc_stores_with_non_default_l1_cache_controls && require_iab == other.require_iab;
50+
return barrier_count == other.barrier_count && disable_mid_thread_preemption == other.disable_mid_thread_preemption && grf_count == other.grf_count && has_4gb_buffers == other.has_4gb_buffers && has_device_enqueue == other.has_device_enqueue && has_dpas == other.has_dpas && has_fence_for_image_access == other.has_fence_for_image_access && has_global_atomics == other.has_global_atomics && has_multi_scratch_spaces == other.has_multi_scratch_spaces && has_no_stateless_write == other.has_no_stateless_write && has_stack_calls == other.has_stack_calls && has_printf_calls == other.has_printf_calls && has_indirect_calls == other.has_indirect_calls && require_disable_eufusion == other.require_disable_eufusion && indirect_stateless_count == other.indirect_stateless_count && inline_data_payload_size == other.inline_data_payload_size && offset_to_skip_per_thread_data_load == other.offset_to_skip_per_thread_data_load && offset_to_skip_set_ffid_gp == other.offset_to_skip_set_ffid_gp && required_sub_group_size == other.required_sub_group_size && required_work_group_size == other.required_work_group_size && simd_size == other.simd_size && slm_size == other.slm_size && private_size == other.private_size && spill_size == other.spill_size && subgroup_independent_forward_progress == other.subgroup_independent_forward_progress && thread_scheduling_mode == other.thread_scheduling_mode && work_group_walk_order_dimensions == other.work_group_walk_order_dimensions && eu_thread_count == other.eu_thread_count && has_sample == other.has_sample && has_rtcalls == other.has_rtcalls && generate_local_id == other.generate_local_id && has_lsc_stores_with_non_default_l1_cache_controls == other.has_lsc_stores_with_non_default_l1_cache_controls && require_iab == other.require_iab;
5151
}
5252
zeinfo_int32_t barrier_count = 0;
5353
zeinfo_bool_t disable_mid_thread_preemption = false;
@@ -60,6 +60,8 @@ struct zeInfoExecutionEnv
6060
zeinfo_bool_t has_multi_scratch_spaces = false;
6161
zeinfo_bool_t has_no_stateless_write = false;
6262
zeinfo_bool_t has_stack_calls = false;
63+
zeinfo_bool_t has_printf_calls = false;
64+
zeinfo_bool_t has_indirect_calls = false;
6365
zeinfo_bool_t require_disable_eufusion = false;
6466
zeinfo_int32_t indirect_stateless_count = 0;
6567
zeinfo_int32_t inline_data_payload_size = 0;
@@ -293,7 +295,7 @@ struct zeInfoContainer
293295
KernelsCostInfoTy kernels_cost_info;
294296
};
295297
struct PreDefinedAttrGetter{
296-
static zeinfo_str_t getVersionNumber() { return "1.58"; }
298+
static zeinfo_str_t getVersionNumber() { return "1.59"; }
297299

298300
enum class ArgThreadSchedulingMode {
299301
age_based,

IGC/ZEBinWriter/zebin/source/autogen/ZEInfoYAML.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ void MappingTraits<zeInfoExecutionEnv>::mapping(IO& io, zeInfoExecutionEnv& info
6969
io.mapOptional("has_multi_scratch_spaces", info.has_multi_scratch_spaces, false);
7070
io.mapOptional("has_no_stateless_write", info.has_no_stateless_write, false);
7171
io.mapOptional("has_stack_calls", info.has_stack_calls, false);
72+
io.mapOptional("has_printf_calls", info.has_printf_calls, false);
73+
io.mapOptional("has_indirect_calls", info.has_indirect_calls, false);
7274
io.mapOptional("require_disable_eufusion", info.require_disable_eufusion, false);
7375
io.mapOptional("indirect_stateless_count", info.indirect_stateless_count, 0);
7476
io.mapOptional("inline_data_payload_size", info.inline_data_payload_size, 0);

IGC/ZEBinWriter/zebin/spec/version.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ SPDX-License-Identifier: MIT
77
============================= end_copyright_notice ==========================-->
88

99
# ZEBIN Version
10-
Version 1.58
10+
Version 1.59
1111
=======
1212

1313
## Versioning
@@ -16,6 +16,7 @@ Format: \<_Major number_\>.\<_Minor number_\>
1616
- Minor number: Increase when backward-compatible features are added. For example, add new attributes.
1717

1818
## Change Note
19+
- **Version 1.59**: Add execution env has_printf_calls and has_indirect_calls.
1920
- **Version 1.58**: Add new enum value NT_INTELGT_INDIRECT_ACCESS_BUFFER_VERSION
2021
- **Version 1.57**: Internal feature.
2122
- **Version 1.56**: Internal feature.

IGC/ZEBinWriter/zebin/spec/zeinfo.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ If an attribute is **Required**, it must be present in execution_env. If it's **
127127
| has_multi_scratch_spaces | bool | Optional | false | |
128128
| has_no_stateless_write | bool | Optional | false | |
129129
| has_stack_calls | bool | Optional | false | When this value is true, it indicates that program uses stack calls. The implicit_arg_buffer is allocated accordingly. |
130+
| has_printf_calls | bool | Optional | false | When this value is true, it indicates that the kernel itself uses printf calls in its body. |
131+
| has_indirect_calls | bool | Optional | false | When this value is true, it indicates that the kernel itself uses indirect calls in its body. |
130132
| require_disable_eufusion | bool | Optional | false | When this value is true, it indicates that program requires EU fusion disable |
131133
| indirect_stateless_count | int32 | Optional | 0 | |
132134
| inline_data_payload_size | int32 | Optional | 0 | Size of inline data in cross-thread-payload in byte. The value is 0 when inline data is disabled (default). |

0 commit comments

Comments
 (0)