Skip to content

Commit 4808f66

Browse files
Remove WorkloadInfo from KernelInfo
Related-to: NEO-4729
Signed-off-by: Krystian Chmielewski <[email protected]>
1 parent b6e3817 commit 4808f66

File tree

15 files changed, with 223 additions and 315 deletions.

opencl/source/helpers/hardware_commands_helper_base.inl

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ size_t HardwareCommandsHelper<GfxFamily>::getSizeRequiredDSH(const Kernel &kerne
4646

4747
totalSize += borderColorSize + additionalSizeRequiredDsh();
4848

49-
DEBUG_BREAK_IF(!(totalSize >= kernel.getDynamicStateHeapSize() || kernel.getKernelInfo().isVmeWorkload));
49+
DEBUG_BREAK_IF(!(totalSize >= kernel.getDynamicStateHeapSize() || kernel.isVmeKernel()));
5050

5151
return alignUp(totalSize, EncodeStates<GfxFamily>::alignInterfaceDescriptorData);
5252
}

opencl/source/kernel/kernel.cpp

Lines changed: 83 additions & 123 deletions
Large diffs are not rendered by default.

opencl/source/program/kernel_info.cpp

Lines changed: 12 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -359,29 +359,19 @@ void KernelInfo::apply(const DeviceInfoKernelPayloadConstants &constants) {
359359
return;
360360
}
361361

362-
uint32_t privateMemoryStatelessSizeOffset = this->workloadInfo.privateMemoryStatelessSizeOffset;
363-
uint32_t localMemoryStatelessWindowSizeOffset = this->workloadInfo.localMemoryStatelessWindowSizeOffset;
364-
uint32_t localMemoryStatelessWindowStartAddressOffset = this->workloadInfo.localMemoryStatelessWindowStartAddressOffset;
362+
const auto &implicitArgs = kernelDescriptor.payloadMappings.implicitArgs;
363+
const auto privateMemorySize = static_cast<uint32_t>(KernelHelper::getPrivateSurfaceSize(kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize,
364+
constants.computeUnitsUsedForScratch));
365365

366-
if (localMemoryStatelessWindowStartAddressOffset != WorkloadInfo::undefinedOffset) {
367-
*(uintptr_t *)&(this->crossThreadData[localMemoryStatelessWindowStartAddressOffset]) = reinterpret_cast<uintptr_t>(constants.slmWindow);
368-
}
369-
370-
if (localMemoryStatelessWindowSizeOffset != WorkloadInfo::undefinedOffset) {
371-
*(uint32_t *)&(this->crossThreadData[localMemoryStatelessWindowSizeOffset]) = constants.slmWindowSize;
372-
}
373-
374-
auto perHwThreadSize = kernelDescriptor.kernelAttributes.perHwThreadPrivateMemorySize;
375-
uint32_t privateMemorySize = static_cast<uint32_t>(KernelHelper::getPrivateSurfaceSize(perHwThreadSize,
376-
constants.computeUnitsUsedForScratch));
377-
378-
if (privateMemoryStatelessSizeOffset != WorkloadInfo::undefinedOffset) {
379-
*(uint32_t *)&(this->crossThreadData[privateMemoryStatelessSizeOffset]) = privateMemorySize;
380-
}
381-
382-
if (this->workloadInfo.maxWorkGroupSizeOffset != WorkloadInfo::undefinedOffset) {
383-
*(uint32_t *)&(this->crossThreadData[this->workloadInfo.maxWorkGroupSizeOffset]) = constants.maxWorkGroupSize;
384-
}
366+
auto setIfValidOffset = [&](auto value, NEO::CrossThreadDataOffset offset) {
367+
if (isValidOffset(offset)) {
368+
*ptrOffset(reinterpret_cast<decltype(value) *>(crossThreadData), offset) = value;
369+
}
370+
};
371+
setIfValidOffset(reinterpret_cast<uintptr_t>(constants.slmWindow), implicitArgs.localMemoryStatelessWindowStartAddres);
372+
setIfValidOffset(constants.slmWindowSize, implicitArgs.localMemoryStatelessWindowSize);
373+
setIfValidOffset(privateMemorySize, implicitArgs.privateMemorySize);
374+
setIfValidOffset(constants.maxWorkGroupSize, implicitArgs.maxWorkGroupSize);
385375
}
386376

387377
std::string concatenateKernelNames(ArrayRef<KernelInfo *> kernelInfos) {

opencl/source/program/kernel_info.h

Lines changed: 0 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -43,31 +43,9 @@ extern bool useKernelDescriptor;
4343

4444
extern std::map<std::string, size_t> typeSizeMap;
4545

46-
struct WorkloadInfo {
47-
enum : uint32_t { undefinedOffset = std::numeric_limits<uint32_t>::max() };
48-
enum : uint32_t { invalidParentEvent = std::numeric_limits<uint32_t>::max() };
49-
50-
uint32_t globalWorkOffsetOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset};
51-
uint32_t globalWorkSizeOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset};
52-
uint32_t localWorkSizeOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset};
53-
uint32_t localWorkSizeOffsets2[3] = {undefinedOffset, undefinedOffset, undefinedOffset};
54-
uint32_t enqueuedLocalWorkSizeOffsets[3] = {undefinedOffset, undefinedOffset, undefinedOffset};
55-
uint32_t numWorkGroupsOffset[3] = {undefinedOffset, undefinedOffset, undefinedOffset};
56-
uint32_t maxWorkGroupSizeOffset = undefinedOffset;
57-
uint32_t workDimOffset = undefinedOffset;
58-
uint32_t slmStaticSize = 0;
59-
uint32_t simdSizeOffset = undefinedOffset;
60-
uint32_t parentEventOffset = undefinedOffset;
61-
uint32_t preferredWkgMultipleOffset = undefinedOffset;
62-
uint32_t privateMemoryStatelessSizeOffset = undefinedOffset;
63-
uint32_t localMemoryStatelessWindowSizeOffset = undefinedOffset;
64-
uint32_t localMemoryStatelessWindowStartAddressOffset = undefinedOffset;
65-
};
66-
6746
static const float YTilingRatioValue = 1.3862943611198906188344642429164f;
6847

6948
struct WorkSizeInfo {
70-
7149
uint32_t maxWorkGroupSize;
7250
uint32_t minWorkGroupSize;
7351
bool hasBarriers;
@@ -168,7 +146,6 @@ struct KernelInfo {
168146
PatchInfo patchInfo = {};
169147
std::vector<KernelArgInfo> kernelArgInfo;
170148
std::vector<KernelArgInfo> kernelNonArgInfo;
171-
WorkloadInfo workloadInfo = {};
172149
std::vector<std::pair<uint32_t, uint32_t>> childrenKernelsIdOffset;
173150
bool usesSsh = false;
174151
bool requiresSshForBuffers = false;

opencl/source/program/kernel_info_from_patchtokens.cpp

Lines changed: 2 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -24,15 +24,13 @@ inline void storeTokenIfNotNull(KernelInfo &kernelInfo, T *token) {
2424
kernelInfo.storePatchToken(token);
2525
}
2626
}
27-
2827
template <typename T>
2928
inline uint32_t getOffset(T *token) {
3029
if (token != nullptr) {
3130
return token->Offset;
3231
}
33-
return WorkloadInfo::undefinedOffset;
32+
return undefined<uint32_t>;
3433
}
35-
3634
void populateKernelInfoArgMetadata(KernelInfo &dstKernelInfoArg, const SPatchKernelArgumentInfo *src) {
3735
if (nullptr == src) {
3836
return;
@@ -158,7 +156,7 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch
158156

159157
storeTokenIfNotNull(dst, src.tokens.executionEnvironment);
160158
dst.usesSsh = src.tokens.bindingTableState && (src.tokens.bindingTableState->Count > 0);
161-
dst.workloadInfo.slmStaticSize = src.tokens.allocateLocalSurface ? src.tokens.allocateLocalSurface->TotalInlineLocalMemorySize : 0U;
159+
dst.kernelDescriptor.kernelAttributes.slmInlineSize = src.tokens.allocateLocalSurface ? src.tokens.allocateLocalSurface->TotalInlineLocalMemorySize : 0U;
162160

163161
dst.kernelArgInfo.resize(src.tokens.kernelArgs.size());
164162

@@ -178,23 +176,6 @@ void populateKernelInfo(KernelInfo &dst, const PatchTokenBinary::KernelFromPatch
178176
dst.isVmeWorkload = dst.isVmeWorkload || (src.tokens.inlineVmeSamplerInfo != nullptr);
179177
dst.systemKernelOffset = src.tokens.stateSip ? src.tokens.stateSip->SystemKernelOffset : 0U;
180178

181-
for (uint32_t i = 0; i < 3U; ++i) {
182-
dst.workloadInfo.localWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.localWorkSize[i]);
183-
dst.workloadInfo.localWorkSizeOffsets2[i] = getOffset(src.tokens.crossThreadPayloadArgs.localWorkSize2[i]);
184-
dst.workloadInfo.globalWorkOffsetOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.globalWorkOffset[i]);
185-
dst.workloadInfo.enqueuedLocalWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.enqueuedLocalWorkSize[i]);
186-
dst.workloadInfo.globalWorkSizeOffsets[i] = getOffset(src.tokens.crossThreadPayloadArgs.globalWorkSize[i]);
187-
dst.workloadInfo.numWorkGroupsOffset[i] = getOffset(src.tokens.crossThreadPayloadArgs.numWorkGroups[i]);
188-
}
189-
190-
dst.workloadInfo.maxWorkGroupSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.maxWorkGroupSize);
191-
dst.workloadInfo.workDimOffset = getOffset(src.tokens.crossThreadPayloadArgs.workDimensions);
192-
dst.workloadInfo.simdSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.simdSize);
193-
dst.workloadInfo.parentEventOffset = getOffset(src.tokens.crossThreadPayloadArgs.parentEvent);
194-
dst.workloadInfo.preferredWkgMultipleOffset = getOffset(src.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple);
195-
dst.workloadInfo.privateMemoryStatelessSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize);
196-
dst.workloadInfo.localMemoryStatelessWindowSizeOffset = getOffset(src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize);
197-
dst.workloadInfo.localMemoryStatelessWindowStartAddressOffset = getOffset(src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress);
198179
for (auto &childSimdSize : src.tokens.crossThreadPayloadArgs.childBlockSimdSize) {
199180
dst.childrenKernelsIdOffset.push_back({childSimdSize->ArgumentNumber, childSimdSize->Offset});
200181
}

0 commit comments

Comments
 (0)