Skip to content

Commit be2a87f

Browse files
Jaime Arteaga authored and
Compute-Runtime-Automation committed
Initialize kernel private surface when kernel is created
Do this when the kernel is created, rather than when the associated module is created, to avoid allocating memory for kernels that are never created or used. Signed-off-by: Jaime Arteaga <[email protected]>
1 parent aa28baa commit be2a87f

File tree

6 files changed

+162
-46
lines changed

6 files changed

+162
-46
lines changed

level_zero/core/source/kernel/kernel.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,6 @@ struct KernelImmutableData {
4848
uint32_t getIsaSize() const;
4949
NEO::GraphicsAllocation *getIsaGraphicsAllocation() const { return isaGraphicsAllocation.get(); }
5050

51-
uint64_t getPrivateMemorySize() const;
52-
NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() const { return privateMemoryGraphicsAllocation.get(); }
53-
5451
const uint8_t *getCrossThreadDataTemplate() const { return crossThreadDataTemplate.get(); }
5552

5653
uint32_t getSurfaceStateHeapSize() const { return surfaceStateHeapSize; }
@@ -67,7 +64,6 @@ struct KernelImmutableData {
6764
Device *device = nullptr;
6865
NEO::KernelDescriptor *kernelDescriptor = nullptr;
6966
std::unique_ptr<NEO::GraphicsAllocation> isaGraphicsAllocation = nullptr;
70-
std::unique_ptr<NEO::GraphicsAllocation> privateMemoryGraphicsAllocation = nullptr;
7167

7268
uint32_t crossThreadDataSize = 0;
7369
std::unique_ptr<uint8_t[]> crossThreadDataTemplate = nullptr;

level_zero/core/source/kernel/kernel_imp.cpp

Lines changed: 25 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -72,10 +72,6 @@ KernelImmutableData::~KernelImmutableData() {
7272
isaGraphicsAllocation.release();
7373
}
7474
crossThreadDataTemplate.reset();
75-
if (nullptr != privateMemoryGraphicsAllocation) {
76-
this->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(&*privateMemoryGraphicsAllocation);
77-
privateMemoryGraphicsAllocation.release();
78-
}
7975
surfaceStateHeapTemplate.reset();
8076
dynamicStateHeapTemplate.reset();
8177
}
@@ -164,21 +160,6 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
164160
}
165161

166162
ArrayRef<uint8_t> surfaceStateHeapArrayRef = ArrayRef<uint8_t>(surfaceStateHeapTemplate.get(), getSurfaceStateHeapSize());
167-
auto &kernelAttributes = kernelDescriptor->kernelAttributes;
168-
169-
if (kernelAttributes.perHwThreadPrivateMemorySize != 0) {
170-
auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize(kernelAttributes.perHwThreadPrivateMemorySize, computeUnitsUsedForSratch);
171-
172-
UNRECOVERABLE_IF(privateSurfaceSize == 0);
173-
this->privateMemoryGraphicsAllocation.reset(memoryManager->allocateGraphicsMemoryWithProperties(
174-
{neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield()}));
175-
176-
UNRECOVERABLE_IF(this->privateMemoryGraphicsAllocation == nullptr);
177-
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
178-
static_cast<uintptr_t>(privateMemoryGraphicsAllocation->getGpuAddressToPatch()),
179-
*privateMemoryGraphicsAllocation, kernelDescriptor->payloadMappings.implicitArgs.privateMemoryAddress, *neoDevice);
180-
this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation.get());
181-
}
182163

183164
if (NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) {
184165
UNRECOVERABLE_IF(nullptr == globalConstBuffer);
@@ -207,17 +188,13 @@ uint32_t KernelImmutableData::getIsaSize() const {
207188
return static_cast<uint32_t>(isaGraphicsAllocation->getUnderlyingBufferSize());
208189
}
209190

210-
uint64_t KernelImmutableData::getPrivateMemorySize() const {
211-
uint64_t size = 0;
212-
if (privateMemoryGraphicsAllocation != nullptr) {
213-
size = privateMemoryGraphicsAllocation->getUnderlyingBufferSize();
214-
}
215-
return size;
216-
}
217-
218191
KernelImp::KernelImp(Module *module) : module(module) {}
219192

220193
KernelImp::~KernelImp() {
194+
if (nullptr != privateMemoryGraphicsAllocation) {
195+
module->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(privateMemoryGraphicsAllocation);
196+
}
197+
221198
if (perThreadDataForWholeThreadGroup != nullptr) {
222199
alignedFree(perThreadDataForWholeThreadGroup);
223200
}
@@ -674,6 +651,27 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
674651
this->dynamicStateHeapDataSize = kernelImmData->getDynamicStateHeapDataSize();
675652
}
676653

654+
auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes;
655+
auto neoDevice = module->getDevice()->getNEODevice();
656+
if (kernelAttributes.perHwThreadPrivateMemorySize != 0) {
657+
auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize(kernelAttributes.perHwThreadPrivateMemorySize,
658+
neoDevice->getDeviceInfo().computeUnitsUsedForScratch);
659+
660+
UNRECOVERABLE_IF(privateSurfaceSize == 0);
661+
this->privateMemoryGraphicsAllocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
662+
{neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield()});
663+
664+
UNRECOVERABLE_IF(this->privateMemoryGraphicsAllocation == nullptr);
665+
666+
ArrayRef<uint8_t> crossThredDataArrayRef = ArrayRef<uint8_t>(this->crossThreadData.get(), this->crossThreadDataSize);
667+
ArrayRef<uint8_t> surfaceStateHeapArrayRef = ArrayRef<uint8_t>(this->surfaceStateHeapData.get(), this->surfaceStateHeapDataSize);
668+
669+
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
670+
static_cast<uintptr_t>(privateMemoryGraphicsAllocation->getGpuAddressToPatch()),
671+
*privateMemoryGraphicsAllocation, kernelImmData->getDescriptor().payloadMappings.implicitArgs.privateMemoryAddress, *neoDevice);
672+
this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation);
673+
}
674+
677675
if (kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize[0] > 0) {
678676
auto *reqdSize = kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize;
679677
UNRECOVERABLE_IF(reqdSize[1] == 0);

level_zero/core/source/kernel/kernel_imp.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2019-2020 Intel Corporation
2+
* Copyright (C) 2019-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -124,11 +124,17 @@ struct KernelImp : Kernel {
124124

125125
ze_result_t setCacheConfig(ze_cache_config_flags_t flags) override;
126126

127+
// Returns the private-memory allocation pointer held by this kernel.
// The member defaults to nullptr and is only assigned in KernelImp::initialize
// when the kernel descriptor reports a non-zero perHwThreadPrivateMemorySize,
// so callers must handle a null result.
NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() {
128+
return privateMemoryGraphicsAllocation;
129+
}
130+
127131
protected:
128132
KernelImp() = default;
129133

130134
void patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z);
131135

136+
NEO::GraphicsAllocation *privateMemoryGraphicsAllocation = nullptr;
137+
132138
void createPrintfBuffer();
133139
void setDebugSurface();
134140
virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0;
@@ -147,7 +153,7 @@ struct KernelImp : Kernel {
147153
uint32_t numThreadsPerThreadGroup = 1u;
148154
uint32_t threadExecutionMask = 0u;
149155

150-
std::unique_ptr<uint8_t[]> crossThreadData = 0;
156+
std::unique_ptr<uint8_t[]> crossThreadData = nullptr;
151157
uint32_t crossThreadDataSize = 0;
152158

153159
std::unique_ptr<uint8_t[]> surfaceStateHeapData = nullptr;

level_zero/core/test/unit_tests/fixtures/module_fixture.h

Lines changed: 94 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2020 Intel Corporation
2+
* Copyright (C) 2020-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -12,12 +12,105 @@
1212
#include "shared/test/unit_test/helpers/test_files.h"
1313

1414
#include "level_zero/core/source/module/module.h"
15+
#include "level_zero/core/source/module/module_imp.h"
1516
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
1617
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
1718

1819
namespace L0 {
1920
namespace ult {
2021

22+
// Test fixture that builds a mock module whose kernel immutable data reports a
// caller-chosen perHwThreadPrivateMemorySize, so tests can drive the
// private-memory path of KernelImp::initialize.
struct ModuleImmutableDataFixture : public DeviceFixture {
// KernelImmutableData stand-in that owns a heap-allocated KernelDescriptor
// carrying only the requested per-HW-thread private memory size.
23+
struct MockImmutableData : KernelImmutableData {
24+
MockImmutableData(uint32_t perHwThreadPrivateMemorySize) {
25+
mockKernelDescriptor = new NEO::KernelDescriptor;
26+
mockKernelDescriptor->kernelAttributes.perHwThreadPrivateMemorySize = perHwThreadPrivateMemorySize;
// Point the base-class descriptor at the mock so getDescriptor() sees it.
27+
kernelDescriptor = mockKernelDescriptor;
28+
return;
29+
}
30+
~MockImmutableData() override {
31+
delete mockKernelDescriptor;
32+
}
33+
NEO::KernelDescriptor *mockKernelDescriptor = nullptr;
34+
};
35+
// ModuleImp subclass that always hands out the single MockImmutableData
// instance it creates in its constructor.
36+
struct MockModule : public L0::ModuleImp {
37+
MockModule(L0::Device *device,
38+
L0::ModuleBuildLog *moduleBuildLog,
39+
L0::ModuleType type,
40+
uint32_t perHwThreadPrivateMemorySize) : ModuleImp(device, moduleBuildLog, type) {
41+
mockKernelImmData = new MockImmutableData(perHwThreadPrivateMemorySize);
42+
}
43+
// NOTE(review): not marked override, unlike the other destructors in this
// fixture — confirm the base destructor is virtual.
44+
~MockModule() {
45+
delete mockKernelImmData;
46+
}
47+
// Ignores functionName: every lookup returns the same mock immutable data.
48+
const KernelImmutableData *getKernelImmutableData(const char *functionName) const override {
49+
return mockKernelImmData;
50+
}
51+
MockImmutableData *mockKernelImmData = nullptr;
52+
};
53+
// Kernel under test: exposes KernelImp through WhiteBox and stubs the
// overrides below with empty bodies.
54+
class MockKernel : public WhiteBox<L0::KernelImp> {
55+
public:
56+
MockKernel(MockModule *mockModule) : WhiteBox<L0::KernelImp>(mockModule) {
57+
}
58+
void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
59+
return;
60+
}
61+
void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
62+
return;
63+
}
64+
~MockKernel() override {
65+
}
66+
std::unique_ptr<Kernel> clone() const override { return nullptr; }
67+
};
68+
69+
void SetUp() override {
70+
DeviceFixture::SetUp();
71+
}
72+
// Loads the test kernel binary from disk, asserts it is non-empty, and then
// creates the MockModule with the requested private memory size.
// NOTE(review): moduleDesc is filled in below but never passed to the module
// in this function; the MockModule is only constructed, not initialized from
// the binary here — confirm whether that is intentional.
73+
void createModuleFromBinary(uint32_t perHwThreadPrivateMemorySize) {
74+
std::string testFile;
75+
retrieveBinaryKernelFilenameNoRevision(testFile, binaryFilename + "_", ".bin");
76+
77+
size_t size = 0;
78+
auto src = loadDataFromFile(
79+
testFile.c_str(),
80+
size);
81+
82+
ASSERT_NE(0u, size);
83+
ASSERT_NE(nullptr, src);
84+
85+
ze_module_desc_t moduleDesc = {};
86+
moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
87+
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.get());
88+
moduleDesc.inputSize = size;
89+
90+
ModuleBuildLog *moduleBuildLog = nullptr;
91+
92+
module = std::make_unique<MockModule>(device,
93+
moduleBuildLog,
94+
ModuleType::User,
95+
perHwThreadPrivateMemorySize);
96+
}
97+
// Initializes the given kernel using the fixture's kernelName; the result of
// initialize() is not checked here.
98+
void createKernel(MockKernel *kernel) {
99+
ze_kernel_desc_t desc = {};
100+
desc.pKernelName = kernelName.c_str();
101+
kernel->initialize(&desc);
102+
}
103+
104+
void TearDown() override {
105+
DeviceFixture::TearDown();
106+
}
107+
108+
const std::string binaryFilename = "test_kernel";
109+
const std::string kernelName = "test";
110+
const uint32_t numKernelArguments = 6;
111+
std::unique_ptr<MockModule> module;
112+
};
113+
21114
struct ModuleFixture : public DeviceFixture {
22115
void SetUp() override {
23116
DeviceFixture::SetUp();

level_zero/core/test/unit_tests/mocks/mock_kernel.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2019-2020 Intel Corporation
2+
* Copyright (C) 2019-2021 Intel Corporation
33
*
44
* SPDX-License-Identifier: MIT
55
*
@@ -27,7 +27,6 @@ struct WhiteBox<::L0::KernelImmutableData> : public ::L0::KernelImmutableData {
2727
using ::L0::KernelImmutableData::isaGraphicsAllocation;
2828
using ::L0::KernelImmutableData::kernelDescriptor;
2929
using ::L0::KernelImmutableData::KernelImmutableData;
30-
using ::L0::KernelImmutableData::privateMemoryGraphicsAllocation;
3130
using ::L0::KernelImmutableData::residencyContainer;
3231

3332
WhiteBox() : ::L0::KernelImmutableData() {}

level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp

Lines changed: 34 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -254,19 +254,43 @@ HWTEST_F(KernelPropertiesTests, givenKernelThenCorrectNameIsRetrieved) {
254254
delete[] kernelNameRetrieved;
255255
}
256256

257-
HWTEST_F(KernelPropertiesTests, whenInitializingThenCalculatesProperPrivateSurfaceSize) {
258-
uint32_t computeUnitsUsedForSratch = 0x300;
257+
// GoogleTest fixture for the private-memory tests: forwards SetUp/TearDown to
// ModuleImmutableDataFixture. (The "-" lines inside this span are removed
// lines of the previous test shown by the diff, not part of this class.)
class KernelImmutableDataTests : public ModuleImmutableDataFixture, public ::testing::Test {
258+
public:
259+
void SetUp() override {
260+
ModuleImmutableDataFixture::SetUp();
261+
}
259262

260-
KernelInfo kernelInfo;
261-
auto &kernelAttributes = kernelInfo.kernelDescriptor.kernelAttributes;
262-
kernelAttributes.perHwThreadPrivateMemorySize = 0x100;
263-
kernelAttributes.simdSize = 8;
263+
void TearDown() override {
264+
ModuleImmutableDataFixture::TearDown();
265+
}
266+
};
264267

265-
KernelImmutableData kernelImmutableData(device);
266-
kernelImmutableData.initialize(&kernelInfo, device, computeUnitsUsedForSratch, nullptr, nullptr, false);
268+
// With a requested per-HW-thread private memory size of zero, initializing the
// kernel must leave its private-memory allocation unset (nullptr).
HWTEST_F(KernelImmutableDataTests, givenKernelInitializedWithNoPrivateMemoryThenPrivateMemoryIsNull) {
269+
uint32_t perHwThreadPrivateMemorySizeRequested = 0u;
270+
createModuleFromBinary(perHwThreadPrivateMemorySizeRequested);
271+
272+
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
273+
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
274+
275+
createKernel(kernel.get());
276+
277+
EXPECT_EQ(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
278+
}
279+
280+
// With a non-zero requested per-HW-thread private memory size, initializing the
// kernel must create a private-memory allocation whose size equals the request
// multiplied by the device's computeUnitsUsedForScratch.
// (The "-" lines inside this span are removed lines of the previous test.)
// NOTE(review): the removed assertion used EXPECT_GE; the new EXPECT_EQ assumes
// getUnderlyingBufferSize() reports the requested size without rounding — confirm.
HWTEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenPrivateMemoryIsCreated) {
281+
uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
282+
createModuleFromBinary(perHwThreadPrivateMemorySizeRequested);
283+
284+
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
285+
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
286+
287+
createKernel(kernel.get());
288+
289+
EXPECT_NE(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
267290

268-
size_t expectedSize = static_cast<size_t>(kernelAttributes.perHwThreadPrivateMemorySize) * computeUnitsUsedForSratch;
269-
EXPECT_GE(expectedSize, kernelImmutableData.getPrivateMemoryGraphicsAllocation()->getUnderlyingBufferSize());
291+
size_t expectedSize = perHwThreadPrivateMemorySizeRequested *
292+
device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch;
293+
EXPECT_EQ(expectedSize, kernel->getPrivateMemoryGraphicsAllocation()->getUnderlyingBufferSize());
270294
}
271295

272296
HWTEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) {

0 commit comments

Comments
 (0)