Skip to content

Commit 08655a3

Browse files
Jaime Arteaga authored and Compute-Runtime-Automation committed
Revert "Initialize kernel private surface when kernel is created"
This reverts commit be2a87f. Signed-off-by: Jaime Arteaga <[email protected]>
1 parent 26b036a commit 08655a3

File tree

6 files changed: +43 additions, -159 deletions

level_zero/core/source/kernel/kernel.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ struct KernelImmutableData {
4848
uint32_t getIsaSize() const;
4949
NEO::GraphicsAllocation *getIsaGraphicsAllocation() const { return isaGraphicsAllocation.get(); }
5050

51+
uint64_t getPrivateMemorySize() const;
52+
NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() const { return privateMemoryGraphicsAllocation.get(); }
53+
5154
const uint8_t *getCrossThreadDataTemplate() const { return crossThreadDataTemplate.get(); }
5255

5356
uint32_t getSurfaceStateHeapSize() const { return surfaceStateHeapSize; }
@@ -64,6 +67,7 @@ struct KernelImmutableData {
6467
Device *device = nullptr;
6568
NEO::KernelDescriptor *kernelDescriptor = nullptr;
6669
std::unique_ptr<NEO::GraphicsAllocation> isaGraphicsAllocation = nullptr;
70+
std::unique_ptr<NEO::GraphicsAllocation> privateMemoryGraphicsAllocation = nullptr;
6771

6872
uint32_t crossThreadDataSize = 0;
6973
std::unique_ptr<uint8_t[]> crossThreadDataTemplate = nullptr;

level_zero/core/source/kernel/kernel_imp.cpp

Lines changed: 27 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,10 @@ KernelImmutableData::~KernelImmutableData() {
7272
isaGraphicsAllocation.release();
7373
}
7474
crossThreadDataTemplate.reset();
75+
if (nullptr != privateMemoryGraphicsAllocation) {
76+
this->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(&*privateMemoryGraphicsAllocation);
77+
privateMemoryGraphicsAllocation.release();
78+
}
7579
surfaceStateHeapTemplate.reset();
7680
dynamicStateHeapTemplate.reset();
7781
}
@@ -160,6 +164,21 @@ void KernelImmutableData::initialize(NEO::KernelInfo *kernelInfo, Device *device
160164
}
161165

162166
ArrayRef<uint8_t> surfaceStateHeapArrayRef = ArrayRef<uint8_t>(surfaceStateHeapTemplate.get(), getSurfaceStateHeapSize());
167+
auto &kernelAttributes = kernelDescriptor->kernelAttributes;
168+
169+
if (kernelAttributes.perHwThreadPrivateMemorySize != 0) {
170+
auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize(kernelAttributes.perHwThreadPrivateMemorySize, computeUnitsUsedForSratch);
171+
172+
UNRECOVERABLE_IF(privateSurfaceSize == 0);
173+
this->privateMemoryGraphicsAllocation.reset(memoryManager->allocateGraphicsMemoryWithProperties(
174+
{neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield()}));
175+
176+
UNRECOVERABLE_IF(this->privateMemoryGraphicsAllocation == nullptr);
177+
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
178+
static_cast<uintptr_t>(privateMemoryGraphicsAllocation->getGpuAddressToPatch()),
179+
*privateMemoryGraphicsAllocation, kernelDescriptor->payloadMappings.implicitArgs.privateMemoryAddress, *neoDevice);
180+
this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation.get());
181+
}
163182

164183
if (NEO::isValidOffset(kernelDescriptor->payloadMappings.implicitArgs.globalConstantsSurfaceAddress.stateless)) {
165184
UNRECOVERABLE_IF(nullptr == globalConstBuffer);
@@ -188,13 +207,17 @@ uint32_t KernelImmutableData::getIsaSize() const {
188207
return static_cast<uint32_t>(isaGraphicsAllocation->getUnderlyingBufferSize());
189208
}
190209

210+
uint64_t KernelImmutableData::getPrivateMemorySize() const {
211+
uint64_t size = 0;
212+
if (privateMemoryGraphicsAllocation != nullptr) {
213+
size = privateMemoryGraphicsAllocation->getUnderlyingBufferSize();
214+
}
215+
return size;
216+
}
217+
191218
KernelImp::KernelImp(Module *module) : module(module) {}
192219

193220
KernelImp::~KernelImp() {
194-
if (nullptr != privateMemoryGraphicsAllocation) {
195-
module->getDevice()->getNEODevice()->getMemoryManager()->freeGraphicsMemory(privateMemoryGraphicsAllocation);
196-
}
197-
198221
if (perThreadDataForWholeThreadGroup != nullptr) {
199222
alignedFree(perThreadDataForWholeThreadGroup);
200223
}
@@ -651,27 +674,6 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
651674
this->dynamicStateHeapDataSize = kernelImmData->getDynamicStateHeapDataSize();
652675
}
653676

654-
auto &kernelAttributes = kernelImmData->getDescriptor().kernelAttributes;
655-
auto neoDevice = module->getDevice()->getNEODevice();
656-
if (kernelAttributes.perHwThreadPrivateMemorySize != 0) {
657-
auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize(kernelAttributes.perHwThreadPrivateMemorySize,
658-
neoDevice->getDeviceInfo().computeUnitsUsedForScratch);
659-
660-
UNRECOVERABLE_IF(privateSurfaceSize == 0);
661-
this->privateMemoryGraphicsAllocation = neoDevice->getMemoryManager()->allocateGraphicsMemoryWithProperties(
662-
{neoDevice->getRootDeviceIndex(), privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield()});
663-
664-
UNRECOVERABLE_IF(this->privateMemoryGraphicsAllocation == nullptr);
665-
666-
ArrayRef<uint8_t> crossThredDataArrayRef = ArrayRef<uint8_t>(this->crossThreadData.get(), this->crossThreadDataSize);
667-
ArrayRef<uint8_t> surfaceStateHeapArrayRef = ArrayRef<uint8_t>(this->surfaceStateHeapData.get(), this->surfaceStateHeapDataSize);
668-
669-
patchWithImplicitSurface(crossThredDataArrayRef, surfaceStateHeapArrayRef,
670-
static_cast<uintptr_t>(privateMemoryGraphicsAllocation->getGpuAddressToPatch()),
671-
*privateMemoryGraphicsAllocation, kernelImmData->getDescriptor().payloadMappings.implicitArgs.privateMemoryAddress, *neoDevice);
672-
this->residencyContainer.push_back(this->privateMemoryGraphicsAllocation);
673-
}
674-
675677
if (kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize[0] > 0) {
676678
auto *reqdSize = kernelImmData->getDescriptor().kernelAttributes.requiredWorkgroupSize;
677679
UNRECOVERABLE_IF(reqdSize[1] == 0);

level_zero/core/source/kernel/kernel_imp.h

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -124,17 +124,11 @@ struct KernelImp : Kernel {
124124

125125
ze_result_t setCacheConfig(ze_cache_config_flags_t flags) override;
126126

127-
NEO::GraphicsAllocation *getPrivateMemoryGraphicsAllocation() {
128-
return privateMemoryGraphicsAllocation;
129-
}
130-
131127
protected:
132128
KernelImp() = default;
133129

134130
void patchWorkgroupSizeInCrossThreadData(uint32_t x, uint32_t y, uint32_t z);
135131

136-
NEO::GraphicsAllocation *privateMemoryGraphicsAllocation = nullptr;
137-
138132
void createPrintfBuffer();
139133
void setDebugSurface();
140134
virtual void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) = 0;
@@ -153,7 +147,7 @@ struct KernelImp : Kernel {
153147
uint32_t numThreadsPerThreadGroup = 1u;
154148
uint32_t threadExecutionMask = 0u;
155149

156-
std::unique_ptr<uint8_t[]> crossThreadData = nullptr;
150+
std::unique_ptr<uint8_t[]> crossThreadData = 0;
157151
uint32_t crossThreadDataSize = 0;
158152

159153
std::unique_ptr<uint8_t[]> surfaceStateHeapData = nullptr;

level_zero/core/test/unit_tests/fixtures/module_fixture.h

Lines changed: 0 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -12,105 +12,12 @@
1212
#include "shared/test/unit_test/helpers/test_files.h"
1313

1414
#include "level_zero/core/source/module/module.h"
15-
#include "level_zero/core/source/module/module_imp.h"
1615
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
1716
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
1817

1918
namespace L0 {
2019
namespace ult {
2120

22-
struct ModuleImmutableDataFixture : public DeviceFixture {
23-
struct MockImmutableData : KernelImmutableData {
24-
MockImmutableData(uint32_t perHwThreadPrivateMemorySize) {
25-
mockKernelDescriptor = new NEO::KernelDescriptor;
26-
mockKernelDescriptor->kernelAttributes.perHwThreadPrivateMemorySize = perHwThreadPrivateMemorySize;
27-
kernelDescriptor = mockKernelDescriptor;
28-
return;
29-
}
30-
~MockImmutableData() override {
31-
delete mockKernelDescriptor;
32-
}
33-
NEO::KernelDescriptor *mockKernelDescriptor = nullptr;
34-
};
35-
36-
struct MockModule : public L0::ModuleImp {
37-
MockModule(L0::Device *device,
38-
L0::ModuleBuildLog *moduleBuildLog,
39-
L0::ModuleType type,
40-
uint32_t perHwThreadPrivateMemorySize) : ModuleImp(device, moduleBuildLog, type) {
41-
mockKernelImmData = new MockImmutableData(perHwThreadPrivateMemorySize);
42-
}
43-
44-
~MockModule() {
45-
delete mockKernelImmData;
46-
}
47-
48-
const KernelImmutableData *getKernelImmutableData(const char *functionName) const override {
49-
return mockKernelImmData;
50-
}
51-
MockImmutableData *mockKernelImmData = nullptr;
52-
};
53-
54-
class MockKernel : public WhiteBox<L0::KernelImp> {
55-
public:
56-
MockKernel(MockModule *mockModule) : WhiteBox<L0::KernelImp>(mockModule) {
57-
}
58-
void setBufferSurfaceState(uint32_t argIndex, void *address, NEO::GraphicsAllocation *alloc) override {
59-
return;
60-
}
61-
void evaluateIfRequiresGenerationOfLocalIdsByRuntime(const NEO::KernelDescriptor &kernelDescriptor) override {
62-
return;
63-
}
64-
~MockKernel() override {
65-
}
66-
std::unique_ptr<Kernel> clone() const override { return nullptr; }
67-
};
68-
69-
void SetUp() override {
70-
DeviceFixture::SetUp();
71-
}
72-
73-
void createModuleFromBinary(uint32_t perHwThreadPrivateMemorySize) {
74-
std::string testFile;
75-
retrieveBinaryKernelFilenameNoRevision(testFile, binaryFilename + "_", ".bin");
76-
77-
size_t size = 0;
78-
auto src = loadDataFromFile(
79-
testFile.c_str(),
80-
size);
81-
82-
ASSERT_NE(0u, size);
83-
ASSERT_NE(nullptr, src);
84-
85-
ze_module_desc_t moduleDesc = {};
86-
moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
87-
moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.get());
88-
moduleDesc.inputSize = size;
89-
90-
ModuleBuildLog *moduleBuildLog = nullptr;
91-
92-
module = std::make_unique<MockModule>(device,
93-
moduleBuildLog,
94-
ModuleType::User,
95-
perHwThreadPrivateMemorySize);
96-
}
97-
98-
void createKernel(MockKernel *kernel) {
99-
ze_kernel_desc_t desc = {};
100-
desc.pKernelName = kernelName.c_str();
101-
kernel->initialize(&desc);
102-
}
103-
104-
void TearDown() override {
105-
DeviceFixture::TearDown();
106-
}
107-
108-
const std::string binaryFilename = "test_kernel";
109-
const std::string kernelName = "test";
110-
const uint32_t numKernelArguments = 6;
111-
std::unique_ptr<MockModule> module;
112-
};
113-
11421
struct ModuleFixture : public DeviceFixture {
11522
void SetUp() override {
11623
DeviceFixture::SetUp();

level_zero/core/test/unit_tests/mocks/mock_kernel.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ struct WhiteBox<::L0::KernelImmutableData> : public ::L0::KernelImmutableData {
2727
using ::L0::KernelImmutableData::isaGraphicsAllocation;
2828
using ::L0::KernelImmutableData::kernelDescriptor;
2929
using ::L0::KernelImmutableData::KernelImmutableData;
30+
using ::L0::KernelImmutableData::privateMemoryGraphicsAllocation;
3031
using ::L0::KernelImmutableData::residencyContainer;
3132

3233
WhiteBox() : ::L0::KernelImmutableData() {}

level_zero/core/test/unit_tests/sources/kernel/test_kernel.cpp

Lines changed: 10 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -254,43 +254,19 @@ HWTEST_F(KernelPropertiesTests, givenKernelThenCorrectNameIsRetrieved) {
254254
delete[] kernelNameRetrieved;
255255
}
256256

257-
class KernelImmutableDataTests : public ModuleImmutableDataFixture, public ::testing::Test {
258-
public:
259-
void SetUp() override {
260-
ModuleImmutableDataFixture::SetUp();
261-
}
262-
263-
void TearDown() override {
264-
ModuleImmutableDataFixture::TearDown();
265-
}
266-
};
267-
268-
HWTEST_F(KernelImmutableDataTests, givenKernelInitializedWithNoPrivateMemoryThenPrivateMemoryIsNull) {
269-
uint32_t perHwThreadPrivateMemorySizeRequested = 0u;
270-
createModuleFromBinary(perHwThreadPrivateMemorySizeRequested);
257+
HWTEST_F(KernelPropertiesTests, whenInitializingThenCalculatesProperPrivateSurfaceSize) {
258+
uint32_t computeUnitsUsedForSratch = 0x300;
271259

272-
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
273-
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
274-
275-
createKernel(kernel.get());
276-
277-
EXPECT_EQ(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
278-
}
279-
280-
HWTEST_F(KernelImmutableDataTests, givenKernelInitializedWithPrivateMemoryThenPrivateMemoryIsCreated) {
281-
uint32_t perHwThreadPrivateMemorySizeRequested = 32u;
282-
createModuleFromBinary(perHwThreadPrivateMemorySizeRequested);
283-
284-
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
285-
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
286-
287-
createKernel(kernel.get());
260+
KernelInfo kernelInfo;
261+
auto &kernelAttributes = kernelInfo.kernelDescriptor.kernelAttributes;
262+
kernelAttributes.perHwThreadPrivateMemorySize = 0x100;
263+
kernelAttributes.simdSize = 8;
288264

289-
EXPECT_NE(nullptr, kernel->getPrivateMemoryGraphicsAllocation());
265+
KernelImmutableData kernelImmutableData(device);
266+
kernelImmutableData.initialize(&kernelInfo, device, computeUnitsUsedForSratch, nullptr, nullptr, false);
290267

291-
size_t expectedSize = perHwThreadPrivateMemorySizeRequested *
292-
device->getNEODevice()->getDeviceInfo().computeUnitsUsedForScratch;
293-
EXPECT_EQ(expectedSize, kernel->getPrivateMemoryGraphicsAllocation()->getUnderlyingBufferSize());
268+
size_t expectedSize = static_cast<size_t>(kernelAttributes.perHwThreadPrivateMemorySize) * computeUnitsUsedForSratch;
269+
EXPECT_GE(expectedSize, kernelImmutableData.getPrivateMemoryGraphicsAllocation()->getUnderlyingBufferSize());
294270
}
295271

296272
HWTEST_F(KernelPropertiesTests, givenValidKernelThenPropertiesAreRetrieved) {

0 commit comments

Comments
 (0)