Skip to content

Commit b07f0e8

Browse files
Store multiple Kernels in MultiDeviceKernel
Related-To: NEO-5001 Signed-off-by: Mateusz Jablonski <[email protected]>
1 parent d7f67ad commit b07f0e8

27 files changed

+167
-108
lines changed

opencl/source/kernel/multi_device_kernel.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,22 @@
99
namespace NEO {
1010

1111
// Destructor: releases the internal reference on the kernel(s) held by this object.
// In this diff view, lines whose gutter ends in '-' are the removed single-kernel
// version and lines whose gutter ends in '+' are the new multi-kernel version.
MultiDeviceKernel::~MultiDeviceKernel() {
12-
kernel->decRefInternal();
12+
// Walk every slot of `kernels`; slots may be null, so each one is checked first.
for (auto &pKernel : kernels) {
13+
if (pKernel) {
14+
// Pairs with the incRefInternal() call made in the constructor.
pKernel->decRefInternal();
15+
}
16+
}
1317
}
14-
// Old constructor (gutter '-'): wrapped exactly one kernel, took an internal
// reference on it and pointed it back at this object.
MultiDeviceKernel::MultiDeviceKernel(Kernel *pKernel) : kernel(pKernel) {
15-
pKernel->incRefInternal();
16-
pKernel->setMultiDeviceKernel(this);
18+
// New constructor (gutter '+'): stores the whole kernel vector by move, then for
// every non-null entry takes an internal reference and calls setMultiDeviceKernel(this).
MultiDeviceKernel::MultiDeviceKernel(KernelVectorType kernelVector) : kernels(std::move(kernelVector)) {
19+
for (auto &pKernel : kernels) {
20+
if (pKernel) {
21+
// Only runs until a default has been chosen.
if (!defaultKernel) {
22+
// The default is the slot at the root device index of this kernel's first device.
// NOTE(review): if every entry in `kernels` is null, defaultKernel stays nullptr.
defaultKernel = kernels[(*pKernel->getDevices().begin())->getRootDeviceIndex()];
23+
}
24+
pKernel->incRefInternal();
25+
pKernel->setMultiDeviceKernel(this);
26+
}
27+
}
1728
};
1829

1930
} // namespace NEO

opencl/source/kernel/multi_device_kernel.h

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -14,48 +14,59 @@ struct OpenCLObjectMapper<_cl_kernel> {
1414
typedef class MultiDeviceKernel DerivedType;
1515
};
1616

17+
using KernelVectorType = StackVec<Kernel *, 4>;
18+
1719
// API-level kernel object (the DerivedType mapped to _cl_kernel above).
// After this commit it holds a vector of Kernel pointers indexed by root device
// index (`kernels`) plus one `defaultKernel` that the forwarding methods use.
// In this diff view, gutter lines ending in '-' mark removed code and '+' added code.
class MultiDeviceKernel : public BaseObject<_cl_kernel> {
1820
public:
1921
static const cl_ulong objectMagic = 0x3284ADC8EA0AFE25LL;
2022

2123
~MultiDeviceKernel() override;
22-
MultiDeviceKernel(Kernel *pKernel);
24+
// Takes the per-root-device kernel vector by value (moved into `kernels`).
MultiDeviceKernel(KernelVectorType kernelVector);
2325

24-
Kernel *getKernel(uint32_t rootDeviceIndex) const { return kernel; }
25-
Kernel *getDefaultKernel() const { return kernel; }
26+
// Returns the slot for the given root device index; the slot may be null.
// NOTE(review): no range check on rootDeviceIndex is visible at this call site.
Kernel *getKernel(uint32_t rootDeviceIndex) const { return kernels[rootDeviceIndex]; }
27+
// Returns the kernel selected as default by the constructor (may be nullptr).
Kernel *getDefaultKernel() const { return defaultKernel; }
2628

2729
// Factory: builds the kernel vector for `program` and wraps it in a new
// multi_device_kernel_t allocated with `new`; the caller receives the raw pointer.
template <typename kernel_t = Kernel, typename program_t = Program, typename multi_device_kernel_t = MultiDeviceKernel>
2830
static multi_device_kernel_t *create(program_t *program, const KernelInfoContainer &kernelInfos, cl_int *errcodeRet) {
31+
KernelVectorType kernels{};
32+
// One slot per possible root device index, 0..getMaxRootDeviceIndex().
kernels.resize(program->getMaxRootDeviceIndex() + 1);
2933

30-
auto pKernel = Kernel::create<kernel_t, program_t>(program, kernelInfos, errcodeRet);
31-
auto pMultiDeviceKernel = new multi_device_kernel_t(pKernel);
34+
for (auto &pDevice : program->getDevices()) {
35+
auto rootDeviceIndex = pDevice->getRootDeviceIndex();
36+
// Devices that share a root device index share one kernel; skip filled slots.
if (kernels[rootDeviceIndex]) {
37+
continue;
38+
}
39+
// NOTE(review): the same errcodeRet is passed to every call and the result is
// not checked here, so presumably a later call can overwrite an earlier
// status - confirm against Kernel::create.
kernels[rootDeviceIndex] = Kernel::create<kernel_t, program_t>(program, kernelInfos, errcodeRet);
40+
}
41+
auto pMultiDeviceKernel = new multi_device_kernel_t(std::move(kernels));
3242

3343
return pMultiDeviceKernel;
3444
}
3545

36-
cl_int cloneKernel(Kernel *pSourceKernel) { return kernel->cloneKernel(pSourceKernel); }
37-
const std::vector<Kernel::SimpleKernelArgInfo> &getKernelArguments() const { return kernel->getKernelArguments(); }
38-
cl_int checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, const void *argValue) const { return kernel->checkCorrectImageAccessQualifier(argIndex, argSize, argValue); }
39-
void unsetArg(uint32_t argIndex) { return kernel->unsetArg(argIndex); }
40-
cl_int setArg(uint32_t argIndex, size_t argSize, const void *argVal) { return kernel->setArg(argIndex, argSize, argVal); }
41-
cl_int getInfo(cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { return kernel->getInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); }
42-
cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { return kernel->getArgInfo(argIndx, paramName, paramValueSize, paramValue, paramValueSizeRet); }
43-
const ClDeviceVector &getDevices() const { return kernel->getDevices(); }
44-
size_t getKernelArgsNumber() const { return kernel->getKernelArgsNumber(); }
45-
Context &getContext() const { return kernel->getContext(); }
46-
cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc) { return kernel->setArgSvmAlloc(argIndex, svmPtr, svmAlloc); }
47-
bool getHasIndirectAccess() const { return kernel->getHasIndirectAccess(); }
48-
void setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue) { return kernel->setUnifiedMemoryProperty(infoType, infoValue); }
49-
void setSvmKernelExecInfo(GraphicsAllocation *argValue) { return kernel->setSvmKernelExecInfo(argValue); }
50-
void clearSvmKernelExecInfo() { return kernel->clearSvmKernelExecInfo(); }
51-
void setUnifiedMemoryExecInfo(GraphicsAllocation *argValue) { return kernel->setUnifiedMemoryExecInfo(argValue); }
52-
void clearUnifiedMemoryExecInfo() { return kernel->clearUnifiedMemoryExecInfo(); }
53-
int setKernelThreadArbitrationPolicy(uint32_t propertyValue) { return kernel->setKernelThreadArbitrationPolicy(propertyValue); }
54-
cl_int setKernelExecutionType(cl_execution_info_kernel_type_intel executionType) { return kernel->setKernelExecutionType(executionType); }
55-
int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue) { return kernel->setAdditionalKernelExecInfoWithParam(paramName, paramValueSize, paramValue); }
46+
// The added ('+') methods below each forward to defaultKernel only; the other
// entries in `kernels` are not touched by them.
// NOTE(review): defaultKernel is dereferenced without a null check in each of them.
cl_int cloneKernel(Kernel *pSourceKernel) { return defaultKernel->cloneKernel(pSourceKernel); }
47+
const std::vector<Kernel::SimpleKernelArgInfo> &getKernelArguments() const { return defaultKernel->getKernelArguments(); }
48+
cl_int checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, const void *argValue) const { return defaultKernel->checkCorrectImageAccessQualifier(argIndex, argSize, argValue); }
49+
void unsetArg(uint32_t argIndex) { return defaultKernel->unsetArg(argIndex); }
50+
cl_int setArg(uint32_t argIndex, size_t argSize, const void *argVal) { return defaultKernel->setArg(argIndex, argSize, argVal); }
51+
cl_int getInfo(cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { return defaultKernel->getInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); }
52+
cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { return defaultKernel->getArgInfo(argIndx, paramName, paramValueSize, paramValue, paramValueSizeRet); }
53+
const ClDeviceVector &getDevices() const { return defaultKernel->getDevices(); }
54+
size_t getKernelArgsNumber() const { return defaultKernel->getKernelArgsNumber(); }
55+
Context &getContext() const { return defaultKernel->getContext(); }
56+
cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc) { return defaultKernel->setArgSvmAlloc(argIndex, svmPtr, svmAlloc); }
57+
bool getHasIndirectAccess() const { return defaultKernel->getHasIndirectAccess(); }
58+
void setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue) { return defaultKernel->setUnifiedMemoryProperty(infoType, infoValue); }
59+
void setSvmKernelExecInfo(GraphicsAllocation *argValue) { return defaultKernel->setSvmKernelExecInfo(argValue); }
60+
void clearSvmKernelExecInfo() { return defaultKernel->clearSvmKernelExecInfo(); }
61+
void setUnifiedMemoryExecInfo(GraphicsAllocation *argValue) { return defaultKernel->setUnifiedMemoryExecInfo(argValue); }
62+
void clearUnifiedMemoryExecInfo() { return defaultKernel->clearUnifiedMemoryExecInfo(); }
63+
int setKernelThreadArbitrationPolicy(uint32_t propertyValue) { return defaultKernel->setKernelThreadArbitrationPolicy(propertyValue); }
64+
cl_int setKernelExecutionType(cl_execution_info_kernel_type_intel executionType) { return defaultKernel->setKernelExecutionType(executionType); }
65+
int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue) { return defaultKernel->setAdditionalKernelExecInfoWithParam(paramName, paramValueSize, paramValue); }
5666

5767
protected:
58-
Kernel *kernel = nullptr;
68+
// Kernel pointers indexed by root device index; unused indices hold nullptr.
KernelVectorType kernels;
69+
// Kernel used by the forwarding methods; chosen in the constructor.
Kernel *defaultKernel = nullptr;
5970
};
6071

6172
} // namespace NEO

opencl/test/unit_test/accelerators/media_image_arg_tests.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,11 @@ class MediaImageSetArgTest : public ClDeviceFixture,
4949
pKernelInfo->kernelArgInfo[1].isImage = true;
5050
pKernelInfo->kernelArgInfo[0].isImage = true;
5151

52-
pKernel = new MockKernel(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex));
52+
int32_t retVal = CL_INVALID_PLATFORM;
53+
pMultiDeviceKernel = MultiDeviceKernel::create<MockKernel>(program.get(), MockKernel::toKernelInfoContainer(*pKernelInfo, rootDeviceIndex), &retVal);
54+
pKernel = static_cast<MockKernel *>(pMultiDeviceKernel->getKernel(rootDeviceIndex));
5355
ASSERT_NE(nullptr, pKernel);
54-
ASSERT_EQ(CL_SUCCESS, pKernel->initialize());
55-
pMultiDeviceKernel = new MultiDeviceKernel(pKernel);
56+
ASSERT_EQ(CL_SUCCESS, retVal);
5657

5758
ASSERT_EQ(true, pKernel->isVmeKernel());
5859

opencl/test/unit_test/api/cl_api_tests.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,9 +49,9 @@ struct ApiFixture {
4949

5050
pProgram = new MockProgram(pContext, false, toClDeviceVector(*pDevice));
5151

52-
pKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(pProgram->mockKernelInfo, testedRootDeviceIndex));
52+
pMultiDeviceKernel = MockMultiDeviceKernel::create<MockKernel>(pProgram, MockKernel::toKernelInfoContainer(pProgram->mockKernelInfo, testedRootDeviceIndex));
53+
pKernel = static_cast<MockKernel *>(pMultiDeviceKernel->getKernel(testedRootDeviceIndex));
5354
ASSERT_NE(nullptr, pKernel);
54-
pMultiDeviceKernel = new MultiDeviceKernel(pKernel);
5555
}
5656

5757
virtual void TearDown() {

opencl/test/unit_test/api/cl_get_kernel_max_concurrent_work_group_count_intel_tests.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ TEST_F(clGetKernelMaxConcurrentWorkGroupCountTests, GivenVariousInputWhenGetting
7272
EXPECT_EQ(expectedMaxConcurrentWorkGroupCount, maxConcurrentWorkGroupCount);
7373

7474
auto pKernelWithExecutionEnvironmentPatch = MockKernel::create(pCommandQueue->getDevice(), pProgram);
75-
MultiDeviceKernel multiDeviceKernelWithExecutionEnvironmentPatch(pKernelWithExecutionEnvironmentPatch);
75+
MultiDeviceKernel multiDeviceKernelWithExecutionEnvironmentPatch(MockMultiDeviceKernel::toKernelVector(pKernelWithExecutionEnvironmentPatch));
7676
retVal = clGetKernelMaxConcurrentWorkGroupCountINTEL(pCommandQueue, &multiDeviceKernelWithExecutionEnvironmentPatch, workDim,
7777
globalWorkOffset, localWorkSize,
7878
&maxConcurrentWorkGroupCount);

opencl/test/unit_test/api/cl_get_kernel_sub_group_info_khr_tests.inl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,8 @@ TEST_F(KernelSubGroupInfoKhrTest, GivenNullDeviceWhenGettingSubGroupInfoFromMult
199199

200200
MockUnrestrictiveContext context;
201201
auto mockProgram = std::make_unique<MockProgram>(&context, false, context.getDevices());
202-
auto mockKernel = new MockKernel(mockProgram.get(), pKernel->getKernelInfos());
203-
auto pMultiDeviceKernel = std::make_unique<MultiDeviceKernel>(mockKernel);
202+
std::unique_ptr<MultiDeviceKernel> pMultiDeviceKernel(
203+
MultiDeviceKernel::create<MockKernel>(mockProgram.get(), pKernel->getKernelInfos(), nullptr));
204204

205205
retVal = clGetKernelSubGroupInfoKHR(
206206
pMultiDeviceKernel.get(),

opencl/test/unit_test/api/cl_get_kernel_sub_group_info_tests.inl

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -348,8 +348,7 @@ TEST_F(KernelSubGroupInfoTest, GivenNullDeviceWhenGettingSubGroupInfoFromMultiDe
348348

349349
MockUnrestrictiveContext context;
350350
auto mockProgram = std::make_unique<MockProgram>(&context, false, context.getDevices());
351-
auto mockKernel = new MockKernel(mockProgram.get(), pKernel->getKernelInfos());
352-
auto pMultiDeviceKernel = std::make_unique<MultiDeviceKernel>(mockKernel);
351+
std::unique_ptr<MultiDeviceKernel> pMultiDeviceKernel(MultiDeviceKernel::create<MockKernel>(mockProgram.get(), pKernel->getKernelInfos(), nullptr));
353352

354353
retVal = clGetKernelSubGroupInfo(
355354
pMultiDeviceKernel.get(),

opencl/test/unit_test/api/cl_get_kernel_suggested_local_work_size_intel_tests.inl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenVariousInputWhenGettingSugge
9898

9999
TEST_F(clGetKernelSuggestedLocalWorkSizeTests, GivenKernelWithExecutionEnvironmentPatchedWhenGettingSuggestedLocalWorkSizeThenCorrectValuesAreReturned) {
100100
auto pKernelWithExecutionEnvironmentPatch = MockKernel::create(pCommandQueue->getDevice(), pProgram);
101-
MultiDeviceKernel multiDeviceKernelWithExecutionEnvironmentPatch(pKernelWithExecutionEnvironmentPatch);
101+
MultiDeviceKernel multiDeviceKernelWithExecutionEnvironmentPatch(MockMultiDeviceKernel::toKernelVector(pKernelWithExecutionEnvironmentPatch));
102102

103103
size_t globalWorkOffset[] = {0, 0, 0};
104104
size_t globalWorkSize[] = {128, 128, 128};

opencl/test/unit_test/api/cl_get_kernel_work_group_info_tests.inl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ TEST_F(clGetKernelWorkGroupInfoTest, GivenNullDeviceWhenGettingWorkGroupInfoFrom
6161
size_t paramValueSizeRet;
6262
MockUnrestrictiveContext context;
6363
auto mockProgram = std::make_unique<MockProgram>(&context, false, context.getDevices());
64-
auto pMockKernel = new MockKernel(mockProgram.get(), MockKernel::toKernelInfoContainer(pKernel->getKernelInfo(testedRootDeviceIndex), context.getDevice(0)->getRootDeviceIndex()));
65-
auto pMultiDeviceKernel = std::make_unique<MultiDeviceKernel>(pMockKernel);
64+
std::unique_ptr<MultiDeviceKernel> pMultiDeviceKernel(
65+
MockMultiDeviceKernel::create<MockKernel>(mockProgram.get(), MockKernel::toKernelInfoContainer(pKernel->getKernelInfo(testedRootDeviceIndex), context.getDevice(0)->getRootDeviceIndex())));
6666

6767
retVal = clGetKernelWorkGroupInfo(
6868
pMultiDeviceKernel.get(),

opencl/test/unit_test/api/cl_set_kernel_arg_svm_pointer_tests.inl

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ class KernelArgSvmFixture : public ApiFixture<> {
4040
pKernelInfo->kernelArgInfo[0].kernelArgPatchInfoVector[0].size = (uint32_t)sizeof(void *);
4141
pKernelInfo->kernelArgInfo[0].metadata.addressQualifier = KernelArgMetadata::AddrGlobal;
4242

43-
pMockKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex));
44-
ASSERT_EQ(CL_SUCCESS, pMockKernel->initialize());
45-
pMockMultiDeviceKernel = new MultiDeviceKernel(pMockKernel);
43+
pMockMultiDeviceKernel = MultiDeviceKernel::create<MockKernel>(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex), nullptr);
44+
pMockKernel = static_cast<MockKernel *>(pMockMultiDeviceKernel->getKernel(testedRootDeviceIndex));
45+
ASSERT_NE(nullptr, pMockKernel);
4646
pMockKernel->setCrossThreadData(pCrossThreadData, sizeof(pCrossThreadData));
4747
}
4848

@@ -88,8 +88,9 @@ TEST_F(clSetKernelArgSVMPointerTests, GivenDeviceNotSupportingSvmWhenSettingKern
8888
auto hwInfo = executionEnvironment->rootDeviceEnvironments[ApiFixture::testedRootDeviceIndex]->getMutableHardwareInfo();
8989
hwInfo->capabilityTable.ftrSvm = false;
9090

91-
auto pMockKernel = new MockKernel(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex));
92-
auto pMultiDeviceKernel = std::make_unique<MultiDeviceKernel>(pMockKernel);
91+
std::unique_ptr<MultiDeviceKernel> pMultiDeviceKernel(
92+
MultiDeviceKernel::create<MockKernel>(pProgram, MockKernel::toKernelInfoContainer(*pKernelInfo, testedRootDeviceIndex), nullptr));
93+
9394
auto retVal = clSetKernelArgSVMPointer(
9495
pMultiDeviceKernel.get(), // cl_kernel kernel
9596
(cl_uint)-1, // cl_uint arg_index

0 commit comments

Comments
 (0)