@@ -14,48 +14,59 @@ struct OpenCLObjectMapper<_cl_kernel> {
     typedef class MultiDeviceKernel DerivedType;
 };
 
+using KernelVectorType = StackVec<Kernel *, 4>;
+
 class MultiDeviceKernel : public BaseObject<_cl_kernel> {
   public:
     static const cl_ulong objectMagic = 0x3284ADC8EA0AFE25LL;
 
     ~MultiDeviceKernel() override;
-    MultiDeviceKernel(Kernel *pKernel);
+    MultiDeviceKernel(KernelVectorType kernelVector);
 
-    Kernel *getKernel(uint32_t rootDeviceIndex) const { return kernel; }
-    Kernel *getDefaultKernel() const { return kernel; }
+    Kernel *getKernel(uint32_t rootDeviceIndex) const { return kernels[rootDeviceIndex]; }
+    Kernel *getDefaultKernel() const { return defaultKernel; }
 
     template <typename kernel_t = Kernel, typename program_t = Program, typename multi_device_kernel_t = MultiDeviceKernel>
     static multi_device_kernel_t *create(program_t *program, const KernelInfoContainer &kernelInfos, cl_int *errcodeRet) {
+        KernelVectorType kernels{};
+        kernels.resize(program->getMaxRootDeviceIndex() + 1);
 
-        auto pKernel = Kernel::create<kernel_t, program_t>(program, kernelInfos, errcodeRet);
-        auto pMultiDeviceKernel = new multi_device_kernel_t(pKernel);
+        for (auto &pDevice : program->getDevices()) {
+            auto rootDeviceIndex = pDevice->getRootDeviceIndex();
+            if (kernels[rootDeviceIndex]) {
+                continue;
+            }
+            kernels[rootDeviceIndex] = Kernel::create<kernel_t, program_t>(program, kernelInfos, errcodeRet);
+        }
+        auto pMultiDeviceKernel = new multi_device_kernel_t(std::move(kernels));
 
         return pMultiDeviceKernel;
     }
 
-    cl_int cloneKernel(Kernel *pSourceKernel) { return kernel->cloneKernel(pSourceKernel); }
-    const std::vector<Kernel::SimpleKernelArgInfo> &getKernelArguments() const { return kernel->getKernelArguments(); }
-    cl_int checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, const void *argValue) const { return kernel->checkCorrectImageAccessQualifier(argIndex, argSize, argValue); }
-    void unsetArg(uint32_t argIndex) { return kernel->unsetArg(argIndex); }
-    cl_int setArg(uint32_t argIndex, size_t argSize, const void *argVal) { return kernel->setArg(argIndex, argSize, argVal); }
-    cl_int getInfo(cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { return kernel->getInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); }
-    cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { return kernel->getArgInfo(argIndx, paramName, paramValueSize, paramValue, paramValueSizeRet); }
-    const ClDeviceVector &getDevices() const { return kernel->getDevices(); }
-    size_t getKernelArgsNumber() const { return kernel->getKernelArgsNumber(); }
-    Context &getContext() const { return kernel->getContext(); }
-    cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc) { return kernel->setArgSvmAlloc(argIndex, svmPtr, svmAlloc); }
-    bool getHasIndirectAccess() const { return kernel->getHasIndirectAccess(); }
-    void setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue) { return kernel->setUnifiedMemoryProperty(infoType, infoValue); }
-    void setSvmKernelExecInfo(GraphicsAllocation *argValue) { return kernel->setSvmKernelExecInfo(argValue); }
-    void clearSvmKernelExecInfo() { return kernel->clearSvmKernelExecInfo(); }
-    void setUnifiedMemoryExecInfo(GraphicsAllocation *argValue) { return kernel->setUnifiedMemoryExecInfo(argValue); }
-    void clearUnifiedMemoryExecInfo() { return kernel->clearUnifiedMemoryExecInfo(); }
-    int setKernelThreadArbitrationPolicy(uint32_t propertyValue) { return kernel->setKernelThreadArbitrationPolicy(propertyValue); }
-    cl_int setKernelExecutionType(cl_execution_info_kernel_type_intel executionType) { return kernel->setKernelExecutionType(executionType); }
-    int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue) { return kernel->setAdditionalKernelExecInfoWithParam(paramName, paramValueSize, paramValue); }
+    cl_int cloneKernel(Kernel *pSourceKernel) { return defaultKernel->cloneKernel(pSourceKernel); }
+    const std::vector<Kernel::SimpleKernelArgInfo> &getKernelArguments() const { return defaultKernel->getKernelArguments(); }
+    cl_int checkCorrectImageAccessQualifier(cl_uint argIndex, size_t argSize, const void *argValue) const { return defaultKernel->checkCorrectImageAccessQualifier(argIndex, argSize, argValue); }
+    void unsetArg(uint32_t argIndex) { return defaultKernel->unsetArg(argIndex); }
+    cl_int setArg(uint32_t argIndex, size_t argSize, const void *argVal) { return defaultKernel->setArg(argIndex, argSize, argVal); }
+    cl_int getInfo(cl_kernel_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { return defaultKernel->getInfo(paramName, paramValueSize, paramValue, paramValueSizeRet); }
+    cl_int getArgInfo(cl_uint argIndx, cl_kernel_arg_info paramName, size_t paramValueSize, void *paramValue, size_t *paramValueSizeRet) const { return defaultKernel->getArgInfo(argIndx, paramName, paramValueSize, paramValue, paramValueSizeRet); }
+    const ClDeviceVector &getDevices() const { return defaultKernel->getDevices(); }
+    size_t getKernelArgsNumber() const { return defaultKernel->getKernelArgsNumber(); }
+    Context &getContext() const { return defaultKernel->getContext(); }
+    cl_int setArgSvmAlloc(uint32_t argIndex, void *svmPtr, GraphicsAllocation *svmAlloc) { return defaultKernel->setArgSvmAlloc(argIndex, svmPtr, svmAlloc); }
+    bool getHasIndirectAccess() const { return defaultKernel->getHasIndirectAccess(); }
+    void setUnifiedMemoryProperty(cl_kernel_exec_info infoType, bool infoValue) { return defaultKernel->setUnifiedMemoryProperty(infoType, infoValue); }
+    void setSvmKernelExecInfo(GraphicsAllocation *argValue) { return defaultKernel->setSvmKernelExecInfo(argValue); }
+    void clearSvmKernelExecInfo() { return defaultKernel->clearSvmKernelExecInfo(); }
+    void setUnifiedMemoryExecInfo(GraphicsAllocation *argValue) { return defaultKernel->setUnifiedMemoryExecInfo(argValue); }
+    void clearUnifiedMemoryExecInfo() { return defaultKernel->clearUnifiedMemoryExecInfo(); }
+    int setKernelThreadArbitrationPolicy(uint32_t propertyValue) { return defaultKernel->setKernelThreadArbitrationPolicy(propertyValue); }
+    cl_int setKernelExecutionType(cl_execution_info_kernel_type_intel executionType) { return defaultKernel->setKernelExecutionType(executionType); }
+    int32_t setAdditionalKernelExecInfoWithParam(uint32_t paramName, size_t paramValueSize, const void *paramValue) { return defaultKernel->setAdditionalKernelExecInfoWithParam(paramName, paramValueSize, paramValue); }
 
   protected:
-    Kernel *kernel = nullptr;
+    KernelVectorType kernels;
+    Kernel *defaultKernel = nullptr;
 };
 
 } // namespace NEO
0 commit comments