@@ -33,27 +33,35 @@ using namespace GPUCA_NAMESPACE::gpu;
3333 }
3434
3535#define GPUCA_KRNL (x_class, x_attributes, ...) GPUCA_KRNL_PROP(x_class, x_attributes)
36- #define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionOCL
36+ #define GPUCA_KRNL_BACKEND_CLASS GPUReconstructionOCLBackend
3737#include " GPUReconstructionKernelList.h"
3838#undef GPUCA_KRNL
3939
40- GPUReconstructionOCL::GPUReconstructionOCL (const GPUSettingsDeviceBackend& cfg) : GPUReconstructionDeviceBase(cfg, sizeof (GPUReconstructionDeviceBase))
40+ #include " utils/qGetLdBinarySymbols.h"
41+ QGET_LD_BINARY_SYMBOLS (GPUReconstructionOCLCode_src);
42+ #ifdef OPENCL_ENABLED_SPIRV
43+ QGET_LD_BINARY_SYMBOLS (GPUReconstructionOCLCode_spirv);
44+ #endif
45+
46+ GPUReconstruction* GPUReconstruction_Create_OCL (const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionOCL (cfg); }
47+
48+ GPUReconstructionOCLBackend::GPUReconstructionOCLBackend (const GPUSettingsDeviceBackend& cfg) : GPUReconstructionDeviceBase(cfg, sizeof (GPUReconstructionDeviceBase))
4149{
4250 if (mMaster == nullptr ) {
4351 mInternals = new GPUReconstructionOCLInternals;
4452 }
4553 mDeviceBackendSettings .deviceType = DeviceType::OCL;
4654}
4755
48- GPUReconstructionOCL ::~GPUReconstructionOCL ()
56+ GPUReconstructionOCLBackend ::~GPUReconstructionOCLBackend ()
4957{
5058 Exit (); // Make sure we destroy everything (in particular the ITS tracker) before we exit
5159 if (mMaster == nullptr ) {
5260 delete mInternals ;
5361 }
5462}
5563
56- int32_t GPUReconstructionOCL ::GPUFailedMsgAI (const int64_t error, const char * file, int32_t line)
64+ int32_t GPUReconstructionOCLBackend ::GPUFailedMsgAI (const int64_t error, const char * file, int32_t line)
5765{
5866 // Check for OPENCL Error and in the case of an error display the corresponding error string
5967 if (error == CL_SUCCESS) {
@@ -63,7 +71,7 @@ int32_t GPUReconstructionOCL::GPUFailedMsgAI(const int64_t error, const char* fi
6371 return 1 ;
6472}
6573
66- void GPUReconstructionOCL ::GPUFailedMsgA (const int64_t error, const char * file, int32_t line)
74+ void GPUReconstructionOCLBackend ::GPUFailedMsgA (const int64_t error, const char * file, int32_t line)
6775{
6876 if (GPUFailedMsgAI (error, file, line)) {
6977 static bool runningCallbacks = false ;
@@ -75,12 +83,12 @@ void GPUReconstructionOCL::GPUFailedMsgA(const int64_t error, const char* file,
7583 }
7684}
7785
78- void GPUReconstructionOCL ::UpdateAutomaticProcessingSettings ()
86+ void GPUReconstructionOCLBackend ::UpdateAutomaticProcessingSettings ()
7987{
8088 GPUCA_GPUReconstructionUpdateDefaults ();
8189}
8290
83- int32_t GPUReconstructionOCL ::InitDevice_Runtime ()
91+ int32_t GPUReconstructionOCLBackend ::InitDevice_Runtime ()
8492{
8593 if (mMaster == nullptr ) {
8694 cl_int ocl_error;
@@ -386,7 +394,7 @@ int32_t GPUReconstructionOCL::InitDevice_Runtime()
386394 return (0 );
387395}
388396
389- int32_t GPUReconstructionOCL ::ExitDevice_Runtime ()
397+ int32_t GPUReconstructionOCLBackend ::ExitDevice_Runtime ()
390398{
391399 // Uninitialize OPENCL
392400 SynchronizeGPU ();
@@ -418,7 +426,7 @@ int32_t GPUReconstructionOCL::ExitDevice_Runtime()
418426 return (0 );
419427}
420428
421- size_t GPUReconstructionOCL ::GPUMemCpy (void * dst, const void * src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev, deviceEvent* evList, int32_t nEvents)
429+ size_t GPUReconstructionOCLBackend ::GPUMemCpy (void * dst, const void * src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev, deviceEvent* evList, int32_t nEvents)
422430{
423431 if (evList == nullptr ) {
424432 nEvents = 0 ;
@@ -442,7 +450,7 @@ size_t GPUReconstructionOCL::GPUMemCpy(void* dst, const void* src, size_t size,
442450 return size;
443451}
444452
445- size_t GPUReconstructionOCL ::WriteToConstantMemory (size_t offset, const void * src, size_t size, int32_t stream, deviceEvent* ev)
453+ size_t GPUReconstructionOCLBackend ::WriteToConstantMemory (size_t offset, const void * src, size_t size, int32_t stream, deviceEvent* ev)
446454{
447455 if (stream == -1 ) {
448456 SynchronizeGPU ();
@@ -454,11 +462,11 @@ size_t GPUReconstructionOCL::WriteToConstantMemory(size_t offset, const void* sr
454462 return size;
455463}
456464
457- void GPUReconstructionOCL ::ReleaseEvent (deviceEvent ev) { GPUFailedMsg (clReleaseEvent (ev.get <cl_event>())); }
465+ void GPUReconstructionOCLBackend ::ReleaseEvent (deviceEvent ev) { GPUFailedMsg (clReleaseEvent (ev.get <cl_event>())); }
458466
459- void GPUReconstructionOCL ::RecordMarker (deviceEvent* ev, int32_t stream) { GPUFailedMsg (clEnqueueMarkerWithWaitList (mInternals ->command_queue [stream], 0 , nullptr , ev->getEventList <cl_event>())); }
467+ void GPUReconstructionOCLBackend ::RecordMarker (deviceEvent* ev, int32_t stream) { GPUFailedMsg (clEnqueueMarkerWithWaitList (mInternals ->command_queue [stream], 0 , nullptr , ev->getEventList <cl_event>())); }
460468
461- int32_t GPUReconstructionOCL ::DoStuckProtection (int32_t stream, deviceEvent event)
469+ int32_t GPUReconstructionOCLBackend ::DoStuckProtection (int32_t stream, deviceEvent event)
462470{
463471 if (mProcessingSettings .stuckProtection ) {
464472 cl_int tmp = 0 ;
@@ -479,25 +487,25 @@ int32_t GPUReconstructionOCL::DoStuckProtection(int32_t stream, deviceEvent even
479487 return 0 ;
480488}
481489
482- void GPUReconstructionOCL ::SynchronizeGPU ()
490+ void GPUReconstructionOCLBackend ::SynchronizeGPU ()
483491{
484492 for (int32_t i = 0 ; i < mNStreams ; i++) {
485493 GPUFailedMsg (clFinish (mInternals ->command_queue [i]));
486494 }
487495}
488496
489- void GPUReconstructionOCL ::SynchronizeStream (int32_t stream) { GPUFailedMsg (clFinish (mInternals ->command_queue [stream])); }
497+ void GPUReconstructionOCLBackend ::SynchronizeStream (int32_t stream) { GPUFailedMsg (clFinish (mInternals ->command_queue [stream])); }
490498
491- void GPUReconstructionOCL ::SynchronizeEvents (deviceEvent* evList, int32_t nEvents) { GPUFailedMsg (clWaitForEvents (nEvents, evList->getEventList <cl_event>())); }
499+ void GPUReconstructionOCLBackend ::SynchronizeEvents (deviceEvent* evList, int32_t nEvents) { GPUFailedMsg (clWaitForEvents (nEvents, evList->getEventList <cl_event>())); }
492500
493- void GPUReconstructionOCL ::StreamWaitForEvents (int32_t stream, deviceEvent* evList, int32_t nEvents)
501+ void GPUReconstructionOCLBackend ::StreamWaitForEvents (int32_t stream, deviceEvent* evList, int32_t nEvents)
494502{
495503 if (nEvents) {
496504 GPUFailedMsg (clEnqueueMarkerWithWaitList (mInternals ->command_queue [stream], nEvents, evList->getEventList <cl_event>(), nullptr ));
497505 }
498506}
499507
500- bool GPUReconstructionOCL ::IsEventDone (deviceEvent* evList, int32_t nEvents)
508+ bool GPUReconstructionOCLBackend ::IsEventDone (deviceEvent* evList, int32_t nEvents)
501509{
502510 cl_int eventdone;
503511 for (int32_t i = 0 ; i < nEvents; i++) {
@@ -509,7 +517,7 @@ bool GPUReconstructionOCL::IsEventDone(deviceEvent* evList, int32_t nEvents)
509517 return true ;
510518}
511519
512- int32_t GPUReconstructionOCL ::GPUDebug (const char * state, int32_t stream, bool force)
520+ int32_t GPUReconstructionOCLBackend ::GPUDebug (const char * state, int32_t stream, bool force)
513521{
514522 // Wait for OPENCL-Kernel to finish and check for OPENCL errors afterwards, in case of debugmode
515523 if (!force && mProcessingSettings .debugLevel <= 0 ) {
@@ -525,3 +533,108 @@ int32_t GPUReconstructionOCL::GPUDebug(const char* state, int32_t stream, bool f
525533 }
526534 return (0 );
527535}
536+
537+ template <class T , int32_t I, typename ... Args>
538+ int32_t GPUReconstructionOCLBackend::runKernelBackend (const krnlSetupArgs<T, I, Args...>& args)
539+ {
540+ cl_kernel k = args.s .y .num > 1 ? getKernelObject<cl_kernel, T, I, true >() : getKernelObject<cl_kernel, T, I, false >();
541+ return std::apply ([this , &args, &k](auto &... vals) { return runKernelBackendInternal (args.s , k, vals...); }, args.v );
542+ }
543+
544+ template <class S , class T , int32_t I, bool MULTI>
545+ S& GPUReconstructionOCLBackend::getKernelObject ()
546+ {
547+ static uint32_t krnl = FindKernel<T, I>(MULTI ? 2 : 1 );
548+ return mInternals ->kernels [krnl].first ;
549+ }
550+
551+ int32_t GPUReconstructionOCLBackend::GetOCLPrograms ()
552+ {
553+ char platform_version[256 ] = {};
554+ GPUFailedMsg (clGetPlatformInfo (mInternals ->platform , CL_PLATFORM_VERSION, sizeof (platform_version), platform_version, nullptr ));
555+ float ver = 0 ;
556+ sscanf (platform_version, " OpenCL %f" , &ver);
557+
558+ cl_int ocl_error;
559+
560+ const char * ocl_flags = GPUCA_M_STR (OCL_FLAGS);
561+
562+ #ifdef OPENCL_ENABLED_SPIRV // clang-format off
563+ if (ver >= 2 .2f && !GetProcessingSettings ().oclCompileFromSources ) {
564+ GPUInfo (" Reading OpenCL program from SPIR-V IL (Platform version %4.2f)" , ver);
565+ mInternals ->program = clCreateProgramWithIL (mInternals ->context , _binary_GPUReconstructionOCLCode_spirv_start, _binary_GPUReconstructionOCLCode_spirv_len, &ocl_error);
566+ ocl_flags = " " ;
567+ } else
568+ #endif // clang-format on
569+ {
570+ GPUInfo (" Compiling OpenCL program from sources (Platform version %4.2f)" , ver);
571+ size_t program_sizes[1 ] = {_binary_GPUReconstructionOCLCode_src_len};
572+ char * programs_sources[1 ] = {_binary_GPUReconstructionOCLCode_src_start};
573+ mInternals ->program = clCreateProgramWithSource (mInternals ->context , (cl_uint)1 , (const char **)&programs_sources, program_sizes, &ocl_error);
574+ }
575+
576+ if (GPUFailedMsgI (ocl_error)) {
577+ GPUError (" Error creating OpenCL program from binary" );
578+ return 1 ;
579+ }
580+
581+ if (GPUFailedMsgI (clBuildProgram (mInternals ->program , 1 , &mInternals ->device , ocl_flags, nullptr , nullptr ))) {
582+ cl_build_status status;
583+ if (GPUFailedMsgI (clGetProgramBuildInfo (mInternals ->program , mInternals ->device , CL_PROGRAM_BUILD_STATUS, sizeof (status), &status, nullptr )) == 0 && status == CL_BUILD_ERROR) {
584+ size_t log_size;
585+ clGetProgramBuildInfo (mInternals ->program , mInternals ->device , CL_PROGRAM_BUILD_LOG, 0 , nullptr , &log_size);
586+ std::unique_ptr<char []> build_log (new char [log_size + 1 ]);
587+ clGetProgramBuildInfo (mInternals ->program , mInternals ->device , CL_PROGRAM_BUILD_LOG, log_size, build_log.get (), nullptr );
588+ build_log[log_size] = 0 ;
589+ GPUError (" Build Log:\n\n %s\n " , build_log.get ());
590+ }
591+ return 1 ;
592+ }
593+
594+ #define GPUCA_KRNL (...) \
595+ GPUCA_KRNL_WRAP (GPUCA_KRNL_LOAD_, __VA_ARGS__)
596+ #define GPUCA_KRNL_LOAD_single (x_class, ...) \
597+ if (AddKernel<GPUCA_M_KRNL_TEMPLATE (x_class)>(false )) { \
598+ return 1 ; \
599+ }
600+ #define GPUCA_KRNL_LOAD_multi (x_class, ...) \
601+ if (AddKernel<GPUCA_M_KRNL_TEMPLATE (x_class)>(true )) { \
602+ return 1 ; \
603+ }
604+ #include " GPUReconstructionKernelList.h"
605+ #undef GPUCA_KRNL
606+ #undef GPUCA_KRNL_LOAD_single
607+ #undef GPUCA_KRNL_LOAD_multi
608+
609+ return 0 ;
610+ }
611+
612+ bool GPUReconstructionOCLBackend::CheckPlatform (uint32_t i)
613+ {
614+ char platform_version[64 ] = {}, platform_vendor[64 ] = {};
615+ clGetPlatformInfo (mInternals ->platforms [i], CL_PLATFORM_VERSION, sizeof (platform_version), platform_version, nullptr );
616+ clGetPlatformInfo (mInternals ->platforms [i], CL_PLATFORM_VENDOR, sizeof (platform_vendor), platform_vendor, nullptr );
617+ float ver1 = 0 ;
618+ sscanf (platform_version, " OpenCL %f" , &ver1);
619+ if (ver1 >= 2 .2f ) {
620+ if (mProcessingSettings .debugLevel >= 2 ) {
621+ GPUInfo (" OpenCL 2.2 capable platform found" );
622+ }
623+ return true ;
624+ }
625+
626+ if (strcmp (platform_vendor, " Advanced Micro Devices, Inc." ) == 0 && ver1 >= 2 .0f ) {
627+ float ver2 = 0 ;
628+ const char * pos = strchr (platform_version, ' (' );
629+ if (pos) {
630+ sscanf (pos, " (%f)" , &ver2);
631+ }
632+ if ((ver1 >= 2 .f && ver2 >= 2000 .f ) || ver1 >= 2 .1f ) {
633+ if (mProcessingSettings .debugLevel >= 2 ) {
634+ GPUInfo (" AMD ROCm OpenCL Platform found" );
635+ }
636+ return true ;
637+ }
638+ }
639+ return false ;
640+ }
0 commit comments