diff --git a/unified-runtime/source/adapters/opencl/adapter.cpp b/unified-runtime/source/adapters/opencl/adapter.cpp index 365723f14e977..ec3945fb6cc33 100644 --- a/unified-runtime/source/adapters/opencl/adapter.cpp +++ b/unified-runtime/source/adapters/opencl/adapter.cpp @@ -18,6 +18,11 @@ #include #endif +// There can only be one OpenCL adapter alive at a time. +// If it is alive (more get/retains than releases called), this is a pointer to +// it. +static ur_adapter_handle_t liveAdapter = nullptr; + ur_adapter_handle_t_::ur_adapter_handle_t_() { #ifdef _MSC_VER @@ -42,45 +47,38 @@ ur_adapter_handle_t_::ur_adapter_handle_t_() { #undef CL_CORE_FUNCTION #endif // _MSC_VER + assert(!liveAdapter); + liveAdapter = this; } -static ur_adapter_handle_t adapter = nullptr; +ur_adapter_handle_t_::~ur_adapter_handle_t_() { + assert(liveAdapter == this); + liveAdapter = nullptr; +} ur_adapter_handle_t ur::cl::getAdapter() { - if (!adapter) { + if (!liveAdapter) { die("OpenCL adapter used before initalization or after destruction"); } - return adapter; -} - -static void globalAdapterShutdown() { - if (cl_ext::ExtFuncPtrCache) { - delete cl_ext::ExtFuncPtrCache; - cl_ext::ExtFuncPtrCache = nullptr; - } - if (adapter) { - delete adapter; - adapter = nullptr; - } + return liveAdapter; } UR_APIEXPORT ur_result_t UR_APICALL urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, uint32_t *pNumAdapters) { + static std::mutex AdapterConstructionMutex{}; + if (NumEntries > 0 && phAdapters) { - // Sometimes urAdaterGet may be called after the library already been torn - // down, we also need to create a temporary handle for it. - if (!adapter) { - adapter = new ur_adapter_handle_t_(); - atexit(globalAdapterShutdown); - } + std::lock_guard Lock{AdapterConstructionMutex}; - std::lock_guard Lock{adapter->Mutex}; - if (adapter->RefCount++ == 0) { - cl_ext::ExtFuncPtrCache = new cl_ext::ExtFuncPtrCacheT(); + if (!liveAdapter) { + *phAdapters = new ur_adapter_handle_t_(); + } else { + *phAdapters = liveAdapter; } - *phAdapters = adapter; + auto &adapter = *phAdapters; + adapter->RefCount++; } if (pNumAdapters) { @@ -90,21 +88,16 @@ urAdapterGet(uint32_t NumEntries, ur_adapter_handle_t *phAdapters, return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterRetain(ur_adapter_handle_t) { - ++adapter->RefCount; +UR_APIEXPORT ur_result_t UR_APICALL +urAdapterRetain(ur_adapter_handle_t hAdapter) { + ++hAdapter->RefCount; return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterRelease(ur_adapter_handle_t) { - // Check first if the adapter is valid pointer - if (adapter) { - std::lock_guard Lock{adapter->Mutex}; - if (--adapter->RefCount == 0) { - if (cl_ext::ExtFuncPtrCache) { - delete cl_ext::ExtFuncPtrCache; - cl_ext::ExtFuncPtrCache = nullptr; - } - } +UR_APIEXPORT ur_result_t UR_APICALL +urAdapterRelease(ur_adapter_handle_t hAdapter) { + if (--hAdapter->RefCount == 0) { + delete hAdapter; } return UR_RESULT_SUCCESS; } @@ -117,18 +110,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetLastError( return UR_RESULT_SUCCESS; } -UR_APIEXPORT ur_result_t UR_APICALL urAdapterGetInfo(ur_adapter_handle_t, - ur_adapter_info_t propName, - size_t propSize, - void *pPropValue, - size_t *pPropSizeRet) { +UR_APIEXPORT ur_result_t UR_APICALL +urAdapterGetInfo(ur_adapter_handle_t hAdapter, ur_adapter_info_t propName, + size_t propSize, void *pPropValue, size_t *pPropSizeRet) { UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); switch (propName) { case UR_ADAPTER_INFO_BACKEND: return ReturnValue(UR_ADAPTER_BACKEND_OPENCL); case UR_ADAPTER_INFO_REFERENCE_COUNT: - return ReturnValue(adapter->RefCount.load()); + return ReturnValue(hAdapter->RefCount.load()); case UR_ADAPTER_INFO_VERSION: return ReturnValue(uint32_t{1}); default: diff --git a/unified-runtime/source/adapters/opencl/adapter.hpp b/unified-runtime/source/adapters/opencl/adapter.hpp index 34ffe43dc1587..546a803cdc538 100644 --- a/unified-runtime/source/adapters/opencl/adapter.hpp +++ b/unified-runtime/source/adapters/opencl/adapter.hpp @@ -7,19 +7,25 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// +#pragma once + #include "device.hpp" #include "logger/ur_logger.hpp" #include "platform.hpp" #include "CL/cl.h" +#include "common.hpp" #include "logger/ur_logger.hpp" struct ur_adapter_handle_t_ { ur_adapter_handle_t_(); + ~ur_adapter_handle_t_(); + + ur_adapter_handle_t_(ur_adapter_handle_t_ &) = delete; std::atomic RefCount = 0; - std::mutex Mutex; logger::Logger &log = logger::get_logger("opencl"); + cl_ext::ExtFuncPtrCacheT fnCache{}; std::vector> URPlatforms; uint32_t NumPlatforms = 0; diff --git a/unified-runtime/source/adapters/opencl/command_buffer.cpp b/unified-runtime/source/adapters/opencl/command_buffer.cpp index a1d13f5d3ade4..d3ef027457b5b 100644 --- a/unified-runtime/source/adapters/opencl/command_buffer.cpp +++ b/unified-runtime/source/adapters/opencl/command_buffer.cpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #include "command_buffer.hpp" +#include "adapter.hpp" #include "common.hpp" #include "context.hpp" #include "event.hpp" @@ -25,7 +26,8 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() { cl_ext::clReleaseCommandBufferKHR_fn clReleaseCommandBufferKHR = nullptr; cl_int Res = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clReleaseCommandBufferKHRCache, + CLContext, + ur::cl::getAdapter()->fnCache.clReleaseCommandBufferKHRCache, cl_ext::ReleaseCommandBufferName, &clReleaseCommandBufferKHR); assert(Res == CL_SUCCESS); (void)Res; @@ -42,7 +44,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( cl_ext::clCreateCommandBufferKHR_fn clCreateCommandBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCreateCommandBufferKHRCache, + CLContext, + ur::cl::getAdapter()->fnCache.clCreateCommandBufferKHRCache, cl_ext::CreateCommandBufferName, &clCreateCommandBufferKHR)); const bool IsUpdatable = pCommandBufferDesc->isUpdatable; @@ -116,7 +119,8 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { cl_ext::clFinalizeCommandBufferKHR_fn clFinalizeCommandBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clFinalizeCommandBufferKHRCache, + CLContext, + ur::cl::getAdapter()->fnCache.clFinalizeCommandBufferKHRCache, cl_ext::FinalizeCommandBufferName, &clFinalizeCommandBufferKHR)); CL_RETURN_ON_FAILURE( @@ -148,7 +152,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( cl_ext::clCommandNDRangeKernelKHR_fn clCommandNDRangeKernelKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCommandNDRangeKernelKHRCache, + CLContext, + ur::cl::getAdapter()->fnCache.clCommandNDRangeKernelKHRCache, cl_ext::CommandNRRangeKernelName, &clCommandNDRangeKernelKHR)); cl_mutable_command_khr CommandHandle = nullptr; @@ -238,7 +243,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyExp( cl_ext::clCommandCopyBufferKHR_fn clCommandCopyBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferKHRCache, + CLContext, ur::cl::getAdapter()->fnCache.clCommandCopyBufferKHRCache, cl_ext::CommandCopyBufferName, &clCommandCopyBufferKHR)); const bool IsInOrder = hCommandBuffer->IsInOrder; @@ -280,7 +285,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferCopyRectExp( cl_ext::clCommandCopyBufferRectKHR_fn clCommandCopyBufferRectKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCommandCopyBufferRectKHRCache, + CLContext, + ur::cl::getAdapter()->fnCache.clCommandCopyBufferRectKHRCache, cl_ext::CommandCopyBufferRectName, &clCommandCopyBufferRectKHR)); const bool IsInOrder = hCommandBuffer->IsInOrder; @@ -388,7 +394,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendMemBufferFillExp( cl_ext::clCommandFillBufferKHR_fn clCommandFillBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clCommandFillBufferKHRCache, + CLContext, ur::cl::getAdapter()->fnCache.clCommandFillBufferKHRCache, cl_ext::CommandFillBufferName, &clCommandFillBufferKHR)); const bool IsInOrder = hCommandBuffer->IsInOrder; @@ -459,7 +465,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueCommandBufferExp( cl_ext::clEnqueueCommandBufferKHR_fn clEnqueueCommandBufferKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueCommandBufferKHRCache, + CLContext, + ur::cl::getAdapter()->fnCache.clEnqueueCommandBufferKHRCache, cl_ext::EnqueueCommandBufferName, &clEnqueueCommandBufferKHR)); const uint32_t NumberOfQueues = 1; @@ -618,7 +625,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( cl_ext::clUpdateMutableCommandsKHR_fn clUpdateMutableCommandsKHR = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clUpdateMutableCommandsKHRCache, + CLContext, + ur::cl::getAdapter()->fnCache.clUpdateMutableCommandsKHRCache, cl_ext::UpdateMutableCommandsName, &clUpdateMutableCommandsKHR)); std::vector ConfigList(numKernelUpdates); @@ -754,7 +762,7 @@ ur_result_t UR_APICALL urCommandBufferAppendNativeCommandExp( UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( CLContext, - cl_ext::ExtFuncPtrCache->clCommandBarrierWithWaitListKHRCache, + ur::cl::getAdapter()->fnCache.clCommandBarrierWithWaitListKHRCache, cl_ext::CommandBarrierWithWaitListName, &clCommandBarrierWithWaitListKHR)); diff --git a/unified-runtime/source/adapters/opencl/common.hpp b/unified-runtime/source/adapters/opencl/common.hpp index b5d702c06fb37..0be4851cc785e 100644 --- a/unified-runtime/source/adapters/opencl/common.hpp +++ b/unified-runtime/source/adapters/opencl/common.hpp @@ -349,11 +349,6 @@ struct ExtFuncPtrCacheT { #undef CL_EXTENSION_FUNC } }; -// A raw pointer is used here since the lifetime of this map has to be tied to -// piTeardown to avoid issues with static destruction order (a user application -// might have static objects that indirectly access this cache in their -// destructor). -inline ExtFuncPtrCacheT *ExtFuncPtrCache; // USM helper function to get an extension function pointer template diff --git a/unified-runtime/source/adapters/opencl/context.cpp b/unified-runtime/source/adapters/opencl/context.cpp index 44bdcf2516684..43374bb5485df 100644 --- a/unified-runtime/source/adapters/opencl/context.cpp +++ b/unified-runtime/source/adapters/opencl/context.cpp @@ -117,20 +117,10 @@ urContextGetInfo(ur_context_handle_t hContext, ur_context_info_t propName, UR_APIEXPORT ur_result_t UR_APICALL urContextRelease(ur_context_handle_t hContext) { - // If we're reasonably sure this context is about to be detroyed we should - // clear the ext function pointer cache. This isn't foolproof sadly but it - // should drastically reduce the chances of the pathological case described - // in the comments in common.hpp. static std::mutex contextReleaseMutex; - auto clContext = hContext->CLContext; std::lock_guard lock(contextReleaseMutex); if (hContext->decrementReferenceCount() == 0) { - // ExtFuncPtrCache is destroyed in an atexit() callback, so it doesn't - // necessarily outlive the adapter (or all the contexts). - if (cl_ext::ExtFuncPtrCache) { - cl_ext::ExtFuncPtrCache->clearCache(clContext); - } delete hContext; } diff --git a/unified-runtime/source/adapters/opencl/context.hpp b/unified-runtime/source/adapters/opencl/context.hpp index a71f6adc05526..75282305aa408 100644 --- a/unified-runtime/source/adapters/opencl/context.hpp +++ b/unified-runtime/source/adapters/opencl/context.hpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #pragma once +#include "adapter.hpp" #include "common.hpp" #include "device.hpp" @@ -29,6 +30,9 @@ struct ur_context_handle_t_ { Devices.emplace_back(phDevices[i]); urDeviceRetain(phDevices[i]); } + // The context retains a reference to the adapter so it can clear the + // function ptr cache on destruction + urAdapterRetain(ur::cl::getAdapter()); RefCount = 1; } @@ -42,6 +46,13 @@ struct ur_context_handle_t_ { const ur_device_handle_t *phDevices, ur_context_handle_t &Context); ~ur_context_handle_t_() { + // If we're reasonably sure this context is about to be destroyed we should + // clear the ext function pointer cache. This isn't foolproof sadly but it + // should drastically reduce the chances of the pathological case described + // in the comments in common.hpp. + ur::cl::getAdapter()->fnCache.clearCache(CLContext); + urAdapterRelease(ur::cl::getAdapter()); + for (uint32_t i = 0; i < DeviceCount; i++) { urDeviceRelease(Devices[i]); } diff --git a/unified-runtime/source/adapters/opencl/enqueue.cpp b/unified-runtime/source/adapters/opencl/enqueue.cpp index 257d64e81d116..369187b9d6d27 100644 --- a/unified-runtime/source/adapters/opencl/enqueue.cpp +++ b/unified-runtime/source/adapters/opencl/enqueue.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// +#include "adapter.hpp" #include "common.hpp" #include "context.hpp" #include "event.hpp" @@ -400,7 +401,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableWrite( MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); cl_ext::clEnqueueWriteGlobalVariable_fn F = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - Ctx, cl_ext::ExtFuncPtrCache->clEnqueueWriteGlobalVariableCache, + Ctx, ur::cl::getAdapter()->fnCache.clEnqueueWriteGlobalVariableCache, cl_ext::EnqueueWriteGlobalVariableName, &F)); cl_int Res = F(hQueue->CLQueue, hProgram->CLProgram, name, blockingWrite, @@ -422,7 +423,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueDeviceGlobalVariableRead( MapUREventsToCL(numEventsInWaitList, phEventWaitList, CLWaitEvents); cl_ext::clEnqueueReadGlobalVariable_fn F = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - Ctx, cl_ext::ExtFuncPtrCache->clEnqueueReadGlobalVariableCache, + Ctx, ur::cl::getAdapter()->fnCache.clEnqueueReadGlobalVariableCache, cl_ext::EnqueueReadGlobalVariableName, &F)); cl_int Res = F(hQueue->CLQueue, hProgram->CLProgram, name, blockingRead, @@ -446,7 +447,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueReadHostPipe( cl_ext::clEnqueueReadHostPipeINTEL_fn FuncPtr = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueReadHostPipeINTELCache, + CLContext, + ur::cl::getAdapter()->fnCache.clEnqueueReadHostPipeINTELCache, cl_ext::EnqueueReadHostPipeName, &FuncPtr)); if (FuncPtr) { @@ -474,7 +476,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueWriteHostPipe( cl_ext::clEnqueueWriteHostPipeINTEL_fn FuncPtr = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueWriteHostPipeINTELCache, + CLContext, + ur::cl::getAdapter()->fnCache.clEnqueueWriteHostPipeINTELCache, cl_ext::EnqueueWriteHostPipeName, &FuncPtr)); if (FuncPtr) { diff --git a/unified-runtime/source/adapters/opencl/kernel.cpp b/unified-runtime/source/adapters/opencl/kernel.cpp index 5364c677f8ed2..9116ae70d2176 100644 --- a/unified-runtime/source/adapters/opencl/kernel.cpp +++ b/unified-runtime/source/adapters/opencl/kernel.cpp @@ -8,6 +8,7 @@ // //===----------------------------------------------------------------------===// #include "kernel.hpp" +#include "adapter.hpp" #include "common.hpp" #include "device.hpp" #include "memory.hpp" @@ -280,7 +281,7 @@ urKernelGetSubGroupInfo(ur_kernel_handle_t hKernel, ur_device_handle_t hDevice, CL_RETURN_ON_FAILURE(clGetKernelInfo(hKernel->CLKernel, CL_KERNEL_CONTEXT, sizeof(Context), &Context, nullptr)); UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - Context, cl_ext::ExtFuncPtrCache->clGetKernelSubGroupInfoKHRCache, + Context, ur::cl::getAdapter()->fnCache.clGetKernelSubGroupInfoKHRCache, cl_ext::GetKernelSubGroupInfoName, &GetKernelSubGroupInfo)); } else { GetKernelSubGroupInfo = clGetKernelSubGroupInfo; @@ -364,7 +365,7 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { nullptr)); UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clHostMemAllocINTELCache, cl_ext::HostMemAllocName, &HFunc)); if (HFunc) { @@ -374,7 +375,7 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { } UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clDeviceMemAllocINTELCache, cl_ext::DeviceMemAllocName, &DFunc)); if (DFunc) { @@ -384,7 +385,7 @@ static ur_result_t usmSetIndirectAccess(ur_kernel_handle_t hKernel) { } UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clSharedMemAllocINTELCache, cl_ext::SharedMemAllocName, &SFunc)); if (SFunc) { @@ -436,7 +437,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSetArgPointer( UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( CLContext, - cl_ext::ExtFuncPtrCache->clSetKernelArgMemPointerINTELCache, + ur::cl::getAdapter()->fnCache.clSetKernelArgMemPointerINTELCache, cl_ext::SetKernelArgMemPointerName, &FuncPtr)); if (FuncPtr) { diff --git a/unified-runtime/source/adapters/opencl/memory.cpp b/unified-runtime/source/adapters/opencl/memory.cpp index b770a26d0a015..62698e6105520 100644 --- a/unified-runtime/source/adapters/opencl/memory.cpp +++ b/unified-runtime/source/adapters/opencl/memory.cpp @@ -9,6 +9,7 @@ //===----------------------------------------------------------------------===// #include "memory.hpp" +#include "adapter.hpp" #include "common.hpp" #include "context.hpp" @@ -348,7 +349,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urMemBufferCreate( RetErr = cl_ext::getExtFuncFromContext( CLContext, - cl_ext::ExtFuncPtrCache->clCreateBufferWithPropertiesINTELCache, + ur::cl::getAdapter() + ->fnCache.clCreateBufferWithPropertiesINTELCache, cl_ext::CreateBufferWithPropertiesName, &FuncPtr); if (FuncPtr) { std::vector PropertiesIntel; diff --git a/unified-runtime/source/adapters/opencl/program.cpp b/unified-runtime/source/adapters/opencl/program.cpp index 56a4246aebf0a..1c3a5e45b3bd5 100644 --- a/unified-runtime/source/adapters/opencl/program.cpp +++ b/unified-runtime/source/adapters/opencl/program.cpp @@ -99,7 +99,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCreateWithIL( UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( hContext->CLContext, - cl_ext::ExtFuncPtrCache->clCreateProgramWithILKHRCache, + ur::cl::getAdapter()->fnCache.clCreateProgramWithILKHRCache, cl_ext::CreateProgramWithILName, &CreateProgramWithIL)); Program = CreateProgramWithIL(hContext->CLContext, pIL, length, &Err); @@ -478,7 +478,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetFunctionPointer( UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clGetDeviceFunctionPointerCache, + CLContext, + ur::cl::getAdapter()->fnCache.clGetDeviceFunctionPointerCache, cl_ext::GetDeviceFunctionPointerName, &FuncT)); // Check if the kernel name exists to prevent the OpenCL runtime from throwing @@ -534,7 +535,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramGetGlobalVariablePointer( UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext< cl_ext::clGetDeviceGlobalVariablePointer_fn>( - CLContext, cl_ext::ExtFuncPtrCache->clGetDeviceGlobalVariablePointerCache, + CLContext, + ur::cl::getAdapter()->fnCache.clGetDeviceGlobalVariablePointerCache, cl_ext::GetDeviceGlobalVariablePointerName, &FuncT)); const cl_int CLResult = diff --git a/unified-runtime/source/adapters/opencl/usm.cpp b/unified-runtime/source/adapters/opencl/usm.cpp index 08fdbf7de92c6..391f33f747d24 100644 --- a/unified-runtime/source/adapters/opencl/usm.cpp +++ b/unified-runtime/source/adapters/opencl/usm.cpp @@ -10,6 +10,7 @@ #include +#include "adapter.hpp" #include "common.hpp" #include "context.hpp" #include "device.hpp" @@ -121,7 +122,7 @@ urUSMHostAlloc(ur_context_handle_t Context, const ur_usm_desc_t *pUSMDesc, clHostMemAllocINTEL_fn FuncPtr = nullptr; cl_context CLContext = Context->CLContext; if (auto UrResult = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clHostMemAllocINTELCache, cl_ext::HostMemAllocName, &FuncPtr)) { return UrResult; } @@ -169,7 +170,7 @@ urUSMDeviceAlloc(ur_context_handle_t Context, ur_device_handle_t hDevice, clDeviceMemAllocINTEL_fn FuncPtr = nullptr; cl_context CLContext = Context->CLContext; if (auto UrResult = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clDeviceMemAllocINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clDeviceMemAllocINTELCache, cl_ext::DeviceMemAllocName, &FuncPtr)) { return UrResult; } @@ -217,7 +218,7 @@ urUSMSharedAlloc(ur_context_handle_t Context, ur_device_handle_t hDevice, clSharedMemAllocINTEL_fn FuncPtr = nullptr; cl_context CLContext = Context->CLContext; if (auto UrResult = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clSharedMemAllocINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clSharedMemAllocINTELCache, cl_ext::SharedMemAllocName, &FuncPtr)) { return UrResult; } @@ -251,7 +252,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMFree(ur_context_handle_t Context, cl_context CLContext = Context->CLContext; ur_result_t RetVal = UR_RESULT_ERROR_INVALID_OPERATION; RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clMemBlockingFreeINTELCache, cl_ext::MemBlockingFreeName, &FuncPtr); if (FuncPtr) { @@ -272,7 +273,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( clEnqueueMemFillINTEL_fn EnqueueMemFill = nullptr; UR_RETURN_ON_FAILURE( cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemFillINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clEnqueueMemFillINTELCache, cl_ext::EnqueueMemFillName, &EnqueueMemFill)); cl_event Event; std::vector CLWaitEvents(numEventsInWaitList); @@ -294,17 +295,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill( // target allocation. clHostMemAllocINTEL_fn HostMemAlloc = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clHostMemAllocINTELCache, cl_ext::HostMemAllocName, &HostMemAlloc)); clEnqueueMemcpyINTEL_fn USMMemcpy = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clEnqueueMemcpyINTELCache, cl_ext::EnqueueMemcpyName, &USMMemcpy)); clMemBlockingFreeINTEL_fn USMFree = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clMemBlockingFreeINTELCache, cl_ext::MemBlockingFreeName, &USMFree)); cl_int ClErr = CL_SUCCESS; @@ -370,17 +371,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( cl_int CLErr = CL_SUCCESS; clGetMemAllocInfoINTEL_fn GetMemAllocInfo = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clGetMemAllocInfoINTELCache, cl_ext::GetMemAllocInfoName, &GetMemAllocInfo)); clEnqueueMemcpyINTEL_fn USMMemcpy = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clEnqueueMemcpyINTELCache, cl_ext::EnqueueMemcpyName, &USMMemcpy)); clMemBlockingFreeINTEL_fn USMFree = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clMemBlockingFreeINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clMemBlockingFreeINTELCache, cl_ext::MemBlockingFreeName, &USMFree)); // Check if the two allocations are DEVICE allocations from different @@ -418,7 +419,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy( cl_event HostCopyEvent = nullptr, FinalCopyEvent = nullptr; clHostMemAllocINTEL_fn HostMemAlloc = nullptr; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clHostMemAllocINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clHostMemAllocINTELCache, cl_ext::HostMemAllocName, &HostMemAlloc)); auto HostAlloc = HostMemAlloc(CLContext, nullptr, size, 0, &CLErr); @@ -608,7 +609,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMMemcpy2D( clEnqueueMemcpyINTEL_fn FuncPtr = nullptr; ur_result_t RetVal = cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clEnqueueMemcpyINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clEnqueueMemcpyINTELCache, cl_ext::EnqueueMemcpyName, &FuncPtr); if (!FuncPtr) { @@ -676,7 +677,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMGetMemAllocInfo( clGetMemAllocInfoINTEL_fn GetMemAllocInfo = nullptr; cl_context CLContext = Context->CLContext; UR_RETURN_ON_FAILURE(cl_ext::getExtFuncFromContext( - CLContext, cl_ext::ExtFuncPtrCache->clGetMemAllocInfoINTELCache, + CLContext, ur::cl::getAdapter()->fnCache.clGetMemAllocInfoINTELCache, cl_ext::GetMemAllocInfoName, &GetMemAllocInfo)); cl_mem_info_intel PropNameCL;