77
88#include " shared/source/command_stream/command_stream_receiver.h"
99#include " shared/source/memory_manager/surface.h"
10- #include " shared/source/utilities/spinlock .h"
10+ #include " shared/source/memory_manager/unified_memory_manager .h"
1111
1212#include " opencl/source/cl_device/cl_device.h"
1313#include " opencl/source/command_queue/command_queue.h"
@@ -30,13 +30,15 @@ using namespace gtpin;
3030
3131namespace NEO {
3232
33+ using GTPinLockType = std::recursive_mutex;
34+
3335extern gtpin::ocl::gtpin_events_t GTPinCallbacks;
3436
3537igc_init_t *pIgcInit = nullptr ;
3638std::atomic<int > sequenceCount (1 );
3739CommandQueue *pCmdQueueForFlushTask = nullptr ;
3840std::deque<gtpinkexec_t > kernelExecQueue;
39- SpinLock kernelExecQueueLock;
41+ GTPinLockType kernelExecQueueLock;
4042
4143void gtpinNotifyContextCreate (cl_context context) {
4244 if (isGTPinInitialized) {
@@ -131,7 +133,7 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
131133 kExec .gtpinResource = (cl_mem)resource;
132134 kExec .commandBuffer = commandBuffer;
133135 kExec .pCommandQueue = (CommandQueue *)pCmdQueue;
134- std::unique_lock<SpinLock > lock{kernelExecQueueLock};
136+ std::unique_lock<GTPinLockType > lock{kernelExecQueueLock};
135137 kernelExecQueue.push_back (kExec );
136138 lock.unlock ();
137139 // Patch SSH[gtpinBTI] with GT-Pin resource
@@ -142,10 +144,19 @@ void gtpinNotifyKernelSubmit(cl_kernel kernel, void *pCmdQueue) {
142144 GTPinHwHelper >pinHelper = GTPinHwHelper::get (genFamily);
143145 size_t gtpinBTI = pKernel->getNumberOfBindingTableStates () - 1 ;
144146 void *pSurfaceState = gtpinHelper.getSurfaceState (pKernel, gtpinBTI);
145- cl_mem buffer = (cl_mem)resource;
146- auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
147- pBuffer->setArgStateful (pSurfaceState, false , false , false , false , device,
148- pKernel->getKernelInfo ().kernelDescriptor .kernelAttributes .flags .useGlobalAtomics , pContext->getNumDevices ());
147+ if (gtpinHelper.canUseSharedAllocation (device.getHardwareInfo ())) {
148+ auto allocData = reinterpret_cast <SvmAllocationData *>(resource);
149+ auto gpuAllocation = allocData->gpuAllocations .getGraphicsAllocation (rootDeviceIndex);
150+ size_t size = gpuAllocation->getUnderlyingBufferSize ();
151+ Buffer::setSurfaceState (&device, pSurfaceState, false , false , size, gpuAllocation->getUnderlyingBuffer (), 0 , gpuAllocation, 0 , 0 ,
152+ pKernel->getKernelInfo ().kernelDescriptor .kernelAttributes .flags .useGlobalAtomics , pContext->getNumDevices ());
153+ pKernel->setUnifiedMemoryExecInfo (gpuAllocation);
154+ } else {
155+ cl_mem buffer = (cl_mem)resource;
156+ auto pBuffer = castToObjectOrAbort<Buffer>(buffer);
157+ pBuffer->setArgStateful (pSurfaceState, false , false , false , false , device,
158+ pKernel->getKernelInfo ().kernelDescriptor .kernelAttributes .flags .useGlobalAtomics , pContext->getNumDevices ());
159+ }
149160 }
150161}
151162
@@ -157,7 +168,7 @@ void gtpinNotifyPreFlushTask(void *pCmdQueue) {
157168
158169void gtpinNotifyFlushTask (uint32_t flushedTaskCount) {
159170 if (isGTPinInitialized) {
160- std::unique_lock<SpinLock > lock{kernelExecQueueLock};
171+ std::unique_lock<GTPinLockType > lock{kernelExecQueueLock};
161172 size_t numElems = kernelExecQueue.size ();
162173 for (size_t n = 0 ; n < numElems; n++) {
163174 if ((kernelExecQueue[n].pCommandQueue == pCmdQueueForFlushTask) && !kernelExecQueue[n].isTaskCountValid ) {
@@ -173,7 +184,7 @@ void gtpinNotifyFlushTask(uint32_t flushedTaskCount) {
173184
174185void gtpinNotifyTaskCompletion (uint32_t completedTaskCount) {
175186 if (isGTPinInitialized) {
176- std::unique_lock<SpinLock > lock{kernelExecQueueLock};
187+ std::unique_lock<GTPinLockType > lock{kernelExecQueueLock};
177188 size_t numElems = kernelExecQueue.size ();
178189 for (size_t n = 0 ; n < numElems;) {
179190 if (kernelExecQueue[n].isTaskCountValid && (kernelExecQueue[n].taskCount <= completedTaskCount)) {
@@ -191,15 +202,23 @@ void gtpinNotifyTaskCompletion(uint32_t completedTaskCount) {
191202
192203void gtpinNotifyMakeResident (void *pKernel, void *pCSR) {
193204 if (isGTPinInitialized) {
194- std::unique_lock<SpinLock> lock{kernelExecQueueLock};
205+ std::unique_lock<GTPinLockType> lock{kernelExecQueueLock};
206+ Context &context = static_cast <Kernel *>(pKernel)->getContext ();
207+ GTPinHwHelper >pinHelper = GTPinHwHelper::get (context.getDevice (0 )->getHardwareInfo ().platform .eRenderCoreFamily );
195208 size_t numElems = kernelExecQueue.size ();
196209 for (size_t n = 0 ; n < numElems; n++) {
197210 if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource ) {
198211 // It's time for kernel to make resident its GT-Pin resource
199212 CommandStreamReceiver *pCommandStreamReceiver = reinterpret_cast <CommandStreamReceiver *>(pCSR);
200- cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource ;
201- auto pBuffer = castToObjectOrAbort<Buffer>(gtpinBuffer);
202- GraphicsAllocation *pGfxAlloc = pBuffer->getGraphicsAllocation (pCommandStreamReceiver->getRootDeviceIndex ());
213+ GraphicsAllocation *pGfxAlloc = nullptr ;
214+ if (gtpinHelper.canUseSharedAllocation (context.getDevice (0 )->getHardwareInfo ())) {
215+ auto allocData = reinterpret_cast <SvmAllocationData *>(kernelExecQueue[n].gtpinResource );
216+ pGfxAlloc = allocData->gpuAllocations .getGraphicsAllocation (pCommandStreamReceiver->getRootDeviceIndex ());
217+ } else {
218+ cl_mem gtpinBuffer = kernelExecQueue[n].gtpinResource ;
219+ auto pBuffer = castToObjectOrAbort<Buffer>(gtpinBuffer);
220+ pGfxAlloc = pBuffer->getGraphicsAllocation (pCommandStreamReceiver->getRootDeviceIndex ());
221+ }
203222 pCommandStreamReceiver->makeResident (*pGfxAlloc);
204223 kernelExecQueue[n].isResourceResident = true ;
205224 break ;
@@ -210,7 +229,7 @@ void gtpinNotifyMakeResident(void *pKernel, void *pCSR) {
210229
211230void gtpinNotifyUpdateResidencyList (void *pKernel, void *pResVec) {
212231 if (isGTPinInitialized) {
213- std::unique_lock<SpinLock > lock{kernelExecQueueLock};
232+ std::unique_lock<GTPinLockType > lock{kernelExecQueueLock};
214233 size_t numElems = kernelExecQueue.size ();
215234 for (size_t n = 0 ; n < numElems; n++) {
216235 if ((kernelExecQueue[n].pKernel == pKernel) && !kernelExecQueue[n].isResourceResident && kernelExecQueue[n].gtpinResource ) {
0 commit comments