@@ -175,6 +175,58 @@ Buffer *Buffer::create(Context *context,
175175 flags, 0 , size, hostPtr, errcodeRet);
176176}
177177
178+ bool inline copyHostPointer (Buffer *buffer,
179+ size_t size,
180+ void *hostPtr,
181+ GraphicsAllocation *memory,
182+ GraphicsAllocation *mapAllocation,
183+ uint32_t rootDeviceIndex,
184+ bool isCompressionEnabled,
185+ bool implicitScalingEnabled,
186+ cl_int &errcodeRet) {
187+ const bool isLocalMemory = !MemoryPoolHelper::isSystemMemoryPool (memory->getMemoryPool ());
188+ const bool gpuCopyRequired = isCompressionEnabled || isLocalMemory;
189+ if (gpuCopyRequired) {
190+ auto context = buffer->getContext ();
191+ auto &device = context->getDevice (0u )->getDevice ();
192+ auto &hwInfo = device.getHardwareInfo ();
193+ auto hwInfoConfig = HwInfoConfig::get (hwInfo.platform .eProductFamily );
194+ bool copyOnCpuAllowed = implicitScalingEnabled == false &&
195+ size <= Buffer::maxBufferSizeForCopyOnCpu &&
196+ isCompressionEnabled == false &&
197+ hwInfoConfig->getLocalMemoryAccessMode (hwInfo) != LocalMemoryAccessMode::CpuAccessDisallowed &&
198+ memory->storageInfo .isLockable ;
199+ if (DebugManager.flags .CopyHostPtrOnCpu .get () != -1 ) {
200+ copyOnCpuAllowed = DebugManager.flags .CopyHostPtrOnCpu .get () == 1 ;
201+ }
202+ if (auto lockedPointer = copyOnCpuAllowed ? device.getMemoryManager ()->lockResource (memory) : nullptr ) {
203+ memcpy_s (ptrOffset (lockedPointer, buffer->getOffset ()), size, hostPtr, size);
204+ memory->setAubWritable (true , GraphicsAllocation::defaultBank);
205+ memory->setTbxWritable (true , GraphicsAllocation::defaultBank);
206+ return true ;
207+ } else {
208+ auto blitMemoryToAllocationResult = BlitOperationResult::Unsupported;
209+
210+ if (hwInfoConfig->isBlitterFullySupported (hwInfo) && isLocalMemory) {
211+ blitMemoryToAllocationResult = BlitHelperFunctions::blitMemoryToAllocation (device, memory, buffer->getOffset (), hostPtr, {size, 1 , 1 });
212+ }
213+
214+ if (blitMemoryToAllocationResult != BlitOperationResult::Success) {
215+ auto cmdQ = context->getSpecialQueue (rootDeviceIndex);
216+ if (CL_SUCCESS != cmdQ->enqueueWriteBuffer (buffer, CL_TRUE, buffer->getOffset (), size, hostPtr, mapAllocation, 0 , nullptr , nullptr )) {
217+ errcodeRet = CL_OUT_OF_RESOURCES;
218+ return false ;
219+ }
220+ }
221+ return true ;
222+ }
223+ } else {
224+ memcpy_s (ptrOffset (memory->getUnderlyingBuffer (), buffer->getOffset ()), size, hostPtr, size);
225+ return true ;
226+ }
227+ return false ;
228+ }
229+
178230Buffer *Buffer::create (Context *context,
179231 const MemoryProperties &memoryProperties,
180232 cl_mem_flags flags,
@@ -184,6 +236,47 @@ Buffer *Buffer::create(Context *context,
184236 cl_int &errcodeRet) {
185237
186238 errcodeRet = CL_SUCCESS;
239+ Context::BufferPoolAllocator &bufferPoolAllocator = context->getBufferPoolAllocator ();
240+ const bool implicitScalingEnabled = ImplicitScalingHelper::isImplicitScalingEnabled (context->getDevice (0u )->getDeviceBitfield (), true );
241+ const bool useHostPtr = memoryProperties.flags .useHostPtr ;
242+ const bool copyHostPtr = memoryProperties.flags .copyHostPtr ;
243+ if (implicitScalingEnabled == false &&
244+ useHostPtr == false &&
245+ memoryProperties.flags .forceHostMemory == false ) {
246+ cl_int poolAllocRet = CL_SUCCESS;
247+ auto bufferFromPool = bufferPoolAllocator.allocateBufferFromPool (memoryProperties,
248+ flags,
249+ flagsIntel,
250+ size,
251+ hostPtr,
252+ poolAllocRet);
253+ if (CL_SUCCESS == poolAllocRet) {
254+ const bool needsCopy = copyHostPtr;
255+ if (needsCopy) {
256+ for (auto &rootDeviceIndex : context->getRootDeviceIndices ()) {
257+ auto graphicsAllocation = bufferFromPool->getGraphicsAllocation (rootDeviceIndex);
258+ auto mapAllocation = bufferFromPool->getMapAllocation (rootDeviceIndex);
259+ bool isCompressionEnabled = graphicsAllocation->isCompressionEnabled ();
260+ if (copyHostPointer (bufferFromPool,
261+ size,
262+ hostPtr,
263+ graphicsAllocation,
264+ mapAllocation,
265+ rootDeviceIndex,
266+ isCompressionEnabled,
267+ implicitScalingEnabled,
268+ poolAllocRet)) {
269+ break ;
270+ }
271+ }
272+ }
273+ if (!needsCopy || poolAllocRet == CL_SUCCESS) {
274+ return bufferFromPool;
275+ } else {
276+ clReleaseMemObject (bufferFromPool);
277+ }
278+ }
279+ }
187280
188281 MemoryManager *memoryManager = context->getMemoryManager ();
189282 UNRECOVERABLE_IF (!memoryManager);
@@ -194,9 +287,6 @@ Buffer *Buffer::create(Context *context,
194287 AllocationInfoType allocationInfos;
195288 allocationInfos.resize (maxRootDeviceIndex + 1ull );
196289
197- const bool useHostPtr = memoryProperties.flags .useHostPtr ;
198- const bool copyHostPtr = memoryProperties.flags .copyHostPtr ;
199-
200290 void *allocationCpuPtr = nullptr ;
201291 bool forceCopyHostPtr = false ;
202292
@@ -404,45 +494,15 @@ Buffer *Buffer::create(Context *context,
404494 pBuffer->setHostPtrMinSize (size);
405495
406496 if (allocationInfo.copyMemoryFromHostPtr && !copyExecuted) {
407- auto isLocalMemory = !MemoryPoolHelper::isSystemMemoryPool (allocationInfo.memory ->getMemoryPool ());
408- bool gpuCopyRequired = isCompressionEnabled || isLocalMemory;
409-
410- if (gpuCopyRequired) {
411- auto &device = pBuffer->getContext ()->getDevice (0u )->getDevice ();
412- auto &hwInfo = device.getHardwareInfo ();
413- auto hwInfoConfig = HwInfoConfig::get (hwInfo.platform .eProductFamily );
414- bool copyOnCpuAllowed = false == ImplicitScalingHelper::isImplicitScalingEnabled (device.getDeviceBitfield (), true ) &&
415- size <= Buffer::maxBufferSizeForCopyOnCpu &&
416- !isCompressionEnabled &&
417- hwInfoConfig->getLocalMemoryAccessMode (hwInfo) != LocalMemoryAccessMode::CpuAccessDisallowed &&
418- allocationInfo.memory ->storageInfo .isLockable ;
419- if (DebugManager.flags .CopyHostPtrOnCpu .get () != -1 ) {
420- copyOnCpuAllowed = DebugManager.flags .CopyHostPtrOnCpu .get () == 1 ;
421- }
422- if (auto lockedPointer = copyOnCpuAllowed ? device.getMemoryManager ()->lockResource (allocationInfo.memory ) : nullptr ) {
423- memcpy_s (ptrOffset (lockedPointer, pBuffer->getOffset ()), size, hostPtr, size);
424- allocationInfo.memory ->setAubWritable (true , GraphicsAllocation::defaultBank);
425- allocationInfo.memory ->setTbxWritable (true , GraphicsAllocation::defaultBank);
426- copyExecuted = true ;
427- } else {
428- auto blitMemoryToAllocationResult = BlitOperationResult::Unsupported;
429-
430- if (hwInfoConfig->isBlitterFullySupported (hwInfo) && isLocalMemory) {
431- blitMemoryToAllocationResult = BlitHelperFunctions::blitMemoryToAllocation (device, allocationInfo.memory , pBuffer->getOffset (), hostPtr, {size, 1 , 1 });
432- }
433-
434- if (blitMemoryToAllocationResult != BlitOperationResult::Success) {
435- auto cmdQ = context->getSpecialQueue (rootDeviceIndex);
436- if (CL_SUCCESS != cmdQ->enqueueWriteBuffer (pBuffer, CL_TRUE, 0 , size, hostPtr, allocationInfo.mapAllocation , 0 , nullptr , nullptr )) {
437- errcodeRet = CL_OUT_OF_RESOURCES;
438- }
439- }
440- copyExecuted = true ;
441- }
442- } else {
443- memcpy_s (allocationInfo.memory ->getUnderlyingBuffer (), size, hostPtr, size);
444- copyExecuted = true ;
445- }
497+ copyExecuted = copyHostPointer (pBuffer,
498+ size,
499+ hostPtr,
500+ allocationInfo.memory ,
501+ allocationInfo.mapAllocation ,
502+ rootDeviceIndex,
503+ isCompressionEnabled,
504+ implicitScalingEnabled,
505+ errcodeRet);
446506 }
447507 }
448508
0 commit comments