@@ -686,6 +686,36 @@ ze_result_t KernelImp::getProperties(ze_kernel_properties_t *pKernelProperties)
686686 return ZE_RESULT_SUCCESS;
687687}
688688
689+ NEO::GraphicsAllocation *KernelImp::allocatePrivateMemoryGraphicsAllocation () {
690+ auto &kernelAttributes = kernelImmData->getDescriptor ().kernelAttributes ;
691+ auto neoDevice = module ->getDevice ()->getNEODevice ();
692+
693+ auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize (kernelAttributes.perHwThreadPrivateMemorySize ,
694+ neoDevice->getDeviceInfo ().computeUnitsUsedForScratch );
695+
696+ UNRECOVERABLE_IF (privateSurfaceSize == 0 );
697+ auto privateMemoryGraphicsAllocation = neoDevice->getMemoryManager ()->allocateGraphicsMemoryWithProperties (
698+ {neoDevice->getRootDeviceIndex (), privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield ()});
699+
700+ UNRECOVERABLE_IF (privateMemoryGraphicsAllocation == nullptr );
701+ return privateMemoryGraphicsAllocation;
702+ }
703+
704+ void KernelImp::patchCrossthreadDataWithPrivateAllocation (NEO::GraphicsAllocation *privateAllocation) {
705+ auto &kernelAttributes = kernelImmData->getDescriptor ().kernelAttributes ;
706+ auto neoDevice = module ->getDevice ()->getNEODevice ();
707+
708+ ArrayRef<uint8_t > crossThredDataArrayRef = ArrayRef<uint8_t >(this ->crossThreadData .get (), this ->crossThreadDataSize );
709+ ArrayRef<uint8_t > surfaceStateHeapArrayRef = ArrayRef<uint8_t >(this ->surfaceStateHeapData .get (), this ->surfaceStateHeapDataSize );
710+
711+ patchWithImplicitSurface (crossThredDataArrayRef, surfaceStateHeapArrayRef,
712+ static_cast <uintptr_t >(privateMemoryGraphicsAllocation->getGpuAddressToPatch ()),
713+ *privateMemoryGraphicsAllocation, kernelImmData->getDescriptor ().payloadMappings .implicitArgs .privateMemoryAddress ,
714+ *neoDevice, kernelAttributes.flags .useGlobalAtomics );
715+
716+ this ->residencyContainer .push_back (this ->privateMemoryGraphicsAllocation );
717+ }
718+
689719ze_result_t KernelImp::initialize (const ze_kernel_desc_t *desc) {
690720 this ->kernelImmData = module ->getKernelImmutableData (desc->pKernelName );
691721 if (this ->kernelImmData == nullptr ) {
@@ -776,25 +806,9 @@ ze_result_t KernelImp::initialize(const ze_kernel_desc_t *desc) {
776806
777807 auto &kernelAttributes = kernelImmData->getDescriptor ().kernelAttributes ;
778808 auto neoDevice = module ->getDevice ()->getNEODevice ();
779- if (kernelAttributes.perHwThreadPrivateMemorySize != 0 ) {
780- auto privateSurfaceSize = NEO::KernelHelper::getPrivateSurfaceSize (kernelAttributes.perHwThreadPrivateMemorySize ,
781- neoDevice->getDeviceInfo ().computeUnitsUsedForScratch );
782-
783- UNRECOVERABLE_IF (privateSurfaceSize == 0 );
784- this ->privateMemoryGraphicsAllocation = neoDevice->getMemoryManager ()->allocateGraphicsMemoryWithProperties (
785- {neoDevice->getRootDeviceIndex (), privateSurfaceSize, NEO::GraphicsAllocation::AllocationType::PRIVATE_SURFACE, neoDevice->getDeviceBitfield ()});
786-
787- UNRECOVERABLE_IF (this ->privateMemoryGraphicsAllocation == nullptr );
788-
789- ArrayRef<uint8_t > crossThredDataArrayRef = ArrayRef<uint8_t >(this ->crossThreadData .get (), this ->crossThreadDataSize );
790- ArrayRef<uint8_t > surfaceStateHeapArrayRef = ArrayRef<uint8_t >(this ->surfaceStateHeapData .get (), this ->surfaceStateHeapDataSize );
791-
792- patchWithImplicitSurface (crossThredDataArrayRef, surfaceStateHeapArrayRef,
793- static_cast <uintptr_t >(privateMemoryGraphicsAllocation->getGpuAddressToPatch ()),
794- *privateMemoryGraphicsAllocation, kernelImmData->getDescriptor ().payloadMappings .implicitArgs .privateMemoryAddress ,
795- *neoDevice, kernelAttributes.flags .useGlobalAtomics );
796-
797- this ->residencyContainer .push_back (this ->privateMemoryGraphicsAllocation );
809+ if ((kernelAttributes.perHwThreadPrivateMemorySize != 0U ) && (false == module ->shouldAllocatePrivateMemoryPerDispatch ())) {
810+ this ->privateMemoryGraphicsAllocation = allocatePrivateMemoryGraphicsAllocation ();
811+ this ->patchCrossthreadDataWithPrivateAllocation (this ->privateMemoryGraphicsAllocation );
798812 }
799813
800814 this ->createPrintfBuffer ();
0 commit comments