@@ -714,16 +714,21 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
714714 EnqueueWriteGlobal (kSPIR_DeviceType , &DeviceInfo->Type ,
715715 sizeof (DeviceInfo->Type ));
716716
717- if (DeviceInfo->Type == DeviceType::CPU) {
718- break ;
719- }
720-
721717 if (LaunchInfo.LocalWorkSize .empty ()) {
722- LaunchInfo.LocalWorkSize .reserve (3 );
723- // FIXME: This is W/A until urKernelSuggestGroupSize is added
724- LaunchInfo.LocalWorkSize [0 ] = 1 ;
725- LaunchInfo.LocalWorkSize [1 ] = 1 ;
726- LaunchInfo.LocalWorkSize [2 ] = 1 ;
718+ LaunchInfo.LocalWorkSize .resize (LaunchInfo.WorkDim );
719+ auto URes = context.urDdiTable .Kernel .pfnGetSuggestedLocalWorkSize (
720+ Kernel, Queue, LaunchInfo.WorkDim , LaunchInfo.GlobalWorkOffset ,
721+ LaunchInfo.GlobalWorkSize , LaunchInfo.LocalWorkSize .data ());
722+ if (URes != UR_RESULT_SUCCESS) {
723+ if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
724+ return URes;
725+ }
726+ // If urKernelGetSuggestedLocalWorkSize is not supported by the driver, we fall back
727+ // to an inefficient implementation (a local work size of 1 in every dimension)
728+ for (size_t Dim = 0 ; Dim < LaunchInfo.WorkDim ; ++Dim) {
729+ LaunchInfo.LocalWorkSize [Dim] = 1 ;
730+ }
731+ }
727732 }
728733
729734 const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize .data ();
@@ -733,56 +738,109 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
733738 LocalWorkSize[Dim];
734739 }
735740
736- auto EnqueueAllocateDevice = [Context, &DeviceInfo, Queue,
737- NumWG](size_t Size, uptr &Ptr) {
741+ auto EnqueueAllocateShadowMemory = [Context, &DeviceInfo,
742+ Queue](size_t Size, uptr &Ptr) {
743+ void *Allocated = nullptr ;
738744 auto URes = context.urDdiTable .USM .pfnDeviceAlloc (
739745 Context, DeviceInfo->Handle , nullptr , nullptr , Size,
740- ( void **)&Ptr );
746+ &Allocated );
741747 if (URes != UR_RESULT_SUCCESS) {
742- context.logger .error (
743- " Failed to allocate shadow memory for local memory: {}" ,
744- URes);
745- context.logger .error (
746- " Maybe the number of workgroup ({}) too large" , NumWG);
747748 return URes;
748749 }
749- // Initialize shadow memory of local memory
750- URes = urEnqueueUSMSet (Queue, (void *)Ptr, 0 , Size);
751- if (URes == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) {
752- context.logger .error (
753- " Failed to allocate shadow memory for local memory: {}" ,
754- URes);
755- context.logger .error (
756- " Maybe the number of workgroup ({}) too large" , NumWG);
757- return URes;
750+ // Initialize shadow memory
751+ URes = urEnqueueUSMSet (Queue, Allocated, 0 , Size);
752+ if (URes != UR_RESULT_SUCCESS) {
753+ [[maybe_unused]] auto URes =
754+ context.urDdiTable .USM .pfnFree (Context, Allocated);
755+ assert (URes == UR_RESULT_SUCCESS &&
756+ " urUSMFree failed at allocating shadow memory" );
757+ Allocated = nullptr ;
758758 }
759+ Ptr = (uptr)Allocated;
759760 return URes;
760761 };
761762
763+ auto LocalMemoryUsage =
764+ GetKernelLocalMemorySize (Kernel, DeviceInfo->Handle );
765+ auto PrivateMemoryUsage =
766+ GetKernelPrivateMemorySize (Kernel, DeviceInfo->Handle );
767+
768+ context.logger .info (" KernelInfo {} (LocalMemory={}, PrivateMemory={})" ,
769+ (void *)Kernel, LocalMemoryUsage,
770+ PrivateMemoryUsage);
771+
762772 // Write shadow memory offset for local memory
763773 if (Options ().DetectLocals ) {
764774 // CPU doesn't need this
765775 if (DeviceInfo->Type == DeviceType::GPU_PVC) {
766- size_t LocalMemorySize = GetLocalMemorySize (DeviceInfo->Handle );
767- size_t LocalShadowMemorySize =
776+ const size_t LocalMemorySize =
777+ GetDeviceLocalMemorySize (DeviceInfo->Handle );
778+ const size_t LocalShadowMemorySize =
768779 (NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;
769780
770781 context.logger .debug (
771- " LocalMemoryInfo (WorkGroup={}, LocalMemorySize={}, "
782+ " LocalMemory (WorkGroup={}, LocalMemorySize={}, "
772783 " LocalShadowMemorySize={})" ,
773784 NumWG, LocalMemorySize, LocalShadowMemorySize);
774785
775- UR_CALL (EnqueueAllocateDevice (
776- LocalShadowMemorySize, LaunchInfo.Data ->LocalShadowOffset ));
777-
778- LaunchInfo.Data ->LocalShadowOffsetEnd =
779- LaunchInfo.Data ->LocalShadowOffset + LocalShadowMemorySize -
780- 1 ;
786+ if (EnqueueAllocateShadowMemory (
787+ LocalShadowMemorySize,
788+ LaunchInfo.Data ->LocalShadowOffset ) !=
789+ UR_RESULT_SUCCESS) {
790+ context.logger .warning (
791+ " Failed to allocate shadow memory for local "
792+ " memory, maybe the number of workgroup ({}) is too "
793+ " large" ,
794+ NumWG);
795+ context.logger .warning (
796+ " Skip checking local memory of kernel <{}>" ,
797+ GetKernelName (Kernel));
798+ } else {
799+ LaunchInfo.Data ->LocalShadowOffsetEnd =
800+ LaunchInfo.Data ->LocalShadowOffset +
801+ LocalShadowMemorySize - 1 ;
802+
803+ context.logger .info (
804+ " ShadowMemory(Local, {} - {})" ,
805+ (void *)LaunchInfo.Data ->LocalShadowOffset ,
806+ (void *)LaunchInfo.Data ->LocalShadowOffsetEnd );
807+ }
808+ }
809+ }
781810
782- context.logger .info (
783- " ShadowMemory(Local, {} - {})" ,
784- (void *)LaunchInfo.Data ->LocalShadowOffset ,
785- (void *)LaunchInfo.Data ->LocalShadowOffsetEnd );
811+ // Write shadow memory offset for private memory
812+ if (Options ().DetectPrivates ) {
813+ if (DeviceInfo->Type == DeviceType::CPU) {
814+ LaunchInfo.Data ->PrivateShadowOffset = DeviceInfo->ShadowOffset ;
815+ } else if (DeviceInfo->Type == DeviceType::GPU_PVC) {
816+ const size_t PrivateShadowMemorySize =
817+ (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;
818+
819+ context.logger .debug (" PrivateMemory(WorkGroup={}, "
820+ " PrivateShadowMemorySize={})" ,
821+ NumWG, PrivateShadowMemorySize);
822+
823+ if (EnqueueAllocateShadowMemory (
824+ PrivateShadowMemorySize,
825+ LaunchInfo.Data ->PrivateShadowOffset ) !=
826+ UR_RESULT_SUCCESS) {
827+ context.logger .warning (
828+ " Failed to allocate shadow memory for private "
829+ " memory, maybe the number of workgroup ({}) is too "
830+ " large" ,
831+ NumWG);
832+ context.logger .warning (
833+ " Skip checking private memory of kernel <{}>" ,
834+ GetKernelName (Kernel));
835+ } else {
836+ LaunchInfo.Data ->PrivateShadowOffsetEnd =
837+ LaunchInfo.Data ->PrivateShadowOffset +
838+ PrivateShadowMemorySize - 1 ;
839+ context.logger .info (
840+ " ShadowMemory(Private, {} - {})" ,
841+ (void *)LaunchInfo.Data ->PrivateShadowOffset ,
842+ (void *)LaunchInfo.Data ->PrivateShadowOffsetEnd );
843+ }
786844 }
787845 }
788846 } while (false );
0 commit comments