@@ -22,8 +22,6 @@ namespace ur_sanitizer_layer {
2222
2323namespace {
2424
25- constexpr auto kSPIR_DeviceSanitizerReportMem = " __DeviceSanitizerReportMem" ;
26-
2725uptr MemToShadow_CPU (uptr USM_SHADOW_BASE, uptr UPtr) {
2826 return USM_SHADOW_BASE + (UPtr >> 3 );
2927}
@@ -348,11 +346,14 @@ ur_result_t SanitizerInterceptor::releaseMemory(ur_context_handle_t Context,
348346
349347ur_result_t SanitizerInterceptor::preLaunchKernel (ur_kernel_handle_t Kernel,
350348 ur_queue_handle_t Queue,
351- LaunchInfo &LaunchInfo) {
349+ USMLaunchInfo &LaunchInfo) {
352350 auto Context = GetContext (Queue);
353351 auto Device = GetDevice (Queue);
354352 auto ContextInfo = getContextInfo (Context);
355353 auto DeviceInfo = getDeviceInfo (Device);
354+ auto KernelInfo = getKernelInfo (Kernel);
355+
356+ UR_CALL (LaunchInfo.updateKernelInfo (*KernelInfo.get ()));
356357
357358 ManagedQueue InternalQueue (Context, Device);
358359 if (!InternalQueue) {
@@ -370,23 +371,12 @@ ur_result_t SanitizerInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
370371
371372ur_result_t SanitizerInterceptor::postLaunchKernel (ur_kernel_handle_t Kernel,
372373 ur_queue_handle_t Queue,
373- ur_event_handle_t &Event,
374- LaunchInfo &LaunchInfo) {
375- auto Program = GetProgram (Kernel);
376- ur_event_handle_t ReadEvent{};
377-
378- // If kernel has defined SPIR_DeviceSanitizerReportMem, then we try to read it
379- // to host, but it's okay that it isn't defined
374+ USMLaunchInfo &LaunchInfo) {
380375 // FIXME: We must use a blocking operation here until urEventSetCallback is supported
381- auto Result = context.urDdiTable .Enqueue .pfnDeviceGlobalVariableRead (
382- Queue, Program, kSPIR_DeviceSanitizerReportMem , true ,
383- sizeof (LaunchInfo.SPIR_DeviceSanitizerReportMem ), 0 ,
384- &LaunchInfo.SPIR_DeviceSanitizerReportMem , 1 , &Event, &ReadEvent);
376+ auto Result = context.urDdiTable .Queue .pfnFinish (Queue);
385377
386378 if (Result == UR_RESULT_SUCCESS) {
387- Event = ReadEvent;
388-
389- const auto &AH = LaunchInfo.SPIR_DeviceSanitizerReportMem ;
379+ const auto &AH = LaunchInfo.Data ->SanitizerReport ;
390380 if (!AH.Flag ) {
391381 return UR_RESULT_SUCCESS;
392382 }
@@ -627,13 +617,44 @@ ur_result_t SanitizerInterceptor::eraseDevice(ur_device_handle_t Device) {
627617 return UR_RESULT_SUCCESS;
628618}
629619
620+ ur_result_t SanitizerInterceptor::insertKernel (ur_kernel_handle_t Kernel) {
621+ std::scoped_lock<ur_shared_mutex> Guard (m_KernelMapMutex);
622+ if (m_KernelMap.find (Kernel) != m_KernelMap.end ()) {
623+ return UR_RESULT_SUCCESS;
624+ }
625+ m_KernelMap.emplace (Kernel, std::make_shared<KernelInfo>(Kernel));
626+ return UR_RESULT_SUCCESS;
627+ }
628+
629+ ur_result_t SanitizerInterceptor::eraseKernel (ur_kernel_handle_t Kernel) {
630+ std::scoped_lock<ur_shared_mutex> Guard (m_KernelMapMutex);
631+ assert (m_KernelMap.find (Kernel) != m_KernelMap.end ());
632+ m_KernelMap.erase (Kernel);
633+ return UR_RESULT_SUCCESS;
634+ }
635+
630636ur_result_t SanitizerInterceptor::prepareLaunch (
631637 ur_context_handle_t Context, std::shared_ptr<DeviceInfo> &DeviceInfo,
632638 ur_queue_handle_t Queue, ur_kernel_handle_t Kernel,
633- LaunchInfo &LaunchInfo) {
639+ USMLaunchInfo &LaunchInfo) {
634640 auto Program = GetProgram (Kernel);
635641
636642 do {
643+ // Set launch info argument
644+ auto ArgNums = GetKernelNumArgs (Kernel);
645+ if (ArgNums) {
646+ context.logger .debug (
647+ " launch_info {} (numLocalArgs={}, localArgs={})" ,
648+ (void *)LaunchInfo.Data , LaunchInfo.Data ->NumLocalArgs ,
649+ (void *)LaunchInfo.Data ->LocalArgs );
650+ ur_result_t URes = context.urDdiTable .Kernel .pfnSetArgPointer (
651+ Kernel, ArgNums - 1 , nullptr , &LaunchInfo.Data );
652+ if (URes != UR_RESULT_SUCCESS) {
653+ context.logger .error (" Failed to set launch info: {}" , URes);
654+ return URes;
655+ }
656+ }
657+
637658 // Write global variable to program
638659 auto EnqueueWriteGlobal = [Queue, Program](const char *Name,
639660 const void *Value,
@@ -723,15 +744,17 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
723744 " LocalShadowMemorySize={})" ,
724745 NumWG, LocalMemorySize, LocalShadowMemorySize);
725746
726- UR_CALL (EnqueueAllocateDevice (LocalShadowMemorySize,
727- LaunchInfo.LocalShadowOffset ));
747+ UR_CALL (EnqueueAllocateDevice (
748+ LocalShadowMemorySize, LaunchInfo.Data -> LocalShadowOffset ));
728749
729- LaunchInfo.LocalShadowOffsetEnd =
730- LaunchInfo.LocalShadowOffset + LocalShadowMemorySize - 1 ;
750+ LaunchInfo.Data ->LocalShadowOffsetEnd =
751+ LaunchInfo.Data ->LocalShadowOffset + LocalShadowMemorySize -
752+ 1 ;
731753
732- context.logger .info (" ShadowMemory(Local, {} - {})" ,
733- (void *)LaunchInfo.LocalShadowOffset ,
734- (void *)LaunchInfo.LocalShadowOffsetEnd );
754+ context.logger .info (
755+ " ShadowMemory(Local, {} - {})" ,
756+ (void *)LaunchInfo.Data ->LocalShadowOffset ,
757+ (void *)LaunchInfo.Data ->LocalShadowOffsetEnd );
735758 }
736759 }
737760 } while (false );
@@ -749,15 +772,61 @@ SanitizerInterceptor::findAllocInfoByAddress(uptr Address) {
749772 return --It;
750773}
751774
752- LaunchInfo::~LaunchInfo () {
775+ ur_result_t USMLaunchInfo::initialize () {
776+ UR_CALL (context.urDdiTable .Context .pfnRetain (Context));
777+ UR_CALL (context.urDdiTable .Device .pfnRetain (Device));
778+ UR_CALL (context.urDdiTable .USM .pfnSharedAlloc (
779+ Context, Device, nullptr , nullptr , sizeof (LaunchInfo), (void **)&Data));
780+ *Data = LaunchInfo{};
781+ return UR_RESULT_SUCCESS;
782+ }
783+
784+ ur_result_t USMLaunchInfo::updateKernelInfo (const KernelInfo &KI) {
785+ auto NumArgs = KI.LocalArgs .size ();
786+ if (NumArgs) {
787+ Data->NumLocalArgs = NumArgs;
788+ UR_CALL (context.urDdiTable .USM .pfnSharedAlloc (
789+ Context, Device, nullptr , nullptr , sizeof (LocalArgsInfo) * NumArgs,
790+ (void **)&Data->LocalArgs ));
791+ uint32_t i = 0 ;
792+ for (auto [ArgIndex, ArgInfo] : KI.LocalArgs ) {
793+ Data->LocalArgs [i++] = ArgInfo;
794+ context.logger .debug (
795+ " local_args (argIndex={}, size={}, sizeWithRZ={})" , ArgIndex,
796+ ArgInfo.Size , ArgInfo.SizeWithRedZone );
797+ }
798+ }
799+ return UR_RESULT_SUCCESS;
800+ }
801+
802+ USMLaunchInfo::~USMLaunchInfo () {
753803 [[maybe_unused]] ur_result_t Result;
754- if (LocalShadowOffset) {
755- Result =
756- context.urDdiTable .USM .pfnFree (Context, (void *)LocalShadowOffset);
804+ if (Data) {
805+ auto Type = GetDeviceType (Device);
806+ if (Type == DeviceType::GPU_PVC) {
807+ if (Data->PrivateShadowOffset ) {
808+ Result = context.urDdiTable .USM .pfnFree (
809+ Context, (void *)Data->PrivateShadowOffset );
810+ assert (Result == UR_RESULT_SUCCESS);
811+ }
812+ if (Data->LocalShadowOffset ) {
813+ Result = context.urDdiTable .USM .pfnFree (
814+ Context, (void *)Data->LocalShadowOffset );
815+ assert (Result == UR_RESULT_SUCCESS);
816+ }
817+ }
818+ if (Data->LocalArgs ) {
819+ Result = context.urDdiTable .USM .pfnFree (Context,
820+ (void *)Data->LocalArgs );
821+ assert (Result == UR_RESULT_SUCCESS);
822+ }
823+ Result = context.urDdiTable .USM .pfnFree (Context, (void *)Data);
757824 assert (Result == UR_RESULT_SUCCESS);
758825 }
759826 Result = context.urDdiTable .Context .pfnRelease (Context);
760827 assert (Result == UR_RESULT_SUCCESS);
828+ Result = context.urDdiTable .Device .pfnRelease (Device);
829+ assert (Result == UR_RESULT_SUCCESS);
761830}
762831
763832} // namespace ur_sanitizer_layer
0 commit comments