@@ -676,162 +676,155 @@ ur_result_t AsanInterceptor::prepareLaunch(
676676 std::shared_ptr<DeviceInfo> &DeviceInfo, ur_queue_handle_t Queue,
677677 ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo) {
678678
679- do {
680- auto KernelInfo = getKernelInfo (Kernel);
681- assert (KernelInfo && " Kernel should be instrumented" );
682-
683- // Validate pointer arguments
684- if (getOptions ().DetectKernelArguments ) {
685- for (const auto &[ArgIndex, PtrPair] : KernelInfo->PointerArgs ) {
686- auto Ptr = PtrPair.first ;
687- if (Ptr == nullptr ) {
688- continue ;
689- }
690- if (auto ValidateResult = ValidateUSMPointer (
691- ContextInfo->Handle , DeviceInfo->Handle , (uptr)Ptr)) {
692- ReportInvalidKernelArgument (Kernel, ArgIndex, (uptr)Ptr,
693- ValidateResult, PtrPair.second );
694- exitWithErrors ();
695- }
679+ auto KernelInfo = getKernelInfo (Kernel);
680+ assert (KernelInfo && " Kernel should be instrumented" );
681+
682+ // Validate pointer arguments
683+ if (getOptions ().DetectKernelArguments ) {
684+ for (const auto &[ArgIndex, PtrPair] : KernelInfo->PointerArgs ) {
685+ auto Ptr = PtrPair.first ;
686+ if (Ptr == nullptr ) {
687+ continue ;
688+ }
689+ if (auto ValidateResult = ValidateUSMPointer (
690+ ContextInfo->Handle , DeviceInfo->Handle , (uptr)Ptr)) {
691+ ReportInvalidKernelArgument (Kernel, ArgIndex, (uptr)Ptr,
692+ ValidateResult, PtrPair.second );
693+ exitWithErrors ();
696694 }
697695 }
696+ }
698697
699- // Set membuffer arguments
700- for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs ) {
701- char *ArgPointer = nullptr ;
702- UR_CALL (MemBuffer->getHandle (DeviceInfo->Handle , ArgPointer));
703- ur_result_t URes = getContext ()->urDdiTable .Kernel .pfnSetArgPointer (
704- Kernel, ArgIndex, nullptr , ArgPointer);
705- if (URes != UR_RESULT_SUCCESS) {
706- getContext ()->logger .error (
707- " Failed to set buffer {} as the {} arg to kernel {}: {}" ,
708- ur_cast<ur_mem_handle_t >(MemBuffer.get ()), ArgIndex, Kernel,
709- URes);
710- }
698+ // Set membuffer arguments
699+ for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs ) {
700+ char *ArgPointer = nullptr ;
701+ UR_CALL (MemBuffer->getHandle (DeviceInfo->Handle , ArgPointer));
702+ ur_result_t URes = getContext ()->urDdiTable .Kernel .pfnSetArgPointer (
703+ Kernel, ArgIndex, nullptr , ArgPointer);
704+ if (URes != UR_RESULT_SUCCESS) {
705+ getContext ()->logger .error (
706+ " Failed to set buffer {} as the {} arg to kernel {}: {}" ,
707+ ur_cast<ur_mem_handle_t >(MemBuffer.get ()), ArgIndex, Kernel,
708+ URes);
711709 }
710+ }
712711
713- auto ArgNums = GetKernelNumArgs (Kernel);
714- // We must prepare all kernel args before call
715- // urKernelGetSuggestedLocalWorkSize, otherwise the call will fail on
716- // CPU device.
717- if (ArgNums) {
718- ur_result_t URes = getContext ()->urDdiTable .Kernel .pfnSetArgPointer (
719- Kernel, ArgNums - 1 , nullptr , LaunchInfo.Data .getDevicePtr ());
720- if (URes != UR_RESULT_SUCCESS) {
721- getContext ()->logger .error (" Failed to set launch info: {}" ,
722- URes);
723- return URes;
724- }
712+ auto ArgNums = GetKernelNumArgs (Kernel);
713+ // We must prepare all kernel args before call
714+ // urKernelGetSuggestedLocalWorkSize, otherwise the call will fail on
715+ // CPU device.
716+ if (ArgNums) {
717+ ur_result_t URes = getContext ()->urDdiTable .Kernel .pfnSetArgPointer (
718+ Kernel, ArgNums - 1 , nullptr , LaunchInfo.Data .getDevicePtr ());
719+ if (URes != UR_RESULT_SUCCESS) {
720+ getContext ()->logger .error (" Failed to set launch info: {}" , URes);
721+ return URes;
725722 }
723+ }
726724
727- if (LaunchInfo.LocalWorkSize .empty ()) {
728- LaunchInfo.LocalWorkSize .resize (LaunchInfo.WorkDim );
729- auto URes =
730- getContext ()->urDdiTable .Kernel .pfnGetSuggestedLocalWorkSize (
731- Kernel, Queue, LaunchInfo.WorkDim ,
732- LaunchInfo.GlobalWorkOffset , LaunchInfo.GlobalWorkSize ,
733- LaunchInfo.LocalWorkSize .data ());
734- if (URes != UR_RESULT_SUCCESS) {
735- if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
736- return URes;
737- }
738- // If urKernelGetSuggestedLocalWorkSize is not supported by driver, we fallback
739- // to inefficient implementation
740- for (size_t Dim = 0 ; Dim < LaunchInfo.WorkDim ; ++Dim) {
741- LaunchInfo.LocalWorkSize [Dim] = 1 ;
742- }
725+ if (LaunchInfo.LocalWorkSize .empty ()) {
726+ LaunchInfo.LocalWorkSize .resize (LaunchInfo.WorkDim );
727+ auto URes =
728+ getContext ()->urDdiTable .Kernel .pfnGetSuggestedLocalWorkSize (
729+ Kernel, Queue, LaunchInfo.WorkDim , LaunchInfo.GlobalWorkOffset ,
730+ LaunchInfo.GlobalWorkSize , LaunchInfo.LocalWorkSize .data ());
731+ if (URes != UR_RESULT_SUCCESS) {
732+ if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
733+ return URes;
734+ }
735+ // If urKernelGetSuggestedLocalWorkSize is not supported by driver, we fallback
736+ // to inefficient implementation
737+ for (size_t Dim = 0 ; Dim < LaunchInfo.WorkDim ; ++Dim) {
738+ LaunchInfo.LocalWorkSize [Dim] = 1 ;
743739 }
744740 }
741+ }
745742
746- const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize .data ();
747- uint32_t NumWG = 1 ;
748- for (uint32_t Dim = 0 ; Dim < LaunchInfo.WorkDim ; ++Dim) {
749- NumWG *= (LaunchInfo.GlobalWorkSize [Dim] + LocalWorkSize[Dim] - 1 ) /
750- LocalWorkSize[Dim];
751- }
743+ const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize .data ();
744+ uint32_t NumWG = 1 ;
745+ for (uint32_t Dim = 0 ; Dim < LaunchInfo.WorkDim ; ++Dim) {
746+ NumWG *= (LaunchInfo.GlobalWorkSize [Dim] + LocalWorkSize[Dim] - 1 ) /
747+ LocalWorkSize[Dim];
748+ }
752749
753- // Prepare asan runtime data
754- LaunchInfo.Data .Host .GlobalShadowOffset =
755- DeviceInfo->Shadow ->ShadowBegin ;
756- LaunchInfo.Data .Host .GlobalShadowOffsetEnd =
757- DeviceInfo->Shadow ->ShadowEnd ;
758- LaunchInfo.Data .Host .DeviceTy = DeviceInfo->Type ;
759- LaunchInfo.Data .Host .Debug = getOptions ().Debug ? 1 : 0 ;
760-
761- auto LocalMemoryUsage =
762- GetKernelLocalMemorySize (Kernel, DeviceInfo->Handle );
763- auto PrivateMemoryUsage =
764- GetKernelPrivateMemorySize (Kernel, DeviceInfo->Handle );
765-
766- getContext ()->logger .info (
767- " KernelInfo {} (LocalMemory={}, PrivateMemory={})" , (void *)Kernel,
768- LocalMemoryUsage, PrivateMemoryUsage);
769-
770- // Write shadow memory offset for local memory
771- if (getOptions ().DetectLocals ) {
772- if (DeviceInfo->Shadow ->AllocLocalShadow (
773- Queue, NumWG, LaunchInfo.Data .Host .LocalShadowOffset ,
774- LaunchInfo.Data .Host .LocalShadowOffsetEnd ) !=
775- UR_RESULT_SUCCESS) {
776- getContext ()->logger .warning (
777- " Failed to allocate shadow memory for local "
778- " memory, maybe the number of workgroup ({}) is too "
779- " large" ,
780- NumWG);
781- getContext ()->logger .warning (
782- " Skip checking local memory of kernel <{}>" ,
783- GetKernelName (Kernel));
784- } else {
785- getContext ()->logger .info (
786- " ShadowMemory(Local, WorkGroup{}, {} - {})" , NumWG,
787- (void *)LaunchInfo.Data .Host .LocalShadowOffset ,
788- (void *)LaunchInfo.Data .Host .LocalShadowOffsetEnd );
789- }
750+ // Prepare asan runtime data
751+ LaunchInfo.Data .Host .GlobalShadowOffset = DeviceInfo->Shadow ->ShadowBegin ;
752+ LaunchInfo.Data .Host .GlobalShadowOffsetEnd = DeviceInfo->Shadow ->ShadowEnd ;
753+ LaunchInfo.Data .Host .DeviceTy = DeviceInfo->Type ;
754+ LaunchInfo.Data .Host .Debug = getOptions ().Debug ? 1 : 0 ;
755+
756+ auto LocalMemoryUsage =
757+ GetKernelLocalMemorySize (Kernel, DeviceInfo->Handle );
758+ auto PrivateMemoryUsage =
759+ GetKernelPrivateMemorySize (Kernel, DeviceInfo->Handle );
760+
761+ getContext ()->logger .info (
762+ " KernelInfo {} (LocalMemory={}, PrivateMemory={})" , (void *)Kernel,
763+ LocalMemoryUsage, PrivateMemoryUsage);
764+
765+ // Write shadow memory offset for local memory
766+ if (getOptions ().DetectLocals ) {
767+ if (DeviceInfo->Shadow ->AllocLocalShadow (
768+ Queue, NumWG, LaunchInfo.Data .Host .LocalShadowOffset ,
769+ LaunchInfo.Data .Host .LocalShadowOffsetEnd ) !=
770+ UR_RESULT_SUCCESS) {
771+ getContext ()->logger .warning (
772+ " Failed to allocate shadow memory for local "
773+ " memory, maybe the number of workgroup ({}) is too "
774+ " large" ,
775+ NumWG);
776+ getContext ()->logger .warning (
777+ " Skip checking local memory of kernel <{}>" ,
778+ GetKernelName (Kernel));
779+ } else {
780+ getContext ()->logger .info (
781+ " ShadowMemory(Local, WorkGroup{}, {} - {})" , NumWG,
782+ (void *)LaunchInfo.Data .Host .LocalShadowOffset ,
783+ (void *)LaunchInfo.Data .Host .LocalShadowOffsetEnd );
790784 }
785+ }
791786
792- // Write shadow memory offset for private memory
793- if (getOptions ().DetectPrivates ) {
794- if (DeviceInfo->Shadow ->AllocPrivateShadow (
795- Queue, NumWG, LaunchInfo.Data .Host .PrivateShadowOffset ,
796- LaunchInfo.Data .Host .PrivateShadowOffsetEnd ) !=
797- UR_RESULT_SUCCESS) {
798- getContext ()->logger .warning (
799- " Failed to allocate shadow memory for private "
800- " memory, maybe the number of workgroup ({}) is too "
801- " large" ,
802- NumWG);
803- getContext ()->logger .warning (
804- " Skip checking private memory of kernel <{}>" ,
805- GetKernelName (Kernel));
806- } else {
807- getContext ()->logger .info (
808- " ShadowMemory(Private, WorkGroup{}, {} - {})" , NumWG,
809- (void *)LaunchInfo.Data .Host .PrivateShadowOffset ,
810- (void *)LaunchInfo.Data .Host .PrivateShadowOffsetEnd );
811- }
787+ // Write shadow memory offset for private memory
788+ if (getOptions ().DetectPrivates ) {
789+ if (DeviceInfo->Shadow ->AllocPrivateShadow (
790+ Queue, NumWG, LaunchInfo.Data .Host .PrivateShadowOffset ,
791+ LaunchInfo.Data .Host .PrivateShadowOffsetEnd ) !=
792+ UR_RESULT_SUCCESS) {
793+ getContext ()->logger .warning (
794+ " Failed to allocate shadow memory for private "
795+ " memory, maybe the number of workgroup ({}) is too "
796+ " large" ,
797+ NumWG);
798+ getContext ()->logger .warning (
799+ " Skip checking private memory of kernel <{}>" ,
800+ GetKernelName (Kernel));
801+ } else {
802+ getContext ()->logger .info (
803+ " ShadowMemory(Private, WorkGroup{}, {} - {})" , NumWG,
804+ (void *)LaunchInfo.Data .Host .PrivateShadowOffset ,
805+ (void *)LaunchInfo.Data .Host .PrivateShadowOffsetEnd );
812806 }
807+ }
813808
814- // Write local arguments info
815- if (!KernelInfo->LocalArgs .empty ()) {
816- std::vector<LocalArgsInfo> LocalArgsInfo;
817- for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs ) {
818- LocalArgsInfo.push_back (ArgInfo);
819- getContext ()->logger .debug (
820- " local_args (argIndex={}, size={}, sizeWithRZ={})" ,
821- ArgIndex, ArgInfo.Size , ArgInfo.SizeWithRedZone );
822- }
823- UR_CALL (LaunchInfo.Data .importLocalArgsInfo (Queue, LocalArgsInfo));
809+ // Write local arguments info
810+ if (!KernelInfo->LocalArgs .empty ()) {
811+ std::vector<LocalArgsInfo> LocalArgsInfo;
812+ for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs ) {
813+ LocalArgsInfo.push_back (ArgInfo);
814+ getContext ()->logger .debug (
815+ " local_args (argIndex={}, size={}, sizeWithRZ={})" , ArgIndex,
816+ ArgInfo.Size , ArgInfo.SizeWithRedZone );
824817 }
818+ UR_CALL (LaunchInfo.Data .importLocalArgsInfo (Queue, LocalArgsInfo));
819+ }
825820
826- // sync asan runtime data to device side
827- UR_CALL (LaunchInfo.Data .syncToDevice (Queue));
821+ // sync asan runtime data to device side
822+ UR_CALL (LaunchInfo.Data .syncToDevice (Queue));
828823
829- getContext ()->logger .debug (
830- " launch_info {} (numLocalArgs={}, localArgs={})" ,
831- (void *)LaunchInfo.Data .getDevicePtr (),
832- LaunchInfo.Data .Host .NumLocalArgs ,
833- (void *)LaunchInfo.Data .Host .LocalArgs );
834- } while (false );
824+ getContext ()->logger .debug (" launch_info {} (numLocalArgs={}, localArgs={})" ,
825+ (void *)LaunchInfo.Data .getDevicePtr (),
826+ LaunchInfo.Data .Host .NumLocalArgs ,
827+ (void *)LaunchInfo.Data .Host .LocalArgs );
835828
836829 return UR_RESULT_SUCCESS;
837830}
0 commit comments