@@ -249,15 +249,11 @@ ur_result_t AsanInterceptor::releaseMemory(ur_context_handle_t Context,
249249
250250ur_result_t AsanInterceptor::preLaunchKernel (ur_kernel_handle_t Kernel,
251251 ur_queue_handle_t Queue,
252- USMLaunchInfo &LaunchInfo) {
252+ LaunchInfo &LaunchInfo) {
253253 auto Context = GetContext (Queue);
254254 auto Device = GetDevice (Queue);
255255 auto ContextInfo = getContextInfo (Context);
256256 auto DeviceInfo = getDeviceInfo (Device);
257- auto KernelInfo = getKernelInfo (Kernel);
258- assert (KernelInfo && " Kernel should be instrumented" );
259-
260- UR_CALL (LaunchInfo.updateKernelInfo (*KernelInfo.get ()));
261257
262258 ManagedQueue InternalQueue (Context, Device);
263259 if (!InternalQueue) {
@@ -275,12 +271,14 @@ ur_result_t AsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel,
275271
276272ur_result_t AsanInterceptor::postLaunchKernel (ur_kernel_handle_t Kernel,
277273 ur_queue_handle_t Queue,
278- USMLaunchInfo &LaunchInfo) {
274+ LaunchInfo &LaunchInfo) {
279275 // FIXME: We must use a blocking operation here until we support urEventSetCallback
280276 auto Result = getContext ()->urDdiTable .Queue .pfnFinish (Queue);
281277
278+ UR_CALL (LaunchInfo.Data .syncFromDevice (Queue));
279+
282280 if (Result == UR_RESULT_SUCCESS) {
283- for (const auto &Report : LaunchInfo.Data -> Report ) {
281+ for (const auto &Report : LaunchInfo.Data . Host . Report ) {
284282 if (!Report.Flag ) {
285283 continue ;
286284 }
@@ -685,7 +683,7 @@ AsanInterceptor::getMemBuffer(ur_mem_handle_t MemHandle) {
685683ur_result_t AsanInterceptor::prepareLaunch (
686684 std::shared_ptr<ContextInfo> &ContextInfo,
687685 std::shared_ptr<DeviceInfo> &DeviceInfo, ur_queue_handle_t Queue,
688- ur_kernel_handle_t Kernel, USMLaunchInfo &LaunchInfo) {
686+ ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo) {
689687
690688 do {
691689 auto KernelInfo = getKernelInfo (Kernel);
@@ -721,27 +719,20 @@ ur_result_t AsanInterceptor::prepareLaunch(
721719 }
722720 }
723721
724- // Set launch info argument
725722 auto ArgNums = GetKernelNumArgs (Kernel);
723+ // We must prepare all kernel args before calling
724+ // urKernelGetSuggestedLocalWorkSize; otherwise the call will fail on
725+ // a CPU device.
726726 if (ArgNums) {
727- getContext ()->logger .debug (
728- " launch_info {} (numLocalArgs={}, localArgs={})" ,
729- (void *)LaunchInfo.Data , LaunchInfo.Data ->NumLocalArgs ,
730- (void *)LaunchInfo.Data ->LocalArgs );
731727 ur_result_t URes = getContext ()->urDdiTable .Kernel .pfnSetArgPointer (
732- Kernel, ArgNums - 1 , nullptr , LaunchInfo.Data );
728+ Kernel, ArgNums - 1 , nullptr , LaunchInfo.Data . getDevicePtr () );
733729 if (URes != UR_RESULT_SUCCESS) {
734730 getContext ()->logger .error (" Failed to set launch info: {}" ,
735731 URes);
736732 return URes;
737733 }
738734 }
739735
740- LaunchInfo.Data ->GlobalShadowOffset = DeviceInfo->Shadow ->ShadowBegin ;
741- LaunchInfo.Data ->GlobalShadowOffsetEnd = DeviceInfo->Shadow ->ShadowEnd ;
742- LaunchInfo.Data ->DeviceTy = DeviceInfo->Type ;
743- LaunchInfo.Data ->Debug = getOptions ().Debug ? 1 : 0 ;
744-
745736 if (LaunchInfo.LocalWorkSize .empty ()) {
746737 LaunchInfo.LocalWorkSize .resize (LaunchInfo.WorkDim );
747738 auto URes =
@@ -768,6 +759,14 @@ ur_result_t AsanInterceptor::prepareLaunch(
768759 LocalWorkSize[Dim];
769760 }
770761
762+ // Prepare asan runtime data
763+ LaunchInfo.Data .Host .GlobalShadowOffset =
764+ DeviceInfo->Shadow ->ShadowBegin ;
765+ LaunchInfo.Data .Host .GlobalShadowOffsetEnd =
766+ DeviceInfo->Shadow ->ShadowEnd ;
767+ LaunchInfo.Data .Host .DeviceTy = DeviceInfo->Type ;
768+ LaunchInfo.Data .Host .Debug = getOptions ().Debug ? 1 : 0 ;
769+
771770 auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle ,
772771 Device = DeviceInfo->Handle ,
773772 Queue](size_t Size, uptr &Ptr) {
@@ -816,7 +815,7 @@ ur_result_t AsanInterceptor::prepareLaunch(
816815
817816 if (EnqueueAllocateShadowMemory (
818817 LocalShadowMemorySize,
819- LaunchInfo.Data -> LocalShadowOffset ) !=
818+ LaunchInfo.Data . Host . LocalShadowOffset ) !=
820819 UR_RESULT_SUCCESS) {
821820 getContext ()->logger .warning (
822821 " Failed to allocate shadow memory for local "
@@ -827,25 +826,25 @@ ur_result_t AsanInterceptor::prepareLaunch(
827826 " Skip checking local memory of kernel <{}>" ,
828827 GetKernelName (Kernel));
829828 } else {
830- LaunchInfo.Data -> LocalShadowOffsetEnd =
831- LaunchInfo.Data -> LocalShadowOffset +
829+ LaunchInfo.Data . Host . LocalShadowOffsetEnd =
830+ LaunchInfo.Data . Host . LocalShadowOffset +
832831 LocalShadowMemorySize - 1 ;
833832
834833 ContextInfo->Stats .UpdateShadowMalloced (
835834 LocalShadowMemorySize);
836835
837836 getContext ()->logger .info (
838837 " ShadowMemory(Local, {} - {})" ,
839- (void *)LaunchInfo.Data -> LocalShadowOffset ,
840- (void *)LaunchInfo.Data -> LocalShadowOffsetEnd );
838+ (void *)LaunchInfo.Data . Host . LocalShadowOffset ,
839+ (void *)LaunchInfo.Data . Host . LocalShadowOffsetEnd );
841840 }
842841 }
843842 }
844843
845844 // Write shadow memory offset for private memory
846845 if (getOptions ().DetectPrivates ) {
847846 if (DeviceInfo->Type == DeviceType::CPU) {
848- LaunchInfo.Data -> PrivateShadowOffset =
847+ LaunchInfo.Data . Host . PrivateShadowOffset =
849848 DeviceInfo->Shadow ->ShadowBegin ;
850849 } else if (DeviceInfo->Type == DeviceType::GPU_PVC ||
851850 DeviceInfo->Type == DeviceType::GPU_DG2) {
@@ -858,7 +857,7 @@ ur_result_t AsanInterceptor::prepareLaunch(
858857
859858 if (EnqueueAllocateShadowMemory (
860859 PrivateShadowMemorySize,
861- LaunchInfo.Data -> PrivateShadowOffset ) !=
860+ LaunchInfo.Data . Host . PrivateShadowOffset ) !=
862861 UR_RESULT_SUCCESS) {
863862 getContext ()->logger .warning (
864863 " Failed to allocate shadow memory for private "
@@ -869,20 +868,41 @@ ur_result_t AsanInterceptor::prepareLaunch(
869868 " Skip checking private memory of kernel <{}>" ,
870869 GetKernelName (Kernel));
871870 } else {
872- LaunchInfo.Data -> PrivateShadowOffsetEnd =
873- LaunchInfo.Data -> PrivateShadowOffset +
871+ LaunchInfo.Data . Host . PrivateShadowOffsetEnd =
872+ LaunchInfo.Data . Host . PrivateShadowOffset +
874873 PrivateShadowMemorySize - 1 ;
875874
876875 ContextInfo->Stats .UpdateShadowMalloced (
877876 PrivateShadowMemorySize);
878877
879878 getContext ()->logger .info (
880879 " ShadowMemory(Private, {} - {})" ,
881- (void *)LaunchInfo.Data -> PrivateShadowOffset ,
882- (void *)LaunchInfo.Data -> PrivateShadowOffsetEnd );
880+ (void *)LaunchInfo.Data . Host . PrivateShadowOffset ,
881+ (void *)LaunchInfo.Data . Host . PrivateShadowOffsetEnd );
883882 }
884883 }
885884 }
885+
886+ // Write local arguments info
887+ if (!KernelInfo->LocalArgs .empty ()) {
888+ std::vector<LocalArgsInfo> LocalArgsInfo;
889+ for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs ) {
890+ LocalArgsInfo.push_back (ArgInfo);
891+ getContext ()->logger .debug (
892+ " local_args (argIndex={}, size={}, sizeWithRZ={})" ,
893+ ArgIndex, ArgInfo.Size , ArgInfo.SizeWithRedZone );
894+ }
895+ UR_CALL (LaunchInfo.Data .importLocalArgsInfo (Queue, LocalArgsInfo));
896+ }
897+
898+ // sync asan runtime data to device side
899+ UR_CALL (LaunchInfo.Data .syncToDevice (Queue));
900+
901+ getContext ()->logger .debug (
902+ " launch_info {} (numLocalArgs={}, localArgs={})" ,
903+ (void *)LaunchInfo.Data .getDevicePtr (),
904+ LaunchInfo.Data .Host .NumLocalArgs ,
905+ (void *)LaunchInfo.Data .Host .LocalArgs );
886906 } while (false );
887907
888908 return UR_RESULT_SUCCESS;
@@ -942,63 +962,39 @@ ContextInfo::~ContextInfo() {
942962 }
943963}
944964
945- ur_result_t USMLaunchInfo::initialize () {
946- UR_CALL (getContext ()->urDdiTable .Context .pfnRetain (Context));
947- UR_CALL (getContext ()->urDdiTable .Device .pfnRetain (Device));
948- UR_CALL (getContext ()->urDdiTable .USM .pfnSharedAlloc (
949- Context, Device, nullptr , nullptr , sizeof (LaunchInfo), (void **)&Data));
950- *Data = LaunchInfo{};
951- return UR_RESULT_SUCCESS;
952- }
953-
954- ur_result_t USMLaunchInfo::updateKernelInfo (const KernelInfo &KI) {
955- auto NumArgs = KI.LocalArgs .size ();
956- if (NumArgs) {
957- Data->NumLocalArgs = NumArgs;
958- UR_CALL (getContext ()->urDdiTable .USM .pfnSharedAlloc (
959- Context, Device, nullptr , nullptr , sizeof (LocalArgsInfo) * NumArgs,
960- (void **)&Data->LocalArgs ));
961- uint32_t i = 0 ;
962- for (auto [ArgIndex, ArgInfo] : KI.LocalArgs ) {
963- Data->LocalArgs [i++] = ArgInfo;
964- getContext ()->logger .debug (
965- " local_args (argIndex={}, size={}, sizeWithRZ={})" , ArgIndex,
966- ArgInfo.Size , ArgInfo.SizeWithRedZone );
967- }
968- }
969- return UR_RESULT_SUCCESS;
970- }
971-
972- USMLaunchInfo::~USMLaunchInfo () {
965+ AsanRuntimeDataWrapper::~AsanRuntimeDataWrapper () {
973966 [[maybe_unused]] ur_result_t Result;
974- if (Data) {
975- auto Type = GetDeviceType (Context, Device);
976- auto ContextInfo = getAsanInterceptor ()->getContextInfo (Context);
977- if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
978- if (Data->PrivateShadowOffset ) {
979- ContextInfo->Stats .UpdateShadowFreed (
980- Data->PrivateShadowOffsetEnd - Data->PrivateShadowOffset +
981- 1 );
982- Result = getContext ()->urDdiTable .USM .pfnFree (
983- Context, (void *)Data->PrivateShadowOffset );
984- assert (Result == UR_RESULT_SUCCESS);
985- }
986- if (Data->LocalShadowOffset ) {
987- ContextInfo->Stats .UpdateShadowFreed (
988- Data->LocalShadowOffsetEnd - Data->LocalShadowOffset + 1 );
989- Result = getContext ()->urDdiTable .USM .pfnFree (
990- Context, (void *)Data->LocalShadowOffset );
991- assert (Result == UR_RESULT_SUCCESS);
992- }
967+ auto Type = GetDeviceType (Context, Device);
968+ auto ContextInfo = getAsanInterceptor ()->getContextInfo (Context);
969+ if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
970+ if (Host.PrivateShadowOffset ) {
971+ ContextInfo->Stats .UpdateShadowFreed (Host.PrivateShadowOffsetEnd -
972+ Host.PrivateShadowOffset + 1 );
973+ Result = getContext ()->urDdiTable .USM .pfnFree (
974+ Context, (void *)Host.PrivateShadowOffset );
975+ assert (Result == UR_RESULT_SUCCESS);
993976 }
994- if (Data->LocalArgs ) {
977+ if (Host.LocalShadowOffset ) {
978+ ContextInfo->Stats .UpdateShadowFreed (Host.LocalShadowOffsetEnd -
979+ Host.LocalShadowOffset + 1 );
995980 Result = getContext ()->urDdiTable .USM .pfnFree (
996- Context, (void *)Data-> LocalArgs );
981+ Context, (void *)Host. LocalShadowOffset );
997982 assert (Result == UR_RESULT_SUCCESS);
998983 }
999- Result = getContext ()->urDdiTable .USM .pfnFree (Context, (void *)Data);
984+ }
985+ if (Host.LocalArgs ) {
986+ Result = getContext ()->urDdiTable .USM .pfnFree (Context,
987+ (void *)Host.LocalArgs );
988+ assert (Result == UR_RESULT_SUCCESS);
989+ }
990+ if (DevicePtr) {
991+ Result = getContext ()->urDdiTable .USM .pfnFree (Context, DevicePtr);
1000992 assert (Result == UR_RESULT_SUCCESS);
1001993 }
994+ }
995+
996+ LaunchInfo::~LaunchInfo () {
997+ [[maybe_unused]] ur_result_t Result;
1002998 Result = getContext ()->urDdiTable .Context .pfnRelease (Context);
1003999 assert (Result == UR_RESULT_SUCCESS);
10041000 Result = getContext ()->urDdiTable .Device .pfnRelease (Device);
0 commit comments