Skip to content

Commit 0194f94

Browse files
committed
Add ASAN intercept function for urEnqueueKernelLaunchWithArgsExp()
1 parent 493a59d commit 0194f94

File tree

1 file changed

+84
-0
lines changed
  • unified-runtime/source/loader/layers/sanitizer/asan

1 file changed

+84
-0
lines changed

unified-runtime/source/loader/layers/sanitizer/asan/asan_ddi.cpp

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,90 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
539539
return UR_RESULT_SUCCESS;
540540
}
541541

542+
///////////////////////////////////////////////////////////////////////////////
543+
/// @brief Intercept function for urEnqueueKernelLaunchWithArgsExp
544+
__urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp(
    /// [in] handle of the queue object
    ur_queue_handle_t hQueue,
    /// [in] handle of the kernel object
    ur_kernel_handle_t hKernel,
    /// [in] number of dimensions, from 1 to 3, to specify the global and
    /// work-group work-items
    uint32_t workDim,
    /// [in][optional] pointer to an array of workDim unsigned values that
    /// specify the offset used to calculate the global ID of a work-item
    const size_t *pGlobalWorkOffset,
    /// [in] pointer to an array of workDim unsigned values that specify the
    /// number of global work-items in workDim that will execute the kernel
    /// function
    const size_t *pGlobalWorkSize,
    /// [in][optional] pointer to an array of workDim unsigned values that
    /// specify the number of local work-items forming a work-group that will
    /// execute the kernel function.
    /// If nullptr, the runtime implementation will choose the work-group size.
    const size_t *pLocalWorkSize,
    /// [in] Number of entries in pArgs
    uint32_t numArgs,
    /// [in][optional][range(0, numArgs)] pointer to a list of kernel arg
    /// properties.
    const ur_exp_kernel_arg_properties_t *pArgs,
    /// [in] size of the launch prop list
    uint32_t numPropsInLaunchPropList,
    /// [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list
    /// of launch properties
    const ur_kernel_launch_property_t *launchPropList,
    /// [in] size of the event wait list
    uint32_t numEventsInWaitList,
    /// [in][optional][range(0, numEventsInWaitList)] pointer to a list of
    /// events that must be complete before the kernel execution.
    /// If nullptr, the numEventsInWaitList must be 0, indicating that no wait
    /// event.
    const ur_event_handle_t *phEventWaitList,
    /// [out][optional][alloc] return an event object that identifies this
    /// particular kernel execution instance. If phEventWaitList and phEvent
    /// are not NULL, phEvent must not refer to an element of the
    /// phEventWaitList array.
    ur_event_handle_t *phEvent) {
  // Overview: take the global kernel-launch lock, build the per-launch
  // LaunchInfo, run the interceptor's pre-launch hook, forward the launch to
  // the next layer's pfnKernelLaunch, then run the post-launch hook.
  //
  // Returns UR_RESULT_ERROR_UNSUPPORTED_FEATURE when pfnKernelLaunch is not
  // populated, the forwarded call's result when that call fails, and
  // UR_RESULT_SUCCESS otherwise. UR_CALL presumably returns early with the
  // failing result -- confirm against the macro definition.

  // This mutex is to prevent concurrent kernel launches across different
  // queues as the DeviceASAN local/private shadow memory does not support
  // concurrent kernel launches now.
  std::scoped_lock<ur_shared_mutex> Guard(
      getAsanInterceptor()->KernelLaunchMutex);

  auto pfnKernelLaunch = getContext()->urDdiTable.Enqueue.pfnKernelLaunch;
  if (nullptr == pfnKernelLaunch) {
    return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
  }

  // Trace under this entry point's own name so it can be told apart from the
  // plain urEnqueueKernelLaunch intercept in debug logs.
  UR_LOG_L(getContext()->logger, DEBUG,
           "==== urEnqueueKernelLaunchWithArgsExp");

  // NOTE(review): numArgs / pArgs are never read in this function, so the
  // kernel arguments supplied through this entry point are not applied to
  // hKernel before the launch is forwarded. They presumably need to be set
  // (through the sanitizer's own urKernelSetArg* intercepts, so that ASAN
  // keeps tracking them) before preLaunchKernel runs -- TODO confirm and
  // implement.

  LaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue), pGlobalWorkSize,
                        pLocalWorkSize, pGlobalWorkOffset, workDim);
  UR_CALL(LaunchInfo.Data.syncToDevice(hQueue));

  UR_CALL(getAsanInterceptor()->preLaunchKernel(hKernel, hQueue, LaunchInfo));

  // Forward through the pointer validated above rather than re-reading the
  // DDI table. The local work size comes from LaunchInfo, not directly from
  // pLocalWorkSize.
  const ur_result_t UrRes = pfnKernelLaunch(
      hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
      LaunchInfo.LocalWorkSize.data(), numPropsInLaunchPropList, launchPropList,
      numEventsInWaitList, phEventWaitList, phEvent);
  if (UrRes != UR_RESULT_SUCCESS) {
    if (UrRes == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) {
      // The message names the underlying call that actually failed.
      UR_LOG_L(
          getContext()->logger, ERR,
          "urEnqueueKernelLaunch failed due to out of device memory, maybe "
          "SLM is fully used.");
    }
    // The post-launch hook is skipped when the launch itself failed.
    return UrRes;
  }

  UR_CALL(getAsanInterceptor()->postLaunchKernel(hKernel, hQueue, LaunchInfo));

  return UR_RESULT_SUCCESS;
}
625+
542626
///////////////////////////////////////////////////////////////////////////////
543627
/// @brief Intercept function for urContextCreate
544628
__urdlllocal ur_result_t UR_APICALL urContextCreate(

0 commit comments

Comments
 (0)