@@ -539,6 +539,90 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
539539 return UR_RESULT_SUCCESS;
540540}
541541
542+ // /////////////////////////////////////////////////////////////////////////////
543+ // / @brief Intercept function for urEnqueueKernelLaunchWithArgsExp
544+ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunchWithArgsExp (
545+ // / [in] handle of the queue object
546+ ur_queue_handle_t hQueue,
547+ // / [in] handle of the kernel object
548+ ur_kernel_handle_t hKernel,
549+ // / [in] number of dimensions, from 1 to 3, to specify the global and
550+ // / work-group work-items
551+ uint32_t workDim,
552+ // / [in][optional] pointer to an array of workDim unsigned values that
553+ // / specify the offset used to calculate the global ID of a work-item
554+ const size_t *pGlobalWorkOffset,
555+ // / [in] pointer to an array of workDim unsigned values that specify the
556+ // / number of global work-items in workDim that will execute the kernel
557+ // / function
558+ const size_t *pGlobalWorkSize,
559+ // / [in][optional] pointer to an array of workDim unsigned values that
560+ // / specify the number of local work-items forming a work-group that will
561+ // / execute the kernel function.
562+ // / If nullptr, the runtime implementation will choose the work-group size.
563+ const size_t *pLocalWorkSize,
564+ // / [in] Number of entries in pArgs
565+ uint32_t numArgs,
566+ // / [in][optional][range(0, numArgs)] pointer to a list of kernel arg
567+ // / properties.
568+ const ur_exp_kernel_arg_properties_t *pArgs,
569+ // / [in] size of the launch prop list
570+ uint32_t numPropsInLaunchPropList,
571+ // / [in][optional][range(0, numPropsInLaunchPropList)] pointer to a list
572+ // / of launch properties
573+ const ur_kernel_launch_property_t *launchPropList,
574+ // / [in] size of the event wait list
575+ uint32_t numEventsInWaitList,
576+ // / [in][optional][range(0, numEventsInWaitList)] pointer to a list of
577+ // / events that must be complete before the kernel execution.
578+ // / If nullptr, the numEventsInWaitList must be 0, indicating that no wait
579+ // / event.
580+ const ur_event_handle_t *phEventWaitList,
581+ // / [out][optional][alloc] return an event object that identifies this
582+ // / particular kernel execution instance. If phEventWaitList and phEvent
583+ // / are not NULL, phEvent must not refer to an element of the
584+ // / phEventWaitList array.
585+ ur_event_handle_t *phEvent) {
586+
587+ // This mutex is to prevent concurrent kernel launches across different queues
588+ // as the DeviceASAN local/private shadow memory does not support concurrent
589+ // kernel launches now.
590+ std::scoped_lock<ur_shared_mutex> Guard (
591+ getAsanInterceptor ()->KernelLaunchMutex );
592+
593+ auto pfnKernelLaunch = getContext ()->urDdiTable .Enqueue .pfnKernelLaunch ;
594+
595+ if (nullptr == pfnKernelLaunch) {
596+ return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
597+ }
598+
599+ UR_LOG_L (getContext ()->logger , DEBUG, " ==== urEnqueueKernelLaunch" );
600+
601+ LaunchInfo LaunchInfo (GetContext (hQueue), GetDevice (hQueue), pGlobalWorkSize,
602+ pLocalWorkSize, pGlobalWorkOffset, workDim);
603+ UR_CALL (LaunchInfo.Data .syncToDevice (hQueue));
604+
605+ UR_CALL (getAsanInterceptor ()->preLaunchKernel (hKernel, hQueue, LaunchInfo));
606+
607+ ur_result_t UrRes = getContext ()->urDdiTable .Enqueue .pfnKernelLaunch (
608+ hQueue, hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize,
609+ LaunchInfo.LocalWorkSize .data (), numPropsInLaunchPropList, launchPropList,
610+ numEventsInWaitList, phEventWaitList, phEvent);
611+ if (UrRes != UR_RESULT_SUCCESS) {
612+ if (UrRes == UR_RESULT_ERROR_OUT_OF_DEVICE_MEMORY) {
613+ UR_LOG_L (
614+ getContext ()->logger , ERR,
615+ " urEnqueueKernelLaunch failed due to out of device memory, maybe "
616+ " SLM is fully used." );
617+ }
618+ return UrRes;
619+ }
620+
621+ UR_CALL (getAsanInterceptor ()->postLaunchKernel (hKernel, hQueue, LaunchInfo));
622+
623+ return UR_RESULT_SUCCESS;
624+ }
625+
542626// /////////////////////////////////////////////////////////////////////////////
543627// / @brief Intercept function for urContextCreate
544628__urdlllocal ur_result_t UR_APICALL urContextCreate (
0 commit comments