@@ -52,6 +52,12 @@ ur_result_t setupContext(ur_context_handle_t Context, uint32_t numDevices,
5252 return UR_RESULT_SUCCESS;
5353}
5454
55+ bool isInstrumentedKernel (ur_kernel_handle_t hKernel) {
56+ auto hProgram = GetProgram (hKernel);
57+ auto PI = getAsanInterceptor ()->getProgramInfo (hProgram);
58+ return PI->isKernelInstrumented (hKernel);
59+ }
60+
5561} // namespace
5662
5763// /////////////////////////////////////////////////////////////////////////////
@@ -307,7 +313,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuild(
307313
308314 UR_CALL (pfnProgramBuild (hContext, hProgram, pOptions));
309315
310- UR_CALL (getAsanInterceptor ()->registerProgram (hContext, hProgram));
316+ UR_CALL (getAsanInterceptor ()->registerProgram (hProgram));
311317
312318 return UR_RESULT_SUCCESS;
313319}
@@ -331,8 +337,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramBuildExp(
331337 getContext ()->logger .debug (" ==== urProgramBuildExp" );
332338
333339 UR_CALL (pfnBuildExp (hProgram, numDevices, phDevices, pOptions));
334- UR_CALL (
335- getAsanInterceptor ()->registerProgram (GetContext (hProgram), hProgram));
340+ UR_CALL (getAsanInterceptor ()->registerProgram (hProgram));
336341
337342 return UR_RESULT_SUCCESS;
338343}
@@ -359,7 +364,7 @@ __urdlllocal ur_result_t UR_APICALL urProgramLink(
359364
360365 UR_CALL (pfnProgramLink (hContext, count, phPrograms, pOptions, phProgram));
361366
362- UR_CALL (getAsanInterceptor ()->registerProgram (hContext, *phProgram));
367+ UR_CALL (getAsanInterceptor ()->registerProgram (*phProgram));
363368
364369 return UR_RESULT_SUCCESS;
365370}
@@ -390,7 +395,7 @@ ur_result_t UR_APICALL urProgramLinkExp(
390395 UR_CALL (pfnProgramLinkExp (hContext, numDevices, phDevices, count,
391396 phPrograms, pOptions, phProgram));
392397
393- UR_CALL (getAsanInterceptor ()->registerProgram (hContext, *phProgram));
398+ UR_CALL (getAsanInterceptor ()->registerProgram (*phProgram));
394399
395400 return UR_RESULT_SUCCESS;
396401}
@@ -460,7 +465,13 @@ __urdlllocal ur_result_t UR_APICALL urEnqueueKernelLaunch(
460465
461466 getContext ()->logger .debug (" ==== urEnqueueKernelLaunch" );
462467
463- USMLaunchInfo LaunchInfo (GetContext (hQueue), GetDevice (hQueue),
468+ if (!isInstrumentedKernel (hKernel)) {
469+ return pfnKernelLaunch (hQueue, hKernel, workDim, pGlobalWorkOffset,
470+ pGlobalWorkSize, pLocalWorkSize,
471+ numEventsInWaitList, phEventWaitList, phEvent);
472+ }
473+
474+ USMLaunchInfo LaunchInfo (GetContext (hKernel), GetDevice (hQueue),
464475 pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset,
465476 workDim);
466477 UR_CALL (LaunchInfo.initialize ());
@@ -1351,7 +1362,9 @@ __urdlllocal ur_result_t UR_APICALL urKernelCreate(
13511362 getContext ()->logger .debug (" ==== urKernelCreate" );
13521363
13531364 UR_CALL (pfnCreate (hProgram, pKernelName, phKernel));
1354- UR_CALL (getAsanInterceptor ()->insertKernel (*phKernel));
1365+ if (isInstrumentedKernel (*phKernel)) {
1366+ UR_CALL (getAsanInterceptor ()->insertKernel (*phKernel));
1367+ }
13551368
13561369 return UR_RESULT_SUCCESS;
13571370}
@@ -1372,8 +1385,9 @@ __urdlllocal ur_result_t UR_APICALL urKernelRetain(
13721385 UR_CALL (pfnRetain (hKernel));
13731386
13741387 auto KernelInfo = getAsanInterceptor ()->getKernelInfo (hKernel);
1375- UR_ASSERT (KernelInfo != nullptr , UR_RESULT_ERROR_INVALID_VALUE);
1376- KernelInfo->RefCount ++;
1388+ if (KernelInfo) {
1389+ KernelInfo->RefCount ++;
1390+ }
13771391
13781392 return UR_RESULT_SUCCESS;
13791393}
@@ -1393,9 +1407,10 @@ __urdlllocal ur_result_t urKernelRelease(
13931407 UR_CALL (pfnRelease (hKernel));
13941408
13951409 auto KernelInfo = getAsanInterceptor ()->getKernelInfo (hKernel);
1396- UR_ASSERT (KernelInfo != nullptr , UR_RESULT_ERROR_INVALID_VALUE);
1397- if (--KernelInfo->RefCount == 0 ) {
1398- UR_CALL (getAsanInterceptor ()->eraseKernel (hKernel));
1410+ if (KernelInfo) {
1411+ if (--KernelInfo->RefCount == 0 ) {
1412+ UR_CALL (getAsanInterceptor ()->eraseKernel (hKernel));
1413+ }
13991414 }
14001415
14011416 return UR_RESULT_SUCCESS;
@@ -1421,10 +1436,11 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgValue(
14211436 getContext ()->logger .debug (" ==== urKernelSetArgValue" );
14221437
14231438 std::shared_ptr<MemBuffer> MemBuffer;
1439+ std::shared_ptr<KernelInfo> KernelInfo;
14241440 if (argSize == sizeof (ur_mem_handle_t ) &&
14251441 (MemBuffer = getAsanInterceptor ()->getMemBuffer (
1426- *ur_cast<const ur_mem_handle_t *>(pArgValue)))) {
1427- auto KernelInfo = getAsanInterceptor ()->getKernelInfo (hKernel);
1442+ *ur_cast<const ur_mem_handle_t *>(pArgValue))) &&
1443+ ( KernelInfo = getAsanInterceptor ()->getKernelInfo (hKernel))) {
14281444 std::scoped_lock<ur_shared_mutex> Guard (KernelInfo->Mutex );
14291445 KernelInfo->BufferArgs [argIndex] = std::move (MemBuffer);
14301446 } else {
@@ -1452,8 +1468,10 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgMemObj(
14521468
14531469 getContext ()->logger .debug (" ==== urKernelSetArgMemObj" );
14541470
1455- if (auto MemBuffer = getAsanInterceptor ()->getMemBuffer (hArgValue)) {
1456- auto KernelInfo = getAsanInterceptor ()->getKernelInfo (hKernel);
1471+ std::shared_ptr<MemBuffer> MemBuffer;
1472+ std::shared_ptr<KernelInfo> KernelInfo;
1473+ if ((MemBuffer = getAsanInterceptor ()->getMemBuffer (hArgValue)) &&
1474+ (KernelInfo = getAsanInterceptor ()->getKernelInfo (hKernel))) {
14571475 std::scoped_lock<ur_shared_mutex> Guard (KernelInfo->Mutex );
14581476 KernelInfo->BufferArgs [argIndex] = std::move (MemBuffer);
14591477 } else {
@@ -1483,8 +1501,7 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgLocal(
14831501 " ==== urKernelSetArgLocal (argIndex={}, argSize={})" , argIndex,
14841502 argSize);
14851503
1486- {
1487- auto KI = getAsanInterceptor ()->getKernelInfo (hKernel);
1504+ if (auto KI = getAsanInterceptor ()->getKernelInfo (hKernel)) {
14881505 std::scoped_lock<ur_shared_mutex> Guard (KI->Mutex );
14891506 // TODO: get local variable alignment
14901507 auto argSizeWithRZ = GetSizeAndRedzoneSizeForLocal (
@@ -1520,8 +1537,9 @@ __urdlllocal ur_result_t UR_APICALL urKernelSetArgPointer(
15201537 " ==== urKernelSetArgPointer (argIndex={}, pArgValue={})" , argIndex,
15211538 pArgValue);
15221539
1523- if (getAsanInterceptor ()->getOptions ().DetectKernelArguments ) {
1524- auto KI = getAsanInterceptor ()->getKernelInfo (hKernel);
1540+ std::shared_ptr<KernelInfo> KI;
1541+ if (getAsanInterceptor ()->getOptions ().DetectKernelArguments &&
1542+ (KI = getAsanInterceptor ()->getKernelInfo (hKernel))) {
15251543 std::scoped_lock<ur_shared_mutex> Guard (KI->Mutex );
15261544 KI->PointerArgs [argIndex] = {pArgValue, GetCurrentBacktrace ()};
15271545 }
0 commit comments