@@ -223,23 +223,28 @@ static void runtimeApiCallback(void *userdata, CUpti_CallbackDomain domain,
223223 CUpti_CallbackId cbid ,
224224 const CUpti_CallbackData * cbdata ) {
225225 if (domain == CUPTI_CB_DOMAIN_RUNTIME_API ) {
226- // We hook on EXIT because that makes our uprobe overhead not add to GPU
226+ // We hook on EXIT because that makes our probe overhead not add to GPU
227227 // launch latency and hopefully covers some of the overhead in the shadow of
228228 // GPU async work.
229229 if (cbdata -> callbackSite == CUPTI_API_EXIT ) {
230230 // Probabilistic gate should go here.
231231 uint32_t correlationId = cbdata -> correlationId ;
232232 // Call stub functions for uprobe attachment
233+ const char * name = cbdata -> functionName ;
233234 switch (cbid ) {
234235 case CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernel_v7000 :
235236 case CUPTI_RUNTIME_TRACE_CBID_cudaLaunchKernelExC_v11060 :
237+ if (cbdata -> symbolName ) {
238+ DEBUG_PRINTF ("----------- %s\n" , cbdata -> symbolName );
239+ name = cbdata -> symbolName ;
240+ }
236241 case CUPTI_RUNTIME_TRACE_CBID_cudaGraphLaunch_v10000 :
237242 case CUPTI_RUNTIME_TRACE_CBID_cudaGraphLaunch_ptsz_v10000 :
238- DEBUG_PRINTF (
239- "[CUPTI] Runtime API callback: cbid=%d, correlationId=%u \n", cbid ,
240- correlationId );
243+ DEBUG_PRINTF ("[CUPTI] Runtime API callback: cbid=%d, correlationId=%u, "
244+ "func=%s \n" ,
245+ cbid , correlationId , cbdata -> functionName );
241246 outstandingEvents ++ ;
242- DTRACE_PROBE1 (parcagpu , cuda_correlation , correlationId );
247+ DTRACE_PROBE3 (parcagpu , cuda_correlation , correlationId , cbid , name );
243248 break ;
244249 default :
245250 // Debug: print any other runtime API callback we see with backtrace
@@ -337,9 +342,9 @@ static void bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer,
337342 k -> graphId , k -> graphNodeId , k -> name , k -> correlationId ,
338343 k -> deviceId , k -> streamId , k -> start , k -> end ,
339344 k -> end - k -> start );
340- DTRACE_PROBE7 (parcagpu , kernel_executed , k -> start , k -> end ,
345+ DTRACE_PROBE8 (parcagpu , kernel_executed , k -> start , k -> end ,
341346 k -> correlationId , k -> deviceId , k -> streamId , k -> graphId ,
342- k -> name );
347+ k -> graphNodeId , k -> name );
343348 break ;
344349 }
345350 // case CUPTI_ACTIVITY_KIND_GRAPH_TRACE: {
0 commit comments