@@ -807,7 +807,7 @@ int cuptie_ctx_create(cuptic_info_t thr_info, cuptie_control_t *pstate, uint32_t
807807 CUcontext internalContext ;
808808 cudaArtCheckErrors ( cudaSetDevicePtr (native_event_info .device ), return PAPI_EMISC );
809809 cudaArtCheckErrors ( cudaFreePtr (NULL ), return PAPI_EMISC );
810- cudaCheckErrors ( cuCtxGetCurrentPtr (& internalContext ), return PAPI_EMISC );
810+ cudaCheckErrors ( cuCtxGetCurrentPtr (& internalContext ), return PAPI_EMISC );
811811 thr_info [native_event_info .device ].ctx = internalContext ;
812812 // Pop the context off so verify_user_added_event_or_metric functions properly
813813 cudaCheckErrors ( cuCtxPopCurrentPtr (& internalContext ), return PAPI_EMISC );
@@ -867,13 +867,26 @@ int cuptie_ctx_start(cuptie_control_t state)
867867{
868868 SUBDBG ("ENTERING: Setting up profiling for the Event and Metric APIs.\n" );
869869
870+ CUcontext currentUserContext ;
871+ cudaCheckErrors ( cuCtxGetCurrentPtr (& currentUserContext ), return PAPI_EMISC );
872+ if (currentUserContext != NULL ) {
873+ cudaCheckErrors ( cuCtxPopCurrentPtr (& currentUserContext ), return PAPI_EMISC );
874+ }
875+
870876 int deviceIdx ;
871877 for (deviceIdx = 0 ; deviceIdx < numDevicesOnMachine ; deviceIdx ++ ) {
872878 cuptie_gpu_state_t * gpu_ctl = & (state -> gpu_ctl [deviceIdx ]);
873879 if (gpu_ctl -> added_events -> totalNumberOfUserAddedNativeEvents == 0 ) {
874880 continue ;
875881 }
876882
883+ int papi_errno = cuptic_device_acquire (gpu_ctl -> added_events , API_LEGACY );
884+ if (papi_errno != PAPI_OK ) {
885+ SUBDBG ("Profiling the same gpu from multiple event sets is not allowed.\n" );
886+ return papi_errno ;
887+ }
888+
889+
877890 cudaCheckErrors ( cuCtxSetCurrentPtr (state -> info [deviceIdx ].ctx ), return PAPI_EMISC );
878891
879892 // Calculate the total number of user added events
@@ -924,6 +937,10 @@ int cuptie_ctx_start(cuptie_control_t state)
924937 cuptiCheckErrors ( cuCtxPopCurrentPtr (& state -> info [deviceIdx ].ctx ), return PAPI_EMISC );
925938 }
926939
940+ if (currentUserContext != NULL ) {
941+ cudaCheckErrors ( cuCtxPushCurrentPtr (currentUserContext ), return PAPI_EMISC );
942+ }
943+
927944 SUBDBG ("EXITING: Profiling setup completed.\n" );
928945 return PAPI_OK ;
929946}
@@ -941,6 +958,12 @@ int cuptie_ctx_read(cuptie_control_t state, long long **counterValues)
941958{
942959 SUBDBG ("ENTERING: Reading values for the Event and Metric APIs.\n" );
943960
961+ CUcontext currentUserContext ;
962+ cudaCheckErrors ( cuCtxGetCurrentPtr (& currentUserContext ), return PAPI_EMISC );
963+ if (currentUserContext != NULL ) {
964+ cuptiCheckErrors ( cuCtxPopCurrentPtr (& currentUserContext ), return PAPI_EMISC );
965+ }
966+
944967 int numCountersRead = 0 ;
945968 long long * readCounterValues = state -> counters ;
946969
@@ -1117,6 +1140,10 @@ int cuptie_ctx_read(cuptie_control_t state, long long **counterValues)
11171140 state -> read_count = numCountersRead ;
11181141 * counterValues = readCounterValues ;
11191142
1143+ if (currentUserContext != NULL ) {
1144+ cuptiCheckErrors ( cuCtxPushCurrentPtr (currentUserContext ), return PAPI_EMISC );
1145+ }
1146+
11201147 SUBDBG ("EXITING: Reading values completed.\n" );
11211148 return PAPI_OK ;
11221149}
@@ -1131,6 +1158,12 @@ int cuptie_ctx_stop(cuptie_control_t state)
11311158{
11321159 SUBDBG ("ENTERING: Disabling and destroying the event group sets created. Collection of events will be stopped.\n" );
11331160
1161+ CUcontext currentUserContext ;
1162+ cudaCheckErrors ( cuCtxGetCurrentPtr (& currentUserContext ), return PAPI_EMISC );
1163+ if (currentUserContext != NULL ) {
1164+ cudaCheckErrors ( cuCtxPopCurrentPtr (& currentUserContext ), return PAPI_EMISC );
1165+ }
1166+
11341167 int deviceIdx ;
11351168 for (deviceIdx = 0 ; deviceIdx < numDevicesOnMachine ; deviceIdx ++ ) {
11361169 cuptie_gpu_state_t * gpu_ctl = & (state -> gpu_ctl [deviceIdx ]);
@@ -1146,9 +1179,18 @@ int cuptie_ctx_stop(cuptie_control_t state)
11461179 cuptiCheckErrors ( cuptiEventGroupSetDisablePtr (eventGroupSet ), return PAPI_EMISC );
11471180 cuptiCheckErrors ( cuptiEventGroupSetsDestroyPtr (eventGroupSets ), return PAPI_EMISC );
11481181
1182+ int papi_errno = cuptic_device_release (gpu_ctl -> added_events , API_LEGACY );
1183+ if (papi_errno != PAPI_OK ) {
1184+ return papi_errno ;
1185+ }
1186+
11491187 cudaCheckErrors ( cuCtxPopCurrentPtr (& state -> info [deviceIdx ].ctx ), return PAPI_EMISC );
11501188 }
11511189
1190+ if (currentUserContext != NULL ) {
1191+ cudaCheckErrors ( cuCtxPushCurrentPtr (currentUserContext ), return PAPI_EMISC );
1192+ }
1193+
11521194 SUBDBG ("EXITING: Disabling event group sets completed.\n" );
11531195 return PAPI_OK ;
11541196}
@@ -1164,6 +1206,12 @@ int cuptie_ctx_reset(cuptie_control_t state)
11641206{
11651207 SUBDBG ("ENTERING: Resetting counter values.\n" );
11661208
1209+ CUcontext currentUserContext ;
1210+ cudaCheckErrors ( cuCtxGetCurrentPtr (& currentUserContext ), return PAPI_EMISC );
1211+ if (currentUserContext != NULL ) {
1212+ cudaCheckErrors ( cuCtxPopCurrentPtr (& currentUserContext ), return PAPI_EMISC );
1213+ }
1214+
11671215 int counterIdx ;
11681216 for (counterIdx = 0 ; counterIdx < state -> read_count ; counterIdx ++ ) {
11691217 state -> counters [counterIdx ] = 0 ;
@@ -1192,6 +1240,10 @@ int cuptie_ctx_reset(cuptie_control_t state)
11921240 cudaCheckErrors ( cuCtxPopCurrentPtr (& state -> info [deviceIdx ].ctx ), return PAPI_EMISC );
11931241 }
11941242
1243+ if (currentUserContext != NULL ) {
1244+ cudaCheckErrors ( cuCtxPushCurrentPtr (currentUserContext ), return PAPI_EMISC );
1245+ }
1246+
11951247 SUBDBG ("EXITING: Resetting counter values completed.\n" );
11961248 return PAPI_OK ;
11971249}
@@ -1425,11 +1477,16 @@ static int verify_user_added_event_or_metric(uint32_t *events_id, int num_events
14251477 }
14261478 totalNumberOfUserAddedEvents ++ ;
14271479 state -> gpu_ctl [native_event_info .device ].added_events -> totalNumberOfUserAddedNativeEvents = totalNumberOfUserAddedEvents ;
1480+ // For a specific device table, get the current event index
1481+ int idx = state -> gpu_ctl [native_event_info .device ].added_events -> count ;
1482+ state -> gpu_ctl [native_event_info .device ].added_events -> cuda_devs [idx ] = native_event_info .device ;
1483+ state -> gpu_ctl [native_event_info .device ].added_events -> count ++ ;
14281484
14291485 // Pop off the set context
14301486 cudaCheckErrors ( cuCtxPopCurrentPtr (& thr_info [native_event_info .device ].ctx ), return PAPI_EMISC );
14311487 }
14321488
1489+
14331490 SUBDBG ("EXITING: Checking user added a valid event completed.\n" );
14341491 return PAPI_OK ;
14351492}
@@ -1491,6 +1548,7 @@ static int create_event_and_metric_table(int totalNumberOfEntries, cuptiu_event_
14911548 goto fn_fail ;
14921549 }
14931550
1551+ eventTable -> count = 0 ;
14941552 eventTable -> capacity = totalNumberOfEntries ;
14951553 eventTable -> startTimeStampNs = 0 ;
14961554 eventTable -> totalNumberOfUserAddedNativeEvents = 0 ;
0 commit comments