Skip to content

Commit 84f74cc

Browse files
cuda: Update tests to work with both the LEGACY API and Perfworks Metrics API workflows
1 parent 4060779 commit 84f74cc

23 files changed

+3753
-3377
lines changed

src/components/cuda/cupti_event_and_metric.c

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -807,7 +807,7 @@ int cuptie_ctx_create(cuptic_info_t thr_info, cuptie_control_t *pstate, uint32_t
807807
CUcontext internalContext;
808808
cudaArtCheckErrors( cudaSetDevicePtr(native_event_info.device), return PAPI_EMISC );
809809
cudaArtCheckErrors( cudaFreePtr(NULL), return PAPI_EMISC );
810-
cudaCheckErrors( cuCtxGetCurrentPtr(&internalContext), return PAPI_EMISC);
810+
cudaCheckErrors( cuCtxGetCurrentPtr(&internalContext), return PAPI_EMISC);
811811
thr_info[native_event_info.device].ctx = internalContext;
812812
// Pop the context off so verify_user_added_event_or_metric functions properly
813813
cudaCheckErrors( cuCtxPopCurrentPtr(&internalContext), return PAPI_EMISC );
@@ -867,13 +867,26 @@ int cuptie_ctx_start(cuptie_control_t state)
867867
{
868868
SUBDBG("ENTERING: Setting up profiling for the Event and Metric APIs.\n");
869869

870+
CUcontext currentUserContext;
871+
cudaCheckErrors( cuCtxGetCurrentPtr(&currentUserContext), return PAPI_EMISC);
872+
if (currentUserContext != NULL) {
873+
cudaCheckErrors( cuCtxPopCurrentPtr(&currentUserContext), return PAPI_EMISC );
874+
}
875+
870876
int deviceIdx;
871877
for (deviceIdx = 0; deviceIdx < numDevicesOnMachine; deviceIdx++) {
872878
cuptie_gpu_state_t *gpu_ctl = &(state->gpu_ctl[deviceIdx]);
873879
if (gpu_ctl->added_events->totalNumberOfUserAddedNativeEvents == 0) {
874880
continue;
875881
}
876882

883+
int papi_errno = cuptic_device_acquire(gpu_ctl->added_events, API_LEGACY);
884+
if (papi_errno != PAPI_OK) {
885+
SUBDBG("Profiling the same gpu from multiple event sets is not allowed.\n");
886+
return papi_errno;
887+
}
888+
889+
877890
cudaCheckErrors( cuCtxSetCurrentPtr(state->info[deviceIdx].ctx), return PAPI_EMISC );
878891

879892
// Calculate the total number of user added events
@@ -924,6 +937,10 @@ int cuptie_ctx_start(cuptie_control_t state)
924937
cuptiCheckErrors( cuCtxPopCurrentPtr(&state->info[deviceIdx].ctx), return PAPI_EMISC );
925938
}
926939

940+
if (currentUserContext != NULL) {
941+
cudaCheckErrors( cuCtxPushCurrentPtr(currentUserContext), return PAPI_EMISC );
942+
}
943+
927944
SUBDBG("EXITING: Profiling setup completed.\n");
928945
return PAPI_OK;
929946
}
@@ -941,6 +958,12 @@ int cuptie_ctx_read(cuptie_control_t state, long long **counterValues)
941958
{
942959
SUBDBG("ENTERING: Reading values for the Event and Metric APIs.\n");
943960

961+
CUcontext currentUserContext;
962+
cudaCheckErrors( cuCtxGetCurrentPtr(&currentUserContext), return PAPI_EMISC);
963+
if (currentUserContext != NULL) {
964+
cuptiCheckErrors( cuCtxPopCurrentPtr(&currentUserContext), return PAPI_EMISC );
965+
}
966+
944967
int numCountersRead = 0;
945968
long long *readCounterValues = state->counters;
946969

@@ -1117,6 +1140,10 @@ int cuptie_ctx_read(cuptie_control_t state, long long **counterValues)
11171140
state->read_count = numCountersRead;
11181141
*counterValues = readCounterValues;
11191142

1143+
if (currentUserContext != NULL) {
1144+
cuptiCheckErrors( cuCtxPushCurrentPtr(currentUserContext), return PAPI_EMISC );
1145+
}
1146+
11201147
SUBDBG("EXITING: Reading values completed.\n");
11211148
return PAPI_OK;
11221149
}
@@ -1131,6 +1158,12 @@ int cuptie_ctx_stop(cuptie_control_t state)
11311158
{
11321159
SUBDBG("ENTERING: Disabling and destroying the event group sets created. Collection of events will be stopped.\n");
11331160

1161+
CUcontext currentUserContext;
1162+
cudaCheckErrors( cuCtxGetCurrentPtr(&currentUserContext), return PAPI_EMISC);
1163+
if (currentUserContext != NULL) {
1164+
cudaCheckErrors( cuCtxPopCurrentPtr(&currentUserContext), return PAPI_EMISC );
1165+
}
1166+
11341167
int deviceIdx;
11351168
for (deviceIdx = 0; deviceIdx < numDevicesOnMachine; deviceIdx++) {
11361169
cuptie_gpu_state_t *gpu_ctl = &(state->gpu_ctl[deviceIdx]);
@@ -1146,9 +1179,18 @@ int cuptie_ctx_stop(cuptie_control_t state)
11461179
cuptiCheckErrors( cuptiEventGroupSetDisablePtr(eventGroupSet), return PAPI_EMISC );
11471180
cuptiCheckErrors( cuptiEventGroupSetsDestroyPtr(eventGroupSets), return PAPI_EMISC );
11481181

1182+
int papi_errno = cuptic_device_release(gpu_ctl->added_events, API_LEGACY);
1183+
if (papi_errno != PAPI_OK) {
1184+
return papi_errno;
1185+
}
1186+
11491187
cudaCheckErrors( cuCtxPopCurrentPtr(&state->info[deviceIdx].ctx), return PAPI_EMISC );
11501188
}
11511189

1190+
if (currentUserContext != NULL) {
1191+
cudaCheckErrors( cuCtxPushCurrentPtr(currentUserContext), return PAPI_EMISC );
1192+
}
1193+
11521194
SUBDBG("EXITING: Disabling event group sets completed.\n");
11531195
return PAPI_OK;
11541196
}
@@ -1164,6 +1206,12 @@ int cuptie_ctx_reset(cuptie_control_t state)
11641206
{
11651207
SUBDBG("ENTERING: Resetting counter values.\n");
11661208

1209+
CUcontext currentUserContext;
1210+
cudaCheckErrors( cuCtxGetCurrentPtr(&currentUserContext), return PAPI_EMISC);
1211+
if (currentUserContext != NULL) {
1212+
cudaCheckErrors( cuCtxPopCurrentPtr(&currentUserContext), return PAPI_EMISC );
1213+
}
1214+
11671215
int counterIdx;
11681216
for (counterIdx = 0; counterIdx < state->read_count; counterIdx++) {
11691217
state->counters[counterIdx] = 0;
@@ -1192,6 +1240,10 @@ int cuptie_ctx_reset(cuptie_control_t state)
11921240
cudaCheckErrors( cuCtxPopCurrentPtr(&state->info[deviceIdx].ctx), return PAPI_EMISC );
11931241
}
11941242

1243+
if (currentUserContext != NULL) {
1244+
cudaCheckErrors( cuCtxPushCurrentPtr(currentUserContext), return PAPI_EMISC );
1245+
}
1246+
11951247
SUBDBG("EXITING: Resetting counter values completed.\n");
11961248
return PAPI_OK;
11971249
}
@@ -1425,11 +1477,16 @@ static int verify_user_added_event_or_metric(uint32_t *events_id, int num_events
14251477
}
14261478
totalNumberOfUserAddedEvents++;
14271479
state->gpu_ctl[native_event_info.device].added_events->totalNumberOfUserAddedNativeEvents = totalNumberOfUserAddedEvents;
1480+
// For a specific device table, get the current event index
1481+
int idx = state->gpu_ctl[native_event_info.device].added_events->count;
1482+
state->gpu_ctl[native_event_info.device].added_events->cuda_devs[idx] = native_event_info.device;
1483+
state->gpu_ctl[native_event_info.device].added_events->count++;
14281484

14291485
// Pop off the set context
14301486
cudaCheckErrors( cuCtxPopCurrentPtr(&thr_info[native_event_info.device].ctx), return PAPI_EMISC );
14311487
}
14321488

1489+
14331490
SUBDBG("EXITING: Checking user added a valid event completed.\n");
14341491
return PAPI_OK;
14351492
}
@@ -1491,6 +1548,7 @@ static int create_event_and_metric_table(int totalNumberOfEntries, cuptiu_event_
14911548
goto fn_fail;
14921549
}
14931550

1551+
eventTable->count = 0;
14941552
eventTable->capacity = totalNumberOfEntries;
14951553
eventTable->startTimeStampNs = 0;
14961554
eventTable->totalNumberOfUserAddedNativeEvents = 0;

src/components/cuda/cupti_event_and_metric.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ typedef struct event_and_metric_record_s {
5656
typedef struct event_and_metric_table_s {
5757
unsigned int count;
5858
unsigned int capacity;
59+
int cuda_devs[30];
5960
CUpti_EventGroupSets *eventGroupSets;
6061
CUpti_MetricID metricIDs[PAPI_CUDA_MAX_COUNTERS];
6162
int *idsThatMakeupAUserAddedEventArray[PAPI_CUDA_MAX_COUNTERS];

src/components/cuda/cupti_profiler.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -817,7 +817,7 @@ int cuptip_ctx_start(cuptip_control_t state)
817817
}
818818

819819
LOGDBG("Device num %d: event_count %d, rmr count %d\n", dev_id, gpu_ctl->added_events->count, gpu_ctl->numberOfRawMetricRequests);
820-
papi_errno = cuptic_device_acquire(state->gpu_ctl[dev_id].added_events);
820+
papi_errno = cuptic_device_acquire(state->gpu_ctl[dev_id].added_events, API_PERFWORKS);
821821
if (papi_errno != PAPI_OK) {
822822
ERRDBG("Profiling same gpu from multiple event sets not allowed.\n");
823823
return papi_errno;
@@ -1134,7 +1134,7 @@ int cuptip_ctx_stop(cuptip_control_t state)
11341134
return papi_errno;
11351135
}
11361136

1137-
papi_errno = cuptic_device_release(state->gpu_ctl[dev_id].added_events);
1137+
papi_errno = cuptic_device_release(state->gpu_ctl[dev_id].added_events, API_PERFWORKS);
11381138
if (papi_errno != PAPI_OK) {
11391139
return papi_errno;
11401140
}

src/components/cuda/papi_cupti_common.c

Lines changed: 52 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
#include "cupti_config.h"
1717
#include "papi_cupti_common.h"
18+
#include "cupti_event_and_metric.h"
1819

1920
static void *dl_drv, *dl_rt;
2021

@@ -848,47 +849,77 @@ int cuptic_ctxarr_destroy(cuptic_info_t *pinfo)
848849
return PAPI_OK;
849850
}
850851

851-
int _devmask_events_get(cuptiu_event_table_t *evt_table, gpu_occupancy_t *bitmask)
852+
int cuptic_device_acquire(void *evt_table, int flag)
852853
{
853-
gpu_occupancy_t acq_mask = 0;
854-
long i;
855-
for (i = 0; i < evt_table->count; i++) {
856-
acq_mask |= (1 << evt_table->cuda_devs[i]);
854+
int i;
855+
gpu_occupancy_t bitmask = 0;
856+
switch(flag) {
857+
case API_LEGACY:
858+
{
859+
cuptiu_event_and_metric_table_t *legacy_evt_table = (cuptiu_event_and_metric_table_t *) evt_table;
860+
for (i = 0; i < legacy_evt_table->count; i++) {
861+
bitmask |= (1 << legacy_evt_table->cuda_devs[i]);
862+
}
863+
break;
864+
}
865+
case API_PERFWORKS:
866+
{
867+
cuptiu_event_table_t *perfworks_evt_table = (cuptiu_event_table_t *) evt_table;
868+
for (i = 0; i < perfworks_evt_table->count; i++) {
869+
bitmask |= (1 << perfworks_evt_table->cuda_devs[i]);
870+
}
871+
break;
872+
}
873+
default:
874+
SUBDBG("Provided flag is not accounted for in this switch statement. Code needs to be updated.\n");
875+
return PAPI_EBUG;
857876
}
858-
*bitmask = acq_mask;
859877

860-
return PAPI_OK;
861-
}
862-
863-
int cuptic_device_acquire(cuptiu_event_table_t *evt_table)
864-
{
865-
gpu_occupancy_t bitmask;
866-
int papi_errno = _devmask_events_get(evt_table, &bitmask);
867-
if (papi_errno != PAPI_OK) {
868-
return papi_errno;
869-
}
870878
if (bitmask & global_gpu_bitmask) {
871879
return PAPI_ECNFLCT;
872880
}
881+
873882
_papi_hwi_lock(_cuda_lock);
874883
global_gpu_bitmask |= bitmask;
875884
_papi_hwi_unlock(_cuda_lock);
885+
876886
return PAPI_OK;
877887
}
878888

879-
int cuptic_device_release(cuptiu_event_table_t *evt_table)
889+
int cuptic_device_release(void *evt_table, int flag)
880890
{
881-
gpu_occupancy_t bitmask;
882-
int papi_errno = _devmask_events_get(evt_table, &bitmask);
883-
if (papi_errno != PAPI_OK) {
884-
return papi_errno;
891+
int i;
892+
gpu_occupancy_t bitmask = 0;
893+
switch(flag) {
894+
case API_LEGACY:
895+
{
896+
cuptiu_event_and_metric_table_t *legacy_evt_table = (cuptiu_event_and_metric_table_t *) evt_table;
897+
for (i = 0; i < legacy_evt_table->count; i++) {
898+
bitmask |= (1 << legacy_evt_table->cuda_devs[i]);
899+
}
900+
break;
901+
}
902+
case API_PERFWORKS:
903+
{
904+
cuptiu_event_table_t *perfworks_evt_table = (cuptiu_event_table_t *) evt_table;
905+
for (i = 0; i < perfworks_evt_table->count; i++) {
906+
bitmask |= (1 << perfworks_evt_table->cuda_devs[i]);
907+
}
908+
break;
909+
}
910+
default:
911+
SUBDBG("Provided flag is not accounted for in this switch statement. Code needs to be updated.\n");
912+
return PAPI_EBUG;
885913
}
914+
886915
if ((bitmask & global_gpu_bitmask) != bitmask) {
887916
return PAPI_EMISC;
888917
}
918+
889919
_papi_hwi_lock(_cuda_lock);
890920
global_gpu_bitmask ^= bitmask;
891921
_papi_hwi_unlock(_cuda_lock);
922+
892923
return PAPI_OK;
893924
}
894925

src/components/cuda/papi_cupti_common.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,8 +76,8 @@ int cuptic_ctxarr_get_ctx(cuptic_info_t info, int dev_id, CUcontext *ctx);
7676
int cuptic_ctxarr_destroy(cuptic_info_t *pinfo);
7777

7878
/* functions to track the occupancy of gpu counters in event sets */
79-
int cuptic_device_acquire(cuptiu_event_table_t *evt_table);
80-
int cuptic_device_release(cuptiu_event_table_t *evt_table);
79+
int cuptic_device_acquire(void *evt_table, int flag);
80+
int cuptic_device_release(void *evt_table, int flag);
8181

8282
/* device qualifier interfaces */
8383
int cuptiu_dev_set(cuptiu_bitmap_t *bitmap, int i);

0 commit comments

Comments
 (0)