Skip to content

Commit 94ad308

Browse files
cuda: Add #if defined guards around calls to cuCtxCreate to avoid compilation errors with CUDA Toolkit 13
1 parent ce3dbc8 commit 94ad308

File tree

10 files changed

+105
-25
lines changed

10 files changed

+105
-25
lines changed

src/components/cuda/papi_cupti_common.c

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,6 @@ static int _devmask_events_get(cuptiu_event_table_t *evt_table, gpu_occupancy_t
7979
CUresult ( *cuCtxGetCurrentPtr ) (CUcontext *);
8080
CUresult ( *cuCtxSetCurrentPtr ) (CUcontext);
8181
CUresult ( *cuCtxDestroyPtr ) (CUcontext);
82-
CUresult ( *cuCtxCreatePtr ) (CUcontext *pctx, unsigned int flags, CUdevice dev);
8382
CUresult ( *cuCtxGetDevicePtr ) (CUdevice *);
8483
CUresult ( *cuDeviceGetPtr ) (CUdevice *, int);
8584
CUresult ( *cuDeviceGetCountPtr ) (int *);
@@ -125,7 +124,6 @@ int load_cuda_sym(void)
125124
cuCtxSetCurrentPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxSetCurrent");
126125
cuCtxGetCurrentPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxGetCurrent");
127126
cuCtxDestroyPtr = DLSYM_AND_CHECK(dl_drv, "cuCtxDestroy");
128-
cuCtxCreatePtr = DLSYM_AND_CHECK(dl_drv, "cuCtxCreate");
129127
cuCtxGetDevicePtr = DLSYM_AND_CHECK(dl_drv, "cuCtxGetDevice");
130128
cuDeviceGetPtr = DLSYM_AND_CHECK(dl_drv, "cuDeviceGet");
131129
cuDeviceGetCountPtr = DLSYM_AND_CHECK(dl_drv, "cuDeviceGetCount");
@@ -156,7 +154,6 @@ static int unload_cuda_sym(void)
156154
cuCtxSetCurrentPtr = NULL;
157155
cuCtxGetCurrentPtr = NULL;
158156
cuCtxDestroyPtr = NULL;
159-
cuCtxCreatePtr = NULL;
160157
cuCtxGetDevicePtr = NULL;
161158
cuDeviceGetPtr = NULL;
162159
cuDeviceGetCountPtr = NULL;

src/components/cuda/papi_cupti_common.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ extern unsigned int _cuda_lock;
2929
extern CUresult ( *cuCtxGetCurrentPtr ) (CUcontext *);
3030
extern CUresult ( *cuCtxSetCurrentPtr ) (CUcontext);
3131
extern CUresult ( *cuCtxDestroyPtr ) (CUcontext);
32-
extern CUresult ( *cuCtxCreatePtr ) (CUcontext *pctx, unsigned int flags, CUdevice dev);
3332
extern CUresult ( *cuCtxGetDevicePtr ) (CUdevice *);
3433
extern CUresult ( *cuDeviceGetPtr ) (CUdevice *, int);
3534
extern CUresult ( *cuDeviceGetCountPtr ) (int *);

src/components/cuda/tests/HelloWorld.cu

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -170,13 +170,21 @@ int main(int argc, char** argv)
170170
// If multiple GPUs/contexts were being used, you'd need to
171171
// create contexts for each device. See, for example,
172172
// simpleMultiGPU.cu.
173-
174-
// Context Create. We will use this one to run our kernel.
175-
cuError = cuCtxCreate(&sessionCtx, 0, 0); // Create a context, NULL flags, Device 0.
173+
int flags = 0;
174+
CUdevice device = 0;
175+
#if defined(CUDA_TOOLKIT_GE_13)
176+
cuError = cuCtxCreate(&sessionCtx, (CUctxCreateParams*)0, flags, device);
177+
if (cuError != CUDA_SUCCESS) {
178+
fprintf(stderr, "Failed to create Cuda context for a Cuda Toolkit version >= 13: %d\n", cuError);
179+
exit(1);
180+
}
181+
#else
182+
cuError = cuCtxCreate(&sessionCtx, flags, device);
176183
if (cuError != CUDA_SUCCESS) {
177-
fprintf(stderr, "Failed to create cuContext: %d\n", cuError);
178-
exit(-1);
184+
fprintf(stderr, "Failed to create Cuda context for a Cuda Toolkit version < 13: %d\n", cuError);
185+
exit(1);
179186
}
187+
#endif
180188

181189
if (STEP_BY_STEP_DEBUG) {
182190
cuCtxGetCurrent(&getCtx);

src/components/cuda/tests/Makefile

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,15 @@ TESTS_NOCTX = concurrent_profiling_noCuCtx pthreads_noCuCtx \
1313
simpleMultiGPU_noCuCtx
1414

1515
NVCC = $(PAPI_CUDA_ROOT)/bin/nvcc
16+
NVCC_VERSION := $(shell $(NVCC) --version | grep -oP '(?<=release )\d+\.\d+')
17+
18+
# Check to see if we are using a Cuda Toolkit version greater than or equal to 13
19+
# as the signature of cuCtxCreate changed in that version (it gained a CUctxCreateParams* argument)
20+
CUDA_TOOLKIT_GE_13 := $(shell echo "$(NVCC_VERSION) 13.0" | awk '{print $$1 >= $$2}')
21+
CUDA_CPPFLAGS :=
22+
ifeq ($(CUDA_TOOLKIT_GE_13), 1)
23+
CUDA_CPPFLAGS += -DCUDA_TOOLKIT_GE_13
24+
endif
1625

1726
PAPI_FLAG = -DPAPI # Comment this line for tests to run without PAPI profiling
1827
NVCFLAGS = -g -ccbin='$(CC)' $(PAPI_FLAG)
@@ -26,10 +35,10 @@ CUDALIBS = -L$(PAPI_CUDA_ROOT)/lib64 -lcudart -lcuda
2635
cuda_tests: $(TESTS) $(TESTS_NOCTX)
2736

2837
%.o:%.cu
29-
$(NVCC) $(INCLUDE) $(NVCFLAGS) -c -o $@ $<
38+
$(NVCC) $(INCLUDE) $(NVCFLAGS) $(CUDA_CPPFLAGS) -c -o $@ $<
3039

3140
%.mac:%.cu
32-
$(NVCC) $(INCLUDE) $(NVCFLAGS) -E -c -o $@ $<
41+
$(NVCC) $(INCLUDE) $(NVCFLAGS) $(CUDA_CPPFLAGS) -E -c -o $@ $<
3342

3443
test_multi_read_and_reset: test_multi_read_and_reset.o $(UTILOBJS)
3544
$(CXX) $(CFLAGS) -o test_multi_read_and_reset test_multi_read_and_reset.o $(UTILOBJS) $(PAPILIB) $(CUDALIBS) $(LDFLAGS)

src/components/cuda/tests/concurrent_profiling.cu

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,12 @@ int main(int argc, char **argv)
350350
// config.maxLaunchesPerPass = 1; // Must be >= maxRangesPerPass. Set this to the largest count of kernel launches which may be encountered in any Pass in this Session
351351

352352
// // Device 0 has max of 3 passes; other devices only run one pass in this sample code
353-
DRIVER_API_CALL(cuCtxCreate(&(config.context), 0, device)); // Either set to a context, or may be NULL if a default context has been created
353+
int flags = 0;
354+
#if defined(CUDA_TOOLKIT_GE_13)
355+
DRIVER_API_CALL( cuCtxCreate(&(config.context), (CUctxCreateParams*)0, flags, device) );
356+
#else
357+
DRIVER_API_CALL( cuCtxCreate(&(config.context), flags, device) );
358+
#endif
354359
deviceData[device].config = config;// Save this device config
355360

356361
// Initialize CUPTI Profiling structures

src/components/cuda/tests/cudaOpenMP.cu

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,13 @@ int main(int argc, char *argv[])
170170
int num_threads = (num_gpus > MAX_THREADS) ? MAX_THREADS : num_gpus;
171171
// Create a gpu context for every thread
172172
for (i=0; i < num_threads; i++) {
173-
DRIVER_API_CALL(cuCtxCreate(&(ctx_arr[i]), 0, i % num_gpus)); // "% num_gpus" allows more CPU threads than GPU devices
173+
int flags = 0;
174+
CUdevice device = i % num_gpus;
175+
#if defined(CUDA_TOOLKIT_GE_13)
176+
DRIVER_API_CALL( cuCtxCreate(&(ctx_arr[i]), (CUctxCreateParams*)0, flags, device) );
177+
#else
178+
DRIVER_API_CALL( cuCtxCreate(&(ctx_arr[i]), flags, device) );
179+
#endif
174180
DRIVER_API_CALL(cuCtxPopCurrent(&(ctx_arr[i])));
175181
}
176182

src/components/cuda/tests/pthreads.cu

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,14 @@ int main(int argc, char **argv)
232232
for(i = 0; i < numGPUs; i++)
233233
{
234234
tid[i] = i;
235-
DRIVER_API_CALL(cuCtxCreate(&(cuCtx[i]), 0, i % numGPUs));
235+
236+
int flags = 0;
237+
CUdevice device = i % numGPUs;
238+
#if defined(CUDA_TOOLKIT_GE_13)
239+
DRIVER_API_CALL( cuCtxCreate(&(cuCtx[i]), (CUctxCreateParams*)0, flags, device) );
240+
#else
241+
DRIVER_API_CALL( cuCtxCreate(&(cuCtx[i]), flags, device) );
242+
#endif
236243
DRIVER_API_CALL(cuCtxPopCurrent(&(cuCtx[i])));
237244

238245
rc = pthread_create(&tidarr[i], NULL, thread_gpu, &(tid[i]));

src/components/cuda/tests/simpleMultiGPU.cu

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,12 @@ int main( int argc, char **argv )
235235
// to as late as PAPI_start(), but they are needed to
236236
// create streams, alloc memory, etc.
237237
for (i = 0; i < num_gpus; i++) {
238-
CHECK_CU_ERROR( cuCtxCreate( &(ctx[i]), 0, device[i] ), "cuCtxCreate" ); // automatically pushes the new context on the stack.
238+
int flags = 0;
239+
#if defined(CUDA_TOOLKIT_GE_13)
240+
CHECK_CU_ERROR( cuCtxCreate(&(ctx[i]), (CUctxCreateParams*)0, flags, device[i]), "cuCtxCreate" );
241+
#else
242+
CHECK_CU_ERROR( cuCtxCreate(&(ctx[i]), flags, device[i]), "cuCtxCreate" );
243+
#endif
239244
CHECK_CU_ERROR( cuCtxPopCurrent(&poppedCtx), "cuCtxPopCurrent" ); // ... so take it off.
240245
}
241246

src/components/cuda/tests/test_2thr_1gpu_not_allowed.cu

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,14 @@ int main(int argc, char **argv)
151151
for(i = 0; i < NUM_THREADS; i++)
152152
{
153153
data[i].idx = i;
154-
DRIVER_API_CALL(cuCtxCreate(&(data[i].cuCtx), 0, 0));
154+
155+
int flags = 0;
156+
CUdevice device = 0;
157+
#if defined(CUDA_TOOLKIT_GE_13)
158+
DRIVER_API_CALL( cuCtxCreate(&(data[i].cuCtx), (CUctxCreateParams*)0, flags, device) );
159+
#else
160+
DRIVER_API_CALL( cuCtxCreate(&(data[i].cuCtx), flags, device) );
161+
#endif
155162
DRIVER_API_CALL(cuCtxPopCurrent(&(data[i].cuCtx)));
156163

157164
rc = pthread_create(&data[i].tid, NULL, thread_gpu, &(data[i]));

src/components/cuda/tests/test_multi_read_and_reset.cu

Lines changed: 46 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -77,11 +77,23 @@ void multi_reset(int event_count, char **evt_names, long long *values)
7777
{
7878
CUcontext ctx;
7979
int papi_errno, i;
80-
papi_errno = cuCtxCreate(&ctx, 0, 0);
81-
if (papi_errno != CUDA_SUCCESS) {
82-
fprintf(stderr, "cuda error: failed to create cuda context.\n");
80+
81+
CUresult cuError;
82+
int flags = 0;
83+
CUdevice device = 0;
84+
#if defined(CUDA_TOOLKIT_GE_13)
85+
cuError = cuCtxCreate(&ctx, (CUctxCreateParams*)0, flags, device);
86+
if (cuError != CUDA_SUCCESS) {
87+
fprintf(stderr, "Failed to create Cuda context for a Cuda Toolkit version >= 13: %d\n", cuError);
88+
exit(1);
89+
}
90+
#else
91+
cuError = cuCtxCreate(&ctx, flags, device);
92+
if (cuError != CUDA_SUCCESS) {
93+
fprintf(stderr, "Failed to create Cuda context for a Cuda Toolkit version < 13: %d\n", cuError);
8394
exit(1);
8495
}
96+
#endif
8597

8698
#ifdef PAPI
8799
int EventSet = PAPI_NULL;
@@ -171,11 +183,23 @@ void multi_read(int event_count, char **evt_names, long long *values)
171183
{
172184
CUcontext ctx;
173185
int papi_errno, i;
174-
papi_errno = cuCtxCreate(&ctx, 0, 0);
175-
if (papi_errno != CUDA_SUCCESS) {
176-
fprintf(stderr, "cuda error: failed to create cuda context.\n");
186+
187+
CUresult cuError;
188+
int flags = 0;
189+
CUdevice device = 0;
190+
#if defined(CUDA_TOOLKIT_GE_13)
191+
cuError = cuCtxCreate(&ctx, (CUctxCreateParams*)0, flags, device);
192+
if (cuError != CUDA_SUCCESS) {
193+
fprintf(stderr, "Failed to create Cuda context for a Cuda Toolkit version >= 13: %d\n", cuError);
177194
exit(1);
178195
}
196+
#else
197+
cuError = cuCtxCreate(&ctx, flags, device);
198+
if (cuError != CUDA_SUCCESS) {
199+
fprintf(stderr, "Failed to create Cuda context for a Cuda Toolkit version < 13: %d\n", cuError);
200+
exit(1);
201+
}
202+
#endif
179203

180204
#ifdef PAPI
181205
int EventSet = PAPI_NULL, j;
@@ -256,11 +280,24 @@ void single_read(int event_count, char **evt_names, long long *values, char ***a
256280
{
257281
int papi_errno, i;
258282
CUcontext ctx;
259-
papi_errno = cuCtxCreate(&ctx, 0, 0);
260-
if (papi_errno != CUDA_SUCCESS) {
261-
fprintf(stderr, "cuda error: failed to create cuda context.\n");
283+
284+
CUresult cuError;
285+
int flags = 0;
286+
CUdevice device = 0;
287+
#if defined(CUDA_TOOLKIT_GE_13)
288+
cuError = cuCtxCreate(&ctx, (CUctxCreateParams*)0, flags, device);
289+
if (cuError != CUDA_SUCCESS) {
290+
fprintf(stderr, "Failed to create Cuda context for a Cuda Toolkit version >= 13: %d\n", cuError);
262291
exit(1);
263292
}
293+
#else
294+
cuError = cuCtxCreate(&ctx, flags, device);
295+
if (cuError != CUDA_SUCCESS) {
296+
fprintf(stderr, "Failed to create Cuda context for a Cuda Toolkit version < 13: %d\n", cuError);
297+
exit(1);
298+
}
299+
#endif
300+
264301
#ifdef PAPI
265302
int EventSet = PAPI_NULL, j;
266303
papi_errno = PAPI_create_eventset(&EventSet);

0 commit comments

Comments
 (0)