Skip to content

Commit 6536550

Browse files
[libomptarget][cuda] Handle missing _v2 symbols gracefully
[libomptarget][cuda] Handle missing _v2 symbols gracefully. Follow-on from D95367. Look up the _v2 symbols with dlsym if they are present; otherwise fall back to the unsuffixed versions. This builds a hash table for the check, which can be revised to avoid heap allocations later if necessary. Reviewed By: jdoerfert. Differential Revision: https://reviews.llvm.org/D95415
1 parent 65e2fa5 commit 6536550

File tree

2 files changed

+30
-12
lines changed

2 files changed

+30
-12
lines changed

openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
#include "Debug.h"
1616
#include "dlwrap.h"
1717

18+
#include <string>
19+
#include <unordered_map>
20+
1821
#include <dlfcn.h>
1922

2023
DLWRAP_INTERNAL(cuInit, 1);
@@ -67,6 +70,21 @@ DLWRAP_FINALIZE();
6770
static bool checkForCUDA() {
6871
// return true if dlopen succeeded and all functions found
6972

73+
// Prefer _v2 versions of functions if found in the library
74+
std::unordered_map<std::string, const char *> TryFirst = {
75+
{"cuMemAlloc", "cuMemAlloc_v2"},
76+
{"cuMemFree", "cuMemFree_v2"},
77+
{"cuMemcpyDtoH", "cuMemcpyDtoH_v2"},
78+
{"cuMemcpyHtoD", "cuMemcpyHtoD_v2"},
79+
{"cuStreamDestroy", "cuStreamDestroy_v2"},
80+
{"cuModuleGetGlobal", "cuModuleGetGlobal_v2"},
81+
{"cuMemcpyDtoHAsync", "cuMemcpyDtoHAsync_v2"},
82+
{"cuMemcpyDtoDAsync", "cuMemcpyDtoDAsync_v2"},
83+
{"cuMemcpyHtoDAsync", "cuMemcpyHtoDAsync_v2"},
84+
{"cuDevicePrimaryCtxRelease", "cuDevicePrimaryCtxRelease_v2"},
85+
{"cuDevicePrimaryCtxSetFlags", "cuDevicePrimaryCtxSetFlags_v2"},
86+
};
87+
7088
const char *CudaLib = DYNAMIC_CUDA_PATH;
7189
void *DynlibHandle = dlopen(CudaLib, RTLD_NOW);
7290
if (!DynlibHandle) {
@@ -77,11 +95,23 @@ static bool checkForCUDA() {
7795
for (size_t I = 0; I < dlwrap::size(); I++) {
7896
const char *Sym = dlwrap::symbol(I);
7997

98+
auto It = TryFirst.find(Sym);
99+
if (It != TryFirst.end()) {
100+
const char *First = It->second;
101+
void *P = dlsym(DynlibHandle, First);
102+
if (P) {
103+
DP("Implementing %s with dlsym(%s) -> %p\n", Sym, First, P);
104+
*dlwrap::pointer(I) = P;
105+
continue;
106+
}
107+
}
108+
80109
void *P = dlsym(DynlibHandle, Sym);
81110
if (P == nullptr) {
82111
DP("Unable to find '%s' in '%s'!\n", Sym, CudaLib);
83112
return false;
84113
}
114+
DP("Implementing %s with dlsym(%s) -> %p\n", Sym, Sym, P);
85115

86116
*dlwrap::pointer(I) = P;
87117
}

openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -49,18 +49,6 @@ typedef enum CUctx_flags_enum {
4949
CU_CTX_SCHED_MASK = 0x07,
5050
} CUctx_flags;
5151

52-
#define cuMemFree cuMemFree_v2
53-
#define cuMemAlloc cuMemAlloc_v2
54-
#define cuMemcpyDtoH cuMemcpyDtoH_v2
55-
#define cuMemcpyHtoD cuMemcpyHtoD_v2
56-
#define cuStreamDestroy cuStreamDestroy_v2
57-
#define cuModuleGetGlobal cuModuleGetGlobal_v2
58-
#define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
59-
#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
60-
#define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
61-
#define cuDevicePrimaryCtxRelease cuDevicePrimaryCtxRelease_v2
62-
#define cuDevicePrimaryCtxSetFlags cuDevicePrimaryCtxSetFlags_v2
63-
6452
CUresult cuCtxGetDevice(CUdevice *);
6553
CUresult cuDeviceGet(CUdevice *, int);
6654
CUresult cuDeviceGetAttribute(int *, CUdevice_attribute, CUdevice);

0 commit comments

Comments
 (0)