Skip to content

Commit 54e75b4

Browse files
committed
Merge pull request #2215 from vchiluka5:lazy_load_nvcuda_dll
2 parents 8532e65 + c494a5d commit 54e75b4

File tree

1 file changed

+120
-40
lines changed

1 file changed

+120
-40
lines changed

modules/cudaoptflow/src/nvidiaOpticalFlow.cpp

Lines changed: 120 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,14 @@ CV_Error(cv::Error::HeaderIsNull, "Nvidia Optical Flow headers not found. Make s
2929

3030
//macro for dll loading
3131
#if defined(_WIN64)
32-
#define MODULENAME TEXT("nvofapi64.dll")
32+
#define OF_MODULENAME TEXT("nvofapi64.dll")
33+
#define CUDA_MODULENAME TEXT("nvcuda.dll")
3334
#elif defined(_WIN32)
34-
#define MODULENAME TEXT("nvofapi.dll")
35+
#define OF_MODULENAME TEXT("nvofapi.dll")
36+
#define CUDA_MODULENAME TEXT("nvcuda.dll")
3537
#else
36-
#define MODULENAME "libnvidia-opticalflow.so.1"
38+
#define OF_MODULENAME "libnvidia-opticalflow.so.1"
39+
#define CUDA_MODULENAME "libcuda.so"
3740
#endif
3841

3942
#define NVOF_API_CALL(nvOFAPI) \
@@ -112,6 +115,114 @@ using namespace cv::cuda;
112115

113116
namespace
114117
{
118+
class LoadNvidiaModules
119+
{
120+
private:
121+
typedef int(*PFNCudaCuCtxGetCurrent)(CUcontext*);
122+
typedef NV_OF_STATUS(NVOFAPI *PFNNvOFAPICreateInstanceCuda)
123+
(uint32_t apiVer, NV_OF_CUDA_API_FUNCTION_LIST* cudaOf);
124+
125+
PFNCudaCuCtxGetCurrent m_cudaDriverAPIGetCurrentCtx;
126+
PFNNvOFAPICreateInstanceCuda m_NvOFAPICreateInstanceCuda;
127+
HMODULE m_hOFModule;
128+
HMODULE m_hCudaModule;
129+
bool m_isFailed;
130+
131+
LoadNvidiaModules() :
132+
m_cudaDriverAPIGetCurrentCtx(NULL),
133+
m_NvOFAPICreateInstanceCuda(NULL),
134+
m_isFailed(false)
135+
{
136+
//Loading Cuda Library
137+
#if defined(_WIN32) || defined(_WIN64)
138+
HMODULE hCudaModule = LoadLibrary(CUDA_MODULENAME);
139+
#else
140+
void *hCudaModule = dlopen(CUDA_MODULENAME, RTLD_LAZY);
141+
#endif
142+
143+
if (hCudaModule == NULL)
144+
{
145+
m_isFailed = true;
146+
CV_Error(Error::StsBadFunc, "Cannot find Cuda library.");
147+
}
148+
m_hCudaModule = hCudaModule;
149+
150+
#if defined(_WIN32)
151+
m_cudaDriverAPIGetCurrentCtx = (PFNCudaCuCtxGetCurrent)GetProcAddress(m_hCudaModule, "cuCtxGetCurrent");
152+
#else
153+
m_cudaDriverAPIGetCurrentCtx = (PFNCudaCuCtxGetCurrent)dlsym(m_hCudaModule, "cuCtxGetCurrent");
154+
#endif
155+
if (!m_cudaDriverAPIGetCurrentCtx)
156+
{
157+
m_isFailed = true;
158+
CV_Error(Error::StsBadFunc,
159+
"Cannot find Cuda Driver API : cuCtxGetCurrent() entry in Cuda library");
160+
}
161+
162+
//Loading Optical Flow Library
163+
#if defined(_WIN32) || defined(_WIN64)
164+
HMODULE hOFModule = LoadLibrary(OF_MODULENAME);
165+
#else
166+
void *hOFModule = dlopen(OF_MODULENAME, RTLD_LAZY);
167+
#endif
168+
169+
if (hOFModule == NULL)
170+
{
171+
m_isFailed = true;
172+
CV_Error(Error::StsBadFunc, "Cannot find NvOF library.");
173+
}
174+
m_hOFModule = hOFModule;
175+
176+
#if defined(_WIN32)
177+
m_NvOFAPICreateInstanceCuda = (PFNNvOFAPICreateInstanceCuda)GetProcAddress(m_hOFModule, "NvOFAPICreateInstanceCuda");
178+
#else
179+
m_NvOFAPICreateInstanceCuda = (PFNNvOFAPICreateInstanceCuda)dlsym(m_hOFModule, "NvOFAPICreateInstanceCuda");
180+
#endif
181+
if (!m_NvOFAPICreateInstanceCuda)
182+
{
183+
m_isFailed = true;
184+
CV_Error(Error::StsBadFunc,
185+
"Cannot find NvOFAPICreateInstanceCuda() entry in NVOF library");
186+
}
187+
};
188+
189+
~LoadNvidiaModules()
190+
{
191+
if (NULL != m_hCudaModule)
192+
{
193+
#if defined(_WIN32) || defined(_WIN64)
194+
FreeLibrary(m_hCudaModule);
195+
#else
196+
dlclose(m_hCudaModule);
197+
#endif
198+
}
199+
if (NULL != m_hOFModule)
200+
{
201+
#if defined(_WIN32) || defined(_WIN64)
202+
FreeLibrary(m_hOFModule);
203+
#else
204+
dlclose(m_hOFModule);
205+
#endif
206+
}
207+
m_hCudaModule = NULL;
208+
m_hOFModule = NULL;
209+
m_cudaDriverAPIGetCurrentCtx = NULL;
210+
m_NvOFAPICreateInstanceCuda = NULL;
211+
}
212+
213+
public:
214+
static LoadNvidiaModules& Init()
215+
{
216+
static LoadNvidiaModules LoadLibraryObj;
217+
if (LoadLibraryObj.m_isFailed)
218+
CV_Error(Error::StsError, "Can't initialize LoadNvidiaModules Class Object");
219+
return LoadLibraryObj;
220+
}
221+
222+
PFNCudaCuCtxGetCurrent GetCudaLibraryFunctionPtr() { return m_cudaDriverAPIGetCurrentCtx; }
223+
PFNNvOFAPICreateInstanceCuda GetOFLibraryFunctionPtr() { return m_NvOFAPICreateInstanceCuda; }
224+
};
225+
115226
class NvidiaOpticalFlowImpl : public cv::cuda::NvidiaOpticalFlow_1_0
116227
{
117228
private:
@@ -169,7 +280,6 @@ class NvidiaOpticalFlowImpl : public cv::cuda::NvidiaOpticalFlow_1_0
169280
NvOFHandle GetHandle() { return m_hOF; }
170281

171282
protected:
172-
HMODULE m_hModule; //module handle to load nvof dll
173283
std::mutex m_lock;
174284

175285
public:
@@ -198,6 +308,8 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
198308
m_cuContext(nullptr), m_format(NV_OF_BUFFER_FORMAT_GRAYSCALE8),
199309
m_gridSize(NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
200310
{
311+
LoadNvidiaModules& LoadNvidiaModulesObj = LoadNvidiaModules::Init();
312+
201313
int nGpu = 0;
202314

203315
cuSafeCall(cudaGetDeviceCount(&nGpu));
@@ -208,7 +320,8 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
208320

209321
cuSafeCall(cudaSetDevice(m_gpuId));
210322
cuSafeCall(cudaFree(m_cuContext));
211-
cuSafeCall(cuCtxGetCurrent(&m_cuContext));
323+
324+
cuSafeCall(LoadNvidiaModulesObj.GetCudaLibraryFunctionPtr()(&m_cuContext));
212325

213326
if (m_gridSize != NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
214327
{
@@ -253,38 +366,9 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
253366
m_costBufElementSize = sizeof(uint32_t);
254367
}
255368

256-
#if defined(_WIN32) || defined(_WIN64)
257-
HMODULE hModule = LoadLibrary(MODULENAME);
258-
#else
259-
void *hModule = dlopen(MODULENAME, RTLD_LAZY);
260-
#endif
261-
262-
if (hModule == NULL)
263-
{
264-
CV_Error(Error::StsBadFunc,
265-
"Cannot find NvOF library.");
266-
}
267-
m_hModule = hModule;
268-
269-
typedef NV_OF_STATUS(NVOFAPI *PFNNvOFAPICreateInstanceCuda)
270-
(uint32_t apiVer, NV_OF_CUDA_API_FUNCTION_LIST* cudaOf);
271-
272-
#if defined(_WIN32)
273-
PFNNvOFAPICreateInstanceCuda NvOFAPICreateInstanceCuda
274-
= (PFNNvOFAPICreateInstanceCuda)GetProcAddress(m_hModule, "NvOFAPICreateInstanceCuda");
275-
#else
276-
PFNNvOFAPICreateInstanceCuda NvOFAPICreateInstanceCuda
277-
= (PFNNvOFAPICreateInstanceCuda)dlsym(m_hModule, "NvOFAPICreateInstanceCuda");
278-
#endif
279-
if (!NvOFAPICreateInstanceCuda)
280-
{
281-
CV_Error(Error::StsBadFunc,
282-
"Cannot find NvOFAPICreateInstanceCuda() entry in NVOF library");
283-
}
284-
285369
m_ofAPI.reset(new NV_OF_CUDA_API_FUNCTION_LIST());
286370

287-
NVOF_API_CALL(NvOFAPICreateInstanceCuda(NV_OF_API_VERSION, m_ofAPI.get()));
371+
NVOF_API_CALL(LoadNvidiaModulesObj.GetOFLibraryFunctionPtr()(NV_OF_API_VERSION, m_ofAPI.get()));
288372
NVOF_API_CALL(GetAPI()->nvCreateOpticalFlowCuda(m_cuContext, &m_hOF));
289373

290374
memset(&m_initParams, 0, sizeof(m_initParams));
@@ -416,9 +500,7 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
416500
}
417501
}
418502

419-
cuSafeCall(cuCtxPushCurrent(m_cuContext));
420503
inputStream.waitForCompletion();
421-
cuSafeCall(cuCtxPopCurrent(&m_cuContext));
422504

423505
//Execute Call
424506
NV_OF_EXECUTE_INPUT_PARAMS exeInParams;
@@ -436,9 +518,7 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
436518
m_hCostBuffer : nullptr;;
437519
NVOF_API_CALL(GetAPI()->nvOFExecute(GetHandle(), &exeInParams, &exeOutParams));
438520

439-
cuSafeCall(cuCtxPushCurrent(m_cuContext));
440521
outputStream.waitForCompletion();
441-
cuSafeCall(cuCtxPopCurrent(&m_cuContext));
442522

443523
if (_flow.isMat())
444524
flowXYGpuMat.download(_flow);
@@ -460,7 +540,7 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
460540
else
461541
CV_Error(Error::StsBadArg, "Incorrect cost buffer passed. Pass Mat or GpuMat");
462542
}
463-
cuSafeCall(cuCtxSynchronize());
543+
cuSafeCall(cudaDeviceSynchronize());
464544
}
465545

466546
void NvidiaOpticalFlowImpl::collectGarbage()

0 commit comments

Comments
 (0)