@@ -29,11 +29,14 @@ CV_Error(cv::Error::HeaderIsNull, "Nvidia Optical Flow headers not found. Make s
29
29
30
30
// File names of the shared libraries that are opened at run time with
// LoadLibrary() on Windows or dlopen() elsewhere:
//   OF_MODULENAME   - the NVIDIA Optical Flow library
//   CUDA_MODULENAME - the CUDA driver library
// The Windows names are wrapped in TEXT(); the fallback branch uses plain
// narrow string literals.
31
31
#if defined(_WIN64)
32
- #define MODULENAME TEXT (" nvofapi64.dll" )
32
+ #define OF_MODULENAME TEXT (" nvofapi64.dll" )
33
+ #define CUDA_MODULENAME TEXT (" nvcuda.dll" )
33
34
#elif defined(_WIN32)
34
- #define MODULENAME TEXT (" nvofapi.dll" )
35
+ #define OF_MODULENAME TEXT (" nvofapi.dll" )
36
+ #define CUDA_MODULENAME TEXT (" nvcuda.dll" )
35
37
#else
36
- #define MODULENAME " libnvidia-opticalflow.so.1"
38
+ #define OF_MODULENAME " libnvidia-opticalflow.so.1"
39
+ #define CUDA_MODULENAME " libcuda.so"
37
40
#endif
38
41
39
42
#define NVOF_API_CALL (nvOFAPI ) \
@@ -112,6 +115,114 @@ using namespace cv::cuda;
112
115
113
116
namespace
114
117
{
118
+ class LoadNvidiaModules
119
+ {
120
+ private:
121
+ typedef int (*PFNCudaCuCtxGetCurrent)(CUcontext*);
122
+ typedef NV_OF_STATUS (NVOFAPI *PFNNvOFAPICreateInstanceCuda)
123
+ (uint32_t apiVer, NV_OF_CUDA_API_FUNCTION_LIST* cudaOf);
124
+
125
+ PFNCudaCuCtxGetCurrent m_cudaDriverAPIGetCurrentCtx;
126
+ PFNNvOFAPICreateInstanceCuda m_NvOFAPICreateInstanceCuda;
127
+ HMODULE m_hOFModule;
128
+ HMODULE m_hCudaModule;
129
+ bool m_isFailed;
130
+
131
+ LoadNvidiaModules () :
132
+ m_cudaDriverAPIGetCurrentCtx (NULL ),
133
+ m_NvOFAPICreateInstanceCuda (NULL ),
134
+ m_isFailed (false )
135
+ {
136
+ // Loading Cuda Library
137
+ #if defined(_WIN32) || defined(_WIN64)
138
+ HMODULE hCudaModule = LoadLibrary (CUDA_MODULENAME);
139
+ #else
140
+ void *hCudaModule = dlopen (CUDA_MODULENAME, RTLD_LAZY);
141
+ #endif
142
+
143
+ if (hCudaModule == NULL )
144
+ {
145
+ m_isFailed = true ;
146
+ CV_Error (Error::StsBadFunc, " Cannot find Cuda library." );
147
+ }
148
+ m_hCudaModule = hCudaModule;
149
+
150
+ #if defined(_WIN32)
151
+ m_cudaDriverAPIGetCurrentCtx = (PFNCudaCuCtxGetCurrent)GetProcAddress (m_hCudaModule, " cuCtxGetCurrent" );
152
+ #else
153
+ m_cudaDriverAPIGetCurrentCtx = (PFNCudaCuCtxGetCurrent)dlsym (m_hCudaModule, " cuCtxGetCurrent" );
154
+ #endif
155
+ if (!m_cudaDriverAPIGetCurrentCtx)
156
+ {
157
+ m_isFailed = true ;
158
+ CV_Error (Error::StsBadFunc,
159
+ " Cannot find Cuda Driver API : cuCtxGetCurrent() entry in Cuda library" );
160
+ }
161
+
162
+ // Loading Optical Flow Library
163
+ #if defined(_WIN32) || defined(_WIN64)
164
+ HMODULE hOFModule = LoadLibrary (OF_MODULENAME);
165
+ #else
166
+ void *hOFModule = dlopen (OF_MODULENAME, RTLD_LAZY);
167
+ #endif
168
+
169
+ if (hOFModule == NULL )
170
+ {
171
+ m_isFailed = true ;
172
+ CV_Error (Error::StsBadFunc, " Cannot find NvOF library." );
173
+ }
174
+ m_hOFModule = hOFModule;
175
+
176
+ #if defined(_WIN32)
177
+ m_NvOFAPICreateInstanceCuda = (PFNNvOFAPICreateInstanceCuda)GetProcAddress (m_hOFModule, " NvOFAPICreateInstanceCuda" );
178
+ #else
179
+ m_NvOFAPICreateInstanceCuda = (PFNNvOFAPICreateInstanceCuda)dlsym (m_hOFModule, " NvOFAPICreateInstanceCuda" );
180
+ #endif
181
+ if (!m_NvOFAPICreateInstanceCuda)
182
+ {
183
+ m_isFailed = true ;
184
+ CV_Error (Error::StsBadFunc,
185
+ " Cannot find NvOFAPICreateInstanceCuda() entry in NVOF library" );
186
+ }
187
+ };
188
+
189
+ ~LoadNvidiaModules ()
190
+ {
191
+ if (NULL != m_hCudaModule)
192
+ {
193
+ #if defined(_WIN32) || defined(_WIN64)
194
+ FreeLibrary (m_hCudaModule);
195
+ #else
196
+ dlclose (m_hCudaModule);
197
+ #endif
198
+ }
199
+ if (NULL != m_hOFModule)
200
+ {
201
+ #if defined(_WIN32) || defined(_WIN64)
202
+ FreeLibrary (m_hOFModule);
203
+ #else
204
+ dlclose (m_hOFModule);
205
+ #endif
206
+ }
207
+ m_hCudaModule = NULL ;
208
+ m_hOFModule = NULL ;
209
+ m_cudaDriverAPIGetCurrentCtx = NULL ;
210
+ m_NvOFAPICreateInstanceCuda = NULL ;
211
+ }
212
+
213
+ public:
214
+ static LoadNvidiaModules& Init ()
215
+ {
216
+ static LoadNvidiaModules LoadLibraryObj;
217
+ if (LoadLibraryObj.m_isFailed )
218
+ CV_Error (Error::StsError, " Can't initialize LoadNvidiaModules Class Object" );
219
+ return LoadLibraryObj;
220
+ }
221
+
222
+ PFNCudaCuCtxGetCurrent GetCudaLibraryFunctionPtr () { return m_cudaDriverAPIGetCurrentCtx; }
223
+ PFNNvOFAPICreateInstanceCuda GetOFLibraryFunctionPtr () { return m_NvOFAPICreateInstanceCuda; }
224
+ };
225
+
115
226
class NvidiaOpticalFlowImpl : public cv ::cuda::NvidiaOpticalFlow_1_0
116
227
{
117
228
private:
@@ -169,7 +280,6 @@ class NvidiaOpticalFlowImpl : public cv::cuda::NvidiaOpticalFlow_1_0
169
280
NvOFHandle GetHandle () { return m_hOF; }
170
281
171
282
protected:
172
- HMODULE m_hModule; // module handle to load nvof dll
173
283
std::mutex m_lock;
174
284
175
285
public:
@@ -198,6 +308,8 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
198
308
m_cuContext (nullptr ), m_format(NV_OF_BUFFER_FORMAT_GRAYSCALE8),
199
309
m_gridSize (NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
200
310
{
311
+ LoadNvidiaModules& LoadNvidiaModulesObj = LoadNvidiaModules::Init ();
312
+
201
313
int nGpu = 0 ;
202
314
203
315
cuSafeCall (cudaGetDeviceCount (&nGpu));
@@ -208,7 +320,8 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
208
320
209
321
cuSafeCall (cudaSetDevice (m_gpuId));
210
322
cuSafeCall (cudaFree (m_cuContext));
211
- cuSafeCall (cuCtxGetCurrent (&m_cuContext));
323
+
324
+ cuSafeCall (LoadNvidiaModulesObj.GetCudaLibraryFunctionPtr ()(&m_cuContext));
212
325
213
326
if (m_gridSize != NV_OF_OUTPUT_VECTOR_GRID_SIZE_4)
214
327
{
@@ -253,38 +366,9 @@ NvidiaOpticalFlowImpl::NvidiaOpticalFlowImpl(
253
366
m_costBufElementSize = sizeof (uint32_t );
254
367
}
255
368
256
- #if defined(_WIN32) || defined(_WIN64)
257
- HMODULE hModule = LoadLibrary (MODULENAME);
258
- #else
259
- void *hModule = dlopen (MODULENAME, RTLD_LAZY);
260
- #endif
261
-
262
- if (hModule == NULL )
263
- {
264
- CV_Error (Error::StsBadFunc,
265
- " Cannot find NvOF library." );
266
- }
267
- m_hModule = hModule;
268
-
269
- typedef NV_OF_STATUS (NVOFAPI *PFNNvOFAPICreateInstanceCuda)
270
- (uint32_t apiVer, NV_OF_CUDA_API_FUNCTION_LIST* cudaOf);
271
-
272
- #if defined(_WIN32)
273
- PFNNvOFAPICreateInstanceCuda NvOFAPICreateInstanceCuda
274
- = (PFNNvOFAPICreateInstanceCuda)GetProcAddress (m_hModule, " NvOFAPICreateInstanceCuda" );
275
- #else
276
- PFNNvOFAPICreateInstanceCuda NvOFAPICreateInstanceCuda
277
- = (PFNNvOFAPICreateInstanceCuda)dlsym (m_hModule, " NvOFAPICreateInstanceCuda" );
278
- #endif
279
- if (!NvOFAPICreateInstanceCuda)
280
- {
281
- CV_Error (Error::StsBadFunc,
282
- " Cannot find NvOFAPICreateInstanceCuda() entry in NVOF library" );
283
- }
284
-
285
369
m_ofAPI.reset (new NV_OF_CUDA_API_FUNCTION_LIST ());
286
370
287
- NVOF_API_CALL (NvOFAPICreateInstanceCuda (NV_OF_API_VERSION, m_ofAPI.get ()));
371
+ NVOF_API_CALL (LoadNvidiaModulesObj. GetOFLibraryFunctionPtr () (NV_OF_API_VERSION, m_ofAPI.get ()));
288
372
NVOF_API_CALL (GetAPI ()->nvCreateOpticalFlowCuda (m_cuContext, &m_hOF));
289
373
290
374
memset (&m_initParams, 0 , sizeof (m_initParams));
@@ -416,9 +500,7 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
416
500
}
417
501
}
418
502
419
- cuSafeCall (cuCtxPushCurrent (m_cuContext));
420
503
inputStream.waitForCompletion ();
421
- cuSafeCall (cuCtxPopCurrent (&m_cuContext));
422
504
423
505
// Execute Call
424
506
NV_OF_EXECUTE_INPUT_PARAMS exeInParams;
@@ -436,9 +518,7 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
436
518
m_hCostBuffer : nullptr ;;
437
519
NVOF_API_CALL (GetAPI ()->nvOFExecute (GetHandle (), &exeInParams, &exeOutParams));
438
520
439
- cuSafeCall (cuCtxPushCurrent (m_cuContext));
440
521
outputStream.waitForCompletion ();
441
- cuSafeCall (cuCtxPopCurrent (&m_cuContext));
442
522
443
523
if (_flow.isMat ())
444
524
flowXYGpuMat.download (_flow);
@@ -460,7 +540,7 @@ void NvidiaOpticalFlowImpl::calc(InputArray _frame0, InputArray _frame1, InputOu
460
540
else
461
541
CV_Error (Error::StsBadArg, " Incorrect cost buffer passed. Pass Mat or GpuMat" );
462
542
}
463
- cuSafeCall (cuCtxSynchronize ());
543
+ cuSafeCall (cudaDeviceSynchronize ());
464
544
}
465
545
466
546
void NvidiaOpticalFlowImpl::collectGarbage ()
0 commit comments