@@ -329,11 +329,120 @@ CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr
329
329
330
330
/** @brief BufferPool for use with CUDA streams
331
331
332
- * BufferPool utilizes cuda::Stream's allocator to create new buffers. It is
333
- * particularly useful when BufferPoolUsage is set to true, or a custom
334
- * allocator is specified for the cuda::Stream, and you want to implement your
335
- * own stream based functions utilizing the same underlying GPU memory
336
- * management.
332
+ BufferPool utilizes Stream's allocator to create new buffers for GpuMat's. It is
333
+ only useful when enabled with #setBufferPoolUsage.
334
+
335
+ @code
336
+ setBufferPoolUsage(true);
337
+ @endcode
338
+
339
+ @note #setBufferPoolUsage must be called \em before any Stream declaration.
340
+
341
+ Users may specify a custom allocator for Stream and may implement their own stream-based
342
+ functions utilizing the same underlying GPU memory management.
343
+
344
+ If a custom allocator is not specified, BufferPool utilizes StackAllocator by
345
+ default. StackAllocator allocates a chunk of GPU device memory beforehand,
346
+ and when GpuMat is declared later on, it is given the pre-allocated memory.
347
+ This kind of strategy reduces the number of calls to memory-allocating APIs
348
+ such as cudaMalloc or cudaMallocPitch.
349
+
350
+ Below is an example that utilizes BufferPool with StackAllocator:
351
+
352
+ @code
353
+ #include <opencv2/opencv.hpp>
354
+
355
+ using namespace cv;
356
+ using namespace cv::cuda;
357
+
358
+ int main()
359
+ {
360
+ setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
361
+ setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
362
+
363
+ Stream stream1, stream2; // Each stream uses 1 stack
364
+ BufferPool pool1(stream1), pool2(stream2);
365
+
366
+ GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB
367
+ GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full
368
+
369
+ GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB
370
+ GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB
371
+
372
+ cvtColor(d_src1, d_dst1, CV_GRAY2BGR, 0, stream1);
373
+ cvtColor(d_src2, d_dst2, CV_GRAY2BGR, 0, stream2);
374
+ }
375
+ @endcode
376
+
377
+ If we allocate another GpuMat on pool1 in the above example, it will be carried out by
378
+ the DefaultAllocator since the stack for pool1 is full.
379
+
380
+ @code
381
+ GpuMat d_add1 = pool1.getBuffer(1024, 1024, CV_8UC1); // Stack for pool1 is full, memory is allocated with DefaultAllocator
382
+ @endcode
383
+
384
+ If a third stream is declared in the above example, allocating with #getBuffer
385
+ within that stream will also be carried out by the DefaultAllocator because we've run out of
386
+ stacks.
387
+
388
+ @code
389
+ Stream stream3; // Only 2 stacks were allocated, we've run out of stacks
390
+ BufferPool pool3(stream3);
391
+ GpuMat d_src3 = pool3.getBuffer(1024, 1024, CV_8UC1); // Memory is allocated with DefaultAllocator
392
+ @endcode
393
+
394
+ @warning When utilizing StackAllocator, deallocation order is important.
395
+
396
+ Just like a stack, deallocation must be done in LIFO order. Below is an example of
397
+ erroneous usage that violates the LIFO rule. If OpenCV is compiled in Debug mode, this
398
+ sample code will emit a CV_Assert error.
399
+
400
+ @code
401
+ int main()
402
+ {
403
+ setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
404
+ Stream stream; // A default size (10 MB) stack is allocated to this stream
405
+ BufferPool pool(stream);
406
+
407
+ GpuMat mat1 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat1 (1MB)
408
+ GpuMat mat2 = pool.getBuffer(1024, 1024, CV_8UC1); // Allocate mat2 (1MB)
409
+
410
+ mat1.release(); // erroneous usage : mat2 must be deallocated before mat1
411
+ }
412
+ @endcode
413
+
414
+ Since C++ local variables are destroyed in the reverse order of construction,
415
+ the code sample below satisfies the LIFO rule. Local GpuMat's are deallocated
416
+ and the corresponding memory is automatically returned to the pool for later usage.
417
+
418
+ @code
419
+ int main()
420
+ {
421
+ setBufferPoolUsage(true); // Tell OpenCV that we are going to utilize BufferPool
422
+ setBufferPoolConfig(getDevice(), 1024 * 1024 * 64, 2); // Allocate 64 MB, 2 stacks (default is 10 MB, 5 stacks)
423
+
424
+ Stream stream1, stream2; // Each stream uses 1 stack
425
+ BufferPool pool1(stream1), pool2(stream2);
426
+
427
+ for (int i = 0; i < 10; i++)
428
+ {
429
+ GpuMat d_src1 = pool1.getBuffer(4096, 4096, CV_8UC1); // 16MB
430
+ GpuMat d_dst1 = pool1.getBuffer(4096, 4096, CV_8UC3); // 48MB, pool1 is now full
431
+
432
+ GpuMat d_src2 = pool2.getBuffer(1024, 1024, CV_8UC1); // 1MB
433
+ GpuMat d_dst2 = pool2.getBuffer(1024, 1024, CV_8UC3); // 3MB
434
+
435
+ d_src1.setTo(Scalar(i), stream1);
436
+ d_src2.setTo(Scalar(i), stream2);
437
+
438
+ cvtColor(d_src1, d_dst1, CV_GRAY2BGR, 0, stream1);
439
+ cvtColor(d_src2, d_dst2, CV_GRAY2BGR, 0, stream2);
440
+ // The order of destruction of the local variables is:
441
+ // d_dst2 => d_src2 => d_dst1 => d_src1
442
+ // LIFO rule is satisfied, this code runs without error
443
+ }
444
+ }
445
+ @endcode
337
446
*/
338
447
class CV_EXPORTS BufferPool
339
448
{
0 commit comments