Skip to content

Commit 74fce7f

Browse files
authored
Merge pull request #3355 from cudawarped:cudacodec_add_resize_crop
Add scaling and cropping options to `cudacodec::VideoReader`
2 parents de84cc0 + 58e7e30 commit 74fce7f

File tree

8 files changed

+107
-12
lines changed

8 files changed

+107
-12
lines changed

modules/cudacodec/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ set(the_description "CUDA-accelerated Video Encoding/Decoding")
66

77
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wshadow)
88

9-
ocv_add_module(cudacodec opencv_core opencv_videoio OPTIONAL opencv_cudev WRAP python)
9+
ocv_add_module(cudacodec opencv_core opencv_videoio opencv_cudaarithm opencv_cudawarping OPTIONAL opencv_cudev WRAP python)
1010

1111
ocv_module_include_directories()
1212
ocv_glob_module_sources()

modules/cudacodec/include/opencv2/cudacodec.hpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,9 @@ struct CV_EXPORTS_W_SIMPLE FormatInfo
309309
CV_PROP_RW double fps;
310310
CV_PROP_RW int ulNumDecodeSurfaces;//!< Maximum number of internal decode surfaces.
311311
CV_PROP_RW DeinterlaceMode deinterlaceMode;
312+
CV_PROP_RW cv::Size targetSz;//!< Post-processed size of the output frame.
313+
CV_PROP_RW cv::Rect srcRoi;//!< Region of interest decoded from video source.
314+
CV_PROP_RW cv::Rect targetRoi;//!< Region of interest in the output frame containing the decoded frame.
312315
};
313316

314317
/** @brief cv::cudacodec::VideoReader generic properties identifier.
@@ -516,13 +519,20 @@ surfaces it requires for correct functionality and optimal video memory usage bu
516519
overall application. The optimal number of decode surfaces (in terms of performance and memory utilization) should be decided by experimentation for each application,
517520
but it cannot go below the number determined by NVDEC.
518521
@param rawMode Allow the raw encoded data which has been read up until the last call to grab() to be retrieved by calling retrieve(rawData,RAW_DATA_IDX).
522+
@param targetSz Post-processed size of the output frame (width and height should be multiples of 2; odd values are rounded down to the nearest even value), defaults to the size of the encoded video source.
523+
@param srcRoi Region of interest decoded from the video source (x/width should be multiples of 4 and y/height multiples of 2), defaults to the full frame.
524+
@param targetRoi Region of interest (x/width should be multiples of 4 and y/height multiples of 2) within the output frame to copy and resize the decoded frame to,
525+
defaults to the full frame.
519526
*/
520527
struct CV_EXPORTS_W_SIMPLE VideoReaderInitParams {
521528
CV_WRAP VideoReaderInitParams() : udpSource(false), allowFrameDrop(false), minNumDecodeSurfaces(0), rawMode(0) {};
522529
CV_PROP_RW bool udpSource;
523530
CV_PROP_RW bool allowFrameDrop;
524531
CV_PROP_RW int minNumDecodeSurfaces;
525532
CV_PROP_RW bool rawMode;
533+
CV_PROP_RW cv::Size targetSz;
534+
CV_PROP_RW cv::Rect srcRoi;
535+
CV_PROP_RW cv::Rect targetRoi;
526536
};
527537

528538
/** @brief Creates video reader.

modules/cudacodec/src/video_decoder.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,14 @@ void cv::cudacodec::detail::VideoDecoder::create(const FormatInfo& videoFormat)
148148
createInfo_.ulTargetHeight = videoFormat.height;
149149
createInfo_.ulMaxWidth = videoFormat.ulMaxWidth;
150150
createInfo_.ulMaxHeight = videoFormat.ulMaxHeight;
151+
createInfo_.display_area.left = videoFormat.displayArea.x;
152+
createInfo_.display_area.right = videoFormat.displayArea.x + videoFormat.displayArea.width;
153+
createInfo_.display_area.top = videoFormat.displayArea.y;
154+
createInfo_.display_area.bottom = videoFormat.displayArea.y + videoFormat.displayArea.height;
155+
createInfo_.target_rect.left = videoFormat.targetRoi.x;
156+
createInfo_.target_rect.right = videoFormat.targetRoi.x + videoFormat.targetRoi.width;
157+
createInfo_.target_rect.top = videoFormat.targetRoi.y;
158+
createInfo_.target_rect.bottom = videoFormat.targetRoi.y + videoFormat.targetRoi.height;
151159
createInfo_.ulNumOutputSurfaces = 2;
152160
createInfo_.ulCreationFlags = videoCreateFlags;
153161
createInfo_.vidLock = lock_;

modules/cudacodec/src/video_decoder.hpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,17 @@ namespace cv { namespace cudacodec { namespace detail {
4949
class VideoDecoder
5050
{
5151
public:
52-
VideoDecoder(const Codec& codec, const int minNumDecodeSurfaces, CUcontext ctx, CUvideoctxlock lock) : ctx_(ctx), lock_(lock), decoder_(0)
52+
VideoDecoder(const Codec& codec, const int minNumDecodeSurfaces, cv::Size targetSz, cv::Rect srcRoi, cv::Rect targetRoi, CUcontext ctx, CUvideoctxlock lock) :
53+
ctx_(ctx), lock_(lock), decoder_(0)
5354
{
5455
videoFormat_.codec = codec;
5556
videoFormat_.ulNumDecodeSurfaces = minNumDecodeSurfaces;
57+
// alignment enforced by nvcuvid, likely due to chroma subsampling
58+
videoFormat_.targetSz.width = targetSz.width - targetSz.width % 2; videoFormat_.targetSz.height = targetSz.height - targetSz.height % 2;
59+
videoFormat_.srcRoi.x = srcRoi.x - srcRoi.x % 4; videoFormat_.srcRoi.width = srcRoi.width - srcRoi.width % 4;
60+
videoFormat_.srcRoi.y = srcRoi.y - srcRoi.y % 2; videoFormat_.srcRoi.height = srcRoi.height - srcRoi.height % 2;
61+
videoFormat_.targetRoi.x = targetRoi.x - targetRoi.x % 4; videoFormat_.targetRoi.width = targetRoi.width - targetRoi.width % 4;
62+
videoFormat_.targetRoi.y = targetRoi.y - targetRoi.y % 2; videoFormat_.targetRoi.height = targetRoi.height - targetRoi.height % 2;
5663
}
5764

5865
~VideoDecoder()
@@ -66,6 +73,9 @@ class VideoDecoder
6673
// Get the codec type currently used.
6774
cudaVideoCodec codec() const { return static_cast<cudaVideoCodec>(videoFormat_.codec); }
6875
int nDecodeSurfaces() const { return videoFormat_.ulNumDecodeSurfaces; }
76+
cv::Size getTargetSz() const { return videoFormat_.targetSz; }
77+
cv::Rect getSrcRoi() const { return videoFormat_.srcRoi; }
78+
cv::Rect getTargetRoi() const { return videoFormat_.targetRoi; }
6979

7080
unsigned long frameWidth() const { return videoFormat_.ulWidth; }
7181
unsigned long frameHeight() const { return videoFormat_.ulHeight; }
@@ -89,7 +99,7 @@ class VideoDecoder
8999

90100
cuSafeCall( cuvidMapVideoFrame(decoder_, picIdx, &ptr, &pitch, &videoProcParams) );
91101

92-
return cuda::GpuMat(frameHeight() * 3 / 2, frameWidth(), CV_8UC1, (void*) ptr, pitch);
102+
return cuda::GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, (void*) ptr, pitch);
93103
}
94104

95105
void unmapFrame(cuda::GpuMat& frame)

modules/cudacodec/src/video_parser.cpp

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,19 @@ int CUDAAPI cv::cudacodec::detail::VideoParser::HandleVideoSequence(void* userDa
120120
newFormat.nBitDepthMinus8 = format->bit_depth_luma_minus8;
121121
newFormat.ulWidth = format->coded_width;
122122
newFormat.ulHeight = format->coded_height;
123-
newFormat.width = format->coded_width;
124-
newFormat.height = format->coded_height;
125-
newFormat.displayArea = Rect(Point(format->display_area.left, format->display_area.top), Point(format->display_area.right, format->display_area.bottom));
126123
newFormat.fps = format->frame_rate.numerator / static_cast<float>(format->frame_rate.denominator);
124+
newFormat.targetSz = thiz->videoDecoder_->getTargetSz();
125+
newFormat.width = newFormat.targetSz.width ? newFormat.targetSz.width : format->coded_width;
126+
newFormat.height = newFormat.targetSz.height ? newFormat.targetSz.height : format->coded_height;
127+
newFormat.srcRoi = thiz->videoDecoder_->getSrcRoi();
128+
if (newFormat.srcRoi.empty()) {
129+
format->display_area.right = format->coded_width;
130+
format->display_area.bottom = format->coded_height;
131+
newFormat.displayArea = Rect(Point(format->display_area.left, format->display_area.top), Point(format->display_area.right, format->display_area.bottom));
132+
}
133+
else
134+
newFormat.displayArea = newFormat.srcRoi;
135+
newFormat.targetRoi = thiz->videoDecoder_->getTargetRoi();
127136
newFormat.ulNumDecodeSurfaces = min(!thiz->allowFrameDrop_ ? max(thiz->videoDecoder_->nDecodeSurfaces(), static_cast<int>(format->min_num_decode_surfaces)) :
128137
format->min_num_decode_surfaces * 2, 32);
129138
if (format->progressive_sequence)

modules/cudacodec/src/video_reader.cpp

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,8 @@ namespace
8686
class VideoReaderImpl : public VideoReader
8787
{
8888
public:
89-
explicit VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop = false , const bool udpSource = false);
89+
explicit VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop = false , const bool udpSource = false,
90+
const Size targetSz = Size(), const Rect srcRoi = Rect(), const Rect targetRoi = Rect());
9091
~VideoReaderImpl();
9192

9293
bool nextFrame(GpuMat& frame, Stream& stream) CV_OVERRIDE;
@@ -131,7 +132,8 @@ namespace
131132
return videoSource_->format();
132133
}
133134

134-
VideoReaderImpl::VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop, const bool udpSource) :
135+
VideoReaderImpl::VideoReaderImpl(const Ptr<VideoSource>& source, const int minNumDecodeSurfaces, const bool allowFrameDrop, const bool udpSource,
136+
const Size targetSz, const Rect srcRoi, const Rect targetRoi) :
135137
videoSource_(source),
136138
lock_(0)
137139
{
@@ -143,7 +145,7 @@ namespace
143145
cuSafeCall( cuCtxGetCurrent(&ctx) );
144146
cuSafeCall( cuvidCtxLockCreate(&lock_, ctx) );
145147
frameQueue_.reset(new FrameQueue());
146-
videoDecoder_.reset(new VideoDecoder(videoSource_->format().codec, minNumDecodeSurfaces, ctx, lock_));
148+
videoDecoder_.reset(new VideoDecoder(videoSource_->format().codec, minNumDecodeSurfaces, targetSz, srcRoi, targetRoi, ctx, lock_));
147149
videoParser_.reset(new VideoParser(videoDecoder_, frameQueue_, allowFrameDrop, udpSource));
148150
videoSource_->setVideoParser(videoParser_);
149151
videoSource_->start();
@@ -357,13 +359,15 @@ Ptr<VideoReader> cv::cudacodec::createVideoReader(const String& filename, const
357359
videoSource.reset(new CuvidVideoSource(filename));
358360
}
359361

360-
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource);
362+
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz,
363+
params.srcRoi, params.targetRoi);
361364
}
362365

363366
Ptr<VideoReader> cv::cudacodec::createVideoReader(const Ptr<RawVideoSource>& source, const VideoReaderInitParams params)
364367
{
365368
Ptr<VideoSource> videoSource(new RawVideoSourceWrapper(source, params.rawMode));
366-
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces);
369+
return makePtr<VideoReaderImpl>(videoSource, params.minNumDecodeSurfaces, params.allowFrameDrop, params.udpSource, params.targetSz,
370+
params.srcRoi, params.targetRoi);
367371
}
368372

369373
#endif // HAVE_NVCUVID

modules/cudacodec/test/test_precomp.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@
4747
#include "opencv2/ts/cuda_test.hpp"
4848

4949
#include "opencv2/cudacodec.hpp"
50+
#include "opencv2/cudawarping.hpp"
51+
#include "opencv2/cudaarithm.hpp"
5052

5153
#include "cvconfig.h"
5254

modules/cudacodec/test/test_video.cpp

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,10 @@ PARAM_TEST_CASE(CheckExtraData, cv::cuda::DeviceInfo, check_extra_data_params_t)
5454
{
5555
};
5656

57+
PARAM_TEST_CASE(Scaling, cv::cuda::DeviceInfo, std::string, Size2f, Rect2f, Rect2f)
58+
{
59+
};
60+
5761
PARAM_TEST_CASE(Video, cv::cuda::DeviceInfo, std::string)
5862
{
5963
};
@@ -177,6 +181,47 @@ CUDA_TEST_P(CheckKeyFrame, Reader)
177181
}
178182
}
179183

184+
CUDA_TEST_P(Scaling, Reader)
185+
{
186+
cv::cuda::setDevice(GET_PARAM(0).deviceID());
187+
std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "../" + GET_PARAM(1);
188+
const Size2f targetSzIn = GET_PARAM(2);
189+
const Rect2f srcRoiIn = GET_PARAM(3);
190+
const Rect2f targetRoiIn = GET_PARAM(4);
191+
192+
GpuMat frameOr;
193+
{
194+
cv::Ptr<cv::cudacodec::VideoReader> readerGs = cv::cudacodec::createVideoReader(inputFile);
195+
readerGs->set(cudacodec::ColorFormat::GRAY);
196+
ASSERT_TRUE(readerGs->nextFrame(frameOr));
197+
}
198+
199+
cudacodec::VideoReaderInitParams params;
200+
params.targetSz = Size(frameOr.cols * targetSzIn.width, frameOr.rows * targetSzIn.height);
201+
params.srcRoi = Rect(frameOr.cols * srcRoiIn.x, frameOr.rows * srcRoiIn.y, frameOr.cols * srcRoiIn.width, frameOr.rows * srcRoiIn.height);
202+
params.targetRoi = Rect(params.targetSz.width * targetRoiIn.x, params.targetSz.height * targetRoiIn.y, params.targetSz.width * targetRoiIn.width,
203+
params.targetSz.height * targetRoiIn.height);
204+
cv::Ptr<cv::cudacodec::VideoReader> reader = cv::cudacodec::createVideoReader(inputFile, {}, params);
205+
reader->set(cudacodec::ColorFormat::GRAY);
206+
GpuMat frame;
207+
ASSERT_TRUE(reader->nextFrame(frame));
208+
const cudacodec::FormatInfo format = reader->format();
209+
Size targetSzOut;
210+
targetSzOut.width = params.targetSz.width - params.targetSz.width % 2; targetSzOut.height = params.targetSz.height - params.targetSz.height % 2;
211+
Rect srcRoiOut, targetRoiOut;
212+
srcRoiOut.x = params.srcRoi.x - params.srcRoi.x % 4; srcRoiOut.width = params.srcRoi.width - params.srcRoi.width % 4;
213+
srcRoiOut.y = params.srcRoi.y - params.srcRoi.y % 2; srcRoiOut.height = params.srcRoi.height - params.srcRoi.height % 2;
214+
targetRoiOut.x = params.targetRoi.x - params.targetRoi.x % 4; targetRoiOut.width = params.targetRoi.width - params.targetRoi.width % 4;
215+
targetRoiOut.y = params.targetRoi.y - params.targetRoi.y % 2; targetRoiOut.height = params.targetRoi.height - params.targetRoi.height % 2;
216+
ASSERT_TRUE(format.valid && format.targetSz == targetSzOut && format.srcRoi == srcRoiOut && format.targetRoi == targetRoiOut);
217+
ASSERT_TRUE(frame.size() == targetSzOut);
218+
GpuMat frameGs;
219+
cv::cuda::resize(frameOr(srcRoiOut), frameGs, targetRoiOut.size(), 0, 0, INTER_AREA);
220+
// assert on mean absolute error due to different resize algorithms
221+
const double mae = cv::cuda::norm(frameGs, frame(targetRoiOut), NORM_L1)/frameGs.size().area();
222+
ASSERT_LT(mae, 2.35);
223+
}
224+
180225
CUDA_TEST_P(Video, Reader)
181226
{
182227
cv::cuda::setDevice(GET_PARAM(0).deviceID());
@@ -431,7 +476,14 @@ INSTANTIATE_TEST_CASE_P(CUDA_Codec, CheckSet, testing::Combine(
431476
ALL_DEVICES,
432477
testing::Values("highgui/video/big_buck_bunny.mp4")));
433478

434-
#define VIDEO_SRC_R "highgui/video/big_buck_bunny.mp4", "cv/video/768x576.avi", "cv/video/1920x1080.avi", "highgui/video/big_buck_bunny.avi", \
479+
#define VIDEO_SRC_SCALING "highgui/video/big_buck_bunny.mp4"
480+
#define TARGET_SZ Size2f(1,1), Size2f(0.8,0.9), Size2f(2.3,1.8)
481+
#define SRC_ROI Rect2f(0,0,1,1), Rect2f(0.25,0.25,0.5,0.5)
482+
#define TARGET_ROI Rect2f(0,0,1,1), Rect2f(0.2,0.3,0.6,0.7)
483+
INSTANTIATE_TEST_CASE_P(CUDA_Codec, Scaling, testing::Combine(
484+
ALL_DEVICES, testing::Values(VIDEO_SRC_SCALING), testing::Values(TARGET_SZ), testing::Values(SRC_ROI), testing::Values(TARGET_ROI)));
485+
486+
#define VIDEO_SRC_R "highgui/video/big_buck_bunny.mp4", "cv/video/768x576.avi", "cv/video/1920x1080.avi", "highgui/video/big_buck_bunny.avi", \
435487
"highgui/video/big_buck_bunny.h264", "highgui/video/big_buck_bunny.h265", "highgui/video/big_buck_bunny.mpg", \
436488
"highgui/video/sample_322x242_15frames.yuv420p.libvpx-vp9.mp4", "highgui/video/sample_322x242_15frames.yuv420p.libaom-av1.mp4", \
437489
"cv/tracking/faceocc2/data/faceocc2.webm"

0 commit comments

Comments
 (0)