Skip to content

Commit 29e72f9

Browse files
committed
Merge branch 'nvdec-h265' into nvdec-av1
2 parents 8ad66ce + c42388f commit 29e72f9

22 files changed

+892
-608
lines changed

src/torchcodec/_core/BetaCudaDeviceInterface.cpp

Lines changed: 65 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -35,22 +35,19 @@ static bool g_cuda_beta = registerDeviceInterface(
3535

3636
static int CUDAAPI
3737
pfnSequenceCallback(void* pUserData, CUVIDEOFORMAT* videoFormat) {
38-
BetaCudaDeviceInterface* decoder =
39-
static_cast<BetaCudaDeviceInterface*>(pUserData);
38+
auto decoder = static_cast<BetaCudaDeviceInterface*>(pUserData);
4039
return decoder->streamPropertyChange(videoFormat);
4140
}
4241

4342
static int CUDAAPI
4443
pfnDecodePictureCallback(void* pUserData, CUVIDPICPARAMS* picParams) {
45-
BetaCudaDeviceInterface* decoder =
46-
static_cast<BetaCudaDeviceInterface*>(pUserData);
44+
auto decoder = static_cast<BetaCudaDeviceInterface*>(pUserData);
4745
return decoder->frameReadyForDecoding(picParams);
4846
}
4947

5048
static int CUDAAPI
5149
pfnDisplayPictureCallback(void* pUserData, CUVIDPARSERDISPINFO* dispInfo) {
52-
BetaCudaDeviceInterface* decoder =
53-
static_cast<BetaCudaDeviceInterface*>(pUserData);
50+
auto decoder = static_cast<BetaCudaDeviceInterface*>(pUserData);
5451
return decoder->frameReadyInDisplayOrder(dispInfo);
5552
}
5653

@@ -112,27 +109,29 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
112109
caps.nMaxMBCount);
113110

114111
// Decoder creation parameters, taken from DALI
115-
CUVIDDECODECREATEINFO decoder_info = {};
116-
decoder_info.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8;
117-
decoder_info.ChromaFormat = videoFormat->chroma_format;
118-
decoder_info.CodecType = videoFormat->codec;
119-
decoder_info.ulHeight = videoFormat->coded_height;
120-
decoder_info.ulWidth = videoFormat->coded_width;
121-
decoder_info.ulMaxHeight = videoFormat->coded_height;
122-
decoder_info.ulMaxWidth = videoFormat->coded_width;
123-
decoder_info.ulTargetHeight =
112+
CUVIDDECODECREATEINFO decoderParams = {};
113+
decoderParams.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8;
114+
decoderParams.ChromaFormat = videoFormat->chroma_format;
115+
decoderParams.OutputFormat = cudaVideoSurfaceFormat_NV12;
116+
decoderParams.ulCreationFlags = cudaVideoCreate_Default;
117+
decoderParams.CodecType = videoFormat->codec;
118+
decoderParams.ulHeight = videoFormat->coded_height;
119+
decoderParams.ulWidth = videoFormat->coded_width;
120+
decoderParams.ulMaxHeight = videoFormat->coded_height;
121+
decoderParams.ulMaxWidth = videoFormat->coded_width;
122+
decoderParams.ulTargetHeight =
124123
videoFormat->display_area.bottom - videoFormat->display_area.top;
125-
decoder_info.ulTargetWidth =
124+
decoderParams.ulTargetWidth =
126125
videoFormat->display_area.right - videoFormat->display_area.left;
127-
decoder_info.ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces;
128-
decoder_info.ulNumOutputSurfaces = 2;
129-
decoder_info.display_area.left = videoFormat->display_area.left;
130-
decoder_info.display_area.right = videoFormat->display_area.right;
131-
decoder_info.display_area.top = videoFormat->display_area.top;
132-
decoder_info.display_area.bottom = videoFormat->display_area.bottom;
126+
decoderParams.ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces;
127+
decoderParams.ulNumOutputSurfaces = 2;
128+
decoderParams.display_area.left = videoFormat->display_area.left;
129+
decoderParams.display_area.right = videoFormat->display_area.right;
130+
decoderParams.display_area.top = videoFormat->display_area.top;
131+
decoderParams.display_area.bottom = videoFormat->display_area.bottom;
133132

134133
CUvideodecoder* decoder = new CUvideodecoder();
135-
result = cuvidCreateDecoder(decoder, &decoder_info);
134+
result = cuvidCreateDecoder(decoder, &decoderParams);
136135
TORCH_CHECK(
137136
result == CUDA_SUCCESS, "Failed to create NVDEC decoder: ", result);
138137
return UniqueCUvideodecoder(decoder, CUvideoDecoderDeleter{});
@@ -182,6 +181,44 @@ BetaCudaDeviceInterface::~BetaCudaDeviceInterface() {
182181
}
183182
}
184183

184+
void BetaCudaDeviceInterface::initialize(
185+
const AVStream* avStream,
186+
const UniqueDecodingAVFormatContext& avFormatCtx) {
187+
torch::Tensor dummyTensorForCudaInitialization = torch::empty(
188+
{1}, torch::TensorOptions().dtype(torch::kUInt8).device(device_));
189+
190+
auto cudaDevice = torch::Device(torch::kCUDA);
191+
defaultCudaInterface_ =
192+
std::unique_ptr<DeviceInterface>(createDeviceInterface(cudaDevice));
193+
AVCodecContext dummyCodecContext = {};
194+
defaultCudaInterface_->initialize(avStream, avFormatCtx);
195+
defaultCudaInterface_->registerHardwareDeviceWithCodec(&dummyCodecContext);
196+
197+
TORCH_CHECK(avStream != nullptr, "AVStream cannot be null");
198+
timeBase_ = avStream->time_base;
199+
200+
const AVCodecParameters* codecPar = avStream->codecpar;
201+
TORCH_CHECK(codecPar != nullptr, "CodecParameters cannot be null");
202+
203+
initializeBSF(codecPar, avFormatCtx);
204+
205+
// Create parser. Default values that aren't obvious are taken from DALI.
206+
CUVIDPARSERPARAMS parserParams = {};
207+
parserParams.CodecType = validateCodecSupport(codecPar->codec_id);
208+
parserParams.ulMaxNumDecodeSurfaces = 8;
209+
parserParams.ulMaxDisplayDelay = 0;
210+
// Callback setup, all are triggered by the parser within a call
211+
// to cuvidParseVideoData
212+
parserParams.pUserData = this;
213+
parserParams.pfnSequenceCallback = pfnSequenceCallback;
214+
parserParams.pfnDecodePicture = pfnDecodePictureCallback;
215+
parserParams.pfnDisplayPicture = pfnDisplayPictureCallback;
216+
217+
CUresult result = cuvidCreateVideoParser(&videoParser_, &parserParams);
218+
TORCH_CHECK(
219+
result == CUDA_SUCCESS, "Failed to create video parser: ", result);
220+
}
221+
185222
void BetaCudaDeviceInterface::initializeBSF(
186223
const AVCodecParameters* codecPar,
187224
const UniqueDecodingAVFormatContext& avFormatCtx) {
@@ -261,38 +298,6 @@ void BetaCudaDeviceInterface::initializeBSF(
261298
getFFMPEGErrorStringFromErrorCode(retVal));
262299
}
263300

264-
void BetaCudaDeviceInterface::initializeInterface(
265-
const AVStream* avStream,
266-
const UniqueDecodingAVFormatContext& avFormatCtx) {
267-
torch::Tensor dummyTensorForCudaInitialization = torch::empty(
268-
{1}, torch::TensorOptions().dtype(torch::kUInt8).device(device_));
269-
270-
TORCH_CHECK(avStream != nullptr, "AVStream cannot be null");
271-
timeBase_ = avStream->time_base;
272-
frameRateAvgFromFFmpeg_ = avStream->r_frame_rate;
273-
274-
const AVCodecParameters* codecPar = avStream->codecpar;
275-
TORCH_CHECK(codecPar != nullptr, "CodecParameters cannot be null");
276-
277-
initializeBSF(codecPar, avFormatCtx);
278-
279-
// Create parser. Default values that aren't obvious are taken from DALI.
280-
CUVIDPARSERPARAMS parserParams = {};
281-
parserParams.CodecType = validateCodecSupport(codecPar->codec_id);
282-
parserParams.ulMaxNumDecodeSurfaces = 8;
283-
parserParams.ulMaxDisplayDelay = 0;
284-
// Callback setup, all are triggered by the parser within a call
285-
// to cuvidParseVideoData
286-
parserParams.pUserData = this;
287-
parserParams.pfnSequenceCallback = pfnSequenceCallback;
288-
parserParams.pfnDecodePicture = pfnDecodePictureCallback;
289-
parserParams.pfnDisplayPicture = pfnDisplayPictureCallback;
290-
291-
CUresult result = cuvidCreateVideoParser(&videoParser_, &parserParams);
292-
TORCH_CHECK(
293-
result == CUDA_SUCCESS, "Failed to create video parser: ", result);
294-
}
295-
296301
// This callback is called by the parser within cuvidParseVideoData when there
297302
// is a change in the stream's properties (like resolution change), as specified
298303
// by CUVIDEOFORMAT. Particularly (but not just!), this is called at the very
@@ -424,6 +429,10 @@ int BetaCudaDeviceInterface::receiveFrame(UniqueAVFrame& avFrame) {
424429
CUVIDPARSERDISPINFO dispInfo = readyFrames_.front();
425430
readyFrames_.pop();
426431

432+
// TODONVDEC P1 we need to set the procParams.output_stream field to the
433+
// current CUDA stream and ensure proper synchronization. There's a related
434+
// NVDECTODO in CudaDeviceInterface.cpp where we do the necessary
435+
// synchronization for NPP.
427436
CUVIDPROCPARAMS procParams = {};
428437
procParams.progressive_frame = dispInfo.progressive_frame;
429438
procParams.top_field_first = dispInfo.top_field_first;
@@ -562,8 +571,6 @@ void BetaCudaDeviceInterface::flush() {
562571
}
563572

564573
void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
565-
const VideoStreamOptions& videoStreamOptions,
566-
const AVRational& timeBase,
567574
UniqueAVFrame& avFrame,
568575
FrameOutput& frameOutput,
569576
std::optional<torch::Tensor> preAllocatedOutputTensor) {
@@ -574,20 +581,8 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
574581
// TODONVDEC P1: we use the 'default' cuda device interface for color
575582
// conversion. That's a temporary hack to make things work. we should abstract
576583
// the color conversion stuff separately.
577-
if (!defaultCudaInterface_) {
578-
auto cudaDevice = torch::Device(torch::kCUDA);
579-
defaultCudaInterface_ =
580-
std::unique_ptr<DeviceInterface>(createDeviceInterface(cudaDevice));
581-
AVCodecContext dummyCodecContext = {};
582-
defaultCudaInterface_->initializeContext(&dummyCodecContext);
583-
}
584-
585584
defaultCudaInterface_->convertAVFrameToFrameOutput(
586-
videoStreamOptions,
587-
timeBase,
588-
avFrame,
589-
frameOutput,
590-
preAllocatedOutputTensor);
585+
avFrame, frameOutput, preAllocatedOutputTensor);
591586
}
592587

593588
} // namespace facebook::torchcodec

src/torchcodec/_core/BetaCudaDeviceInterface.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,11 @@ class BetaCudaDeviceInterface : public DeviceInterface {
3737
explicit BetaCudaDeviceInterface(const torch::Device& device);
3838
virtual ~BetaCudaDeviceInterface();
3939

40-
void initializeInterface(
41-
const AVStream* stream,
40+
void initialize(
41+
const AVStream* avStream,
4242
const UniqueDecodingAVFormatContext& avFormatCtx) override;
4343

4444
void convertAVFrameToFrameOutput(
45-
const VideoStreamOptions& videoStreamOptions,
46-
const AVRational& timeBase,
4745
UniqueAVFrame& avFrame,
4846
FrameOutput& frameOutput,
4947
std::optional<torch::Tensor> preAllocatedOutputTensor =

src/torchcodec/_core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ function(make_torchcodec_libraries
9595
SingleStreamDecoder.cpp
9696
Encoder.cpp
9797
ValidationUtils.cpp
98+
Transform.cpp
9899
)
99100

100101
if(ENABLE_CUDA)

0 commit comments

Comments
 (0)