@@ -35,22 +35,19 @@ static bool g_cuda_beta = registerDeviceInterface(
3535
3636static int CUDAAPI
3737pfnSequenceCallback (void * pUserData, CUVIDEOFORMAT* videoFormat) {
38- BetaCudaDeviceInterface* decoder =
39- static_cast <BetaCudaDeviceInterface*>(pUserData);
38+ auto decoder = static_cast <BetaCudaDeviceInterface*>(pUserData);
4039 return decoder->streamPropertyChange (videoFormat);
4140}
4241
4342static int CUDAAPI
4443pfnDecodePictureCallback (void * pUserData, CUVIDPICPARAMS* picParams) {
45- BetaCudaDeviceInterface* decoder =
46- static_cast <BetaCudaDeviceInterface*>(pUserData);
44+ auto decoder = static_cast <BetaCudaDeviceInterface*>(pUserData);
4745 return decoder->frameReadyForDecoding (picParams);
4846}
4947
5048static int CUDAAPI
5149pfnDisplayPictureCallback (void * pUserData, CUVIDPARSERDISPINFO* dispInfo) {
52- BetaCudaDeviceInterface* decoder =
53- static_cast <BetaCudaDeviceInterface*>(pUserData);
50+ auto decoder = static_cast <BetaCudaDeviceInterface*>(pUserData);
5451 return decoder->frameReadyInDisplayOrder (dispInfo);
5552}
5653
@@ -112,27 +109,29 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
112109 caps.nMaxMBCount );
113110
114111 // Decoder creation parameters, taken from DALI
115- CUVIDDECODECREATEINFO decoder_info = {};
116- decoder_info.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8 ;
117- decoder_info.ChromaFormat = videoFormat->chroma_format ;
118- decoder_info.CodecType = videoFormat->codec ;
119- decoder_info.ulHeight = videoFormat->coded_height ;
120- decoder_info.ulWidth = videoFormat->coded_width ;
121- decoder_info.ulMaxHeight = videoFormat->coded_height ;
122- decoder_info.ulMaxWidth = videoFormat->coded_width ;
123- decoder_info.ulTargetHeight =
112+ CUVIDDECODECREATEINFO decoderParams = {};
113+ decoderParams.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8 ;
114+ decoderParams.ChromaFormat = videoFormat->chroma_format ;
115+ decoderParams.OutputFormat = cudaVideoSurfaceFormat_NV12;
116+ decoderParams.ulCreationFlags = cudaVideoCreate_Default;
117+ decoderParams.CodecType = videoFormat->codec ;
118+ decoderParams.ulHeight = videoFormat->coded_height ;
119+ decoderParams.ulWidth = videoFormat->coded_width ;
120+ decoderParams.ulMaxHeight = videoFormat->coded_height ;
121+ decoderParams.ulMaxWidth = videoFormat->coded_width ;
122+ decoderParams.ulTargetHeight =
124123 videoFormat->display_area .bottom - videoFormat->display_area .top ;
125- decoder_info .ulTargetWidth =
124+ decoderParams .ulTargetWidth =
126125 videoFormat->display_area .right - videoFormat->display_area .left ;
127- decoder_info .ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces ;
128- decoder_info .ulNumOutputSurfaces = 2 ;
129- decoder_info .display_area .left = videoFormat->display_area .left ;
130- decoder_info .display_area .right = videoFormat->display_area .right ;
131- decoder_info .display_area .top = videoFormat->display_area .top ;
132- decoder_info .display_area .bottom = videoFormat->display_area .bottom ;
126+ decoderParams .ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces ;
127+ decoderParams .ulNumOutputSurfaces = 2 ;
128+ decoderParams .display_area .left = videoFormat->display_area .left ;
129+ decoderParams .display_area .right = videoFormat->display_area .right ;
130+ decoderParams .display_area .top = videoFormat->display_area .top ;
131+ decoderParams .display_area .bottom = videoFormat->display_area .bottom ;
133132
134133 CUvideodecoder* decoder = new CUvideodecoder ();
135- result = cuvidCreateDecoder (decoder, &decoder_info );
134+ result = cuvidCreateDecoder (decoder, &decoderParams );
136135 TORCH_CHECK (
137136 result == CUDA_SUCCESS, " Failed to create NVDEC decoder: " , result);
138137 return UniqueCUvideodecoder (decoder, CUvideoDecoderDeleter{});
@@ -182,6 +181,45 @@ BetaCudaDeviceInterface::~BetaCudaDeviceInterface() {
182181 }
183182}
184183
184+ void BetaCudaDeviceInterface::initialize (
185+ const AVStream* avStream,
186+ const UniqueDecodingAVFormatContext& avFormatCtx) {
187+ torch::Tensor dummyTensorForCudaInitialization = torch::empty (
188+ {1 }, torch::TensorOptions ().dtype (torch::kUInt8 ).device (device_));
189+
190+ auto cudaDevice = torch::Device (torch::kCUDA );
191+ defaultCudaInterface_ =
192+ std::unique_ptr<DeviceInterface>(createDeviceInterface (cudaDevice));
193+ AVCodecContext dummyCodecContext = {};
194+ defaultCudaInterface_->initialize (avStream, avFormatCtx);
195+ defaultCudaInterface_->registerHardwareDeviceWithCodec (&dummyCodecContext);
196+
197+ TORCH_CHECK (avStream != nullptr , " AVStream cannot be null" );
198+ timeBase_ = avStream->time_base ;
199+ frameRateAvgFromFFmpeg_ = avStream->r_frame_rate ;
200+
201+ const AVCodecParameters* codecPar = avStream->codecpar ;
202+ TORCH_CHECK (codecPar != nullptr , " CodecParameters cannot be null" );
203+
204+ initializeBSF (codecPar, avFormatCtx);
205+
206+ // Create parser. Default values that aren't obvious are taken from DALI.
207+ CUVIDPARSERPARAMS parserParams = {};
208+ parserParams.CodecType = validateCodecSupport (codecPar->codec_id );
209+ parserParams.ulMaxNumDecodeSurfaces = 8 ;
210+ parserParams.ulMaxDisplayDelay = 0 ;
211+ // Callback setup, all are triggered by the parser within a call
212+ // to cuvidParseVideoData
213+ parserParams.pUserData = this ;
214+ parserParams.pfnSequenceCallback = pfnSequenceCallback;
215+ parserParams.pfnDecodePicture = pfnDecodePictureCallback;
216+ parserParams.pfnDisplayPicture = pfnDisplayPictureCallback;
217+
218+ CUresult result = cuvidCreateVideoParser (&videoParser_, &parserParams);
219+ TORCH_CHECK (
220+ result == CUDA_SUCCESS, " Failed to create video parser: " , result);
221+ }
222+
185223void BetaCudaDeviceInterface::initializeBSF (
186224 const AVCodecParameters* codecPar,
187225 const UniqueDecodingAVFormatContext& avFormatCtx) {
@@ -261,38 +299,6 @@ void BetaCudaDeviceInterface::initializeBSF(
261299 getFFMPEGErrorStringFromErrorCode (retVal));
262300}
263301
264- void BetaCudaDeviceInterface::initializeInterface (
265- const AVStream* avStream,
266- const UniqueDecodingAVFormatContext& avFormatCtx) {
267- torch::Tensor dummyTensorForCudaInitialization = torch::empty (
268- {1 }, torch::TensorOptions ().dtype (torch::kUInt8 ).device (device_));
269-
270- TORCH_CHECK (avStream != nullptr , " AVStream cannot be null" );
271- timeBase_ = avStream->time_base ;
272- frameRateAvgFromFFmpeg_ = avStream->r_frame_rate ;
273-
274- const AVCodecParameters* codecPar = avStream->codecpar ;
275- TORCH_CHECK (codecPar != nullptr , " CodecParameters cannot be null" );
276-
277- initializeBSF (codecPar, avFormatCtx);
278-
279- // Create parser. Default values that aren't obvious are taken from DALI.
280- CUVIDPARSERPARAMS parserParams = {};
281- parserParams.CodecType = validateCodecSupport (codecPar->codec_id );
282- parserParams.ulMaxNumDecodeSurfaces = 8 ;
283- parserParams.ulMaxDisplayDelay = 0 ;
284- // Callback setup, all are triggered by the parser within a call
285- // to cuvidParseVideoData
286- parserParams.pUserData = this ;
287- parserParams.pfnSequenceCallback = pfnSequenceCallback;
288- parserParams.pfnDecodePicture = pfnDecodePictureCallback;
289- parserParams.pfnDisplayPicture = pfnDisplayPictureCallback;
290-
291- CUresult result = cuvidCreateVideoParser (&videoParser_, &parserParams);
292- TORCH_CHECK (
293- result == CUDA_SUCCESS, " Failed to create video parser: " , result);
294- }
295-
296302// This callback is called by the parser within cuvidParseVideoData when there
297303// is a change in the stream's properties (like resolution change), as specified
298304// by CUVIDEOFORMAT. Particularly (but not just!), this is called at the very
@@ -418,12 +424,16 @@ int BetaCudaDeviceInterface::frameReadyInDisplayOrder(
418424int BetaCudaDeviceInterface::receiveFrame (UniqueAVFrame& avFrame) {
419425 if (readyFrames_.empty ()) {
420426 // No frame found, instruct caller to try again later after sending more
421- // packets, or to stop if EOF was already sent .
422- return eofSent_ ? AVERROR_EOF : AVERROR (EAGAIN);
427+ // packets.
428+ return AVERROR (EAGAIN);
423429 }
424430 CUVIDPARSERDISPINFO dispInfo = readyFrames_.front ();
425431 readyFrames_.pop ();
426432
433+ // TODONVDEC P1 we need to set the procParams.output_stream field to the
434+ // current CUDA stream and ensure proper synchronization. There's a related
435+ // NVDECTODO in CudaDeviceInterface.cpp where we do the necessary
436+ // synchronization for NPP.
427437 CUVIDPROCPARAMS procParams = {};
428438 procParams.progressive_frame = dispInfo.progressive_frame ;
429439 procParams.top_field_first = dispInfo.top_field_first ;
@@ -562,8 +572,6 @@ void BetaCudaDeviceInterface::flush() {
562572}
563573
564574void BetaCudaDeviceInterface::convertAVFrameToFrameOutput (
565- const VideoStreamOptions& videoStreamOptions,
566- const AVRational& timeBase,
567575 UniqueAVFrame& avFrame,
568576 FrameOutput& frameOutput,
569577 std::optional<torch::Tensor> preAllocatedOutputTensor) {
@@ -574,20 +582,8 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
574582 // TODONVDEC P1: we use the 'default' cuda device interface for color
575583 // conversion. That's a temporary hack to make things work. we should abstract
576584 // the color conversion stuff separately.
577- if (!defaultCudaInterface_) {
578- auto cudaDevice = torch::Device (torch::kCUDA );
579- defaultCudaInterface_ =
580- std::unique_ptr<DeviceInterface>(createDeviceInterface (cudaDevice));
581- AVCodecContext dummyCodecContext = {};
582- defaultCudaInterface_->initializeContext (&dummyCodecContext);
583- }
584-
585585 defaultCudaInterface_->convertAVFrameToFrameOutput (
586- videoStreamOptions,
587- timeBase,
588- avFrame,
589- frameOutput,
590- preAllocatedOutputTensor);
586+ avFrame, frameOutput, preAllocatedOutputTensor);
591587}
592588
593589} // namespace facebook::torchcodec
0 commit comments