@@ -35,22 +35,19 @@ static bool g_cuda_beta = registerDeviceInterface(
3535
3636static int CUDAAPI
3737pfnSequenceCallback (void * pUserData, CUVIDEOFORMAT* videoFormat) {
38- BetaCudaDeviceInterface* decoder =
39- static_cast <BetaCudaDeviceInterface*>(pUserData);
38+ auto decoder = static_cast <BetaCudaDeviceInterface*>(pUserData);
4039 return decoder->streamPropertyChange (videoFormat);
4140}
4241
4342static int CUDAAPI
4443pfnDecodePictureCallback (void * pUserData, CUVIDPICPARAMS* picParams) {
45- BetaCudaDeviceInterface* decoder =
46- static_cast <BetaCudaDeviceInterface*>(pUserData);
44+ auto decoder = static_cast <BetaCudaDeviceInterface*>(pUserData);
4745 return decoder->frameReadyForDecoding (picParams);
4846}
4947
5048static int CUDAAPI
5149pfnDisplayPictureCallback (void * pUserData, CUVIDPARSERDISPINFO* dispInfo) {
52- BetaCudaDeviceInterface* decoder =
53- static_cast <BetaCudaDeviceInterface*>(pUserData);
50+ auto decoder = static_cast <BetaCudaDeviceInterface*>(pUserData);
5451 return decoder->frameReadyInDisplayOrder (dispInfo);
5552}
5653
@@ -112,27 +109,29 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
112109 caps.nMaxMBCount );
113110
114111 // Decoder creation parameters, taken from DALI
115- CUVIDDECODECREATEINFO decoder_info = {};
116- decoder_info.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8 ;
117- decoder_info.ChromaFormat = videoFormat->chroma_format ;
118- decoder_info.CodecType = videoFormat->codec ;
119- decoder_info.ulHeight = videoFormat->coded_height ;
120- decoder_info.ulWidth = videoFormat->coded_width ;
121- decoder_info.ulMaxHeight = videoFormat->coded_height ;
122- decoder_info.ulMaxWidth = videoFormat->coded_width ;
123- decoder_info.ulTargetHeight =
112+ CUVIDDECODECREATEINFO decoderParams = {};
113+ decoderParams.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8 ;
114+ decoderParams.ChromaFormat = videoFormat->chroma_format ;
115+ decoderParams.OutputFormat = cudaVideoSurfaceFormat_NV12;
116+ decoderParams.ulCreationFlags = cudaVideoCreate_Default;
117+ decoderParams.CodecType = videoFormat->codec ;
118+ decoderParams.ulHeight = videoFormat->coded_height ;
119+ decoderParams.ulWidth = videoFormat->coded_width ;
120+ decoderParams.ulMaxHeight = videoFormat->coded_height ;
121+ decoderParams.ulMaxWidth = videoFormat->coded_width ;
122+ decoderParams.ulTargetHeight =
124123 videoFormat->display_area .bottom - videoFormat->display_area .top ;
125- decoder_info .ulTargetWidth =
124+ decoderParams .ulTargetWidth =
126125 videoFormat->display_area .right - videoFormat->display_area .left ;
127- decoder_info .ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces ;
128- decoder_info .ulNumOutputSurfaces = 2 ;
129- decoder_info .display_area .left = videoFormat->display_area .left ;
130- decoder_info .display_area .right = videoFormat->display_area .right ;
131- decoder_info .display_area .top = videoFormat->display_area .top ;
132- decoder_info .display_area .bottom = videoFormat->display_area .bottom ;
126+ decoderParams .ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces ;
127+ decoderParams .ulNumOutputSurfaces = 2 ;
128+ decoderParams .display_area .left = videoFormat->display_area .left ;
129+ decoderParams .display_area .right = videoFormat->display_area .right ;
130+ decoderParams .display_area .top = videoFormat->display_area .top ;
131+ decoderParams .display_area .bottom = videoFormat->display_area .bottom ;
133132
134133 CUvideodecoder* decoder = new CUvideodecoder ();
135- result = cuvidCreateDecoder (decoder, &decoder_info );
134+ result = cuvidCreateDecoder (decoder, &decoderParams );
136135 TORCH_CHECK (
137136 result == CUDA_SUCCESS, " Failed to create NVDEC decoder: " , result);
138137 return UniqueCUvideodecoder (decoder, CUvideoDecoderDeleter{});
@@ -182,6 +181,44 @@ BetaCudaDeviceInterface::~BetaCudaDeviceInterface() {
182181 }
183182}
184183
184+ void BetaCudaDeviceInterface::initialize (
185+ const AVStream* avStream,
186+ const UniqueDecodingAVFormatContext& avFormatCtx) {
187+ torch::Tensor dummyTensorForCudaInitialization = torch::empty (
188+ {1 }, torch::TensorOptions ().dtype (torch::kUInt8 ).device (device_));
189+
190+ auto cudaDevice = torch::Device (torch::kCUDA );
191+ defaultCudaInterface_ =
192+ std::unique_ptr<DeviceInterface>(createDeviceInterface (cudaDevice));
193+ AVCodecContext dummyCodecContext = {};
194+ defaultCudaInterface_->initialize (avStream, avFormatCtx);
195+ defaultCudaInterface_->registerHardwareDeviceWithCodec (&dummyCodecContext);
196+
197+ TORCH_CHECK (avStream != nullptr , " AVStream cannot be null" );
198+ timeBase_ = avStream->time_base ;
199+
200+ const AVCodecParameters* codecPar = avStream->codecpar ;
201+ TORCH_CHECK (codecPar != nullptr , " CodecParameters cannot be null" );
202+
203+ initializeBSF (codecPar, avFormatCtx);
204+
205+ // Create parser. Default values that aren't obvious are taken from DALI.
206+ CUVIDPARSERPARAMS parserParams = {};
207+ parserParams.CodecType = validateCodecSupport (codecPar->codec_id );
208+ parserParams.ulMaxNumDecodeSurfaces = 8 ;
209+ parserParams.ulMaxDisplayDelay = 0 ;
210+ // Callback setup, all are triggered by the parser within a call
211+ // to cuvidParseVideoData
212+ parserParams.pUserData = this ;
213+ parserParams.pfnSequenceCallback = pfnSequenceCallback;
214+ parserParams.pfnDecodePicture = pfnDecodePictureCallback;
215+ parserParams.pfnDisplayPicture = pfnDisplayPictureCallback;
216+
217+ CUresult result = cuvidCreateVideoParser (&videoParser_, &parserParams);
218+ TORCH_CHECK (
219+ result == CUDA_SUCCESS, " Failed to create video parser: " , result);
220+ }
221+
185222void BetaCudaDeviceInterface::initializeBSF (
186223 const AVCodecParameters* codecPar,
187224 const UniqueDecodingAVFormatContext& avFormatCtx) {
@@ -261,38 +298,6 @@ void BetaCudaDeviceInterface::initializeBSF(
261298 getFFMPEGErrorStringFromErrorCode (retVal));
262299}
263300
264- void BetaCudaDeviceInterface::initializeInterface (
265- const AVStream* avStream,
266- const UniqueDecodingAVFormatContext& avFormatCtx) {
267- torch::Tensor dummyTensorForCudaInitialization = torch::empty (
268- {1 }, torch::TensorOptions ().dtype (torch::kUInt8 ).device (device_));
269-
270- TORCH_CHECK (avStream != nullptr , " AVStream cannot be null" );
271- timeBase_ = avStream->time_base ;
272- frameRateAvgFromFFmpeg_ = avStream->r_frame_rate ;
273-
274- const AVCodecParameters* codecPar = avStream->codecpar ;
275- TORCH_CHECK (codecPar != nullptr , " CodecParameters cannot be null" );
276-
277- initializeBSF (codecPar, avFormatCtx);
278-
279- // Create parser. Default values that aren't obvious are taken from DALI.
280- CUVIDPARSERPARAMS parserParams = {};
281- parserParams.CodecType = validateCodecSupport (codecPar->codec_id );
282- parserParams.ulMaxNumDecodeSurfaces = 8 ;
283- parserParams.ulMaxDisplayDelay = 0 ;
284- // Callback setup, all are triggered by the parser within a call
285- // to cuvidParseVideoData
286- parserParams.pUserData = this ;
287- parserParams.pfnSequenceCallback = pfnSequenceCallback;
288- parserParams.pfnDecodePicture = pfnDecodePictureCallback;
289- parserParams.pfnDisplayPicture = pfnDisplayPictureCallback;
290-
291- CUresult result = cuvidCreateVideoParser (&videoParser_, &parserParams);
292- TORCH_CHECK (
293- result == CUDA_SUCCESS, " Failed to create video parser: " , result);
294- }
295-
296301// This callback is called by the parser within cuvidParseVideoData when there
297302// is a change in the stream's properties (like resolution change), as specified
298303// by CUVIDEOFORMAT. Particularly (but not just!), this is called at the very
@@ -424,6 +429,10 @@ int BetaCudaDeviceInterface::receiveFrame(UniqueAVFrame& avFrame) {
424429 CUVIDPARSERDISPINFO dispInfo = readyFrames_.front ();
425430 readyFrames_.pop ();
426431
432+ // TODONVDEC P1 we need to set the procParams.output_stream field to the
433+ // current CUDA stream and ensure proper synchronization. There's a related
434+ // NVDECTODO in CudaDeviceInterface.cpp where we do the necessary
435+ // synchronization for NPP.
427436 CUVIDPROCPARAMS procParams = {};
428437 procParams.progressive_frame = dispInfo.progressive_frame ;
429438 procParams.top_field_first = dispInfo.top_field_first ;
@@ -562,8 +571,6 @@ void BetaCudaDeviceInterface::flush() {
562571}
563572
564573void BetaCudaDeviceInterface::convertAVFrameToFrameOutput (
565- const VideoStreamOptions& videoStreamOptions,
566- const AVRational& timeBase,
567574 UniqueAVFrame& avFrame,
568575 FrameOutput& frameOutput,
569576 std::optional<torch::Tensor> preAllocatedOutputTensor) {
@@ -574,20 +581,8 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
574581 // TODONVDEC P1: we use the 'default' cuda device interface for color
575582 // conversion. That's a temporary hack to make things work. we should abstract
576583 // the color conversion stuff separately.
577- if (!defaultCudaInterface_) {
578- auto cudaDevice = torch::Device (torch::kCUDA );
579- defaultCudaInterface_ =
580- std::unique_ptr<DeviceInterface>(createDeviceInterface (cudaDevice));
581- AVCodecContext dummyCodecContext = {};
582- defaultCudaInterface_->initializeContext (&dummyCodecContext);
583- }
584-
585584 defaultCudaInterface_->convertAVFrameToFrameOutput (
586- videoStreamOptions,
587- timeBase,
588- avFrame,
589- frameOutput,
590- preAllocatedOutputTensor);
585+ avFrame, frameOutput, preAllocatedOutputTensor);
591586}
592587
593588} // namespace facebook::torchcodec
0 commit comments