Skip to content

Commit 121a038

Browse files
committed
Merge branch 'main' of github.com:pytorch/torchcodec into nvdec-tests
2 parents f8f0402 + bcbb889 commit 121a038

23 files changed

+907
-614
lines changed

src/torchcodec/_core/BetaCudaDeviceInterface.cpp

Lines changed: 68 additions & 72 deletions
Original file line number | Diff line number | Diff line change
@@ -35,22 +35,19 @@ static bool g_cuda_beta = registerDeviceInterface(
3535

3636
// Parser sequence callback (assigned to CUVIDPARSERPARAMS::pfnSequenceCallback
// in BetaCudaDeviceInterface::initialize). pUserData is cast back to the
// BetaCudaDeviceInterface that registered it, and videoFormat is forwarded to
// streamPropertyChange(); that method's return value is returned unchanged.
static int CUDAAPI
3737
pfnSequenceCallback(void* pUserData, CUVIDEOFORMAT* videoFormat) {
38-
BetaCudaDeviceInterface* decoder =
39-
static_cast<BetaCudaDeviceInterface*>(pUserData);
38+
auto decoder = static_cast<BetaCudaDeviceInterface*>(pUserData);
4039
return decoder->streamPropertyChange(videoFormat);
4140
}
4241

4342
// Parser decode-picture callback (assigned to
// CUVIDPARSERPARAMS::pfnDecodePicture in BetaCudaDeviceInterface::initialize).
// pUserData is cast back to the BetaCudaDeviceInterface that registered it,
// and picParams is forwarded to frameReadyForDecoding(); that method's return
// value is returned unchanged.
static int CUDAAPI
4443
pfnDecodePictureCallback(void* pUserData, CUVIDPICPARAMS* picParams) {
45-
BetaCudaDeviceInterface* decoder =
46-
static_cast<BetaCudaDeviceInterface*>(pUserData);
44+
auto decoder = static_cast<BetaCudaDeviceInterface*>(pUserData);
4745
return decoder->frameReadyForDecoding(picParams);
4846
}
4947

5048
// Parser display-picture callback (assigned to
// CUVIDPARSERPARAMS::pfnDisplayPicture in BetaCudaDeviceInterface::initialize).
// pUserData is cast back to the BetaCudaDeviceInterface that registered it,
// and dispInfo is forwarded to frameReadyInDisplayOrder(); that method's
// return value is returned unchanged.
static int CUDAAPI
5149
pfnDisplayPictureCallback(void* pUserData, CUVIDPARSERDISPINFO* dispInfo) {
52-
BetaCudaDeviceInterface* decoder =
53-
static_cast<BetaCudaDeviceInterface*>(pUserData);
50+
auto decoder = static_cast<BetaCudaDeviceInterface*>(pUserData);
5451
return decoder->frameReadyInDisplayOrder(dispInfo);
5552
}
5653

@@ -112,27 +109,29 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
112109
caps.nMaxMBCount);
113110

114111
// Decoder creation parameters, taken from DALI
115-
CUVIDDECODECREATEINFO decoder_info = {};
116-
decoder_info.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8;
117-
decoder_info.ChromaFormat = videoFormat->chroma_format;
118-
decoder_info.CodecType = videoFormat->codec;
119-
decoder_info.ulHeight = videoFormat->coded_height;
120-
decoder_info.ulWidth = videoFormat->coded_width;
121-
decoder_info.ulMaxHeight = videoFormat->coded_height;
122-
decoder_info.ulMaxWidth = videoFormat->coded_width;
123-
decoder_info.ulTargetHeight =
112+
CUVIDDECODECREATEINFO decoderParams = {};
113+
decoderParams.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8;
114+
decoderParams.ChromaFormat = videoFormat->chroma_format;
115+
decoderParams.OutputFormat = cudaVideoSurfaceFormat_NV12;
116+
decoderParams.ulCreationFlags = cudaVideoCreate_Default;
117+
decoderParams.CodecType = videoFormat->codec;
118+
decoderParams.ulHeight = videoFormat->coded_height;
119+
decoderParams.ulWidth = videoFormat->coded_width;
120+
decoderParams.ulMaxHeight = videoFormat->coded_height;
121+
decoderParams.ulMaxWidth = videoFormat->coded_width;
122+
decoderParams.ulTargetHeight =
124123
videoFormat->display_area.bottom - videoFormat->display_area.top;
125-
decoder_info.ulTargetWidth =
124+
decoderParams.ulTargetWidth =
126125
videoFormat->display_area.right - videoFormat->display_area.left;
127-
decoder_info.ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces;
128-
decoder_info.ulNumOutputSurfaces = 2;
129-
decoder_info.display_area.left = videoFormat->display_area.left;
130-
decoder_info.display_area.right = videoFormat->display_area.right;
131-
decoder_info.display_area.top = videoFormat->display_area.top;
132-
decoder_info.display_area.bottom = videoFormat->display_area.bottom;
126+
decoderParams.ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces;
127+
decoderParams.ulNumOutputSurfaces = 2;
128+
decoderParams.display_area.left = videoFormat->display_area.left;
129+
decoderParams.display_area.right = videoFormat->display_area.right;
130+
decoderParams.display_area.top = videoFormat->display_area.top;
131+
decoderParams.display_area.bottom = videoFormat->display_area.bottom;
133132

134133
CUvideodecoder* decoder = new CUvideodecoder();
135-
result = cuvidCreateDecoder(decoder, &decoder_info);
134+
result = cuvidCreateDecoder(decoder, &decoderParams);
136135
TORCH_CHECK(
137136
result == CUDA_SUCCESS, "Failed to create NVDEC decoder: ", result);
138137
return UniqueCUvideodecoder(decoder, CUvideoDecoderDeleter{});
@@ -182,6 +181,45 @@ BetaCudaDeviceInterface::~BetaCudaDeviceInterface() {
182181
}
183182
}
184183

184+
// Sets up this interface for one video stream. In order, it:
//   1. allocates a 1-element uint8 tensor on device_,
//   2. creates and initializes the default CUDA device interface that
//      convertAVFrameToFrameOutput() delegates to,
//   3. records the stream's time base and r_frame_rate,
//   4. initializes the bitstream filter via initializeBSF(),
//   5. creates the NVDEC video parser with this object as callback user data.
//
// avStream:    stream to decode; must be non-null and carry non-null codecpar
//              (both enforced with TORCH_CHECK below).
// avFormatCtx: format context, forwarded to the default CUDA interface and to
//              initializeBSF().
// Fails via TORCH_CHECK if avStream or its codecpar is null, or if
// cuvidCreateVideoParser does not return CUDA_SUCCESS.
void BetaCudaDeviceInterface::initialize(
185+
const AVStream* avStream,
186+
const UniqueDecodingAVFormatContext& avFormatCtx) {
187+
// The tensor is never read afterwards; presumably (per its name) it exists
// only to force CUDA initialization on device_ — TODO confirm.
torch::Tensor dummyTensorForCudaInitialization = torch::empty(
188+
{1}, torch::TensorOptions().dtype(torch::kUInt8).device(device_));
189+
190+
// Build the default CUDA interface eagerly so that
// convertAVFrameToFrameOutput() can use defaultCudaInterface_ directly.
auto cudaDevice = torch::Device(torch::kCUDA);
191+
defaultCudaInterface_ =
192+
std::unique_ptr<DeviceInterface>(createDeviceInterface(cudaDevice));
193+
// NOTE(review): dummyCodecContext is a zero-initialized stack local that goes
// out of scope when this function returns. Confirm that
// registerHardwareDeviceWithCodec() neither retains the pointer nor stores
// references in it that would need to be released.
AVCodecContext dummyCodecContext = {};
194+
// NOTE(review): avStream is handed to the delegate here, before the null
// check further down runs. Consider hoisting that check to the top of the
// function, or confirm that the delegate validates avStream itself.
defaultCudaInterface_->initialize(avStream, avFormatCtx);
195+
defaultCudaInterface_->registerHardwareDeviceWithCodec(&dummyCodecContext);
196+
197+
TORCH_CHECK(avStream != nullptr, "AVStream cannot be null");
198+
timeBase_ = avStream->time_base;
199+
frameRateAvgFromFFmpeg_ = avStream->r_frame_rate;
200+
201+
const AVCodecParameters* codecPar = avStream->codecpar;
202+
TORCH_CHECK(codecPar != nullptr, "CodecParameters cannot be null");
203+
204+
initializeBSF(codecPar, avFormatCtx);
205+
206+
// Create parser. Default values that aren't obvious are taken from DALI.
207+
CUVIDPARSERPARAMS parserParams = {};
208+
// validateCodecSupport() maps the FFmpeg codec id to the parser's codec type;
// presumably it rejects unsupported codecs — verify against its definition.
parserParams.CodecType = validateCodecSupport(codecPar->codec_id);
209+
parserParams.ulMaxNumDecodeSurfaces = 8;
210+
parserParams.ulMaxDisplayDelay = 0;
211+
// Callback setup, all are triggered by the parser within a call
212+
// to cuvidParseVideoData
213+
// `this` is what the static callbacks receive as pUserData and cast back to
// BetaCudaDeviceInterface*.
parserParams.pUserData = this;
214+
parserParams.pfnSequenceCallback = pfnSequenceCallback;
215+
parserParams.pfnDecodePicture = pfnDecodePictureCallback;
216+
parserParams.pfnDisplayPicture = pfnDisplayPictureCallback;
217+
218+
CUresult result = cuvidCreateVideoParser(&videoParser_, &parserParams);
219+
TORCH_CHECK(
220+
result == CUDA_SUCCESS, "Failed to create video parser: ", result);
221+
}
222+
185223
void BetaCudaDeviceInterface::initializeBSF(
186224
const AVCodecParameters* codecPar,
187225
const UniqueDecodingAVFormatContext& avFormatCtx) {
@@ -261,38 +299,6 @@ void BetaCudaDeviceInterface::initializeBSF(
261299
getFFMPEGErrorStringFromErrorCode(retVal));
262300
}
263301

264-
void BetaCudaDeviceInterface::initializeInterface(
265-
const AVStream* avStream,
266-
const UniqueDecodingAVFormatContext& avFormatCtx) {
267-
torch::Tensor dummyTensorForCudaInitialization = torch::empty(
268-
{1}, torch::TensorOptions().dtype(torch::kUInt8).device(device_));
269-
270-
TORCH_CHECK(avStream != nullptr, "AVStream cannot be null");
271-
timeBase_ = avStream->time_base;
272-
frameRateAvgFromFFmpeg_ = avStream->r_frame_rate;
273-
274-
const AVCodecParameters* codecPar = avStream->codecpar;
275-
TORCH_CHECK(codecPar != nullptr, "CodecParameters cannot be null");
276-
277-
initializeBSF(codecPar, avFormatCtx);
278-
279-
// Create parser. Default values that aren't obvious are taken from DALI.
280-
CUVIDPARSERPARAMS parserParams = {};
281-
parserParams.CodecType = validateCodecSupport(codecPar->codec_id);
282-
parserParams.ulMaxNumDecodeSurfaces = 8;
283-
parserParams.ulMaxDisplayDelay = 0;
284-
// Callback setup, all are triggered by the parser within a call
285-
// to cuvidParseVideoData
286-
parserParams.pUserData = this;
287-
parserParams.pfnSequenceCallback = pfnSequenceCallback;
288-
parserParams.pfnDecodePicture = pfnDecodePictureCallback;
289-
parserParams.pfnDisplayPicture = pfnDisplayPictureCallback;
290-
291-
CUresult result = cuvidCreateVideoParser(&videoParser_, &parserParams);
292-
TORCH_CHECK(
293-
result == CUDA_SUCCESS, "Failed to create video parser: ", result);
294-
}
295-
296302
// This callback is called by the parser within cuvidParseVideoData when there
297303
// is a change in the stream's properties (like resolution change), as specified
298304
// by CUVIDEOFORMAT. Particularly (but not just!), this is called at the very
@@ -418,12 +424,16 @@ int BetaCudaDeviceInterface::frameReadyInDisplayOrder(
418424
int BetaCudaDeviceInterface::receiveFrame(UniqueAVFrame& avFrame) {
419425
if (readyFrames_.empty()) {
420426
// No frame found, instruct caller to try again later after sending more
421-
// packets, or to stop if EOF was already sent.
422-
return eofSent_ ? AVERROR_EOF : AVERROR(EAGAIN);
427+
// packets.
428+
return AVERROR(EAGAIN);
423429
}
424430
CUVIDPARSERDISPINFO dispInfo = readyFrames_.front();
425431
readyFrames_.pop();
426432

433+
// TODONVDEC P1 we need to set the procParams.output_stream field to the
434+
// current CUDA stream and ensure proper synchronization. There's a related
435+
// NVDECTODO in CudaDeviceInterface.cpp where we do the necessary
436+
// synchronization for NPP.
427437
CUVIDPROCPARAMS procParams = {};
428438
procParams.progressive_frame = dispInfo.progressive_frame;
429439
procParams.top_field_first = dispInfo.top_field_first;
@@ -562,8 +572,6 @@ void BetaCudaDeviceInterface::flush() {
562572
}
563573

564574
void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
565-
const VideoStreamOptions& videoStreamOptions,
566-
const AVRational& timeBase,
567575
UniqueAVFrame& avFrame,
568576
FrameOutput& frameOutput,
569577
std::optional<torch::Tensor> preAllocatedOutputTensor) {
@@ -574,20 +582,8 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
574582
// TODONVDEC P1: we use the 'default' cuda device interface for color
575583
// conversion. That's a temporary hack to make things work. we should abstract
576584
// the color conversion stuff separately.
577-
if (!defaultCudaInterface_) {
578-
auto cudaDevice = torch::Device(torch::kCUDA);
579-
defaultCudaInterface_ =
580-
std::unique_ptr<DeviceInterface>(createDeviceInterface(cudaDevice));
581-
AVCodecContext dummyCodecContext = {};
582-
defaultCudaInterface_->initializeContext(&dummyCodecContext);
583-
}
584-
585585
defaultCudaInterface_->convertAVFrameToFrameOutput(
586-
videoStreamOptions,
587-
timeBase,
588-
avFrame,
589-
frameOutput,
590-
preAllocatedOutputTensor);
586+
avFrame, frameOutput, preAllocatedOutputTensor);
591587
}
592588

593589
} // namespace facebook::torchcodec

src/torchcodec/_core/BetaCudaDeviceInterface.h

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -37,13 +37,11 @@ class BetaCudaDeviceInterface : public DeviceInterface {
3737
explicit BetaCudaDeviceInterface(const torch::Device& device);
3838
virtual ~BetaCudaDeviceInterface();
3939

40-
void initializeInterface(
41-
const AVStream* stream,
40+
void initialize(
41+
const AVStream* avStream,
4242
const UniqueDecodingAVFormatContext& avFormatCtx) override;
4343

4444
void convertAVFrameToFrameOutput(
45-
const VideoStreamOptions& videoStreamOptions,
46-
const AVRational& timeBase,
4745
UniqueAVFrame& avFrame,
4846
FrameOutput& frameOutput,
4947
std::optional<torch::Tensor> preAllocatedOutputTensor =

src/torchcodec/_core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,7 @@ function(make_torchcodec_libraries
9595
SingleStreamDecoder.cpp
9696
Encoder.cpp
9797
ValidationUtils.cpp
98+
Transform.cpp
9899
)
99100

100101
if(ENABLE_CUDA)

0 commit comments

Comments
 (0)