Skip to content

Commit 993d510

Browse files
committed
Merge branch 'nvdec-tests' into nvdec-send-eof
2 parents bc55810 + 204970e commit 993d510

23 files changed

+905
-611
lines changed

src/torchcodec/_core/BetaCudaDeviceInterface.cpp

Lines changed: 66 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -35,22 +35,19 @@ static bool g_cuda_beta = registerDeviceInterface(
3535

3636
static int CUDAAPI
3737
pfnSequenceCallback(void* pUserData, CUVIDEOFORMAT* videoFormat) {
38-
BetaCudaDeviceInterface* decoder =
39-
static_cast<BetaCudaDeviceInterface*>(pUserData);
38+
auto decoder = static_cast<BetaCudaDeviceInterface*>(pUserData);
4039
return decoder->streamPropertyChange(videoFormat);
4140
}
4241

4342
static int CUDAAPI
4443
pfnDecodePictureCallback(void* pUserData, CUVIDPICPARAMS* picParams) {
45-
BetaCudaDeviceInterface* decoder =
46-
static_cast<BetaCudaDeviceInterface*>(pUserData);
44+
auto decoder = static_cast<BetaCudaDeviceInterface*>(pUserData);
4745
return decoder->frameReadyForDecoding(picParams);
4846
}
4947

5048
static int CUDAAPI
5149
pfnDisplayPictureCallback(void* pUserData, CUVIDPARSERDISPINFO* dispInfo) {
52-
BetaCudaDeviceInterface* decoder =
53-
static_cast<BetaCudaDeviceInterface*>(pUserData);
50+
auto decoder = static_cast<BetaCudaDeviceInterface*>(pUserData);
5451
return decoder->frameReadyInDisplayOrder(dispInfo);
5552
}
5653

@@ -112,27 +109,29 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
112109
caps.nMaxMBCount);
113110

114111
// Decoder creation parameters, taken from DALI
115-
CUVIDDECODECREATEINFO decoder_info = {};
116-
decoder_info.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8;
117-
decoder_info.ChromaFormat = videoFormat->chroma_format;
118-
decoder_info.CodecType = videoFormat->codec;
119-
decoder_info.ulHeight = videoFormat->coded_height;
120-
decoder_info.ulWidth = videoFormat->coded_width;
121-
decoder_info.ulMaxHeight = videoFormat->coded_height;
122-
decoder_info.ulMaxWidth = videoFormat->coded_width;
123-
decoder_info.ulTargetHeight =
112+
CUVIDDECODECREATEINFO decoderParams = {};
113+
decoderParams.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8;
114+
decoderParams.ChromaFormat = videoFormat->chroma_format;
115+
decoderParams.OutputFormat = cudaVideoSurfaceFormat_NV12;
116+
decoderParams.ulCreationFlags = cudaVideoCreate_Default;
117+
decoderParams.CodecType = videoFormat->codec;
118+
decoderParams.ulHeight = videoFormat->coded_height;
119+
decoderParams.ulWidth = videoFormat->coded_width;
120+
decoderParams.ulMaxHeight = videoFormat->coded_height;
121+
decoderParams.ulMaxWidth = videoFormat->coded_width;
122+
decoderParams.ulTargetHeight =
124123
videoFormat->display_area.bottom - videoFormat->display_area.top;
125-
decoder_info.ulTargetWidth =
124+
decoderParams.ulTargetWidth =
126125
videoFormat->display_area.right - videoFormat->display_area.left;
127-
decoder_info.ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces;
128-
decoder_info.ulNumOutputSurfaces = 2;
129-
decoder_info.display_area.left = videoFormat->display_area.left;
130-
decoder_info.display_area.right = videoFormat->display_area.right;
131-
decoder_info.display_area.top = videoFormat->display_area.top;
132-
decoder_info.display_area.bottom = videoFormat->display_area.bottom;
126+
decoderParams.ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces;
127+
decoderParams.ulNumOutputSurfaces = 2;
128+
decoderParams.display_area.left = videoFormat->display_area.left;
129+
decoderParams.display_area.right = videoFormat->display_area.right;
130+
decoderParams.display_area.top = videoFormat->display_area.top;
131+
decoderParams.display_area.bottom = videoFormat->display_area.bottom;
133132

134133
CUvideodecoder* decoder = new CUvideodecoder();
135-
result = cuvidCreateDecoder(decoder, &decoder_info);
134+
result = cuvidCreateDecoder(decoder, &decoderParams);
136135
TORCH_CHECK(
137136
result == CUDA_SUCCESS, "Failed to create NVDEC decoder: ", result);
138137
return UniqueCUvideodecoder(decoder, CUvideoDecoderDeleter{});
@@ -182,6 +181,45 @@ BetaCudaDeviceInterface::~BetaCudaDeviceInterface() {
182181
}
183182
}
184183

184+
void BetaCudaDeviceInterface::initialize(
185+
const AVStream* avStream,
186+
const UniqueDecodingAVFormatContext& avFormatCtx) {
187+
torch::Tensor dummyTensorForCudaInitialization = torch::empty(
188+
{1}, torch::TensorOptions().dtype(torch::kUInt8).device(device_));
189+
190+
auto cudaDevice = torch::Device(torch::kCUDA);
191+
defaultCudaInterface_ =
192+
std::unique_ptr<DeviceInterface>(createDeviceInterface(cudaDevice));
193+
AVCodecContext dummyCodecContext = {};
194+
defaultCudaInterface_->initialize(avStream, avFormatCtx);
195+
defaultCudaInterface_->registerHardwareDeviceWithCodec(&dummyCodecContext);
196+
197+
TORCH_CHECK(avStream != nullptr, "AVStream cannot be null");
198+
timeBase_ = avStream->time_base;
199+
frameRateAvgFromFFmpeg_ = avStream->r_frame_rate;
200+
201+
const AVCodecParameters* codecPar = avStream->codecpar;
202+
TORCH_CHECK(codecPar != nullptr, "CodecParameters cannot be null");
203+
204+
initializeBSF(codecPar, avFormatCtx);
205+
206+
// Create parser. Default values that aren't obvious are taken from DALI.
207+
CUVIDPARSERPARAMS parserParams = {};
208+
parserParams.CodecType = validateCodecSupport(codecPar->codec_id);
209+
parserParams.ulMaxNumDecodeSurfaces = 8;
210+
parserParams.ulMaxDisplayDelay = 0;
211+
// Callback setup, all are triggered by the parser within a call
212+
// to cuvidParseVideoData
213+
parserParams.pUserData = this;
214+
parserParams.pfnSequenceCallback = pfnSequenceCallback;
215+
parserParams.pfnDecodePicture = pfnDecodePictureCallback;
216+
parserParams.pfnDisplayPicture = pfnDisplayPictureCallback;
217+
218+
CUresult result = cuvidCreateVideoParser(&videoParser_, &parserParams);
219+
TORCH_CHECK(
220+
result == CUDA_SUCCESS, "Failed to create video parser: ", result);
221+
}
222+
185223
void BetaCudaDeviceInterface::initializeBSF(
186224
const AVCodecParameters* codecPar,
187225
const UniqueDecodingAVFormatContext& avFormatCtx) {
@@ -261,38 +299,6 @@ void BetaCudaDeviceInterface::initializeBSF(
261299
getFFMPEGErrorStringFromErrorCode(retVal));
262300
}
263301

264-
void BetaCudaDeviceInterface::initializeInterface(
265-
const AVStream* avStream,
266-
const UniqueDecodingAVFormatContext& avFormatCtx) {
267-
torch::Tensor dummyTensorForCudaInitialization = torch::empty(
268-
{1}, torch::TensorOptions().dtype(torch::kUInt8).device(device_));
269-
270-
TORCH_CHECK(avStream != nullptr, "AVStream cannot be null");
271-
timeBase_ = avStream->time_base;
272-
frameRateAvgFromFFmpeg_ = avStream->r_frame_rate;
273-
274-
const AVCodecParameters* codecPar = avStream->codecpar;
275-
TORCH_CHECK(codecPar != nullptr, "CodecParameters cannot be null");
276-
277-
initializeBSF(codecPar, avFormatCtx);
278-
279-
// Create parser. Default values that aren't obvious are taken from DALI.
280-
CUVIDPARSERPARAMS parserParams = {};
281-
parserParams.CodecType = validateCodecSupport(codecPar->codec_id);
282-
parserParams.ulMaxNumDecodeSurfaces = 8;
283-
parserParams.ulMaxDisplayDelay = 0;
284-
// Callback setup, all are triggered by the parser within a call
285-
// to cuvidParseVideoData
286-
parserParams.pUserData = this;
287-
parserParams.pfnSequenceCallback = pfnSequenceCallback;
288-
parserParams.pfnDecodePicture = pfnDecodePictureCallback;
289-
parserParams.pfnDisplayPicture = pfnDisplayPictureCallback;
290-
291-
CUresult result = cuvidCreateVideoParser(&videoParser_, &parserParams);
292-
TORCH_CHECK(
293-
result == CUDA_SUCCESS, "Failed to create video parser: ", result);
294-
}
295-
296302
// This callback is called by the parser within cuvidParseVideoData when there
297303
// is a change in the stream's properties (like resolution change), as specified
298304
// by CUVIDEOFORMAT. Particularly (but not just!), this is called at the very
@@ -429,6 +435,10 @@ int BetaCudaDeviceInterface::receiveFrame(UniqueAVFrame& avFrame) {
429435
CUVIDPARSERDISPINFO dispInfo = readyFrames_.front();
430436
readyFrames_.pop();
431437

438+
// TODONVDEC P1: we need to set the procParams.output_stream field to the
439+
// current CUDA stream and ensure proper synchronization. There's a related
440+
// TODONVDEC in CudaDeviceInterface.cpp where we do the necessary
441+
// synchronization for NPP.
432442
CUVIDPROCPARAMS procParams = {};
433443
procParams.progressive_frame = dispInfo.progressive_frame;
434444
procParams.top_field_first = dispInfo.top_field_first;
@@ -557,8 +567,6 @@ void BetaCudaDeviceInterface::flush() {
557567
}
558568

559569
void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
560-
const VideoStreamOptions& videoStreamOptions,
561-
const AVRational& timeBase,
562570
UniqueAVFrame& avFrame,
563571
FrameOutput& frameOutput,
564572
std::optional<torch::Tensor> preAllocatedOutputTensor) {
@@ -569,20 +577,8 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
569577
// TODONVDEC P1: we use the 'default' cuda device interface for color
570578
// conversion. That's a temporary hack to make things work. We should abstract
571579
// the color conversion stuff separately.
572-
if (!defaultCudaInterface_) {
573-
auto cudaDevice = torch::Device(torch::kCUDA);
574-
defaultCudaInterface_ =
575-
std::unique_ptr<DeviceInterface>(createDeviceInterface(cudaDevice));
576-
AVCodecContext dummyCodecContext = {};
577-
defaultCudaInterface_->initializeContext(&dummyCodecContext);
578-
}
579-
580580
defaultCudaInterface_->convertAVFrameToFrameOutput(
581-
videoStreamOptions,
582-
timeBase,
583-
avFrame,
584-
frameOutput,
585-
preAllocatedOutputTensor);
581+
avFrame, frameOutput, preAllocatedOutputTensor);
586582
}
587583

588584
} // namespace facebook::torchcodec

src/torchcodec/_core/BetaCudaDeviceInterface.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,11 @@ class BetaCudaDeviceInterface : public DeviceInterface {
3737
explicit BetaCudaDeviceInterface(const torch::Device& device);
3838
virtual ~BetaCudaDeviceInterface();
3939

40-
void initializeInterface(
41-
const AVStream* stream,
40+
void initialize(
41+
const AVStream* avStream,
4242
const UniqueDecodingAVFormatContext& avFormatCtx) override;
4343

4444
void convertAVFrameToFrameOutput(
45-
const VideoStreamOptions& videoStreamOptions,
46-
const AVRational& timeBase,
4745
UniqueAVFrame& avFrame,
4846
FrameOutput& frameOutput,
4947
std::optional<torch::Tensor> preAllocatedOutputTensor =

src/torchcodec/_core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ function(make_torchcodec_libraries
9595
SingleStreamDecoder.cpp
9696
Encoder.cpp
9797
ValidationUtils.cpp
98+
Transform.cpp
9899
)
99100

100101
if(ENABLE_CUDA)

0 commit comments

Comments
 (0)