meta-pytorch
diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp
Lines changed: 68 additions & 72 deletions
diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.h b/src/torchcodec/_core/BetaCudaDeviceInterface.h
Lines changed: 2 additions & 4 deletions
diff --git a/src/torchcodec/_core/CMakeLists.txt b/src/torchcodec/_core/CMakeLists.txt
Lines changed: 1 addition & 0 deletions
@@ -35,22 +35,19 @@ static bool g_cuda_beta = registerDeviceInterface(
 
 static int CUDAAPI
 pfnSequenceCallback(void* pUserData, CUVIDEOFORMAT* videoFormat) {
-  BetaCudaDeviceInterface* decoder =
-      static_cast<BetaCudaDeviceInterface*>(pUserData);
+  auto decoder = static_cast<BetaCudaDeviceInterface*>(pUserData);
   return decoder->streamPropertyChange(videoFormat);
 }
 
 static int CUDAAPI
 pfnDecodePictureCallback(void* pUserData, CUVIDPICPARAMS* picParams) {
-  BetaCudaDeviceInterface* decoder =
-      static_cast<BetaCudaDeviceInterface*>(pUserData);
+  auto decoder = static_cast<BetaCudaDeviceInterface*>(pUserData);
   return decoder->frameReadyForDecoding(picParams);
 }
 
 static int CUDAAPI
 pfnDisplayPictureCallback(void* pUserData, CUVIDPARSERDISPINFO* dispInfo) {
-  BetaCudaDeviceInterface* decoder =
-      static_cast<BetaCudaDeviceInterface*>(pUserData);
+  auto decoder = static_cast<BetaCudaDeviceInterface*>(pUserData);
   return decoder->frameReadyInDisplayOrder(dispInfo);
 }
 
@@ -112,27 +109,29 @@ static UniqueCUvideodecoder createDecoder(CUVIDEOFORMAT* videoFormat) {
       caps.nMaxMBCount);
 
   // Decoder creation parameters, taken from DALI
-  CUVIDDECODECREATEINFO decoder_info = {};
-  decoder_info.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8;
-  decoder_info.ChromaFormat = videoFormat->chroma_format;
-  decoder_info.CodecType = videoFormat->codec;
-  decoder_info.ulHeight = videoFormat->coded_height;
-  decoder_info.ulWidth = videoFormat->coded_width;
-  decoder_info.ulMaxHeight = videoFormat->coded_height;
-  decoder_info.ulMaxWidth = videoFormat->coded_width;
-  decoder_info.ulTargetHeight =
+  CUVIDDECODECREATEINFO decoderParams = {};
+  decoderParams.bitDepthMinus8 = videoFormat->bit_depth_luma_minus8;
+  decoderParams.ChromaFormat = videoFormat->chroma_format;
+  decoderParams.OutputFormat = cudaVideoSurfaceFormat_NV12;
+  decoderParams.ulCreationFlags = cudaVideoCreate_Default;
+  decoderParams.CodecType = videoFormat->codec;
+  decoderParams.ulHeight = videoFormat->coded_height;
+  decoderParams.ulWidth = videoFormat->coded_width;
+  decoderParams.ulMaxHeight = videoFormat->coded_height;
+  decoderParams.ulMaxWidth = videoFormat->coded_width;
+  decoderParams.ulTargetHeight =
       videoFormat->display_area.bottom - videoFormat->display_area.top;
-  decoder_info.ulTargetWidth =
+  decoderParams.ulTargetWidth =
       videoFormat->display_area.right - videoFormat->display_area.left;
-  decoder_info.ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces;
-  decoder_info.ulNumOutputSurfaces = 2;
-  decoder_info.display_area.left = videoFormat->display_area.left;
-  decoder_info.display_area.right = videoFormat->display_area.right;
-  decoder_info.display_area.top = videoFormat->display_area.top;
-  decoder_info.display_area.bottom = videoFormat->display_area.bottom;
+  decoderParams.ulNumDecodeSurfaces = videoFormat->min_num_decode_surfaces;
+  decoderParams.ulNumOutputSurfaces = 2;
+  decoderParams.display_area.left = videoFormat->display_area.left;
+  decoderParams.display_area.right = videoFormat->display_area.right;
+  decoderParams.display_area.top = videoFormat->display_area.top;
+  decoderParams.display_area.bottom = videoFormat->display_area.bottom;
 
   CUvideodecoder* decoder = new CUvideodecoder();
-  result = cuvidCreateDecoder(decoder, &decoder_info);
+  result = cuvidCreateDecoder(decoder, &decoderParams);
   TORCH_CHECK(
       result == CUDA_SUCCESS, "Failed to create NVDEC decoder: ", result);
   return UniqueCUvideodecoder(decoder, CUvideoDecoderDeleter{});
@@ -182,6 +181,45 @@ BetaCudaDeviceInterface::~BetaCudaDeviceInterface() {
   }
 }
 
+void BetaCudaDeviceInterface::initialize(
+    const AVStream* avStream,
+    const UniqueDecodingAVFormatContext& avFormatCtx) {
+  torch::Tensor dummyTensorForCudaInitialization = torch::empty(
+      {1}, torch::TensorOptions().dtype(torch::kUInt8).device(device_));
+
+  auto cudaDevice = torch::Device(torch::kCUDA);
+  defaultCudaInterface_ =
+      std::unique_ptr<DeviceInterface>(createDeviceInterface(cudaDevice));
+  AVCodecContext dummyCodecContext = {};
+  defaultCudaInterface_->initialize(avStream, avFormatCtx);
+  defaultCudaInterface_->registerHardwareDeviceWithCodec(&dummyCodecContext);
+
+  TORCH_CHECK(avStream != nullptr, "AVStream cannot be null");
+  timeBase_ = avStream->time_base;
+  frameRateAvgFromFFmpeg_ = avStream->r_frame_rate;
+
+  const AVCodecParameters* codecPar = avStream->codecpar;
+  TORCH_CHECK(codecPar != nullptr, "CodecParameters cannot be null");
+
+  initializeBSF(codecPar, avFormatCtx);
+
+  // Create parser. Default values that aren't obvious are taken from DALI.
+  CUVIDPARSERPARAMS parserParams = {};
+  parserParams.CodecType = validateCodecSupport(codecPar->codec_id);
+  parserParams.ulMaxNumDecodeSurfaces = 8;
+  parserParams.ulMaxDisplayDelay = 0;
+  // Callback setup, all are triggered by the parser within a call
+  // to cuvidParseVideoData
+  parserParams.pUserData = this;
+  parserParams.pfnSequenceCallback = pfnSequenceCallback;
+  parserParams.pfnDecodePicture = pfnDecodePictureCallback;
+  parserParams.pfnDisplayPicture = pfnDisplayPictureCallback;
+
+  CUresult result = cuvidCreateVideoParser(&videoParser_, &parserParams);
+  TORCH_CHECK(
+      result == CUDA_SUCCESS, "Failed to create video parser: ", result);
+}
+
 void BetaCudaDeviceInterface::initializeBSF(
     const AVCodecParameters* codecPar,
     const UniqueDecodingAVFormatContext& avFormatCtx) {
@@ -261,38 +299,6 @@ void BetaCudaDeviceInterface::initializeBSF(
       getFFMPEGErrorStringFromErrorCode(retVal));
 }
 
-void BetaCudaDeviceInterface::initializeInterface(
-    const AVStream* avStream,
-    const UniqueDecodingAVFormatContext& avFormatCtx) {
-  torch::Tensor dummyTensorForCudaInitialization = torch::empty(
-      {1}, torch::TensorOptions().dtype(torch::kUInt8).device(device_));
-
-  TORCH_CHECK(avStream != nullptr, "AVStream cannot be null");
-  timeBase_ = avStream->time_base;
-  frameRateAvgFromFFmpeg_ = avStream->r_frame_rate;
-
-  const AVCodecParameters* codecPar = avStream->codecpar;
-  TORCH_CHECK(codecPar != nullptr, "CodecParameters cannot be null");
-
-  initializeBSF(codecPar, avFormatCtx);
-
-  // Create parser. Default values that aren't obvious are taken from DALI.
-  CUVIDPARSERPARAMS parserParams = {};
-  parserParams.CodecType = validateCodecSupport(codecPar->codec_id);
-  parserParams.ulMaxNumDecodeSurfaces = 8;
-  parserParams.ulMaxDisplayDelay = 0;
-  // Callback setup, all are triggered by the parser within a call
-  // to cuvidParseVideoData
-  parserParams.pUserData = this;
-  parserParams.pfnSequenceCallback = pfnSequenceCallback;
-  parserParams.pfnDecodePicture = pfnDecodePictureCallback;
-  parserParams.pfnDisplayPicture = pfnDisplayPictureCallback;
-
-  CUresult result = cuvidCreateVideoParser(&videoParser_, &parserParams);
-  TORCH_CHECK(
-      result == CUDA_SUCCESS, "Failed to create video parser: ", result);
-}
-
 // This callback is called by the parser within cuvidParseVideoData when there
 // is a change in the stream's properties (like resolution change), as specified
 // by CUVIDEOFORMAT. Particularly (but not just!), this is called at the very
@@ -418,12 +424,16 @@ int BetaCudaDeviceInterface::frameReadyInDisplayOrder(
 int BetaCudaDeviceInterface::receiveFrame(UniqueAVFrame& avFrame) {
   if (readyFrames_.empty()) {
     // No frame found, instruct caller to try again later after sending more
-    // packets, or to stop if EOF was already sent.
-    return eofSent_ ? AVERROR_EOF : AVERROR(EAGAIN);
+    // packets.
+    return AVERROR(EAGAIN);
   }
   CUVIDPARSERDISPINFO dispInfo = readyFrames_.front();
   readyFrames_.pop();
 
+  // TODONVDEC P1 we need to set the procParams.output_stream field to the
+  // current CUDA stream and ensure proper synchronization. There's a related
+  // NVDECTODO in CudaDeviceInterface.cpp where we do the necessary
+  // synchronization for NPP.
   CUVIDPROCPARAMS procParams = {};
   procParams.progressive_frame = dispInfo.progressive_frame;
   procParams.top_field_first = dispInfo.top_field_first;
@@ -562,8 +572,6 @@ void BetaCudaDeviceInterface::flush() {
 }
 
 void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
-    const VideoStreamOptions& videoStreamOptions,
-    const AVRational& timeBase,
     UniqueAVFrame& avFrame,
     FrameOutput& frameOutput,
     std::optional<torch::Tensor> preAllocatedOutputTensor) {
@@ -574,20 +582,8 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
   // TODONVDEC P1: we use the 'default' cuda device interface for color
   // conversion. That's a temporary hack to make things work. we should abstract
   // the color conversion stuff separately.
-  if (!defaultCudaInterface_) {
-    auto cudaDevice = torch::Device(torch::kCUDA);
-    defaultCudaInterface_ =
-        std::unique_ptr<DeviceInterface>(createDeviceInterface(cudaDevice));
-    AVCodecContext dummyCodecContext = {};
-    defaultCudaInterface_->initializeContext(&dummyCodecContext);
-  }
-
   defaultCudaInterface_->convertAVFrameToFrameOutput(
-      videoStreamOptions,
-      timeBase,
-      avFrame,
-      frameOutput,
-      preAllocatedOutputTensor);
+      avFrame, frameOutput, preAllocatedOutputTensor);
 }
 
 } // namespace facebook::torchcodec
@@ -37,13 +37,11 @@ class BetaCudaDeviceInterface : public DeviceInterface {
   explicit BetaCudaDeviceInterface(const torch::Device& device);
   virtual ~BetaCudaDeviceInterface();
 
-  void initializeInterface(
-      const AVStream* stream,
+  void initialize(
+      const AVStream* avStream,
       const UniqueDecodingAVFormatContext& avFormatCtx) override;
 
   void convertAVFrameToFrameOutput(
-      const VideoStreamOptions& videoStreamOptions,
-      const AVRational& timeBase,
       UniqueAVFrame& avFrame,
       FrameOutput& frameOutput,
       std::optional<torch::Tensor> preAllocatedOutputTensor =
 
@@ -95,6 +95,7 @@ function(make_torchcodec_libraries
         SingleStreamDecoder.cpp
         Encoder.cpp
         ValidationUtils.cpp
+        Transform.cpp
     )
 
     if(ENABLE_CUDA)
| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
|  |  | `@@ -95,6 +95,7 @@ function(make_torchcodec_libraries` |
| `95` | `95` | `SingleStreamDecoder.cpp` |
| `96` | `96` | `Encoder.cpp` |
| `97` | `97` | `ValidationUtils.cpp` |
|  | `98` | `+ Transform.cpp` |
| `98` | `99` | `)` |
| `99` | `100` |  |
| `100` | `101` | `if(ENABLE_CUDA)` |