Rename rawOutput to avFrameWithStreamIndex

NicolasHug · NicolasHug · commit 24d003541d16 · 2025-01-23T11:30:02.000Z
diff --git a/src/torchcodec/decoders/_core/CPUOnlyDevice.cpp b/src/torchcodec/decoders/_core/CPUOnlyDevice.cpp
@@ -17,7 +17,8 @@ namespace facebook::torchcodec {
 void convertAVFrameToFrameOutputOnCuda(
     const torch::Device& device,
     [[maybe_unused]] const VideoDecoder::VideoStreamOptions& videoStreamOptions,
-    [[maybe_unused]] VideoDecoder::AVFrameWithStreamIndex& rawOutput,
+    [[maybe_unused]] VideoDecoder::AVFrameWithStreamIndex&
+        avFrameWithStreamIndex,
     [[maybe_unused]] VideoDecoder::FrameOutput& frameOutput,
     [[maybe_unused]] std::optional<torch::Tensor> preAllocatedOutputTensor) {
   throwUnsupportedDeviceError(device);
diff --git a/src/torchcodec/decoders/_core/CudaDevice.cpp b/src/torchcodec/decoders/_core/CudaDevice.cpp
@@ -186,10 +186,10 @@ void initializeContextOnCuda(
 void convertAVFrameToFrameOutputOnCuda(
     const torch::Device& device,
     const VideoDecoder::VideoStreamOptions& videoStreamOptions,
-    VideoDecoder::AVFrameWithStreamIndex& rawOutput,
+    VideoDecoder::AVFrameWithStreamIndex& avFrameWithStreamIndex,
     VideoDecoder::FrameOutput& frameOutput,
     std::optional<torch::Tensor> preAllocatedOutputTensor) {
-  AVFrame* avFrame = rawOutput.avFrame.get();
+  AVFrame* avFrame = avFrameWithStreamIndex.avFrame.get();
 
   TORCH_CHECK(
       avFrame->format == AV_PIX_FMT_CUDA,
diff --git a/src/torchcodec/decoders/_core/DeviceInterface.h b/src/torchcodec/decoders/_core/DeviceInterface.h
@@ -32,7 +32,7 @@ void initializeContextOnCuda(
 void convertAVFrameToFrameOutputOnCuda(
     const torch::Device& device,
     const VideoDecoder::VideoStreamOptions& videoStreamOptions,
-    VideoDecoder::AVFrameWithStreamIndex& rawOutput,
+    VideoDecoder::AVFrameWithStreamIndex& avFrameWithStreamIndex,
     VideoDecoder::FrameOutput& frameOutput,
     std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt);
 
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -913,19 +913,19 @@ VideoDecoder::getAVFrameUsingFilterFunction(
   StreamInfo& activeStreamInfo = streamInfos_[frameStreamIndex];
   activeStreamInfo.currentPts = avFrame->pts;
   activeStreamInfo.currentDuration = getDuration(avFrame);
-  AVFrameWithStreamIndex rawOutput;
-  rawOutput.streamIndex = frameStreamIndex;
-  rawOutput.avFrame = std::move(avFrame);
-  return rawOutput;
+  AVFrameWithStreamIndex avFrameWithStreamIndex;
+  avFrameWithStreamIndex.streamIndex = frameStreamIndex;
+  avFrameWithStreamIndex.avFrame = std::move(avFrame);
+  return avFrameWithStreamIndex;
 }
 
 VideoDecoder::FrameOutput VideoDecoder::convertAVFrameToFrameOutput(
-    VideoDecoder::AVFrameWithStreamIndex& rawOutput,
+    VideoDecoder::AVFrameWithStreamIndex& avFrameWithStreamIndex,
     std::optional<torch::Tensor> preAllocatedOutputTensor) {
   // Convert the frame to tensor.
   FrameOutput frameOutput;
-  int streamIndex = rawOutput.streamIndex;
-  AVFrame* avFrame = rawOutput.avFrame.get();
+  int streamIndex = avFrameWithStreamIndex.streamIndex;
+  AVFrame* avFrame = avFrameWithStreamIndex.avFrame.get();
   frameOutput.streamIndex = streamIndex;
   auto& streamInfo = streamInfos_[streamIndex];
   TORCH_CHECK(streamInfo.stream->codecpar->codec_type == AVMEDIA_TYPE_VIDEO);
@@ -936,12 +936,12 @@ VideoDecoder::FrameOutput VideoDecoder::convertAVFrameToFrameOutput(
   // TODO: we should fold preAllocatedOutputTensor into AVFrameWithStreamIndex.
   if (streamInfo.videoStreamOptions.device.type() == torch::kCPU) {
     convertAVFrameToFrameOutputOnCPU(
-        rawOutput, frameOutput, preAllocatedOutputTensor);
+        avFrameWithStreamIndex, frameOutput, preAllocatedOutputTensor);
   } else if (streamInfo.videoStreamOptions.device.type() == torch::kCUDA) {
     convertAVFrameToFrameOutputOnCuda(
         streamInfo.videoStreamOptions.device,
         streamInfo.videoStreamOptions,
-        rawOutput,
+        avFrameWithStreamIndex,
         frameOutput,
         preAllocatedOutputTensor);
   } else {
@@ -962,11 +962,11 @@ VideoDecoder::FrameOutput VideoDecoder::convertAVFrameToFrameOutput(
 // Dimension order of the preAllocatedOutputTensor must be HWC, regardless of
 // `dimension_order` parameter. It's up to callers to re-shape it if needed.
 void VideoDecoder::convertAVFrameToFrameOutputOnCPU(
-    VideoDecoder::AVFrameWithStreamIndex& rawOutput,
+    VideoDecoder::AVFrameWithStreamIndex& avFrameWithStreamIndex,
     FrameOutput& output,
     std::optional<torch::Tensor> preAllocatedOutputTensor) {
-  int streamIndex = rawOutput.streamIndex;
-  AVFrame* avFrame = rawOutput.avFrame.get();
+  int streamIndex = avFrameWithStreamIndex.streamIndex;
+  AVFrame* avFrame = avFrameWithStreamIndex.avFrame.get();
   auto& streamInfo = streamInfos_[streamIndex];
 
   auto frameDims = getHeightAndWidthFromOptionsOrAVFrame(
@@ -1080,7 +1080,7 @@ VideoDecoder::FrameOutput VideoDecoder::getFramePlayedAtTimestampNoDemux(
   }
 
   setCursorPtsInSeconds(seconds);
-  AVFrameWithStreamIndex rawOutput = getAVFrameUsingFilterFunction(
+  AVFrameWithStreamIndex avFrameWithStreamIndex = getAVFrameUsingFilterFunction(
       [seconds, this](int frameStreamIndex, AVFrame* avFrame) {
         StreamInfo& streamInfo = streamInfos_[frameStreamIndex];
         double frameStartTime = ptsToSeconds(avFrame->pts, streamInfo.timeBase);
@@ -1100,7 +1100,7 @@ VideoDecoder::FrameOutput VideoDecoder::getFramePlayedAtTimestampNoDemux(
       });
 
   // Convert the frame to tensor.
-  FrameOutput frameOutput = convertAVFrameToFrameOutput(rawOutput);
+  FrameOutput frameOutput = convertAVFrameToFrameOutput(avFrameWithStreamIndex);
   frameOutput.data =
       maybePermuteHWC2CHW(frameOutput.streamIndex, frameOutput.data);
   return frameOutput;
@@ -1473,14 +1473,13 @@ VideoDecoder::FrameBatchOutput VideoDecoder::getFramesPlayedByTimestampInRange(
   return frameBatchOutput;
 }
 
-VideoDecoder::AVFrameWithStreamIndex
-VideoDecoder::getNextAVFrameNoDemux() {
-  auto rawOutput = getAVFrameUsingFilterFunction(
+VideoDecoder::AVFrameWithStreamIndex VideoDecoder::getNextAVFrameNoDemux() {
+  auto avFrameWithStreamIndex = getAVFrameUsingFilterFunction(
       [this](int frameStreamIndex, AVFrame* avFrame) {
         StreamInfo& activeStreamInfo = streamInfos_[frameStreamIndex];
         return avFrame->pts >= activeStreamInfo.discardFramesBeforePts;
       });
-  return rawOutput;
+  return avFrameWithStreamIndex;
 }
 
 VideoDecoder::FrameOutput VideoDecoder::getNextFrameNoDemux() {
@@ -1491,8 +1490,9 @@ VideoDecoder::FrameOutput VideoDecoder::getNextFrameNoDemux() {
 
 VideoDecoder::FrameOutput VideoDecoder::getNextFrameNoDemuxInternal(
     std::optional<torch::Tensor> preAllocatedOutputTensor) {
-  auto rawOutput = getNextAVFrameNoDemux();
-  return convertAVFrameToFrameOutput(rawOutput, preAllocatedOutputTensor);
+  auto avFrameWithStreamIndex = getNextAVFrameNoDemux();
+  return convertAVFrameToFrameOutput(
+      avFrameWithStreamIndex, preAllocatedOutputTensor);
 }
 
 void VideoDecoder::setCursorPtsInSeconds(double seconds) {
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.h b/src/torchcodec/decoders/_core/VideoDecoder.h
@@ -405,10 +405,10 @@ class VideoDecoder {
       const AVFrame* avFrame,
       torch::Tensor& outputTensor);
   FrameOutput convertAVFrameToFrameOutput(
-      AVFrameWithStreamIndex& rawOutput,
+      AVFrameWithStreamIndex& avFrameWithStreamIndex,
       std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt);
   void convertAVFrameToFrameOutputOnCPU(
-      AVFrameWithStreamIndex& rawOutput,
+      AVFrameWithStreamIndex& avFrameWithStreamIndex,
       FrameOutput& frameOutput,
       std::optional<torch::Tensor> preAllocatedOutputTensor = std::nullopt);