Address comments

NicolasHug · NicolasHug · commit c453a3c8d9a0 · 2025-03-12T09:44:56.000Z
diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.cpp b/src/torchcodec/decoders/_core/FFMPEGCommon.cpp
@@ -60,26 +60,22 @@ int64_t getDuration(const AVFrame* frame) {
 #endif
 }
 
-int64_t getNumChannels(const AVFrame* avFrame) {
+int getNumChannels(const AVFrame* avFrame) {
 #if LIBAVFILTER_VERSION_MAJOR > 8 || \
     (LIBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)
-  int numChannels = avFrame->ch_layout.nb_channels;
+  return avFrame->ch_layout.nb_channels;
 #else
-  int numChannels = av_get_channel_layout_nb_channels(avFrame->channel_layout);
+  return av_get_channel_layout_nb_channels(avFrame->channel_layout);
 #endif
-
-  return static_cast<int64_t>(numChannels);
 }
 
-int64_t getNumChannels(const UniqueAVCodecContext& avCodecContext) {
+int getNumChannels(const UniqueAVCodecContext& avCodecContext) {
 #if LIBAVFILTER_VERSION_MAJOR > 8 || \
     (LIBAVFILTER_VERSION_MAJOR == 8 && LIBAVFILTER_VERSION_MINOR >= 44)
-  int numChannels = avCodecContext->ch_layout.nb_channels;
+  return avCodecContext->ch_layout.nb_channels;
 #else
-  int numChannels = avCodecContext->channels;
+  return avCodecContext->channels;
 #endif
-
-  return static_cast<int64_t>(numChannels);
 }
 
 AVIOBytesContext::AVIOBytesContext(
diff --git a/src/torchcodec/decoders/_core/FFMPEGCommon.h b/src/torchcodec/decoders/_core/FFMPEGCommon.h
@@ -139,8 +139,8 @@ std::string getFFMPEGErrorStringFromErrorCode(int errorCode);
 int64_t getDuration(const UniqueAVFrame& frame);
 int64_t getDuration(const AVFrame* frame);
 
-int64_t getNumChannels(const AVFrame* avFrame);
-int64_t getNumChannels(const UniqueAVCodecContext& avCodecContext);
+int getNumChannels(const AVFrame* avFrame);
+int getNumChannels(const UniqueAVCodecContext& avCodecContext);
 
 // Returns true if sws_scale can handle unaligned data.
 bool canSwsScaleHandleUnalignedData();
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -553,7 +553,8 @@ void VideoDecoder::addAudioStream(int streamIndex) {
       containerMetadata_.allStreamMetadata[activeStreamIndex_];
   streamMetadata.sampleRate =
       static_cast<int64_t>(streamInfo.codecContext->sample_rate);
-  streamMetadata.numChannels = getNumChannels(streamInfo.codecContext);
+  streamMetadata.numChannels =
+      static_cast<int64_t>(getNumChannels(streamInfo.codecContext));
 }
 
 // --------------------------------------------------------------------------
@@ -875,16 +876,16 @@ torch::Tensor VideoDecoder::getFramesPlayedInRangeAudio(
   std::vector<torch::Tensor> tensors;
 
   auto stopPts = secondsToClosestPts(stopSeconds, streamInfo.timeBase);
-  auto shouldStopDecoding = false;
-  while (!shouldStopDecoding) {
+  auto finished = false;
+  while (!finished) {
     try {
       AVFrameStream avFrameStream = decodeAVFrame([this](AVFrame* avFrame) {
         return cursor_ < avFrame->pts + getDuration(avFrame);
       });
       auto frameOutput = convertAVFrameToFrameOutput(avFrameStream);
       tensors.push_back(frameOutput.data);
     } catch (const EndOfFileException& e) {
-      shouldStopDecoding = true;
+      finished = true;
     }
 
     // If stopSeconds is in [begin, end] of the last decoded frame, we should
@@ -893,7 +894,7 @@ torch::Tensor VideoDecoder::getFramesPlayedInRangeAudio(
     // stopSeconds, which isn't what we want!
     auto lastDecodedAvFrameEnd = streamInfo.lastDecodedAvFramePts +
         streamInfo.lastDecodedAvFrameDuration;
-    shouldStopDecoding |= (streamInfo.lastDecodedAvFramePts) <= stopPts &&
+    finished |= (streamInfo.lastDecodedAvFramePts) <= stopPts &&
         (stopPts <= lastDecodedAvFrameEnd);
   }
   return torch::cat(tensors, 1);
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.h b/src/torchcodec/decoders/_core/VideoDecoder.h
@@ -395,7 +395,6 @@ class VideoDecoder {
       const AVFrame* avFrame,
       torch::Tensor& outputTensor);
 
-  FrameBatchOutput makeFrameBatchOutput(int64_t numFrames);
   // --------------------------------------------------------------------------
   // COLOR CONVERSION LIBRARIES HANDLERS CREATION
   // --------------------------------------------------------------------------
diff --git a/test/decoders/test_ops.py b/test/decoders/test_ops.py
@@ -697,22 +697,24 @@ def test_get_frames_by_pts_in_range_audio(self, range, asset):
 
         torch.testing.assert_close(frames, reference_frames)
 
-    @pytest.mark.parametrize(
-        "asset, expected_shape", ((NASA_AUDIO, (2, 1024)), (NASA_AUDIO_MP3, (2, 576)))
-    )
-    def test_decode_epsilon_range(self, asset, expected_shape):
+    @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
+    def test_decode_epsilon_range(self, asset):
         decoder = create_from_file(str(asset.path), seek_mode="approximate")
         add_audio_stream(decoder)
 
+        start_seconds = 5
         frames = get_frames_by_pts_in_range_audio(
-            decoder, start_seconds=5, stop_seconds=5 + 1e-5
+            decoder, start_seconds=start_seconds, stop_seconds=start_seconds + 1e-5
+        )
+        torch.testing.assert_close(
+            frames,
+            asset.get_frame_data_by_index(
+                asset.get_frame_index(pts_seconds=start_seconds)
+            ),
         )
-        assert frames.shape == expected_shape
 
-    @pytest.mark.parametrize(
-        "asset, expected_shape", ((NASA_AUDIO, (2, 1024)), (NASA_AUDIO_MP3, (2, 576)))
-    )
-    def test_decode_just_one_frame_at_boundaries(self, asset, expected_shape):
+    @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
+    def test_decode_just_one_frame_at_boundaries(self, asset):
         decoder = create_from_file(str(asset.path), seek_mode="approximate")
         add_audio_stream(decoder)
 
@@ -721,7 +723,12 @@ def test_decode_just_one_frame_at_boundaries(self, asset, expected_shape):
         frames = get_frames_by_pts_in_range_audio(
             decoder, start_seconds=start_seconds, stop_seconds=stop_seconds
         )
-        assert frames.shape == expected_shape
+        torch.testing.assert_close(
+            frames,
+            asset.get_frame_data_by_index(
+                asset.get_frame_index(pts_seconds=start_seconds)
+            ),
+        )
 
     @pytest.mark.parametrize("asset", (NASA_AUDIO, NASA_AUDIO_MP3))
     def test_decode_start_equal_stop(self, asset):
diff --git a/test/utils.py b/test/utils.py
@@ -377,6 +377,7 @@ def get_frame_index(
             # 0.13~, not 0.
             return 0
         try:
+            # Could use bisect() to make this faster if needed
             return next(
                 frame_index
                 for (frame_index, frame_info) in self.frames[stream_index].items()