more stuff

NicolasHug · NicolasHug · commit 9ee63e6575e8 · 2025-03-13T19:08:22.000Z
diff --git a/src/torchcodec/decoders/_core/VideoDecoder.cpp b/src/torchcodec/decoders/_core/VideoDecoder.cpp
@@ -573,13 +573,15 @@ void VideoDecoder::addAudioStream(int streamIndex) {
 
 VideoDecoder::FrameOutput VideoDecoder::getNextFrame() {
   auto output = getNextFrameInternal();
-  output.data = maybePermuteHWC2CHW(output.data);
+  if (streamInfos_[activeStreamIndex_].avMediaType == AVMEDIA_TYPE_VIDEO) {
+    output.data = maybePermuteHWC2CHW(output.data);
+  }
   return output;
 }
 
 VideoDecoder::FrameOutput VideoDecoder::getNextFrameInternal(
     std::optional<torch::Tensor> preAllocatedOutputTensor) {
-  validateActiveStream(AVMEDIA_TYPE_VIDEO);
+  validateActiveStream();
   AVFrameStream avFrameStream = decodeAVFrame(
       [this](AVFrame* avFrame) { return avFrame->pts >= cursor_; });
   return convertAVFrameToFrameOutput(avFrameStream, preAllocatedOutputTensor);
diff --git a/test/decoders/test_decoders.py b/test/decoders/test_decoders.py
@@ -1080,3 +1080,21 @@ def test_frame_start_is_not_zero(self):
 
         reference_frames = asset.get_frame_data_by_range(start=0, stop=stop_frame_index)
         torch.testing.assert_close(samples.data, reference_frames)
+
+    def test_single_channel(self):
+        asset = SINE_MONO_S32
+        decoder = AudioDecoder(asset.path)
+
+        samples = decoder.get_samples_played_in_range(start_seconds=0, stop_seconds=2)
+        assert samples.data.shape[0] == asset.num_channels == 1
+
+    def test_format_conversion(self):
+        asset = SINE_MONO_S32
+        decoder = AudioDecoder(asset.path)
+        assert decoder.metadata.sample_format == asset.sample_format == "s32"
+
+        all_samples = decoder.get_samples_played_in_range(start_seconds=0)
+        assert all_samples.data.dtype == torch.float32
+
+        reference_frames = asset.get_frame_data_by_range(start=0, stop=asset.num_frames)
+        torch.testing.assert_close(all_samples.data, reference_frames)
diff --git a/test/decoders/test_ops.py b/test/decoders/test_ops.py
@@ -626,7 +626,6 @@ class TestAudioOps:
             partial(get_frames_in_range, start=4, stop=5),
             partial(get_frame_at_pts, seconds=2),
             partial(get_frames_by_pts, timestamps=[0, 1.5]),
-            partial(get_next_frame),
         ),
     )
     def test_audio_bad_method(self, method):
diff --git a/test/resources/sine_mono_s32.wav.stream0.all_frames.pt b/test/resources/sine_mono_s32.wav.stream0.all_frames.pt
diff --git a/test/utils.py b/test/utils.py
@@ -444,6 +444,9 @@ def sample_format(self) -> str:
     },
 )
 
+# Note that the file itself is s32 sample format, but the reference frames are
+# stored as fltp. We can add the s32 original reference frames once we support
+# decoding to non-fltp format, but for now we don't need to.
 SINE_MONO_S32 = TestAudio(
     filename="sine_mono_s32.wav",
     default_stream_index=0,

Original file line number	Diff line number	Diff line change
`@@ -626,7 +626,6 @@ class TestAudioOps:`
`626`	`626`	`partial(get_frames_in_range, start=4, stop=5),`
`627`	`627`	`partial(get_frame_at_pts, seconds=2),`
`628`	`628`	`partial(get_frames_by_pts, timestamps=[0, 1.5]),`
`629`		`- partial(get_next_frame),`
`630`	`629`	`),`
`631`	`630`	`)`
`632`	`631`	`def test_audio_bad_method(self, method):`
Original file line number	Diff line number	Diff line change
`@@ -444,6 +444,9 @@ def sample_format(self) -> str:`
`444`	`444`	`},`
`445`	`445`	`)`
`446`	`446`
	`447`	`+# Note that the file itself is s32 sample format, but the reference frames are`
	`448`	`+# stored as fltp. We can add the s32 original reference frames once we support`
	`449`	`+# decoding to non-fltp format, but for now we don't need to.`
`447`	`450`	`SINE_MONO_S32 = TestAudio(`
`448`	`451`	`filename="sine_mono_s32.wav",`
`449`	`452`	`default_stream_index=0,`