Skip to content

Commit 7ecd09d

Browse files
committed
Merge branch 'main' of https://github.com/meta-pytorch/torchcodec into python314_on_ci
2 parents 4aaa281 + 392bab3 commit 7ecd09d

File tree

6 files changed

+148
-86
lines changed

6 files changed

+148
-86
lines changed

src/torchcodec/_core/SingleStreamDecoder.cpp

Lines changed: 37 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1088,32 +1088,17 @@ void SingleStreamDecoder::setCursor(int64_t pts) {
10881088
cursor_ = pts;
10891089
}
10901090

1091-
/*
1092-
Videos have I frames and non-I frames (P and B frames). Non-I frames need data
1093-
from the previous I frame to be decoded.
1094-
1095-
Imagine the cursor is at a random frame with PTS=lastDecodedAvFramePts (x for
1096-
brevity) and we wish to seek to a user-specified PTS=y.
1097-
1098-
If y < x, we don't have a choice but to seek backwards to the highest I frame
1099-
before y.
1100-
1101-
If y > x, we have two choices:
1102-
1103-
1. We could keep decoding forward until we hit y. Illustrated below:
1104-
1105-
I P P P I P P P I P P I P P I P
1106-
x         y
1107-
1108-
2. We could try to jump to an I frame between x and y (indicated by j below).
1109-
And then start decoding until we encounter y. Illustrated below:
1110-
1111-
I P P P I P P P I P P I P P I P
1112-
x       j y
1113-
1114-
(2) is more efficient than (1) if there is an I frame between x and y.
1115-
*/
11161091
bool SingleStreamDecoder::canWeAvoidSeeking() const {
1092+
// Returns true if we can avoid seeking in the AVFormatContext based on
1093+
// heuristics that rely on the target cursor_ and the last decoded frame.
1094+
// Seeking is expensive, so we try to avoid it when possible.
1095+
// Note that this function itself isn't always that cheap to call: in
1096+
// particular the calls to getKeyFrameIndexForPts below in approximate mode
1097+
// are sometimes slow.
1098+
// TODO we should understand why (is it because it reads the file?) and
1099+
// potentially optimize it. E.g. we may not want to ever seek, or even *check*
1100+
// if we need to seek in some cases, like if we're going to decode 80% of the
1101+
// frames anyway.
11171102
const StreamInfo& streamInfo = streamInfos_.at(activeStreamIndex_);
11181103
if (streamInfo.avMediaType == AVMEDIA_TYPE_AUDIO) {
11191104
// For audio, we only need to seek if a backwards seek was requested
@@ -1136,13 +1121,34 @@ bool SingleStreamDecoder::canWeAvoidSeeking() const {
11361121
// implement caching.
11371122
return false;
11381123
}
1139-
// We are seeking forwards.
1140-
// We can only skip a seek if both lastDecodedAvFramePts and
1141-
// cursor_ share the same keyframe.
1142-
int lastDecodedAvFrameIndex = getKeyFrameIndexForPts(lastDecodedAvFramePts_);
1124+
// We are seeking forwards. We can skip a seek if both the last decoded frame
1125+
// and cursor_ share the same keyframe:
1126+
// Videos have I frames and non-I frames (P and B frames). Non-I frames need
1127+
// data from the previous I frame to be decoded.
1128+
//
1129+
// Imagine the cursor is at a random frame with PTS=lastDecodedAvFramePts (x
1130+
// for brevity) and we wish to seek to a user-specified PTS=y.
1131+
//
1132+
// If y < x, we don't have a choice but to seek backwards to the highest I
1133+
// frame before y.
1134+
//
1135+
// If y > x, we have two choices:
1136+
//
1137+
// 1. We could keep decoding forward until we hit y. Illustrated below:
1138+
//
1139+
// I P P P I P P P I P P I P
1140+
// x         y
1141+
//
1142+
// 2. We could try to jump to an I frame between x and y (indicated by j
1143+
// below), and then decode forward until we encounter y. Illustrated below:
1144+
//
1145+
// I P P P I P P P I P P I P
1146+
// x       j y
1147+
// (2) is only more efficient than (1) if there is an I frame between x and y.
1148+
int lastKeyFrameIndex = getKeyFrameIndexForPts(lastDecodedAvFramePts_);
11431149
int targetKeyFrameIndex = getKeyFrameIndexForPts(cursor_);
1144-
return lastDecodedAvFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
1145-
lastDecodedAvFrameIndex == targetKeyFrameIndex;
1150+
return lastKeyFrameIndex >= 0 && targetKeyFrameIndex >= 0 &&
1151+
lastKeyFrameIndex == targetKeyFrameIndex;
11461152
}
11471153

11481154
// This method looks at currentPts and desiredPts and seeks in the

test/conftest.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,17 @@
44
import pytest
55
import torch
66

7+
from .utils import in_fbcode
8+
79

810
def pytest_configure(config):
911
# register an additional marker (see pytest_collection_modifyitems)
1012
config.addinivalue_line(
1113
"markers", "needs_cuda: mark for tests that rely on a CUDA device"
1214
)
15+
config.addinivalue_line(
16+
"markers", "needs_ffmpeg_cli: mark for tests that rely on ffmpeg"
17+
)
1318

1419

1520
def pytest_collection_modifyitems(items):
@@ -28,6 +33,15 @@ def pytest_collection_modifyitems(items):
2833
# 'needs_cuda' mark, and the ones with device == 'cpu' won't have the
2934
# mark.
3035
needs_cuda = item.get_closest_marker("needs_cuda") is not None
36+
needs_ffmpeg_cli = item.get_closest_marker("needs_ffmpeg_cli") is not None
37+
has_skip_marker = item.get_closest_marker("skip") is not None
38+
has_skipif_marker = item.get_closest_marker("skipif") is not None
39+
40+
if in_fbcode():
41+
# fbcode doesn't like skipping tests, so instead we just don't collect the tests
42+
# so that they don't even "exist", hence the continue statements.
43+
if needs_ffmpeg_cli or has_skip_marker or has_skipif_marker:
44+
continue
3145

3246
if (
3347
needs_cuda

test/test_decoders.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
BT709_FULL_RANGE,
3030
cuda_version_used_for_building_torch,
3131
get_ffmpeg_major_version,
32+
get_python_version,
3233
H264_10BITS,
3334
H265_10BITS,
3435
H265_VIDEO,
@@ -39,6 +40,7 @@
3940
NASA_AUDIO_MP3_44100,
4041
NASA_VIDEO,
4142
needs_cuda,
43+
needs_ffmpeg_cli,
4244
psnr,
4345
SINE_MONO_S16,
4446
SINE_MONO_S32,
@@ -1146,6 +1148,10 @@ def test_get_key_frame_indices(self, device):
11461148

11471149
# TODO investigate why this fails internally.
11481150
@pytest.mark.skipif(in_fbcode(), reason="Compile test fails internally.")
1151+
@pytest.mark.skipif(
1152+
get_python_version() >= (3, 14),
1153+
reason="torch.compile is not supported on Python 3.14+",
1154+
)
11491155
@pytest.mark.parametrize("device", all_supported_devices())
11501156
def test_compile(self, device):
11511157
decoder, device = make_video_decoder(NASA_VIDEO.path, device=device)
@@ -1311,10 +1317,7 @@ def setup_frame_mappings(tmp_path, file, stream_index):
13111317
# Return the custom frame mappings as a JSON string
13121318
return custom_frame_mappings
13131319

1314-
@pytest.mark.skipif(
1315-
in_fbcode(),
1316-
reason="ffprobe not available internally",
1317-
)
1320+
@needs_ffmpeg_cli
13181321
@pytest.mark.parametrize("device", all_supported_devices())
13191322
@pytest.mark.parametrize("stream_index", [0, 3])
13201323
@pytest.mark.parametrize(
@@ -1361,10 +1364,7 @@ def test_custom_frame_mappings_json_and_bytes(
13611364
),
13621365
)
13631366

1364-
@pytest.mark.skipif(
1365-
in_fbcode(),
1366-
reason="ffprobe not available internally",
1367-
)
1367+
@needs_ffmpeg_cli
13681368
@pytest.mark.parametrize("device", all_supported_devices())
13691369
@pytest.mark.parametrize(
13701370
"custom_frame_mappings,expected_match",

test/test_encoders.py

Lines changed: 70 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@
1717
assert_tensor_close_on_at_least,
1818
get_ffmpeg_major_version,
1919
get_ffmpeg_minor_version,
20-
in_fbcode,
2120
IS_WINDOWS,
2221
NASA_AUDIO_MP3,
22+
needs_ffmpeg_cli,
2323
psnr,
2424
SINE_MONO_S32,
2525
TEST_SRC_2_720P,
@@ -217,13 +217,22 @@ def test_bad_input_parametrized(self, method, tmp_path):
217217
getattr(decoder, method)(**valid_params, num_channels=num_channels)
218218

219219
@pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
220-
@pytest.mark.parametrize("format", ("wav", "flac"))
220+
@pytest.mark.parametrize(
221+
"format",
222+
[
223+
pytest.param(
224+
"wav",
225+
marks=pytest.mark.skipif(
226+
get_ffmpeg_major_version() == 4,
227+
reason="Swresample with FFmpeg 4 doesn't work on wav files",
228+
),
229+
),
230+
"flac",
231+
],
232+
)
221233
def test_round_trip(self, method, format, tmp_path):
222234
# Check that decode(encode(samples)) == samples on lossless formats
223235

224-
if get_ffmpeg_major_version() == 4 and format == "wav":
225-
pytest.skip("Swresample with FFmpeg 4 doesn't work on wav files")
226-
227236
asset = NASA_AUDIO_MP3
228237
source_samples = self.decode(asset).data
229238

@@ -249,7 +258,7 @@ def test_round_trip(self, method, format, tmp_path):
249258
self.decode(encoded_source).data, source_samples, rtol=rtol, atol=atol
250259
)
251260

252-
@pytest.mark.skipif(in_fbcode(), reason="TODO: enable ffmpeg CLI")
261+
@needs_ffmpeg_cli
253262
@pytest.mark.parametrize("asset", (NASA_AUDIO_MP3, SINE_MONO_S32))
254263
@pytest.mark.parametrize("bit_rate", (None, 0, 44_100, 999_999_999))
255264
@pytest.mark.parametrize("num_channels", (None, 1, 2))
@@ -356,17 +365,31 @@ def test_against_cli(
356365
@pytest.mark.parametrize("asset", (NASA_AUDIO_MP3, SINE_MONO_S32))
357366
@pytest.mark.parametrize("bit_rate", (None, 0, 44_100, 999_999_999))
358367
@pytest.mark.parametrize("num_channels", (None, 1, 2))
359-
@pytest.mark.parametrize("format", ("mp3", "wav", "flac"))
368+
@pytest.mark.parametrize(
369+
"format",
370+
[
371+
# TODO: https://github.com/pytorch/torchcodec/issues/837
372+
pytest.param(
373+
"mp3",
374+
marks=pytest.mark.skipif(
375+
IS_WINDOWS and get_ffmpeg_major_version() <= 5,
376+
reason="Encoding mp3 on Windows is weirdly buggy",
377+
),
378+
),
379+
pytest.param(
380+
"wav",
381+
marks=pytest.mark.skipif(
382+
get_ffmpeg_major_version() == 4,
383+
reason="Swresample with FFmpeg 4 doesn't work on wav files",
384+
),
385+
),
386+
"flac",
387+
],
388+
)
360389
@pytest.mark.parametrize("method", ("to_tensor", "to_file_like"))
361390
def test_against_to_file(
362391
self, asset, bit_rate, num_channels, format, tmp_path, method
363392
):
364-
if get_ffmpeg_major_version() == 4 and format == "wav":
365-
pytest.skip("Swresample with FFmpeg 4 doesn't work on wav files")
366-
if IS_WINDOWS and get_ffmpeg_major_version() <= 5 and format == "mp3":
367-
# TODO: https://github.com/pytorch/torchcodec/issues/837
368-
pytest.skip("Encoding mp3 on Windows is weirdly buggy")
369-
370393
encoder = AudioEncoder(self.decode(asset).data, sample_rate=asset.sample_rate)
371394

372395
params = dict(bit_rate=bit_rate, num_channels=num_channels)
@@ -847,16 +870,27 @@ def encode_to_tensor(frames):
847870
)
848871

849872
@pytest.mark.parametrize(
850-
"format", ("mov", "mp4", "mkv", pytest.param("webm", marks=pytest.mark.slow))
873+
"format",
874+
[
875+
"mov",
876+
"mp4",
877+
"mkv",
878+
pytest.param(
879+
"webm",
880+
marks=[
881+
pytest.mark.slow,
882+
pytest.mark.skipif(
883+
get_ffmpeg_major_version() == 4
884+
or (IS_WINDOWS and get_ffmpeg_major_version() in (6, 7)),
885+
reason="Codec for webm is not available in this FFmpeg installation.",
886+
),
887+
],
888+
),
889+
],
851890
)
852891
@pytest.mark.parametrize("method", ("to_file", "to_tensor", "to_file_like"))
853892
def test_round_trip(self, tmp_path, format, method):
854893
# Test that decode(encode(decode(frames))) == decode(frames)
855-
ffmpeg_version = get_ffmpeg_major_version()
856-
if format == "webm" and (
857-
ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7))
858-
):
859-
pytest.skip("Codec for webm is not available in this FFmpeg installation.")
860894
source_frames, frame_rate = self.decode_and_get_frame_rate(TEST_SRC_2_720P.path)
861895

862896
encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate)
@@ -889,25 +923,29 @@ def test_round_trip(self, tmp_path, format, method):
889923

890924
@pytest.mark.parametrize(
891925
"format",
892-
(
926+
[
893927
"mov",
894928
"mp4",
895929
"avi",
896930
"mkv",
897931
"flv",
898932
"gif",
899-
pytest.param("webm", marks=pytest.mark.slow),
900-
),
933+
pytest.param(
934+
"webm",
935+
marks=[
936+
pytest.mark.slow,
937+
pytest.mark.skipif(
938+
get_ffmpeg_major_version() == 4
939+
or (IS_WINDOWS and get_ffmpeg_major_version() in (6, 7)),
940+
reason="Codec for webm is not available in this FFmpeg installation.",
941+
),
942+
],
943+
),
944+
],
901945
)
902946
@pytest.mark.parametrize("method", ("to_tensor", "to_file_like"))
903947
def test_against_to_file(self, tmp_path, format, method):
904948
# Test that to_file, to_tensor, and to_file_like produce the same results
905-
ffmpeg_version = get_ffmpeg_major_version()
906-
if format == "webm" and (
907-
ffmpeg_version == 4 or (IS_WINDOWS and ffmpeg_version in (6, 7))
908-
):
909-
pytest.skip("Codec for webm is not available in this FFmpeg installation.")
910-
911949
source_frames, frame_rate = self.decode_and_get_frame_rate(TEST_SRC_2_720P.path)
912950
encoder = VideoEncoder(frames=source_frames, frame_rate=frame_rate)
913951

@@ -928,7 +966,7 @@ def test_against_to_file(self, tmp_path, format, method):
928966
rtol=0,
929967
)
930968

931-
@pytest.mark.skipif(in_fbcode(), reason="ffmpeg CLI not available")
969+
@needs_ffmpeg_cli
932970
@pytest.mark.parametrize(
933971
"format",
934972
(
@@ -1150,10 +1188,7 @@ def write(self, data):
11501188
):
11511189
encoder.to_file_like(NoSeekMethod(), format="mp4")
11521190

1153-
@pytest.mark.skipif(
1154-
in_fbcode(),
1155-
reason="ffprobe not available internally",
1156-
)
1191+
@needs_ffmpeg_cli
11571192
@pytest.mark.parametrize(
11581193
"format,codec_spec",
11591194
[
@@ -1181,10 +1216,7 @@ def test_codec_parameter_utilized(self, tmp_path, format, codec_spec):
11811216
]
11821217
assert actual_codec_spec == codec_spec
11831218

1184-
@pytest.mark.skipif(
1185-
in_fbcode(),
1186-
reason="ffprobe not available internally",
1187-
)
1219+
@needs_ffmpeg_cli
11881220
@pytest.mark.parametrize(
11891221
"codec_spec,codec_impl",
11901222
[
@@ -1227,7 +1259,7 @@ def test_codec_spec_vs_impl_equivalence(self, tmp_path, codec_spec, codec_impl):
12271259
frames_impl = self.decode(impl_output)
12281260
torch.testing.assert_close(frames_spec, frames_impl, rtol=0, atol=0)
12291261

1230-
@pytest.mark.skipif(in_fbcode(), reason="ffprobe not available")
1262+
@needs_ffmpeg_cli
12311263
@pytest.mark.parametrize(
12321264
"profile,colorspace,color_range",
12331265
[

0 commit comments

Comments
 (0)