diff --git a/mapillary_tools/mp4/mp4_sample_parser.py b/mapillary_tools/mp4/mp4_sample_parser.py index 1161edfe9..a24c834ad 100644 --- a/mapillary_tools/mp4/mp4_sample_parser.py +++ b/mapillary_tools/mp4/mp4_sample_parser.py @@ -7,6 +7,14 @@ from . import construct_mp4_parser as cparser, simple_mp4_parser as sparser +def _convert_to_signed_int32(unsigned_int32: int) -> int: + """Interpret an unsigned 32-bit value as negative if high bit is set.""" + if (unsigned_int32 & (1 << 31)) == 0: + return unsigned_int32 + else: + return unsigned_int32 - (1 << 32) + + class RawSample(T.NamedTuple): # 1-based index description_idx: int @@ -192,7 +200,13 @@ def extract_raw_samples_from_stbl_data( composition_offsets = [] for entry in data["entries"]: for _ in range(entry["sample_count"]): - composition_offsets.append(entry["sample_offset"]) + # Some encodings like H.264 and H.265 support negative offsets. + # We cannot rely on the version field since some encoders incorrectly set + # ctts version to 0 instead of 1 even when using signed offsets. + # Legitimate positive values are relatively small so we can assume the value is signed. 
+ composition_offsets.append( + _convert_to_signed_int32(entry["sample_offset"]) + ) elif box["type"] == b"stss": syncs = set(data["entries"]) diff --git a/tests/data/videos/sample-5s_h265.mp4 b/tests/data/videos/sample-5s_h265.mp4 new file mode 100644 index 000000000..09bf7fa19 Binary files /dev/null and b/tests/data/videos/sample-5s_h265.mp4 differ diff --git a/tests/integration/test_process_and_upload.py b/tests/integration/test_process_and_upload.py index c21cb2c91..b8dc942c8 100644 --- a/tests/integration/test_process_and_upload.py +++ b/tests/integration/test_process_and_upload.py @@ -233,6 +233,48 @@ def test_video_process_and_upload( "MAPOrientation": 1, "filetype": "image", }, + "sample-5s_h265_v_000001.jpg": { + "filename": "sample-5s_h265_v_000001.jpg", + "MAPFilename": "sample-5s_h265_v_000001.jpg", + "MAPAltitude": 94.75, + "MAPCaptureTime": "2025_03_14_07_00_00_000", + "MAPCompassHeading": { + "MagneticHeading": 0.484, + "TrueHeading": 0.484, + }, + "MAPLatitude": 37.793585, + "MAPLongitude": -122.461396, + "MAPOrientation": 1, + "filetype": "image", + }, + "sample-5s_h265_v_000002.jpg": { + "filename": "sample-5s_h265_v_000002.jpg", + "MAPFilename": "sample-5s_h265_v_000002.jpg", + "MAPAltitude": 93.347, + "MAPCaptureTime": "2025_03_14_07_00_02_000", + "MAPCompassHeading": { + "MagneticHeading": 0.484, + "TrueHeading": 0.484, + }, + "MAPLatitude": 37.7937349, + "MAPLongitude": -122.4613944, + "MAPOrientation": 1, + "filetype": "image", + }, + "sample-5s_h265_v_000003.jpg": { + "filename": "sample-5s_h265_v_000003.jpg", + "MAPFilename": "sample-5s_h265_v_000003.jpg", + "MAPAltitude": 92.492, + "MAPCaptureTime": "2025_03_14_07_00_04_000", + "MAPCompassHeading": { + "MagneticHeading": 343.286, + "TrueHeading": 343.286, + }, + "MAPLatitude": 37.7938825, + "MAPLongitude": -122.4614226, + "MAPOrientation": 1, + "filetype": "image", + }, } uploaded_descs = sum(extract_all_uploaded_descs(Path(setup_upload)), []) assert_same_image_descs(uploaded_descs, 
list(expected.values())) diff --git a/tests/integration/test_video_process.py b/tests/integration/test_video_process.py index 34f59762c..ab99d3cbb 100644 --- a/tests/integration/test_video_process.py +++ b/tests/integration/test_video_process.py @@ -125,7 +125,7 @@ def test_video_process(setup_data: py.path.local): str(video_dir.join("my_samples")), ] ) - assert 3 == len(descs) + assert 6 == len(descs) assert 0 == len([d for d in descs if "error" in d]) diff --git a/tests/unit/test_mp4_sample_parser.py b/tests/unit/test_mp4_sample_parser.py index 6e561fcb0..3ac30e0b3 100644 --- a/tests/unit/test_mp4_sample_parser.py +++ b/tests/unit/test_mp4_sample_parser.py @@ -52,3 +52,16 @@ def test_movie_box_parser(): assert sample.raw_sample.offset == raw_sample.offset assert sample.raw_sample.is_sync == raw_sample.is_sync assert sample.raw_sample.size == raw_sample.size + + +def test_movie_box_parser_negative_composition_offset(): + moov_parser = mp4_sample_parser.MovieBoxParser.parse_file( + Path("tests/data/videos/sample-5s_h265.mp4") + ) + assert 2 == len(list(moov_parser.extract_tracks())) + video_track = moov_parser.extract_track_at(0) + assert video_track.is_video_track() + raw_samples = list(video_track.extract_raw_samples()) + assert 146 == len(raw_samples) + # Make sure the parser can parse negative composition offsets + assert 0 < len([s for s in raw_samples if s.composition_offset < 0])