Skip to content

Commit 37930a6

Browse files
authored
feat: add IMU data in CAMM as experimental feature (#694)
* update the interface
* write the streams to CAMM
* extract
* sort imports
* add MAPILLARY__EXPERIMENTAL_ENABLE_IMU
* parse IMU from camm
* introduce TelemetryMeasurement as baseclass
* print IMU data
* print IMU for comparison
* format
* rename
* relative imports
* fix tests
1 parent ced3aa6 commit 37930a6

File tree

10 files changed

+373
-144
lines changed

10 files changed

+373
-144
lines changed

mapillary_tools/camm/camm_builder.py

Lines changed: 87 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import io
22
import typing as T
33

4-
from .. import geo, types
4+
from .. import geo, telemetry, types
55
from ..mp4 import (
66
construct_mp4_parser as cparser,
77
mp4_sample_parser as sample_parser,
@@ -11,20 +11,65 @@
1111
from . import camm_parser
1212

1313

14-
def build_camm_sample(point: geo.Point) -> bytes:
15-
return camm_parser.CAMMSampleData.build(
16-
{
17-
"type": camm_parser.CAMMType.MIN_GPS.value,
18-
"data": [
19-
point.lat,
20-
point.lon,
21-
-1.0 if point.alt is None else point.alt,
22-
],
23-
}
24-
)
14+
# Union of the measurement kinds handled by this module: a GPS point or a
# measurement type from the telemetry module.
TelemetryMeasurement = T.Union[geo.Point, telemetry.TelemetryMeasurement]
2518

2619

27-
def _create_edit_list(
20+
def _build_camm_sample(measurement: TelemetryMeasurement) -> bytes:
    """Serialize a single measurement into the bytes of one CAMM sample.

    A ``geo.Point`` is written as a ``MIN_GPS`` sample holding latitude,
    longitude and altitude. Accelerometer, gyroscope and magnetometer
    readings are written as ``ACCELERATION``, ``GYRO`` and
    ``MAGNETIC_FIELD`` samples holding their x/y/z components.

    Raises:
        ValueError: if the measurement is none of the supported types.
    """
    if isinstance(measurement, geo.Point):
        # A point without altitude is written with -1.0 as its altitude.
        altitude = measurement.alt
        if altitude is None:
            altitude = -1.0
        return camm_parser.CAMMSampleData.build(
            {
                "type": camm_parser.CAMMType.MIN_GPS.value,
                "data": [measurement.lat, measurement.lon, altitude],
            }
        )

    # The IMU readings share the same x/y/z payload and differ only in the
    # CAMM type tag; the classes are tested in the order listed here.
    xyz_sample_types = (
        # Accelerometer reading in meters/second^2 along XYZ axes of the camera.
        (telemetry.AccelerationData, camm_parser.CAMMType.ACCELERATION),
        # Gyroscope signal in radians/seconds around XYZ axes of the camera.
        # Rotation is positive in the counterclockwise direction.
        (telemetry.GyroscopeData, camm_parser.CAMMType.GYRO),
        # Ambient magnetic field.
        (telemetry.MagnetometerData, camm_parser.CAMMType.MAGNETIC_FIELD),
    )
    for measurement_cls, camm_type in xyz_sample_types:
        if isinstance(measurement, measurement_cls):
            return camm_parser.CAMMSampleData.build(
                {
                    "type": camm_type.value,
                    "data": [measurement.x, measurement.y, measurement.z],
                }
            )

    raise ValueError(f"unexpected measurement type {type(measurement)}")
70+
71+
72+
def _create_edit_list_from_points(
2873
point_segments: T.Sequence[T.Sequence[geo.Point]],
2974
movie_timescale: int,
3075
media_timescale: int,
@@ -82,18 +127,30 @@ def _create_edit_list(
82127
}
83128

84129

85-
def convert_points_to_raw_samples(
86-
points: T.Sequence[geo.Point], timescale: int
130+
def _multiplex(
    points: T.Sequence[geo.Point],
    measurements: T.Optional[T.List[telemetry.TelemetryMeasurement]] = None,
) -> T.List[TelemetryMeasurement]:
    """Merge GPS points and optional telemetry measurements into one list
    ordered by their ``time`` attribute.

    The inputs are not modified. Sorting is stable, so entries with equal
    time keep their relative order: points first, then measurements.
    """
    combined: T.List[TelemetryMeasurement] = list(points)
    if measurements:
        combined.extend(measurements)

    return sorted(combined, key=lambda entry: entry.time)
138+
139+
140+
def convert_telemetry_to_raw_samples(
141+
measurements: T.Sequence[TelemetryMeasurement],
142+
timescale: int,
87143
) -> T.Generator[sample_parser.RawSample, None, None]:
88-
for idx, point in enumerate(points):
89-
camm_sample_data = build_camm_sample(point)
144+
for idx, measurement in enumerate(measurements):
145+
camm_sample_data = _build_camm_sample(measurement)
90146

91-
if idx + 1 < len(points):
92-
timedelta = int((points[idx + 1].time - point.time) * timescale)
147+
if idx + 1 < len(measurements):
148+
timedelta = int((measurements[idx + 1].time - measurement.time) * timescale)
93149
else:
94150
timedelta = 0
151+
95152
assert 0 <= timedelta <= builder.UINT32_MAX, (
96-
f"expected timedelta {timedelta} between {points[idx]} and {points[idx + 1]} with timescale {timescale} to be <= UINT32_MAX"
153+
f"expected timedelta {timedelta} between {measurements[idx]} and {measurements[idx + 1]} with timescale {timescale} to be <= UINT32_MAX"
97154
)
98155

99156
yield sample_parser.RawSample(
@@ -232,19 +289,23 @@ def create_camm_trak(
232289
}
233290

234291

235-
def camm_sample_generator2(video_metadata: types.VideoMetadata):
292+
def camm_sample_generator2(
293+
video_metadata: types.VideoMetadata,
294+
telemetry_measurements: T.Optional[T.List[telemetry.TelemetryMeasurement]] = None,
295+
):
236296
def _f(
237297
fp: T.BinaryIO,
238298
moov_children: T.List[builder.BoxDict],
239299
) -> T.Generator[io.IOBase, None, None]:
240300
movie_timescale = builder.find_movie_timescale(moov_children)
241301
# make sure the precision of timedeltas not lower than 0.001 (1ms)
242302
media_timescale = max(1000, movie_timescale)
303+
measurements = _multiplex(video_metadata.points, telemetry_measurements)
243304
camm_samples = list(
244-
convert_points_to_raw_samples(video_metadata.points, media_timescale)
305+
convert_telemetry_to_raw_samples(measurements, media_timescale)
245306
)
246307
camm_trak = create_camm_trak(camm_samples, media_timescale)
247-
elst = _create_edit_list(
308+
elst = _create_edit_list_from_points(
248309
[video_metadata.points], movie_timescale, media_timescale
249310
)
250311
if T.cast(T.Dict, elst["data"])["entries"]:
@@ -280,6 +341,8 @@ def _f(
280341
)
281342

282343
# if yield, the moov_children will not be modified
283-
return (io.BytesIO(build_camm_sample(point)) for point in video_metadata.points)
344+
return (
345+
io.BytesIO(_build_camm_sample(measurement)) for measurement in measurements
346+
)
284347

285348
return _f

mapillary_tools/camm/camm_parser.py

Lines changed: 117 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,22 @@
99

1010
import construct as C
1111

12-
from .. import geo
13-
from ..mp4 import mp4_sample_parser as sample_parser, simple_mp4_parser as sparser
12+
from .. import geo, telemetry
13+
from ..mp4 import simple_mp4_parser as sparser
14+
from ..mp4.mp4_sample_parser import MovieBoxParser, Sample, TrackBoxParser
1415

1516

1617
LOG = logging.getLogger(__name__)
1718

1819

20+
# Union of the measurement types the CAMM sample parser in this module can
# return: a GPS point or one of the three x/y/z sensor readings.
TelemetryMeasurement = T.Union[
    geo.Point,
    telemetry.AccelerationData,
    telemetry.GyroscopeData,
    telemetry.MagnetometerData,
]
26+
27+
1928
# Camera Motion Metadata Spec https://developers.google.com/streetview/publish/camm-spec
2029
class CAMMType(Enum):
2130
ANGLE_AXIS = 0
@@ -75,9 +84,9 @@ class CAMMType(Enum):
7584
)
7685

7786

78-
def _parse_point_from_sample(
79-
fp: T.BinaryIO, sample: sample_parser.Sample
80-
) -> T.Optional[geo.Point]:
87+
def _parse_telemetry_from_sample(
88+
fp: T.BinaryIO, sample: Sample
89+
) -> T.Optional[TelemetryMeasurement]:
8190
fp.seek(sample.raw_sample.offset, io.SEEK_SET)
8291
data = fp.read(sample.raw_sample.size)
8392
box = CAMMSampleData.parse(data)
@@ -99,12 +108,34 @@ def _parse_point_from_sample(
99108
alt=box.data.altitude,
100109
angle=None,
101110
)
111+
elif box.type == CAMMType.ACCELERATION.value:
112+
return telemetry.AccelerationData(
113+
time=sample.exact_time,
114+
x=box.data[0],
115+
y=box.data[1],
116+
z=box.data[2],
117+
)
118+
elif box.type == CAMMType.GYRO.value:
119+
return telemetry.GyroscopeData(
120+
time=sample.exact_time,
121+
x=box.data[0],
122+
y=box.data[1],
123+
z=box.data[2],
124+
)
125+
elif box.type == CAMMType.MAGNETIC_FIELD.value:
126+
return telemetry.MagnetometerData(
127+
time=sample.exact_time,
128+
x=box.data[0],
129+
y=box.data[1],
130+
z=box.data[2],
131+
)
102132
return None
103133

104134

105-
def filter_points_by_elst(
106-
points: T.Iterable[geo.Point], elst: T.Sequence[T.Tuple[float, float]]
107-
) -> T.Generator[geo.Point, None, None]:
135+
def _filter_telemetry_by_elst_segments(
136+
measurements: T.Iterable[TelemetryMeasurement],
137+
elst: T.Sequence[T.Tuple[float, float]],
138+
) -> T.Generator[TelemetryMeasurement, None, None]:
108139
empty_elst = [entry for entry in elst if entry[0] == -1]
109140
if empty_elst:
110141
offset = empty_elst[-1][1]
@@ -114,20 +145,26 @@ def filter_points_by_elst(
114145
elst = [entry for entry in elst if entry[0] != -1]
115146

116147
if not elst:
117-
for p in points:
118-
yield dataclasses.replace(p, time=p.time + offset)
148+
for m in measurements:
149+
if dataclasses.is_dataclass(m):
150+
yield dataclasses.replace(m, time=m.time + offset)
151+
else:
152+
m._replace(time=m.time + offset)
119153
return
120154

121155
elst.sort(key=lambda entry: entry[0])
122156
elst_idx = 0
123-
for p in points:
157+
for m in measurements:
124158
if len(elst) <= elst_idx:
125159
break
126160
media_time, duration = elst[elst_idx]
127-
if p.time < media_time:
161+
if m.time < media_time:
128162
pass
129-
elif p.time <= media_time + duration:
130-
yield dataclasses.replace(p, time=p.time + offset)
163+
elif m.time <= media_time + duration:
164+
if dataclasses.is_dataclass(m):
165+
yield dataclasses.replace(m, time=m.time + offset)
166+
else:
167+
m._replace(time=m.time + offset)
131168
else:
132169
elst_idx += 1
133170

@@ -148,46 +185,84 @@ def _is_camm_description(description: T.Dict) -> bool:
148185
return description["format"] == b"camm"
149186

150187

188+
def _contains_camm_description(track: TrackBoxParser) -> bool:
    """Return True if any sample description of ``track`` is a CAMM one."""
    for description in track.extract_sample_descriptions():
        if _is_camm_description(description):
            return True
    return False
191+
192+
193+
def _filter_telemetry_by_track_elst(
    moov: MovieBoxParser,
    track: TrackBoxParser,
    measurements: T.Iterable[TelemetryMeasurement],
) -> T.List[TelemetryMeasurement]:
    """Apply the edit list of ``track`` to ``measurements``.

    If the track has no edit list box, or the box has no entries, the
    measurements are returned as a list without filtering. Otherwise every
    entry is converted with ``elst_entry_to_seconds`` (using the movie
    timescale from ``moov`` and the media timescale from ``track``) and the
    measurements are passed through ``_filter_telemetry_by_elst_segments``.
    """
    elst_boxdata = track.extract_elst_boxdata()
    if elst_boxdata is None:
        return list(measurements)

    elst_entries = elst_boxdata["entries"]
    if not elst_entries:
        return list(measurements)

    # Media timescale comes from the track's mdhd box, movie timescale from
    # the movie's mvhd box.
    media_timescale = track.extract_mdhd_boxdata()["timescale"]
    movie_timescale = moov.extract_mvhd_boxdata()["timescale"]

    segments = [
        elst_entry_to_seconds(
            elst_entry,
            movie_timescale=movie_timescale,
            media_timescale=media_timescale,
        )
        for elst_entry in elst_entries
    ]
    filtered = _filter_telemetry_by_elst_segments(measurements, segments)
    return list(filtered)
223+
224+
151225
def extract_points(fp: T.BinaryIO) -> T.Optional[T.List[geo.Point]]:
152226
"""
153227
Return a list of points (could be empty) if it is a valid CAMM video,
154228
otherwise None
155229
"""
156230

157-
points = None
231+
moov = MovieBoxParser.parse_stream(fp)
158232

159-
moov = sample_parser.MovieBoxParser.parse_stream(fp)
160233
for track in moov.extract_tracks():
161-
descriptions = track.extract_sample_descriptions()
162-
if any(_is_camm_description(d) for d in descriptions):
163-
maybe_points = (
164-
_parse_point_from_sample(fp, sample)
234+
if _contains_camm_description(track):
235+
maybe_measurements = (
236+
_parse_telemetry_from_sample(fp, sample)
165237
for sample in track.extract_samples()
166238
if _is_camm_description(sample.description)
167239
)
168-
points = [p for p in maybe_points if p is not None]
169-
if points:
170-
elst_boxdata = track.extract_elst_boxdata()
171-
if elst_boxdata is not None:
172-
elst_entries = elst_boxdata["entries"]
173-
if elst_entries:
174-
# media_timescale
175-
mdhd_boxdata = track.extract_mdhd_boxdata()
176-
media_timescale = mdhd_boxdata["timescale"]
177-
# movie_timescale
178-
mvhd_boxdata = moov.extract_mvhd_boxdata()
179-
movie_timescale = mvhd_boxdata["timescale"]
180-
segments = [
181-
elst_entry_to_seconds(
182-
entry,
183-
movie_timescale=movie_timescale,
184-
media_timescale=media_timescale,
185-
)
186-
for entry in elst_entries
187-
]
188-
points = list(filter_points_by_elst(points, segments))
240+
points = [m for m in maybe_measurements if isinstance(m, geo.Point)]
189241

190-
return points
242+
return T.cast(
243+
T.List[geo.Point], _filter_telemetry_by_track_elst(moov, track, points)
244+
)
245+
246+
return None
247+
248+
249+
def extract_telemetry_data(fp: T.BinaryIO) -> T.Optional[T.List[TelemetryMeasurement]]:
    """Return the telemetry measurements of the first CAMM track in ``fp``.

    Only samples with a CAMM description are parsed, and samples for which
    ``_parse_telemetry_from_sample`` returns None are skipped. The result is
    then filtered by the track's edit list. Returns None when no track
    contains a CAMM sample description.
    """
    moov = MovieBoxParser.parse_stream(fp)

    for track in moov.extract_tracks():
        if not _contains_camm_description(track):
            continue

        parsed: T.List[TelemetryMeasurement] = []
        for sample in track.extract_samples():
            if not _is_camm_description(sample.description):
                continue
            measurement = _parse_telemetry_from_sample(fp, sample)
            if measurement is not None:
                parsed.append(measurement)

        # Only the first CAMM track is considered.
        return _filter_telemetry_by_track_elst(moov, track, parsed)

    return None
191266

192267

193268
def parse_gpx(path: pathlib.Path) -> T.List[geo.Point]:

0 commit comments

Comments
 (0)