Skip to content

Commit e78afdd

Browse files
NicolasHugfacebook-github-bot
authored andcommitted
Add 'scan' term to glossary, renaming, doc updates (#124)
Summary: - Add scan term to glossary - Rename `begin_stream_from_content_seconds` into `begin_stream_seconds`. Same for `end_...` - Some minor doc updates Pull Request resolved: #124 Reviewed By: scotts Differential Revision: D60445996 Pulled By: NicolasHug fbshipit-source-id: 2ef6d30f8f031c459ca5e67c89643bf8eccb1f75
1 parent 6ec5788 commit e78afdd

File tree

6 files changed

+54
-71
lines changed

6 files changed

+54
-71
lines changed

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,12 +23,13 @@ from torchcodec.decoders import SimpleVideoDecoder
2323
decoder = SimpleVideoDecoder("path/to/video.mp4")
2424

2525
decoder.metadata
26-
# VideoStreamMetadata: (Truncated output)
26+
# VideoStreamMetadata:
2727
# num_frames: 250
2828
# duration_seconds: 10.0
2929
# bit_rate: 31315.0
3030
# codec: h264
3131
# average_fps: 25.0
32+
# ... (truncated output)
3233

3334
len(decoder) # == decoder.metadata.num_frames!
3435
# 250

docs/source/glossary.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,8 @@ Glossary
1212
<https://ffmpeg.org/doxygen/trunk/group__lavf__decoding.html#ga757780d38f482deb4d809c6c521fbcc2>`_:
1313

1414
*The best stream is determined according to various heuristics as the most likely to be what the user expects.*
15+
16+
scan
17+
A scan corresponds to an entire pass over a video file, with the purpose
18+
of retrieving metadata about the different streams and frames. **It does
19+
not involve decoding**, so it is a lot cheaper than decoding the file.

examples/basic_example.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -150,14 +150,11 @@ def plot(frames: torch.Tensor, title : Optional[str] = None):
150150
# -------------------------
151151
#
152152
# So far, we have retrieved frames based on their index. We can also retrieve
153-
# frames based on *when* they are displayed. The available method are
154-
# :meth:`~torchcodec.decoders.SimpleVideoDecoder.get_frame_displayed_at` and
155-
# :meth:`~torchcodec.decoders.SimpleVideoDecoder.get_frames_displayed_at`, which
156-
# also return :class:`~torchcodec.decoders.Frame` and
157-
# :class:`~torchcodec.decoders.FrameBatch` objects respectively.
153+
# frames based on *when* they are displayed with
154+
# :meth:`~torchcodec.decoders.SimpleVideoDecoder.get_frame_displayed_at`, which
155+
# also returns :class:`~torchcodec.decoders.Frame`.
158156

159157
frame_at_2_seconds = decoder.get_frame_displayed_at(seconds=2)
160158
print(f"{type(frame_at_2_seconds) = }")
161159
print(frame_at_2_seconds)
162160
plot(frame_at_2_seconds.data, "Frame displayed at 2 seconds")
163-
# TODO_BEFORE_RELEASE: illustrate get_frames_displayed_at

src/torchcodec/decoders/_core/_metadata.py

Lines changed: 32 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class VideoStreamMetadata:
2424
"""Metadata of a single video stream."""
2525

2626
duration_seconds_from_header: Optional[float]
27-
"""Duration of the stream, in seconds obtained from the header (float or
27+
"""Duration of the stream, in seconds, obtained from the header (float or
2828
None). This could be inaccurate."""
2929
bit_rate: Optional[float]
3030
"""Bit rate of the stream, in bits per second (float or None)."""
@@ -37,36 +37,38 @@ class VideoStreamMetadata:
3737
content (the scan doesn't involve decoding). This is more accurate
3838
than ``num_frames_from_header``. We recommend using the
3939
``num_frames`` attribute instead. (int or None)."""
40-
begin_stream_from_content_seconds: Optional[float]
41-
"""Beginning of the stream in seconds (float or None).
42-
This is min(frame.pts) for all frames in this stream."""
43-
end_stream_from_content_seconds: Optional[float]
44-
"""End of the stream in seconds (float or None).
45-
This is max(frame.pts + frame.duration) for all frames in this stream.
46-
Note that frames have a pts and duration and the interval defined by
47-
[pts, pts + duration) is a half-open interval (the right boundary is open).
48-
Therefore no frame is displayed at this time value.
49-
Calling
50-
SimpleVideoDecoder.get_frame_displayed_at(end_stream_from_content_seconds)
51-
will raise a StopIteration exception.
52-
If you want to get the last frame you can use [-1] on a SimpleVideoDecoder
53-
object."""
40+
begin_stream_seconds: Optional[float]
41+
"""Beginning of the stream, in seconds (float or None).
42+
Conceptually, this corresponds to the first frame's :term:`pts`. It is
43+
computed as min(frame.pts) across all frames in the stream. Usually, this is
44+
equal to 0."""
45+
end_stream_seconds: Optional[float]
46+
"""End of the stream, in seconds (float or None).
47+
Conceptually, this corresponds to last_frame.pts + last_frame.duration. It
48+
is computed as max(frame.pts + frame.duration) across all frames in the
49+
stream. Note that no frame is displayed at this time value, so calling
50+
:meth:`~torchcodec.decoders.SimpleVideoDecoder.get_frame_displayed_at` with
51+
this value would result in an error. Retrieving the last frame is best done
52+
by simply indexing the :class:`~torchcodec.decoders.SimpleVideoDecoder`
53+
object with ``[-1]``.
54+
"""
5455
codec: Optional[str]
5556
"""Codec (str or None)."""
5657
width: Optional[int]
5758
"""Width of the frames (int or None)."""
5859
height: Optional[int]
5960
"""Height of the frames (int or None)."""
6061
average_fps_from_header: Optional[float]
61-
"""Averate fps of the stream (float or None)."""
62+
"""Average fps of the stream, obtained from the header (float or None).
63+
We recommend using the ``average_fps`` attribute instead."""
6264
stream_index: int
6365
"""Index of the stream within the video (int)."""
6466

6567
@property
6668
def num_frames(self) -> Optional[int]:
6769
"""Number of frames in the stream. This corresponds to
68-
``num_frames_from_content`` if it's not None, otherwise it corresponds
69-
to ``num_frames_from_header``.
70+
``num_frames_from_content`` if a :term:`scan` was made, otherwise it
71+
corresponds to ``num_frames_from_header``.
7072
"""
7173
if self.num_frames_from_content is not None:
7274
return self.num_frames_from_content
@@ -76,35 +78,26 @@ def num_frames(self) -> Optional[int]:
7678
@property
7779
def duration_seconds(self) -> Optional[float]:
7880
"""Duration of the stream in seconds. We try to calculate the duration
79-
from the actual frames if we scanned the frames. Otherwise we fall back
80-
to the duration obtained from the header.
81+
from the actual frames if a :term:`scan` was performed. Otherwise we
82+
fall back to ``duration_seconds_from_header``.
8183
"""
82-
if (
83-
self.end_stream_from_content_seconds is None
84-
or self.begin_stream_from_content_seconds is None
85-
):
84+
if self.end_stream_seconds is None or self.begin_stream_seconds is None:
8685
return self.duration_seconds_from_header
87-
return (
88-
self.end_stream_from_content_seconds
89-
- self.begin_stream_from_content_seconds
90-
)
86+
return self.end_stream_seconds - self.begin_stream_seconds
9187

9288
@property
9389
def average_fps(self) -> Optional[float]:
94-
"""Average fps of the stream. We try to get the average fps from the
95-
actual frames if we scanned the frames. Otherwise we fall back to the
96-
fps obtained from the header.
90+
"""Average fps of the stream. If a :term:`scan` was performed, this is
91+
computed from the number of frames and the duration of the stream.
92+
Otherwise we fall back to ``average_fps_from_header``.
9793
"""
9894
if (
99-
self.end_stream_from_content_seconds is None
100-
or self.begin_stream_from_content_seconds is None
95+
self.end_stream_seconds is None
96+
or self.begin_stream_seconds is None
10197
or self.num_frames is None
10298
):
10399
return self.average_fps_from_header
104-
return self.num_frames / (
105-
self.end_stream_from_content_seconds
106-
- self.begin_stream_from_content_seconds
107-
)
100+
return self.num_frames / (self.end_stream_seconds - self.begin_stream_seconds)
108101

109102
def __repr__(self):
110103
# Overridden because properties are not printed by default.
@@ -159,12 +152,8 @@ def get_video_metadata(decoder: torch.Tensor) -> VideoMetadata:
159152
bit_rate=stream_dict.get("bitRate"),
160153
num_frames_from_header=stream_dict.get("numFrames"),
161154
num_frames_from_content=stream_dict.get("numFramesFromScan"),
162-
begin_stream_from_content_seconds=stream_dict.get(
163-
"minPtsSecondsFromScan"
164-
),
165-
end_stream_from_content_seconds=stream_dict.get(
166-
"maxPtsSecondsFromScan"
167-
),
155+
begin_stream_seconds=stream_dict.get("minPtsSecondsFromScan"),
156+
end_stream_seconds=stream_dict.get("maxPtsSecondsFromScan"),
168157
codec=stream_dict.get("codec"),
169158
width=stream_dict.get("width"),
170159
height=stream_dict.get("height"),

src/torchcodec/decoders/_simple_video_decoder.py

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ def __repr__(self):
7777
class SimpleVideoDecoder:
7878
"""A single-stream video decoder.
7979
80-
If the video contains multiple video streams, the :term:`best stream` is used.
80+
If the video contains multiple video streams, the :term:`best stream` is
81+
used. This decoder always performs a :term:`scan` of the video.
8182
8283
Args:
8384
source (str, ``pathlib.Path``, ``torch.Tensor``, or bytes): The source of the video.
@@ -140,23 +141,19 @@ def __init__(
140141
)
141142
self._num_frames = self.metadata.num_frames_from_content
142143

143-
if self.metadata.begin_stream_from_content_seconds is None:
144+
if self.metadata.begin_stream_seconds is None:
144145
raise ValueError(
145146
"The minimum pts value in seconds is unknown. "
146147
+ _ERROR_REPORTING_INSTRUCTIONS
147148
)
148-
self._begin_stream_from_content_seconds = (
149-
self.metadata.begin_stream_from_content_seconds
150-
)
149+
self._begin_stream_seconds = self.metadata.begin_stream_seconds
151150

152-
if self.metadata.end_stream_from_content_seconds is None:
151+
if self.metadata.end_stream_seconds is None:
153152
raise ValueError(
154153
"The maximum pts value in seconds is unknown. "
155154
+ _ERROR_REPORTING_INSTRUCTIONS
156155
)
157-
self._end_stream_from_content_seconds = (
158-
self.metadata.end_stream_from_content_seconds
159-
)
156+
self._end_stream_seconds = self.metadata.end_stream_seconds
160157

161158
def __len__(self) -> int:
162159
return self._num_frames
@@ -267,22 +264,16 @@ def get_frame_displayed_at(self, seconds: float) -> Frame:
267264
"""Return a single frame displayed at the given timestamp in seconds.
268265
269266
Args:
270-
seconds (float): The time stamp in seconds when the frame is
271-
displayed, i.e. seconds is in
272-
[:term:`pts`, :term:`pts` + duration).
267+
seconds (float): The time stamp in seconds when the frame is displayed.
273268
274269
Returns:
275270
Frame: The frame that is displayed at ``seconds``.
276271
"""
277-
if (
278-
not self._begin_stream_from_content_seconds
279-
<= seconds
280-
< self._end_stream_from_content_seconds
281-
):
272+
if not self._begin_stream_seconds <= seconds < self._end_stream_seconds:
282273
raise IndexError(
283274
f"Invalid pts in seconds: {seconds}. "
284-
f"It must be greater than or equal to {self._begin_stream_from_content_seconds} "
285-
f"and less than {self._end_stream_from_content_seconds}."
275+
f"It must be greater than or equal to {self._begin_stream_seconds} "
276+
f"and less than {self._end_stream_seconds}."
286277
)
287278
data, pts_seconds, duration_seconds = core.get_frame_at_pts(
288279
self._decoder, seconds

test/decoders/test_metadata.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,8 @@ def test_num_frames_fallback(
9292
bit_rate=123,
9393
num_frames_from_header=num_frames_from_header,
9494
num_frames_from_content=num_frames_from_content,
95-
begin_stream_from_content_seconds=0,
96-
end_stream_from_content_seconds=4,
95+
begin_stream_seconds=0,
96+
end_stream_seconds=4,
9797
codec="whatever",
9898
width=123,
9999
height=321,

0 commit comments

Comments
 (0)