@@ -32,27 +32,59 @@ class StreamMetadata:
32
32
duration_seconds_from_header : Optional [float ]
33
33
"""Duration of the stream, in seconds, obtained from the header (float or
34
34
None). This could be inaccurate."""
35
+ begin_stream_seconds_from_header : Optional [float ]
36
+ """Beginning of the stream, in seconds, obtained from the header (float or
37
+ None). Usually, this is equal to 0."""
35
38
bit_rate : Optional [float ]
36
39
"""Bit rate of the stream, in bits per second (float or None)."""
40
+ codec : Optional [str ]
41
+ """Codec (str or None)."""
42
+ stream_index : int
43
+ """Index of the stream within the video (int)."""
44
+
45
+ def __repr__ (self ):
46
+ s = self .__class__ .__name__ + ":\n "
47
+ for field in dataclasses .fields (self ):
48
+ s += f"{ SPACES } { field .name } : { getattr (self , field .name )} \n "
49
+ return s
50
+
51
+
52
+ @dataclass
53
+ class VideoStreamMetadata (StreamMetadata ):
54
+ """Metadata of a single video stream."""
55
+
37
56
begin_stream_seconds_from_content : Optional [float ]
38
57
"""Beginning of the stream, in seconds (float or None).
39
- Conceptually, this corresponds to the first frame's :term:`pts`. It is
40
- computed as min(frame.pts) across all frames in the stream. Usually, this is
41
- equal to 0."""
58
+ Conceptually, this corresponds to the first frame's :term:`pts`. It is only
59
+ computed when a :term:`scan` is done, as min(frame.pts) across all frames in
60
+ the stream. Usually, this is equal to 0."""
42
61
end_stream_seconds_from_content : Optional [float ]
43
62
"""End of the stream, in seconds (float or None).
44
63
Conceptually, this corresponds to last_frame.pts + last_frame.duration. It
45
- is computed as max(frame.pts + frame.duration) across all frames in the
46
- stream. Note that no frame is played at this time value, so calling
47
- :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at` with
48
- this value would result in an error. Retrieving the last frame is best done
49
- by simply indexing the :class:`~torchcodec.decoders.VideoDecoder`
50
- object with ``[-1]``.
64
+ is only computed when a :term:`scan` is done, as max(frame.pts +
65
+ frame.duration) across all frames in the stream. Note that no frame is
66
+ played at this time value, so calling
67
+ :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at` with this
68
+ value would result in an error. Retrieving the last frame is best done by
69
+ simply indexing the :class:`~torchcodec.decoders.VideoDecoder` object with
70
+ ``[-1]``.
51
71
"""
52
- codec : Optional [str ]
53
- """Codec (str or None)."""
54
- stream_index : int
55
- """Index of the stream within the video (int)."""
72
+ width : Optional [int ]
73
+ """Width of the frames (int or None)."""
74
+ height : Optional [int ]
75
+ """Height of the frames (int or None)."""
76
+ num_frames_from_header : Optional [int ]
77
+ """Number of frames, from the stream's metadata. This is potentially
78
+ inaccurate. We recommend using the ``num_frames`` attribute instead.
79
+ (int or None)."""
80
+ num_frames_from_content : Optional [int ]
81
+ """Number of frames computed by TorchCodec by scanning the stream's
82
+ content (the scan doesn't involve decoding). This is more accurate
83
+ than ``num_frames_from_header``. We recommend using the
84
+ ``num_frames`` attribute instead. (int or None)."""
85
+ average_fps_from_header : Optional [float ]
86
+ """Average fps of the stream, obtained from the header (float or None).
87
+ We recommend using the ``average_fps`` attribute instead."""
56
88
57
89
@property
58
90
def duration_seconds (self ) -> Optional [float ]:
@@ -94,36 +126,6 @@ def end_stream_seconds(self) -> Optional[float]:
94
126
else :
95
127
return self .end_stream_seconds_from_content
96
128
97
- def __repr__ (self ):
98
- # Overridden because properites are not printed by default.
99
- s = self .__class__ .__name__ + ":\n "
100
- s += f"{ SPACES } duration_seconds: { self .duration_seconds } \n "
101
- for field in dataclasses .fields (self ):
102
- s += f"{ SPACES } { field .name } : { getattr (self , field .name )} \n "
103
- return s
104
-
105
-
106
- @dataclass
107
- class VideoStreamMetadata (StreamMetadata ):
108
- """Metadata of a single video stream."""
109
-
110
- width : Optional [int ]
111
- """Width of the frames (int or None)."""
112
- height : Optional [int ]
113
- """Height of the frames (int or None)."""
114
- num_frames_from_header : Optional [int ]
115
- """Number of frames, from the stream's metadata. This is potentially
116
- inaccurate. We recommend using the ``num_frames`` attribute instead.
117
- (int or None)."""
118
- num_frames_from_content : Optional [int ]
119
- """Number of frames computed by TorchCodec by scanning the stream's
120
- content (the scan doesn't involve decoding). This is more accurate
121
- than ``num_frames_from_header``. We recommend using the
122
- ``num_frames`` attribute instead. (int or None)."""
123
- average_fps_from_header : Optional [float ]
124
- """Averate fps of the stream, obtained from the header (float or None).
125
- We recommend using the ``average_fps`` attribute instead."""
126
-
127
129
@property
128
130
def num_frames (self ) -> Optional [int ]:
129
131
"""Number of frames in the stream. This corresponds to
@@ -154,6 +156,9 @@ def average_fps(self) -> Optional[float]:
154
156
155
157
def __repr__ (self ):
156
158
s = super ().__repr__ ()
159
+ s += f"{ SPACES } duration_seconds: { self .duration_seconds } \n "
160
+ s += f"{ SPACES } begin_stream_seconds: { self .begin_stream_seconds } \n "
161
+ s += f"{ SPACES } end_stream_seconds: { self .end_stream_seconds } \n "
157
162
s += f"{ SPACES } num_frames: { self .num_frames } \n "
158
163
s += f"{ SPACES } average_fps: { self .average_fps } \n "
159
164
return s
@@ -224,14 +229,19 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
224
229
common_meta = dict (
225
230
duration_seconds_from_header = stream_dict .get ("durationSeconds" ),
226
231
bit_rate = stream_dict .get ("bitRate" ),
227
- begin_stream_seconds_from_content = stream_dict .get ("minPtsSecondsFromScan" ),
228
- end_stream_seconds_from_content = stream_dict .get ("maxPtsSecondsFromScan" ),
232
+ begin_stream_seconds_from_header = stream_dict .get ("beginStreamFromHeader" ),
229
233
codec = stream_dict .get ("codec" ),
230
234
stream_index = stream_index ,
231
235
)
232
236
if stream_dict ["mediaType" ] == "video" :
233
237
streams_metadata .append (
234
238
VideoStreamMetadata (
239
+ begin_stream_seconds_from_content = stream_dict .get (
240
+ "minPtsSecondsFromScan"
241
+ ),
242
+ end_stream_seconds_from_content = stream_dict .get (
243
+ "maxPtsSecondsFromScan"
244
+ ),
235
245
width = stream_dict .get ("width" ),
236
246
height = stream_dict .get ("height" ),
237
247
num_frames_from_header = stream_dict .get ("numFrames" ),
0 commit comments