1919)
2020
2121
22+ SPACES = " "
23+
24+
2225# TODO-audio: docs below are mostly for video streams, we should edit them and /
2326# or make sure they're OK for audio streams as well. Not sure how to best handle
2427# docs for such class hierarchy.
@@ -29,15 +32,6 @@ class StreamMetadata:
2932 None). This could be inaccurate."""
3033 bit_rate : Optional [float ]
3134 """Bit rate of the stream, in bits per second (float or None)."""
32- num_frames_from_header : Optional [int ]
33- """Number of frames, from the stream's metadata. This is potentially
34- inaccurate. We recommend using the ``num_frames`` attribute instead.
35- (int or None)."""
36- num_frames_from_content : Optional [int ]
37- """Number of frames computed by TorchCodec by scanning the stream's
38- content (the scan doesn't involve decoding). This is more accurate
39- than ``num_frames_from_header``. We recommend using the
40- ``num_frames`` attribute instead. (int or None)."""
4135 begin_stream_seconds_from_content : Optional [float ]
4236 """Beginning of the stream, in seconds (float or None).
4337 Conceptually, this corresponds to the first frame's :term:`pts`. It is
@@ -55,23 +49,9 @@ class StreamMetadata:
5549 """
5650 codec : Optional [str ]
5751 """Codec (str or None)."""
58- average_fps_from_header : Optional [float ]
59- """Averate fps of the stream, obtained from the header (float or None).
60- We recommend using the ``average_fps`` attribute instead."""
6152 stream_index : int
6253 """Index of the stream within the video (int)."""
6354
64- @property
65- def num_frames (self ) -> Optional [int ]:
66- """Number of frames in the stream. This corresponds to
67- ``num_frames_from_content`` if a :term:`scan` was made, otherwise it
68- corresponds to ``num_frames_from_header``.
69- """
70- if self .num_frames_from_content is not None :
71- return self .num_frames_from_content
72- else :
73- return self .num_frames_from_header
74-
7555 @property
7656 def duration_seconds (self ) -> Optional [float ]:
7757 """Duration of the stream in seconds. We try to calculate the duration
@@ -88,23 +68,6 @@ def duration_seconds(self) -> Optional[float]:
8868 - self .begin_stream_seconds_from_content
8969 )
9070
91- @property
92- def average_fps (self ) -> Optional [float ]:
93- """Average fps of the stream. If a :term:`scan` was perfomed, this is
94- computed from the number of frames and the duration of the stream.
95- Otherwise we fall back to ``average_fps_from_header``.
96- """
97- if (
98- self .end_stream_seconds_from_content is None
99- or self .begin_stream_seconds_from_content is None
100- or self .num_frames is None
101- ):
102- return self .average_fps_from_header
103- return self .num_frames / (
104- self .end_stream_seconds_from_content
105- - self .begin_stream_seconds_from_content
106- )
107-
10871 @property
10972 def begin_stream_seconds (self ) -> float :
11073 """Beginning of the stream, in seconds (float). Conceptually, this
@@ -132,12 +95,9 @@ def end_stream_seconds(self) -> Optional[float]:
13295 def __repr__ (self ):
13396 # Overridden because properties are not printed by default.
13497 s = self .__class__ .__name__ + ":\n "
135- spaces = " "
136- s += f"{ spaces } num_frames: { self .num_frames } \n "
137- s += f"{ spaces } duration_seconds: { self .duration_seconds } \n "
138- s += f"{ spaces } average_fps: { self .average_fps } \n "
98+ s += f"{ SPACES } duration_seconds: { self .duration_seconds } \n "
13999 for field in dataclasses .fields (self ):
140- s += f"{ spaces } { field .name } : { getattr (self , field .name )} \n "
100+ s += f"{ SPACES } { field .name } : { getattr (self , field .name )} \n "
141101 return s
142102
143103
@@ -149,17 +109,58 @@ class VideoStreamMetadata(StreamMetadata):
149109 """Width of the frames (int or None)."""
150110 height : Optional [int ]
151111 """Height of the frames (int or None)."""
112+ num_frames_from_header : Optional [int ]
113+ """Number of frames, from the stream's metadata. This is potentially
114+ inaccurate. We recommend using the ``num_frames`` attribute instead.
115+ (int or None)."""
116+ num_frames_from_content : Optional [int ]
117+ """Number of frames computed by TorchCodec by scanning the stream's
118+ content (the scan doesn't involve decoding). This is more accurate
119+ than ``num_frames_from_header``. We recommend using the
120+ ``num_frames`` attribute instead. (int or None)."""
121+ average_fps_from_header : Optional [float ]
122+ """Average fps of the stream, obtained from the header (float or None).
123+ We recommend using the ``average_fps`` attribute instead."""
124+
125+ @property
126+ def num_frames (self ) -> Optional [int ]:
127+ """Number of frames in the stream. This corresponds to
128+ ``num_frames_from_content`` if a :term:`scan` was made, otherwise it
129+ corresponds to ``num_frames_from_header``.
130+ """
131+ if self .num_frames_from_content is not None :
132+ return self .num_frames_from_content
133+ else :
134+ return self .num_frames_from_header
135+
136+ @property
137+ def average_fps (self ) -> Optional [float ]:
138+ """Average fps of the stream. If a :term:`scan` was performed, this is
139+ computed from the number of frames and the duration of the stream.
140+ Otherwise we fall back to ``average_fps_from_header``.
141+ """
142+ if (
143+ self .end_stream_seconds_from_content is None
144+ or self .begin_stream_seconds_from_content is None
145+ or self .num_frames is None
146+ ):
147+ return self .average_fps_from_header
148+ return self .num_frames / (
149+ self .end_stream_seconds_from_content
150+ - self .begin_stream_seconds_from_content
151+ )
152152
153153 def __repr__ (self ):
154- return super ().__repr__ ()
154+ s = super ().__repr__ ()
155+ s += f"{ SPACES } num_frames: { self .num_frames } \n "
156+ s += f"{ SPACES } average_fps: { self .average_fps } \n "
157+ return s
155158
156159
157160@dataclass
158161class AudioStreamMetadata (StreamMetadata ):
159162 """Metadata of a single audio stream."""
160163
161- # TODO-AUDIO do we expose the notion of frame here, like in fps? It's technically
162- # valid, but potentially is an FFmpeg-specific concept for audio
163164 # TODO-AUDIO Need sample rate and format and num_channels
164165 sample_rate : Optional [int ]
165166
@@ -192,6 +193,14 @@ def best_video_stream(self) -> VideoStreamMetadata:
192193 assert isinstance (metadata , VideoStreamMetadata ) # mypy <3
193194 return metadata
194195
196+ @property
197+ def best_audio_stream (self ) -> AudioStreamMetadata :
198+ if self .best_audio_stream_index is None :
199+ raise ValueError ("The best audio stream is unknown." )
200+ metadata = self .streams [self .best_audio_stream_index ]
201+ assert isinstance (metadata , AudioStreamMetadata ) # mypy <3
202+ return metadata
203+
195204
196205def get_container_metadata (decoder : torch .Tensor ) -> ContainerMetadata :
197206 """Return container metadata from a decoder.
@@ -207,19 +216,19 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
207216 common_meta = dict (
208217 duration_seconds_from_header = stream_dict .get ("durationSeconds" ),
209218 bit_rate = stream_dict .get ("bitRate" ),
210- num_frames_from_header = stream_dict .get ("numFrames" ),
211- num_frames_from_content = stream_dict .get ("numFramesFromScan" ),
212219 begin_stream_seconds_from_content = stream_dict .get ("minPtsSecondsFromScan" ),
213220 end_stream_seconds_from_content = stream_dict .get ("maxPtsSecondsFromScan" ),
214221 codec = stream_dict .get ("codec" ),
215- average_fps_from_header = stream_dict .get ("averageFps" ),
216222 stream_index = stream_index ,
217223 )
218224 if stream_dict ["mediaType" ] == "video" :
219225 streams_metadata .append (
220226 VideoStreamMetadata (
221227 width = stream_dict .get ("width" ),
222228 height = stream_dict .get ("height" ),
229+ num_frames_from_header = stream_dict .get ("numFrames" ),
230+ num_frames_from_content = stream_dict .get ("numFramesFromScan" ),
231+ average_fps_from_header = stream_dict .get ("averageFps" ),
223232 ** common_meta ,
224233 )
225234 )
@@ -232,9 +241,8 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
232241 )
233242 else :
234243 # This is neither a video nor audio stream. Could be e.g. subtitles.
235- # We still need to add an entry to streams_metadata to keep its
236- # length consistent with the number of streams, so we add a dummy
237- # entry.
244+ # We still need to add a dummy entry so that len(streams_metadata)
245+ # is consistent with the number of streams.
238246 streams_metadata .append (StreamMetadata (** common_meta ))
239247
240248 return ContainerMetadata (
0 commit comments