|
22 | 22 | SPACES = " " |
23 | 23 |
|
24 | 24 |
|
25 | | -# TODO-AUDIO: docs below are mostly for video streams, we should edit them and / |
26 | | -# or make sure they're OK for audio streams as well. Not sure how to best handle |
27 | | -# docs for such class hierarchy. |
28 | | -# TODO very related, none of these common fields in this base class show up in |
29 | | -# the docs right now. |
30 | 25 | @dataclass |
31 | 26 | class StreamMetadata: |
32 | 27 | duration_seconds_from_header: Optional[float] |
33 | 28 | """Duration of the stream, in seconds, obtained from the header (float or |
34 | 29 | None). This could be inaccurate.""" |
| 30 | + begin_stream_seconds_from_header: Optional[float] |
| 31 | + """Beginning of the stream, in seconds, obtained from the header (float or |
| 32 | + None). Usually, this is equal to 0.""" |
35 | 33 | bit_rate: Optional[float] |
36 | 34 | """Bit rate of the stream, in bits per second (float or None).""" |
| 35 | + codec: Optional[str] |
| 36 | + """Codec (str or None).""" |
| 37 | + stream_index: int |
| 38 | + """Index of the stream that this metadata refers to (int).""" |
| 39 | + |
| 40 | + def __repr__(self): |
| 41 | + s = self.__class__.__name__ + ":\n" |
| 42 | + for field in dataclasses.fields(self): |
| 43 | + s += f"{SPACES}{field.name}: {getattr(self, field.name)}\n" |
| 44 | + return s |
| 45 | + |
| 46 | + |
| 47 | +@dataclass |
| 48 | +class VideoStreamMetadata(StreamMetadata): |
| 49 | + """Metadata of a single video stream.""" |
| 50 | + |
37 | 51 | begin_stream_seconds_from_content: Optional[float] |
38 | 52 | """Beginning of the stream, in seconds (float or None). |
39 | | - Conceptually, this corresponds to the first frame's :term:`pts`. It is |
40 | | - computed as min(frame.pts) across all frames in the stream. Usually, this is |
41 | | - equal to 0.""" |
| 53 | + Conceptually, this corresponds to the first frame's :term:`pts`. It is only |
| 54 | + computed when a :term:`scan` is done as min(frame.pts) across all frames in |
| 55 | + the stream. Usually, this is equal to 0.""" |
42 | 56 | end_stream_seconds_from_content: Optional[float] |
43 | 57 | """End of the stream, in seconds (float or None). |
44 | 58 | Conceptually, this corresponds to last_frame.pts + last_frame.duration. It |
45 | | - is computed as max(frame.pts + frame.duration) across all frames in the |
46 | | - stream. Note that no frame is played at this time value, so calling |
47 | | - :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at` with |
48 | | - this value would result in an error. Retrieving the last frame is best done |
49 | | - by simply indexing the :class:`~torchcodec.decoders.VideoDecoder` |
50 | | - object with ``[-1]``. |
| 59 | + is only computed when a :term:`scan` is done as max(frame.pts + |
| 60 | + frame.duration) across all frames in the stream. Note that no frame is |
| 61 | + played at this time value, so calling |
| 62 | + :meth:`~torchcodec.decoders.VideoDecoder.get_frame_played_at` with this |
| 63 | + value would result in an error. Retrieving the last frame is best done by |
| 64 | + simply indexing the :class:`~torchcodec.decoders.VideoDecoder` object with |
| 65 | + ``[-1]``. |
51 | 66 | """ |
52 | | - codec: Optional[str] |
53 | | - """Codec (str or None).""" |
54 | | - stream_index: int |
55 | | - """Index of the stream within the video (int).""" |
| 67 | + width: Optional[int] |
| 68 | + """Width of the frames (int or None).""" |
| 69 | + height: Optional[int] |
| 70 | + """Height of the frames (int or None).""" |
| 71 | + num_frames_from_header: Optional[int] |
| 72 | + """Number of frames, from the stream's metadata. This is potentially |
| 73 | + inaccurate. We recommend using the ``num_frames`` attribute instead. |
| 74 | + (int or None).""" |
| 75 | + num_frames_from_content: Optional[int] |
| 76 | + """Number of frames computed by TorchCodec by scanning the stream's |
| 77 | + content (the scan doesn't involve decoding). This is more accurate |
| 78 | + than ``num_frames_from_header``. We recommend using the |
| 79 | + ``num_frames`` attribute instead. (int or None).""" |
| 80 | + average_fps_from_header: Optional[float] |
| 81 | + """Average fps of the stream, obtained from the header (float or None). |
| 82 | + We recommend using the ``average_fps`` attribute instead.""" |
56 | 83 |
|
57 | 84 | @property |
58 | 85 | def duration_seconds(self) -> Optional[float]: |
@@ -94,36 +121,6 @@ def end_stream_seconds(self) -> Optional[float]: |
94 | 121 | else: |
95 | 122 | return self.end_stream_seconds_from_content |
96 | 123 |
|
97 | | - def __repr__(self): |
98 | | - # Overridden because properites are not printed by default. |
99 | | - s = self.__class__.__name__ + ":\n" |
100 | | - s += f"{SPACES}duration_seconds: {self.duration_seconds}\n" |
101 | | - for field in dataclasses.fields(self): |
102 | | - s += f"{SPACES}{field.name}: {getattr(self, field.name)}\n" |
103 | | - return s |
104 | | - |
105 | | - |
106 | | -@dataclass |
107 | | -class VideoStreamMetadata(StreamMetadata): |
108 | | - """Metadata of a single video stream.""" |
109 | | - |
110 | | - width: Optional[int] |
111 | | - """Width of the frames (int or None).""" |
112 | | - height: Optional[int] |
113 | | - """Height of the frames (int or None).""" |
114 | | - num_frames_from_header: Optional[int] |
115 | | - """Number of frames, from the stream's metadata. This is potentially |
116 | | - inaccurate. We recommend using the ``num_frames`` attribute instead. |
117 | | - (int or None).""" |
118 | | - num_frames_from_content: Optional[int] |
119 | | - """Number of frames computed by TorchCodec by scanning the stream's |
120 | | - content (the scan doesn't involve decoding). This is more accurate |
121 | | - than ``num_frames_from_header``. We recommend using the |
122 | | - ``num_frames`` attribute instead. (int or None).""" |
123 | | - average_fps_from_header: Optional[float] |
124 | | - """Averate fps of the stream, obtained from the header (float or None). |
125 | | - We recommend using the ``average_fps`` attribute instead.""" |
126 | | - |
127 | 124 | @property |
128 | 125 | def num_frames(self) -> Optional[int]: |
129 | 126 | """Number of frames in the stream. This corresponds to |
@@ -154,6 +151,9 @@ def average_fps(self) -> Optional[float]: |
154 | 151 |
|
155 | 152 | def __repr__(self): |
156 | 153 | s = super().__repr__() |
| 154 | + s += f"{SPACES}duration_seconds: {self.duration_seconds}\n" |
| 155 | + s += f"{SPACES}begin_stream_seconds: {self.begin_stream_seconds}\n" |
| 156 | + s += f"{SPACES}end_stream_seconds: {self.end_stream_seconds}\n" |
157 | 157 | s += f"{SPACES}num_frames: {self.num_frames}\n" |
158 | 158 | s += f"{SPACES}average_fps: {self.average_fps}\n" |
159 | 159 | return s |
@@ -224,14 +224,19 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata: |
224 | 224 | common_meta = dict( |
225 | 225 | duration_seconds_from_header=stream_dict.get("durationSeconds"), |
226 | 226 | bit_rate=stream_dict.get("bitRate"), |
227 | | - begin_stream_seconds_from_content=stream_dict.get("minPtsSecondsFromScan"), |
228 | | - end_stream_seconds_from_content=stream_dict.get("maxPtsSecondsFromScan"), |
| 227 | + begin_stream_seconds_from_header=stream_dict.get("beginStreamFromHeader"), |
229 | 228 | codec=stream_dict.get("codec"), |
230 | 229 | stream_index=stream_index, |
231 | 230 | ) |
232 | 231 | if stream_dict["mediaType"] == "video": |
233 | 232 | streams_metadata.append( |
234 | 233 | VideoStreamMetadata( |
| 234 | + begin_stream_seconds_from_content=stream_dict.get( |
| 235 | + "minPtsSecondsFromScan" |
| 236 | + ), |
| 237 | + end_stream_seconds_from_content=stream_dict.get( |
| 238 | + "maxPtsSecondsFromScan" |
| 239 | + ), |
235 | 240 | width=stream_dict.get("width"), |
236 | 241 | height=stream_dict.get("height"), |
237 | 242 | num_frames_from_header=stream_dict.get("numFrames"), |
|
0 commit comments