Skip to content

Commit c51a341

Browse files
committed
VideoStreamMetadata.sample_aspect_ratio: new metadata field (#733)
Adds a new field to VideoStreamMetadata that exposes the stream's sample (pixel) aspect ratio. Exposing this information is the minimum required to support non-square pixels.
1 parent d88cf1a commit c51a341

File tree

5 files changed

+33
-0
lines changed

5 files changed

+33
-0
lines changed

src/torchcodec/_core/Metadata.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
#include <optional>
1010
#include <string>
11+
#include <utility>
1112
#include <vector>
1213

1314
extern "C" {
@@ -45,6 +46,7 @@ struct StreamMetadata {
4546
// Video-only fields derived from the AVCodecContext.
4647
std::optional<int64_t> width;
4748
std::optional<int64_t> height;
49+
std::optional<std::pair<int, int>> sampleAspectRatio;
4850

4951
// Audio-only fields
5052
std::optional<int64_t> sampleRate;

src/torchcodec/_core/SingleStreamDecoder.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -459,6 +459,9 @@ void SingleStreamDecoder::addVideoStream(
459459

460460
streamMetadata.width = streamInfo.codecContext->width;
461461
streamMetadata.height = streamInfo.codecContext->height;
462+
streamMetadata.sampleAspectRatio = {
463+
streamInfo.codecContext->sample_aspect_ratio.num,
464+
streamInfo.codecContext->sample_aspect_ratio.den};
462465
}
463466

464467
void SingleStreamDecoder::addAudioStream(

src/torchcodec/_core/_metadata.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,12 @@ class VideoStreamMetadata(StreamMetadata):
8080
average_fps_from_header: Optional[float]
8181
"""Average fps of the stream, obtained from the header (float or None).
8282
We recommend using the ``average_fps`` attribute instead."""
83+
sample_aspect_ratio: Optional[tuple[int, int]]
84+
"""Sample Aspect Ratio (SAR), also known as Pixel Aspect Ratio
85+
(PAR), is the ratio between the width of a pixel and its height.
86+
This is a tuple of two ints: the first element is the
87+
numerator, and the second element is the denominator. Not to be
88+
confused with Storage Aspect Ratio (also SAR)."""
8389

8490
@property
8591
def duration_seconds(self) -> Optional[float]:
@@ -211,6 +217,16 @@ def best_audio_stream(self) -> AudioStreamMetadata:
211217
return metadata
212218

213219

220+
def _get_optional_sar_tuple(stream_dict):
221+
try:
222+
return (
223+
stream_dict["sampleAspectRatioNum"],
224+
stream_dict["sampleAspectRatioDen"],
225+
)
226+
except KeyError:
227+
return None
228+
229+
214230
# TODO-AUDIO: This is user-facing. Should this just be `get_metadata`, without
215231
# the "container" name in it? Same below.
216232
def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
@@ -247,6 +263,9 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
247263
num_frames_from_header=stream_dict.get("numFramesFromHeader"),
248264
num_frames_from_content=stream_dict.get("numFramesFromContent"),
249265
average_fps_from_header=stream_dict.get("averageFpsFromHeader"),
266+
# sample_aspect_ratio is a tuple. Return None,
267+
# and not (None, None), if missing.
268+
sample_aspect_ratio=_get_optional_sar_tuple(stream_dict),
250269
**common_meta,
251270
)
252271
)

src/torchcodec/_core/custom_ops.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,12 @@ std::string get_stream_json_metadata(
601601
if (streamMetadata.height.has_value()) {
602602
map["height"] = std::to_string(*streamMetadata.height);
603603
}
604+
if (streamMetadata.sampleAspectRatio.has_value()) {
605+
map["sampleAspectRatioNum"] =
606+
std::to_string((*streamMetadata.sampleAspectRatio).first);
607+
map["sampleAspectRatioDen"] =
608+
std::to_string((*streamMetadata.sampleAspectRatio).second);
609+
}
604610
if (streamMetadata.averageFpsFromHeader.has_value()) {
605611
map["averageFpsFromHeader"] =
606612
std::to_string(*streamMetadata.averageFpsFromHeader);

test/test_metadata.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ def test_get_metadata(metadata_getter):
8181
assert best_video_stream_metadata.begin_stream_seconds_from_header == 0
8282
assert best_video_stream_metadata.bit_rate == 128783
8383
assert best_video_stream_metadata.average_fps == pytest.approx(29.97, abs=0.001)
84+
assert best_video_stream_metadata.sample_aspect_ratio is None
8485
assert best_video_stream_metadata.codec == "h264"
8586
assert best_video_stream_metadata.num_frames_from_content == (
8687
390 if with_scan else None
@@ -137,6 +138,7 @@ def test_num_frames_fallback(
137138
width=123,
138139
height=321,
139140
average_fps_from_header=30,
141+
sample_aspect_ratio=(1, 1),
140142
stream_index=0,
141143
)
142144

@@ -161,6 +163,7 @@ def test_repr():
161163
num_frames_from_header: 390
162164
num_frames_from_content: 390
163165
average_fps_from_header: 29.97003
166+
sample_aspect_ratio: (1, 1)
164167
duration_seconds: 13.013
165168
begin_stream_seconds: 0.0
166169
end_stream_seconds: 13.013

0 commit comments

Comments
 (0)