3 changes: 1 addition & 2 deletions examples/decoding/basic_example.py
@@ -18,7 +18,6 @@
# plotting utility. You can ignore that part and jump right below to
# :ref:`creating_decoder`.

from typing import Optional
import torch
import requests

@@ -33,7 +32,7 @@
raw_video_bytes = response.content


def plot(frames: torch.Tensor, title : Optional[str] = None):
def plot(frames: torch.Tensor, title: str | None = None):
try:
from torchvision.utils import make_grid
from torchvision.transforms.v2.functional import to_pil_image
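For context on this change: on Python 3.10+ the PEP 604 spelling `str | None` is interchangeable with `typing.Optional[str]`. A small illustrative check, independent of the tutorial code:

# Illustrative only: the new `str | None` annotation is the PEP 604
# equivalent of the `Optional[str]` it replaces (Python 3.10+).
from typing import Optional

assert (str | None) == Optional[str]
assert isinstance(None, str | None)  # PEP 604 unions also work with isinstance()
print(str | None)  # prints: str | None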
12 changes: 10 additions & 2 deletions examples/decoding/custom_frame_mappings.py
@@ -82,7 +82,15 @@

# Let's define a simple function that runs ffprobe on a video's first stream index, then writes the results to output_json_path.
def generate_frame_mappings(video_path, output_json_path, stream_index):
ffprobe_cmd = ["ffprobe", "-i", f"{video_path}", "-select_streams", f"{stream_index}", "-show_frames", "-show_entries", "frame=pts,duration,key_frame", "-of", "json"]
ffprobe_cmd = [
"ffprobe",
"-i", f"{video_path}",
"-select_streams", f"{stream_index}",
"-show_frames",
"-show_entries",
"frame=pts,duration,key_frame",
"-of", "json",
]
print(f"Running ffprobe:\n{' '.join(ffprobe_cmd)}\n")
ffprobe_result = subprocess.run(ffprobe_cmd, check=True, capture_output=True, text=True)
with open(output_json_path, "w") as f:
@@ -157,7 +165,7 @@ def bench(f, file_like=False, average_over=50, warmup=2, **f_kwargs):
# so the performance benefits are realized.


def decode_frames(video_path, seek_mode = "exact", custom_frame_mappings = None):
def decode_frames(video_path, seek_mode="exact", custom_frame_mappings=None):
decoder = VideoDecoder(
source=video_path,
seek_mode=seek_mode,
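As a side note, the reformatted command can be exercised directly; the sketch below is illustrative only (the video path and stream index are placeholders, and it assumes ffprobe is on PATH):

# Run the same ffprobe invocation as generate_frame_mappings above and
# inspect the per-frame entries it returns.
import json
import subprocess

video_path = "test.mp4"  # placeholder input file
stream_index = 0         # placeholder stream index

ffprobe_cmd = [
    "ffprobe",
    "-i", str(video_path),
    "-select_streams", str(stream_index),
    "-show_frames",
    "-show_entries",
    "frame=pts,duration,key_frame",
    "-of", "json",
]
result = subprocess.run(ffprobe_cmd, check=True, capture_output=True, text=True)
frames = json.loads(result.stdout)["frames"]
print(f"{len(frames)} frames, first entry: {frames[0]}")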
17 changes: 10 additions & 7 deletions examples/decoding/parallel_decoding.py
@@ -31,7 +31,6 @@
# require efficient processing. You can ignore that part and jump right below to
# :ref:`start_parallel_decoding`.

from typing import List
import torch
import requests
import tempfile
@@ -74,7 +73,7 @@ def report_stats(times, unit="s"):
return med


def split_indices(indices: List[int], num_chunks: int) -> List[List[int]]:
def split_indices(indices: list[int], num_chunks: int) -> list[list[int]]:
"""Split a list of indices into approximately equal chunks."""
chunk_size = len(indices) // num_chunks
chunks = []
@@ -155,7 +154,8 @@ def generate_long_video(temp_dir: str):
# Let's start with a sequential approach as our baseline. This processes
# frames one by one without any parallelization.

def decode_sequentially(indices: List[int], video_path=long_video_path):

def decode_sequentially(indices: list[int], video_path=long_video_path):
"""Decode frames sequentially using a single decoder instance."""
decoder = VideoDecoder(video_path, seek_mode="approximate")
return decoder.get_frames_at(indices)
@@ -173,8 +173,9 @@ def decode_sequentially(indices: List[int], video_path=long_video_path):
# via the ``num_ffmpeg_threads`` parameter. This approach uses multiple
# threads within FFmpeg itself to accelerate decoding operations.


def decode_with_ffmpeg_parallelism(
indices: List[int],
indices: list[int],
num_threads: int,
video_path=long_video_path
):
@@ -197,10 +198,11 @@ def decode_with_ffmpeg_parallelism(
#
# Process-based parallelism distributes work across multiple Python processes.


def decode_with_multiprocessing(
indices: List[int],
indices: list[int],
num_processes: int,
video_path=long_video_path
video_path=long_video_path,
):
"""Decode frames using multiple processes with joblib."""
chunks = split_indices(indices, num_chunks=num_processes)
@@ -226,8 +228,9 @@ def decode_with_multiprocessing(
# Thread-based parallelism uses multiple threads within a single process.
# TorchCodec releases the GIL, so this can be very effective.


def decode_with_multithreading(
indices: List[int],
indices: list[int],
num_threads: int,
video_path=long_video_path
):
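The hunks above only show the updated signatures; for orientation, here is a hedged, self-contained sketch of the thread-based approach (not the tutorial's exact body; the path argument and chunking helper are illustrative):

# A sketch of thread-based parallel decoding with torchcodec's VideoDecoder,
# as used elsewhere in this tutorial. TorchCodec releases the GIL while
# decoding, so threads can overlap useful work.
from concurrent.futures import ThreadPoolExecutor

from torchcodec.decoders import VideoDecoder


def split_into_chunks(indices: list[int], num_chunks: int) -> list[list[int]]:
    # Contiguous chunks keep seeks roughly sequential within each worker.
    chunk_size = (len(indices) + num_chunks - 1) // num_chunks
    return [indices[i : i + chunk_size] for i in range(0, len(indices), chunk_size)]


def decode_with_threads(indices: list[int], num_threads: int, video_path: str):
    def decode_chunk(chunk: list[int]):
        # One decoder per chunk, mirroring the sequential baseline above.
        decoder = VideoDecoder(video_path, seek_mode="approximate")
        return decoder.get_frames_at(chunk)

    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        return list(executor.map(decode_chunk, split_into_chunks(indices, num_threads)))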
3 changes: 1 addition & 2 deletions examples/decoding/sampling.py
@@ -19,7 +19,6 @@
# plotting utility. You can ignore that part and jump right below to
# :ref:`sampling_tuto_start`.

from typing import Optional
import torch
import requests

@@ -34,7 +33,7 @@
raw_video_bytes = response.content


def plot(frames: torch.Tensor, title : Optional[str] = None):
def plot(frames: torch.Tensor, title: str | None = None):
try:
from torchvision.utils import make_grid
from torchvision.transforms.v2.functional import to_pil_image
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -2,7 +2,7 @@
name = "torchcodec"
description = "A video decoder for PyTorch"
readme = "README.md"
requires-python = ">=3.8"
requires-python = ">=3.10"
license-files = ["LICENSE"]
authors = [
{ name = "PyTorch Team", email = "[email protected]" },
@@ -32,7 +32,7 @@ dev = [
first_party_detection = false

[tool.black]
target-version = ["py38"]
target-version = ["py310"]

[tool.ufmt]

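The version bump is what makes the `X | None` annotations in the rest of this PR valid at runtime; a small illustration (the dataclass below is hypothetical, not part of the codebase):

# PEP 604 unions in class bodies are evaluated when the class is created,
# so this needs Python 3.10+ (or `from __future__ import annotations` to
# defer evaluation). Hence requires-python >= 3.10 and black's py310 target.
from dataclasses import dataclass


@dataclass
class Example:
    title: str | None = None  # TypeError at import time on Python 3.9


print(Example())  # Example(title=None)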
60 changes: 30 additions & 30 deletions src/torchcodec/_core/_metadata.py
@@ -4,12 +4,12 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


import dataclasses
import json
import pathlib
from dataclasses import dataclass
from fractions import Fraction
from typing import List, Optional, Union

import torch

@@ -25,29 +25,29 @@

@dataclass
class StreamMetadata:
duration_seconds_from_header: Optional[float]
duration_seconds_from_header: float | None
"""Duration of the stream, in seconds, obtained from the header (float or
None). This could be inaccurate."""
begin_stream_seconds_from_header: Optional[float]
begin_stream_seconds_from_header: float | None
"""Beginning of the stream, in seconds, obtained from the header (float or
None). Usually, this is equal to 0."""
bit_rate: Optional[float]
bit_rate: float | None
"""Bit rate of the stream, in seconds (float or None)."""
codec: Optional[str]
codec: str | None
"""Codec (str or None)."""
stream_index: int
"""Index of the stream that this metadata refers to (int)."""

# Computed fields (computed in C++ with fallback logic)
duration_seconds: Optional[float]
duration_seconds: float | None
"""Duration of the stream in seconds. We try to calculate the duration
from the actual frames if a :term:`scan` was performed. Otherwise we
fall back to ``duration_seconds_from_header``. If that value is also None,
we instead calculate the duration from ``num_frames_from_header`` and
``average_fps_from_header``. If all of those are unavailable, we fall back
to the container-level ``duration_seconds_from_header``.
"""
begin_stream_seconds: Optional[float]
begin_stream_seconds: float | None
"""Beginning of the stream, in seconds (float). Conceptually, this
corresponds to the first frame's :term:`pts`. If a :term:`scan` was performed
and ``begin_stream_seconds_from_content`` is not None, then it is returned.
@@ -65,12 +65,12 @@ def __repr__(self):
class VideoStreamMetadata(StreamMetadata):
"""Metadata of a single video stream."""

begin_stream_seconds_from_content: Optional[float]
begin_stream_seconds_from_content: float | None
"""Beginning of the stream, in seconds (float or None).
Conceptually, this corresponds to the first frame's :term:`pts`. It is only
computed when a :term:`scan` is done as min(frame.pts) across all frames in
the stream. Usually, this is equal to 0."""
end_stream_seconds_from_content: Optional[float]
end_stream_seconds_from_content: float | None
"""End of the stream, in seconds (float or None).
Conceptually, this corresponds to last_frame.pts + last_frame.duration. It
is only computed when a :term:`scan` is done as max(frame.pts +
@@ -81,42 +81,42 @@ class VideoStreamMetadata(StreamMetadata):
simply indexing the :class:`~torchcodec.decoders.VideoDecoder` object with
``[-1]``.
"""
width: Optional[int]
width: int | None
"""Width of the frames (int or None)."""
height: Optional[int]
height: int | None
"""Height of the frames (int or None)."""
num_frames_from_header: Optional[int]
num_frames_from_header: int | None
"""Number of frames, from the stream's metadata. This is potentially
inaccurate. We recommend using the ``num_frames`` attribute instead.
(int or None)."""
num_frames_from_content: Optional[int]
num_frames_from_content: int | None
"""Number of frames computed by TorchCodec by scanning the stream's
content (the scan doesn't involve decoding). This is more accurate
than ``num_frames_from_header``. We recommend using the
``num_frames`` attribute instead. (int or None)."""
average_fps_from_header: Optional[float]
average_fps_from_header: float | None
"""Averate fps of the stream, obtained from the header (float or None).
We recommend using the ``average_fps`` attribute instead."""
pixel_aspect_ratio: Optional[Fraction]
pixel_aspect_ratio: Fraction | None
"""Pixel Aspect Ratio (PAR), also known as Sample Aspect Ratio
(SAR --- not to be confused with Storage Aspect Ratio, also SAR),
is the ratio between the width and height of each pixel
(``fractions.Fraction`` or None)."""

# Computed fields (computed in C++ with fallback logic)
end_stream_seconds: Optional[float]
end_stream_seconds: float | None
"""End of the stream, in seconds (float or None).
Conceptually, this corresponds to last_frame.pts + last_frame.duration.
If a :term:`scan` was performed and ``end_stream_seconds_from_content`` is not None, then that value is
returned. Otherwise, returns ``duration_seconds``.
"""
num_frames: Optional[int]
num_frames: int | None
"""Number of frames in the stream (int or None).
This corresponds to ``num_frames_from_content`` if a :term:`scan` was made,
otherwise it corresponds to ``num_frames_from_header``. If that value is also
None, the number of frames is calculated from the duration and the average fps.
"""
average_fps: Optional[float]
average_fps: float | None
"""Average fps of the stream. If a :term:`scan` was perfomed, this is
computed from the number of frames and the duration of the stream.
Otherwise we fall back to ``average_fps_from_header``.
@@ -130,11 +130,11 @@ def __repr__(self):
class AudioStreamMetadata(StreamMetadata):
"""Metadata of a single audio stream."""

sample_rate: Optional[int]
sample_rate: int | None
"""The original sample rate."""
num_channels: Optional[int]
num_channels: int | None
"""The number of channels (1 for mono, 2 for stereo, etc.)"""
sample_format: Optional[str]
sample_format: str | None
"""The original sample format, as described by FFmpeg. E.g. 'fltp', 's32', etc."""

def __repr__(self):
@@ -143,19 +143,19 @@ def __repr__(self):

@dataclass
class ContainerMetadata:
duration_seconds_from_header: Optional[float]
bit_rate_from_header: Optional[float]
best_video_stream_index: Optional[int]
best_audio_stream_index: Optional[int]
duration_seconds_from_header: float | None
bit_rate_from_header: float | None
best_video_stream_index: int | None
best_audio_stream_index: int | None

streams: List[StreamMetadata]
streams: list[StreamMetadata]

@property
def duration_seconds(self) -> Optional[float]:
def duration_seconds(self) -> float | None:
raise NotImplementedError("Decide on logic and implement this!")

@property
def bit_rate(self) -> Optional[float]:
def bit_rate(self) -> float | None:
raise NotImplementedError("Decide on logic and implement this!")

@property
@@ -195,7 +195,7 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:
"""

container_dict = json.loads(_get_container_json_metadata(decoder))
streams_metadata: List[StreamMetadata] = []
streams_metadata: list[StreamMetadata] = []
for stream_index in range(container_dict["numStreams"]):
stream_dict = json.loads(_get_stream_json_metadata(decoder, stream_index))
common_meta = dict(
@@ -255,7 +255,7 @@ def get_container_metadata(decoder: torch.Tensor) -> ContainerMetadata:


def get_container_metadata_from_header(
filename: Union[str, pathlib.Path]
filename: str | pathlib.Path,
) -> ContainerMetadata:
return get_container_metadata(
create_from_file(str(filename), seek_mode="approximate")
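For context, these dataclasses are what TorchCodec's public decoders expose as stream metadata; a hedged usage sketch (the file name is a placeholder, and `decoder.metadata` returning a `VideoStreamMetadata` is assumed from the public API rather than shown in this diff):

# Every header-derived field above is typed `<T> | None`, so callers should
# guard against missing values before formatting them.
from torchcodec.decoders import VideoDecoder

decoder = VideoDecoder("video.mp4")  # placeholder path
meta = decoder.metadata

if meta.num_frames is not None and meta.average_fps is not None:
    print(f"{meta.num_frames} frames at ~{meta.average_fps:.2f} fps")
print(meta.duration_seconds, meta.width, meta.height, meta.codec)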