Skip to content

Commit 6d2aef1

Browse files
committed
Merge branch 'main' of github.com:pytorch/torchcodec into encoding_sample_rate_lezzzgo
2 parents ef1b461 + 7945e6a commit 6d2aef1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+1418
-657
lines changed

.github/workflows/linux_cuda_wheel.yaml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ jobs:
6767
# For the actual release we should add that label and change this to
6868
# include more python versions.
6969
python-version: ['3.9']
70-
cuda-version: ['11.8', '12.6', '12.8']
70+
cuda-version: ['12.6', '12.8']
7171
# TODO: put back ffmpeg 5 https://github.com/pytorch/torchcodec/issues/325
7272
ffmpeg-version-for-tests: ['4.4.2', '6', '7']
7373

README.md

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -3,17 +3,18 @@
33
# TorchCodec
44

55
TorchCodec is a Python library for decoding video and audio data into PyTorch
6-
tensors, on CPU and CUDA GPU. It aims to be fast, easy to use, and well
7-
integrated into the PyTorch ecosystem. If you want to use PyTorch to train ML
8-
models on videos and audio, TorchCodec is how you turn these into data.
6+
tensors, on CPU and CUDA GPU. It also supports audio encoding, and video
7+
encoding will come soon! It aims to be fast, easy to use, and well integrated
8+
into the PyTorch ecosystem. If you want to use PyTorch to train ML models on
9+
videos and audio, TorchCodec is how you turn these into data.
910

1011
We achieve these capabilities through:
1112

1213
* Pythonic APIs that mirror Python and PyTorch conventions.
13-
* Relying on [FFmpeg](https://www.ffmpeg.org/) to do the decoding. TorchCodec
14-
uses the version of FFmpeg you already have installed. FFmpeg is a mature
15-
library with broad coverage available on most systems. It is, however, not
16-
easy to use. TorchCodec abstracts FFmpeg's complexity to ensure it is used
14+
* Relying on [FFmpeg](https://www.ffmpeg.org/) to do the decoding and encoding.
15+
TorchCodec uses the version of FFmpeg you already have installed. FFmpeg is a
16+
mature library with broad coverage available on most systems. It is, however,
17+
not easy to use. TorchCodec abstracts FFmpeg's complexity to ensure it is used
1718
correctly and efficiently.
1819
* Returning data as PyTorch tensors, ready to be fed into PyTorch transforms
1920
or used directly to train models.

benchmarks/decoders/benchmark_decoders.py

Lines changed: 34 additions & 69 deletions
Original file line number | Diff line number | Diff line change
@@ -8,62 +8,18 @@
88
import importlib.resources
99
import os
1010
import platform
11-
import typing
12-
from dataclasses import dataclass, field
1311
from pathlib import Path
1412

1513
import torch
1614

1715
from benchmark_decoders_library import (
18-
AbstractDecoder,
19-
DecordAccurate,
20-
DecordAccurateBatch,
16+
decoder_registry,
2117
plot_data,
2218
run_benchmarks,
23-
TorchAudioDecoder,
24-
TorchCodecCore,
25-
TorchCodecCoreBatch,
26-
TorchCodecCoreCompiled,
27-
TorchCodecCoreNonBatch,
28-
TorchCodecPublic,
29-
TorchCodecPublicNonBatch,
30-
TorchVision,
19+
verify_outputs,
3120
)
3221

3322

34-
@dataclass
35-
class DecoderKind:
36-
display_name: str
37-
kind: typing.Type[AbstractDecoder]
38-
default_options: dict[str, str] = field(default_factory=dict)
39-
40-
41-
decoder_registry = {
42-
"decord": DecoderKind("DecordAccurate", DecordAccurate),
43-
"decord_batch": DecoderKind("DecordAccurateBatch", DecordAccurateBatch),
44-
"torchcodec_core": DecoderKind("TorchCodecCore", TorchCodecCore),
45-
"torchcodec_core_batch": DecoderKind("TorchCodecCoreBatch", TorchCodecCoreBatch),
46-
"torchcodec_core_nonbatch": DecoderKind(
47-
"TorchCodecCoreNonBatch", TorchCodecCoreNonBatch
48-
),
49-
"torchcodec_core_compiled": DecoderKind(
50-
"TorchCodecCoreCompiled", TorchCodecCoreCompiled
51-
),
52-
"torchcodec_public": DecoderKind("TorchCodecPublic", TorchCodecPublic),
53-
"torchcodec_public_nonbatch": DecoderKind(
54-
"TorchCodecPublicNonBatch", TorchCodecPublicNonBatch
55-
),
56-
"torchvision": DecoderKind(
57-
# We don't compare against TorchVision's "pyav" backend because it doesn't support
58-
# accurate seeks.
59-
"TorchVision[backend=video_reader]",
60-
TorchVision,
61-
{"backend": "video_reader"},
62-
),
63-
"torchaudio": DecoderKind("TorchAudio", TorchAudioDecoder),
64-
}
65-
66-
6723
def in_fbcode() -> bool:
6824
return "FB_PAR_RUNTIME_FILES" in os.environ
6925

@@ -144,6 +100,12 @@ def main() -> None:
144100
type=str,
145101
default="benchmarks.png",
146102
)
103+
parser.add_argument(
104+
"--verify-outputs",
105+
help="Verify that the outputs of the decoders are the same",
106+
default=False,
107+
action=argparse.BooleanOptionalAction,
108+
)
147109

148110
args = parser.parse_args()
149111
specified_decoders = set(args.decoders.split(","))
@@ -173,29 +135,32 @@ def main() -> None:
173135
if entry.is_file() and entry.name.endswith(".mp4"):
174136
video_paths.append(entry.path)
175137

176-
results = run_benchmarks(
177-
decoders_to_run,
178-
video_paths,
179-
num_uniform_samples,
180-
num_sequential_frames_from_start=[1, 10, 100],
181-
min_runtime_seconds=args.min_run_seconds,
182-
benchmark_video_creation=args.bm_video_creation,
183-
)
184-
data = {
185-
"experiments": results,
186-
"system_metadata": {
187-
"cpu_count": os.cpu_count(),
188-
"system": platform.system(),
189-
"machine": platform.machine(),
190-
"python_version": str(platform.python_version()),
191-
"cuda": (
192-
torch.cuda.get_device_properties(0).name
193-
if torch.cuda.is_available()
194-
else "not available"
195-
),
196-
},
197-
}
198-
plot_data(data, args.plot_path)
138+
if args.verify_outputs:
139+
verify_outputs(decoders_to_run, video_paths, num_uniform_samples)
140+
else:
141+
results = run_benchmarks(
142+
decoders_to_run,
143+
video_paths,
144+
num_uniform_samples,
145+
num_sequential_frames_from_start=[1, 10, 100],
146+
min_runtime_seconds=args.min_run_seconds,
147+
benchmark_video_creation=args.bm_video_creation,
148+
)
149+
data = {
150+
"experiments": results,
151+
"system_metadata": {
152+
"cpu_count": os.cpu_count(),
153+
"system": platform.system(),
154+
"machine": platform.machine(),
155+
"python_version": str(platform.python_version()),
156+
"cuda": (
157+
torch.cuda.get_device_properties(0).name
158+
if torch.cuda.is_available()
159+
else "not available"
160+
),
161+
},
162+
}
163+
plot_data(data, args.plot_path)
199164

200165

201166
if __name__ == "__main__":

0 commit comments

Comments
 (0)