Skip to content

Commit e0040a0

Browse files
committed
Refactor and add benchmarks
1 parent 76900f6 commit e0040a0

File tree

5 files changed

+189
-165
lines changed

5 files changed

+189
-165
lines changed

benchmarks/decoders/benchmark_decoders.py

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@
1414
plot_data,
1515
run_benchmarks,
1616
TorchAudioDecoder,
17-
TorchcodecCompiled,
18-
TorchCodecNonCompiledBatch,
19-
TorchcodecNonCompiledWithOptions,
20-
TVNewAPIDecoderWithBackend,
17+
TorchCodecCoreCompiled,
18+
TorchCodecCoreBatch,
19+
TorchCodecCore,
20+
TorchCodecPublic,
21+
TorchVision,
2122
)
2223

2324

@@ -70,7 +71,7 @@ def main() -> None:
7071
"For torchcodec, you can specify options with tcoptions:<plus-separated-options>. "
7172
),
7273
type=str,
73-
default="decord,tcoptions:,torchvision,torchaudio,torchcodec_compiled,tcoptions:num_threads=1",
74+
default="decord,tcoptions:,torchvision,torchaudio,torchcodec_compiled,torchcodec_public,tcoptions:num_threads=1,tcbatchoptions:",
7475
)
7576
parser.add_argument(
7677
"--bm_video_dir",
@@ -98,14 +99,16 @@ def main() -> None:
9899
DecordNonBatchDecoderAccurateSeek()
99100
)
100101
elif decoder == "torchcodec":
101-
decoder_dict["TorchCodecNonCompiled"] = TorchcodecNonCompiledWithOptions()
102+
decoder_dict["TorchCodecCore:"] = TorchCodecCore()
102103
elif decoder == "torchcodec_compiled":
103-
decoder_dict["TorchcodecCompiled"] = TorchcodecCompiled()
104+
decoder_dict["TorchCodecCoreCompiled"] = TorchCodecCoreCompiled()
105+
elif decoder == "torchcodec_public":
106+
decoder_dict["TorchCodecPublic"] = TorchCodecPublic()
104107
elif decoder == "torchvision":
105-
decoder_dict["TVNewAPIDecoderWithBackendVideoReader"] = (
108+
decoder_dict["TorchVision[backend=video_reader]"] = (
106109
# We don't compare TorchVision's "pyav" backend because it doesn't support
107110
# accurate seeks.
108-
TVNewAPIDecoderWithBackend("video_reader")
111+
TorchVision("video_reader")
109112
)
110113
elif decoder == "torchaudio":
111114
decoder_dict["TorchAudioDecoder"] = TorchAudioDecoder()
@@ -117,8 +120,8 @@ def main() -> None:
117120
continue
118121
k, v = item.split("=")
119122
kwargs_dict[k] = v
120-
decoder_dict["TorchCodecNonCompiledBatch:" + options] = (
121-
TorchCodecNonCompiledBatch(**kwargs_dict)
123+
decoder_dict["TorchCodecCoreBatch" + options] = (
124+
TorchCodecCoreBatch(**kwargs_dict)
122125
)
123126
elif decoder.startswith("tcoptions:"):
124127
options = decoder[len("tcoptions:") :]
@@ -128,8 +131,8 @@ def main() -> None:
128131
continue
129132
k, v = item.split("=")
130133
kwargs_dict[k] = v
131-
decoder_dict["TorchcodecNonCompiled:" + options] = (
132-
TorchcodecNonCompiledWithOptions(**kwargs_dict)
134+
decoder_dict["TorchCodecCore:" + options] = (
135+
TorchCodecCore(**kwargs_dict)
133136
)
134137
video_paths = args.bm_video_paths.split(",")
135138
if args.bm_video_dir:

benchmarks/decoders/benchmark_decoders_library.py

Lines changed: 72 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
import torch
1414
import torch.utils.benchmark as benchmark
15-
from torchcodec.decoders import VideoDecoder
15+
from torchcodec.decoders import VideoDecoder, VideoStreamMetadata
1616

1717
from torchcodec.decoders._core import (
1818
_add_video_stream,
@@ -78,7 +78,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
7878
return frames
7979

8080

81-
class TVNewAPIDecoderWithBackend(AbstractDecoder):
81+
class TorchVision(AbstractDecoder):
8282
def __init__(self, backend):
8383
self._backend = backend
8484
self._print_each_iteration_time = False
@@ -125,7 +125,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
125125
return frames
126126

127127

128-
class TorchcodecNonCompiledWithOptions(AbstractDecoder):
128+
class TorchCodecCore(AbstractDecoder):
129129
def __init__(self, num_threads=None, color_conversion_library=None, device="cpu"):
130130
self._print_each_iteration_time = False
131131
self._num_threads = int(num_threads) if num_threads else None
@@ -186,7 +186,7 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
186186
return frames
187187

188188

189-
class TorchCodecNonCompiledBatch(AbstractDecoder):
189+
class TorchCodecCoreBatch(AbstractDecoder):
190190
def __init__(self, num_threads=None, color_conversion_library=None):
191191
self._print_each_iteration_time = False
192192
self._num_threads = int(num_threads) if num_threads else None
@@ -227,6 +227,24 @@ def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
227227
)
228228
return frames
229229

230+
class TorchCodecPublic(AbstractDecoder):
    """Benchmark adapter that decodes through the public ``VideoDecoder`` class.

    A new ``VideoDecoder`` is constructed inside every call, so decoder
    creation is part of whatever the caller measures.
    """

    def __init__(self, num_ffmpeg_threads=None):
        """Store the thread setting forwarded to every ``VideoDecoder``.

        Args:
            num_ffmpeg_threads: Value passed as ``num_ffmpeg_threads`` to
                ``VideoDecoder``. Anything accepted by ``int()`` (an int or a
                numeric string) is converted; a falsy value (``None``, ``0``,
                ``""``) is stored as ``None``.
        """
        self._num_ffmpeg_threads = (
            int(num_ffmpeg_threads) if num_ffmpeg_threads else None
        )

    def get_frames_from_video(self, video_file, pts_list):
        """Return ``VideoDecoder.get_frames_played_at(pts_list)`` for ``video_file``.

        Args:
            video_file: Source handed unchanged to ``VideoDecoder``.
            pts_list: Timestamps handed unchanged to ``get_frames_played_at``
                (presumably seconds -- confirm against the torchcodec docs).
        """
        decoder = VideoDecoder(
            video_file, num_ffmpeg_threads=self._num_ffmpeg_threads
        )
        return decoder.get_frames_played_at(pts_list)

    def get_consecutive_frames_from_video(self, video_file, numFramesToDecode):
        """Return up to ``numFramesToDecode`` frames by iterating the decoder.

        Args:
            video_file: Source handed unchanged to ``VideoDecoder``.
            numFramesToDecode: Maximum number of frames to collect.

        Returns:
            A list with at most ``numFramesToDecode`` frames, in iteration
            order. The list is shorter if the decoder is exhausted first and
            empty when ``numFramesToDecode`` is zero or negative.
        """
        decoder = VideoDecoder(
            video_file, num_ffmpeg_threads=self._num_ffmpeg_threads
        )
        frames = []
        # Guard non-positive requests explicitly: the stop condition below is
        # only evaluated after an append, so without this check a request for
        # zero frames would decode the whole video.
        if numFramesToDecode <= 0:
            return frames
        for frame in decoder:
            frames.append(frame)
            # Stop right after the last wanted frame so no extra frame is
            # decoded (an extra decode would skew benchmark timings).
            if len(frames) >= numFramesToDecode:
                break
        return frames
230248

231249
@torch.compile(fullgraph=True, backend="eager")
232250
def compiled_seek_and_next(decoder, pts):
@@ -239,7 +257,7 @@ def compiled_next(decoder):
239257
return get_next_frame(decoder)
240258

241259

242-
class TorchcodecCompiled(AbstractDecoder):
260+
class TorchCodecCoreCompiled(AbstractDecoder):
243261
def __init__(self):
244262
pass
245263

@@ -444,11 +462,13 @@ def plot_data(df_data, plot_path):
444462
plot_path,
445463
)
446464

465+
def get_metadata(video_file_path: str) -> VideoStreamMetadata:
    """Open ``video_file_path`` with ``VideoDecoder`` and return its ``metadata``."""
    decoder = VideoDecoder(video_file_path)
    return decoder.metadata
447467

448468
def run_benchmarks(
449469
decoder_dict,
450470
video_files_paths,
451-
num_uniform_samples,
471+
num_samples,
452472
min_runtime_seconds,
453473
benchmark_video_creation,
454474
) -> list[dict[str, str | float | int]]:
@@ -459,55 +479,57 @@ def run_benchmarks(
459479
for decoder_name, decoder in decoder_dict.items():
460480
for video_file_path in video_files_paths:
461481
print(f"video={video_file_path}, decoder={decoder_name}")
462-
# We only use the VideoDecoder to get the metadata and get
463-
# the list of PTS values to seek to.
464-
simple_decoder = VideoDecoder(video_file_path)
465-
duration = simple_decoder.metadata.duration_seconds
466-
pts_list = [
467-
i * duration / num_uniform_samples for i in range(num_uniform_samples)
482+
metadata = get_metadata(video_file_path)
483+
metadata_label = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
484+
485+
duration = metadata.duration_seconds
486+
uniform_pts_list = [
487+
i * duration / num_samples for i in range(num_samples)
468488
]
469-
metadata = simple_decoder.metadata
470-
metadata_string = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
471-
if verbose:
472-
print(
473-
f"video={video_file_path}, decoder={decoder_name}, pts_list={pts_list}"
489+
490+
random_pts_list = (torch.rand(num_samples) * duration).tolist()
491+
492+
for kind, pts_list in [("uniform", uniform_pts_list), ("random", random_pts_list)]:
493+
if verbose:
494+
print(
495+
f"video={video_file_path}, decoder={decoder_name}, pts_list={pts_list}"
496+
)
497+
seeked_result = benchmark.Timer(
498+
stmt="decoder.get_frames_from_video(video_file, pts_list)",
499+
globals={
500+
"video_file": video_file_path,
501+
"pts_list": pts_list,
502+
"decoder": decoder,
503+
},
504+
label=f"video={video_file_path} {metadata_label}",
505+
sub_label=decoder_name,
506+
description=f"{kind} {num_samples} seek()+next()",
474507
)
475-
seeked_result = benchmark.Timer(
476-
stmt="decoder.get_frames_from_video(video_file, pts_list)",
477-
globals={
478-
"video_file": video_file_path,
479-
"pts_list": pts_list,
480-
"decoder": decoder,
481-
},
482-
label=f"video={video_file_path} {metadata_string}",
483-
sub_label=decoder_name,
484-
description=f"{num_uniform_samples} seek()+next()",
485-
)
486-
results.append(
487-
seeked_result.blocked_autorange(min_run_time=min_runtime_seconds)
488-
)
489-
df_item = {}
490-
df_item["decoder"] = decoder_name
491-
df_item["video"] = video_file_path
492-
df_item["description"] = results[-1].description
493-
df_item["frame_count"] = num_uniform_samples
494-
df_item["median"] = results[-1].median
495-
df_item["iqr"] = results[-1].iqr
496-
df_item["type"] = "seek()+next()"
497-
df_item["fps"] = 1.0 * num_uniform_samples / results[-1].median
498-
df_item["fps_p75"] = 1.0 * num_uniform_samples / results[-1]._p75
499-
df_item["fps_p25"] = 1.0 * num_uniform_samples / results[-1]._p25
500-
df_data.append(df_item)
501-
502-
for num_consecutive_nexts in [1, 10]:
508+
results.append(
509+
seeked_result.blocked_autorange(min_run_time=min_runtime_seconds)
510+
)
511+
df_item = {}
512+
df_item["decoder"] = decoder_name
513+
df_item["video"] = video_file_path
514+
df_item["description"] = results[-1].description
515+
df_item["frame_count"] = num_samples
516+
df_item["median"] = results[-1].median
517+
df_item["iqr"] = results[-1].iqr
518+
df_item["type"] = f"{kind}:seek()+next()"
519+
df_item["fps"] = 1.0 * num_samples / results[-1].median
520+
df_item["fps_p75"] = 1.0 * num_samples / results[-1]._p75
521+
df_item["fps_p25"] = 1.0 * num_samples / results[-1]._p25
522+
df_data.append(df_item)
523+
524+
for num_consecutive_nexts in [100]:
503525
consecutive_frames_result = benchmark.Timer(
504526
stmt="decoder.get_consecutive_frames_from_video(video_file, consecutive_frames_to_extract)",
505527
globals={
506528
"video_file": video_file_path,
507529
"consecutive_frames_to_extract": num_consecutive_nexts,
508530
"decoder": decoder,
509531
},
510-
label=f"video={video_file_path} {metadata_string}",
532+
label=f"video={video_file_path} {metadata_label}",
511533
sub_label=decoder_name,
512534
description=f"{num_consecutive_nexts} next()",
513535
)
@@ -531,17 +553,16 @@ def run_benchmarks(
531553

532554
first_video_file_path = video_files_paths[0]
533555
if benchmark_video_creation:
534-
simple_decoder = VideoDecoder(first_video_file_path)
535-
metadata = simple_decoder.metadata
536-
metadata_string = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
556+
metadata = get_metadata(video_file_path)
557+
metadata_label = f"{metadata.codec} {metadata.width}x{metadata.height}, {metadata.duration_seconds}s {metadata.average_fps}fps"
537558
creation_result = benchmark.Timer(
538559
stmt="create_torchcodec_decoder_from_file(video_file)",
539560
globals={
540561
"video_file": first_video_file_path,
541562
"create_torchcodec_decoder_from_file": create_torchcodec_decoder_from_file,
542563
},
543-
label=f"video={first_video_file_path} {metadata_string}",
544-
sub_label="TorchcodecNonCompiled",
564+
label=f"video={first_video_file_path} {metadata_label}",
565+
sub_label="TorchCodecCore:",
545566
description="create()+next()",
546567
)
547568
results.append(
-962 Bytes
Loading

0 commit comments

Comments
 (0)