meta-pytorch
diff --git a/‎benchmarks/decoders/benchmark_transforms.py‎
Lines changed: 164 additions & 0 deletions b/‎benchmarks/decoders/benchmark_transforms.py‎
Lines changed: 164 additions & 0 deletions
diff --git a/‎docs/source/index.rst‎
Lines changed: 1 addition & 1 deletion b/‎docs/source/index.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎packaging/build_ffmpeg.bat‎
Lines changed: 6 additions & 0 deletions b/‎packaging/build_ffmpeg.bat‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎packaging/build_ffmpeg.sh‎
Lines changed: 5 additions & 0 deletions b/‎packaging/build_ffmpeg.sh‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎packaging/check_glibcxx.py‎
Lines changed: 6 additions & 0 deletions b/‎packaging/check_glibcxx.py‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎packaging/helpers.sh‎
Lines changed: 5 additions & 0 deletions b/‎packaging/helpers.sh‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎packaging/post_build_script.sh‎
Lines changed: 5 additions & 0 deletions b/‎packaging/post_build_script.sh‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎packaging/pre_build_script.sh‎
Lines changed: 5 additions & 0 deletions b/‎packaging/pre_build_script.sh‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎packaging/vc_env_helper.bat‎
Lines changed: 6 additions & 0 deletions b/‎packaging/vc_env_helper.bat‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎src/torchcodec/_core/BetaCudaDeviceInterface.cpp‎
Lines changed: 18 additions & 2 deletions b/‎src/torchcodec/_core/BetaCudaDeviceInterface.cpp‎
Lines changed: 18 additions & 2 deletions
@@ -0,0 +1,164 @@
+import math
+from argparse import ArgumentParser
+from pathlib import Path
+from time import perf_counter_ns
+
+import torch
+from torch import Tensor
+from torchcodec._core import add_video_stream, create_from_file, get_frames_by_pts
+from torchcodec.decoders import VideoDecoder
+from torchvision.transforms import v2
+
+# Default number of timed runs per benchmark: used as the default of bench()'s
+# num_exp parameter and of the --num-exp command-line option.
+DEFAULT_NUM_EXP = 20
+
+
+def bench(f, *args, num_exp=DEFAULT_NUM_EXP, warmup=1) -> Tensor:
+
+    for _ in range(warmup):
+        f(*args)
+
+    times = []
+    for _ in range(num_exp):
+        start = perf_counter_ns()
+        f(*args)
+        end = perf_counter_ns()
+        times.append(end - start)
+    return torch.tensor(times).float()
+
+
+def report_stats(times: Tensor, unit: str = "ms", prefix: str = "") -> float:
+    mul = {
+        "ns": 1,
+        "µs": 1e-3,
+        "ms": 1e-6,
+        "s": 1e-9,
+    }[unit]
+    times = times * mul
+    std = times.std().item()
+    med = times.median().item()
+    mean = times.mean().item()
+    min = times.min().item()
+    max = times.max().item()
+    print(
+        f"{prefix:<45} {med = :.2f}, {mean = :.2f} +- {std:.2f}, {min = :.2f}, {max = :.2f} - in {unit}"
+    )
+
+
+def torchvision_resize(
+    path: Path, pts_seconds: list[float], dims: tuple[int, int]
+) -> None:
+    decoder = create_from_file(str(path), seek_mode="approximate")
+    add_video_stream(decoder)
+    raw_frames, *_ = get_frames_by_pts(decoder, timestamps=pts_seconds)
+    return v2.functional.resize(raw_frames, size=dims)
+
+
+def torchvision_crop(
+    path: Path, pts_seconds: list[float], dims: tuple[int, int], x: int, y: int
+) -> None:
+    decoder = create_from_file(str(path), seek_mode="approximate")
+    add_video_stream(decoder)
+    raw_frames, *_ = get_frames_by_pts(decoder, timestamps=pts_seconds)
+    return v2.functional.crop(raw_frames, top=y, left=x, height=dims[0], width=dims[1])
+
+
+def decoder_native_resize(
+    path: Path, pts_seconds: list[float], dims: tuple[int, int]
+) -> None:
+    decoder = create_from_file(str(path), seek_mode="approximate")
+    add_video_stream(decoder, transform_specs=f"resize, {dims[0]}, {dims[1]}")
+    return get_frames_by_pts(decoder, timestamps=pts_seconds)[0]
+
+
+def decoder_native_crop(
+    path: Path, pts_seconds: list[float], dims: tuple[int, int], x: int, y: int
+) -> None:
+    decoder = create_from_file(str(path), seek_mode="approximate")
+    add_video_stream(decoder, transform_specs=f"crop, {dims[0]}, {dims[1]}, {x}, {y}")
+    return get_frames_by_pts(decoder, timestamps=pts_seconds)[0]
+
+
+def main():
+    parser = ArgumentParser()
+    parser.add_argument("--path", type=str, help="path to file", required=True)
+    parser.add_argument(
+        "--num-exp",
+        type=int,
+        default=DEFAULT_NUM_EXP,
+        help="number of runs to average over",
+    )
+
+    args = parser.parse_args()
+    path = Path(args.path)
+
+    metadata = VideoDecoder(path).metadata
+    duration = metadata.duration_seconds
+
+    print(
+        f"Benchmarking {path.name}, duration: {duration}, codec: {metadata.codec}, averaging over {args.num_exp} runs:"
+    )
+
+    input_height = metadata.height
+    input_width = metadata.width
+    fraction_of_total_frames_to_sample = [0.005, 0.01, 0.05, 0.1]
+    fraction_of_input_dimensions = [0.5, 0.25, 0.125]
+
+    for num_fraction in fraction_of_total_frames_to_sample:
+        num_frames_to_sample = math.ceil(metadata.num_frames * num_fraction)
+        print(
+            f"Sampling {num_fraction * 100}%, {num_frames_to_sample}, of {metadata.num_frames} frames"
+        )
+        uniform_timestamps = [
+            i * duration / num_frames_to_sample for i in range(num_frames_to_sample)
+        ]
+
+        for dims_fraction in fraction_of_input_dimensions:
+            dims = (int(input_height * dims_fraction), int(input_width * dims_fraction))
+
+            times = bench(
+                torchvision_resize, path, uniform_timestamps, dims, num_exp=args.num_exp
+            )
+            report_stats(times, prefix=f"torchvision_resize({dims})")
+
+            times = bench(
+                decoder_native_resize,
+                path,
+                uniform_timestamps,
+                dims,
+                num_exp=args.num_exp,
+            )
+            report_stats(times, prefix=f"decoder_native_resize({dims})")
+            print()
+
+            center_x = (input_height - dims[0]) // 2
+            center_y = (input_width - dims[1]) // 2
+            times = bench(
+                torchvision_crop,
+                path,
+                uniform_timestamps,
+                dims,
+                center_x,
+                center_y,
+                num_exp=args.num_exp,
+            )
+            report_stats(
+                times, prefix=f"torchvision_crop({dims}, {center_x}, {center_y})"
+            )
+
+            times = bench(
+                decoder_native_crop,
+                path,
+                uniform_timestamps,
+                dims,
+                center_x,
+                center_y,
+                num_exp=args.num_exp,
+            )
+            report_stats(
+                times, prefix=f"decoder_native_crop({dims}, {center_x}, {center_y})"
+            )
+            print()
+
+
+# Run the benchmark only when this file is executed as a script, not when it
+# is imported.
+if __name__ == "__main__":
+    main()
@@ -11,7 +11,7 @@ We achieve these capabilities through:
 
 * Pythonic APIs that mirror Python and PyTorch conventions.
 * Relying on `FFmpeg <https://www.ffmpeg.org/>`_ to do the decoding / encoding.
-  TorchCodec uses the version of FFmpeg you already have installed. FMPEG is a
+  TorchCodec uses the version of FFmpeg you already have installed. FFmpeg is a
   mature library with broad coverage available on most systems. It is, however,
   not easy to use.  TorchCodec abstracts FFmpeg's complexity to ensure it is
   used correctly and efficiently.
 
@@ -1,3 +1,9 @@
+:: Copyright (c) Meta Platforms, Inc. and affiliates.
+:: All rights reserved.
+::
+:: This source code is licensed under the BSD-style license found in the
+:: LICENSE file in the root directory of this source tree.
+
 :: Taken from torchaudio
 @echo off
 
 
@@ -1,4 +1,9 @@
 #!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
 
 # This is taken and adapted from torchaudio, only keeping the parts relevant to
 # linux.
 
@@ -1,3 +1,9 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
 """
 The goal of this script is to ensure that the .so files we ship do not contain
 symbol versions from libstdc++ that are too recent. This is a very manual way of
 
@@ -1,4 +1,9 @@
 #!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
 
 _list_wheel_files() {
     unzip -l "$1" | awk '{print $4}'
 
@@ -1,4 +1,9 @@
 #!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
 
 set -ex
 
 
@@ -1,4 +1,9 @@
 #!/bin/bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
 
 set -ex
 
 
@@ -1,3 +1,9 @@
+:: Copyright (c) Meta Platforms, Inc. and affiliates.
+:: All rights reserved.
+::
+:: This source code is licensed under the BSD-style license found in the
+:: LICENSE file in the root directory of this source tree.
+
 :: Taken from torchaudio
 @echo on
 
 
@@ -15,7 +15,7 @@
 #include "src/torchcodec/_core/FFMPEGCommon.h"
 #include "src/torchcodec/_core/NVDECCache.h"
 
-// #include <cuda_runtime.h> // For cudaStreamSynchronize
+#include "src/torchcodec/_core/NVCUVIDRuntimeLoader.h"
 #include "src/torchcodec/_core/nvcuvid_include/cuviddec.h"
 #include "src/torchcodec/_core/nvcuvid_include/nvcuvid.h"
 
@@ -155,6 +155,7 @@ std::optional<cudaVideoCodec> validateCodecSupport(AVCodecID codecId) {
 bool nativeNVDECSupport(const SharedAVCodecContext& codecContext) {
   // Return true iff the input video stream is supported by our NVDEC
   // implementation.
+
   auto codecType = validateCodecSupport(codecContext->codec_id);
   if (!codecType.has_value()) {
     return false;
@@ -222,6 +223,8 @@ BetaCudaDeviceInterface::BetaCudaDeviceInterface(const torch::Device& device)
 
   initializeCudaContextWithPytorch(device_);
   nppCtx_ = getNppStreamContext(device_);
+
+  nvcuvidAvailable_ = loadNVCUVIDLibrary();
 }
 
 BetaCudaDeviceInterface::~BetaCudaDeviceInterface() {
@@ -249,7 +252,7 @@ void BetaCudaDeviceInterface::initialize(
     const AVStream* avStream,
     const UniqueDecodingAVFormatContext& avFormatCtx,
     [[maybe_unused]] const SharedAVCodecContext& codecContext) {
-  if (!nativeNVDECSupport(codecContext)) {
+  if (!nvcuvidAvailable_ || !nativeNVDECSupport(codecContext)) {
     cpuFallback_ = createDeviceInterface(torch::kCPU);
     TORCH_CHECK(
         cpuFallback_ != nullptr, "Failed to create CPU device interface");
@@ -699,4 +702,17 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput(
       avFrame, device_, nppCtx_, nvdecStream, preAllocatedOutputTensor);
 }
 
+// Returns a human-readable description of the decoding path in use: NVDEC,
+// or the CPU fallback (noting when the fallback is in place while NVCUVID is
+// unavailable).
+std::string BetaCudaDeviceInterface::getDetails() {
+  const std::string header = "Beta CUDA Device Interface.";
+  // No CPU fallback interface was created, so NVDEC is reported.
+  if (!cpuFallback_) {
+    return header + " Using NVDEC.";
+  }
+  std::string details = header + " Using CPU fallback.";
+  if (!nvcuvidAvailable_) {
+    details += " NVCUVID not available!";
+  }
+  return details;
+}
+
 } // namespace facebook::torchcodec