diff --git a/docs/source/api_ref_decoders.rst b/docs/source/api_ref_decoders.rst index 0ae159c37..1417d7aea 100644 --- a/docs/source/api_ref_decoders.rst +++ b/docs/source/api_ref_decoders.rst @@ -19,6 +19,12 @@ For an audio decoder tutorial, see: :ref:`sphx_glr_generated_examples_decoding_a VideoDecoder AudioDecoder +.. autosummary:: + :toctree: generated/ + :nosignatures: + :template: function.rst + + set_cuda_backend .. autosummary:: :toctree: generated/ diff --git a/examples/decoding/basic_cuda_example.py b/examples/decoding/basic_cuda_example.py index 7c29e4475..8f82940c0 100644 --- a/examples/decoding/basic_cuda_example.py +++ b/examples/decoding/basic_cuda_example.py @@ -94,9 +94,10 @@ # # To use CUDA decoder, you need to pass in a cuda device to the decoder. # -from torchcodec.decoders import VideoDecoder +from torchcodec.decoders import set_cuda_backend, VideoDecoder -decoder = VideoDecoder(video_file, device="cuda") +with set_cuda_backend("beta"): # Use the BETA backend, it's faster! + decoder = VideoDecoder(video_file, device="cuda") frame = decoder[0] # %% @@ -120,7 +121,8 @@ # against equivalent results from the CPU decoders. 
timestamps = [12, 19, 45, 131, 180] cpu_decoder = VideoDecoder(video_file, device="cpu") -cuda_decoder = VideoDecoder(video_file, device="cuda") +with set_cuda_backend("beta"): + cuda_decoder = VideoDecoder(video_file, device="cuda") cpu_frames = cpu_decoder.get_frames_played_at(timestamps).data cuda_frames = cuda_decoder.get_frames_played_at(timestamps).data diff --git a/src/torchcodec/decoders/_decoder_utils.py b/src/torchcodec/decoders/_decoder_utils.py index 549756b81..2619acd24 100644 --- a/src/torchcodec/decoders/_decoder_utils.py +++ b/src/torchcodec/decoders/_decoder_utils.py @@ -66,20 +66,29 @@ def set_cuda_backend(backend: str) -> Generator[None, None, None]: This context manager allows you to specify which CUDA backend implementation to use when creating :class:`~torchcodec.decoders.VideoDecoder` instances - with CUDA devices. This is thread-safe and async-safe. + with CUDA devices. - Note that you still need to pass `device="cuda"` when creating the - :class:`~torchcodec.decoders.VideoDecoder` instance. If a CUDA device isn't - specified, this context manager will have no effect. + .. note:: + **We recommend trying the "beta" backend instead of the default "ffmpeg" + backend!** The beta backend is faster, and will eventually become the + default in future versions. It may have rough edges that we'll polish + over time, but it's already quite stable and ready for adoption. Let us + know what you think! Only the creation of the decoder needs to be inside the context manager, the - decoding methods can be called outside of it. + decoding methods can be called outside of it. You still need to pass + ``device="cuda"`` when creating the + :class:`~torchcodec.decoders.VideoDecoder` instance. If a CUDA device isn't + specified, this context manager will have no effect. See example below. + + This is thread-safe and async-safe. Args: - backend (str): The CUDA backend to use. Can be "ffmpeg" or "beta". Default is "ffmpeg". 
+ backend (str): The CUDA backend to use. Can be "ffmpeg" (default) or + "beta". We recommend trying "beta" as it's faster! Example: - >>> with torchcodec.set_cuda_backend("beta"): + >>> with set_cuda_backend("beta"): ... decoder = VideoDecoder("video.mp4", device="cuda") ... ... # Only the decoder creation needs to be part of the context manager. diff --git a/src/torchcodec/decoders/_video_decoder.py b/src/torchcodec/decoders/_video_decoder.py index f22f5a3fc..331c7ba79 100644 --- a/src/torchcodec/decoders/_video_decoder.py +++ b/src/torchcodec/decoders/_video_decoder.py @@ -56,6 +56,8 @@ class VideoDecoder: Passing 0 lets FFmpeg decide on the number of threads. Default: 1. device (str or torch.device, optional): The device to use for decoding. Default: "cpu". + If you pass a CUDA device, we recommend trying the "beta" CUDA + backend, which is faster! See :func:`~torchcodec.decoders.set_cuda_backend`. seek_mode (str, optional): Determines if frame access will be "exact" or "approximate". Exact guarantees that requesting frame i will always return frame i, but doing so requires an initial :term:`scan` of the