Skip to content

Commit baff1fa

Browse files
committed
Merge branch 'main' of github.com:pytorch/torchcodec into workflow_partition
2 parents 4f81d2f + 3e1f4b5 commit baff1fa

File tree

3 files changed

+270
-20
lines changed

3 files changed

+270
-20
lines changed

.github/workflows/docs.yaml

Lines changed: 86 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -9,45 +9,111 @@ on:
99
- docs/*
1010
- .github/workflows/docs.yaml # self reference
1111

12+
permissions:
13+
id-token: write
14+
contents: write
15+
1216
defaults:
1317
run:
1418
shell: bash -l -eo pipefail {0}
1519

1620
jobs:
21+
generate-matrix:
22+
uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
23+
with:
24+
package-type: wheel
25+
os: linux
26+
test-infra-repository: pytorch/test-infra
27+
test-infra-ref: main
28+
with-cpu: disable
29+
with-xpu: disable
30+
with-rocm: disable
31+
with-cuda: enable
32+
build-python-only: "disable"
1733
build:
18-
runs-on: ubuntu-latest
34+
needs: generate-matrix
35+
strategy:
36+
fail-fast: false
37+
name: Build and Upload wheel
38+
uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
39+
with:
40+
repository: pytorch/torchcodec
41+
ref: ""
42+
test-infra-repository: pytorch/test-infra
43+
test-infra-ref: main
44+
build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
45+
post-script: packaging/post_build_script.sh
46+
smoke-test-script: packaging/fake_smoke_test.py
47+
package-name: torchcodec
48+
trigger-event: ${{ github.event_name }}
49+
build-platform: "python-build-package"
50+
build-command: "BUILD_AGAINST_ALL_FFMPEG_FROM_S3=1 ENABLE_CUDA=1 python -m build --wheel -vvv --no-isolation"
51+
52+
build-docs:
53+
runs-on: linux.4xlarge.nvidia.gpu
1954
strategy:
2055
fail-fast: false
56+
matrix:
57+
# 3.9 corresponds to the minimum python version for which we build
58+
# the wheel unless the label cliflow/binaries/all is present in the
59+
# PR.
60+
python-version: ['3.9']
61+
cuda-version: ['12.4']
62+
ffmpeg-version-for-tests: ['7']
63+
container:
64+
image: "pytorch/manylinux-builder:cuda${{ matrix.cuda-version }}"
65+
options: "--gpus all -e NVIDIA_DRIVER_CAPABILITIES=video,compute,utility"
66+
needs: build
2167
steps:
22-
- name: Check out repo
23-
uses: actions/checkout@v3
24-
- name: Setup conda env
25-
uses: conda-incubator/setup-miniconda@v2
68+
- name: Setup env vars
69+
run: |
70+
cuda_version_without_periods=$(echo "${{ matrix.cuda-version }}" | sed 's/\.//g')
71+
echo cuda_version_without_periods=${cuda_version_without_periods} >> $GITHUB_ENV
72+
- uses: actions/download-artifact@v3
2673
with:
27-
auto-update-conda: true
28-
miniconda-version: "latest"
29-
activate-environment: test
30-
python-version: '3.12'
74+
# Take the Python version from the matrix rather than hard-coding it, so the
# artifact name stays in sync if the matrix entry ever changes.
name: pytorch_torchcodec__${{ matrix.python-version }}_cu${{ env.cuda_version_without_periods }}_x86_64
75+
path: pytorch/torchcodec/dist/
76+
- name: Setup miniconda using test-infra
77+
uses: pytorch/test-infra/.github/actions/setup-miniconda@main
78+
with:
79+
python-version: ${{ matrix.python-version }}
80+
#
81+
# For some reason nvidia::libnpp=12.4 doesn't install but nvidia/label/cuda-12.4.0::libnpp does.
82+
# So we use the latter convention for libnpp.
83+
# We install conda packages at the start because otherwise conda may have conflicts with dependencies.
84+
default-packages: "nvidia/label/cuda-${{ matrix.cuda-version }}.0::libnpp nvidia::cuda-nvrtc=${{ matrix.cuda-version }} nvidia::cuda-toolkit=${{ matrix.cuda-version }} nvidia::cuda-cudart=${{ matrix.cuda-version }} nvidia::cuda-driver-dev=${{ matrix.cuda-version }} conda-forge::ffmpeg=${{ matrix.ffmpeg-version-for-tests }}"
85+
- name: Check env
86+
run: |
87+
${CONDA_RUN} env
88+
${CONDA_RUN} conda info
89+
${CONDA_RUN} nvidia-smi
90+
${CONDA_RUN} conda list
91+
- name: Assert ffmpeg exists
92+
run: |
93+
${CONDA_RUN} ffmpeg -buildconf
3194
- name: Update pip
32-
run: python -m pip install --upgrade pip
33-
- name: Install dependencies and FFmpeg
95+
run: ${CONDA_RUN} python -m pip install --upgrade pip
96+
- name: Install PyTorch
3497
run: |
35-
# TODO: torchvision and torchaudio shouldn't be needed. They were only added
36-
# to silence an error as seen in https://github.com/pytorch/torchcodec/issues/203
37-
python -m pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cpu
38-
conda install "ffmpeg=7.0.1" pkg-config -c conda-forge
39-
ffmpeg -version
40-
- name: Build and install torchcodec
98+
${CONDA_RUN} python -m pip install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu${{ env.cuda_version_without_periods }}
99+
${CONDA_RUN} python -c 'import torch; print(f"{torch.__version__}"); print(f"{torch.__file__}"); print(f"{torch.cuda.is_available()=}")'
100+
- name: Install torchcodec from the wheel
41101
run: |
42-
python -m pip install -e ".[dev]" --no-build-isolation -vvv
102+
wheel_path=`find pytorch/torchcodec/dist -type f -name "*.whl"`
103+
echo Installing $wheel_path
104+
${CONDA_RUN} python -m pip install $wheel_path -vvv
105+
106+
- name: Check out repo
107+
uses: actions/checkout@v3
108+
43109
- name: Install doc dependencies
44110
run: |
45111
cd docs
46-
python -m pip install -r requirements.txt
112+
${CONDA_RUN} python -m pip install -r requirements.txt
47113
- name: Build docs
48114
run: |
49115
cd docs
50-
make html
116+
${CONDA_RUN} make html
51117
- uses: actions/upload-artifact@v3
52118
with:
53119
name: Built-Docs

docs/source/index.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,14 @@ We achieve these capabilities through:
5050

5151
How to sample video clips
5252

53+
.. grid-item-card:: :octicon:`file-code;1em`
54+
GPU decoding using TorchCodec
55+
:img-top: _static/img/card-background.svg
56+
:link: generated_examples/basic_cuda_example.html
57+
:link-type: url
58+
59+
A simple example demonstrating CUDA GPU decoding
60+
5361
.. toctree::
5462
:maxdepth: 1
5563
:caption: TorchCodec documentation

examples/basic_cuda_example.py

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,176 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
"""
7+
Accelerated video decoding on GPUs with CUDA and NVDEC
8+
================================================================
9+
10+
TorchCodec can use supported Nvidia hardware (see support matrix
11+
`here <https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new>`_) to speed-up
12+
video decoding. This is called "CUDA Decoding" and it uses Nvidia's
13+
`NVDEC hardware decoder <https://developer.nvidia.com/video-codec-sdk>`_
14+
and CUDA kernels to respectively decompress and convert to RGB.
15+
CUDA Decoding can be faster than CPU Decoding for the actual decoding step and also for
16+
subsequent transform steps like scaling, cropping or rotating. This is because the decode step leaves
17+
the decoded tensor in GPU memory so the GPU doesn't have to fetch from main memory before
18+
running the transform steps. Encoded packets are often much smaller than decoded frames so
19+
CUDA decoding also uses less PCI-e bandwidth.
20+
21+
CUDA Decoding can offer speed-up over CPU Decoding in a few scenarios:
22+
23+
#. You are decoding a large resolution video
24+
#. You are decoding a large batch of videos that's saturating the CPU
25+
#. You want to do whole-image transforms like scaling or convolutions on the decoded tensors
26+
after decoding
27+
#. Your CPU is saturated and you want to free it up for other work
28+
29+
30+
Here are situations where CUDA Decoding may not make sense:
31+
32+
#. You want bit-exact results compared to CPU Decoding
33+
#. You have small resolution videos and the PCI-e transfer latency is large
34+
#. Your GPU is already busy and CPU is not
35+
36+
It's best to experiment with CUDA Decoding to see if it improves your use-case. With
37+
TorchCodec you can simply pass in a device parameter to the
38+
:class:`~torchcodec.decoders.VideoDecoder` class to use CUDA Decoding.
39+
40+
41+
In order to use CUDA Decoding, you will need the following installed in your environment:
42+
43+
#. An Nvidia GPU that supports decoding the video format you want to decode. See
44+
the support matrix `here <https://developer.nvidia.com/video-encode-and-decode-gpu-support-matrix-new>`_
45+
#. `CUDA-enabled PyTorch <https://pytorch.org/get-started/locally/>`_
46+
#. FFmpeg binaries that support
47+
`NVDEC-enabled <https://docs.nvidia.com/video-technologies/video-codec-sdk/12.0/ffmpeg-with-nvidia-gpu/index.html>`_
48+
codecs
49+
#. libnpp and nvrtc (these are usually installed when you install the full cuda-toolkit)
50+
51+
52+
FFmpeg versions 5, 6 and 7 from conda-forge are built with
53+
`NVDEC support <https://docs.nvidia.com/video-technologies/video-codec-sdk/12.0/ffmpeg-with-nvidia-gpu/index.html>`_
54+
and you can install them with conda. For example, to install FFmpeg version 7:
55+
56+
57+
.. code-block:: bash
58+
59+
conda install ffmpeg=7 -c conda-forge
60+
conda install libnpp cuda-nvrtc -c nvidia
61+
62+
63+
"""
64+
65+
# %%
66+
# Checking if PyTorch has CUDA enabled
67+
# -------------------------------------
68+
#
69+
# .. note::
70+
#
71+
# This tutorial requires FFmpeg libraries compiled with CUDA support.
72+
#
73+
#
74+
import torch
75+
76+
print(f"{torch.__version__=}")
77+
print(f"{torch.cuda.is_available()=}")
78+
print(f"{torch.cuda.get_device_properties(0)=}")
79+
80+
81+
# %%
82+
# Downloading the video
83+
# -------------------------------------
84+
#
85+
# We will use the video below, which has the following properties:
86+
#
87+
# - Codec: H.264
88+
# - Resolution: 960x540
89+
# - FPS: 29.97
90+
# - Pixel format: YUV420P
91+
#
92+
# .. raw:: html
93+
#
94+
# <video style="max-width: 100%" controls>
95+
# <source src="https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4" type="video/mp4">
96+
# </video>
97+
import urllib.request
98+
99+
video_file = "video.mp4"
100+
urllib.request.urlretrieve(
101+
"https://download.pytorch.org/torchaudio/tutorial-assets/stream-api/NASAs_Most_Scientifically_Complex_Space_Observatory_Requires_Precision-MP4_small.mp4",
102+
video_file,
103+
)
104+
105+
106+
# %%
107+
# CUDA Decoding using VideoDecoder
108+
# -------------------------------------
109+
#
110+
# To use the CUDA decoder, you need to pass a CUDA device to the decoder.
111+
#
112+
from torchcodec.decoders import VideoDecoder
113+
114+
decoder = VideoDecoder(video_file, device="cuda")
115+
frame = decoder[0]
116+
117+
# %%
118+
#
119+
# The decoded frame is returned as a tensor in CHW format (a batch of frames would be NCHW).
120+
121+
print(frame.shape, frame.dtype)
122+
123+
# %%
124+
#
125+
# The decoded frame is left in GPU memory.
126+
127+
print(frame.data.device)
128+
129+
130+
# %%
131+
# Visualizing Frames
132+
# -------------------------------------
133+
#
134+
# Let's look at the frames decoded by CUDA decoder and compare them
135+
# against equivalent results from the CPU decoders.
136+
timestamps = [12, 19, 45, 131, 180]
137+
cpu_decoder = VideoDecoder(video_file, device="cpu")
138+
cuda_decoder = VideoDecoder(video_file, device="cuda")
139+
cpu_frames = cpu_decoder.get_frames_played_at(timestamps).data
140+
cuda_frames = cuda_decoder.get_frames_played_at(timestamps).data
141+
142+
143+
def plot_cpu_and_cuda_frames(cpu_frames: torch.Tensor, cuda_frames: torch.Tensor):
    """Plot CPU-decoded and CUDA-decoded frames side by side, one pair per row.

    Args:
        cpu_frames: Frames from the CPU decoder, indexed along the first
            dimension (one frame per index).
        cuda_frames: Frames from the CUDA decoder, indexed the same way. Each
            frame is moved to the CPU before being converted to an image.

    Returns:
        None. If matplotlib or torchvision cannot be imported, a hint is
        printed and nothing is plotted. Nothing is plotted for empty input.
    """
    try:
        import matplotlib.pyplot as plt
        from torchvision.transforms.v2.functional import to_pil_image
    except ImportError:
        print("Cannot plot, please run `pip install torchvision matplotlib`")
        return

    # Size the grid from the frames actually passed in rather than from a
    # module-level list, so the function works for any batch it is given.
    n_rows = min(len(cpu_frames), len(cuda_frames))
    if n_rows == 0:
        # plt.subplots rejects a zero-row grid; there is nothing to draw.
        return

    # squeeze=False keeps `axes` two-dimensional even when there is one row,
    # so the axes[i][j] indexing below is always valid.
    _, axes = plt.subplots(n_rows, 2, figsize=[12.8, 16.0], squeeze=False)
    for i in range(n_rows):
        axes[i][0].imshow(to_pil_image(cpu_frames[i].to("cpu")))
        axes[i][1].imshow(to_pil_image(cuda_frames[i].to("cpu")))

    axes[0][0].set_title("CPU decoder", fontsize=24)
    axes[0][1].set_title("CUDA decoder", fontsize=24)
    plt.setp(axes, xticks=[], yticks=[])
    plt.tight_layout()
160+
161+
162+
plot_cpu_and_cuda_frames(cpu_frames, cuda_frames)
163+
164+
# %%
165+
#
166+
# They look visually similar to the human eye but there may be subtle
167+
# differences because CUDA math is not bit-exact with respect to CPU math.
168+
#
169+
cpu_frames_on_gpu = cpu_frames.to("cuda")
# Exact comparison first; the two decoders are not expected to match bit-for-bit.
frames_equal = torch.equal(cpu_frames_on_gpu, cuda_frames)
# Work in float so the subtraction of the frame values cannot wrap around.
abs_diff = torch.abs(cpu_frames_on_gpu.float() - cuda_frames.float())
mean_abs_diff = torch.mean(abs_diff)
max_abs_diff = torch.max(abs_diff)
print(f"{frames_equal=}")
print(f"{mean_abs_diff=}")
print(f"{max_abs_diff=}")

0 commit comments

Comments
 (0)