Skip to content

Commit 77ef215

Browse files
committed
Some docs
1 parent 01297e1 commit 77ef215

File tree

3 files changed

+45
-9
lines changed

3 files changed

+45
-9
lines changed

docs/source/io.rst

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,8 @@ images and videos.
99
Image Decoding
1010
--------------
1111

12-
Torchvision currently supports decoding JPEG, PNG, WEBP and GIF images. JPEG
13-
decoding can also be done on CUDA GPUs.
12+
Torchvision currently supports decoding JPEG, PNG, WEBP, GIF, AVIF, and HEIC
13+
images. JPEG decoding can also be done on CUDA GPUs.
1414

1515
The main entry point is the :func:`~torchvision.io.decode_image` function, which
1616
you can use as an alternative to ``PIL.Image.open()``. It will decode images
@@ -30,9 +30,10 @@ run transforms/preproc natively on tensors.
3030
3131
3232
:func:`~torchvision.io.decode_image` will automatically detect the image format,
33-
and call the corresponding decoder. You can also use the lower-level
34-
format-specific decoders which can be more powerful, e.g. if you want to
35-
encode/decode JPEGs on CUDA.
33+
and call the corresponding decoder (except for HEIC and AVIF images, see details
34+
in :func:`~torchvision.io.decode_avif` and :func:`~torchvision.io.decode_heic`).
35+
You can also use the lower-level format-specific decoders which can be more
36+
powerful, e.g. if you want to encode/decode JPEGs on CUDA.
3637

3738
.. autosummary::
3839
:toctree: generated/

test/test_image.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -925,9 +925,7 @@ def test_decode_webp_against_pil(decode_fun, scripted, mode, pil_mode, filename)
925925
img += 123 # make sure image buffer wasn't freed by underlying decoding lib
926926

927927

928-
# TODO_AVIF_HEIC make decode_image work
929928
@pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE)
930-
# @pytest.mark.parametrize("decode_fun", (decode_avif, decode_image))
931929
@pytest.mark.parametrize("decode_fun", (decode_avif,))
932930
def test_decode_avif(decode_fun):
933931
encoded_bytes = read_file(next(get_images(FAKEDATA_DIR, ".avif")))
@@ -1016,8 +1014,6 @@ def test_decode_avif_heic_against_pil(decode_fun, mode, pil_mode, filename):
10161014
torch.testing.assert_close(img, from_pil, rtol=0, atol=3)
10171015

10181016

1019-
# TODO_AVIF_HEIC make decode_image work
1020-
# @pytest.mark.parametrize("decode_fun", (decode_heic, decode_image))
10211017
@pytest.mark.skipif(not IS_LINUX, reason=HEIC_AVIF_MESSAGE)
10221018
@pytest.mark.parametrize("decode_fun", (decode_heic,))
10231019
def test_decode_heic(decode_fun):

torchvision/io/image.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,12 @@ def decode_image(
296296
after this function to convert the decoded image into a uint8 or float
297297
tensor.
298298
299+
.. note::
300+
301+
``decode_image()`` doesn't work yet on AVIF or HEIC images. For these
302+
formats, directly call :func:`~torchvision.io.decode_avif` or
303+
:func:`~torchvision.io.decode_heic`.
304+
299305
Args:
300306
input (Tensor or str or ``pathlib.Path``): The image to decode. If a
301307
tensor is passed, it must be one dimensional uint8 tensor containing
@@ -384,6 +390,17 @@ def decode_webp(
384390
# The ops (torch.ops.extra_decoders_ns.decode_*) are otherwise torchscript-able,
385391
# and users who need torchscript can always just wrap those.
386392

393+
# TODO_AVIF_HEIC: decode_image() should work for those. The key technical issue
394+
# we have here is that the format detection logic of decode_image() is
395+
# implemented in torchvision, and torchvision has zero knowledge of
396+
# torchvision-extra-decoders, so we cannot call the AVIF/HEIC C++ decoders
397+
# (those in torchvision-extra-decoders) from there.
398+
# A trivial check that could be done within torchvision would be to check the
399+
# file extension, if a path was passed. We could also just implement the
400+
# AVIF/HEIC detection logic in Python as a fallback, if the file detection
401+
# didn't find any format. In any case: properly determining whether a file is
402+
# HEIC is far from trivial, and relying on libmagic would probably be best.
403+
387404

388405
_EXTRA_DECODERS_ALREADY_LOADED = False
389406

@@ -423,6 +440,17 @@ def _load_extra_decoders_once():
423440
def decode_avif(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
424441
"""Decode an AVIF image into a 3 dimensional RGB[A] Tensor.
425442
443+
.. warning::
444+
In order to enable the AVIF decoding capabilities of torchvision, you
445+
first need to run ``pip install torchvision-extra-decoders``. Just
446+
install the package, you don't need to update your code. This is only
447+
supported on Linux, and this feature is still in BETA stage. Please let
448+
us know of any issue:
449+
https://github.com/pytorch/vision/issues/new/choose. Note that
450+
`torchvision-extra-decoders
451+
<https://github.com/pytorch-labs/torchvision-extra-decoders/>`_ is
452+
released under the LGPL license.
453+
426454
The values of the output tensor are in uint8 in [0, 255] for most images. If
427455
the image has a bit-depth of more than 8, then the output tensor is uint16
428456
in [0, 65535]. Since uint16 support is limited in pytorch, we recommend
@@ -449,6 +477,17 @@ def decode_avif(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANG
449477
def decode_heic(input: torch.Tensor, mode: ImageReadMode = ImageReadMode.UNCHANGED) -> torch.Tensor:
450478
"""Decode an HEIC image into a 3 dimensional RGB[A] Tensor.
451479
480+
.. warning::
481+
In order to enable the HEIC decoding capabilities of torchvision, you
482+
first need to run ``pip install torchvision-extra-decoders``. Just
483+
install the package, you don't need to update your code. This is only
484+
supported on Linux, and this feature is still in BETA stage. Please let
485+
us know of any issue:
486+
https://github.com/pytorch/vision/issues/new/choose. Note that
487+
`torchvision-extra-decoders
488+
<https://github.com/pytorch-labs/torchvision-extra-decoders/>`_ is
489+
released under the LGPL license.
490+
452491
The values of the output tensor are in uint8 in [0, 255] for most images. If
453492
the image has a bit-depth of more than 8, then the output tensor is uint16
454493
in [0, 65535]. Since uint16 support is limited in pytorch, we recommend

0 commit comments

Comments
 (0)