Skip to content

Commit dac3180

Browse files
authored
Merge branch 'main' into please_dont_modify_this_branch_unless_you_are_just_merging_with_main__
2 parents 58254fb + f0ed07d commit dac3180

File tree

7 files changed

+184
-93
lines changed

7 files changed

+184
-93
lines changed

docs/source/io.rst

Lines changed: 61 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -3,33 +3,46 @@ Decoding / Encoding images and videos
33

44
.. currentmodule:: torchvision.io
55

6-
The :mod:`torchvision.io` package provides functions for performing IO
7-
operations. They are currently specific to reading and writing images and
8-
videos.
6+
The :mod:`torchvision.io` module provides utilities for decoding and encoding
7+
images and videos.
98

10-
Images
11-
------
9+
Image Decoding
10+
--------------
1211

1312
Torchvision currently supports decoding JPEG, PNG, WEBP and GIF images. JPEG
1413
decoding can also be done on CUDA GPUs.
1514

16-
For encoding, JPEG (cpu and CUDA) and PNG are supported.
15+
The main entry point is the :func:`~torchvision.io.decode_image` function, which
16+
you can use as an alternative to ``PIL.Image.open()``. It will decode images
17+
straight into image Tensors, thus saving you the conversion and allowing you to
18+
run transforms/preproc natively on tensors.
19+
20+
.. code::
21+
22+
from torchvision.io import decode_image
23+
24+
img = decode_image("path_to_image", mode="RGB")
25+
img.dtype # torch.uint8
26+
27+
# Or
28+
raw_encoded_bytes = ... # read encoded bytes from your file system
29+
img = decode_image(raw_encoded_bytes, mode="RGB")
30+
31+
32+
:func:`~torchvision.io.decode_image` will automatically detect the image format,
33+
and call the corresponding decoder. You can also use the lower-level
34+
format-specific decoders which can be more powerful, e.g. if you want to
35+
encode/decode JPEGs on CUDA.
1736

1837
.. autosummary::
1938
:toctree: generated/
2039
:template: function.rst
2140

2241
decode_image
23-
encode_jpeg
2442
decode_jpeg
25-
write_jpeg
43+
encode_png
2644
decode_gif
2745
decode_webp
28-
encode_png
29-
decode_png
30-
write_png
31-
read_file
32-
write_file
3346

3447
.. autosummary::
3548
:toctree: generated/
@@ -41,14 +54,47 @@ Obsolete decoding function:
4154

4255
.. autosummary::
4356
:toctree: generated/
44-
:template: class.rst
57+
:template: function.rst
4558

4659
read_image
4760

61+
Image Encoding
62+
--------------
63+
64+
For encoding, JPEG (cpu and CUDA) and PNG are supported.
65+
66+
67+
.. autosummary::
68+
:toctree: generated/
69+
:template: function.rst
70+
71+
encode_jpeg
72+
write_jpeg
73+
encode_png
74+
write_png
75+
76+
IO operations
77+
-------------
78+
79+
.. autosummary::
80+
:toctree: generated/
81+
:template: function.rst
82+
83+
read_file
84+
write_file
4885

4986
Video
5087
-----
5188

89+
.. warning::
90+
91+
Torchvision supports video decoding through different APIs listed below,
92+
some of which are still in BETA stage. In the near future, we intend to
93+
centralize PyTorch's video decoding capabilities within the `torchcodec
94+
<https://github.com/pytorch/torchcodec>`_ project. We encourage you to try
95+
it out and share your feedback, as the torchvision video decoders will
96+
eventually be deprecated.
97+
5298
.. autosummary::
5399
:toctree: generated/
54100
:template: function.rst
@@ -58,45 +104,14 @@ Video
58104
write_video
59105

60106

61-
Fine-grained video API
62-
^^^^^^^^^^^^^^^^^^^^^^
107+
**Fine-grained video API**
63108

64109
In addition to the :mod:`read_video` function, we provide a high-performance
65110
lower-level API for more fine-grained control compared to the :mod:`read_video` function.
66111
It does all this whilst fully supporting torchscript.
67112

68-
.. betastatus:: fine-grained video API
69-
70113
.. autosummary::
71114
:toctree: generated/
72115
:template: class.rst
73116

74117
VideoReader
75-
76-
77-
Example of inspecting a video:
78-
79-
.. code:: python
80-
81-
import torchvision
82-
video_path = "path to a test video"
83-
# Constructor allocates memory and a threaded decoder
84-
# instance per video. At the moment it takes two arguments:
85-
# path to the video file, and a wanted stream.
86-
reader = torchvision.io.VideoReader(video_path, "video")
87-
88-
# The information about the video can be retrieved using the
89-
# `get_metadata()` method. It returns a dictionary for every stream, with
90-
# duration and other relevant metadata (often frame rate)
91-
reader_md = reader.get_metadata()
92-
93-
# metadata is structured as a dict of dicts with following structure
94-
# {"stream_type": {"attribute": [attribute per stream]}}
95-
#
96-
# following would print out the list of frame rates for every present video stream
97-
print(reader_md["video"]["fps"])
98-
99-
# we explicitly select the stream we would like to operate on. In
100-
# the constructor we select a default video stream, but
101-
# in practice, we can set whichever stream we would like
102-
video.set_current_stream("video:0")

setup.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@
4242
IS_ROCM = (torch.version.hip is not None) and (ROCM_HOME is not None)
4343
BUILD_CUDA_SOURCES = (torch.cuda.is_available() and ((CUDA_HOME is not None) or IS_ROCM)) or FORCE_CUDA
4444

45-
PACKAGE_NAME = "torchvision"
45+
package_name = os.getenv("TORCHVISION_PACKAGE_NAME", "torchvision")
4646

4747
print("Torchvision build configuration:")
4848
print(f"{FORCE_CUDA = }")
@@ -98,7 +98,7 @@ def get_dist(pkgname):
9898
except DistributionNotFound:
9999
return None
100100

101-
pytorch_dep = "torch"
101+
pytorch_dep = os.getenv("TORCH_PACKAGE_NAME", "torch")
102102
if os.getenv("PYTORCH_VERSION"):
103103
pytorch_dep += "==" + os.getenv("PYTORCH_VERSION")
104104

@@ -561,7 +561,7 @@ def run(self):
561561
version, sha = get_version()
562562
write_version_file(version, sha)
563563

564-
print(f"Building wheel {PACKAGE_NAME}-{version}")
564+
print(f"Building wheel {package_name}-{version}")
565565

566566
with open("README.md") as f:
567567
readme = f.read()
@@ -573,7 +573,7 @@ def run(self):
573573
]
574574

575575
setup(
576-
name=PACKAGE_NAME,
576+
name=package_name,
577577
version=version,
578578
author="PyTorch Core Team",
579579
author_email="[email protected]",
@@ -583,7 +583,7 @@ def run(self):
583583
long_description_content_type="text/markdown",
584584
license="BSD",
585585
packages=find_packages(exclude=("test",)),
586-
package_data={PACKAGE_NAME: ["*.dll", "*.dylib", "*.so", "prototype/datasets/_builtin/*.categories"]},
586+
package_data={package_name: ["*.dll", "*.dylib", "*.so", "prototype/datasets/_builtin/*.categories"]},
587587
zip_safe=False,
588588
install_requires=get_requirements(),
589589
extras_require={

test/test_transforms_v2.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6169,3 +6169,50 @@ def test_transform_sequence_len_error(self, quality):
61696169
def test_transform_invalid_quality_error(self, quality):
61706170
with pytest.raises(ValueError, match="quality must be an integer from 1 to 100"):
61716171
transforms.JPEG(quality=quality)
6172+
6173+
6174+
class TestUtils:
6175+
# TODO: Still need to test has_all, has_any, check_type and get_bounding_boxes
6176+
@pytest.mark.parametrize(
6177+
"make_input1", [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask]
6178+
)
6179+
@pytest.mark.parametrize(
6180+
"make_input2", [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask]
6181+
)
6182+
@pytest.mark.parametrize("query", [transforms.query_size, transforms.query_chw])
6183+
def test_query_size_and_query_chw(self, make_input1, make_input2, query):
6184+
size = (32, 64)
6185+
input1 = make_input1(size)
6186+
input2 = make_input2(size)
6187+
6188+
if query is transforms.query_chw and not any(
6189+
transforms.check_type(inpt, (is_pure_tensor, tv_tensors.Image, PIL.Image.Image, tv_tensors.Video))
6190+
for inpt in (input1, input2)
6191+
):
6192+
return
6193+
6194+
expected = size if query is transforms.query_size else ((3,) + size)
6195+
assert query([input1, input2]) == expected
6196+
6197+
@pytest.mark.parametrize(
6198+
"make_input1", [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask]
6199+
)
6200+
@pytest.mark.parametrize(
6201+
"make_input2", [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask]
6202+
)
6203+
@pytest.mark.parametrize("query", [transforms.query_size, transforms.query_chw])
6204+
def test_different_sizes(self, make_input1, make_input2, query):
6205+
input1 = make_input1((10, 10))
6206+
input2 = make_input2((20, 20))
6207+
if query is transforms.query_chw and not all(
6208+
transforms.check_type(inpt, (is_pure_tensor, tv_tensors.Image, PIL.Image.Image, tv_tensors.Video))
6209+
for inpt in (input1, input2)
6210+
):
6211+
return
6212+
with pytest.raises(ValueError, match="Found multiple"):
6213+
query([input1, input2])
6214+
6215+
@pytest.mark.parametrize("query", [transforms.query_size, transforms.query_chw])
6216+
def test_no_valid_input(self, query):
6217+
with pytest.raises(TypeError, match="No image"):
6218+
query(["blah"])

0 commit comments

Comments
 (0)