Skip to content

Commit 4cff7b1

Browse files
CVCUDA backend design
Summary: Users have to explicitly opt in to the CV-CUDA-backed transforms. Here we provide the first building block for this interface. We add the functionals `to_nvcv_tensor` and `nvcv_to_tensor` to convert between `torch.Tensor` and `nvcv.Tensor`. We also implement the corresponding class transforms `ToNVCVTensor` and `NVCVToTensor`.

## How to use

```python
from PIL import Image
import torchvision.transforms.v2.functional as F

orig_img = Image.open("leaning_tower.jpg")
img_tensor = F.pil_to_tensor(orig_img)
nvcv_tensor = F.to_nvcv_tensor(img_tensor.cuda())
img_tensor = F.nvcv_to_tensor(nvcv_tensor)
```

NOTE: NVCV tensors are automatically converted to NHWC format, contrary to the torchvision convention, which relies on NCHW format.

Differential Revision: D85862362
1 parent ca22124 commit 4cff7b1

File tree

6 files changed

+526
-0
lines changed

6 files changed

+526
-0
lines changed

test/test_cvcuda.py

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
import pytest
import torch
from torchvision import _is_cvcuda_available
from torchvision.transforms.v2 import functional as F

# Probe the optional backends once at import time; the skipif markers in this
# module read these flags.
CVCUDA_AVAILABLE = _is_cvcuda_available()
CUDA_AVAILABLE = torch.cuda.is_available()

# nvcv can only be imported when CV-CUDA is installed, so keep the import guarded.
if CVCUDA_AVAILABLE:
    import nvcv
12+
@pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
@pytest.mark.skipif(not CUDA_AVAILABLE, reason="test requires CUDA")
class TestToNvcvTensor:
    """Exercise ``F.to_nvcv_tensor`` over channel counts, dtypes, explicit formats and bad inputs.

    The layout of the suite follows the patterns used by TestToPil.
    """

    @staticmethod
    def _assert_converts(pic, **kwargs):
        """Run ``F.to_nvcv_tensor`` on ``pic`` and check that a result object came back."""
        converted = F.to_nvcv_tensor(pic, **kwargs)
        assert converted is not None

    # ---- CUDA tensors, format inferred from channels and dtype ----

    def test_1_channel_uint8_tensor_to_nvcv_tensor(self):
        self._assert_converts(torch.ByteTensor(1, 4, 4).random_(0, 255).cuda())

    def test_1_channel_int16_tensor_to_nvcv_tensor(self):
        self._assert_converts(torch.ShortTensor(1, 4, 4).random_().cuda())

    def test_1_channel_int32_tensor_to_nvcv_tensor(self):
        self._assert_converts(torch.IntTensor(1, 4, 4).random_().cuda())

    def test_1_channel_float32_tensor_to_nvcv_tensor(self):
        self._assert_converts(torch.Tensor(1, 4, 4).uniform_().cuda())

    def test_2_channel_uint8_tensor_to_nvcv_tensor(self):
        two_channel = torch.ByteTensor(2, 4, 4).random_(0, 255).cuda()
        # NVCV doesn't support 2-channel uint8 images
        with pytest.raises(TypeError, match="Unsupported dtype.*for 2-channel image"):
            F.to_nvcv_tensor(two_channel)

    def test_2_channel_float32_tensor_to_nvcv_tensor(self):
        self._assert_converts(torch.Tensor(2, 4, 4).uniform_().cuda())

    def test_3_channel_uint8_tensor_to_nvcv_tensor(self):
        self._assert_converts(torch.ByteTensor(3, 4, 4).random_(0, 255).cuda())

    def test_3_channel_float32_tensor_to_nvcv_tensor(self):
        self._assert_converts(torch.Tensor(3, 4, 4).uniform_().cuda())

    def test_4_channel_uint8_tensor_to_nvcv_tensor(self):
        self._assert_converts(torch.ByteTensor(4, 4, 4).random_(0, 255).cuda())

    def test_4_channel_float32_tensor_to_nvcv_tensor(self):
        self._assert_converts(torch.Tensor(4, 4, 4).uniform_().cuda())

    # ---- 2D CUDA tensors (no channel dimension) ----

    def test_2d_uint8_tensor_to_nvcv_tensor(self):
        self._assert_converts(torch.ByteTensor(4, 4).random_(0, 255).cuda())

    def test_2d_float32_tensor_to_nvcv_tensor(self):
        self._assert_converts(torch.Tensor(4, 4).uniform_().cuda())

    # ---- numpy ndarrays, channels-last ----

    def test_1_channel_uint8_ndarray_to_nvcv_tensor(self):
        self._assert_converts(torch.ByteTensor(4, 4, 1).random_(0, 255).numpy())

    def test_3_channel_uint8_ndarray_to_nvcv_tensor(self):
        self._assert_converts(torch.ByteTensor(4, 4, 3).random_(0, 255).numpy())

    def test_4_channel_uint8_ndarray_to_nvcv_tensor(self):
        self._assert_converts(torch.ByteTensor(4, 4, 4).random_(0, 255).numpy())

    # ---- explicitly requested formats ----

    def test_explicit_format_rgb8(self):
        rgb = torch.ByteTensor(3, 4, 4).random_(0, 255).cuda()
        self._assert_converts(rgb, format=nvcv.Format.RGB8)

    def test_explicit_format_bgr8(self):
        bgr = torch.ByteTensor(3, 4, 4).random_(0, 255).cuda()
        self._assert_converts(bgr, format=nvcv.Format.BGR8)

    def test_explicit_format_hsv8(self):
        hsv = torch.ByteTensor(3, 4, 4).random_(0, 255).cuda()
        # HSV8 should work for 3-channel images
        self._assert_converts(hsv, format=nvcv.Format.HSV8)

    def test_explicit_format_rgba8(self):
        rgba = torch.ByteTensor(4, 4, 4).random_(0, 255).cuda()
        self._assert_converts(rgba, format=nvcv.Format.RGBA8)

    def test_explicit_format_bgra8(self):
        bgra = torch.ByteTensor(4, 4, 4).random_(0, 255).cuda()
        # BGRA8 should work for 4-channel images
        self._assert_converts(bgra, format=nvcv.Format.BGRA8)

    # ---- rejected inputs ----

    def test_invalid_input_type(self):
        with pytest.raises(TypeError, match=r"pic should be Tensor or ndarray"):
            F.to_nvcv_tensor("invalid_input")

    def test_invalid_dimensions(self):
        # Test 1D array (too few dimensions)
        with pytest.raises(ValueError, match=r"pic should be 2/3/4 dimensional"):
            F.to_nvcv_tensor(torch.ByteTensor(4).cuda())

        # Test 5D array (too many dimensions)
        with pytest.raises(ValueError, match=r"pic should be 2/3/4 dimensional"):
            F.to_nvcv_tensor(torch.ByteTensor(1, 1, 3, 4, 4).cuda())

    def test_too_many_channels(self):
        with pytest.raises(ValueError, match=r"pic should not have > 4 channels"):
            F.to_nvcv_tensor(torch.ByteTensor(5, 4, 4).random_(0, 255).cuda())

    def test_unsupported_dtype_for_channels(self):
        # Float64 is not supported
        doubles = torch.DoubleTensor(3, 4, 4).uniform_().cuda()
        with pytest.raises(TypeError, match=r"Unsupported dtype"):
            F.to_nvcv_tensor(doubles)
144+
145+
def make_nvcv_image(num_channels=3, dtype=torch.uint8):
    """Build a small NVCV Tensor fixture from a random 4x4 CUDA tensor.

    Args:
        num_channels: size of the leading (channel) dimension of the source tensor.
        dtype: ``torch.uint8`` selects random byte data; any other value selects
            uniformly distributed data from a default-dtype ``torch.Tensor``.

    Returns:
        The result of ``F.to_nvcv_tensor`` applied to the generated tensor.
    """
    wants_bytes = dtype == torch.uint8
    source = (
        torch.ByteTensor(num_channels, 4, 4).random_(0, 255)
        if wants_bytes
        else torch.Tensor(num_channels, 4, 4).uniform_()
    )
    return F.to_nvcv_tensor(source.cuda())
153+
154+
def transform_cls_to_functional(get_transform_cls):
    """Adapt a lazily resolved transform class to a plain ``fn(inpt)`` callable.

    Args:
        get_transform_cls: zero-argument callable returning a transform class.
            It is invoked on every call of the returned function, not when this
            adapter is created.

    Returns:
        A function that instantiates the class without arguments and applies
        the instance to its single input.
    """

    def wrapper(inpt):
        # Resolve the class, build a default instance and apply it in one go.
        return get_transform_cls()()(inpt)

    return wrapper
160+
161+
def _get_nvcv_to_tensor_cls():
    """Return the ``NVCVToTensor`` transform class, importing it only when called.

    ``torchvision.transforms.v2`` exports the class only when CV-CUDA is
    available, so the lookup is deferred until a test actually runs.
    """
    from torchvision.transforms import v2

    return v2.NVCVToTensor


def _nvcv_to_tensor_functional(inpt):
    """Call ``F.nvcv_to_tensor``, resolving the attribute at call time instead of at collection."""
    return F.nvcv_to_tensor(inpt)


@pytest.mark.skipif(not CVCUDA_AVAILABLE, reason="test requires CVCUDA")
@pytest.mark.skipif(not CUDA_AVAILABLE, reason="test requires CUDA")
class TestNVCVToTensor:
    """Tests for ``F.nvcv_to_tensor`` and the ``NVCVToTensor`` transform class."""

    @pytest.mark.parametrize("num_channels", [1, 3, 4])
    @pytest.mark.parametrize("dtype", [torch.uint8, torch.float32])
    @pytest.mark.parametrize(
        "fn",
        [
            # Neither entry touches a CV-CUDA-only name while the decorator is evaluated.
            # Referencing F.nvcv_to_tensor directly here would raise AttributeError during
            # collection on machines without CV-CUDA, before the skipif markers can apply.
            pytest.param(_nvcv_to_tensor_functional, id="functional"),
            pytest.param(transform_cls_to_functional(_get_nvcv_to_tensor_cls), id="transform"),
        ],
    )
    def test_functional_and_transform(self, num_channels, dtype, fn):
        """Both entry points return a ``torch.Tensor`` whose size matches the functional's output."""
        nvcv_input = make_nvcv_image(num_channels=num_channels, dtype=dtype)
        output = fn(nvcv_input)

        assert isinstance(output, torch.Tensor)
        # Convert the input through the functional to get a reference size.
        reference = F.nvcv_to_tensor(nvcv_input)
        assert F.get_size(output) == F.get_size(reference)

    def test_functional_error(self):
        """An object that is not an NVCV Tensor is rejected with a TypeError."""
        with pytest.raises(TypeError, match="nvcv_img should be NVCV Tensor"):
            F.nvcv_to_tensor(object())

torchvision/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,14 @@ def _is_tracing():
9999
return torch._C._get_tracing_state()
100100

101101

102+
def _is_cvcuda_available() -> bool:
103+
try:
104+
import cvcuda, nvcv
105+
except ImportError:
106+
return False
107+
return True
108+
109+
102110
def disable_beta_transforms_warning():
103111
# Noop, only exists to avoid breaking existing code.
104112
# See https://github.com/pytorch/vision/issues/7896

torchvision/transforms/v2/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,7 @@
5959
from ._utils import check_type, get_bounding_boxes, get_keypoints, has_all, has_any, query_chw, query_size
6060

6161
from ._deprecated import ToTensor # usort: skip
62+
from torchvision import _is_cvcuda_available
63+
64+
if _is_cvcuda_available():
65+
from ._cvcuda import ToNVCVTensor, NVCVToTensor
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
from torchvision.utils import _log_api_usage_once
2+
from torchvision.transforms.v2 import functional as F
3+
4+
class ToNVCVTensor:
    """Turn a tensor or an ndarray into an NVCV Tensor.

    Accepts a torch.*Tensor laid out as C x H x W or a numpy ndarray laid out as
    H x W x C. This transform does not support torchscript.

    Args:
        format (`nvcv.Format`_): optional color format taken from the nvcv.Format enum.
            When ``format`` is ``None`` (default), the format is inferred from the input data:

            - **1 channel images**: chosen from the dtype
                - uint8 → U8, int16 → S16, int32 → S32, float32 → F32
            - **2 channel images**: float32 → _2F32 (only float32 is supported for 2-channel images)
            - **3 channel images**: RGB-based formats by default
                - uint8 → RGB8, float32 → RGBf32
            - **4 channel images**: RGBA-based formats by default
                - uint8 → RGBA8, float32 → RGBAf32

            Examples of explicit formats: nvcv.Format.RGB8, nvcv.Format.BGR8, nvcv.Format.HSV8,
            nvcv.Format.RGBA8, nvcv.Format.BGRA8

    .. _nvcv.Format: https://cvcuda.github.io/CV-CUDA/_python_api/nvcv/format.html
    """

    def __init__(self, format=None):
        _log_api_usage_once(self)
        # Kept as given; ``None`` lets the functional infer the format.
        self.format = format

    def __call__(self, pic):
        """Convert ``pic`` with the format configured on this transform.

        Args:
            pic (Tensor or numpy.ndarray): Image to be converted to NVCV Tensor.

        Returns:
            NVCV Tensor: Image converted to NVCV Tensor.
        """
        return F.to_nvcv_tensor(pic, self.format)

    def __repr__(self) -> str:
        cls_name = self.__class__.__name__
        # The format argument is shown only when one was set explicitly.
        if self.format is None:
            return f"{cls_name}()"
        return f"{cls_name}(format={self.format})"
52+
53+
class NVCVToTensor:
    """Convert an NVCV Tensor to a PyTorch tensor of the same type - this does not scale values.

    This transform does not support torchscript.

    The input is the kind of object produced by ``ToNVCVTensor`` /
    ``F.to_nvcv_tensor``; the work is delegated to ``F.nvcv_to_tensor``, which
    rejects other inputs with a ``TypeError``. The result is documented as a
    tensor in CHW layout.

    Example:
        >>> import torch
        >>> import torchvision.transforms.v2 as T
        >>> img = torch.randint(0, 256, (3, 240, 320), dtype=torch.uint8, device="cuda")
        >>> nvcv_tensor = T.ToNVCVTensor()(img)
        >>> tensor = T.NVCVToTensor()(nvcv_tensor)
    """

    def __init__(self) -> None:
        _log_api_usage_once(self)

    def __call__(self, pic):
        """Convert ``pic`` to a PyTorch tensor.

        Args:
            pic (NVCV Tensor): NVCV Tensor to be converted to a PyTorch tensor.

        Returns:
            Tensor: Converted image in CHW format.
        """
        return F.nvcv_to_tensor(pic)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}()"

torchvision/transforms/v2/functional/__init__.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,3 +165,8 @@
165165
from ._type_conversion import pil_to_tensor, to_image, to_pil_image
166166

167167
from ._deprecated import get_image_size, to_tensor # usort: skip
168+
169+
from torchvision import _is_cvcuda_available
170+
171+
if _is_cvcuda_available():
172+
from ._cvcuda import nvcv_to_tensor, to_nvcv_tensor

0 commit comments

Comments
 (0)