-
Notifications
You must be signed in to change notification settings - Fork 243
Open
Labels
bug: Something isn't working as expected (software, install, documentation)
Description
Describe the bug
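The output of the CV-CUDA preprocessing pipeline (resize + normalize) does not numerically align with the torchvision output for the same image.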
Steps/Code to reproduce bug
I run the same image through both pipelines and use the MSE to compare the results:
def np_to_cuda_buffer(host_data, dtype=None) -> torch.Tensor:
    """Copy host data into a tensor on the CUDA device.

    Args:
        host_data: Host data (e.g. a numpy array or a sequence of numbers).
        dtype: Optional element type. When given, it is translated with
            ``to_torch_dtype`` before the tensor is created; when ``None``
            the element type is inferred from ``host_data``.

    Returns:
        torch.Tensor: A tensor holding ``host_data`` on the ``"cuda"`` device.
    """
    # Compare against None explicitly: a dtype object is not guaranteed to be
    # truthy (non-structured ``numpy.dtype`` instances have a length of 0), so
    # a bare ``if dtype`` could silently drop a requested conversion.
    torch_dtype = to_torch_dtype(dtype) if dtype is not None else None
    # ``device="cuda"`` already places the tensor on the GPU, so a trailing
    # ``.cuda()`` call would be a no-op.
    return torch.as_tensor(host_data, dtype=torch_dtype, device="cuda")
class CVCudaUtils:
    """Image preprocessing helpers built on CV-CUDA operators.

    Every CV-CUDA operator in this class is issued on the class-level
    ``stream``.
    """

    # current_cache_limit = nvcv.get_cache_limit_inbytes()
    # print(current_cache_limit)
    stream = cvcuda.Stream()

    # Maps the ``InterpolationMode`` accepted by the public methods to the
    # CV-CUDA interpolation enum passed to ``cvcuda.pillowresize``.
    PIL_INTERPOLATION_MODE = {
        InterpolationMode.NEAREST: cvcuda.Interp.NEAREST,
        InterpolationMode.BILINEAR: cvcuda.Interp.LINEAR,
        InterpolationMode.BICUBIC: cvcuda.Interp.CUBIC,
        InterpolationMode.BOX: cvcuda.Interp.BOX,
        InterpolationMode.HAMMING: cvcuda.Interp.HAMMING,
        InterpolationMode.LANCZOS: cvcuda.Interp.LANCZOS,
    }

    @classmethod
    def _normalize(cls, cv_tensor, mean_params=(0.485, 0.456, 0.406), std_params=(0.229, 0.224, 0.225),
                   dtype=np.float32):
        """Scale, normalize and re-layout an HWC image tensor.

        Args:
            cv_tensor: CV-CUDA tensor in HWC layout (3 channels are assumed,
                since mean/std are reshaped to ``(1, 1, 3)``).
            mean_params: Per-channel mean subtracted after scaling by 1/255.
            std_params: Per-channel standard deviation to divide by.
            dtype: Element type of the returned array.

        Returns:
            A CuPy array in CHW layout with a leading batch axis of size 1,
            cast to ``dtype``.
        """
        # https://github.com/CVCUDA/CV-CUDA/issues/260 -- only float32 works
        # for the mean/std tensors, so they are always created as float32.
        mean_cp = np_to_cuda_buffer(mean_params, dtype=np.float32).reshape(1, 1, 3)
        std_cp = np_to_cuda_buffer(std_params, dtype=np.float32).reshape(1, 1, 3)
        mean_tensor = cvcuda.as_tensor(mean_cp, nvcv.TensorLayout.HWC)
        std_tensor = cvcuda.as_tensor(std_cp, nvcv.TensorLayout.HWC)

        # Convert the image to float32 and scale it by 1/255.
        cv_tensor = cvcuda.convertto(cv_tensor, nvcv.Type.F32, scale=1.0 / 255.0, stream=cls.stream)

        # Normalize using mean and std (broadcast across height and width).
        cv_tensor = cvcuda.normalize(cv_tensor,
                                     base=mean_tensor,
                                     scale=std_tensor,
                                     flags=cvcuda.NormalizeFlags.SCALE_IS_STDDEV, stream=cls.stream)

        # Rearrange dimensions from HWC to CHW and add a batch dimension.
        cv_tensor = cvcuda.reformat(cv_tensor, nvcv.TensorLayout.CHW, stream=cls.stream)
        # NOTE(review): the result is handed to CuPy without an explicit
        # synchronization with ``cls.stream`` -- confirm the ordering between
        # that stream and CuPy's current stream is guaranteed.
        cp_img = cp.asarray(cv_tensor.cuda())[None].astype(cp.dtype(str(np.dtype(dtype))))
        return cp_img

    @staticmethod
    def _output_size(h, w, resize):
        """Return the ``(new_h, new_w)`` target size for ``resize``.

        An int scales the shorter side to exactly ``resize`` and the longer
        side to ``int(resize * long / short)``, the formula torchvision's
        ``Resize`` uses for an int size. A two-element tuple or list is taken
        as ``(new_h, new_w)`` directly.

        Raises:
            ValueError: If ``resize`` is neither an int nor a 2-element
                tuple/list.
        """
        if isinstance(resize, int):
            # Pin the short side to ``resize`` instead of computing
            # ``int(short * (resize / short))``: the float round trip can
            # truncate to ``resize - 1``.
            if w <= h:
                return int(resize * h / w), resize
            return resize, int(resize * w / h)
        if isinstance(resize, (tuple, list)) and len(resize) == 2:
            new_h, new_w = resize
            return new_h, new_w
        raise ValueError(f"Invalid resize: {resize}")

    @classmethod
    def resize_normalize(cls, img, resize, interp=InterpolationMode.BILINEAR,
                         mean_params=(0.485, 0.456, 0.406),
                         std_params=(0.229, 0.224, 0.225),
                         dtype=np.float32):
        """Resize an HWC image on the GPU, then normalize it.

        Args:
            img: Host image data in HWC layout (3-channel RGB is assumed,
                since the resize is issued with ``cvcuda.Format.RGB8``).
            resize: Either an int (the shorter side is scaled to this value,
                keeping the aspect ratio) or a ``(new_h, new_w)`` pair.
            interp: Key into ``PIL_INTERPOLATION_MODE``.
            mean_params: Per-channel mean forwarded to ``_normalize``.
            std_params: Per-channel standard deviation forwarded to
                ``_normalize``.
            dtype: Element type of the returned array.

        Returns:
            The array produced by ``_normalize`` for the resized image.

        Raises:
            ValueError: If ``resize`` is neither an int nor a 2-element
                tuple/list.
        """
        cv_tensor = cvcuda.as_tensor(np_to_cuda_buffer(img), nvcv.TensorLayout.HWC)
        h, w, _ = cv_tensor.shape
        # Scale proportionally based on the shortest side (int case), or use
        # the explicit (new_h, new_w) pair.
        new_h, new_w = cls._output_size(h, w, resize)
        cv_tensor = cvcuda.pillowresize(cv_tensor, (new_h, new_w, 3), format=cvcuda.Format.RGB8,
                                        interp=cls.PIL_INTERPOLATION_MODE[interp], stream=cls.stream)
        cp_img = cls._normalize(cv_tensor, mean_params, std_params, dtype)
        return cp_img
def mse(a, b):
    """Print comparison statistics for two arrays and report whether they match.

    Prints both shapes, then a summary line with the mean squared error, the
    maximum absolute element-wise difference, the ``np.allclose`` result at
    ``atol = 1e-6`` and the result of ``TestUtils.equal``.

    Args:
        a: First numpy array.
        b: Second numpy array.

    Returns:
        ``True`` when the mean squared error is below ``1e-6``.
    """
    print(a.shape)
    print(b.shape)
    atol = 1e-6
    # Compute the difference once in float32 and reuse it for both metrics.
    err = a.astype(np.float32) - b.astype(np.float32)
    diff = np.mean(err ** 2)
    # Take the maximum in float32: casting normalized float data to int32
    # first truncates the values and makes the reported maximum meaningless.
    max_diff = np.abs(err).max()
    abs_diff = np.allclose(a, b, atol=atol)
    print(
        f'MSE: {diff}, | Max diff: {max_diff}| abs_diff {atol}: {abs_diff} | abs_equal| {TestUtils.equal(a, b, logger=logger)}')
    return diff < atol
def test_resize_normalize_cvcuda():
    """Compare torchvision and CV-CUDA resize + normalize on the same image.

    The image is loaded with ``get_image``, converted from BGR to RGB, run
    through both preprocessing paths with the same resize, mean, std and
    output dtype, and the two results are compared with ``mse``. The outcome
    is printed rather than asserted.
    """
    image_url = 'xxxxx'
    img = get_image(image_url)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    resize = (768, 512)
    # resize = 320  # alternative configuration: scale the shorter side to 320
    dtype = np.float16

    def torch_preprocess(img):
        """Reference path: torchvision Resize -> ToTensor -> Normalize."""
        img = pil_utils.exif_image(img)
        torch_transform = transforms.Compose([
            transforms.Resize(size=resize),
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
        img = torch_transform(img)
        # Add the batch axis so the shape matches the CV-CUDA output.
        img = img.unsqueeze(0)
        return img.numpy().astype(dtype)

    def cvcuda_preprocess(img):
        """Path under test: CV-CUDA resize + normalize."""
        # Pass mean/std explicitly so both paths are guaranteed to use the
        # same normalization parameters.
        return CVCudaUtils.resize_normalize(img, resize=resize, mean_params=mean,
                                            std_params=std, dtype=dtype)

    a = torch_preprocess(img)
    b = cvcuda_preprocess(img)
    # ``b`` is a CuPy array; ``.get()`` copies it to host memory for numpy.
    print(mse(a, b.get()))
When I use resize = 320, the results align, but I have to calculate new_h and new_w myself, which seems unreasonable.
(1, 3, 320, 320)
(1, 3, 320, 320)
MSE: 2.348394367857054e-08, | Max diff: 0| abs_diff 1e-06: False | abs_equal| None
SDK 2025-08-01 03:50:11.150 triton_sdk.framework.util - equal - line:68 INFO TestUtils equal False
True
But with resize = (768, 512), the MSE becomes larger than 1e-6, and this affects my model's results.
(1, 3, 768, 512)
(1, 3, 768, 512)
MSE: 2.0567511000990635e-06, | Max diff: 1| abs_diff 1e-06: False | abs_equal| None
SDK 2025-08-01 03:51:06.764 triton_sdk.framework.util - equal - line:68 INFO TestUtils equal False
False
Expected behavior
https://developer.nvidia.com/zh-cn/blog/cv-cuda-high-performance-image-processing/
I read this blog post, which says that CV-CUDA can replace libraries such as OpenCV and torchvision and that its results are aligned with theirs, but that does not match what I observe.
Environment overview (please complete the following information)
- Environment location: [Bare-metal, Docker, Cloud(specify cloud provider)]
- Method of CV-CUDA install: [Docker, pip, or from source]
- If method of install is [Docker], provide
docker pull & docker run commands used
pip install cvcuda-cu12==0.15.0
Environment details
Please run and paste the output of the cvcuda/print_env.sh script here, to gather any other relevant environment details
- If method of install is [Docker], provide
Additional context
Add any other context about the problem here.
Metadata
Metadata
Assignees
Labels
bug: Something isn't working as expected (software, install, documentation)