Skip to content

[BUG] pillowresize does not match transforms.Resize (BILINEAR), and resize does not match cv2.resize #261

@631068264

Description

@631068264

Describe the bug
The CV-CUDA results cannot be aligned with (do not numerically match) the torchvision / OpenCV results.

Steps/Code to reproduce bug

I run both pipelines on the same image and compare the results using MSE.


def np_to_cuda_buffer(host_data, dtype=None) -> torch.Tensor:
    """Copy host data into a torch tensor on the CUDA device.

    Args:
        host_data (numpy array): Host data; anything ``torch.as_tensor``
            accepts (this file also passes plain tuples of floats).
        dtype: Optional dtype, translated with ``to_torch_dtype``. When
            ``None``, ``torch.as_tensor`` infers the dtype from ``host_data``.

    Returns:
        torch.Tensor: The data as a tensor on the ``"cuda"`` device.
    """
    # Compare against None explicitly: numpy dtype *instances* such as
    # np.dtype("float32") report len() == 0 and are therefore falsy, so a
    # plain truthiness test would silently drop the requested dtype.
    torch_dtype = to_torch_dtype(dtype) if dtype is not None else None
    # device="cuda" already places the tensor on the GPU, so no extra
    # .cuda() call is needed afterwards.
    return torch.as_tensor(host_data, dtype=torch_dtype, device="cuda")


class CVCudaUtils:
    """Image preprocessing helpers (resize + normalize) built on CV-CUDA.

    Every CV-CUDA operator in this class is launched on the shared
    class-level ``stream``.
    """
    # current_cache_limit = nvcv.get_cache_limit_inbytes()
    # print(current_cache_limit)
    # One CV-CUDA stream, created at class-definition time and shared by all calls.
    stream = cvcuda.Stream()
    # Translation table from InterpolationMode members to the CV-CUDA
    # interpolation enum passed to cvcuda.pillowresize.
    PIL_INTERPOLATION_MODE = {
        InterpolationMode.NEAREST: cvcuda.Interp.NEAREST,
        InterpolationMode.BILINEAR: cvcuda.Interp.LINEAR,
        InterpolationMode.BICUBIC: cvcuda.Interp.CUBIC,
        InterpolationMode.BOX: cvcuda.Interp.BOX,
        InterpolationMode.HAMMING: cvcuda.Interp.HAMMING,
        InterpolationMode.LANCZOS: cvcuda.Interp.LANCZOS,
    }

    @classmethod
    def _normalize(cls, cv_tensor, mean_params=(0.485, 0.456, 0.406), std_params=(0.229, 0.224, 0.225),
                   dtype=np.float32):
        """
        Normalize an image tensor and rearrange dimensions.

        Args:
            cv_tensor: CV-CUDA tensor holding the image; it is reformatted
                from HWC below, so an HWC layout is assumed.
            mean_params: Per-channel mean, reshaped to (1, 1, 3).
            std_params: Per-channel standard deviation, reshaped to (1, 1, 3).
            dtype: numpy dtype of the returned array. Only the final cast
                uses it; the CV-CUDA operators always run in float32.

        Returns:
            A CuPy array with a leading batch axis in front of the CHW data.
        """
        # https://github.com/CVCUDA/CV-CUDA/issues/260 Only use float32 work
        mean_cp = np_to_cuda_buffer(mean_params, dtype=np.float32).reshape(1, 1, 3)
        std_cp = np_to_cuda_buffer(std_params, dtype=np.float32).reshape(1, 1, 3)
        mean_tensor = cvcuda.as_tensor(mean_cp, nvcv.TensorLayout.HWC)
        std_tensor = cvcuda.as_tensor(std_cp, nvcv.TensorLayout.HWC)
        # Convert the image to float32 and scale by 1/255
        # (maps uint8 input to [0, 1] -- assumes 8-bit input, TODO confirm).
        cv_tensor = cvcuda.convertto(cv_tensor, nvcv.Type.F32, scale=1.0 / 255.0, stream=cls.stream)

        # Normalize using mean and std (broadcast across height and width)
        cv_tensor = cvcuda.normalize(cv_tensor,
                                     base=mean_tensor,
                                     scale=std_tensor,
                                     flags=cvcuda.NormalizeFlags.SCALE_IS_STDDEV, stream=cls.stream)

        # Rearrange dimensions from HWC to CHW; the batch dimension is added
        # on the CuPy side via [None], followed by the cast to `dtype`.
        cv_tensor = cvcuda.reformat(cv_tensor, nvcv.TensorLayout.CHW, stream=cls.stream)
        cp_img = cp.asarray(cv_tensor.cuda())[None].astype(cp.dtype(str(np.dtype(dtype))))
        return cp_img

    @classmethod
    def resize_normalize(cls, img, resize, interp=InterpolationMode.BILINEAR,
                         mean_params=(0.485, 0.456, 0.406),
                         std_params=(0.229, 0.224, 0.225),
                         dtype=np.float32):
        """
        Resize an HWC image with cvcuda.pillowresize, then normalize it.

        Args:
            img: Host image data wrapped as an HWC tensor. The resize call
                hard-codes 3 channels and Format.RGB8, so 8-bit RGB input is
                assumed.
            resize: Either an int or a 2-element tuple/list ``(new_h, new_w)``.
                An int sets the shorter side to exactly ``resize`` and the
                longer side to ``int(resize * long / short)``, keeping the
                aspect ratio (intended to follow the rule documented for
                torchvision ``transforms.Resize`` -- verify against the
                installed torchvision version). A pair is used verbatim.
            interp: Key into ``PIL_INTERPOLATION_MODE``.
            mean_params: Per-channel mean forwarded to ``_normalize``.
            std_params: Per-channel standard deviation forwarded to ``_normalize``.
            dtype: numpy dtype of the returned array.

        Returns:
            The CuPy array produced by ``_normalize``.

        Raises:
            ValueError: If ``resize`` is neither an int nor a 2-element
                tuple/list.
        """
        cv_tensor = cvcuda.as_tensor(np_to_cuda_buffer(img), nvcv.TensorLayout.HWC)
        h, w, _ = cv_tensor.shape
        # Scale proportionally according to the shortest side.
        if isinstance(resize, int):
            # Pin the short side to `resize` exactly. Going through a float
            # scale factor (h * (resize / h)) can land just below `resize`
            # and truncate to resize - 1.
            if w <= h:
                new_w = resize
                new_h = int(resize * h / w)
            else:
                new_h = resize
                new_w = int(resize * w / h)
        elif isinstance(resize, (tuple, list)) and len(resize) == 2:
            new_h, new_w = resize
        else:
            raise ValueError(f"Invalid resize: {resize}")

        cv_tensor = cvcuda.pillowresize(cv_tensor, (new_h, new_w, 3), format=cvcuda.Format.RGB8,
                                        interp=cls.PIL_INTERPOLATION_MODE[interp], stream=cls.stream)
        cp_img = cls._normalize(cv_tensor, mean_params, std_params, dtype)
        return cp_img







def mse(a, b):
    """Print comparison statistics for two arrays and report whether they match.

    Args:
        a: First array (numpy).
        b: Second array (numpy), broadcast-compatible with ``a``.

    Returns:
        Truthy when the mean squared error between ``a`` and ``b`` is below
        ``1e-6``.
    """
    print(a.shape)
    print(b.shape)
    atol = 1e-6
    # One float32 difference feeds both statistics. Casting to an integer
    # type first would truncate normalized float values and make the reported
    # max difference meaningless.
    delta = a.astype(np.float32) - b.astype(np.float32)
    diff = np.mean(delta ** 2)
    max_diff = np.abs(delta).max()
    abs_diff = np.allclose(a, b, atol=atol)
    print(
        f'MSE: {diff}, | Max diff: {max_diff}| abs_diff {atol}: {abs_diff} | abs_equal| {TestUtils.equal(a, b, logger=logger)}')
    return diff < atol


def test_resize_normalize_cvcuda():
    """Run one image through the torchvision and CV-CUDA pipelines and compare.

    The verdict returned by ``mse`` is printed; nothing is asserted.
    """
    image_url = 'xxxxx'

    # get_image output is converted BGR -> RGB before either pipeline sees it.
    rgb = cv2.cvtColor(get_image(image_url), cv2.COLOR_BGR2RGB)

    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)
    # Explicit target size; the alternative experiment is `resize = 320`,
    # which scales by the short side instead.
    resize = (768, 512)
    # Only needed by the disabled center-crop variant of both pipelines.
    crop_size = 320
    dtype = np.float16

    def torch_preprocess(img):
        """Reference pipeline: torchvision Resize -> ToTensor -> Normalize."""
        pipeline = transforms.Compose([
            transforms.Resize(size=resize),
            # transforms.CenterCrop(size=crop_size) is disabled here.
            transforms.ToTensor(),
            transforms.Normalize(mean, std)
        ])
        batched = pipeline(pil_utils.exif_image(img)).unsqueeze(0)
        return batched.numpy().astype(dtype)

    def cvcuda_preprocess(img):
        """Pipeline under test: CVCudaUtils.resize_normalize on the raw image."""
        # Earlier experiments (a resize_center_crop_normalize variant, and
        # resizing with torchvision before handing the array to CV-CUDA)
        # are not part of this run.
        return CVCudaUtils.resize_normalize(img, resize=resize, dtype=dtype)

    expected = torch_preprocess(rgb)
    actual = cvcuda_preprocess(rgb)
    # resize_normalize returns a CuPy array; .get() yields the array handed to mse.
    print(mse(expected, actual.get()))


torch_preprocess()

When I use

resize=320
the results align, but I have to calculate new_h and new_w myself, which is ridiculous.

(1, 3, 320, 320)
(1, 3, 320, 320)
MSE: 2.348394367857054e-08, | Max diff: 0| abs_diff 1e-06: False | abs_equal| None
SDK 2025-08-01 03:50:11.150 triton_sdk.framework.util - equal - line:68 INFO TestUtils equal False
True

But with resize = (768, 512), the MSE becomes larger than 1e-6, and this affects my model's results.


(1, 3, 768, 512)
(1, 3, 768, 512)
MSE: 2.0567511000990635e-06, | Max diff: 1| abs_diff 1e-06: False | abs_equal| None
SDK 2025-08-01 03:51:06.764 triton_sdk.framework.util - equal - line:68 INFO TestUtils equal False
False

Expected behavior
https://developer.nvidia.com/zh-cn/blog/cv-cuda-high-performance-image-processing/

I read this blog post, which says that CV-CUDA can replace libraries like OpenCV and torchvision and that its results are aligned with theirs, but that does not match what I observe.

Environment overview (please complete the following information)

  • Environment location: [Bare-metal, Docker, Cloud(specify cloud provider)]
  • Method of CV-CUDA install: [Docker, pip, or from source]
    • If method of install is [Docker], provide docker pull & docker run commands used
      pip install cvcuda-cu12==0.15.0
      Environment details
      Please run and paste the output of the cvcuda/print_env.sh script here, to gather any other relevant environment details

Additional context
Add any other context about the problem here.

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working as expected (software, install, documentation)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions