35 | 35 | import numpy as np |
36 | 36 | import torch |
37 | 37 | from jde.utils.datasets import letterbox |
| 38 | +from torchvision import transforms |
38 | 39 |
39 | | -__all__ = ["JDECustomMapper", "LinearMapper"] |
| 40 | +__all__ = ["YOLOXCustomMapper", "JDECustomMapper", "LinearMapper"] |
| 41 | + |
| 42 | + |
| 43 | +def yolox_style_scaling(img, input_size, padding=False): |
| 44 | +    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])  # uniform scale that fits the image inside input_size |
| 45 | + |
| 46 | + resized_img = cv2.resize( |
| 47 | + img, |
| 48 | + (int(img.shape[1] * r), int(img.shape[0] * r)), |
| 49 | + interpolation=cv2.INTER_LINEAR, |
| 50 | + ).astype(np.uint8) |
| 51 | + |
| 52 | + if padding: |
| 53 | +        padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114  # grey padding value used by YOLOX |
| 54 | + padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img |
| 55 | + |
| 56 | + return padded_img |
| 57 | + |
| 58 | + return resized_img |
| 59 | + |
| 60 | + |
| 61 | +class YOLOXCustomMapper: |
| 62 | + """ |
| 63 | +    A callable that takes a dataset dict in the CompressAI-Vision generic dataset format, intended for YOLOX evaluation, |
| 64 | +    and maps it into the format used by the model. |
| 65 | +
| 66 | +    This is the default callable for mapping your dataset dict into inference data. |
| 67 | +
| 68 | +    This callable function is based on the |
| 69 | +    preproc function at |
| 70 | +    <https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/data/data_augment.py> |
| 71 | +
| 72 | +    The full license statement can be found at |
| 73 | +    <https://github.com/Megvii-BaseDetection/YOLOX?tab=Apache-2.0-1-ov-file#readme> |
| 74 | +
| 75 | + """ |
| 76 | + |
| 77 | +    def __init__(self, img_size=(640, 640), aug_transforms=None): |
| 78 | + """ |
| 79 | + Args: |
| 80 | + img_size: expected input size (Height, Width) |
| 81 | + """ |
| 82 | + |
| 83 | + self.input_img_size = img_size |
| 84 | + |
| 85 | +        if aug_transforms is not None: |
| 86 | + self.aug_transforms = aug_transforms |
| 87 | + else: |
| 88 | + self.aug_transforms = transforms.Compose([transforms.ToTensor()]) |
| 89 | + |
| 90 | + def __call__(self, dataset_dict): |
| 91 | + """ |
| 92 | + Args: |
| 93 | + dataset_dict (dict): Metadata of one image. |
| 94 | +
| 95 | +        Returns: |
| 96 | +            dict: a dict in the format that compressai-vision pipelines accept |
| 97 | + """ |
| 98 | + |
| 99 | + dataset_dict = copy.deepcopy(dataset_dict) |
| 100 | +        # the copy will be modified by the code below |
| 101 | + |
| 102 | +        dataset_dict.pop("annotations", None)  # annotations are not needed for inference |
| 103 | + |
| 104 | +        # replicate the implementation of the original code |
| 105 | + # Read image |
| 106 | +        org_img = cv2.imread(dataset_dict["file_name"])  # returns the image in BGR order by default |
| 107 | + |
| 108 | +        assert ( |
| 109 | +            org_img is not None and len(org_img.shape) == 3 |
| 110 | +        ), f"failed to read a 3-channel image from {dataset_dict['file_name']}" |
| 111 | + |
| 112 | + dataset_dict["height"], dataset_dict["width"], _ = org_img.shape |
| 113 | + |
| 114 | + # yolox style input scaling |
| 115 | +        # 1st pass: resize so the image fits within the target size |
| 116 | + resized_img = yolox_style_scaling(org_img, self.input_img_size) |
| 117 | +        # 2nd pass: the scale is now ~1, so this mainly pads to the full target size |
| 118 | + resized_img = yolox_style_scaling( |
| 119 | + resized_img, self.input_img_size, padding=True |
| 120 | + ) |
| 121 | + |
| 122 | + tensor_image = self.aug_transforms( |
| 123 | + np.ascontiguousarray(resized_img, dtype=np.float32) |
| 124 | + ) |
| 125 | + |
| 126 | +        # previous implementation, kept for reference: |
| 127 | + # kept BGR & swap axis |
| 128 | + # image = resized_img.transpose(2, 0, 1) |
| 129 | + # normalize contiguous array of image |
| 130 | + # image = np.ascontiguousarray(image, dtype=np.float32) |
| 131 | + # to tensor |
| 132 | + # tensor_image = torch.as_tensor(image) |
| 133 | + |
| 134 | + dataset_dict["image"] = tensor_image |
| 135 | + |
| 136 | + return dataset_dict |
40 | 137 |
41 | 138 |
42 | 139 | class JDECustomMapper: |
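For reference, a minimal usage sketch of the added code (not part of the commit). The import path and the image file name are hypothetical placeholders; the rest follows the patch above, assuming the default torchvision ToTensor transform.

from mappers import yolox_style_scaling, YOLOXCustomMapper  # hypothetical import path

import numpy as np

# yolox_style_scaling alone: the first pass resizes while keeping the aspect
# ratio, the second pass fills the remainder with grey (114) padding
img = np.zeros((480, 640, 3), dtype=np.uint8)                    # dummy 480x640 BGR image
resized = yolox_style_scaling(img, (640, 640))                   # -> shape (480, 640, 3)
padded = yolox_style_scaling(resized, (640, 640), padding=True)  # -> shape (640, 640, 3)

# the mapper applies both passes to an image read from disk and returns a
# CHW float tensor in BGR order with values in [0, 255] (ToTensor does not
# rescale float32 inputs), plus the original "height"/"width"
mapper = YOLOXCustomMapper(img_size=(640, 640))
sample = {"file_name": "example.jpg"}  # hypothetical test image
out = mapper(sample)
print(out["image"].shape)              # torch.Size([3, 640, 640])
print(out["height"], out["width"])     # original image size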