
Commit c7896c7

chyomin06 authored and fracape committed
[feat] added support for split inference with yolox-darknet53
1 parent c5c91e4 commit c7896c7

13 files changed: +556 -41 lines

README.MD

Lines changed: 3 additions & 0 deletions

@@ -20,6 +20,8 @@ It currently focuses on two types of pipeline:
 
 - [JDE](https://github.com/Zhongdao/Towards-Realtime-MOT) is used for Object Tracking
 
+- [YOLOX-Darknet53](https://github.com/Megvii-BaseDetection/YOLOX) is used for object detection
+
 ## Documentation
 
 A complete documentation is provided [here](https://interdigitalinc.github.io/CompressAI-Vision/index.html), including [installation](https://interdigitalinc.github.io/CompressAI-Vision/installation), [CLI usage](https://interdigitalinc.github.io/CompressAI-Vision/cli_usage.html), as well as [tutorials](https://interdigitalinc.github.io/CompressAI-Vision/tutorials).
@@ -136,3 +138,4 @@ Fabien Racapé, Hyomin Choi, Eimran Eimon, Sampsa Riikonen, Jacky Yat-Hong Lam
 * [VVC VTM reference software](https://vcgit.hhi.fraunhofer.de/jvet/VVCSoftware_VTM)
 * [Detectron2](https://detectron2.readthedocs.io/en/latest/index.html)
 * [JDE](https://github.com/Zhongdao/Towards-Realtime-MOT.git)
+* [YOLOX](https://github.com/Megvii-BaseDetection/YOLOX)

cfgs/vision_model/default.yaml

Lines changed: 10 additions & 1 deletion

@@ -37,4 +37,13 @@ jde_1088x608:
   track_buffer: 30
   frame_rate: 30 # It is odd to consider this here, but it follows the original code.
   splits : [36, 61, 74] # MPEG FCM TEST with JDE on TVD
-  #splits : [105, 90, 75] # MPEG FCM TEST with JDE on HiEve
+  #splits : [105, 90, 75] # MPEG FCM TEST with JDE on HiEve
+
+yolox_darknet53:
+  model_path_prefix: ${..model_root_path}
+  cfg: "Built-in configurations"
+  num_classes: 80
+  conf_thres: 0.001
+  nms_thres: 0.65
+  weights: "weights/yolox/darknet53/yolox_darknet.pth"
+  splits: "l13"

compressai_vision/datasets/image.py

Lines changed: 43 additions & 1 deletion

@@ -47,7 +47,7 @@
 
 from compressai_vision.registry import register_datacatalog, register_dataset
 
-from .utils import JDECustomMapper, LinearMapper
+from .utils import JDECustomMapper, LinearMapper, YOLOXCustomMapper
 
 
 def manual_load_data(path, ext):
@@ -295,6 +295,48 @@ def __len__(self):
         return len(self.mapDataset)
 
 
+@register_dataset("YOLOXDataset")
+class YOLOXDataset(BaseDataset):
+    def __init__(self, root, dataset_name, imgs_folder, **kwargs):
+        super().__init__(root, dataset_name, imgs_folder, **kwargs)
+
+        self.dataset = kwargs["dataset"].dataset
+
+        self.sampler = InferenceSampler(len(kwargs["dataset"]))
+        self.collate_fn = bypass_collator
+
+        _dataset = DatasetFromList(self.dataset, copy=False)
+
+        if kwargs["linear_mapper"] is True:
+            mapper = LinearMapper()
+        else:
+            mapper = YOLOXCustomMapper(kwargs["patch_size"])
+
+        self.input_size = kwargs["patch_size"]
+        self.mapDataset = MapDataset(_dataset, mapper)
+        self._org_mapper_func = PicklableWrapper(
+            YOLOXCustomMapper(kwargs["patch_size"])
+        )
+
+        metaData = MetadataCatalog.get(dataset_name)
+        try:
+            self.thing_classes = metaData.thing_classes
+            self.thing_dataset_id_to_contiguous_id = (
+                metaData.thing_dataset_id_to_contiguous_id
+            )
+        except AttributeError:
+            self.logger.warning("No attribute: thing_classes")
+
+    def get_org_mapper_func(self):
+        return self._org_mapper_func
+
+    def __getitem__(self, idx):
+        return self.mapDataset[idx]
+
+    def __len__(self):
+        return len(self.mapDataset)
+
+
 class DataCatalog:
     def __init__(
         self,
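
Note: a hedged construction sketch for the new YOLOXDataset; every argument below is illustrative, since the real kwargs are assembled by the pipeline's config and datacatalog machinery rather than written by hand:

    # Sketch only: hypothetical YOLOXDataset construction.
    from compressai_vision.datasets.image import YOLOXDataset

    def build_yolox_dataset(root, name, datacatalog):
        return YOLOXDataset(
            root=root,
            dataset_name=name,
            imgs_folder="images",      # hypothetical folder name
            dataset=datacatalog,       # must expose a .dataset list of dicts
            patch_size=[640, 640],     # (H, W) passed to YOLOXCustomMapper
            linear_mapper=False,       # True selects LinearMapper instead
        )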

compressai_vision/datasets/utils.py

Lines changed: 98 additions & 1 deletion

@@ -35,8 +35,105 @@
 import numpy as np
 import torch
 from jde.utils.datasets import letterbox
+from torchvision import transforms
 
-__all__ = ["JDECustomMapper", "LinearMapper"]
+__all__ = ["YOLOXCustomMapper", "JDECustomMapper", "LinearMapper"]
+
+
+def yolox_style_scaling(img, input_size, padding=False):
+    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
+
+    resized_img = cv2.resize(
+        img,
+        (int(img.shape[1] * r), int(img.shape[0] * r)),
+        interpolation=cv2.INTER_LINEAR,
+    ).astype(np.uint8)
+
+    if padding:
+        padded_img = np.ones((input_size[0], input_size[1], 3), dtype=np.uint8) * 114
+        padded_img[: int(img.shape[0] * r), : int(img.shape[1] * r)] = resized_img
+
+        return padded_img
+
+    return resized_img
+
+
+class YOLOXCustomMapper:
+    """
+    A callable which takes a dataset dict in the CompressAI-Vision generic dataset
+    format, intended for YOLOX evaluation, and maps it into the format used by the model.
+
+    This is the default callable used to map a dataset dict into inference data.
+
+    This callable is based on the preproc function at
+    <https://github.com/Megvii-BaseDetection/YOLOX/blob/main/yolox/data/data_augment.py>
+
+    The full license statement can be found at
+    <https://github.com/Megvii-BaseDetection/YOLOX?tab=Apache-2.0-1-ov-file#readme>
+
+    """
+
+    def __init__(self, img_size=[640, 640], aug_transforms=None):
+        """
+        Args:
+            img_size: expected input size (Height, Width)
+        """
+
+        self.input_img_size = img_size
+
+        if aug_transforms is not None:
+            self.aug_transforms = aug_transforms
+        else:
+            self.aug_transforms = transforms.Compose([transforms.ToTensor()])
+
+    def __call__(self, dataset_dict):
+        """
+        Args:
+            dataset_dict (dict): Metadata of one image.
+
+        Returns:
+            dict: a format that compressai-vision pipelines accept
+        """
+
+        dataset_dict = copy.deepcopy(dataset_dict)
+        # the copied dictionary will be modified by the code below
+
+        dataset_dict.pop("annotations", None)
+
+        # replicate the implementation of the original code
+        # Read image
+        org_img = cv2.imread(dataset_dict["file_name"])  # returns img in BGR by default
+
+        assert (
+            len(org_img.shape) == 3
+        ), f"detected an input image without 3 channels, {dataset_dict['file_name']}"
+
+        dataset_dict["height"], dataset_dict["width"], _ = org_img.shape
+
+        # yolox style input scaling
+        # 1st scaling
+        resized_img = yolox_style_scaling(org_img, self.input_img_size)
+        # 2nd scaling & padding
+        resized_img = yolox_style_scaling(
+            resized_img, self.input_img_size, padding=True
+        )
+
+        tensor_image = self.aug_transforms(
+            np.ascontiguousarray(resized_img, dtype=np.float32)
+        )
+
+        # old way
+        # keep BGR & swap axes
+        # image = resized_img.transpose(2, 0, 1)
+        # normalize contiguous array of image
+        # image = np.ascontiguousarray(image, dtype=np.float32)
+        # to tensor
+        # tensor_image = torch.as_tensor(image)
+
+        dataset_dict["image"] = tensor_image
+
+        return dataset_dict
 
 
 class JDECustomMapper:
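
Note: a minimal usage sketch of the new YOLOXCustomMapper follows; it writes a dummy image first so the snippet is self-contained (the path and sizes are illustrative):

    # Sketch: exercise YOLOXCustomMapper on a dummy 480x640 image.
    import cv2
    import numpy as np

    from compressai_vision.datasets.utils import YOLOXCustomMapper

    cv2.imwrite("/tmp/dummy.png", np.zeros((480, 640, 3), dtype=np.uint8))

    mapper = YOLOXCustomMapper(img_size=[640, 640])
    sample = mapper({"file_name": "/tmp/dummy.png", "image_id": 0})

    # The mapper letterboxes to 640x640 (value-114 padding) and converts the
    # float32 HWC array to a CHW tensor via torchvision's ToTensor.
    print(sample["image"].shape)              # torch.Size([3, 640, 640])
    print(sample["height"], sample["width"])  # 480 640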

compressai_vision/evaluators/__init__.py

Lines changed: 2 additions & 2 deletions

@@ -35,14 +35,14 @@
     MOT_TVD_Eval,
     OpenImagesChallengeEval,
     VisualQualityEval,
-    YOLOEval,
+    YOLOXCOCOEval,
 )
 
 __all__ = [
     "BaseEvaluator",
     "COCOEVal",
     "OpenImagesChallengeEval",
-    "YOLOEval",
+    "YOLOXCOCOEval",
     "MOT_JDE_Eval",
     "MOT_HiEve_Eval",
     "MOT_TVD_Eval",

compressai_vision/evaluators/evaluators.py

Lines changed: 63 additions & 3 deletions

@@ -29,6 +29,7 @@
 
 import json
 import math
+from collections import defaultdict
 from pathlib import Path
 
 import motmetrics as mm
@@ -37,8 +38,11 @@
 import torch
 from detectron2.evaluation import COCOEvaluator
 from jde.utils.io import unzip_objs
+from pycocotools.coco import COCO
 from pytorch_msssim import ms_ssim
 from tqdm import tqdm
+from yolox.data.datasets.coco import remove_useless_info
+from yolox.evaluators import COCOEvaluator as YOLOX_COCOEvaluator
 
 from compressai_vision.datasets import deccode_compressed_rle
 from compressai_vision.registry import register_evaluator
@@ -627,20 +631,76 @@ def mot_eval(self):
         return self.digest_summary(summary)
 
 
-@register_evaluator("YOLO-EVAL")
-class YOLOEval(BaseEvaluator):
+@register_evaluator("YOLOX-COCO-EVAL")
+class YOLOXCOCOEval(BaseEvaluator):
     def __init__(
         self,
         datacatalog_name,
         dataset_name,
         dataset,
         output_dir="./vision_output/",
-        criteria="AP50",
+        criteria="AP",
     ):
         super().__init__(datacatalog_name, dataset_name, dataset, output_dir, criteria)
 
         self.set_annotation_info(dataset)
 
+        cocoapi = COCO(self.annotation_path)
+        remove_useless_info(cocoapi)
+        class_ids = sorted(cocoapi.getCatIds())
+        cats = cocoapi.loadCats(cocoapi.getCatIds())
+
+        class dummy_dataloader:
+            def __init__(self):
+                class dummy_dataset:
+                    def __init__(self):
+                        self.coco = cocoapi
+                        self.class_ids = class_ids
+                        self.cats = cats
+
+                self.dataset = dummy_dataset()
+                self.batch_size = 1
+
+        dataloader = dummy_dataloader()
+        self._evaluator = YOLOX_COCOEvaluator(
+            dataloader, dataset.input_size, -1, -1, -1
+        )
+        self.reset()
+
+    def reset(self):
+        self.data_list = []
+        self.output_data = defaultdict()
+
+    def digest(self, gt, pred):
+        assert len(gt) == 1
+
+        img_heights = [gt[0]["height"]]
+        img_widths = [gt[0]["width"]]
+        img_ids = [gt[0]["image_id"]]
+
+        data_list_elem, image_wise_data = self._evaluator.convert_to_coco_format(
+            pred, [img_heights, img_widths], img_ids, return_outputs=True
+        )
+        self.data_list.extend(data_list_elem)
+        self.output_data.update(image_wise_data)
+
+    def results(self, save_path: str = None):
+        dummy_statistics = torch.FloatTensor([0, 0, len(self.output_data)])
+        eval_results = self._evaluator.evaluate_prediction(
+            self.data_list, dummy_statistics
+        )
+
+        if save_path:
+            self.write_results(eval_results, save_path)
+
+        self.write_results(eval_results)
+
+        *listed_items, summary = eval_results
+
+        self._logger.info("\n" + summary)
+
+        return {"AP": listed_items[0] * 100, "AP50": listed_items[1] * 100}
+
 
 @register_evaluator("VISUAL-QUALITY-EVAL")
 class VisualQualityEval(BaseEvaluator):
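
Note: the new evaluator is driven through reset()/digest()/results(). A hedged helper showing the expected call pattern (the helper itself is illustrative, not part of this commit):

    # Sketch: drive a YOLOXCOCOEval instance over (gt, pred) pairs.
    from typing import Dict, Iterable, Tuple

    def run_yolox_coco_eval(evaluator, batches: Iterable[Tuple[list, object]]) -> Dict[str, float]:
        """gt is a one-element list of dataset dicts carrying "height",
        "width", and "image_id"; pred is the raw YOLOX output for that image."""
        evaluator.reset()
        for gt, pred in batches:
            evaluator.digest(gt, pred)
        return evaluator.results()  # {"AP": ..., "AP50": ...}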

compressai_vision/model_wrappers/__init__.py

Lines changed: 2 additions & 0 deletions

@@ -35,6 +35,7 @@
     mask_rcnn_X_101_32x8d_FPN_3x,
 )
 from .jde import jde_1088x608
+from .yolox import yolox_darknet53
 
 __all__ = [
     "BaseWrapper",
@@ -43,4 +44,5 @@
     "faster_rcnn_R_50_FPN_3x",
     "mask_rcnn_R_50_FPN_3x",
     "jde_1088x608",
+    "yolox_darknet53",
 ]
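
Note: with the re-export above, the new wrapper is importable at package level:

    from compressai_vision.model_wrappers import yolox_darknet53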

compressai_vision/model_wrappers/yolo.py

Lines changed: 0 additions & 28 deletions
This file was deleted.
