Commit c9fd5a6
[Feature] Support MultiModalityDet3DInferencer (#2342)
* add multimodality inferencer
* add multimodality inferencer
* add Loader and Inferencer, fix some mono vis bugs
* add UT for multi_modality_inferencer
* add calib file in UT
* fix type hint
* resolve comments
* small update
* update to Base3DInferencer
* fix docstring
* minor fix
* resolve comments
* resolve comments
* add suffix check
* fix UT
1 parent 98d2642 commit c9fd5a6

14 files changed: +512 -18 lines changed
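Before the per-file diffs, a minimal usage sketch of the new inferencer. Hedged: the `mvxnet_kitti-3class` alias comes from the metafile change below, the input paths are placeholders, the keyword arguments follow the `visualize_kwargs` set defined in the new inferencer, and any calibration info required by `MultiModalityDet3DInferencerLoader` is outside this diff.

```python
from mmdet3d.apis import MultiModalityDet3DInferencer

# 'mvxnet_kitti-3class' is the metafile alias added by this commit;
# the KITTI-style sample paths below are illustrative only.
inferencer = MultiModalityDet3DInferencer(model='mvxnet_kitti-3class')
results = inferencer(
    dict(points='demo/data/kitti/000008.bin',
         img='demo/data/kitti/000008.png'),
    pred_score_thr=0.3,      # from visualize_kwargs
    img_out_dir='outputs/')  # saves visualizations if non-empty
```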

configs/imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py

Lines changed: 6 additions & 0 deletions

```diff
@@ -119,6 +119,7 @@
             modality=input_modality,
             test_mode=False,
             metainfo=metainfo,
+            box_type_3d='LiDAR',
             backend_args=backend_args)))
 val_dataloader = dict(
     batch_size=1,
@@ -135,6 +136,7 @@
         modality=input_modality,
         test_mode=True,
         metainfo=metainfo,
+        box_type_3d='LiDAR',
         backend_args=backend_args))
 test_dataloader = val_dataloader
 
@@ -168,3 +170,7 @@
 
 # runtime
 find_unused_parameters = True  # only 1 of 4 FPN outputs is used
+
+vis_backends = [dict(type='LocalVisBackend')]
+visualizer = dict(
+    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
```
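These `vis_backends`/`visualizer` entries are what the inferencer's `visualize()` step looks up at runtime; if the config defines no visualizer, visualization is skipped (see the `None` check in the new inferencer below). A standalone sketch of the equivalent object, assuming the `mmdet3d.visualization` API:

```python
from mmdet3d.visualization import Det3DLocalVisualizer

# Builds the same visualizer the config now declares (sketch).
visualizer = Det3DLocalVisualizer(
    vis_backends=[dict(type='LocalVisBackend')], name='visualizer')
```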

configs/mvxnet/metafile.yml

Lines changed: 1 addition & 0 deletions

```diff
@@ -18,6 +18,7 @@ Collections:
 
 Models:
   - Name: dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class
+    Alias: mvxnet_kitti-3class
     In Collection: MVX-Net
     Config: configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py
     Metadata:
```

configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py

Lines changed: 4 additions & 0 deletions

```diff
@@ -263,5 +263,9 @@
     type='KittiMetric', ann_file='data/kitti/kitti_infos_val.pkl')
 test_evaluator = val_evaluator
 
+vis_backends = [dict(type='LocalVisBackend')]
+visualizer = dict(
+    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
+
 # You may need to download the model first if the network is unstable
 load_from = 'https://download.openmmlab.com/mmdetection3d/pretrain_models/mvx_faster_rcnn_detectron2-caffe_20e_coco-pretrain_gt-sample_kitti-3-class_moderate-79.3_20200207-a4a6a3c7.pth'  # noqa
```

mmdet3d/apis/__init__.py

Lines changed: 4 additions & 2 deletions

```diff
@@ -4,11 +4,13 @@
                         inference_multi_modality_detector, inference_segmentor,
                         init_model)
 from .inferencers import (Base3DInferencer, LidarDet3DInferencer,
-                          LidarSeg3DInferencer, MonoDet3DInferencer)
+                          LidarSeg3DInferencer, MonoDet3DInferencer,
+                          MultiModalityDet3DInferencer)
 
 __all__ = [
     'inference_detector', 'init_model', 'inference_mono_3d_detector',
     'convert_SyncBN', 'inference_multi_modality_detector',
     'inference_segmentor', 'Base3DInferencer', 'MonoDet3DInferencer',
-    'LidarDet3DInferencer', 'LidarSeg3DInferencer'
+    'LidarDet3DInferencer', 'LidarSeg3DInferencer',
+    'MultiModalityDet3DInferencer'
 ]
```

mmdet3d/apis/inference.py

Lines changed: 12 additions & 6 deletions

```diff
@@ -176,8 +176,10 @@ def inference_multi_modality_detector(model: nn.Module,
                                       pcds: Union[str, Sequence[str]],
                                       imgs: Union[str, Sequence[str]],
                                       ann_file: Union[str, Sequence[str]],
-                                      cam_type: str = 'CAM_FRONT'):
-    """Inference point cloud with the multi-modality detector.
+                                      cam_type: str = 'CAM2'):
+    """Inference point cloud with the multi-modality detector. Currently this
+    only supports multi-modality detectors on the KITTI dataset, since
+    multi-view image loading is not yet supported in this inference function.
 
     Args:
         model (nn.Module): The loaded detector.
@@ -187,7 +189,7 @@ def inference_multi_modality_detector(model: nn.Module,
             Either image files or loaded images.
         ann_file (str, Sequence[str]): Annotation files.
         cam_type (str): Image of camera chosen to infer.
-            For kitti dataset, it should be 'CAM_2',
+            For kitti dataset, it should be 'CAM2',
             and for nuscenes dataset, it should be
             'CAM_FRONT'. Defaults to 'CAM_FRONT'.
 
@@ -216,7 +218,6 @@ def inference_multi_modality_detector(model: nn.Module,
         get_box_type(cfg.test_dataloader.dataset.box_type_3d)
 
     data_list = mmengine.load(ann_file)['data_list']
-    assert len(imgs) == len(data_list)
 
     data = []
     for index, pcd in enumerate(pcds):
@@ -228,13 +229,18 @@ def inference_multi_modality_detector(model: nn.Module,
         if osp.basename(img_path) != osp.basename(img):
             raise ValueError(f'the info file of {img_path} is not provided.')
 
+        data_info['images'][cam_type]['img_path'] = img
+        cam2img = np.array(data_info['images'][cam_type]['cam2img'])
+
         # TODO: check the name consistency of
         # image file and point cloud file
+        # TODO: support multi-view image loading
         data_ = dict(
             lidar_points=dict(lidar_path=pcd),
             img_path=img,
             box_type_3d=box_type_3d,
-            box_mode_3d=box_mode_3d)
+            box_mode_3d=box_mode_3d,
+            cam2img=cam2img)
 
         # LiDAR to image conversion for KITTI dataset
         if box_mode_3d == Box3DMode.LIDAR:
@@ -295,7 +301,7 @@ def inference_mono_3d_detector(model: nn.Module,
     box_type_3d, box_mode_3d = \
         get_box_type(cfg.test_dataloader.dataset.box_type_3d)
 
-    data_list = mmengine.load(ann_file)
+    data_list = mmengine.load(ann_file)['data_list']
     assert len(imgs) == len(data_list)
 
     data = []
```
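A hedged call sketch for the updated function. The paths are placeholders; the `.pkl` must be a KITTI-style info file whose `data_list` entries carry `images[cam_type]['cam2img']`, as read above; the tuple return is assumed here to mirror mmdet3d's demo scripts.

```python
from mmdet3d.apis import inference_multi_modality_detector, init_model

# Illustrative config/checkpoint paths.
model = init_model(
    'configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2-80e_kitti-3d-3class.py',
    'checkpoints/mvxnet_kitti-3class.pth',
    device='cuda:0')
result, data = inference_multi_modality_detector(
    model,
    pcds='demo/data/kitti/000008.bin',
    imgs='demo/data/kitti/000008.png',
    ann_file='demo/data/kitti/000008.pkl',
    cam_type='CAM2')  # the new KITTI default
```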

mmdet3d/apis/inferencers/__init__.py

Lines changed: 2 additions & 1 deletion

```diff
@@ -3,8 +3,9 @@
 from .lidar_det3d_inferencer import LidarDet3DInferencer
 from .lidar_seg3d_inferencer import LidarSeg3DInferencer
 from .mono_det3d_inferencer import MonoDet3DInferencer
+from .multi_modality_det3d_inferencer import MultiModalityDet3DInferencer
 
 __all__ = [
     'Base3DInferencer', 'MonoDet3DInferencer', 'LidarDet3DInferencer',
-    'LidarSeg3DInferencer'
+    'LidarSeg3DInferencer', 'MultiModalityDet3DInferencer'
 ]
```

mmdet3d/apis/inferencers/lidar_det3d_inferencer.py

Lines changed: 5 additions & 5 deletions

```diff
@@ -93,19 +93,19 @@ def _init_pipeline(self, cfg: ConfigType) -> Compose:
         """Initialize the test pipeline."""
         pipeline_cfg = cfg.test_dataloader.dataset.pipeline
 
-        load_img_idx = self._get_transform_idx(pipeline_cfg,
-                                               'LoadPointsFromFile')
-        if load_img_idx == -1:
+        load_point_idx = self._get_transform_idx(pipeline_cfg,
+                                                 'LoadPointsFromFile')
+        if load_point_idx == -1:
             raise ValueError(
                 'LoadPointsFromFile is not found in the test pipeline')
 
-        load_cfg = pipeline_cfg[load_img_idx]
+        load_cfg = pipeline_cfg[load_point_idx]
         self.coord_type, self.load_dim = load_cfg['coord_type'], load_cfg[
             'load_dim']
         self.use_dim = list(range(load_cfg['use_dim'])) if isinstance(
             load_cfg['use_dim'], int) else load_cfg['use_dim']
 
-        pipeline_cfg[load_img_idx]['type'] = 'LidarDet3DInferencerLoader'
+        pipeline_cfg[load_point_idx]['type'] = 'LidarDet3DInferencerLoader'
         return Compose(pipeline_cfg)
 
     def visualize(self,
```
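For reference, `_get_transform_idx` is inherited from `Base3DInferencer` and is not part of this diff; the change above is purely a rename from the misleading `load_img_idx` to `load_point_idx`. A minimal sketch of the helper's assumed semantics:

```python
def _get_transform_idx(pipeline_cfg, name):
    """Return the index of the first transform whose ``type`` matches
    ``name``, or -1 if the pipeline does not contain it (assumed
    behavior, matching how the callers above treat -1)."""
    for idx, transform in enumerate(pipeline_cfg):
        if transform['type'] == name:
            return idx
    return -1
```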
mmdet3d/apis/inferencers/multi_modality_det3d_inferencer.py

Lines changed: 233 additions & 0 deletions (new file)

```python
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import warnings
from typing import Dict, List, Optional, Sequence, Union

import mmcv
import mmengine
import numpy as np
from mmengine.dataset import Compose
from mmengine.infer.infer import ModelType
from mmengine.structures import InstanceData

from mmdet3d.registry import INFERENCERS
from mmdet3d.utils import ConfigType
from .base_3d_inferencer import Base3DInferencer

InstanceList = List[InstanceData]
InputType = Union[str, np.ndarray]
InputsType = Union[InputType, Sequence[InputType]]
PredType = Union[InstanceData, InstanceList]
ImgType = Union[np.ndarray, Sequence[np.ndarray]]
ResType = Union[Dict, List[Dict], InstanceData, List[InstanceData]]


@INFERENCERS.register_module(name='det3d-multi_modality')
@INFERENCERS.register_module()
class MultiModalityDet3DInferencer(Base3DInferencer):
    """The inferencer of multi-modality detection.

    Args:
        model (str, optional): Path to the config file or the model name
            defined in metafile. For example, it could be
            "pointpillars_kitti-3class" or
            "configs/pointpillars/pointpillars_hv_secfpn_8xb6-160e_kitti-3d-3class.py". # noqa: E501
            If the model is not specified, the user must provide the
            `weights` saved by MMEngine which contains the config string.
            Defaults to None.
        weights (str, optional): Path to the checkpoint. If it is not
            specified and model is a model name of metafile, the weights
            will be loaded from metafile. Defaults to None.
        device (str, optional): Device to run inference. If None, the
            available device will be automatically used. Defaults to None.
        scope (str): The scope of the registry. Defaults to 'mmdet3d'.
        palette (str): The palette of visualization. Defaults to 'none'.
    """

    preprocess_kwargs: set = set()
    forward_kwargs: set = set()
    visualize_kwargs: set = {
        'return_vis', 'show', 'wait_time', 'draw_pred', 'pred_score_thr',
        'img_out_dir'
    }
    postprocess_kwargs: set = {
        'print_result', 'pred_out_file', 'return_datasample'
    }

    def __init__(self,
                 model: Union[ModelType, str, None] = None,
                 weights: Optional[str] = None,
                 device: Optional[str] = None,
                 scope: str = 'mmdet3d',
                 palette: str = 'none') -> None:
        # A global counter tracking the number of frames processed, for
        # naming the output results.
        self.num_visualized_frames = 0
        super(MultiModalityDet3DInferencer, self).__init__(
            model=model,
            weights=weights,
            device=device,
            scope=scope,
            palette=palette)

    def _inputs_to_list(self, inputs: Union[dict, list]) -> list:
        """Preprocess the inputs to a list.

        Preprocess inputs to a list according to its type:

        - list or tuple: return inputs
        - dict: the value with key 'points' is
            - Directory path: return all files in the directory
            - other cases: return a list containing the string. The string
              could be a path to a file, a URL or another type of string
              according to the task.

        Args:
            inputs (Union[dict, list]): Inputs for the inferencer.

        Returns:
            list: List of inputs for the :meth:`preprocess`.
        """
        return super()._inputs_to_list(inputs, modality_key=['points', 'img'])

    def _init_pipeline(self, cfg: ConfigType) -> Compose:
        """Initialize the test pipeline."""
        pipeline_cfg = cfg.test_dataloader.dataset.pipeline

        load_point_idx = self._get_transform_idx(pipeline_cfg,
                                                 'LoadPointsFromFile')
        load_mv_img_idx = self._get_transform_idx(
            pipeline_cfg, 'LoadMultiViewImageFromFiles')
        if load_mv_img_idx != -1:
            warnings.warn(
                'LoadMultiViewImageFromFiles is not supported yet in the '
                'multi-modality inferencer. Please remove it.')
        # Now we only support ``LoadImageFromFile`` as the image loader in
        # the original pipeline. ``LoadMultiViewImageFromFiles`` is not
        # supported yet.
        load_img_idx = self._get_transform_idx(pipeline_cfg,
                                               'LoadImageFromFile')

        if load_point_idx == -1 or load_img_idx == -1:
            raise ValueError(
                'Both LoadPointsFromFile and LoadImageFromFile must '
                'be specified in the pipeline, but LoadPointsFromFile is '
                f'{load_point_idx == -1} and LoadImageFromFile is '
                f'{load_img_idx}')

        load_cfg = pipeline_cfg[load_point_idx]
        self.coord_type, self.load_dim = load_cfg['coord_type'], load_cfg[
            'load_dim']
        self.use_dim = list(range(load_cfg['use_dim'])) if isinstance(
            load_cfg['use_dim'], int) else load_cfg['use_dim']

        load_point_args = pipeline_cfg[load_point_idx]
        load_point_args.pop('type')
        load_img_args = pipeline_cfg[load_img_idx]
        load_img_args.pop('type')

        # Merge the two loaders into a single inferencer loader, keeping
        # the earlier position in the pipeline.
        load_idx = min(load_point_idx, load_img_idx)
        pipeline_cfg.pop(max(load_point_idx, load_img_idx))

        pipeline_cfg[load_idx] = dict(
            type='MultiModalityDet3DInferencerLoader',
            load_point_args=load_point_args,
            load_img_args=load_img_args)

        return Compose(pipeline_cfg)

    def visualize(self,
                  inputs: InputsType,
                  preds: PredType,
                  return_vis: bool = False,
                  show: bool = False,
                  wait_time: int = 0,
                  draw_pred: bool = True,
                  pred_score_thr: float = 0.3,
                  img_out_dir: str = '') -> Union[List[np.ndarray], None]:
        """Visualize predictions.

        Args:
            inputs (InputsType): Inputs for the inferencer.
            preds (PredType): Predictions of the model.
            return_vis (bool): Whether to return the visualization result.
                Defaults to False.
            show (bool): Whether to display the image in a popup window.
                Defaults to False.
            wait_time (float): The interval of show (s). Defaults to 0.
            draw_pred (bool): Whether to draw predicted bounding boxes.
                Defaults to True.
            pred_score_thr (float): Minimum score of bboxes to draw.
                Defaults to 0.3.
            img_out_dir (str): Output directory of visualization results.
                If left as empty, no file will be saved. Defaults to ''.

        Returns:
            List[np.ndarray] or None: Returns visualization results only if
            applicable.
        """
        if self.visualizer is None or (not show and img_out_dir == ''
                                       and not return_vis):
            return None

        if getattr(self, 'visualizer') is None:
            raise ValueError('Visualization needs the "visualizer" term '
                             'defined in the config, but got None.')

        results = []

        for single_input, pred in zip(inputs, preds):
            points_input = single_input['points']
            if isinstance(points_input, str):
                pts_bytes = mmengine.fileio.get(points_input)
                points = np.frombuffer(pts_bytes, dtype=np.float32)
                points = points.reshape(-1, self.load_dim)
                points = points[:, self.use_dim]
                pc_name = osp.basename(points_input).split('.bin')[0]
                pc_name = f'{pc_name}.png'
            elif isinstance(points_input, np.ndarray):
                points = points_input.copy()
                pc_num = str(self.num_visualized_frames).zfill(8)
                pc_name = f'pc_{pc_num}.png'
            else:
                raise ValueError('Unsupported input type: '
                                 f'{type(points_input)}')

            o3d_save_path = osp.join(img_out_dir, pc_name) \
                if img_out_dir != '' else None

            img_input = single_input['img']
            if isinstance(single_input['img'], str):
                img_bytes = mmengine.fileio.get(img_input)
                img = mmcv.imfrombytes(img_bytes)
                # Convert from BGR (mmcv default) to RGB for visualization.
                img = img[:, :, ::-1]
                img_name = osp.basename(img_input)
            elif isinstance(img_input, np.ndarray):
                img = img_input.copy()
                img_num = str(self.num_visualized_frames).zfill(8)
                img_name = f'{img_num}.jpg'
            else:
                raise ValueError('Unsupported input type: '
                                 f'{type(img_input)}')

            out_file = osp.join(img_out_dir, img_name) if img_out_dir != '' \
                else None

            data_input = dict(points=points, img=img)
            self.visualizer.add_datasample(
                pc_name,
                data_input,
                pred,
                show=show,
                wait_time=wait_time,
                draw_gt=False,
                draw_pred=draw_pred,
                pred_score_thr=pred_score_thr,
                o3d_save_path=o3d_save_path,
                out_file=out_file,
                vis_task='multi-modality_det',
            )
            results.append(points)
            self.num_visualized_frames += 1

        return results
```
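The two `@INFERENCERS.register_module()` decorators register the class under both its class name and the short key `det3d-multi_modality`, so either name resolves it from the registry:

```python
from mmdet3d.registry import INFERENCERS

# Both keys map to the same class thanks to the double registration.
assert (INFERENCERS.get('MultiModalityDet3DInferencer')
        is INFERENCERS.get('det3d-multi_modality'))
```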
