1
1
# Copyright (c) OpenMMLab. All rights reserved.
2
2
import math
3
3
from numbers import Number
4
- from typing import Dict , List , Optional , Sequence , Union
4
+ from typing import Dict , List , Optional , Sequence , Tuple , Union
5
5
6
6
import numpy as np
7
7
import torch
8
8
from mmdet .models import DetDataPreprocessor
9
+ from mmdet .models .utils .misc import samplelist_boxtype2tensor
9
10
from mmengine .model import stack_batch
10
- from mmengine .utils import is_list_of
11
+ from mmengine .utils import is_seq_of
12
+ from torch import Tensor
11
13
from torch .nn import functional as F
12
14
13
15
from mmdet3d .registry import MODELS
@@ -27,52 +29,56 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
27
29
- Collate and move image and point cloud data to the target device.
28
30
29
31
- 1) For image data:
30
- - Pad images in inputs to the maximum size of current batch with defined
31
- ``pad_value``. The padding size can be divisible by a defined
32
- ``pad_size_divisor``.
33
- - Stack images in inputs to batch_imgs.
34
- - Convert images in inputs from bgr to rgb if the shape of input is
35
- (3, H, W).
36
- - Normalize images in inputs with defined std and mean.
37
- - Do batch augmentations during training.
32
+
33
+ - Pad images in inputs to the maximum size of current batch with defined
34
+ ``pad_value``. The padding size can be divisible by a defined
35
+ ``pad_size_divisor``.
36
+ - Stack images in inputs to batch_imgs.
37
+ - Convert images in inputs from bgr to rgb if the shape of input is
38
+ (3, H, W).
39
+ - Normalize images in inputs with defined std and mean.
40
+ - Do batch augmentations during training.
38
41
39
42
- 2) For point cloud data:
40
- - If no voxelization, directly return list of point cloud data.
41
- - If voxelization is applied, voxelize point cloud according to
42
- ``voxel_type`` and obtain ``voxels``.
43
+
44
+ - If no voxelization, directly return list of point cloud data.
45
+ - If voxelization is applied, voxelize point cloud according to
46
+ ``voxel_type`` and obtain ``voxels``.
43
47
44
48
Args:
45
49
voxel (bool): Whether to apply voxelization to point cloud.
46
50
Defaults to False.
47
51
voxel_type (str): Voxelization type. Two voxelization types are
48
- provided: 'hard' and 'dynamic', respectively for hard
49
- voxelization and dynamic voxelization. Defaults to 'hard'.
52
+ provided: 'hard' and 'dynamic', respectively for hard voxelization
53
+ and dynamic voxelization. Defaults to 'hard'.
50
54
voxel_layer (dict or :obj:`ConfigDict`, optional): Voxelization layer
51
55
config. Defaults to None.
52
56
batch_first (bool): Whether to put the batch dimension to the first
53
57
dimension when getting voxel coordinates. Defaults to True.
54
- max_voxels (int): Maximum number of voxels in each voxel grid. Defaults
55
- to None.
58
+ max_voxels (int, optional ): Maximum number of voxels in each voxel
59
+ grid. Defaults to None.
56
60
mean (Sequence[Number], optional): The pixel mean of R, G, B channels.
57
61
Defaults to None.
58
62
std (Sequence[Number], optional): The pixel standard deviation of
59
63
R, G, B channels. Defaults to None.
60
- pad_size_divisor (int): The size of padded image should be
61
- divisible by ``pad_size_divisor``. Defaults to 1.
62
- pad_value (Number ): The padded pixel value. Defaults to 0.
64
+ pad_size_divisor (int): The size of padded image should be divisible by
65
+ ``pad_size_divisor``. Defaults to 1.
66
+ pad_value (float or int ): The padded pixel value. Defaults to 0.
63
67
pad_mask (bool): Whether to pad instance masks. Defaults to False.
64
68
mask_pad_value (int): The padded pixel value for instance masks.
65
69
Defaults to 0.
66
70
pad_seg (bool): Whether to pad semantic segmentation maps.
67
71
Defaults to False.
68
- seg_pad_value (int): The padded pixel value for semantic
69
- segmentation maps. Defaults to 255.
72
+ seg_pad_value (int): The padded pixel value for semantic segmentation
73
+ maps. Defaults to 255.
70
74
bgr_to_rgb (bool): Whether to convert image from BGR to RGB.
71
75
Defaults to False.
72
76
rgb_to_bgr (bool): Whether to convert image from RGB to BGR.
73
77
Defaults to False.
74
- boxtype2tensor (bool): Whether to keep the ``BaseBoxes`` type of
75
- bboxes data or not. Defaults to True.
78
+ boxtype2tensor (bool): Whether to convert the ``BaseBoxes`` type of
79
+ bboxes data to ``Tensor`` type. Defaults to True.
80
+ non_blocking (bool): Whether to transfer data to the device without
81
+ blocking the current process. Defaults to False.
76
82
batch_augments (List[dict], optional): Batch-level augmentations.
77
83
Defaults to None.
78
84
"""
@@ -94,6 +100,7 @@ def __init__(self,
94
100
bgr_to_rgb : bool = False ,
95
101
rgb_to_bgr : bool = False ,
96
102
boxtype2tensor : bool = True ,
103
+ non_blocking : bool = False ,
97
104
batch_augments : Optional [List [dict ]] = None ) -> None :
98
105
super (Det3DDataPreprocessor , self ).__init__ (
99
106
mean = mean ,
@@ -106,6 +113,8 @@ def __init__(self,
106
113
seg_pad_value = seg_pad_value ,
107
114
bgr_to_rgb = bgr_to_rgb ,
108
115
rgb_to_bgr = rgb_to_bgr ,
116
+ boxtype2tensor = boxtype2tensor ,
117
+ non_blocking = non_blocking ,
109
118
batch_augments = batch_augments )
110
119
self .voxel = voxel
111
120
self .voxel_type = voxel_type
@@ -121,9 +130,9 @@ def forward(self,
121
130
``BaseDataPreprocessor``.
122
131
123
132
Args:
124
- data (dict or List[dict]): Data from dataloader.
125
- The dict contains the whole batch data, when it is
126
- a list[dict], the list indicate test time augmentation.
133
+ data (dict or List[dict]): Data from dataloader. The dict contains
134
+ the whole batch data, when it is a list[dict], the list
135
+ indicates test time augmentation.
127
136
training (bool): Whether to enable training time augmentation.
128
137
Defaults to False.
129
138
@@ -184,17 +193,10 @@ def simple_process(self, data: dict, training: bool = False) -> dict:
184
193
'pad_shape' : pad_shape
185
194
})
186
195
187
- if hasattr (self , 'boxtype2tensor' ) and self .boxtype2tensor :
188
- from mmdet .models .utils .misc import \
189
- samplelist_boxtype2tensor
196
+ if self .boxtype2tensor :
190
197
samplelist_boxtype2tensor (data_samples )
191
- elif hasattr (self , 'boxlist2tensor' ) and self .boxlist2tensor :
192
- from mmdet .models .utils .misc import \
193
- samplelist_boxlist2tensor
194
- samplelist_boxlist2tensor (data_samples )
195
198
if self .pad_mask :
196
199
self .pad_gt_masks (data_samples )
197
-
198
200
if self .pad_seg :
199
201
self .pad_gt_sem_seg (data_samples )
200
202
@@ -205,7 +207,7 @@ def simple_process(self, data: dict, training: bool = False) -> dict:
205
207
206
208
return {'inputs' : batch_inputs , 'data_samples' : data_samples }
207
209
208
- def preprocess_img (self , _batch_img : torch . Tensor ) -> torch . Tensor :
210
+ def preprocess_img (self , _batch_img : Tensor ) -> Tensor :
209
211
# channel transform
210
212
if self ._channel_conversion :
211
213
_batch_img = _batch_img [[2 , 1 , 0 ], ...]
@@ -223,12 +225,11 @@ def preprocess_img(self, _batch_img: torch.Tensor) -> torch.Tensor:
223
225
return _batch_img
224
226
225
227
def collate_data (self , data : dict ) -> dict :
226
- """Copying data to the target device and Performs normalization,
227
- padding and bgr2rgb conversion and stack based on
228
- ``BaseDataPreprocessor``.
228
+ """Copy data to the target device and perform normalization, padding,
229
+ bgr2rgb conversion and stacking based on ``BaseDataPreprocessor``.
229
230
230
- Collates the data sampled from dataloader into a list of dict and
231
- list of labels, and then copies tensor to the target device.
231
+ Collates the data sampled from dataloader into a list of dict and list
232
+ of labels, and then copies tensor to the target device.
232
233
233
234
Args:
234
235
data (dict): Data sampled from dataloader.
@@ -241,7 +242,7 @@ def collate_data(self, data: dict) -> dict:
241
242
if 'img' in data ['inputs' ]:
242
243
_batch_imgs = data ['inputs' ]['img' ]
243
244
# Process data with `pseudo_collate`.
244
- if is_list_of (_batch_imgs , torch .Tensor ):
245
+ if is_seq_of (_batch_imgs , torch .Tensor ):
245
246
batch_imgs = []
246
247
img_dim = _batch_imgs [0 ].dim ()
247
248
for _batch_img in _batch_imgs :
@@ -289,7 +290,7 @@ def collate_data(self, data: dict) -> dict:
289
290
else :
290
291
raise TypeError (
291
292
'Output of `cast_data` should be a list of dict '
292
- 'or a tuple with inputs and data_samples, but got'
293
+ 'or a tuple with inputs and data_samples, but got '
293
294
f'{ type (data )} : { data } ' )
294
295
295
296
data ['inputs' ]['imgs' ] = batch_imgs
@@ -298,13 +299,13 @@ def collate_data(self, data: dict) -> dict:
298
299
299
300
return data
300
301
301
- def _get_pad_shape (self , data : dict ) -> List [tuple ]:
302
+ def _get_pad_shape (self , data : dict ) -> List [Tuple [ int , int ] ]:
302
303
"""Get the pad_shape of each image based on data and
303
304
pad_size_divisor."""
304
305
# rewrite `_get_pad_shape` for obtaining image inputs.
305
306
_batch_inputs = data ['inputs' ]['img' ]
306
307
# Process data with `pseudo_collate`.
307
- if is_list_of (_batch_inputs , torch .Tensor ):
308
+ if is_seq_of (_batch_inputs , torch .Tensor ):
308
309
batch_pad_shape = []
309
310
for ori_input in _batch_inputs :
310
311
if ori_input .dim () == 4 :
@@ -338,8 +339,8 @@ def _get_pad_shape(self, data: dict) -> List[tuple]:
338
339
return batch_pad_shape
339
340
340
341
@torch .no_grad ()
341
- def voxelize (self , points : List [torch . Tensor ],
342
- data_samples : SampleList ) -> Dict [str , torch . Tensor ]:
342
+ def voxelize (self , points : List [Tensor ],
343
+ data_samples : SampleList ) -> Dict [str , Tensor ]:
343
344
"""Apply voxelization to point cloud.
344
345
345
346
Args:
@@ -466,7 +467,8 @@ def voxelize(self, points: List[torch.Tensor],
466
467
467
468
return voxel_dict
468
469
469
- def get_voxel_seg (self , res_coors : torch .Tensor , data_sample : SampleList ):
470
+ def get_voxel_seg (self , res_coors : Tensor ,
471
+ data_sample : SampleList ) -> None :
470
472
"""Get voxel-wise segmentation label and point2voxel map.
471
473
472
474
Args:
@@ -490,7 +492,7 @@ def get_voxel_seg(self, res_coors: torch.Tensor, data_sample: SampleList):
490
492
data_sample .point2voxel_map = point2voxel_map
491
493
492
494
def ravel_hash (self , x : np .ndarray ) -> np .ndarray :
493
- """Get voxel coordinates hash for np.unique() .
495
+ """Get voxel coordinates hash for np.unique.
494
496
495
497
Args:
496
498
x (np.ndarray): The voxel coordinates of points, Nx3.
@@ -519,14 +521,14 @@ def sparse_quantize(self,
519
521
520
522
Args:
521
523
coords (np.ndarray): The voxel coordinates of points, Nx3.
522
- return_index (bool): Whether to return the indices of the
523
- unique coords, shape (M,).
524
+ return_index (bool): Whether to return the indices of the unique
525
+ coords, shape (M,).
524
526
return_inverse (bool): Whether to return the indices of the
525
- original coords shape (N,).
527
+ original coords, shape (N,).
526
528
527
529
Returns:
528
- List[np.ndarray] or None : Return index and inverse map if
529
- return_index and return_inverse is True.
530
+ List[np.ndarray]: Return index and inverse map if return_index and
531
+ return_inverse are True.
530
532
"""
531
533
_ , indices , inverse_indices = np .unique (
532
534
self .ravel_hash (coords ), return_index = True , return_inverse = True )
0 commit comments