
Commit ed46b8c

[Enhance] Support modifying non_blocking parameters (#2567)
1 parent: 56657c2

File tree

2 files changed (+75, -75 lines)

mmdet3d/models/data_preprocessors/data_preprocessor.py

Lines changed: 56 additions & 54 deletions
@@ -1,13 +1,15 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import math
 from numbers import Number
-from typing import Dict, List, Optional, Sequence, Union
+from typing import Dict, List, Optional, Sequence, Tuple, Union

 import numpy as np
 import torch
 from mmdet.models import DetDataPreprocessor
+from mmdet.models.utils.misc import samplelist_boxtype2tensor
 from mmengine.model import stack_batch
-from mmengine.utils import is_list_of
+from mmengine.utils import is_seq_of
+from torch import Tensor
 from torch.nn import functional as F

 from mmdet3d.registry import MODELS
@@ -27,52 +29,56 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
     - Collate and move image and point cloud data to the target device.

     - 1) For image data:
-      - Pad images in inputs to the maximum size of current batch with defined
-        ``pad_value``. The padding size can be divisible by a defined
-        ``pad_size_divisor``.
-      - Stack images in inputs to batch_imgs.
-      - Convert images in inputs from bgr to rgb if the shape of input is
-        (3, H, W).
-      - Normalize images in inputs with defined std and mean.
-      - Do batch augmentations during training.
+
+      - Pad images in inputs to the maximum size of current batch with defined
+        ``pad_value``. The padding size can be divisible by a defined
+        ``pad_size_divisor``.
+      - Stack images in inputs to batch_imgs.
+      - Convert images in inputs from bgr to rgb if the shape of input is
+        (3, H, W).
+      - Normalize images in inputs with defined std and mean.
+      - Do batch augmentations during training.

     - 2) For point cloud data:
-      - If no voxelization, directly return list of point cloud data.
-      - If voxelization is applied, voxelize point cloud according to
-        ``voxel_type`` and obtain ``voxels``.
+
+      - If no voxelization, directly return list of point cloud data.
+      - If voxelization is applied, voxelize point cloud according to
+        ``voxel_type`` and obtain ``voxels``.

     Args:
         voxel (bool): Whether to apply voxelization to point cloud.
             Defaults to False.
         voxel_type (str): Voxelization type. Two voxelization types are
-            provided: 'hard' and 'dynamic', respectively for hard
-            voxelization and dynamic voxelization. Defaults to 'hard'.
+            provided: 'hard' and 'dynamic', respectively for hard voxelization
+            and dynamic voxelization. Defaults to 'hard'.
         voxel_layer (dict or :obj:`ConfigDict`, optional): Voxelization layer
             config. Defaults to None.
         batch_first (bool): Whether to put the batch dimension to the first
             dimension when getting voxel coordinates. Defaults to True.
-        max_voxels (int): Maximum number of voxels in each voxel grid. Defaults
-            to None.
+        max_voxels (int, optional): Maximum number of voxels in each voxel
+            grid. Defaults to None.
         mean (Sequence[Number], optional): The pixel mean of R, G, B channels.
             Defaults to None.
         std (Sequence[Number], optional): The pixel standard deviation of
             R, G, B channels. Defaults to None.
-        pad_size_divisor (int): The size of padded image should be
-            divisible by ``pad_size_divisor``. Defaults to 1.
-        pad_value (Number): The padded pixel value. Defaults to 0.
+        pad_size_divisor (int): The size of padded image should be divisible by
+            ``pad_size_divisor``. Defaults to 1.
+        pad_value (float or int): The padded pixel value. Defaults to 0.
         pad_mask (bool): Whether to pad instance masks. Defaults to False.
         mask_pad_value (int): The padded pixel value for instance masks.
             Defaults to 0.
         pad_seg (bool): Whether to pad semantic segmentation maps.
             Defaults to False.
-        seg_pad_value (int): The padded pixel value for semantic
-            segmentation maps. Defaults to 255.
+        seg_pad_value (int): The padded pixel value for semantic segmentation
+            maps. Defaults to 255.
         bgr_to_rgb (bool): Whether to convert image from BGR to RGB.
             Defaults to False.
         rgb_to_bgr (bool): Whether to convert image from RGB to BGR.
            Defaults to False.
-        boxtype2tensor (bool): Whether to keep the ``BaseBoxes`` type of
-            bboxes data or not. Defaults to True.
+        boxtype2tensor (bool): Whether to convert the ``BaseBoxes`` type of
+            bboxes data to ``Tensor`` type. Defaults to True.
+        non_blocking (bool): Whether to block current process when transferring
+            data to device. Defaults to False.
         batch_augments (List[dict], optional): Batch-level augmentations.
             Defaults to None.
     """
@@ -94,6 +100,7 @@ def __init__(self,
                  bgr_to_rgb: bool = False,
                  rgb_to_bgr: bool = False,
                  boxtype2tensor: bool = True,
+                 non_blocking: bool = False,
                  batch_augments: Optional[List[dict]] = None) -> None:
         super(Det3DDataPreprocessor, self).__init__(
             mean=mean,
@@ -106,6 +113,8 @@ def __init__(self,
             seg_pad_value=seg_pad_value,
             bgr_to_rgb=bgr_to_rgb,
             rgb_to_bgr=rgb_to_bgr,
+            boxtype2tensor=boxtype2tensor,
+            non_blocking=non_blocking,
             batch_augments=batch_augments)
         self.voxel = voxel
         self.voxel_type = voxel_type
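With ``boxtype2tensor`` and ``non_blocking`` now forwarded to ``DetDataPreprocessor``, the option can be switched on from a config. A hypothetical snippet (the voxel settings are illustrative KITTI-style values; only ``non_blocking`` comes from this commit):

# Hypothetical config enabling the new option; voxel values are
# illustrative, only `non_blocking` is added by this commit.
data_preprocessor = dict(
    type='Det3DDataPreprocessor',
    voxel=True,
    voxel_type='hard',
    voxel_layer=dict(
        max_num_points=32,
        point_cloud_range=[0, -40, -3, 70.4, 40, 1],
        voxel_size=[0.05, 0.05, 0.1],
        max_voxels=(16000, 40000)),
    non_blocking=True)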
@@ -121,9 +130,9 @@ def forward(self,
         ``BaseDataPreprocessor``.

         Args:
-            data (dict or List[dict]): Data from dataloader.
-                The dict contains the whole batch data, when it is
-                a list[dict], the list indicate test time augmentation.
+            data (dict or List[dict]): Data from dataloader. The dict contains
+                the whole batch data, when it is a list[dict], the list
+                indicates test time augmentation.
             training (bool): Whether to enable training time augmentation.
                 Defaults to False.

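For reference, the single-dict case the docstring describes looks roughly like the following after ``pseudo_collate`` (keys follow mmdet3d conventions; the tensors and shapes are dummies):

import torch

# Rough shape of the `data` argument in the non-TTA case; tensors are
# dummies and shapes are illustrative.
data = dict(
    inputs=dict(
        points=[torch.rand(1000, 4), torch.rand(1200, 4)],  # per sample
        img=[torch.rand(3, 370, 1224), torch.rand(3, 370, 1224)]),
    data_samples=[])  # Det3DDataSample objects, omitted here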
@@ -184,17 +193,10 @@ def simple_process(self, data: dict, training: bool = False) -> dict:
                     'pad_shape': pad_shape
                 })

-            if hasattr(self, 'boxtype2tensor') and self.boxtype2tensor:
-                from mmdet.models.utils.misc import \
-                    samplelist_boxtype2tensor
+            if self.boxtype2tensor:
                 samplelist_boxtype2tensor(data_samples)
-            elif hasattr(self, 'boxlist2tensor') and self.boxlist2tensor:
-                from mmdet.models.utils.misc import \
-                    samplelist_boxlist2tensor
-                samplelist_boxlist2tensor(data_samples)
             if self.pad_mask:
                 self.pad_gt_masks(data_samples)
-
             if self.pad_seg:
                 self.pad_gt_sem_seg(data_samples)

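The deleted ``hasattr`` branches tolerated older mmdet versions that used the ``boxlist2tensor`` name; since ``boxtype2tensor`` is now always set in ``__init__``, a plain attribute check suffices and the import moves to module level. For orientation, the helper unwraps box objects roughly like this (a simplified sketch, not mmdet's full implementation):

from mmdet.structures.bbox import BaseBoxes

def boxtype2tensor_sketch(data_samples):
    # Simplified sketch of mmdet's `samplelist_boxtype2tensor`: replace
    # `BaseBoxes` objects with their raw tensor so downstream code can
    # assume plain tensors. The real helper also covers pred_instances
    # and ignored_instances.
    for data_sample in data_samples:
        if 'gt_instances' in data_sample:
            bboxes = data_sample.gt_instances.get('bboxes', None)
            if isinstance(bboxes, BaseBoxes):
                data_sample.gt_instances.bboxes = bboxes.tensor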
@@ -205,7 +207,7 @@ def simple_process(self, data: dict, training: bool = False) -> dict:

         return {'inputs': batch_inputs, 'data_samples': data_samples}

-    def preprocess_img(self, _batch_img: torch.Tensor) -> torch.Tensor:
+    def preprocess_img(self, _batch_img: Tensor) -> Tensor:
         # channel transform
         if self._channel_conversion:
             _batch_img = _batch_img[[2, 1, 0], ...]
@@ -223,12 +225,11 @@ def preprocess_img(self, _batch_img: torch.Tensor) -> torch.Tensor:
         return _batch_img

     def collate_data(self, data: dict) -> dict:
-        """Copying data to the target device and Performs normalization,
-        padding and bgr2rgb conversion and stack based on
-        ``BaseDataPreprocessor``.
+        """Copy data to the target device and perform normalization, padding
+        and bgr2rgb conversion and stack based on ``BaseDataPreprocessor``.

-        Collates the data sampled from dataloader into a list of dict and
-        list of labels, and then copies tensor to the target device.
+        Collates the data sampled from dataloader into a list of dict and list
+        of labels, and then copies tensor to the target device.

         Args:
             data (dict): Data sampled from dataloader.
@@ -241,7 +242,7 @@ def collate_data(self, data: dict) -> dict:
         if 'img' in data['inputs']:
             _batch_imgs = data['inputs']['img']
             # Process data with `pseudo_collate`.
-            if is_list_of(_batch_imgs, torch.Tensor):
+            if is_seq_of(_batch_imgs, torch.Tensor):
                 batch_imgs = []
                 img_dim = _batch_imgs[0].dim()
                 for _batch_img in _batch_imgs:
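Note: ``is_seq_of`` is a strict superset of ``is_list_of`` here, because ``pseudo_collate`` can hand the images over as a tuple, which ``is_list_of`` would reject. Both helpers are public ``mmengine.utils`` API:

import torch
from mmengine.utils import is_list_of, is_seq_of

# A tuple of tensors, as collation may produce.
imgs = (torch.rand(3, 4, 4), torch.rand(3, 4, 4))
print(is_list_of(imgs, torch.Tensor))  # False: lists only
print(is_seq_of(imgs, torch.Tensor))   # True: any sequence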
@@ -289,7 +290,7 @@ def collate_data(self, data: dict) -> dict:
             else:
                 raise TypeError(
                     'Output of `cast_data` should be a list of dict '
-                    'or a tuple with inputs and data_samples, but got'
+                    'or a tuple with inputs and data_samples, but got '
                     f'{type(data)}: {data}')

         data['inputs']['imgs'] = batch_imgs
@@ -298,13 +299,13 @@ def collate_data(self, data: dict) -> dict:

         return data

-    def _get_pad_shape(self, data: dict) -> List[tuple]:
+    def _get_pad_shape(self, data: dict) -> List[Tuple[int, int]]:
         """Get the pad_shape of each image based on data and
         pad_size_divisor."""
         # rewrite `_get_pad_shape` for obtaining image inputs.
         _batch_inputs = data['inputs']['img']
         # Process data with `pseudo_collate`.
-        if is_list_of(_batch_inputs, torch.Tensor):
+        if is_seq_of(_batch_inputs, torch.Tensor):
             batch_pad_shape = []
             for ori_input in _batch_inputs:
                 if ori_input.dim() == 4:
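The pad shape itself is just each spatial size rounded up to the nearest multiple of ``pad_size_divisor``; a worked example of that arithmetic:

import math

# Worked example of the rounding convention behind `_get_pad_shape`.
h, w, divisor = 370, 1224, 32
pad_h = math.ceil(h / divisor) * divisor  # 384
pad_w = math.ceil(w / divisor) * divisor  # 1248
print((pad_h, pad_w))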
@@ -338,8 +339,8 @@ def _get_pad_shape(self, data: dict) -> List[tuple]:
         return batch_pad_shape

     @torch.no_grad()
-    def voxelize(self, points: List[torch.Tensor],
-                 data_samples: SampleList) -> Dict[str, torch.Tensor]:
+    def voxelize(self, points: List[Tensor],
+                 data_samples: SampleList) -> Dict[str, Tensor]:
         """Apply voxelization to point cloud.

         Args:
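As background for this type-only change: dynamic voxelization amounts to mapping each point to integer voxel coordinates without capping points per voxel, while 'hard' voxelization additionally samples a fixed number of points per voxel. A bare-bones sketch of the coordinate step (illustrative values, not the mmdet3d CUDA op):

import torch

# Bare-bones dynamic-voxelization coordinate computation; values are
# illustrative and this is not the mmdet3d ops implementation.
points = torch.rand(1000, 4) * 10         # x, y, z, intensity
voxel_size = torch.tensor([0.05, 0.05, 0.1])
pc_range_min = torch.tensor([0.0, -40.0, -3.0])
coors = ((points[:, :3] - pc_range_min) / voxel_size).floor().long()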
@@ -466,7 +467,8 @@ def voxelize(self, points: List[torch.Tensor],

         return voxel_dict

-    def get_voxel_seg(self, res_coors: torch.Tensor, data_sample: SampleList):
+    def get_voxel_seg(self, res_coors: Tensor,
+                      data_sample: SampleList) -> None:
         """Get voxel-wise segmentation label and point2voxel map.

         Args:
@@ -490,7 +492,7 @@ def get_voxel_seg(self, res_coors: torch.Tensor, data_sample: SampleList):
             data_sample.point2voxel_map = point2voxel_map

     def ravel_hash(self, x: np.ndarray) -> np.ndarray:
-        """Get voxel coordinates hash for np.unique().
+        """Get voxel coordinates hash for np.unique.

         Args:
             x (np.ndarray): The voxel coordinates of points, Nx3.
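The docstring fix aside, the idea of ``ravel_hash`` is to fold each Nx3 integer coordinate row into one scalar key so that ``np.unique`` can deduplicate rows on a 1-D array. A self-contained sketch of such a mixed-radix hash (consistent with the docstring, not copied from this diff):

import numpy as np

def ravel_hash_sketch(x: np.ndarray) -> np.ndarray:
    # Shift coordinates to be non-negative, then encode the columns in
    # a mixed-radix scheme so each distinct row maps to a distinct
    # integer key.
    assert x.ndim == 2, x.shape
    x = x - x.min(0)
    x = x.astype(np.uint64, copy=False)
    xmax = x.max(0).astype(np.uint64) + 1
    h = np.zeros(x.shape[0], dtype=np.uint64)
    for k in range(x.shape[1] - 1):
        h += x[:, k]
        h *= xmax[k + 1]
    h += x[:, -1]
    return h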
@@ -519,14 +521,14 @@ def sparse_quantize(self,

         Args:
             coords (np.ndarray): The voxel coordinates of points, Nx3.
-            return_index (bool): Whether to return the indices of the
-                unique coords, shape (M,).
+            return_index (bool): Whether to return the indices of the unique
+                coords, shape (M,).
             return_inverse (bool): Whether to return the indices of the
-                original coords shape (N,).
+                original coords, shape (N,).

         Returns:
-            List[np.ndarray] or None: Return index and inverse map if
-                return_index and return_inverse is True.
+            List[np.ndarray]: Return index and inverse map if return_index and
+                return_inverse is True.
         """
         _, indices, inverse_indices = np.unique(
             self.ravel_hash(coords), return_index=True, return_inverse=True)
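The index/inverse pair returned by ``np.unique`` is what gives both the representative point of each voxel and the point-to-voxel map. A tiny demonstration of the pattern (toy hash, illustrative values):

import numpy as np

# Toy demonstration of return_index / return_inverse on duplicate
# voxel coordinates.
coords = np.array([[0, 0, 0], [1, 2, 0], [0, 0, 0], [1, 2, 0]])
keys = coords[:, 0] * 9 + coords[:, 1] * 3 + coords[:, 2]  # toy hash
_, index, inverse = np.unique(keys, return_index=True, return_inverse=True)
print(index)    # [0 1]: first point landing in each unique voxel
print(inverse)  # [0 1 0 1]: voxel id of every point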

mmdet3d/models/data_preprocessors/utils.py

Lines changed: 19 additions & 21 deletions
@@ -3,41 +3,39 @@

 import torch
 import torch.nn.functional as F
+from torch import Tensor


-def multiview_img_stack_batch(
-        tensor_list: List[torch.Tensor],
-        pad_size_divisor: int = 1,
-        pad_value: Union[int, float] = 0) -> torch.Tensor:
-    """
-    Compared to the stack_batch in mmengine.model.utils,
+def multiview_img_stack_batch(tensor_list: List[Tensor],
+                              pad_size_divisor: int = 1,
+                              pad_value: Union[int, float] = 0) -> Tensor:
+    """Compared to the ``stack_batch`` in `mmengine.model.utils`,
     multiview_img_stack_batch further handle the multiview images.
-    see diff of padded_sizes[:, :-2] = 0 vs padded_sizes[:, 0] = 0 in line 47
-    Stack multiple tensors to form a batch and pad the tensor to the max
-    shape use the right bottom padding mode in these images. If
+
+    See diff of padded_sizes[:, :-2] = 0 vs padded_sizes[:, 0] = 0 in line 47.
+
+    Stack multiple tensors to form a batch and pad the tensor to the max shape
+    use the right bottom padding mode in these images. If
     ``pad_size_divisor > 0``, add padding to ensure the shape of each dim is
     divisible by ``pad_size_divisor``.

     Args:
         tensor_list (List[Tensor]): A list of tensors with the same dim.
-        pad_size_divisor (int): If ``pad_size_divisor > 0``, add padding
-            to ensure the shape of each dim is divisible by
-            ``pad_size_divisor``. This depends on the model, and many
-            models need to be divisible by 32. Defaults to 1.
+        pad_size_divisor (int): If ``pad_size_divisor > 0``, add padding to
+            ensure the shape of each dim is divisible by ``pad_size_divisor``.
+            This depends on the model, and many models need to be divisible by
+            32. Defaults to 1.
         pad_value (int or float): The padding value. Defaults to 0.

     Returns:
         Tensor: The n dim tensor.
     """
-    assert isinstance(
-        tensor_list,
-        list), f'Expected input type to be list, but got {type(tensor_list)}'
+    assert isinstance(tensor_list, list), \
+        f'Expected input type to be list, but got {type(tensor_list)}'
     assert tensor_list, '`tensor_list` could not be an empty list'
-    assert len({
-        tensor.ndim
-        for tensor in tensor_list
-    }) == 1, ('Expected the dimensions of all tensors must be the same, '
-              f'but got {[tensor.ndim for tensor in tensor_list]}')
+    assert len({tensor.ndim for tensor in tensor_list}) == 1, \
+        'Expected the dimensions of all tensors must be the same, ' \
+        f'but got {[tensor.ndim for tensor in tensor_list]}'

     dim = tensor_list[0].dim()
     num_img = len(tensor_list)
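To make the behaviour referenced by ``padded_sizes[:, :-2] = 0`` concrete: only the last two (spatial) dims are padded, so multi-view tensors of shape (N_views, C, H, W) keep their view and channel dims intact, whereas ``stack_batch`` exempts only the first dim. A usage sketch (shapes illustrative):

import torch
from mmdet3d.models.data_preprocessors.utils import multiview_img_stack_batch

# Two samples with equal view/channel counts but different spatial
# sizes; shapes are illustrative.
imgs = [
    torch.rand(6, 3, 370, 1224),  # (N_views, C, H, W)
    torch.rand(6, 3, 376, 1240),
]
batch = multiview_img_stack_batch(imgs, pad_size_divisor=32)
print(batch.shape)  # expected: torch.Size([2, 6, 3, 384, 1248])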
