
Commit 5b53d8a

Eric Mintun authored and facebook-github-bot committed
Sample single scale per GPU in LSJ data augmentation.
Summary: Code for sampling only one scale per minibatch in the LSJ data augmentation. `dataloader.train.mode` can be set to `per_image` to sample a separate scale for every image, `per_gpu` to sample one scale per GPU in each minibatch, or `per_batch` to sample a single scale across all GPUs for each minibatch. Padding up to 1024x1024 is removed so that small scales can run faster.

The implementation follows [A Multigrid Method for Efficiently Training Video Models](https://openaccess.thecvf.com/content_CVPR_2020/papers/Wu_A_Multigrid_Method_for_Efficiently_Training_Video_Models_CVPR_2020_paper.pdf): the batch sampler samples scales in the desired mode, then returns a tuple `(index, scale)` for each image. The dataset's `__getitem__` is modified to take such a tuple as input instead of just an index. When sampled this way:

1) Padding is slightly different: it is done by the model (as for non-LSJ data augmentation) rather than by the augmentation. This means padding occurs after horizontal flipping instead of before, and uses a slightly different value (0.0 after normalization, instead of 128 before normalization).

2) Grouping by aspect ratio is turned off, since it is now detrimental.

Reviewed By: vaibhava0

Differential Revision: D29506785

fbshipit-source-id: b252f21668f1508618d127133b64e28b17f48fd6
1 parent 0954ef3 commit 5b53d8a
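The summary describes the sampling mechanism only in prose; below is a minimal sketch of the idea, assuming hypothetical `ScalePerBatchSampler` and `ScaleAwareDataset` helpers (names, signatures, and the 0.1-2.0 scale range are illustrative, not the actual classes added by this commit):

```python
import numpy as np
from torch.utils.data import Dataset, Sampler


class ScalePerBatchSampler(Sampler):
    """Illustrative batch sampler: yields lists of (index, scale) tuples.

    With mode="per_gpu" one scale is drawn for the whole (per-GPU) minibatch;
    "per_image" instead draws one scale per index, and "per_batch" would
    additionally have to broadcast the same scale to all GPUs.
    """

    def __init__(self, indices, batch_size, min_scale=0.1, max_scale=2.0, mode="per_gpu"):
        self.indices = list(indices)
        self.batch_size = batch_size
        self.min_scale = min_scale
        self.max_scale = max_scale
        self.mode = mode

    def __len__(self):
        return (len(self.indices) + self.batch_size - 1) // self.batch_size

    def __iter__(self):
        order = np.random.permutation(self.indices)
        for start in range(0, len(order), self.batch_size):
            batch = order[start:start + self.batch_size]
            if self.mode == "per_image":
                # A separate scale for every image.
                yield [(int(i), float(np.random.uniform(self.min_scale, self.max_scale)))
                       for i in batch]
            else:
                # One scale for the whole (per-GPU) minibatch; "per_batch" would
                # further synchronize this value across GPUs.
                scale = float(np.random.uniform(self.min_scale, self.max_scale))
                yield [(int(i), scale) for i in batch]


class ScaleAwareDataset(Dataset):
    """Wrapper whose __getitem__ accepts an (index, scale) tuple instead of an index."""

    def __init__(self, dataset, map_with_scale):
        self.dataset = dataset
        self.map_with_scale = map_with_scale  # e.g. applies an LSJ resize at the given scale

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, index_scale):
        index, scale = index_scale
        return self.map_with_scale(self.dataset[index], scale)
```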

2 files changed: +63 −41 lines


detectron2/data/dataset_mapper.py

Lines changed: 30 additions & 26 deletions
```diff
@@ -112,6 +112,35 @@ def from_config(cls, cfg, is_train: bool = True):
         )
         return ret
 
+    def _transform_annotations(self, dataset_dict, transforms, image_shape):
+        # USER: Modify this if you want to keep them for some reason.
+        for anno in dataset_dict["annotations"]:
+            if not self.use_instance_mask:
+                anno.pop("segmentation", None)
+            if not self.use_keypoint:
+                anno.pop("keypoints", None)
+
+        # USER: Implement additional transformations if you have other types of data
+        annos = [
+            utils.transform_instance_annotations(
+                obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
+            )
+            for obj in dataset_dict.pop("annotations")
+            if obj.get("iscrowd", 0) == 0
+        ]
+        instances = utils.annotations_to_instances(
+            annos, image_shape, mask_format=self.instance_mask_format
+        )
+
+        # After transforms such as cropping are applied, the bounding box may no longer
+        # tightly bound the object. As an example, imagine a triangle object
+        # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
+        # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
+        # the intersection of original bounding box and the cropping box.
+        if self.recompute_boxes:
+            instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
+        dataset_dict["instances"] = utils.filter_empty_instances(instances)
+
     def __call__(self, dataset_dict):
         """
         Args:
@@ -157,31 +186,6 @@ def __call__(self, dataset_dict):
             return dataset_dict
 
         if "annotations" in dataset_dict:
-            # USER: Modify this if you want to keep them for some reason.
-            for anno in dataset_dict["annotations"]:
-                if not self.use_instance_mask:
-                    anno.pop("segmentation", None)
-                if not self.use_keypoint:
-                    anno.pop("keypoints", None)
-
-            # USER: Implement additional transformations if you have other types of data
-            annos = [
-                utils.transform_instance_annotations(
-                    obj, transforms, image_shape, keypoint_hflip_indices=self.keypoint_hflip_indices
-                )
-                for obj in dataset_dict.pop("annotations")
-                if obj.get("iscrowd", 0) == 0
-            ]
-            instances = utils.annotations_to_instances(
-                annos, image_shape, mask_format=self.instance_mask_format
-            )
+            self._transform_annotations(dataset_dict, transforms, image_shape)
 
-            # After transforms such as cropping are applied, the bounding box may no longer
-            # tightly bound the object. As an example, imagine a triangle object
-            # [(0,0), (2,0), (0,2)] cropped by a box [(1,0),(2,2)] (XYXY format). The tight
-            # bounding box of the cropped triangle should be [(1,0),(2,1)], which is not equal to
-            # the intersection of original bounding box and the cropping box.
-            if self.recompute_boxes:
-                instances.gt_boxes = instances.gt_masks.get_bounding_boxes()
-            dataset_dict["instances"] = utils.filter_empty_instances(instances)
         return dataset_dict
```

detectron2/data/transforms/augmentation_impl.py

Lines changed: 33 additions & 15 deletions
```diff
@@ -206,20 +206,27 @@ def __init__(
         super().__init__()
         self._init(locals())
 
-    def get_transform(self, image: np.ndarray) -> Transform:
-        # Compute the image scale and scaled size.
+    def _get_resize(self, image: np.ndarray, scale: float) -> Transform:
         input_size = image.shape[:2]
-        output_size = (self.target_height, self.target_width)
-        random_scale = np.random.uniform(self.min_scale, self.max_scale)
-        random_scale_size = np.multiply(output_size, random_scale)
-        scale = np.minimum(
-            random_scale_size[0] / input_size[0], random_scale_size[1] / input_size[1]
+
+        # Compute new target size given a scale.
+        target_size = (self.target_height, self.target_width)
+        target_scale_size = np.multiply(target_size, scale)
+
+        # Compute actual rescaling applied to input image and output size.
+        output_scale = np.minimum(
+            target_scale_size[0] / input_size[0], target_scale_size[1] / input_size[1]
         )
-        scaled_size = np.round(np.multiply(input_size, scale)).astype(int)
+        output_size = np.round(np.multiply(input_size, output_scale)).astype(int)
+
         return ResizeTransform(
-            input_size[0], input_size[1], scaled_size[0], scaled_size[1], self.interp
+            input_size[0], input_size[1], output_size[0], output_size[1], self.interp
         )
 
+    def get_transform(self, image: np.ndarray) -> Transform:
+        random_scale = np.random.uniform(self.min_scale, self.max_scale)
+        return self._get_resize(image, random_scale)
+
 
 class RandomRotation(Augmentation):
     """
```
```diff
@@ -279,19 +286,21 @@ class FixedSizeCrop(Augmentation):
     """
     If `crop_size` is smaller than the input image size, then it uses a random crop of
     the crop size. If `crop_size` is larger than the input image size, then it pads
-    the right and the bottom of the image to the crop size.
+    the right and the bottom of the image to the crop size if `pad` is True, otherwise
+    it returns the smaller image.
     """
 
-    def __init__(self, crop_size: Tuple[int], pad_value: float = 128.0):
+    def __init__(self, crop_size: Tuple[int], pad: bool = True, pad_value: float = 128.0):
         """
         Args:
             crop_size: target image (height, width).
+            pad: if True, will pad images smaller than `crop_size` up to `crop_size`
             pad_value: the padding value.
         """
         super().__init__()
         self._init(locals())
 
-    def get_transform(self, image: np.ndarray) -> TransformList:
+    def _get_crop(self, image: np.ndarray) -> Transform:
         # Compute the image scale and scaled size.
         input_size = image.shape[:2]
         output_size = self.crop_size
@@ -301,19 +310,28 @@ def get_transform(self, image: np.ndarray) -> TransformList:
         max_offset = np.maximum(max_offset, 0)
         offset = np.multiply(max_offset, np.random.uniform(0.0, 1.0))
         offset = np.round(offset).astype(int)
-        crop_transform = CropTransform(
+        return CropTransform(
             offset[1], offset[0], output_size[1], output_size[0], input_size[1], input_size[0]
         )
 
+    def _get_pad(self, image: np.ndarray) -> Transform:
+        # Compute the image scale and scaled size.
+        input_size = image.shape[:2]
+        output_size = self.crop_size
+
         # Add padding if the image is scaled down.
         pad_size = np.subtract(output_size, input_size)
         pad_size = np.maximum(pad_size, 0)
         original_size = np.minimum(input_size, output_size)
-        pad_transform = PadTransform(
+        return PadTransform(
             0, 0, pad_size[1], pad_size[0], original_size[1], original_size[0], self.pad_value
         )
 
-        return TransformList([crop_transform, pad_transform])
+    def get_transform(self, image: np.ndarray) -> TransformList:
+        transforms = [self._get_crop(image)]
+        if self.pad:
+            transforms.append(self._get_pad(image))
+        return TransformList(transforms)
 
 
 class RandomCrop(Augmentation):
```
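For context, a hedged sketch of how these augmentations might be combined for LSJ training once up-front padding is dropped; the 0.1-2.0 scale range and 1024x1024 target are assumed typical LSJ settings, not values taken from this diff.

```python
from detectron2.data import transforms as T

image_size = 1024

# LSJ-style augmentation list. With pad=False the crop no longer pads small images
# up to image_size, so padding is left to the model: it then happens after the
# horizontal flip and uses the post-normalization value 0.0, as noted in the summary.
augmentations = [
    T.ResizeScale(
        min_scale=0.1, max_scale=2.0, target_height=image_size, target_width=image_size
    ),
    T.FixedSizeCrop(crop_size=(image_size, image_size), pad=False),
    T.RandomFlip(horizontal=True),
]
```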
