diff --git a/README.md b/README.md index 9c62965..57a308f 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,12 @@ python demo.py --weight ./checkpoint_VOC_efficientdet-d1_97.pth --threshold 0.6   ## Recent Update + - [04/04/2020] VOC dataset training success. + ```Shell + nice -n1 python3.6 train.py --dataset VOC --dataset_root $VOC_PATH --network efficientdet-d0 --batch_size $BSIZE --workers 8 --grad_accumulation_steps 1 --lr 0.00001 --eval_epochs 20 + ``` + I set `lr=1e-5` because `1e-4` did not work + - [31/03/2020] ~~Make support for freezing backbone layers and batch norm layers. In addition, it supports to mixed precision training APEX opt method=O1.~~ [requires testing]. - [06/01/2020] Support both DistributedDataParallel and DataParallel, change augmentation, eval_voc - [17/12/2019] Add Fast normalized fusion, Augmentation with Ratio, Change RetinaHead, Fix Support EfficientDet-D0->D7 - [7/12/2019] Support EfficientDet-D0, EfficientDet-D1, EfficientDet-D2, EfficientDet-D3, EfficientDet-D4,... . Support change gradient accumulation steps, AdamW. @@ -88,22 +94,22 @@ sh datasets/scripts/COCO2017.sh - To train EfficientDet using the train script simply specify the parameters listed in `train.py` as a flag or manually change them. ```Shell -python train.py --network effcientdet-d0 # Example +python train.py --network efficientdet-d0 # Example ``` - With VOC Dataset: ```Shell # DataParallel - python train.py --dataset VOC --dataset_root /root/data/VOCdevkit/ --network effcientdet-d0 --batch_size 32 + python train.py --dataset VOC --dataset_root /root/data/VOCdevkit/ --network efficientdet-d0 --batch_size 32 # DistributedDataParallel with backend nccl - python train.py --dataset VOC --dataset_root /root/data/VOCdevkit/ --network effcientdet-d0 --batch_size 32 --multiprocessing-distributed + python train.py --dataset VOC --dataset_root /root/data/VOCdevkit/ --network efficientdet-d0 --batch_size 32 --multiprocessing-distributed ``` - With COCO Dataset: ```Shell # DataParallel - python train.py --dataset COCO --dataset_root ~/data/coco/ --network effcientdet-d0 --batch_size 32 + python train.py --dataset COCO --dataset_root ~/data/coco/ --network efficientdet-d0 --batch_size 32 # DistributedDataParallel with backend nccl - python train.py --dataset COCO --dataset_root ~/data/coco/ --network effcientdet-d0 --batch_size 32 --multiprocessing-distributed + python train.py --dataset COCO --dataset_root ~/data/coco/ --network efficientdet-d0 --batch_size 32 --multiprocessing-distributed ``` ## Evaluation diff --git a/datasets/augmentation.py b/datasets/augmentation.py index 10a5615..d9289e4 100644 --- a/datasets/augmentation.py +++ b/datasets/augmentation.py @@ -16,8 +16,6 @@ def get_augumentation(phase, width=512, height=512, min_area=0., min_visibility= albu.augmentations.transforms.RandomResizedCrop( height=height, width=width, p=0.3), - albu.augmentations.transforms.Flip(), - albu.augmentations.transforms.Transpose(), albu.OneOf([ albu.RandomBrightnessContrast(brightness_limit=0.5, contrast_limit=0.4), @@ -33,7 +31,6 @@ def get_augumentation(phase, width=512, height=512, min_area=0., min_visibility= ]), albu.CLAHE(p=0.8), albu.HorizontalFlip(p=0.5), - albu.VerticalFlip(p=0.5), ]) if(phase == 'test' or phase == 'valid'): list_transforms.extend([ @@ -46,32 +43,43 @@ def get_augumentation(phase, width=512, height=512, min_area=0., min_visibility= ]) if(phase == 'test'): return albu.Compose(list_transforms) - return albu.Compose(list_transforms, bbox_params=albu.BboxParams(format='pascal_voc', min_area=min_area, - min_visibility=min_visibility, label_fields=['category_id'])) + return albu.Compose(list_transforms, + bbox_params=albu.BboxParams(format='pascal_voc', + min_area=min_area, + min_visibility=min_visibility, + label_fields=['category_id'])) def detection_collate(batch): imgs = [s['image'] for s in batch] annots = [s['bboxes'] for s in batch] labels = [s['category_id'] for s in batch] + scales = [s['scale'] for s in batch] max_num_annots = max(len(annot) for annot in annots) annot_padded = np.ones((len(annots), max_num_annots, 5))*-1 if max_num_annots > 0: for idx, (annot, lab) in enumerate(zip(annots, labels)): + # pylint: disable=C1801 if len(annot) > 0: annot_padded[idx, :len(annot), :4] = annot annot_padded[idx, :len(annot), 4] = lab - return (torch.stack(imgs, 0), torch.FloatTensor(annot_padded)) + return (torch.stack(imgs, 0), + torch.FloatTensor(annot_padded), + torch.FloatTensor(scales)) def collater(data): + data = [x for x in data if x is not None] imgs = [s['img'] for s in data] annots = [s['annot'] for s in data] scales = [s['scale'] for s in data] + try: + imgs = torch.from_numpy(np.stack(imgs, axis=0)) + except ValueError: + import pdb; pdb.set_trace() - imgs = torch.from_numpy(np.stack(imgs, axis=0)) max_num_annots = max(annot.shape[0] for annot in annots) @@ -88,7 +96,8 @@ def collater(data): imgs = imgs.permute(0, 3, 1, 2) - return (imgs, torch.FloatTensor(annot_padded)) + return (imgs, torch.FloatTensor(annot_padded), + torch.FloatTensor(scales)) class Resizer(object): @@ -108,11 +117,13 @@ def __call__(self, sample, common_size=512): image = cv2.resize(image, (resized_width, resized_height)) - new_image = np.zeros((common_size, common_size, 3)) + new_image = np.zeros((common_size, common_size, 3), np.float32) new_image[0:resized_height, 0:resized_width] = image annots[:, :4] *= scale - return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale} + return {'img': torch.from_numpy(new_image), + 'annot': torch.from_numpy(annots), + 'scale': scale} class Augmenter(object): @@ -147,4 +158,5 @@ def __init__(self): def __call__(self, sample): image, annots = sample['img'], sample['annot'] - return {'img': ((image.astype(np.float32) - self.mean) / self.std), 'annot': annots} + # 1/255. = 0.00392156862745098 + return {'img': ((image.astype(np.float32) *0.00392156862745098 - self.mean) / self.std), 'annot': annots} diff --git a/datasets/coco.py b/datasets/coco.py index c006f44..5e3835d 100644 --- a/datasets/coco.py +++ b/datasets/coco.py @@ -64,6 +64,7 @@ def __len__(self): def __getitem__(self, idx): img = self.load_image(idx) + image_size = img.shape[:2] annot = self.load_annotations(idx) sample = {'img': img, 'annot': annot} if self.transform: diff --git a/datasets/voc0712.py b/datasets/voc0712.py index 4814754..28e5de4 100644 --- a/datasets/voc0712.py +++ b/datasets/voc0712.py @@ -8,6 +8,8 @@ import xml.etree.cElementTree as ET else: import xml.etree.ElementTree as ET +import albumentations as albu + VOC_CLASSES = ( # always index 0 'aeroplane', 'bicycle', 'bird', 'boat', @@ -106,15 +108,26 @@ def __getitem__(self, index): target = ET.parse(self._annopath % img_id).getroot() img = cv2.imread(self._imgpath % img_id) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - img = img.astype(np.float32)/255. height, width, channels = img.shape if self.target_transform is not None: target = self.target_transform(target, width, height) target = np.array(target) sample = {'img': img, 'annot': target} - if self.transform is not None: - sample = self.transform(sample) + if isinstance(self.transform, albu.core.composition.Compose): + result = self.transform(image=img, bboxes=target[:, :4], category_id=target[:, -1]) + bboxes = np.array(result["bboxes"]) + cls = np.atleast_2d(result["category_id"]).T + if bboxes.size == 0: # after data augmentation we loose all bboxes + return None + target = np.hstack((bboxes, cls)) + sample = {"img": result["image"].transpose(1, 0).transpose(2, 1), + "annot": torch.from_numpy(target), + "scale": -1} # fake scale + else: + img = img.astype(np.float32)/255. + if self.transform is not None: + sample = self.transform(sample) return sample bbox = target[:, :4] diff --git a/eval.py b/eval.py index dc8393d..2b7ef19 100644 --- a/eval.py +++ b/eval.py @@ -73,7 +73,8 @@ def _compute_ap(recall, precision): return ap -def _get_detections(dataset, retinanet, score_threshold=0.05, max_detections=100, save_path=None): +def _get_detections(dataloader, retinanet, + score_threshold=0.05, max_detections=100, save_path=None): """ Get the detections from the retinanet using the generator. The result is a list of lists such that the size is: all_detections[num_images][num_classes] = detections[num_detections, 4 + num_classes] @@ -86,52 +87,55 @@ def _get_detections(dataset, retinanet, score_threshold=0.05, max_detections=100 # Returns A list of lists containing the detections for each image in the generator. """ - all_detections = [[None for i in range( - dataset.num_classes())] for j in range(len(dataset))] + dataset = dataloader.dataset + all_detections = [[None for i in range(dataset.num_classes())] + for j in range(len(dataset))] retinanet.eval() - + index = 0 with torch.no_grad(): - - for index in range(len(dataset)): - data = dataset[index] - scale = data['scale'] - - # run network - scores, labels, boxes = retinanet(data['img'].permute( - 2, 0, 1).cuda().float().unsqueeze(dim=0)) - scores = scores.cpu().numpy() - labels = labels.cpu().numpy() - boxes = boxes.cpu().numpy() - - # correct boxes for image scale - boxes /= scale - - # select indices which have a score above the threshold - indices = np.where(scores > score_threshold)[0] - if indices.shape[0] > 0: - # select those scores - scores = scores[indices] - - # find the order with which to sort the scores - scores_sort = np.argsort(-scores)[:max_detections] - - # select detections - image_boxes = boxes[indices[scores_sort], :] - image_scores = scores[scores_sort] - image_labels = labels[indices[scores_sort]] - image_detections = np.concatenate([image_boxes, np.expand_dims( - image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1) - - # copy detections to all_detections - for label in range(dataset.num_classes()): - all_detections[index][label] = image_detections[image_detections[:, -1] == label, :-1] - else: - # copy detections to all_detections - for label in range(dataset.num_classes()): - all_detections[index][label] = np.zeros((0, 5)) - - print('{}/{}'.format(index + 1, len(dataset)), end='\r') + for idx, data in tqdm(enumerate(dataloader), total=len(dataloader)): + images = data[0].cuda() + scores_batch, labels_batch, boxes_batch = retinanet(images) + + scores_batch = scores_batch.reshape(images.shape[0], -1) + labels_batch = labels_batch.reshape(images.shape[0], -1) + boxes_batch = boxes_batch.reshape(images.shape[0], -1, 4) + for scores, labels, boxes, scale in zip(scores_batch, + labels_batch, + boxes_batch, data[2]): + + scores = scores.cpu().numpy() + labels = labels.cpu().numpy() + boxes = boxes.cpu().numpy() + + # correct boxes for image scale + boxes /= scale + + # select indices which have a score above the threshold + indices = np.where(scores > score_threshold)[0] + if indices.shape[0] > 0: + # select those scores + scores = scores[indices] + + # find the order with which to sort the scores + scores_sort = np.argsort(-scores)[:max_detections] + + # select detections + image_boxes = boxes[indices[scores_sort], :] + image_scores = scores[scores_sort] + image_labels = labels[indices[scores_sort]] + image_detections = np.concatenate([image_boxes, np.expand_dims( + image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1) + + # copy detections to all_detections + for label in range(dataset.num_classes()): + all_detections[index][label] = image_detections[image_detections[:, -1] == label, :-1] + else: + # copy detections to all_detections + for label in range(dataset.num_classes()): + all_detections[index][label] = np.zeros((0, 5)) + index += 1 return all_detections @@ -146,18 +150,18 @@ def _get_annotations(generator): A list of lists containing the annotations for each image in the generator. """ all_annotations = [[None for i in range( - generator.num_classes())] for j in range(len(generator))] + generator.dataset.num_classes())] for j in range(len(generator.dataset))] - for i in range(len(generator)): + for i in range(len(generator.dataset)): # load the annotations - annotations = generator.load_annotations(i) + annotations = generator.dataset.load_annotations(i) # copy detections to all_annotations - for label in range(generator.num_classes()): + for label in range(generator.dataset.num_classes()): all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy() - print('{}/{}'.format(i + 1, len(generator)), end='\r') + print('{}/{}'.format(i + 1, len(generator.dataset)), end='\r') return all_annotations @@ -190,13 +194,13 @@ def evaluate( average_precisions = {} - for label in range(generator.num_classes()): + for label in range(generator.dataset.num_classes()): false_positives = np.zeros((0,)) true_positives = np.zeros((0,)) scores = np.zeros((0,)) num_annotations = 0.0 - for i in range(len(generator)): + for i in range(len(generator.dataset)): detections = all_detections[i][label] annotations = all_annotations[i][label] num_annotations += annotations.shape[0] @@ -249,72 +253,66 @@ def evaluate( print('\nmAP:') avg_mAP = [] - for label in range(generator.num_classes()): - label_name = generator.label_to_name(label) + for label in range(generator.dataset.num_classes()): + label_name = generator.dataset.label_to_name(label) print('{}: {}'.format(label_name, average_precisions[label][0])) avg_mAP.append(average_precisions[label][0]) print('avg mAP: {}'.format(np.mean(avg_mAP))) return np.mean(avg_mAP), average_precisions -def evaluate_coco(dataset, model, threshold=0.05): +def evaluate_coco(dataloader, model, threshold=0.05): model.eval() + dataset = dataloader.dataset + with torch.no_grad(): # start collecting results results = [] image_ids = [] + index = 0 - for index in range(len(dataset)): - data = dataset[index] - scale = data['scale'] - + for data in tqdm(dataloader, total=len(dataloader)): + images = data[0] # run network - scores, labels, boxes = model(data['img'].permute( - 2, 0, 1).cuda().float().unsqueeze(dim=0)) - scores = scores.cpu() - labels = labels.cpu() - boxes = boxes.cpu() - - # correct boxes for image scale - boxes /= scale - - if boxes.shape[0] > 0: - # change to (x, y, w, h) (MS COCO standard) - boxes[:, 2] -= boxes[:, 0] - boxes[:, 3] -= boxes[:, 1] - - # compute predicted labels and scores - # for box, score, label in zip(boxes[0], scores[0], labels[0]): - for box_id in range(boxes.shape[0]): - score = float(scores[box_id]) - label = int(labels[box_id]) - box = boxes[box_id, :] - - # scores are sorted, so we can break - if score < threshold: - break - - # append detection for each positively labeled class - image_result = { - 'image_id': dataset.image_ids[index], - 'category_id': dataset.label_to_coco_label(label), - 'score': float(score), - 'bbox': box.tolist(), - } - - # append detection to results - results.append(image_result) - - # append image to list of processed images - image_ids.append(dataset.image_ids[index]) - - # print progress - print('{}/{}'.format(index, len(dataset)), end='\r') - - if not len(results): + scores_batch, labels_batch, boxes_batch = model(images) + + scores_batch = scores_batch.reshape(images.shape[0], -1) + labels_batch = labels_batch.reshape(images.shape[0], -1) + boxes_batch = boxes_batch.reshape(images.shape[0], -1, 4) + + for scores, labels, boxes, scale in zip(scores_batch, + labels_batch, + boxes_batch, data[2]): + scores = scores.cpu().numpy() + labels = labels.cpu().numpy() + boxes = boxes.cpu().numpy() + + # correct boxes for image scale + boxes /= scale + + if boxes.shape[0] > 0: + # change to (x, y, w, h) (MS COCO standard) + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + # compute predicted labels and scores + # for box, score, label in zip(boxes[0], scores[0], labels[0]): + boxes = boxes[scores >= threshold] + labels = labels[scores >= threshold] + scores = scores[scores >= threshold] + results.extend([{"image_id": dataset.image_ids[index], + "category_id": dataset.label_to_coco_label(label), + "score": float(score), + "bbox": box.tolist()} + for box, score, label in zip(boxes, scores, labels)]) + + # append image to list of processed images + image_ids.append(dataset.image_ids[index]) + index += 1 + if not results: return # write output diff --git a/loader.py b/loader.py new file mode 100644 index 0000000..eeaf11e --- /dev/null +++ b/loader.py @@ -0,0 +1,67 @@ +import torch + + +class PrefetchLoader: + + def __init__(self, loader): + self.loader = loader + # self.mean = torch.tensor([x * 255 for x in mean]).cuda().view(1, 3, 1, 1) + # self.std = torch.tensor([x * 255 for x in std]).cuda().view(1, 3, 1, 1) + # self.fp16 = fp16 + # if fp16: + # self.mean = self.mean.half() + # self.std = self.std.half() + # if re_prob > 0.: + # self.random_erasing = RandomErasing( + # probability=re_prob, mode=re_mode, max_count=re_count, num_splits=re_num_splits) + # else: + # self.random_erasing = None + + def __iter__(self): + stream = torch.cuda.Stream() + first = True + + for next_input, next_target, _ in self.loader: + with torch.cuda.stream(stream): + next_input = next_input.cuda(non_blocking=True) + next_target = next_target.cuda(non_blocking=True) + # if self.fp16: + # next_input = next_input.half().sub_(self.mean).div_(self.std) + # else: + # next_input = next_input.float().sub_(self.mean).div_(self.std) + # if self.random_erasing is not None: + # next_input = self.random_erasing(next_input) + + if not first: + yield input, target + else: + first = False + + torch.cuda.current_stream().wait_stream(stream) + input = next_input + target = next_target + + yield input, target + + def __len__(self): + return len(self.loader) + + @property + def sampler(self): + return self.loader.sampler + + @property + def dataset(self): + return self.loader.dataset + + # @property + # def mixup_enabled(self): + # if isinstance(self.loader.collate_fn, FastCollateMixup): + # return self.loader.collate_fn.mixup_enabled + # else: + # return False + + # @mixup_enabled.setter + # def mixup_enabled(self, x): + # if isinstance(self.loader.collate_fn, FastCollateMixup): + # self.loader.collate_fn.mixup_enabled = x diff --git a/models/bifpn.py b/models/bifpn.py index 0f8e6bd..a9bc8f6 100644 --- a/models/bifpn.py +++ b/models/bifpn.py @@ -162,6 +162,7 @@ def __init__(self, inplace=False) ) self.bifpn_convs.append(fpn_conv) + # self.init_weights() # new code # default init_weights for conv(msra) and norm in ConvModule def init_weights(self): diff --git a/models/efficientdet.py b/models/efficientdet.py index 43357c9..2601366 100644 --- a/models/efficientdet.py +++ b/models/efficientdet.py @@ -31,7 +31,7 @@ def __init__(self, iou_threshold=0.5): super(EfficientDet, self).__init__() self.backbone = EfficientNet.from_pretrained(MODEL_MAP[network]) - self.is_training = is_training + # self.is_training = is_training self.neck = BIFPN(in_channels=self.backbone.get_list_features()[-5:], out_channels=W_bifpn, stack=D_bifpn, @@ -44,18 +44,32 @@ def __init__(self, self.clipBoxes = ClipBoxes() self.threshold = threshold self.iou_threshold = iou_threshold - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - self.freeze_bn() + + # ============== original code starts =============== + """The following code forces all weights to be random, which does not make sense at all!""" + # for m in self.modules(): + # if isinstance(m, nn.Conv2d): + # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + # m.weight.data.normal_(0, math.sqrt(2. / n)) + # elif isinstance(m, nn.BatchNorm2d): + # m.weight.data.fill_(1) + # m.bias.data.zero_() + + # self.freeze_bn() + # ============== original code ends =============== + self.criterion = FocalLoss() + def extract_feat(self, img): + """ + Directly extract features from the backbone+neck + """ + x = self.backbone(img) + x = self.neck(x[-5:]) + return x + def forward(self, inputs): - if self.is_training: + if self.training: inputs, annotations = inputs else: inputs = inputs @@ -64,37 +78,62 @@ def forward(self, inputs): classification = torch.cat([out for out in outs[0]], dim=1) regression = torch.cat([out for out in outs[1]], dim=1) anchors = self.anchors(inputs) - if self.is_training: + # if anchors.dtype != inputs.dtype: # used for mixed precision training + # anchors = anchors.type_as(inputs) + if self.training: return self.criterion(classification, regression, anchors, annotations) else: + max_per_image = 256 transformed_anchors = self.regressBoxes(anchors, regression) transformed_anchors = self.clipBoxes(transformed_anchors, inputs) scores = torch.max(classification, dim=2, keepdim=True)[0] - scores_over_thresh = (scores > self.threshold)[0, :, 0] + nms_scores = [] + nms_class = [] + anchors = [] + for idx, score in enumerate(scores): + scores_over_thresh = (score > self.threshold)[:, 0] + if scores_over_thresh.sum() == 0: + print('No boxes to NMS') + # no boxes to NMS, just return + # return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)] + continue + cls_tmp = classification[idx, scores_over_thresh, :] + trf_anchors = transformed_anchors[idx, scores_over_thresh, :] + scores_tmp = scores[idx, scores_over_thresh, :] + anchors_nms_idx = nms(trf_anchors, scores_tmp[:, 0], + iou_threshold=self.iou_threshold) + nms_scores_tmp, nms_class_tmp = cls_tmp[anchors_nms_idx, :].max(dim=1) + trf_anchors = trf_anchors[anchors_nms_idx, :] - if scores_over_thresh.sum() == 0: - print('No boxes to NMS') - # no boxes to NMS, just return - return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)] - classification = classification[:, scores_over_thresh, :] - transformed_anchors = transformed_anchors[:, scores_over_thresh, :] - scores = scores[:, scores_over_thresh, :] - anchors_nms_idx = nms( - transformed_anchors[0, :, :], scores[0, :, 0], iou_threshold=self.iou_threshold) - nms_scores, nms_class = classification[0, anchors_nms_idx, :].max( - dim=1) - return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]] + if not torch.all(-nms_scores_tmp[:-1] <= -nms_scores_tmp[1:]): + raise ValueError("Please make nms score sorted") + if nms_scores_tmp.shape[0] > max_per_image: + nms_scores_tmp = nms_scores_tmp[:max_per_image] + nms_class_tmp = nms_class_tmp[:max_per_image] + trf_anchors = trf_anchors[:max_per_image] + else: + K = max_per_image - nms_scores_tmp.shape[0] + nms_scores_tmp = torch.cat((nms_scores_tmp, + -torch.ones(K,).type_as(nms_scores_tmp))) + nms_class_tmp = torch.cat((nms_class_tmp, + -torch.ones(K,).type_as(nms_class_tmp))) + trf_anchors = torch.cat((trf_anchors, + -torch.ones(K, 4).type_as(trf_anchors))) + nms_scores.append(nms_scores_tmp) + nms_class.append(nms_class_tmp) + anchors.append(trf_anchors) + return torch.cat(nms_scores), torch.cat(nms_class), torch.cat(anchors) + + def freeze_backbone(self): + """Freeze backbone weights and bn layers.""" + for layer in self.backbone.modules(): + if isinstance(layer, nn.BatchNorm2d): + layer.eval() + for param in self.backbone.parameters(): + param.requires_grad = False def freeze_bn(self): '''Freeze BatchNorm layers.''' for layer in self.modules(): if isinstance(layer, nn.BatchNorm2d): layer.eval() - - def extract_feat(self, img): - """ - Directly extract features from the backbone+neck - """ - x = self.backbone(img) - x = self.neck(x[-5:]) - return x diff --git a/models/losses.py b/models/losses.py index 99b9cfd..d780d46 100644 --- a/models/losses.py +++ b/models/losses.py @@ -43,6 +43,13 @@ def forward(self, classifications, regressions, anchors, annotations): anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights + if classifications.dtype == torch.float32: + MAX_ONE = 0.9999 + MIN_ZERO = 1e-4 + else: + MAX_ONE = 0.999 + MIN_ZERO = 1e-4 + not_found = 0 for j in range(batch_size): classification = classifications[j, :, :] @@ -50,23 +57,19 @@ def forward(self, classifications, regressions, anchors, annotations): bbox_annotation = annotations[j, :, :] bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1] - if bbox_annotation.shape[0] == 0: regression_losses.append(torch.tensor(0).float().cuda()) classification_losses.append(torch.tensor(0).float().cuda()) continue - classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4) + classification = torch.clamp(classification, MIN_ZERO, MAX_ONE) # num_anchors x num_annotations IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1 - #import pdb - # pdb.set_trace() - # compute the loss for classification targets = torch.ones(classification.shape) * -1 targets = targets.cuda() @@ -76,6 +79,8 @@ def forward(self, classifications, regressions, anchors, annotations): positive_indices = torch.ge(IoU_max, 0.5) num_positive_anchors = positive_indices.sum() + if num_positive_anchors == 0: + not_found += 1 assigned_annotations = bbox_annotation[IoU_argmax, :] @@ -148,5 +153,6 @@ def forward(self, classifications, regressions, anchors, annotations): regression_losses.append(regression_loss.mean()) else: regression_losses.append(torch.tensor(0).float().cuda()) - + if not_found == batch_size: + print("Not positive sample is found in the batch") return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, keepdim=True) diff --git a/models/retinahead.py b/models/retinahead.py index 7fcbadf..896f1ab 100644 --- a/models/retinahead.py +++ b/models/retinahead.py @@ -1,6 +1,7 @@ from functools import partial import numpy as np +import torch import torch.nn as nn from .module import ConvModule, bias_init_with_prob, normal_init diff --git a/models/utils.py b/models/utils.py index 34c8649..69c620c 100644 --- a/models/utils.py +++ b/models/utils.py @@ -302,15 +302,27 @@ def get_model_params(model_name, override_params): return blocks_args, global_params +# url_map = { +# 'efficientnet-b0': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b0-355c32eb.pth', +# 'efficientnet-b1': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b1-f1951068.pth', +# 'efficientnet-b2': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b2-8bb594d6.pth', +# 'efficientnet-b3': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b3-5fb5a3c3.pth', +# 'efficientnet-b4': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b4-6ed6700e.pth', +# 'efficientnet-b5': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b5-b6417697.pth', +# 'efficientnet-b6': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b6-c76e70fd.pth', +# 'efficientnet-b7': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b7-dcc49843.pth', +# } + + url_map = { - 'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b0-355c32eb.pth', - 'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b1-f1951068.pth', - 'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b2-8bb594d6.pth', - 'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b3-5fb5a3c3.pth', - 'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b4-6ed6700e.pth', - 'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b5-b6417697.pth', - 'efficientnet-b6': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b6-c76e70fd.pth', - 'efficientnet-b7': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b7-dcc49843.pth', + 'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth', + 'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth', + 'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth', + 'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth', + 'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth', + 'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth', + 'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth', + 'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth', } diff --git a/train.py b/train.py index 8e90697..0cf502a 100644 --- a/train.py +++ b/train.py @@ -3,8 +3,10 @@ import os import random import shutil +from collections import OrderedDict import time import warnings +import epdb import torch import torch.nn as nn import torch.nn.parallel @@ -17,6 +19,8 @@ import torchvision.transforms as transforms import torchvision.datasets as datasets +import pytorch_warmup as warmup + import os import sys import time @@ -27,12 +31,29 @@ import torch.backends.cudnn as cudnn from torch.utils.data import DataLoader +try: + from apex.parallel import DistributedDataParallel as DDP + from apex.fp16_utils import * + from apex import amp, optimizers + from apex.multi_tensor_apply import multi_tensor_applier +except ImportError: + print("Please install apex from https://www.github.com/nvidia/apex to run this example.") + from models.efficientdet import EfficientDet from models.losses import FocalLoss from datasets import VOCDetection, CocoDataset, get_augumentation, detection_collate, Resizer, Normalizer, Augmenter, collater from utils import EFFICIENTDET, get_state_dict from eval import evaluate, evaluate_coco +from loader import PrefetchLoader +from torch.utils.tensorboard import SummaryWriter + + +breakpoint = epdb.set_trace + +writer = SummaryWriter() + + parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') parser.add_argument('--dataset', default='VOC', choices=['VOC', 'COCO'], type=str, help='VOC or COCO') @@ -67,7 +88,7 @@ help='Directory for saving checkpoint models') parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', help='number of data loading workers (default: 4)') -parser.add_argument('--start_epoch', default=0, type=int, metavar='N', +parser.add_argument('--start_epoch', default=-1, type=int, metavar='N', help='manual epoch number (useful on restarts)') parser.add_argument('--world-size', default=1, type=int, help='number of nodes for distributed training') @@ -77,7 +98,7 @@ help='url used to set up distributed training') parser.add_argument('--dist-backend', default='nccl', type=str, help='distributed backend') -parser.add_argument('--seed', default=24, type=int, +parser.add_argument('--seed', default=None, type=int, help='seed for initializing training. ') parser.add_argument('--gpu', default=None, type=int, help='GPU id to use.') @@ -88,21 +109,35 @@ 'N processes per node, which has N GPUs. This is the ' 'fastest way to use PyTorch for either single node or ' 'multi node data parallel training') +parser.add_argument('--eval_epochs', default=5, type=int, + help='after how many training epochs will do evaluation (default 5).') +parser.add_argument('--freeze_backbone', action='store_true', + help='freeze EfficientNet-d{x} backbone') +parser.add_argument('--freeze_bn', action='store_true', + help='freeze all batch norm layers') +parser.add_argument('--mixed_training', action='store_true', + help='Use AMP mixed training optimization O1') +parser.add_argument('--eval', action='store_true', + help='Perform evaluation') iteration = 1 -def train(train_loader, model, scheduler, optimizer, epoch, args): +def train(train_loader, model, scheduler, warmup_scheduler, optimizer, epoch, args): global iteration print("{} epoch: \t start training....".format(epoch)) start = time.time() total_loss = [] model.train() - model.module.is_training = True - model.module.freeze_bn() + # model.module.is_training = True + # model.module.freeze_bn() optimizer.zero_grad() - for idx, (images, annotations) in enumerate(train_loader): - images = images.cuda().float() + + prefetcher = PrefetchLoader(train_loader) + + for idx, (images, annotations) in tqdm(enumerate(prefetcher), + total=len(prefetcher)): + images = images.float().cuda() annotations = annotations.cuda() classification_loss, regression_loss = model([images, annotations]) classification_loss = classification_loss.mean() @@ -111,15 +146,24 @@ def train(train_loader, model, scheduler, optimizer, epoch, args): if bool(loss == 0): print('loss equal zero(0)') continue - loss.backward() + + if args.mixed_training: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + if (idx + 1) % args.grad_accumulation_steps == 0: torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1) optimizer.step() optimizer.zero_grad() + # scheduler.step() + # if warmup_scheduler: + # warmup_scheduler.dampen() total_loss.append(loss.item()) - if(iteration % 300 == 0): - print('{} iteration: training ...'.format(iteration)) + if (iteration % 100 == 0): + # print('{} iteration: training ...'.format(iteration)) ans = { 'epoch': epoch, 'iteration': iteration, @@ -129,8 +173,11 @@ def train(train_loader, model, scheduler, optimizer, epoch, args): } for key, value in ans.items(): print(' {:15s}: {}'.format(str(key), value)) + if key != "epoch": + writer.add_scalar(key, value, iteration) iteration += 1 - scheduler.step(np.mean(total_loss)) + scheduler.step(np.mean(total_loss)) # used for ReduceLROnPlateau + result = { 'time': time.time() - start, 'loss': np.mean(total_loss) @@ -139,16 +186,16 @@ def train(train_loader, model, scheduler, optimizer, epoch, args): print(' {:15s}: {}'.format(str(key), value)) -def test(dataset, model, epoch, args): +def test(dataloader, model, epoch, args): print("{} epoch: \t start validation....".format(epoch)) - model = model.module + # model = model.module model.eval() - model.is_training = False + # model.is_training = False with torch.no_grad(): if(args.dataset == 'VOC'): - evaluate(dataset, model) + evaluate(dataloader, model) else: - evaluate_coco(dataset, model) + evaluate_coco(dataloader, model) def main_worker(gpu, ngpus_per_node, args): @@ -171,29 +218,23 @@ def main_worker(gpu, ngpus_per_node, args): rank=args.rank) # Training dataset + data_augmenter = transforms.Compose([Normalizer(), Augmenter(), Resizer()]) + data_augmenter = get_augumentation(phase="train") + inference_augmenter = transforms.Compose([Normalizer(), Resizer()]) train_dataset = [] - if(args.dataset == 'VOC'): - train_dataset = VOCDetection(root=args.dataset_root, transform=transforms.Compose( - [Normalizer(), Augmenter(), Resizer()])) + if (args.dataset == 'VOC'): + train_dataset = VOCDetection(root=args.dataset_root, + transform=data_augmenter) valid_dataset = VOCDetection(root=args.dataset_root, image_sets=[( - '2007', 'test')], transform=transforms.Compose([Normalizer(), Resizer()])) + '2007', 'test')], transform=inference_augmenter) args.num_class = train_dataset.num_classes() - elif(args.dataset == 'COCO'): - train_dataset = CocoDataset( - root_dir=args.dataset_root, - set_name='train2017', - transform=transforms.Compose( - [ - Normalizer(), - Augmenter(), - Resizer()])) - valid_dataset = CocoDataset( - root_dir=args.dataset_root, - set_name='val2017', - transform=transforms.Compose( - [ - Normalizer(), - Resizer()])) + elif (args.dataset == 'COCO'): + train_dataset = CocoDataset(root_dir=args.dataset_root, + set_name='train2017', + transform=data_augmenter) + valid_dataset = CocoDataset(root_dir=args.dataset_root, + set_name='val2017', + transform=inference_augmenter) args.num_class = train_dataset.num_classes() train_loader = DataLoader(train_dataset, @@ -203,7 +244,7 @@ def main_worker(gpu, ngpus_per_node, args): collate_fn=collater, pin_memory=True) valid_loader = DataLoader(valid_dataset, - batch_size=1, + batch_size=args.batch_size, num_workers=args.workers, shuffle=False, collate_fn=collater, @@ -222,8 +263,11 @@ def main_worker(gpu, ngpus_per_node, args): params = checkpoint['parser'] args.num_class = params.num_class args.network = params.network - args.start_epoch = checkpoint['epoch'] + 1 + if args.start_epoch == -1: + args.start_epoch = checkpoint['epoch'] + 1 del params + if args.start_epoch == -1: + args.start_epoch = 0 model = EfficientDet(num_classes=args.num_class, network=args.network, @@ -232,8 +276,25 @@ def main_worker(gpu, ngpus_per_node, args): D_class=EFFICIENTDET[args.network]['D_class'] ) if(args.resume is not None): - model.load_state_dict(checkpoint['state_dict']) - del checkpoint + tmp = OrderedDict() + for k, v in checkpoint['state_dict'].items(): + k = k.replace("module.", "") + tmp[k] = v + model.load_state_dict(tmp) + del tmp + + if args.freeze_backbone: + model.freeze_backbone() + + if args.freeze_bn: + model.freeze_bn() + + # define loss function (criterion) , optimizer, scheduler + optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), + lr=args.lr) + if args.resume is not None and "optimizer" in checkpoint: + optimizer.load_state_dict(checkpoint["optimizer"]) + if args.distributed: # For multiprocessing distributed, DistributedDataParallel constructor # should always set the single device scope, otherwise, @@ -260,35 +321,65 @@ def main_worker(gpu, ngpus_per_node, args): torch.cuda.set_device(args.gpu) model = model.cuda(args.gpu) else: - model = model.cuda() print('Run with DataParallel ....') - model = torch.nn.DataParallel(model).cuda() + model = model.cuda() + + # define loss function (criterion) , optimizer, scheduler + optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), + lr=args.lr) + if args.resume is not None and "optimizer" in checkpoint: + optimizer.load_state_dict(checkpoint["optimizer"]) + + if args.mixed_training: + model, optimizer = amp.initialize(model, optimizer, + opt_level="O1", + keep_batchnorm_fp32=None, + master_weights=None, + loss_scale=None) + model = torch.nn.DataParallel(model) + + num_steps = len(train_loader) * args.num_epoch - # define loss function (criterion) , optimizer, scheduler - optimizer = optim.AdamW(model.parameters(), lr=args.lr) scheduler = optim.lr_scheduler.ReduceLROnPlateau( optimizer, patience=3, verbose=True) + # scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, + # T_max=num_steps) + if args.resume is not None and "scheduler" in checkpoint: + scheduler.load_state_dict(checkpoint["scheduler"]) + + # warmup_scheduler = warmup.UntunedLinearWarmup(optimizer) + warmup_scheduler = None + + if args.resume is not None and "warmup_scheduler" in checkpoint: + scheduler.load_state_dict(checkpoint["warmup_scheduler"]) + del checkpoint + cudnn.benchmark = True - for epoch in range(args.start_epoch, args.num_epoch): - train(train_loader, model, scheduler, optimizer, epoch, args) - - if (epoch + 1) % 5 == 0: - test(valid_dataset, model, epoch, args) - - state = { - 'epoch': epoch, - 'parser': args, - 'state_dict': get_state_dict(model) - } - - torch.save( - state, - os.path.join( - args.save_folder, - args.dataset, - args.network, - "checkpoint_{}.pth".format(epoch))) + if args.eval: + test(valid_loader, model, epoch=0, args=args) + else: + for epoch in range(args.start_epoch, args.num_epoch): + train(train_loader, model, scheduler, warmup_scheduler, + optimizer, epoch, args) + + state = { + 'epoch': epoch, + 'parser': args, + 'state_dict': get_state_dict(model), + 'optimizer': optimizer.state_dict() + } + + torch.save( + state, + os.path.join( + args.save_folder, + args.dataset, + args.network, + "checkpoint_{}.pth".format(epoch))) + + if (epoch + 1) % args.eval_epochs == 0: + test(valid_loader, model, epoch, args) def main():