From 3e37af6ac05815834d34b9f56719f68edd883482 Mon Sep 17 00:00:00 2001 From: longyu Date: Fri, 20 Mar 2020 21:35:48 +0100 Subject: [PATCH 01/20] fix readme typos --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9c62965..717dd23 100644 --- a/README.md +++ b/README.md @@ -88,22 +88,22 @@ sh datasets/scripts/COCO2017.sh - To train EfficientDet using the train script simply specify the parameters listed in `train.py` as a flag or manually change them. ```Shell -python train.py --network effcientdet-d0 # Example +python train.py --network efficientdet-d0 # Example ``` - With VOC Dataset: ```Shell # DataParallel - python train.py --dataset VOC --dataset_root /root/data/VOCdevkit/ --network effcientdet-d0 --batch_size 32 + python train.py --dataset VOC --dataset_root /root/data/VOCdevkit/ --network efficientdet-d0 --batch_size 32 # DistributedDataParallel with backend nccl - python train.py --dataset VOC --dataset_root /root/data/VOCdevkit/ --network effcientdet-d0 --batch_size 32 --multiprocessing-distributed + python train.py --dataset VOC --dataset_root /root/data/VOCdevkit/ --network efficientdet-d0 --batch_size 32 --multiprocessing-distributed ``` - With COCO Dataset: ```Shell # DataParallel - python train.py --dataset COCO --dataset_root ~/data/coco/ --network effcientdet-d0 --batch_size 32 + python train.py --dataset COCO --dataset_root ~/data/coco/ --network efficientdet-d0 --batch_size 32 # DistributedDataParallel with backend nccl - python train.py --dataset COCO --dataset_root ~/data/coco/ --network effcientdet-d0 --batch_size 32 --multiprocessing-distributed + python train.py --dataset COCO --dataset_root ~/data/coco/ --network efficientdet-d0 --batch_size 32 --multiprocessing-distributed ``` ## Evaluation From 759590dc83fb06b1d187c55b438c86aa28c2d800 Mon Sep 17 00:00:00 2001 From: longyu Date: Sat, 21 Mar 2020 00:24:49 +0100 Subject: [PATCH 02/20] fix none issues --- datasets/augmentation.py | 1 + models/losses.py | 1 - models/utils.py | 28 ++++++++++++++++++++-------- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/datasets/augmentation.py b/datasets/augmentation.py index 10a5615..283f023 100644 --- a/datasets/augmentation.py +++ b/datasets/augmentation.py @@ -67,6 +67,7 @@ def detection_collate(batch): def collater(data): + data = [x for x in data if x is not None] imgs = [s['img'] for s in data] annots = [s['annot'] for s in data] scales = [s['scale'] for s in data] diff --git a/models/losses.py b/models/losses.py index 99b9cfd..418e25d 100644 --- a/models/losses.py +++ b/models/losses.py @@ -50,7 +50,6 @@ def forward(self, classifications, regressions, anchors, annotations): bbox_annotation = annotations[j, :, :] bbox_annotation = bbox_annotation[bbox_annotation[:, 4] != -1] - if bbox_annotation.shape[0] == 0: regression_losses.append(torch.tensor(0).float().cuda()) classification_losses.append(torch.tensor(0).float().cuda()) diff --git a/models/utils.py b/models/utils.py index 34c8649..69c620c 100644 --- a/models/utils.py +++ b/models/utils.py @@ -302,15 +302,27 @@ def get_model_params(model_name, override_params): return blocks_args, global_params +# url_map = { +# 'efficientnet-b0': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b0-355c32eb.pth', +# 'efficientnet-b1': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b1-f1951068.pth', +# 'efficientnet-b2': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b2-8bb594d6.pth', +# 'efficientnet-b3': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b3-5fb5a3c3.pth', +# 'efficientnet-b4': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b4-6ed6700e.pth', +# 'efficientnet-b5': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b5-b6417697.pth', +# 'efficientnet-b6': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b6-c76e70fd.pth', +# 'efficientnet-b7': 'https://publicmodels.blob.core.windows.net/container/aa/efficientnet-b7-dcc49843.pth', +# } + + url_map = { - 'efficientnet-b0': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b0-355c32eb.pth', - 'efficientnet-b1': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b1-f1951068.pth', - 'efficientnet-b2': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b2-8bb594d6.pth', - 'efficientnet-b3': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b3-5fb5a3c3.pth', - 'efficientnet-b4': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b4-6ed6700e.pth', - 'efficientnet-b5': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b5-b6417697.pth', - 'efficientnet-b6': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b6-c76e70fd.pth', - 'efficientnet-b7': 'http://storage.googleapis.com/public-models/efficientnet/efficientnet-b7-dcc49843.pth', + 'efficientnet-b0': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth', + 'efficientnet-b1': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth', + 'efficientnet-b2': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b2-8bb594d6.pth', + 'efficientnet-b3': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b3-5fb5a3c3.pth', + 'efficientnet-b4': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth', + 'efficientnet-b5': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b5-b6417697.pth', + 'efficientnet-b6': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b6-c76e70fd.pth', + 'efficientnet-b7': 'https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b7-dcc49843.pth', } From 759289971695f0d7fec5a0189c0edd4bc93703f3 Mon Sep 17 00:00:00 2001 From: longyu Date: Sun, 29 Mar 2020 20:56:41 +0200 Subject: [PATCH 03/20] add prefetch loader --- loader.py | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 loader.py diff --git a/loader.py b/loader.py new file mode 100644 index 0000000..8a8ce6c --- /dev/null +++ b/loader.py @@ -0,0 +1,67 @@ +import torch + + +class PrefetchLoader: + + def __init__(self, loader): + self.loader = loader + # self.mean = torch.tensor([x * 255 for x in mean]).cuda().view(1, 3, 1, 1) + # self.std = torch.tensor([x * 255 for x in std]).cuda().view(1, 3, 1, 1) + # self.fp16 = fp16 + # if fp16: + # self.mean = self.mean.half() + # self.std = self.std.half() + # if re_prob > 0.: + # self.random_erasing = RandomErasing( + # probability=re_prob, mode=re_mode, max_count=re_count, num_splits=re_num_splits) + # else: + # self.random_erasing = None + + def __iter__(self): + stream = torch.cuda.Stream() + first = True + + for next_input, next_target in self.loader: + with torch.cuda.stream(stream): + next_input = next_input.cuda(non_blocking=True) + next_target = next_target.cuda(non_blocking=True) + # if self.fp16: + # next_input = next_input.half().sub_(self.mean).div_(self.std) + # else: + # next_input = next_input.float().sub_(self.mean).div_(self.std) + # if self.random_erasing is not None: + # next_input = self.random_erasing(next_input) + + if not first: + yield input, target + else: + first = False + + torch.cuda.current_stream().wait_stream(stream) + input = next_input + target = next_target + + yield input, target + + def __len__(self): + return len(self.loader) + + @property + def sampler(self): + return self.loader.sampler + + @property + def dataset(self): + return self.loader.dataset + + # @property + # def mixup_enabled(self): + # if isinstance(self.loader.collate_fn, FastCollateMixup): + # return self.loader.collate_fn.mixup_enabled + # else: + # return False + + # @mixup_enabled.setter + # def mixup_enabled(self, x): + # if isinstance(self.loader.collate_fn, FastCollateMixup): + # self.loader.collate_fn.mixup_enabled = x From 6ce2638c71ff7bcf63e3c49caf6cabd894e1e04e Mon Sep 17 00:00:00 2001 From: longyu Date: Tue, 31 Mar 2020 13:01:56 +0200 Subject: [PATCH 04/20] add mixed precision training --- models/efficientdet.py | 2 ++ models/losses.py | 11 ++++++---- train.py | 46 ++++++++++++++++++++++++++++++++++++------ 3 files changed, 49 insertions(+), 10 deletions(-) diff --git a/models/efficientdet.py b/models/efficientdet.py index 43357c9..0394453 100644 --- a/models/efficientdet.py +++ b/models/efficientdet.py @@ -64,6 +64,8 @@ def forward(self, inputs): classification = torch.cat([out for out in outs[0]], dim=1) regression = torch.cat([out for out in outs[1]], dim=1) anchors = self.anchors(inputs) + if anchors.dtype != inputs.dtype: + anchors = anchors.type_as(inputs) if self.is_training: return self.criterion(classification, regression, anchors, annotations) else: diff --git a/models/losses.py b/models/losses.py index 418e25d..5687606 100644 --- a/models/losses.py +++ b/models/losses.py @@ -43,6 +43,12 @@ def forward(self, classifications, regressions, anchors, annotations): anchor_ctr_x = anchor[:, 0] + 0.5 * anchor_widths anchor_ctr_y = anchor[:, 1] + 0.5 * anchor_heights + if classifications.dtype == torch.float32: + MAX_ONE = 0.9999 + MIN_ZERO = 1e-4 + else: + MAX_ONE = 0.999 + MIN_ZERO = 1e-4 for j in range(batch_size): classification = classifications[j, :, :] @@ -56,16 +62,13 @@ def forward(self, classifications, regressions, anchors, annotations): continue - classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4) + classification = torch.clamp(classification, MIN_ZERO, MAX_ONE) # num_anchors x num_annotations IoU = calc_iou(anchors[0, :, :], bbox_annotation[:, :4]) IoU_max, IoU_argmax = torch.max(IoU, dim=1) # num_anchors x 1 - #import pdb - # pdb.set_trace() - # compute the loss for classification targets = torch.ones(classification.shape) * -1 targets = targets.cuda() diff --git a/train.py b/train.py index 8e90697..d58b1a9 100644 --- a/train.py +++ b/train.py @@ -27,6 +27,14 @@ import torch.backends.cudnn as cudnn from torch.utils.data import DataLoader +try: + from apex.parallel import DistributedDataParallel as DDP + from apex.fp16_utils import * + from apex import amp, optimizers + from apex.multi_tensor_apply import multi_tensor_applier +except ImportError: + raise ImportError("Please install apex from https://www.github.com/nvidia/apex to run this example.") + from models.efficientdet import EfficientDet from models.losses import FocalLoss from datasets import VOCDetection, CocoDataset, get_augumentation, detection_collate, Resizer, Normalizer, Augmenter, collater @@ -88,6 +96,9 @@ 'N processes per node, which has N GPUs. This is the ' 'fastest way to use PyTorch for either single node or ' 'multi node data parallel training') +parser.add_argument('--freeze', action='store_true', help='freeze EfficientNet-d{x} backbone') +parser.add_argument('--mixed_training', action='store_true', + help='Use AMP mixed training optimization O1') iteration = 1 @@ -111,7 +122,12 @@ def train(train_loader, model, scheduler, optimizer, epoch, args): if bool(loss == 0): print('loss equal zero(0)') continue - loss.backward() + if args.mixed_training: + with amp.scale_loss(loss, optimizer) as scaled_loss: + scaled_loss.backward() + else: + loss.backward() + if (idx + 1) % args.grad_accumulation_steps == 0: torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1) optimizer.step() @@ -232,8 +248,30 @@ def main_worker(gpu, ngpus_per_node, args): D_class=EFFICIENTDET[args.network]['D_class'] ) if(args.resume is not None): - model.load_state_dict(checkpoint['state_dict']) + tmp = OrderedDict() + for k, v in checkpoint['state_dict'].items(): + k = k.replace("module.", "") + tmp[k] = v + model.load_state_dict(tmp) + del tmp + + model.to("cuda") + + # define loss function (criterion) , optimizer, scheduler + optimizer = optim.AdamW(model.parameters(), lr=args.lr) + if args.resume is not None and "optimizer" in checkpoint: + optimizer.load_state_dict(checkpoint["optimizer"]) del checkpoint + + scheduler = optim.lr_scheduler.ReduceLROnPlateau( + optimizer, patience=3, verbose=True) + + if args.mixed_training: + model, optimizer = amp.initialize(model, optimizer, + opt_level="O1", + keep_batchnorm_fp32=None, + loss_scale=None) + if args.distributed: # For multiprocessing distributed, DistributedDataParallel constructor # should always set the single device scope, otherwise, @@ -264,10 +302,6 @@ def main_worker(gpu, ngpus_per_node, args): print('Run with DataParallel ....') model = torch.nn.DataParallel(model).cuda() - # define loss function (criterion) , optimizer, scheduler - optimizer = optim.AdamW(model.parameters(), lr=args.lr) - scheduler = optim.lr_scheduler.ReduceLROnPlateau( - optimizer, patience=3, verbose=True) cudnn.benchmark = True for epoch in range(args.start_epoch, args.num_epoch): From 355d398954cd0a68ffe321511c50a881e822a2ee Mon Sep 17 00:00:00 2001 From: longyu Date: Tue, 31 Mar 2020 17:11:23 +0200 Subject: [PATCH 05/20] add freeze backbone and mixed training. And other fixes --- README.md | 1 + models/bifpn.py | 1 + models/efficientdet.py | 24 ++++++++++++++++-------- train.py | 37 +++++++++++++++++++++++++++---------- 4 files changed, 45 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 717dd23..a8a86eb 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ python demo.py --weight ./checkpoint_VOC_efficientdet-d1_97.pth --threshold 0.6   ## Recent Update + - [31/03/2020] Make support for freezing backbone layers and batch norm layers. In addition, it supports to mixed precision training APEX opt method=O1. - [06/01/2020] Support both DistributedDataParallel and DataParallel, change augmentation, eval_voc - [17/12/2019] Add Fast normalized fusion, Augmentation with Ratio, Change RetinaHead, Fix Support EfficientDet-D0->D7 - [7/12/2019] Support EfficientDet-D0, EfficientDet-D1, EfficientDet-D2, EfficientDet-D3, EfficientDet-D4,... . Support change gradient accumulation steps, AdamW. diff --git a/models/bifpn.py b/models/bifpn.py index 0f8e6bd..49565c2 100644 --- a/models/bifpn.py +++ b/models/bifpn.py @@ -162,6 +162,7 @@ def __init__(self, inplace=False) ) self.bifpn_convs.append(fpn_conv) + self.init_weights() # default init_weights for conv(msra) and norm in ConvModule def init_weights(self): diff --git a/models/efficientdet.py b/models/efficientdet.py index 0394453..e0dd432 100644 --- a/models/efficientdet.py +++ b/models/efficientdet.py @@ -44,14 +44,14 @@ def __init__(self, self.clipBoxes = ClipBoxes() self.threshold = threshold self.iou_threshold = iou_threshold - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() - self.freeze_bn() + """The following code forces all weights to be random, which does not make sense at all!""" + # for m in self.modules(): + # if isinstance(m, nn.Conv2d): + # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + # m.weight.data.normal_(0, math.sqrt(2. / n)) + # elif isinstance(m, nn.BatchNorm2d): + # m.weight.data.fill_(1) + # m.bias.data.zero_() self.criterion = FocalLoss() def forward(self, inputs): @@ -87,6 +87,14 @@ def forward(self, inputs): dim=1) return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]] + def freeze_backbone(self): + """Freeze backbone weights and bn layers.""" + for layer in self.backbone.modules(): + if isinstance(layer, nn.BatchNorm2d): + layer.eval() + for param in self.backbone.parameters(): + param.requires_grad = False + def freeze_bn(self): '''Freeze BatchNorm layers.''' for layer in self.modules(): diff --git a/train.py b/train.py index d58b1a9..feb8266 100644 --- a/train.py +++ b/train.py @@ -5,6 +5,7 @@ import shutil import time import warnings +import epdb import torch import torch.nn as nn import torch.nn.parallel @@ -41,6 +42,9 @@ from utils import EFFICIENTDET, get_state_dict from eval import evaluate, evaluate_coco + +breakpoint = epdb.set_trace + parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') parser.add_argument('--dataset', default='VOC', choices=['VOC', 'COCO'], type=str, help='VOC or COCO') @@ -96,7 +100,12 @@ 'N processes per node, which has N GPUs. This is the ' 'fastest way to use PyTorch for either single node or ' 'multi node data parallel training') -parser.add_argument('--freeze', action='store_true', help='freeze EfficientNet-d{x} backbone') +parser.add_argument('--eval_epochs', default=5, type=int, + help='after how many training epochs will do evaluation (default 5).') +parser.add_argument('--freeze_backbone', action='store_true', + help='freeze EfficientNet-d{x} backbone') +parser.add_argument('--freeze_bn', action='store_true', + help='freeze all batch norm layers') parser.add_argument('--mixed_training', action='store_true', help='Use AMP mixed training optimization O1') @@ -109,11 +118,11 @@ def train(train_loader, model, scheduler, optimizer, epoch, args): start = time.time() total_loss = [] model.train() - model.module.is_training = True - model.module.freeze_bn() + optimizer.zero_grad() - for idx, (images, annotations) in enumerate(train_loader): - images = images.cuda().float() + for idx, (images, annotations) in tqdm(enumerate(train_loader), + total=len(train_loader)): + images = images.float().cuda() annotations = annotations.cuda() classification_loss, regression_loss = model([images, annotations]) classification_loss = classification_loss.mean() @@ -134,7 +143,7 @@ def train(train_loader, model, scheduler, optimizer, epoch, args): optimizer.zero_grad() total_loss.append(loss.item()) - if(iteration % 300 == 0): + if(iteration % 50 == 0): print('{} iteration: training ...'.format(iteration)) ans = { 'epoch': epoch, @@ -257,8 +266,15 @@ def main_worker(gpu, ngpus_per_node, args): model.to("cuda") + if args.freeze_backbone: + model.freeze_backbone() + + if args.freeze_bn: + model.freeze_bn() + # define loss function (criterion) , optimizer, scheduler - optimizer = optim.AdamW(model.parameters(), lr=args.lr) + optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), + lr=args.lr) if args.resume is not None and "optimizer" in checkpoint: optimizer.load_state_dict(checkpoint["optimizer"]) del checkpoint @@ -307,13 +323,14 @@ def main_worker(gpu, ngpus_per_node, args): for epoch in range(args.start_epoch, args.num_epoch): train(train_loader, model, scheduler, optimizer, epoch, args) - if (epoch + 1) % 5 == 0: + if (epoch + 1) % args.eval_epochs == 0: test(valid_dataset, model, epoch, args) state = { 'epoch': epoch, 'parser': args, - 'state_dict': get_state_dict(model) + 'state_dict': get_state_dict(model), + 'optimizer': optimizer.state_dict() } torch.save( @@ -332,7 +349,7 @@ def main(): if args.seed is not None: random.seed(args.seed) torch.manual_seed(args.seed) - cudnn.deterministic = True + # cudnn.deterministic = True warnings.warn('You have chosen to seed training. ' 'This will turn on the CUDNN deterministic setting, ' 'which can slow down your training considerably! ' From f9853bf1198051168122d5824deb9e1abe384f26 Mon Sep 17 00:00:00 2001 From: longyu Date: Wed, 1 Apr 2020 19:08:17 +0200 Subject: [PATCH 06/20] fix issues --- models/efficientdet.py | 16 ++++++------- models/retinahead.py | 1 + train.py | 51 +++++++++++++++++++++++++++++------------- 3 files changed, 45 insertions(+), 23 deletions(-) diff --git a/models/efficientdet.py b/models/efficientdet.py index e0dd432..07616de 100644 --- a/models/efficientdet.py +++ b/models/efficientdet.py @@ -54,6 +54,14 @@ def __init__(self, # m.bias.data.zero_() self.criterion = FocalLoss() + def extract_feat(self, img): + """ + Directly extract features from the backbone+neck + """ + x = self.backbone(img) + x = self.neck(x[-5:]) + return x + def forward(self, inputs): if self.is_training: inputs, annotations = inputs @@ -100,11 +108,3 @@ def freeze_bn(self): for layer in self.modules(): if isinstance(layer, nn.BatchNorm2d): layer.eval() - - def extract_feat(self, img): - """ - Directly extract features from the backbone+neck - """ - x = self.backbone(img) - x = self.neck(x[-5:]) - return x diff --git a/models/retinahead.py b/models/retinahead.py index 7fcbadf..896f1ab 100644 --- a/models/retinahead.py +++ b/models/retinahead.py @@ -1,6 +1,7 @@ from functools import partial import numpy as np +import torch import torch.nn as nn from .module import ConvModule, bias_init_with_prob, normal_init diff --git a/train.py b/train.py index feb8266..bb796fe 100644 --- a/train.py +++ b/train.py @@ -3,6 +3,7 @@ import os import random import shutil +from collections import OrderedDict import time import warnings import epdb @@ -18,6 +19,8 @@ import torchvision.transforms as transforms import torchvision.datasets as datasets +import pytorch_warmup as warmup + import os import sys import time @@ -89,7 +92,7 @@ help='url used to set up distributed training') parser.add_argument('--dist-backend', default='nccl', type=str, help='distributed backend') -parser.add_argument('--seed', default=24, type=int, +parser.add_argument('--seed', default=None, type=int, help='seed for initializing training. ') parser.add_argument('--gpu', default=None, type=int, help='GPU id to use.') @@ -112,13 +115,13 @@ iteration = 1 -def train(train_loader, model, scheduler, optimizer, epoch, args): +def train(train_loader, model, scheduler, warmup_scheduler, optimizer, epoch, args): global iteration print("{} epoch: \t start training....".format(epoch)) start = time.time() total_loss = [] model.train() - + model.module.is_training = True optimizer.zero_grad() for idx, (images, annotations) in tqdm(enumerate(train_loader), total=len(train_loader)): @@ -127,10 +130,12 @@ def train(train_loader, model, scheduler, optimizer, epoch, args): classification_loss, regression_loss = model([images, annotations]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() + loss = classification_loss + regression_loss if bool(loss == 0): print('loss equal zero(0)') continue + if args.mixed_training: with amp.scale_loss(loss, optimizer) as scaled_loss: scaled_loss.backward() @@ -141,9 +146,12 @@ def train(train_loader, model, scheduler, optimizer, epoch, args): torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1) optimizer.step() optimizer.zero_grad() + # scheduler.step() + # if warmup_scheduler: + # warmup_scheduler.dampen() total_loss.append(loss.item()) - if(iteration % 50 == 0): + if(iteration % 10 == 0): print('{} iteration: training ...'.format(iteration)) ans = { 'epoch': epoch, @@ -155,7 +163,8 @@ def train(train_loader, model, scheduler, optimizer, epoch, args): for key, value in ans.items(): print(' {:15s}: {}'.format(str(key), value)) iteration += 1 - scheduler.step(np.mean(total_loss)) + scheduler.step(np.mean(total_loss)) # used for ReduceLROnPlateau + result = { 'time': time.time() - start, 'loss': np.mean(total_loss) @@ -277,16 +286,27 @@ def main_worker(gpu, ngpus_per_node, args): lr=args.lr) if args.resume is not None and "optimizer" in checkpoint: optimizer.load_state_dict(checkpoint["optimizer"]) - del checkpoint - - scheduler = optim.lr_scheduler.ReduceLROnPlateau( - optimizer, patience=3, verbose=True) + num_steps = len(train_loader) * args.num_epoch + + # scheduler = optim.lr_scheduler.ReduceLROnPlateau( + # optimizer, patience=3, verbose=True) + scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, + T_max=num_steps) + if args.resume is not None and "scheduler" in checkpoint: + scheduler.load_state_dict(checkpoint["scheduler"]) if args.mixed_training: model, optimizer = amp.initialize(model, optimizer, opt_level="O1", keep_batchnorm_fp32=None, - loss_scale=None) + loss_scale=128) + + # warmup_scheduler = warmup.UntunedLinearWarmup(optimizer) + warmup_scheduler = None + + if args.resume is not None and "warmup_scheduler" in checkpoint: + scheduler.load_state_dict(checkpoint["warmup_scheduler"]) + del checkpoint if args.distributed: # For multiprocessing distributed, DistributedDataParallel constructor @@ -321,10 +341,8 @@ def main_worker(gpu, ngpus_per_node, args): cudnn.benchmark = True for epoch in range(args.start_epoch, args.num_epoch): - train(train_loader, model, scheduler, optimizer, epoch, args) - - if (epoch + 1) % args.eval_epochs == 0: - test(valid_dataset, model, epoch, args) + train(train_loader, model, scheduler, warmup_scheduler, + optimizer, epoch, args) state = { 'epoch': epoch, @@ -341,6 +359,9 @@ def main_worker(gpu, ngpus_per_node, args): args.network, "checkpoint_{}.pth".format(epoch))) + if (epoch + 1) % args.eval_epochs == 0: + test(valid_dataset, model, epoch, args) + def main(): args = parser.parse_args() @@ -349,7 +370,7 @@ def main(): if args.seed is not None: random.seed(args.seed) torch.manual_seed(args.seed) - # cudnn.deterministic = True + cudnn.deterministic = True warnings.warn('You have chosen to seed training. ' 'This will turn on the CUDNN deterministic setting, ' 'which can slow down your training considerably! ' From 907bd7fe8c18e26212ed88bb1ef51a2a833be356 Mon Sep 17 00:00:00 2001 From: longyu Date: Thu, 2 Apr 2020 09:41:25 +0200 Subject: [PATCH 07/20] add prefetcher --- train.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index bb796fe..da70f92 100644 --- a/train.py +++ b/train.py @@ -45,6 +45,7 @@ from utils import EFFICIENTDET, get_state_dict from eval import evaluate, evaluate_coco +from loader import PrefetchLoader breakpoint = epdb.set_trace @@ -123,8 +124,11 @@ def train(train_loader, model, scheduler, warmup_scheduler, optimizer, epoch, ar model.train() model.module.is_training = True optimizer.zero_grad() - for idx, (images, annotations) in tqdm(enumerate(train_loader), - total=len(train_loader)): + + prefetcher = PrefetchLoader(train_loader) + + for idx, (images, annotations) in tqdm(enumerate(prefetcher), + total=len(prefetcher)): images = images.float().cuda() annotations = annotations.cuda() classification_loss, regression_loss = model([images, annotations]) From 4b7e6d7d5b467254cccda3268d29da9e127840a3 Mon Sep 17 00:00:00 2001 From: longyu Date: Fri, 3 Apr 2020 16:50:46 +0200 Subject: [PATCH 08/20] go back to orignal code --- models/bifpn.py | 2 +- models/efficientdet.py | 24 +++++++++++++++--------- train.py | 3 ++- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/models/bifpn.py b/models/bifpn.py index 49565c2..a9bc8f6 100644 --- a/models/bifpn.py +++ b/models/bifpn.py @@ -162,7 +162,7 @@ def __init__(self, inplace=False) ) self.bifpn_convs.append(fpn_conv) - self.init_weights() + # self.init_weights() # new code # default init_weights for conv(msra) and norm in ConvModule def init_weights(self): diff --git a/models/efficientdet.py b/models/efficientdet.py index 07616de..b9bdad5 100644 --- a/models/efficientdet.py +++ b/models/efficientdet.py @@ -44,14 +44,20 @@ def __init__(self, self.clipBoxes = ClipBoxes() self.threshold = threshold self.iou_threshold = iou_threshold + + # ============== original code starts =============== """The following code forces all weights to be random, which does not make sense at all!""" - # for m in self.modules(): - # if isinstance(m, nn.Conv2d): - # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - # m.weight.data.normal_(0, math.sqrt(2. / n)) - # elif isinstance(m, nn.BatchNorm2d): - # m.weight.data.fill_(1) - # m.bias.data.zero_() + for m in self.modules(): + if isinstance(m, nn.Conv2d): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.data.normal_(0, math.sqrt(2. / n)) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + self.freeze_bn() + # ============== original code ends =============== + self.criterion = FocalLoss() def extract_feat(self, img): @@ -72,8 +78,8 @@ def forward(self, inputs): classification = torch.cat([out for out in outs[0]], dim=1) regression = torch.cat([out for out in outs[1]], dim=1) anchors = self.anchors(inputs) - if anchors.dtype != inputs.dtype: - anchors = anchors.type_as(inputs) + # if anchors.dtype != inputs.dtype: # used for mixed precision training + # anchors = anchors.type_as(inputs) if self.is_training: return self.criterion(classification, regression, anchors, annotations) else: diff --git a/train.py b/train.py index da70f92..ab2748f 100644 --- a/train.py +++ b/train.py @@ -123,6 +123,7 @@ def train(train_loader, model, scheduler, warmup_scheduler, optimizer, epoch, ar total_loss = [] model.train() model.module.is_training = True + model.module.freeze_bn() optimizer.zero_grad() prefetcher = PrefetchLoader(train_loader) @@ -155,7 +156,7 @@ def train(train_loader, model, scheduler, warmup_scheduler, optimizer, epoch, ar # warmup_scheduler.dampen() total_loss.append(loss.item()) - if(iteration % 10 == 0): + if(iteration % 50 == 0): print('{} iteration: training ...'.format(iteration)) ans = { 'epoch': epoch, From 3e95cbf03c5a000dcbfd670f807c8df849d6dc12 Mon Sep 17 00:00:00 2001 From: longyu Date: Fri, 3 Apr 2020 17:10:02 +0200 Subject: [PATCH 09/20] fix normalization issue --- datasets/augmentation.py | 3 ++- train.py | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/datasets/augmentation.py b/datasets/augmentation.py index 283f023..e51a39f 100644 --- a/datasets/augmentation.py +++ b/datasets/augmentation.py @@ -148,4 +148,5 @@ def __init__(self): def __call__(self, sample): image, annots = sample['img'], sample['annot'] - return {'img': ((image.astype(np.float32) - self.mean) / self.std), 'annot': annots} + # 1/255. = 0.00392156862745098 + return {'img': ((image.astype(np.float32) *0.00392156862745098 - self.mean) / self.std), 'annot': annots} diff --git a/train.py b/train.py index ab2748f..f7840f1 100644 --- a/train.py +++ b/train.py @@ -46,9 +46,14 @@ from eval import evaluate, evaluate_coco from loader import PrefetchLoader +from torch.utils.tensorboard import SummaryWriter + breakpoint = epdb.set_trace +writer = SummaryWriter() + + parser = argparse.ArgumentParser(description='PyTorch ImageNet Training') parser.add_argument('--dataset', default='VOC', choices=['VOC', 'COCO'], type=str, help='VOC or COCO') From abbc5b886db2ee3aa3341eeb4593ced83e28f980 Mon Sep 17 00:00:00 2001 From: longyu Date: Fri, 3 Apr 2020 17:41:35 +0200 Subject: [PATCH 10/20] remove effcientnet from scratch --- models/efficientdet.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/models/efficientdet.py b/models/efficientdet.py index b9bdad5..b7b649c 100644 --- a/models/efficientdet.py +++ b/models/efficientdet.py @@ -47,15 +47,15 @@ def __init__(self, # ============== original code starts =============== """The following code forces all weights to be random, which does not make sense at all!""" - for m in self.modules(): - if isinstance(m, nn.Conv2d): - n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels - m.weight.data.normal_(0, math.sqrt(2. / n)) - elif isinstance(m, nn.BatchNorm2d): - m.weight.data.fill_(1) - m.bias.data.zero_() + # for m in self.modules(): + # if isinstance(m, nn.Conv2d): + # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + # m.weight.data.normal_(0, math.sqrt(2. / n)) + # elif isinstance(m, nn.BatchNorm2d): + # m.weight.data.fill_(1) + # m.bias.data.zero_() - self.freeze_bn() + # self.freeze_bn() # ============== original code ends =============== self.criterion = FocalLoss() From beac23f923cac7f30b0af25ba7ddf0e276b45c9f Mon Sep 17 00:00:00 2001 From: longyu Date: Sat, 4 Apr 2020 13:12:06 +0200 Subject: [PATCH 11/20] fix is_training flag --- datasets/augmentation.py | 2 +- models/efficientdet.py | 6 ++-- models/losses.py | 6 +++- train.py | 73 ++++++++++++++++++++-------------------- 4 files changed, 45 insertions(+), 42 deletions(-) diff --git a/datasets/augmentation.py b/datasets/augmentation.py index e51a39f..2ee9c27 100644 --- a/datasets/augmentation.py +++ b/datasets/augmentation.py @@ -109,7 +109,7 @@ def __call__(self, sample, common_size=512): image = cv2.resize(image, (resized_width, resized_height)) - new_image = np.zeros((common_size, common_size, 3)) + new_image = np.zeros((common_size, common_size, 3), np.float32) new_image[0:resized_height, 0:resized_width] = image annots[:, :4] *= scale diff --git a/models/efficientdet.py b/models/efficientdet.py index b7b649c..21df997 100644 --- a/models/efficientdet.py +++ b/models/efficientdet.py @@ -31,7 +31,7 @@ def __init__(self, iou_threshold=0.5): super(EfficientDet, self).__init__() self.backbone = EfficientNet.from_pretrained(MODEL_MAP[network]) - self.is_training = is_training + # self.is_training = is_training self.neck = BIFPN(in_channels=self.backbone.get_list_features()[-5:], out_channels=W_bifpn, stack=D_bifpn, @@ -69,7 +69,7 @@ def extract_feat(self, img): return x def forward(self, inputs): - if self.is_training: + if self.training: inputs, annotations = inputs else: inputs = inputs @@ -80,7 +80,7 @@ def forward(self, inputs): anchors = self.anchors(inputs) # if anchors.dtype != inputs.dtype: # used for mixed precision training # anchors = anchors.type_as(inputs) - if self.is_training: + if self.training: return self.criterion(classification, regression, anchors, annotations) else: transformed_anchors = self.regressBoxes(anchors, regression) diff --git a/models/losses.py b/models/losses.py index 5687606..d780d46 100644 --- a/models/losses.py +++ b/models/losses.py @@ -49,6 +49,7 @@ def forward(self, classifications, regressions, anchors, annotations): else: MAX_ONE = 0.999 MIN_ZERO = 1e-4 + not_found = 0 for j in range(batch_size): classification = classifications[j, :, :] @@ -78,6 +79,8 @@ def forward(self, classifications, regressions, anchors, annotations): positive_indices = torch.ge(IoU_max, 0.5) num_positive_anchors = positive_indices.sum() + if num_positive_anchors == 0: + not_found += 1 assigned_annotations = bbox_annotation[IoU_argmax, :] @@ -150,5 +153,6 @@ def forward(self, classifications, regressions, anchors, annotations): regression_losses.append(regression_loss.mean()) else: regression_losses.append(torch.tensor(0).float().cuda()) - + if not_found == batch_size: + print("Not positive sample is found in the batch") return torch.stack(classification_losses).mean(dim=0, keepdim=True), torch.stack(regression_losses).mean(dim=0, keepdim=True) diff --git a/train.py b/train.py index f7840f1..6be1cd7 100644 --- a/train.py +++ b/train.py @@ -127,8 +127,8 @@ def train(train_loader, model, scheduler, warmup_scheduler, optimizer, epoch, ar start = time.time() total_loss = [] model.train() - model.module.is_training = True - model.module.freeze_bn() + # model.module.is_training = True + # model.module.freeze_bn() optimizer.zero_grad() prefetcher = PrefetchLoader(train_loader) @@ -185,9 +185,9 @@ def train(train_loader, model, scheduler, warmup_scheduler, optimizer, epoch, ar def test(dataset, model, epoch, args): print("{} epoch: \t start validation....".format(epoch)) - model = model.module + # model = model.module model.eval() - model.is_training = False + # model.is_training = False with torch.no_grad(): if(args.dataset == 'VOC'): evaluate(dataset, model) @@ -217,8 +217,9 @@ def main_worker(gpu, ngpus_per_node, args): # Training dataset train_dataset = [] if(args.dataset == 'VOC'): - train_dataset = VOCDetection(root=args.dataset_root, transform=transforms.Compose( - [Normalizer(), Augmenter(), Resizer()])) + train_dataset = VOCDetection(root=args.dataset_root, + transform=transforms.Compose( + [Normalizer(), Augmenter(), Resizer()])) valid_dataset = VOCDetection(root=args.dataset_root, image_sets=[( '2007', 'test')], transform=transforms.Compose([Normalizer(), Resizer()])) args.num_class = train_dataset.num_classes() @@ -283,41 +284,12 @@ def main_worker(gpu, ngpus_per_node, args): model.load_state_dict(tmp) del tmp - model.to("cuda") - if args.freeze_backbone: model.freeze_backbone() if args.freeze_bn: model.freeze_bn() - # define loss function (criterion) , optimizer, scheduler - optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), - lr=args.lr) - if args.resume is not None and "optimizer" in checkpoint: - optimizer.load_state_dict(checkpoint["optimizer"]) - - num_steps = len(train_loader) * args.num_epoch - - # scheduler = optim.lr_scheduler.ReduceLROnPlateau( - # optimizer, patience=3, verbose=True) - scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, - T_max=num_steps) - if args.resume is not None and "scheduler" in checkpoint: - scheduler.load_state_dict(checkpoint["scheduler"]) - if args.mixed_training: - model, optimizer = amp.initialize(model, optimizer, - opt_level="O1", - keep_batchnorm_fp32=None, - loss_scale=128) - - # warmup_scheduler = warmup.UntunedLinearWarmup(optimizer) - warmup_scheduler = None - - if args.resume is not None and "warmup_scheduler" in checkpoint: - scheduler.load_state_dict(checkpoint["warmup_scheduler"]) - del checkpoint - if args.distributed: # For multiprocessing distributed, DistributedDataParallel constructor # should always set the single device scope, otherwise, @@ -344,9 +316,36 @@ def main_worker(gpu, ngpus_per_node, args): torch.cuda.set_device(args.gpu) model = model.cuda(args.gpu) else: - model = model.cuda() print('Run with DataParallel ....') - model = torch.nn.DataParallel(model).cuda() + model = torch.nn.DataParallel(model) + model = model.cuda() + + # define loss function (criterion) , optimizer, scheduler + optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), + lr=args.lr) + if args.resume is not None and "optimizer" in checkpoint: + optimizer.load_state_dict(checkpoint["optimizer"]) + + num_steps = len(train_loader) * args.num_epoch + + scheduler = optim.lr_scheduler.ReduceLROnPlateau( + optimizer, patience=3, verbose=True) + # scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, + # T_max=num_steps) + if args.resume is not None and "scheduler" in checkpoint: + scheduler.load_state_dict(checkpoint["scheduler"]) + if args.mixed_training: + model, optimizer = amp.initialize(model, optimizer, + opt_level="O1", + keep_batchnorm_fp32=None, + loss_scale=128) + + # warmup_scheduler = warmup.UntunedLinearWarmup(optimizer) + warmup_scheduler = None + + if args.resume is not None and "warmup_scheduler" in checkpoint: + scheduler.load_state_dict(checkpoint["warmup_scheduler"]) + del checkpoint cudnn.benchmark = True From 447e57ea653f4e0ab3878a12009278da1ce0e945 Mon Sep 17 00:00:00 2001 From: longyu Date: Sat, 4 Apr 2020 13:18:23 +0200 Subject: [PATCH 12/20] update readme --- README.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a8a86eb..57a308f 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,12 @@ python demo.py --weight ./checkpoint_VOC_efficientdet-d1_97.pth --threshold 0.6   ## Recent Update - - [31/03/2020] Make support for freezing backbone layers and batch norm layers. In addition, it supports to mixed precision training APEX opt method=O1. + - [04/04/2020] VOC dataset training success. + ```Shell + nice -n1 python3.6 train.py --dataset VOC --dataset_root $VOC_PATH --network efficientdet-d0 --batch_size $BSIZE --workers 8 --grad_accumulation_steps 1 --lr 0.00001 --eval_epochs 20 + ``` + I set `lr=1e-5` because `1e-4` did not work + - [31/03/2020] ~~Make support for freezing backbone layers and batch norm layers. In addition, it supports to mixed precision training APEX opt method=O1.~~ [requires testing]. - [06/01/2020] Support both DistributedDataParallel and DataParallel, change augmentation, eval_voc - [17/12/2019] Add Fast normalized fusion, Augmentation with Ratio, Change RetinaHead, Fix Support EfficientDet-D0->D7 - [7/12/2019] Support EfficientDet-D0, EfficientDet-D1, EfficientDet-D2, EfficientDet-D3, EfficientDet-D4,... . Support change gradient accumulation steps, AdamW. From 8cba4e0f2ee39db07d7bf8f0bdf11b21be5b80c3 Mon Sep 17 00:00:00 2001 From: longyu Date: Sat, 4 Apr 2020 18:34:16 +0200 Subject: [PATCH 13/20] make the coco evaluation with batch --- datasets/augmentation.py | 13 ++++-- datasets/coco.py | 1 + eval.py | 98 ++++++++++++++++++---------------------- models/efficientdet.py | 34 ++++++++------ train.py | 13 +++--- 5 files changed, 82 insertions(+), 77 deletions(-) diff --git a/datasets/augmentation.py b/datasets/augmentation.py index 2ee9c27..2156472 100644 --- a/datasets/augmentation.py +++ b/datasets/augmentation.py @@ -54,16 +54,20 @@ def detection_collate(batch): imgs = [s['image'] for s in batch] annots = [s['bboxes'] for s in batch] labels = [s['category_id'] for s in batch] + scales = [s['scale'] for s in batch] max_num_annots = max(len(annot) for annot in annots) annot_padded = np.ones((len(annots), max_num_annots, 5))*-1 if max_num_annots > 0: for idx, (annot, lab) in enumerate(zip(annots, labels)): + # pylint: disable=C1801 if len(annot) > 0: annot_padded[idx, :len(annot), :4] = annot annot_padded[idx, :len(annot), 4] = lab - return (torch.stack(imgs, 0), torch.FloatTensor(annot_padded)) + return (torch.stack(imgs, 0), + torch.FloatTensor(annot_padded), + torch.FloatTensor(scales)) def collater(data): @@ -89,7 +93,8 @@ def collater(data): imgs = imgs.permute(0, 3, 1, 2) - return (imgs, torch.FloatTensor(annot_padded)) + return (imgs, torch.FloatTensor(annot_padded), + torch.FloatTensor(scales)) class Resizer(object): @@ -113,7 +118,9 @@ def __call__(self, sample, common_size=512): new_image[0:resized_height, 0:resized_width] = image annots[:, :4] *= scale - return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale} + return {'img': torch.from_numpy(new_image), + 'annot': torch.from_numpy(annots), + 'scale': scale} class Augmenter(object): diff --git a/datasets/coco.py b/datasets/coco.py index c006f44..5e3835d 100644 --- a/datasets/coco.py +++ b/datasets/coco.py @@ -64,6 +64,7 @@ def __len__(self): def __getitem__(self, idx): img = self.load_image(idx) + image_size = img.shape[:2] annot = self.load_annotations(idx) sample = {'img': img, 'annot': annot} if self.transform: diff --git a/eval.py b/eval.py index dc8393d..d383002 100644 --- a/eval.py +++ b/eval.py @@ -73,7 +73,8 @@ def _compute_ap(recall, precision): return ap -def _get_detections(dataset, retinanet, score_threshold=0.05, max_detections=100, save_path=None): +def _get_detections(dataloader, retinanet, + score_threshold=0.05, max_detections=100, save_path=None): """ Get the detections from the retinanet using the generator. The result is a list of lists such that the size is: all_detections[num_images][num_classes] = detections[num_detections, 4 + num_classes] @@ -86,14 +87,14 @@ def _get_detections(dataset, retinanet, score_threshold=0.05, max_detections=100 # Returns A list of lists containing the detections for each image in the generator. """ - all_detections = [[None for i in range( - dataset.num_classes())] for j in range(len(dataset))] + all_detections = [[None for i in range(dataloader.dataset.num_classes())] + for j in range(len(dataloader.dataset))] retinanet.eval() with torch.no_grad(): - - for index in range(len(dataset)): + for idx, data in enumerate(dataloader): + import epdb; epdb.set_trace() data = dataset[index] scale = data['scale'] @@ -257,64 +258,53 @@ def evaluate( return np.mean(avg_mAP), average_precisions -def evaluate_coco(dataset, model, threshold=0.05): +def evaluate_coco(dataloader, model, threshold=0.05): model.eval() + dataset = dataloader.dataset + with torch.no_grad(): # start collecting results results = [] image_ids = [] - - for index in range(len(dataset)): - data = dataset[index] - scale = data['scale'] - + index = 0 + for data in tqdm(dataloader, total=len(dataloader)): + images = data[0] # run network - scores, labels, boxes = model(data['img'].permute( - 2, 0, 1).cuda().float().unsqueeze(dim=0)) - scores = scores.cpu() - labels = labels.cpu() - boxes = boxes.cpu() - - # correct boxes for image scale - boxes /= scale - - if boxes.shape[0] > 0: - # change to (x, y, w, h) (MS COCO standard) - boxes[:, 2] -= boxes[:, 0] - boxes[:, 3] -= boxes[:, 1] - - # compute predicted labels and scores - # for box, score, label in zip(boxes[0], scores[0], labels[0]): - for box_id in range(boxes.shape[0]): - score = float(scores[box_id]) - label = int(labels[box_id]) - box = boxes[box_id, :] - - # scores are sorted, so we can break - if score < threshold: - break - - # append detection for each positively labeled class - image_result = { - 'image_id': dataset.image_ids[index], - 'category_id': dataset.label_to_coco_label(label), - 'score': float(score), - 'bbox': box.tolist(), - } - - # append detection to results - results.append(image_result) - - # append image to list of processed images - image_ids.append(dataset.image_ids[index]) - - # print progress - print('{}/{}'.format(index, len(dataset)), end='\r') - - if not len(results): + scores_batch, labels_batch, boxes_batch = model(images) + + for scores, labels, boxes, scale in zip(scores_batch, + labels_batch, + boxes_batch, data[2]): + scores = scores.cpu().numpy() + labels = labels.cpu().numpy() + boxes = boxes.cpu().numpy() + + # correct boxes for image scale + boxes /= scale + + if boxes.shape[0] > 0: + # change to (x, y, w, h) (MS COCO standard) + boxes[:, 2] -= boxes[:, 0] + boxes[:, 3] -= boxes[:, 1] + + # compute predicted labels and scores + # for box, score, label in zip(boxes[0], scores[0], labels[0]): + boxes = boxes[scores >= threshold] + labels = labels[scores >= threshold] + scores = scores[scores >= threshold] + results.extend([{"image_id": dataset.image_ids[index], + "category_id": dataset.label_to_coco_label(label), + "score": float(score), + "bbox": box.tolist()} + for box, score, label in zip(boxes, scores, labels)]) + + # append image to list of processed images + image_ids.append(dataset.image_ids[index]) + index += 1 + if not results: return # write output diff --git a/models/efficientdet.py b/models/efficientdet.py index 21df997..d2a5e05 100644 --- a/models/efficientdet.py +++ b/models/efficientdet.py @@ -86,20 +86,26 @@ def forward(self, inputs): transformed_anchors = self.regressBoxes(anchors, regression) transformed_anchors = self.clipBoxes(transformed_anchors, inputs) scores = torch.max(classification, dim=2, keepdim=True)[0] - scores_over_thresh = (scores > self.threshold)[0, :, 0] - - if scores_over_thresh.sum() == 0: - print('No boxes to NMS') - # no boxes to NMS, just return - return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)] - classification = classification[:, scores_over_thresh, :] - transformed_anchors = transformed_anchors[:, scores_over_thresh, :] - scores = scores[:, scores_over_thresh, :] - anchors_nms_idx = nms( - transformed_anchors[0, :, :], scores[0, :, 0], iou_threshold=self.iou_threshold) - nms_scores, nms_class = classification[0, anchors_nms_idx, :].max( - dim=1) - return [nms_scores, nms_class, transformed_anchors[0, anchors_nms_idx, :]] + nms_scores = [] + nms_class = [] + anchors = [] + for idx, score in enumerate(scores): + scores_over_thresh = (score > self.threshold)[:, 0] + if scores_over_thresh.sum() == 0: + print('No boxes to NMS') + # no boxes to NMS, just return + # return [torch.zeros(0), torch.zeros(0), torch.zeros(0, 4)] + continue + cls_tmp = classification[idx, scores_over_thresh, :] + trf_anchors = transformed_anchors[idx, scores_over_thresh, :] + scores_tmp = scores[idx, scores_over_thresh, :] + anchors_nms_idx = nms(trf_anchors, scores_tmp[:, 0], + iou_threshold=self.iou_threshold) + nms_scores_tmp, nms_class_tmp = cls_tmp[anchors_nms_idx, :].max(dim=1) + nms_scores.append(nms_scores_tmp) + nms_class.append(nms_class_tmp) + anchors.append(trf_anchors[anchors_nms_idx, :]) + return [nms_scores, nms_class, anchors] def freeze_backbone(self): """Freeze backbone weights and bn layers.""" diff --git a/train.py b/train.py index 6be1cd7..f8e2145 100644 --- a/train.py +++ b/train.py @@ -37,7 +37,7 @@ from apex import amp, optimizers from apex.multi_tensor_apply import multi_tensor_applier except ImportError: - raise ImportError("Please install apex from https://www.github.com/nvidia/apex to run this example.") + print("Please install apex from https://www.github.com/nvidia/apex to run this example.") from models.efficientdet import EfficientDet from models.losses import FocalLoss @@ -140,7 +140,6 @@ def train(train_loader, model, scheduler, warmup_scheduler, optimizer, epoch, ar classification_loss, regression_loss = model([images, annotations]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() - loss = classification_loss + regression_loss if bool(loss == 0): print('loss equal zero(0)') @@ -161,8 +160,8 @@ def train(train_loader, model, scheduler, warmup_scheduler, optimizer, epoch, ar # warmup_scheduler.dampen() total_loss.append(loss.item()) - if(iteration % 50 == 0): - print('{} iteration: training ...'.format(iteration)) + if (iteration % 100 == 0): + # print('{} iteration: training ...'.format(iteration)) ans = { 'epoch': epoch, 'iteration': iteration, @@ -172,6 +171,8 @@ def train(train_loader, model, scheduler, warmup_scheduler, optimizer, epoch, ar } for key, value in ans.items(): print(' {:15s}: {}'.format(str(key), value)) + if key != "epoch": + writer.add_scalar(key, value, iteration) iteration += 1 scheduler.step(np.mean(total_loss)) # used for ReduceLROnPlateau @@ -216,14 +217,14 @@ def main_worker(gpu, ngpus_per_node, args): # Training dataset train_dataset = [] - if(args.dataset == 'VOC'): + if (args.dataset == 'VOC'): train_dataset = VOCDetection(root=args.dataset_root, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) valid_dataset = VOCDetection(root=args.dataset_root, image_sets=[( '2007', 'test')], transform=transforms.Compose([Normalizer(), Resizer()])) args.num_class = train_dataset.num_classes() - elif(args.dataset == 'COCO'): + elif (args.dataset == 'COCO'): train_dataset = CocoDataset( root_dir=args.dataset_root, set_name='train2017', From 8182946db0e41ea753602e87efb4c946d8409de4 Mon Sep 17 00:00:00 2001 From: longyu Date: Sat, 4 Apr 2020 19:10:11 +0200 Subject: [PATCH 14/20] make voc evaluation with batch --- eval.py | 102 +++++++++++++++++++++++++++---------------------------- train.py | 55 ++++++++++++++++-------------- 2 files changed, 81 insertions(+), 76 deletions(-) diff --git a/eval.py b/eval.py index d383002..945d759 100644 --- a/eval.py +++ b/eval.py @@ -87,52 +87,52 @@ def _get_detections(dataloader, retinanet, # Returns A list of lists containing the detections for each image in the generator. """ - all_detections = [[None for i in range(dataloader.dataset.num_classes())] - for j in range(len(dataloader.dataset))] + dataset = dataloader.dataset + all_detections = [[None for i in range(dataset.num_classes())] + for j in range(len(dataset))] retinanet.eval() - + index = 0 with torch.no_grad(): - for idx, data in enumerate(dataloader): - import epdb; epdb.set_trace() - data = dataset[index] - scale = data['scale'] + for data in tqdm(dataloader, total=len(dataloader)): + images = data[0] + scores_batch, labels_batch, boxes_batch = retinanet(images) - # run network - scores, labels, boxes = retinanet(data['img'].permute( - 2, 0, 1).cuda().float().unsqueeze(dim=0)) - scores = scores.cpu().numpy() - labels = labels.cpu().numpy() - boxes = boxes.cpu().numpy() - - # correct boxes for image scale - boxes /= scale - - # select indices which have a score above the threshold - indices = np.where(scores > score_threshold)[0] - if indices.shape[0] > 0: - # select those scores - scores = scores[indices] - - # find the order with which to sort the scores - scores_sort = np.argsort(-scores)[:max_detections] - - # select detections - image_boxes = boxes[indices[scores_sort], :] - image_scores = scores[scores_sort] - image_labels = labels[indices[scores_sort]] - image_detections = np.concatenate([image_boxes, np.expand_dims( - image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1) - - # copy detections to all_detections - for label in range(dataset.num_classes()): - all_detections[index][label] = image_detections[image_detections[:, -1] == label, :-1] - else: - # copy detections to all_detections - for label in range(dataset.num_classes()): - all_detections[index][label] = np.zeros((0, 5)) - - print('{}/{}'.format(index + 1, len(dataset)), end='\r') + for scores, labels, boxes, scale in zip(scores_batch, + labels_batch, + boxes_batch, data[2]): + + scores = scores.cpu().numpy() + labels = labels.cpu().numpy() + boxes = boxes.cpu().numpy() + + # correct boxes for image scale + boxes /= scale + + # select indices which have a score above the threshold + indices = np.where(scores > score_threshold)[0] + if indices.shape[0] > 0: + # select those scores + scores = scores[indices] + + # find the order with which to sort the scores + scores_sort = np.argsort(-scores)[:max_detections] + + # select detections + image_boxes = boxes[indices[scores_sort], :] + image_scores = scores[scores_sort] + image_labels = labels[indices[scores_sort]] + image_detections = np.concatenate([image_boxes, np.expand_dims( + image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1) + + # copy detections to all_detections + for label in range(dataset.num_classes()): + all_detections[index][label] = image_detections[image_detections[:, -1] == label, :-1] + else: + # copy detections to all_detections + for label in range(dataset.num_classes()): + all_detections[index][label] = np.zeros((0, 5)) + index += 1 return all_detections @@ -147,18 +147,18 @@ def _get_annotations(generator): A list of lists containing the annotations for each image in the generator. """ all_annotations = [[None for i in range( - generator.num_classes())] for j in range(len(generator))] + generator.dataset.num_classes())] for j in range(len(generator.dataset))] - for i in range(len(generator)): + for i in range(len(generator.dataset)): # load the annotations - annotations = generator.load_annotations(i) + annotations = generator.dataset.load_annotations(i) # copy detections to all_annotations - for label in range(generator.num_classes()): + for label in range(generator.dataset.num_classes()): all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy() - print('{}/{}'.format(i + 1, len(generator)), end='\r') + print('{}/{}'.format(i + 1, len(generator.dataset)), end='\r') return all_annotations @@ -191,13 +191,13 @@ def evaluate( average_precisions = {} - for label in range(generator.num_classes()): + for label in range(generator.dataset.num_classes()): false_positives = np.zeros((0,)) true_positives = np.zeros((0,)) scores = np.zeros((0,)) num_annotations = 0.0 - for i in range(len(generator)): + for i in range(len(generator.dataset)): detections = all_detections[i][label] annotations = all_annotations[i][label] num_annotations += annotations.shape[0] @@ -250,8 +250,8 @@ def evaluate( print('\nmAP:') avg_mAP = [] - for label in range(generator.num_classes()): - label_name = generator.label_to_name(label) + for label in range(generator.dataset.num_classes()): + label_name = generator.dataset.label_to_name(label) print('{}: {}'.format(label_name, average_precisions[label][0])) avg_mAP.append(average_precisions[label][0]) print('avg mAP: {}'.format(np.mean(avg_mAP))) diff --git a/train.py b/train.py index f8e2145..f36f599 100644 --- a/train.py +++ b/train.py @@ -117,6 +117,8 @@ help='freeze all batch norm layers') parser.add_argument('--mixed_training', action='store_true', help='Use AMP mixed training optimization O1') +parser.add_argument('--eval', action='store_true', + help='Perform evaluation') iteration = 1 @@ -184,16 +186,16 @@ def train(train_loader, model, scheduler, warmup_scheduler, optimizer, epoch, ar print(' {:15s}: {}'.format(str(key), value)) -def test(dataset, model, epoch, args): +def test(dataloader, model, epoch, args): print("{} epoch: \t start validation....".format(epoch)) # model = model.module model.eval() # model.is_training = False with torch.no_grad(): if(args.dataset == 'VOC'): - evaluate(dataset, model) + evaluate(dataloader, model) else: - evaluate_coco(dataset, model) + evaluate_coco(dataloader, model) def main_worker(gpu, ngpus_per_node, args): @@ -249,7 +251,7 @@ def main_worker(gpu, ngpus_per_node, args): collate_fn=collater, pin_memory=True) valid_loader = DataLoader(valid_dataset, - batch_size=1, + batch_size=args.batch_size, num_workers=args.workers, shuffle=False, collate_fn=collater, @@ -350,27 +352,30 @@ def main_worker(gpu, ngpus_per_node, args): cudnn.benchmark = True - for epoch in range(args.start_epoch, args.num_epoch): - train(train_loader, model, scheduler, warmup_scheduler, - optimizer, epoch, args) - - state = { - 'epoch': epoch, - 'parser': args, - 'state_dict': get_state_dict(model), - 'optimizer': optimizer.state_dict() - } - - torch.save( - state, - os.path.join( - args.save_folder, - args.dataset, - args.network, - "checkpoint_{}.pth".format(epoch))) - - if (epoch + 1) % args.eval_epochs == 0: - test(valid_dataset, model, epoch, args) + if args.eval: + test(valid_loader, model, epoch=0, args=args) + else: + for epoch in range(args.start_epoch, args.num_epoch): + train(train_loader, model, scheduler, warmup_scheduler, + optimizer, epoch, args) + + state = { + 'epoch': epoch, + 'parser': args, + 'state_dict': get_state_dict(model), + 'optimizer': optimizer.state_dict() + } + + torch.save( + state, + os.path.join( + args.save_folder, + args.dataset, + args.network, + "checkpoint_{}.pth".format(epoch))) + + if (epoch + 1) % args.eval_epochs == 0: + test(valid_loader, model, epoch, args) def main(): From 052cc661951dc2083a52b0f92bfc239c83812b56 Mon Sep 17 00:00:00 2001 From: longyu Date: Sat, 4 Apr 2020 19:33:46 +0200 Subject: [PATCH 15/20] fix loader --- loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loader.py b/loader.py index 8a8ce6c..eeaf11e 100644 --- a/loader.py +++ b/loader.py @@ -21,7 +21,7 @@ def __iter__(self): stream = torch.cuda.Stream() first = True - for next_input, next_target in self.loader: + for next_input, next_target, _ in self.loader: with torch.cuda.stream(stream): next_input = next_input.cuda(non_blocking=True) next_target = next_target.cuda(non_blocking=True) From 7c746534cb00f288aabf138cfce974344646df2b Mon Sep 17 00:00:00 2001 From: longyu Date: Sun, 5 Apr 2020 12:55:55 +0200 Subject: [PATCH 16/20] make evaluation with multiple gpus --- eval.py | 14 +++++++++++--- models/efficientdet.py | 21 +++++++++++++++++++-- train.py | 1 + 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/eval.py b/eval.py index 945d759..10b5b7e 100644 --- a/eval.py +++ b/eval.py @@ -94,10 +94,13 @@ def _get_detections(dataloader, retinanet, retinanet.eval() index = 0 with torch.no_grad(): - for data in tqdm(dataloader, total=len(dataloader)): - images = data[0] + for idx, data in tqdm(enumerate(dataloader), total=len(dataloader)): + images = data[0].cuda() scores_batch, labels_batch, boxes_batch = retinanet(images) + scores_batch = scores_batch.reshape(images.shape[0], -1) + labels_batch = labels_batch.reshape(images.shape[0], -1) + boxes_batch = boxes_batch.reshape(images.shape[0], -1, 4) for scores, labels, boxes, scale in zip(scores_batch, labels_batch, boxes_batch, data[2]): @@ -270,11 +273,16 @@ def evaluate_coco(dataloader, model, threshold=0.05): results = [] image_ids = [] index = 0 + for data in tqdm(dataloader, total=len(dataloader)): images = data[0] # run network scores_batch, labels_batch, boxes_batch = model(images) - + + scores_batch = scores_batch.reshape(images.shape[0], -1) + labels_batch = labels_batch.reshape(images.shape[0], -1) + boxes_batch = boxes_batch.reshape(images.shape[0], -1, 4) + for scores, labels, boxes, scale in zip(scores_batch, labels_batch, boxes_batch, data[2]): diff --git a/models/efficientdet.py b/models/efficientdet.py index d2a5e05..2601366 100644 --- a/models/efficientdet.py +++ b/models/efficientdet.py @@ -83,6 +83,7 @@ def forward(self, inputs): if self.training: return self.criterion(classification, regression, anchors, annotations) else: + max_per_image = 256 transformed_anchors = self.regressBoxes(anchors, regression) transformed_anchors = self.clipBoxes(transformed_anchors, inputs) scores = torch.max(classification, dim=2, keepdim=True)[0] @@ -102,10 +103,26 @@ def forward(self, inputs): anchors_nms_idx = nms(trf_anchors, scores_tmp[:, 0], iou_threshold=self.iou_threshold) nms_scores_tmp, nms_class_tmp = cls_tmp[anchors_nms_idx, :].max(dim=1) + trf_anchors = trf_anchors[anchors_nms_idx, :] + + if not torch.all(-nms_scores_tmp[:-1] <= -nms_scores_tmp[1:]): + raise ValueError("Please make nms score sorted") + if nms_scores_tmp.shape[0] > max_per_image: + nms_scores_tmp = nms_scores_tmp[:max_per_image] + nms_class_tmp = nms_class_tmp[:max_per_image] + trf_anchors = trf_anchors[:max_per_image] + else: + K = max_per_image - nms_scores_tmp.shape[0] + nms_scores_tmp = torch.cat((nms_scores_tmp, + -torch.ones(K,).type_as(nms_scores_tmp))) + nms_class_tmp = torch.cat((nms_class_tmp, + -torch.ones(K,).type_as(nms_class_tmp))) + trf_anchors = torch.cat((trf_anchors, + -torch.ones(K, 4).type_as(trf_anchors))) nms_scores.append(nms_scores_tmp) nms_class.append(nms_class_tmp) - anchors.append(trf_anchors[anchors_nms_idx, :]) - return [nms_scores, nms_class, anchors] + anchors.append(trf_anchors) + return torch.cat(nms_scores), torch.cat(nms_class), torch.cat(anchors) def freeze_backbone(self): """Freeze backbone weights and bn layers.""" diff --git a/train.py b/train.py index f36f599..77dbf70 100644 --- a/train.py +++ b/train.py @@ -375,6 +375,7 @@ def main_worker(gpu, ngpus_per_node, args): "checkpoint_{}.pth".format(epoch))) if (epoch + 1) % args.eval_epochs == 0: + torch.cuda.empty_cache() test(valid_loader, model, epoch, args) From 8fbde34d9c7739eb72d28c050a91f65060f7dda8 Mon Sep 17 00:00:00 2001 From: longyu Date: Sun, 5 Apr 2020 17:41:02 +0200 Subject: [PATCH 17/20] fix indent --- eval.py | 2 +- train.py | 41 ++++++++++++++++++++--------------------- 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/eval.py b/eval.py index 10b5b7e..2b7ef19 100644 --- a/eval.py +++ b/eval.py @@ -278,7 +278,7 @@ def evaluate_coco(dataloader, model, threshold=0.05): images = data[0] # run network scores_batch, labels_batch, boxes_batch = model(images) - + scores_batch = scores_batch.reshape(images.shape[0], -1) labels_batch = labels_batch.reshape(images.shape[0], -1) boxes_batch = boxes_batch.reshape(images.shape[0], -1, 4) diff --git a/train.py b/train.py index 77dbf70..fa0f28d 100644 --- a/train.py +++ b/train.py @@ -356,27 +356,26 @@ def main_worker(gpu, ngpus_per_node, args): test(valid_loader, model, epoch=0, args=args) else: for epoch in range(args.start_epoch, args.num_epoch): - train(train_loader, model, scheduler, warmup_scheduler, - optimizer, epoch, args) - - state = { - 'epoch': epoch, - 'parser': args, - 'state_dict': get_state_dict(model), - 'optimizer': optimizer.state_dict() - } - - torch.save( - state, - os.path.join( - args.save_folder, - args.dataset, - args.network, - "checkpoint_{}.pth".format(epoch))) - - if (epoch + 1) % args.eval_epochs == 0: - torch.cuda.empty_cache() - test(valid_loader, model, epoch, args) + train(train_loader, model, scheduler, warmup_scheduler, + optimizer, epoch, args) + + state = { + 'epoch': epoch, + 'parser': args, + 'state_dict': get_state_dict(model), + 'optimizer': optimizer.state_dict() + } + + torch.save( + state, + os.path.join( + args.save_folder, + args.dataset, + args.network, + "checkpoint_{}.pth".format(epoch))) + + if (epoch + 1) % args.eval_epochs == 0: + test(valid_dataset, model, epoch, args) def main(): From 954a4ade65d87ae9c8f9843f92a8ee13dd40d134 Mon Sep 17 00:00:00 2001 From: longyu Date: Mon, 6 Apr 2020 13:35:33 +0200 Subject: [PATCH 18/20] allow to start from pretrained detector weights --- train.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index fa0f28d..8161676 100644 --- a/train.py +++ b/train.py @@ -270,8 +270,11 @@ def main_worker(gpu, ngpus_per_node, args): params = checkpoint['parser'] args.num_class = params.num_class args.network = params.network - args.start_epoch = checkpoint['epoch'] + 1 + if args.start_epoch == -1: + args.start_epoch = checkpoint['epoch'] + 1 del params + if args.start_epoch == -1: + args.start_epoch = 0 model = EfficientDet(num_classes=args.num_class, network=args.network, @@ -326,8 +329,12 @@ def main_worker(gpu, ngpus_per_node, args): # define loss function (criterion) , optimizer, scheduler optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr) + if args.resume is not None and "optimizer" in checkpoint: - optimizer.load_state_dict(checkpoint["optimizer"]) + try: + optimizer.load_state_dict(checkpoint["optimizer"]) + except Exception as ex: + print("Optimizer load_state_dict error: {}".format(ex)) num_steps = len(train_loader) * args.num_epoch From c34f8a8d4965ce8b2d7d83823e53ef216ec07308 Mon Sep 17 00:00:00 2001 From: longyu Date: Mon, 6 Apr 2020 15:17:08 +0200 Subject: [PATCH 19/20] fix issue --- train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/train.py b/train.py index 8161676..34c5574 100644 --- a/train.py +++ b/train.py @@ -88,7 +88,7 @@ help='Directory for saving checkpoint models') parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', help='number of data loading workers (default: 4)') -parser.add_argument('--start_epoch', default=0, type=int, metavar='N', +parser.add_argument('--start_epoch', default=-1, type=int, metavar='N', help='manual epoch number (useful on restarts)') parser.add_argument('--world-size', default=1, type=int, help='number of nodes for distributed training') @@ -382,7 +382,7 @@ def main_worker(gpu, ngpus_per_node, args): "checkpoint_{}.pth".format(epoch))) if (epoch + 1) % args.eval_epochs == 0: - test(valid_dataset, model, epoch, args) + test(valid_loader, model, epoch, args) def main(): From 25a55f4508bdf3e0170acb8d48b465ef548b9b6c Mon Sep 17 00:00:00 2001 From: longyu Date: Wed, 8 Apr 2020 20:18:38 +0200 Subject: [PATCH 20/20] change data augmentation --- datasets/augmentation.py | 15 ++++++---- datasets/voc0712.py | 19 +++++++++++-- train.py | 59 +++++++++++++++++++--------------------- 3 files changed, 53 insertions(+), 40 deletions(-) diff --git a/datasets/augmentation.py b/datasets/augmentation.py index 2156472..d9289e4 100644 --- a/datasets/augmentation.py +++ b/datasets/augmentation.py @@ -16,8 +16,6 @@ def get_augumentation(phase, width=512, height=512, min_area=0., min_visibility= albu.augmentations.transforms.RandomResizedCrop( height=height, width=width, p=0.3), - albu.augmentations.transforms.Flip(), - albu.augmentations.transforms.Transpose(), albu.OneOf([ albu.RandomBrightnessContrast(brightness_limit=0.5, contrast_limit=0.4), @@ -33,7 +31,6 @@ def get_augumentation(phase, width=512, height=512, min_area=0., min_visibility= ]), albu.CLAHE(p=0.8), albu.HorizontalFlip(p=0.5), - albu.VerticalFlip(p=0.5), ]) if(phase == 'test' or phase == 'valid'): list_transforms.extend([ @@ -46,8 +43,11 @@ def get_augumentation(phase, width=512, height=512, min_area=0., min_visibility= ]) if(phase == 'test'): return albu.Compose(list_transforms) - return albu.Compose(list_transforms, bbox_params=albu.BboxParams(format='pascal_voc', min_area=min_area, - min_visibility=min_visibility, label_fields=['category_id'])) + return albu.Compose(list_transforms, + bbox_params=albu.BboxParams(format='pascal_voc', + min_area=min_area, + min_visibility=min_visibility, + label_fields=['category_id'])) def detection_collate(batch): @@ -75,8 +75,11 @@ def collater(data): imgs = [s['img'] for s in data] annots = [s['annot'] for s in data] scales = [s['scale'] for s in data] + try: + imgs = torch.from_numpy(np.stack(imgs, axis=0)) + except ValueError: + import pdb; pdb.set_trace() - imgs = torch.from_numpy(np.stack(imgs, axis=0)) max_num_annots = max(annot.shape[0] for annot in annots) diff --git a/datasets/voc0712.py b/datasets/voc0712.py index 4814754..28e5de4 100644 --- a/datasets/voc0712.py +++ b/datasets/voc0712.py @@ -8,6 +8,8 @@ import xml.etree.cElementTree as ET else: import xml.etree.ElementTree as ET +import albumentations as albu + VOC_CLASSES = ( # always index 0 'aeroplane', 'bicycle', 'bird', 'boat', @@ -106,15 +108,26 @@ def __getitem__(self, index): target = ET.parse(self._annopath % img_id).getroot() img = cv2.imread(self._imgpath % img_id) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) - img = img.astype(np.float32)/255. height, width, channels = img.shape if self.target_transform is not None: target = self.target_transform(target, width, height) target = np.array(target) sample = {'img': img, 'annot': target} - if self.transform is not None: - sample = self.transform(sample) + if isinstance(self.transform, albu.core.composition.Compose): + result = self.transform(image=img, bboxes=target[:, :4], category_id=target[:, -1]) + bboxes = np.array(result["bboxes"]) + cls = np.atleast_2d(result["category_id"]).T + if bboxes.size == 0: # after data augmentation we loose all bboxes + return None + target = np.hstack((bboxes, cls)) + sample = {"img": result["image"].transpose(1, 0).transpose(2, 1), + "annot": torch.from_numpy(target), + "scale": -1} # fake scale + else: + img = img.astype(np.float32)/255. + if self.transform is not None: + sample = self.transform(sample) return sample bbox = target[:, :4] diff --git a/train.py b/train.py index 34c5574..0cf502a 100644 --- a/train.py +++ b/train.py @@ -218,30 +218,23 @@ def main_worker(gpu, ngpus_per_node, args): rank=args.rank) # Training dataset + data_augmenter = transforms.Compose([Normalizer(), Augmenter(), Resizer()]) + data_augmenter = get_augumentation(phase="train") + inference_augmenter = transforms.Compose([Normalizer(), Resizer()]) train_dataset = [] if (args.dataset == 'VOC'): train_dataset = VOCDetection(root=args.dataset_root, - transform=transforms.Compose( - [Normalizer(), Augmenter(), Resizer()])) + transform=data_augmenter) valid_dataset = VOCDetection(root=args.dataset_root, image_sets=[( - '2007', 'test')], transform=transforms.Compose([Normalizer(), Resizer()])) + '2007', 'test')], transform=inference_augmenter) args.num_class = train_dataset.num_classes() elif (args.dataset == 'COCO'): - train_dataset = CocoDataset( - root_dir=args.dataset_root, - set_name='train2017', - transform=transforms.Compose( - [ - Normalizer(), - Augmenter(), - Resizer()])) - valid_dataset = CocoDataset( - root_dir=args.dataset_root, - set_name='val2017', - transform=transforms.Compose( - [ - Normalizer(), - Resizer()])) + train_dataset = CocoDataset(root_dir=args.dataset_root, + set_name='train2017', + transform=data_augmenter) + valid_dataset = CocoDataset(root_dir=args.dataset_root, + set_name='val2017', + transform=inference_augmenter) args.num_class = train_dataset.num_classes() train_loader = DataLoader(train_dataset, @@ -296,6 +289,12 @@ def main_worker(gpu, ngpus_per_node, args): if args.freeze_bn: model.freeze_bn() + # define loss function (criterion) , optimizer, scheduler + optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), + lr=args.lr) + if args.resume is not None and "optimizer" in checkpoint: + optimizer.load_state_dict(checkpoint["optimizer"]) + if args.distributed: # For multiprocessing distributed, DistributedDataParallel constructor # should always set the single device scope, otherwise, @@ -323,18 +322,21 @@ def main_worker(gpu, ngpus_per_node, args): model = model.cuda(args.gpu) else: print('Run with DataParallel ....') - model = torch.nn.DataParallel(model) model = model.cuda() - # define loss function (criterion) , optimizer, scheduler - optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), + # define loss function (criterion) , optimizer, scheduler + optimizer = optim.AdamW(filter(lambda p: p.requires_grad, model.parameters()), lr=args.lr) - - if args.resume is not None and "optimizer" in checkpoint: - try: + if args.resume is not None and "optimizer" in checkpoint: optimizer.load_state_dict(checkpoint["optimizer"]) - except Exception as ex: - print("Optimizer load_state_dict error: {}".format(ex)) + + if args.mixed_training: + model, optimizer = amp.initialize(model, optimizer, + opt_level="O1", + keep_batchnorm_fp32=None, + master_weights=None, + loss_scale=None) + model = torch.nn.DataParallel(model) num_steps = len(train_loader) * args.num_epoch @@ -344,11 +346,6 @@ def main_worker(gpu, ngpus_per_node, args): # T_max=num_steps) if args.resume is not None and "scheduler" in checkpoint: scheduler.load_state_dict(checkpoint["scheduler"]) - if args.mixed_training: - model, optimizer = amp.initialize(model, optimizer, - opt_level="O1", - keep_batchnorm_fp32=None, - loss_scale=128) # warmup_scheduler = warmup.UntunedLinearWarmup(optimizer) warmup_scheduler = None