toandaominh1997 · lolongcovas · Mar 20, 2020 · Mar 20, 2020 · Mar 29, 2020 · Mar 31, 2020
diff --git a/README.md b/README.md
@@ -35,6 +35,12 @@ python demo.py --weight ./checkpoint_VOC_efficientdet-d1_97.pth --threshold 0.6
 &nbsp;
 
 ## Recent Update
+ - [04/04/2020] Training on the VOC dataset succeeds.
+     ```Shell
+	nice -n1 python3.6 train.py --dataset VOC --dataset_root $VOC_PATH --network efficientdet-d0 --batch_size $BSIZE --workers 8 --grad_accumulation_steps 1 --lr 0.00001 --eval_epochs 20
+     ```
+     I set `lr=1e-5` because `1e-4` did not work
+ - [31/03/2020] ~~Add support for freezing backbone layers and batch norm layers. In addition, it supports mixed precision training with APEX (opt level O1).~~ [requires testing].
  - [06/01/2020] Support both DistributedDataParallel and DataParallel, change augmentation, eval_voc
  - [17/12/2019] Add Fast normalized fusion, Augmentation with Ratio, Change RetinaHead, Fix Support EfficientDet-D0->D7
  - [7/12/2019] Support EfficientDet-D0, EfficientDet-D1, EfficientDet-D2, EfficientDet-D3, EfficientDet-D4,... . Support change gradient accumulation steps, AdamW.
@@ -88,22 +94,22 @@ sh datasets/scripts/COCO2017.sh
 - To train EfficientDet using the train script simply specify the parameters listed in `train.py` as a flag or manually change them.
 
 ```Shell
-python train.py --network effcientdet-d0  # Example
+python train.py --network efficientdet-d0  # Example
 ```
 
   - With VOC Dataset:
   ```Shell
   # DataParallel
-  python train.py --dataset VOC --dataset_root /root/data/VOCdevkit/ --network effcientdet-d0 --batch_size 32 
+  python train.py --dataset VOC --dataset_root /root/data/VOCdevkit/ --network efficientdet-d0 --batch_size 32 
   # DistributedDataParallel with backend nccl
-  python train.py --dataset VOC --dataset_root /root/data/VOCdevkit/ --network effcientdet-d0 --batch_size 32 --multiprocessing-distributed
+  python train.py --dataset VOC --dataset_root /root/data/VOCdevkit/ --network efficientdet-d0 --batch_size 32 --multiprocessing-distributed
   ```
   - With COCO Dataset:
   ```Shell
   # DataParallel
-  python train.py --dataset COCO --dataset_root ~/data/coco/ --network effcientdet-d0 --batch_size 32
+  python train.py --dataset COCO --dataset_root ~/data/coco/ --network efficientdet-d0 --batch_size 32
   # DistributedDataParallel with backend nccl
-  python train.py --dataset COCO --dataset_root ~/data/coco/ --network effcientdet-d0 --batch_size 32 --multiprocessing-distributed
+  python train.py --dataset COCO --dataset_root ~/data/coco/ --network efficientdet-d0 --batch_size 32 --multiprocessing-distributed
   ```
 
 ## Evaluation

diff --git a/datasets/augmentation.py b/datasets/augmentation.py
@@ -16,8 +16,6 @@ def get_augumentation(phase, width=512, height=512, min_area=0., min_visibility=
             albu.augmentations.transforms.RandomResizedCrop(
                 height=height,
                 width=width, p=0.3),
-            albu.augmentations.transforms.Flip(),
-            albu.augmentations.transforms.Transpose(),
             albu.OneOf([
                 albu.RandomBrightnessContrast(brightness_limit=0.5,
                                               contrast_limit=0.4),
@@ -33,7 +31,6 @@ def get_augumentation(phase, width=512, height=512, min_area=0., min_visibility=
             ]),
             albu.CLAHE(p=0.8),
             albu.HorizontalFlip(p=0.5),
-            albu.VerticalFlip(p=0.5),
         ])
     if(phase == 'test' or phase == 'valid'):
         list_transforms.extend([
@@ -46,32 +43,43 @@ def get_augumentation(phase, width=512, height=512, min_area=0., min_visibility=
     ])
     if(phase == 'test'):
         return albu.Compose(list_transforms)
-    return albu.Compose(list_transforms, bbox_params=albu.BboxParams(format='pascal_voc', min_area=min_area,
-                                                                     min_visibility=min_visibility, label_fields=['category_id']))
+    return albu.Compose(list_transforms,
+                        bbox_params=albu.BboxParams(format='pascal_voc',
+                                                    min_area=min_area,
+                                                    min_visibility=min_visibility,
+                                                    label_fields=['category_id']))
 
 
 def detection_collate(batch):
     imgs = [s['image'] for s in batch]
     annots = [s['bboxes'] for s in batch]
     labels = [s['category_id'] for s in batch]
+    scales = [s['scale'] for s in batch]
 
     max_num_annots = max(len(annot) for annot in annots)
     annot_padded = np.ones((len(annots), max_num_annots, 5))*-1
 
     if max_num_annots > 0:
         for idx, (annot, lab) in enumerate(zip(annots, labels)):
+            # pylint: disable=C1801
             if len(annot) > 0:
                 annot_padded[idx, :len(annot), :4] = annot
                 annot_padded[idx, :len(annot), 4] = lab
-    return (torch.stack(imgs, 0), torch.FloatTensor(annot_padded))
+    return (torch.stack(imgs, 0),
+            torch.FloatTensor(annot_padded),
+            torch.FloatTensor(scales))
 
 
 def collater(data):
+    data = [x for x in data if x is not None]
     imgs = [s['img'] for s in data]
     annots = [s['annot'] for s in data]
     scales = [s['scale'] for s in data]
+    try:
+        imgs = torch.from_numpy(np.stack(imgs, axis=0))
+    except ValueError:
+        import pdb; pdb.set_trace()
 
-    imgs = torch.from_numpy(np.stack(imgs, axis=0))
 
     max_num_annots = max(annot.shape[0] for annot in annots)
 
@@ -88,7 +96,8 @@ def collater(data):
 
     imgs = imgs.permute(0, 3, 1, 2)
 
-    return (imgs, torch.FloatTensor(annot_padded))
+    return (imgs, torch.FloatTensor(annot_padded),
+            torch.FloatTensor(scales))
 
 
 class Resizer(object):
@@ -108,11 +117,13 @@ def __call__(self, sample, common_size=512):
 
         image = cv2.resize(image, (resized_width, resized_height))
 
-        new_image = np.zeros((common_size, common_size, 3))
+        new_image = np.zeros((common_size, common_size, 3), np.float32)
         new_image[0:resized_height, 0:resized_width] = image
         annots[:, :4] *= scale
 
-        return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale}
+        return {'img': torch.from_numpy(new_image),
+                'annot': torch.from_numpy(annots),
+                'scale': scale}
 
 
 class Augmenter(object):
@@ -147,4 +158,5 @@ def __init__(self):
     def __call__(self, sample):
         image, annots = sample['img'], sample['annot']
 
-        return {'img': ((image.astype(np.float32) - self.mean) / self.std), 'annot': annots}
+        # 1/255. = 0.00392156862745098
+        return {'img': ((image.astype(np.float32) *0.00392156862745098 - self.mean) / self.std), 'annot': annots}
diff --git a/datasets/coco.py b/datasets/coco.py
@@ -64,6 +64,7 @@ def __len__(self):
     def __getitem__(self, idx):
 
         img = self.load_image(idx)
+        image_size = img.shape[:2]
         annot = self.load_annotations(idx)
         sample = {'img': img, 'annot': annot}
         if self.transform:

diff --git a/datasets/voc0712.py b/datasets/voc0712.py
@@ -8,6 +8,8 @@
     import xml.etree.cElementTree as ET
 else:
     import xml.etree.ElementTree as ET
+import albumentations as albu
+
 
 VOC_CLASSES = (  # always index 0
     'aeroplane', 'bicycle', 'bird', 'boat',
@@ -106,15 +108,26 @@ def __getitem__(self, index):
         target = ET.parse(self._annopath % img_id).getroot()
         img = cv2.imread(self._imgpath % img_id)
         img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-        img = img.astype(np.float32)/255.
         height, width, channels = img.shape
 
         if self.target_transform is not None:
             target = self.target_transform(target, width, height)
         target = np.array(target)
         sample = {'img': img, 'annot': target}
-        if self.transform is not None:
-            sample = self.transform(sample)
+        if isinstance(self.transform, albu.core.composition.Compose):
+            result = self.transform(image=img, bboxes=target[:, :4], category_id=target[:, -1])
+            bboxes = np.array(result["bboxes"])
+            cls = np.atleast_2d(result["category_id"]).T
+            if bboxes.size == 0:  # after data augmentation we lose all bboxes
+                return None
+            target = np.hstack((bboxes, cls))
+            sample = {"img": result["image"].transpose(1, 0).transpose(2, 1),
+                      "annot": torch.from_numpy(target),
+                      "scale": -1}  # fake scale
+        else:
+            img = img.astype(np.float32)/255.
+            if self.transform is not None:
+                sample = self.transform(sample)
         return sample
 
         bbox = target[:, :4]