
Commit e5d30db

[PaddlePaddle Paper Reproduction Challenge (Round 6)] (101) Unsupervised Learning of Visual Features by Contrasting Cluster Assignments (#120)
* add-swav
* add swav linear probe
* fix bug with axis
* update pretrain config
* add tipc
* add doc
1 parent 6e6da5a commit e5d30db

19 files changed, 831 additions and 4 deletions

README.md

Lines changed: 2 additions & 1 deletion
```diff
@@ -46,7 +46,8 @@
 | MoCo-BYOL | 300 | 71.56 | 72.10 | ResNet-50 | [download](https://passl.bj.bcebos.com/models/mocobyol_r50_ep300_ckpt.pdparams) | [Train MoCo-BYOL](docs/Train_MoCo-BYOL_model.md) |
 | BYOL | 300 | 72.50 | 71.62 | ResNet-50 | [download](https://passl.bj.bcebos.com/models/byol_r50_300.pdparams) | [Train BYOL](docs/Train_BYOL_model.md) |
 | PixPro | 100 | 55.1(fp16) | 57.2(fp32) | ResNet-50 | [download](https://passl.bj.bcebos.com/models/pixpro_r50_ep100_no_instance_with_linear.pdparams) | [Train PixPro](docs/Train_PixPro_model.md) |
-| SimSiam | 100 | 68.3 | 68.4 | ResNet-50 | [download](https://drive.google.com/file/d/1kaAm8-tlvB570kzI4fo9h4dwGQFf_4FE/view?usp=sharing) | [Train SimSiam](docs/Train_SimSiam_model.md) |
+| SimSiam | 100 | 68.3 | 68.4 | ResNet-50 | [download](https://drive.google.com/file/d/1kaAm8-tlvB570kzI4fo9h4dwGQFf_4FE/view?usp=sharing) | [Train SimSiam](docs/Train_SimSiam_model.md) |
+| SwAV | 100 | 72.1 | 72.4 | ResNet-50 | [download](https://drive.google.com/file/d/1budFSoQqZz1Idyej-R4E6kUnL8CGtdyu/view?usp=sharing) | [Train SwAV](docs/Train_SwAV_model.md) |
 
 > Benchmark Linear Image Classification on ImageNet-1K.
```

README_cn.md

Lines changed: 2 additions & 1 deletion
```diff
@@ -46,7 +46,8 @@
 | MoCo-BYOL | 300 | 71.56 | 72.10 | ResNet-50 | [download](https://passl.bj.bcebos.com/models/mocobyol_r50_ep300_ckpt.pdparams) | [Train MoCo-BYOL](docs/Train_MoCo-BYOL_model.md) |
 | BYOL | 300 | 72.50 | 71.62 | ResNet-50 | [download](https://passl.bj.bcebos.com/models/byol_r50_300.pdparams) | [Train BYOL](docs/Train_BYOL_model.md) |
 | PixPro | 100 | 55.1(fp16) | 57.2(fp32) | ResNet-50 | [download](https://passl.bj.bcebos.com/models/pixpro_r50_ep100_no_instance_with_linear.pdparams) | [Train PixPro](docs/Train_PixPro_model.md) |
-| SimSiam | 100 | 68.3 | 68.4 | ResNet-50 | [download](https://drive.google.com/file/d/1kaAm8-tlvB570kzI4fo9h4dwGQFf_4FE/view?usp=sharing) | [Train SimSiam](docs/Train_SimSiam_model.md) |
+| SimSiam | 100 | 68.3 | 68.4 | ResNet-50 | [download](https://drive.google.com/file/d/1kaAm8-tlvB570kzI4fo9h4dwGQFf_4FE/view?usp=sharing) | [Train SimSiam](docs/Train_SimSiam_model.md) |
+| SwAV | 100 | 72.1 | 72.4 | ResNet-50 | [download](https://drive.google.com/file/d/1budFSoQqZz1Idyej-R4E6kUnL8CGtdyu/view?usp=sharing) | [Train SwAV](docs/Train_SwAV_model.md) |
 
 > Benchmark Linear Image Classification on ImageNet-1K.
```

configs/swav/swav_clas_r50.yaml

Lines changed: 81 additions & 0 deletions
```yaml
epochs: 100
output_dir: output_dir
seed: 0
device: gpu

# used for static mode and model export
image_shape: [3, 224, 224]
save_inference_dir: ./inference

model:
  name: Classification
  backbone:
    name: ResNetswav
    depth: 50
    frozen_stages: 4
  head:
    name: ClasHead
    with_avg_pool: true
    in_channels: 2048

dataloader:
  train:
    loader:
      num_workers: 8
      use_shared_memory: True
    sampler:
      batch_size: 32
      shuffle: true
      drop_last: true
    dataset:
      name: ImageNet
      dataroot: data/ILSVRC2012/train
      return_label: True
      transforms:
        - name: RandomResizedCrop
          size: 224
        - name: RandomHorizontalFlip
        - name: Transpose
        - name: NormalizeImage
          scale: 1.0/255.0
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
  val:
    loader:
      num_workers: 8
      use_shared_memory: True
    sampler:
      batch_size: 32
      shuffle: false
      drop_last: false
    dataset:
      name: ImageNet
      dataroot: data/ILSVRC2012/val
      return_label: True
      transforms:
        - name: Resize
          size: 256
        - name: CenterCrop
          size: 224
        - name: Transpose
        - name: NormalizeImage
          scale: 1.0/255.0
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]

lr_scheduler:
  name: CosineAnnealingDecay
  learning_rate: 0.3
  T_max: 100

optimizer:
  name: Momentum
  weight_decay: 1e-6

log_config:
  name: LogHook
  interval: 50

custom_config:
  - name: EvaluateHook
```

configs/swav/swav_r50_100ep.yaml

Lines changed: 65 additions & 0 deletions
```yaml
epochs: 100
output_dir: output_dir
seed: 0
device: gpu

model:
  name: SwAV
  backbone:
    name: ResNetswav
    depth: 50
  neck:
    name: SwAVNeck
    in_channels: 2048
    hid_channels: 2048
    out_channels: 128
    with_l2norm: True
    with_avg_pool: True
  head:
    name: SwAVHead
    feat_dim: 128
    sinkhorn_iterations: 3
    epsilon: 0.05
    temperature: 0.1
    crops_for_assign: [0, 1]
    num_crops: [2, 6]
    num_prototypes: 3000

dataloader:
  train:
    loader:
      num_workers: 16
      use_shared_memory: True
    sampler:
      batch_size: 128
      shuffle: true
      drop_last: true
    dataset:
      name: MultiCropDataset
      dataroot: data/ILSVRC2012/train
      size_crops: [224, 96]
      num_crops: [2, 6]
      min_scale_crops: [0.14, 0.05]
      max_scale_crops: [1., 0.14]

lr_scheduler:
  name: CosineWarmup
  learning_rate: 4.8
  T_max: 31200
  warmup_steps: 3120
  start_lr: 0.3
  end_lr: 4.8
  eta_min: 0.0048

optimizer:
  name: LarsMomentumOptimizer
  momentum: 0.9
  lars_weight_decay: 1e-6

optimizer_config:
  name: SwAVOptimizerHook
  freeze_prototypes_iters: 313

log_config:
  name: LogHook
  interval: 50
```
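The `optimizer_config` entry above wires in `SwAVOptimizerHook`, whose `freeze_prototypes_iters: 313` keeps the prototype vectors fixed during the first iterations, as in the original SwAV recipe. Here is a minimal sketch of that idea, assuming a Paddle model whose prototype parameters contain "prototypes" in their names; the function and the parameter-name check are illustrative, not the PASSL hook itself.

```python
# Illustrative sketch only, not the PASSL SwAVOptimizerHook. Intended to run
# after backward() and before the optimizer step; it assumes the prototype
# layer's parameters have "prototypes" in their names.
def maybe_freeze_prototypes(model, cur_iter, freeze_iters=313):
    if cur_iter < freeze_iters:
        for name, param in model.named_parameters():
            if "prototypes" in name:
                # Drop the loss gradient so this step does not move the prototypes.
                param.clear_gradient()
```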

docs/Train_SwAV_model.md

Lines changed: 57 additions & 0 deletions
# Train SwAV Model

## Introduction

PASSL reproduces [SwAV](https://arxiv.org/abs/2006.09882). SwAV is an online algorithm that takes advantage of contrastive methods without requiring pairwise comparisons to be computed: instead of comparing features directly, it enforces consistency between the cluster assignments ("codes") produced for different augmentations of the same image. Compared with previous contrastive methods, SwAV is more memory efficient because it needs neither a large memory bank nor a special momentum network.
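To make the head settings in the pretraining config concrete, the following is a minimal NumPy sketch of SwAV's swapped-assignment loss, for illustration only: `n_iters`, `eps`, and `temperature` correspond to `sinkhorn_iterations: 3`, `epsilon: 0.05`, and `temperature: 0.1` in `configs/swav/swav_r50_100ep.yaml`, while all function names, shapes, and toy data are assumptions rather than the PASSL implementation.

```python
import numpy as np


def sinkhorn(scores, eps=0.05, n_iters=3):
    """Sinkhorn-Knopp: turn prototype scores (B x K) into soft codes whose
    rows sum to 1 while keeping the prototypes roughly equally used."""
    Q = np.exp(scores / eps).T              # K x B
    Q /= Q.sum()
    K, B = Q.shape
    for _ in range(n_iters):
        Q /= Q.sum(axis=1, keepdims=True)   # normalize over samples, per prototype
        Q /= K
        Q /= Q.sum(axis=0, keepdims=True)   # normalize over prototypes, per sample
        Q /= B
    return (Q * B).T                        # B x K, each row sums to 1


def softmax(x, t):
    e = np.exp(x / t)
    return e / e.sum(axis=1, keepdims=True)


def swapped_loss(z1, z2, prototypes, temperature=0.1):
    """Swapped prediction: the code of one view supervises the softmax
    prediction of the other view, and vice versa."""
    s1 = z1 @ prototypes.T                  # B x K prototype scores
    s2 = z2 @ prototypes.T
    q1, q2 = sinkhorn(s1), sinkhorn(s2)     # codes, treated as targets
    p1, p2 = softmax(s1, temperature), softmax(s2, temperature)
    return -0.5 * np.mean(np.sum(q1 * np.log(p2), axis=1) +
                          np.sum(q2 * np.log(p1), axis=1))


# Toy run: 8 samples, 16-dim embeddings, 32 prototypes, all L2-normalized.
def l2n(x):
    return x / np.linalg.norm(x, axis=1, keepdims=True)


rng = np.random.default_rng(0)
z1 = l2n(rng.normal(size=(8, 16)))
z2 = l2n(rng.normal(size=(8, 16)))
protos = l2n(rng.normal(size=(32, 16)))
print(swapped_loss(z1, z2, protos))
```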
## Installation
- See [INSTALL.md](INSTALL.md)

## Data Preparation
- See [GETTING_STARTED.md](GETTING_STARTED.md)

## Implemented Models
Models are all trained with the ResNet-50 backbone.

| Model | Epochs | Official results | PASSL results | Backbone | Download |
| --- | --- | --- | --- | --- | --- |
| SwAV | 100 | 72.1 | 72.4 | ResNet-50 | [download](https://drive.google.com/file/d/1budFSoQqZz1Idyej-R4E6kUnL8CGtdyu/view?usp=sharing) |
## Getting Started

### 1. Train SwAV

#### Single GPU
```
python tools/train.py -c configs/swav/swav_r50_100ep.yaml
```

#### Multiple GPUs

```
python -m paddle.distributed.launch --gpus="0,1,2,3,4,5,6,7" tools/train.py -c configs/swav/swav_r50_100ep.yaml
```

The model pretrained for 100 epochs can be found at [swav](https://drive.google.com/file/d/1budFSoQqZz1Idyej-R4E6kUnL8CGtdyu/view?usp=sharing).

Note: the default learning rate in the config files is for 8 GPUs. If you train with a different number of GPUs, the total batch size changes in proportion, so you have to scale the learning rate following `new_lr = old_lr * new_ngpus / old_ngpus`.
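For example, keeping the per-GPU batch size of 128 from `swav_r50_100ep.yaml` but training on 4 GPUs instead of 8 gives `new_lr = 4.8 * 4 / 8 = 2.4`.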
### 2. Extract backbone weights

```
python tools/extract_weight.py ${CHECKPOINT} --output ${WEIGHT_FILE} --remove_prefix
```

### 3. Evaluation on ImageNet Linear Classification

#### Train:
```
python -m paddle.distributed.launch --gpus="0,1,2,3,4,5,6,7" tools/train.py -c configs/swav/swav_clas_r50.yaml --pretrained ${WEIGHT_FILE}
```

#### Evaluate:
```
python -m paddle.distributed.launch --gpus="0,1,2,3,4,5,6,7" tools/train.py -c configs/swav/swav_clas_r50.yaml --load ${CLS_WEIGHT_FILE} --evaluate-only
```

The trained linear weights, in conjunction with the backbone weights, can be found at [swav linear](https://drive.google.com/file/d/1uduDAqJqK1uFclhQSK0d9RjzGNYR_Tj2/view?usp=sharing).

passl/datasets/__init__.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -19,4 +19,5 @@
 from .cifar import CIFAR10, CIFAR100
 
 from .textimagedataset import TextImageDataset
+from .multicropdataset import MultiCropDataset
 from .builder import build_dataset, build_dataloader
```

passl/datasets/multicropdataset.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from paddle.vision.transforms import (
16+
Compose,
17+
Transpose,
18+
ColorJitter,
19+
RandomResizedCrop,
20+
RandomHorizontalFlip,
21+
)
22+
from .folder import DatasetFolder
23+
from .builder import DATASETS
24+
from .preprocess.transforms import (
25+
RandomApply,
26+
GaussianBlur,
27+
NormalizeImage,
28+
RandomGrayscale,
29+
)
30+
31+
32+
@DATASETS.register()
33+
class MultiCropDataset(DatasetFolder):
34+
cls_filter = None
35+
36+
def __init__(self,
37+
dataroot,
38+
size_crops,
39+
num_crops,
40+
min_scale_crops,
41+
max_scale_crops,
42+
return_label=False):
43+
super(MultiCropDataset, self).__init__(dataroot, cls_filter=self.cls_filter)
44+
45+
assert len(size_crops) == len(num_crops)
46+
assert len(min_scale_crops) == len(num_crops)
47+
assert len(max_scale_crops) == len(num_crops)
48+
self.return_label = return_label
49+
50+
color_transform = [get_color_distortion(), get_pil_gaussian_blur()]
51+
mean = [0.485, 0.456, 0.406]
52+
std = [0.229, 0.224, 0.225]
53+
trans = []
54+
for i in range(len(size_crops)):
55+
randomresizedcrop = RandomResizedCrop(
56+
size_crops[i],
57+
scale=(min_scale_crops[i], max_scale_crops[i]),
58+
)
59+
trans.extend([Compose([
60+
randomresizedcrop,
61+
RandomHorizontalFlip(prob=0.5),
62+
Compose(color_transform),
63+
Transpose(),
64+
NormalizeImage(scale='1.0/255.0', mean=mean, std=std)])
65+
] * num_crops[i])
66+
self.trans = trans
67+
68+
def __getitem__(self, index):
69+
"""
70+
Args:
71+
index (int): Index
72+
73+
Returns:
74+
tuple: (sample, target) where target is class_index of the target class.
75+
"""
76+
path, target = self.samples[index]
77+
sample = self.loader(path)
78+
sample = list(map(lambda trans: trans(sample), self.trans))
79+
if self.return_label:
80+
return sample, target
81+
82+
return sample
83+
84+
85+
86+
def get_pil_gaussian_blur(p=0.5):
87+
gaussian_blur = GaussianBlur(sigma=[.1, 2.], _PIL=True)
88+
rnd_gaussian_blur = RandomApply([gaussian_blur], p=p)
89+
return rnd_gaussian_blur
90+
91+
92+
def get_color_distortion(s=1.0):
93+
# s is the strength of color distortion.
94+
color_jitter = ColorJitter(0.8*s, 0.8*s, 0.8*s, 0.2*s)
95+
rnd_color_jitter = RandomApply([color_jitter], p=0.8)
96+
rnd_gray = RandomGrayscale(p=0.2)
97+
color_distort = Compose([rnd_color_jitter, rnd_gray])
98+
return color_distort
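As a rough usage sketch for the dataset above, with the crop settings taken from `configs/swav/swav_r50_100ep.yaml` (the data path and ImageNet-style folder layout are assumptions), indexing the dataset returns one list of crops per image:

```python
# Hypothetical usage of MultiCropDataset with the pretraining config settings;
# requires an ImageNet-style folder at the given dataroot.
dataset = MultiCropDataset(
    dataroot="data/ILSVRC2012/train",
    size_crops=[224, 96],            # global / local crop resolutions
    num_crops=[2, 6],                # 2 global + 6 local crops per image
    min_scale_crops=[0.14, 0.05],
    max_scale_crops=[1.0, 0.14],
)
crops = dataset[0]                   # list of 8 CHW arrays: 2 at 224x224, 6 at 96x96
```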

passl/hooks/__init__.py

Lines changed: 1 addition & 0 deletions
```diff
@@ -16,6 +16,7 @@
 from .hook import Hook
 from .lr_scheduler_hook import LRSchedulerHook
 from .optimizer_hook import OptimizerHook
+from .optimizer_hook import SwAVOptimizerHook
 from .timer_hook import IterTimerHook
 from .log_hook import LogHook
 from .checkpoint_hook import CheckpointHook
```
