
Commit 011a580

Author: miguelCalado

Added: VGG-16 configurations and model
1 parent d58be67 commit 011a580

File tree: 7 files changed, 345 additions and 0 deletions


official/vision/image_classification/classifier_trainer.py

Lines changed: 2 additions & 0 deletions
@@ -36,13 +36,15 @@
 from official.vision.image_classification.efficientnet import efficientnet_model
 from official.vision.image_classification.resnet import common
 from official.vision.image_classification.resnet import resnet_model
+from official.vision.image_classification.vgg16 import vgg_model


 def get_models() -> Mapping[str, tf.keras.Model]:
   """Returns the mapping from model type name to Keras model."""
   return {
       'efficientnet': efficientnet_model.EfficientNet.from_name,
       'resnet': resnet_model.resnet50,
+      'vgg': vgg_model.vgg16,
   }

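For context (not part of the commit): the trainer looks up the model builder by name in this registry, so the new 'vgg' entry makes vgg_model.vgg16 selectable by model type. A minimal usage sketch, assuming the `official` package from the TensorFlow Model Garden is importable:

# Illustrative sketch only: resolve and build the VGG-16 model through the registry.
from official.vision.image_classification.classifier_trainer import get_models

builder = get_models()['vgg']        # -> vgg_model.vgg16
model = builder(num_classes=1000)    # builds the 224x224x3 VGG-16 with a softmax head
model.summary()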
official/vision/image_classification/classifier_trainer_test.py

Lines changed: 3 additions & 0 deletions
@@ -53,6 +53,7 @@ def distribution_strategy_combinations() -> Iterable[Tuple[Any, ...]]:
       model=[
           'efficientnet',
           'resnet',
+          'vgg',
       ],
       dataset=[
           'imagenet',
@@ -149,6 +150,7 @@ def test_end_to_end_train_and_eval(self, distribution, model, dataset):
       model=[
           'efficientnet',
           'resnet',
+          'vgg',
       ],
       dataset='imagenet',
       dtype='float16',
@@ -193,6 +195,7 @@ def test_gpu_train(self, distribution, model, dataset, dtype):
       model=[
           'efficientnet',
           'resnet',
+          'vgg',
       ],
       dataset='imagenet',
       dtype='bfloat16',

official/vision/image_classification/configs/configs.py

Lines changed: 31 additions & 0 deletions
@@ -91,13 +91,44 @@ class ResNetImagenetConfig(base_configs.ExperimentConfig):
       epochs_between_evals=1, steps=None)
   model: base_configs.ModelConfig = resnet_config.ResNetModelConfig()

+@dataclasses.dataclass
+class VGGImagenetConfig(base_configs.ExperimentConfig):
+  """Base configuration to train vgg-16 on ImageNet."""
+  export: base_configs.ExportConfig = base_configs.ExportConfig()
+  runtime: base_configs.RuntimeConfig = base_configs.RuntimeConfig()
+  train_dataset: dataset_factory.DatasetConfig = \
+      dataset_factory.ImageNetConfig(split='train',
+                                     one_hot=False,
+                                     mean_subtract=True,
+                                     standardize=True)
+  validation_dataset: dataset_factory.DatasetConfig = \
+      dataset_factory.ImageNetConfig(split='validation',
+                                     one_hot=False,
+                                     mean_subtract=True,
+                                     standardize=True)
+  train: base_configs.TrainConfig = base_configs.TrainConfig(
+      resume_checkpoint=True,
+      epochs=90,
+      steps=None,
+      callbacks=base_configs.CallbacksConfig(
+          enable_checkpoint_and_export=True, enable_tensorboard=True),
+      metrics=['accuracy', 'top_5'],
+      time_history=base_configs.TimeHistoryConfig(log_steps=100),
+      tensorboard=base_configs.TensorBoardConfig(
+          track_lr=True, write_model_weights=False),
+      set_epoch_loop=False)
+  evaluation: base_configs.EvalConfig = base_configs.EvalConfig(
+      epochs_between_evals=1, steps=None)
+  model: base_configs.ModelConfig = vgg_config.VGGModelConfig()
+

 def get_config(model: str, dataset: str) -> base_configs.ExperimentConfig:
   """Given model and dataset names, return the ExperimentConfig."""
   dataset_model_config_map = {
       'imagenet': {
           'efficientnet': EfficientNetImageNetConfig(),
           'resnet': ResNetImagenetConfig(),
+          'vgg': VGGImagenetConfig()
       }
   }
   try:
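For context (not part of the commit): with this mapping in place, get_config resolves the experiment configuration for the new model name. A small sketch, again assuming the `official` package is importable:

# Illustrative sketch only: fetch the VGG ImageNet experiment config by name.
from official.vision.image_classification.configs import configs

config = configs.get_config(model='vgg', dataset='imagenet')
print(type(config).__name__)          # VGGImagenetConfig
print(config.train.epochs)            # 90
print(config.model.optimizer.name)    # momentum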
Lines changed: 46 additions & 0 deletions
@@ -0,0 +1,46 @@
+# Training configuration for VGG-16 trained on ImageNet on GPUs.
+# Reaches > 72.8% within 90 epochs.
+# Note: This configuration uses a scaled per-replica batch size based on the number of devices.
+runtime:
+  distribution_strategy: 'mirrored'
+  num_gpus: 1
+  batchnorm_spatial_persistent: True
+train_dataset:
+  name: 'imagenet2012'
+  data_dir: null
+  builder: 'records'
+  split: 'train'
+  image_size: 224
+  num_classes: 1000
+  num_examples: 1281167
+  batch_size: 128
+  use_per_replica_batch_size: True
+  dtype: 'float32'
+  mean_subtract: True
+  standardize: True
+validation_dataset:
+  name: 'imagenet2012'
+  data_dir: null
+  builder: 'records'
+  split: 'validation'
+  image_size: 224
+  num_classes: 1000
+  num_examples: 50000
+  batch_size: 128
+  use_per_replica_batch_size: True
+  dtype: 'float32'
+  mean_subtract: True
+  standardize: True
+model:
+  name: 'vgg'
+  optimizer:
+    name: 'momentum'
+    momentum: 0.9
+    epsilon: 0.001
+  loss:
+    label_smoothing: 0.0
+train:
+  resume_checkpoint: True
+  epochs: 90
+evaluation:
+  epochs_between_evals: 1
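Because use_per_replica_batch_size is True, the effective global batch size scales with the number of replicas. A quick sanity-check sketch (the file name below is a placeholder; the commit view does not show where this YAML lives):

# Illustrative sketch only: load the YAML and derive the global batch size.
import yaml

with open('vgg_imagenet_gpu.yaml') as f:   # hypothetical file name
    params = yaml.safe_load(f)

per_replica = params['train_dataset']['batch_size']    # 128
num_replicas = max(params['runtime']['num_gpus'], 1)   # 1 in this config
print('global batch size:', per_replica * num_replicas)
print('epochs:', params['train']['epochs'])             # 90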
Lines changed: 14 additions & 0 deletions
@@ -0,0 +1,14 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""Configuration definitions for VGG losses, learning rates, and optimizers."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import dataclasses
+
+from official.modeling.hyperparams import base_config
+from official.vision.image_classification.configs import base_configs
+
+
+@dataclasses.dataclass
+class VGGModelConfig(base_configs.ModelConfig):
+  """Configuration for the VGG model."""
+  name: str = 'VGG'
+  num_classes: int = 1000
+  model_params: base_config.Config = dataclasses.field(
+      default_factory=lambda: {
+          'num_classes': 1000,
+          'batch_size': None,
+          'use_l2_regularizer': True
+      })
+  loss: base_configs.LossConfig = base_configs.LossConfig(
+      name='sparse_categorical_crossentropy')
+  optimizer: base_configs.OptimizerConfig = base_configs.OptimizerConfig(
+      name='momentum',
+      epsilon=0.001,
+      momentum=0.9,
+      moving_average_decay=None)
+  learning_rate: base_configs.LearningRateConfig = (
+      base_configs.LearningRateConfig(
+          name='stepwise',
+          initial_lr=0.01,
+          examples_per_epoch=1281167,
+          boundaries=[30, 60],
+          warmup_epochs=0,
+          scale_by_batch_size=1. / 128.,
+          multipliers=[0.01 / 256, 0.001 / 256, 0.0001 / 256]))
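The stepwise schedule stores per-example rates; assuming the usual Model Garden convention of multiplying each entry in multipliers by the global batch size (an assumption, not shown in this diff), the rate starts near 0.005 for a batch size of 128 and drops at epochs 30 and 60:

# Illustrative sketch only: effective stepwise learning rates under the assumed
# convention that multipliers are scaled by the global batch size.
batch_size = 128                                  # per-replica batch from the YAML, 1 GPU
boundaries = [30, 60]                             # epochs where the rate drops
multipliers = [0.01 / 256, 0.001 / 256, 0.0001 / 256]

effective_lrs = [m * batch_size for m in multipliers]
print(effective_lrs)   # [0.005, 0.0005, 5e-05]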
Lines changed: 196 additions & 0 deletions
@@ -0,0 +1,196 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+layers = tf.keras.layers
+
+def _gen_l2_regularizer(use_l2_regularizer=True, l2_weight_decay=1e-4):
+  return tf.keras.regularizers.L2(
+      l2_weight_decay) if use_l2_regularizer else None
+
+def vgg16(num_classes,
+          batch_size=None,
+          use_l2_regularizer=True,
+          batch_norm_decay=0.9,
+          batch_norm_epsilon=1e-5):
+
+  input_shape = (224, 224, 3)
+  img_input = layers.Input(shape=input_shape, batch_size=batch_size)
+
+  x = img_input
+
+  if tf.keras.backend.image_data_format() == 'channels_first':
+    x = layers.Permute((3, 1, 2))(x)
+    bn_axis = 1
+  else:  # channels_last
+    bn_axis = 3
+
+  # Block 1
+  x = layers.Conv2D(64, (3, 3),
+                    padding='same',
+                    kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                    name='block1_conv1')(x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv1')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(64, (3, 3),
+                    padding='same',
+                    kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                    name='block1_conv2')(x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv2')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
+
+  # Block 2
+  x = layers.Conv2D(128, (3, 3),
+                    padding='same',
+                    kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                    name='block2_conv1')(x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv3')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(128, (3, 3),
+                    padding='same',
+                    kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                    name='block2_conv2')(x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv4')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
+
+  # Block 3
+  x = layers.Conv2D(256, (3, 3),
+                    padding='same',
+                    kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                    name='block3_conv1')(x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv5')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(256, (3, 3),
+                    padding='same',
+                    kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                    name='block3_conv2')(x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv6')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(256, (3, 3),
+                    padding='same',
+                    kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                    name='block3_conv3')(x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv7')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
+
+  # Block 4
+  x = layers.Conv2D(512, (3, 3),
+                    padding='same',
+                    kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                    name='block4_conv1')(x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv8')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(512, (3, 3),
+                    padding='same',
+                    kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                    name='block4_conv2')(x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv9')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(512, (3, 3),
+                    padding='same',
+                    kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                    name='block4_conv3')(x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv10')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
+
+  # Block 5
+  x = layers.Conv2D(512, (3, 3),
+                    padding='same',
+                    kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                    name='block5_conv1')(x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv11')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(512, (3, 3),
+                    padding='same',
+                    kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                    name='block5_conv2')(x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv12')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.Conv2D(512, (3, 3),
+                    padding='same',
+                    kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                    name='block5_conv3')(x)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=batch_norm_decay,
+      epsilon=batch_norm_epsilon,
+      name='bn_conv13')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
+
+  x = layers.Flatten(name='flatten')(x)
+  x = layers.Dense(4096,
+                   kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                   name='fc1')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.Dropout(0.5)(x)
+  x = layers.Dense(4096,
+                   kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                   name='fc2')(x)
+  x = layers.Activation('relu')(x)
+  x = layers.Dropout(0.5)(x)
+  x = layers.Dense(num_classes,
+                   kernel_regularizer=_gen_l2_regularizer(use_l2_regularizer),
+                   name='fc1000')(x)
+
+  # A softmax that is followed by the model loss cannot be done in float16
+  # due to numeric issues, so we pass dtype='float32'.
+  x = layers.Activation('softmax', dtype='float32')(x)
+
+  # Create model.
+  return tf.keras.Model(img_input, x, name='vgg16')
+
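As a usage sketch (an illustration, not part of the commit), the builder can be exercised directly; the SGD settings below mirror the 'momentum' optimizer from the config:

# Illustrative sketch only: build and compile the VGG-16 defined above.
import tensorflow as tf
from official.vision.image_classification.vgg16 import vgg_model

model = vgg_model.vgg16(num_classes=1000)   # 224x224x3 inputs, float32 softmax head
model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.005, momentum=0.9),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'])
model.summary()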
