No public description

tensorflower-gardener · tensorflower-gardener · commit d817d87e6ccd · 2024-09-11T23:53:03.000-07:00
PiperOrigin-RevId: 673702497
diff --git a/official/projects/mosaic/configs/mosaic_config.py b/official/projects/mosaic/configs/mosaic_config.py
@@ -233,3 +233,135 @@ def mosaic_mnv35_cityscapes() -> cfg.ExperimentConfig:
       ])
 
   return config
+
+
+@exp_factory.register_config_factory('mosaic_mnv4_cityscapes')
+def mosaic_mnv4_cityscapes() -> cfg.ExperimentConfig:
+  """Instantiates an experiment configuration of image segmentation task.
+
+  This image segmentation experiment is conducted on the Cityscapes dataset.
+  The model architecture is a MOSAIC encoder-decoder. The default backbone is
+  an experimental MobileNet V4 variant (`MobileNetV4ConvMediumSeg`) on top of
+  which the MOSAIC encoder-decoder can be deployed. All detailed configurations
+  can be overridden by a user-provided .yaml file to launch the experiments.
+  Please refer to .yaml examples in the path of ../configs/experiments/.
+
+  Returns:
+    A particular instance of cfg.ExperimentConfig for MOSAIC model based
+    image semantic segmentation task.
+  """
+  train_batch_size = 16
+  eval_batch_size = 16
+  steps_per_epoch = CITYSCAPES_TRAIN_EXAMPLES // train_batch_size
+  output_stride = 16
+
+  backbone_output_level = int(math.log2(output_stride))  # log2(16) = 4.
+  config = cfg.ExperimentConfig(
+      task=MosaicSemanticSegmentationTask(
+          model=MosaicSemanticSegmentationModel(
+              # Cityscapes uses only 19 semantic classes for train/evaluation.
+              # The void (background) class is ignored in train and evaluation.
+              num_classes=19,
+              input_size=[None, None, 3],
+              backbone=backbones.Backbone(
+                  type='mobilenet',
+                  mobilenet=backbones.MobileNet(
+                      model_id='MobileNetV4ConvMediumSeg',
+                      output_intermediate_endpoints=True,
+                      output_stride=output_stride)),
+              neck=MosaicEncoderNeck(
+                  encoder_input_level=backbone_output_level,
+                  branch_filter_depths=[64, 64],
+                  conv_kernel_sizes=[3, 5],
+                  pyramid_pool_bin_nums=[1, 4, 8, 16],  # paper default
+                  activation='relu',
+                  dropout_rate=0.1,
+                  kernel_initializer='glorot_uniform',
+                  interpolation='bilinear',
+                  use_depthwise_convolution=True),
+              head=MosaicDecoderHead(
+                  num_classes=19,
+                  decoder_input_levels=['3/depthwise', '2/depthwise'],
+                  decoder_stage_merge_styles=['concat_merge', 'sum_merge'],
+                  decoder_filters=[64, 64],
+                  decoder_projected_filters=[19, 19],  # Equals num_classes.
+                  encoder_end_level=backbone_output_level,
+                  use_additional_classifier_layer=False,
+                  classifier_kernel_size=1,
+                  activation='relu',
+                  kernel_initializer='glorot_uniform',
+                  interpolation='bilinear',
+              ),
+              norm_activation=common.NormActivation(
+                  activation='relu',
+                  norm_momentum=0.99,
+                  norm_epsilon=1e-3,
+                  use_sync_bn=True,
+              ),
+          ),
+          losses=seg_cfg.Losses(l2_weight_decay=4e-5),
+          train_data=seg_cfg.DataConfig(
+              input_path=os.path.join(
+                  CITYSCAPES_INPUT_PATH_BASE, 'train_fine**'
+              ),
+              crop_size=[1024, 2048],
+              output_size=[1024, 2048],
+              is_training=True,
+              global_batch_size=train_batch_size,
+              aug_scale_min=0.5,
+              aug_scale_max=2.0,
+          ),
+          validation_data=seg_cfg.DataConfig(
+              input_path=os.path.join(CITYSCAPES_INPUT_PATH_BASE, 'val_fine*'),
+              output_size=[1024, 2048],
+              is_training=False,
+              global_batch_size=eval_batch_size,
+              resize_eval_groundtruth=True,
+              drop_remainder=False,
+          ),
+          # Imagenet pre-trained MobileNetV4ConvMediumSeg checkpoint.
+          init_checkpoint=(
+              'gs://tf_model_garden/vision/mobilenet/v4_seg_float//'
+          ),  # NOTE(review): path ends in '//'; confirm this is intended.
+          init_checkpoint_modules='backbone',
+      ),
+      trainer=cfg.TrainerConfig(
+          steps_per_loop=steps_per_epoch,
+          summary_interval=steps_per_epoch,
+          checkpoint_interval=steps_per_epoch,
+          train_steps=100000,  # Matches the polynomial decay_steps below.
+          validation_steps=CITYSCAPES_VAL_EXAMPLES // eval_batch_size,
+          validation_interval=steps_per_epoch,
+          best_checkpoint_eval_metric='mean_iou',
+          best_checkpoint_export_subdir='best_ckpt',
+          best_checkpoint_metric_comp='higher',
+          optimizer_config=optimization.OptimizationConfig({
+              'optimizer': {
+                  'type': 'sgd',
+                  'sgd': {
+                      'momentum': 0.9
+                  }
+              },
+              'learning_rate': {
+                  'type': 'polynomial',
+                  'polynomial': {
+                      'initial_learning_rate': 0.1,
+                      'decay_steps': 100000,
+                      'end_learning_rate': 0.0,
+                      'power': 0.9
+                  }
+              },
+              'warmup': {
+                  'type': 'linear',
+                  'linear': {
+                      'warmup_steps': 5 * steps_per_epoch,  # Five epochs.
+                      'warmup_learning_rate': 0
+                  }
+              }
+          })),
+      restrictions=[
+          'task.train_data.is_training != None',
+          'task.validation_data.is_training != None'
+      ])
+
+  return config