Commit 09b3f5a

Internal change

PiperOrigin-RevId: 481936708

1 parent 54a70ba commit 09b3f5a

12 files changed: +2266 -0 lines changed

official/projects/mosaic/README.md (121 additions, 0 deletions)
# MOSAIC: Mobile Segmentation via decoding Aggregated Information and encoded Context

[![Paper](http://img.shields.io/badge/Paper-arXiv.2112.11623-B3181B?logo=arXiv)](https://arxiv.org/abs/2112.11623)

This repository is the official implementation of the following paper.

* [MOSAIC: Mobile Segmentation via decoding Aggregated Information and encoded Context](https://arxiv.org/abs/2112.11623)
## Description

MOSAIC is a neural network architecture for efficient and accurate semantic
image segmentation on mobile devices. It is built from neural operations that
are commonly supported across diverse mobile hardware platforms, enabling
flexible deployment. With a simple asymmetric encoder-decoder structure,
consisting of an efficient multi-scale context encoder and a lightweight
hybrid decoder that recovers spatial details from aggregated information,
MOSAIC achieves a better balance between accuracy and computational cost.
Deployed on top of a tailored feature extraction backbone based on a searched
classification network, MOSAIC achieves a 5% absolute accuracy gain on ADE20K
with similar or lower latency compared to the current industry standard
MLPerf Mobile v1.0 models and state-of-the-art architectures.

[MLPerf Mobile v2.0](https://mlcommons.org/en/inference-mobile-20/) included
MOSAIC as a new industry-standard benchmark model for image segmentation.
See the details [here](https://mlcommons.org/en/news/mlperf-inference-1q2022/).

You can also refer to the [MLCommons GitHub repository](https://github.com/mlcommons/mobile_open/tree/main/vision/mosaic).
## History

### Oct 13, 2022

* First release of MOSAIC in TensorFlow 2, including checkpoints pretrained
  on Cityscapes.

## Maintainers

* Weijun Wang ([weijunw-g](https://github.com/weijunw-g))
* Fang Yang ([fyangf](https://github.com/fyangf))
* Shixin Luo ([luotigerlsx](https://github.com/luotigerlsx))
## Requirements

[![Python](https://img.shields.io/pypi/pyversions/tensorflow.svg?style=plastic)](https://badge.fury.io/py/tensorflow)
[![tf-models-official PyPI](https://badge.fury.io/py/tf-models-official.svg)](https://badge.fury.io/py/tf-models-official)
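MOSAIC is part of the TF Model Garden; assuming you work from the released
PyPI package rather than a source checkout of this repository, a typical
setup is:

```shell
pip3 install tf-models-official
```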
## Results

The following table shows the mIoU measured on the `cityscapes` dataset.

| Config | Backbone | Resolution | branch_filter_depths | pyramid_pool_bin_nums | mIoU | Download |
|-------------------------|:--------------------:|:----------:|:--------------------:|:---------------------:|:-----:|:--------:|
| Paper reference config | MobileNetMultiAVGSeg | 1024x2048 | [32, 32] | [4, 8, 16] | 75.98 | [ckpt](https://storage.googleapis.com/tf_model_garden/vision/mosaic/MobileNetMultiAVGSeg-r1024-ebf32-nogp.tar.gz)<br>[tensorboard](https://tensorboard.dev/experiment/okEog90bSwupajFgJwGEIw/#scalars) |
| Current best config | MobileNetMultiAVGSeg | 1024x2048 | [64, 64] | [1, 4, 8, 16] | 77.24 | [ckpt](https://storage.googleapis.com/tf_model_garden/vision/mosaic/MobileNetMultiAVGSeg-r1024-ebf64-gp.tar.gz)<br>[tensorboard](https://tensorboard.dev/experiment/l5hkV7JaQM23EXeOBT6oJg/#scalars) |

* `branch_filter_depths`: the number of convolution channels in each branch at
  a pyramid level after `Spatial Pyramid Pooling`
* `pyramid_pool_bin_nums`: the number of bins at each level of the `Spatial
  Pyramid Pooling` (see the sketch below)
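To make these two knobs concrete, here is a minimal sketch of a Spatial
Pyramid Pooling module parameterized the same way. It is illustrative only,
not the project's actual implementation: the function name, the choice of
separable convolutions, and the assumption of a statically shaped input are
all ours.

```python
import tensorflow as tf


def spatial_pyramid_pool(features,
                         pyramid_pool_bin_nums=(1, 4, 8, 16),
                         branch_filter_depths=(64, 64),
                         conv_kernel_sizes=(3, 5)):
  """Pools `features` into b x b bins per pyramid level, then fuses conv branches."""
  # Assumes a statically shaped [batch, height, width, channels] tensor.
  height, width = features.shape[1], features.shape[2]
  level_outputs = []
  for bins in pyramid_pool_bin_nums:
    # Average-pool the feature map down to a `bins x bins` grid.
    pooled = tf.keras.layers.AveragePooling2D(
        pool_size=(height // bins, width // bins))(features)
    # One parallel conv branch per (filter depth, kernel size) pair.
    branches = [
        tf.keras.layers.SeparableConv2D(
            depth, ksize, padding='same', activation='relu')(pooled)
        for depth, ksize in zip(branch_filter_depths, conv_kernel_sizes)
    ]
    merged = tf.concat(branches, axis=-1)
    # Resize back to the input resolution so all levels can be concatenated.
    level_outputs.append(tf.image.resize(merged, (height, width)))
  return tf.concat(level_outputs, axis=-1)
```

Small bin counts summarize global context while larger ones preserve more
spatial layout, which is why the best config adds the extra `1` bin level.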
## Training

Training can run on Google Cloud Platform using Cloud TPU.
[Here](https://cloud.google.com/tpu/docs/how-to) are the instructions for
using Cloud TPU. Follow them to set up a Cloud TPU (see the `gcloud` sketch
after the command below if you still need to create one), then launch
training by:
```shell
EXP_TYPE=mosaic_mnv35_cityscapes
EXP_NAME="<experiment-name>"  # You can give any name to the experiment.
TPU_NAME="<tpu-name>"  # The name assigned while creating a Cloud TPU.
MODEL_DIR="gs://<path-to-model-directory>"
# Now launch the experiment.
python3 -m official.projects.mosaic.train \
  --experiment=$EXP_TYPE \
  --mode=train \
  --tpu=$TPU_NAME \
  --model_dir=$MODEL_DIR \
  --config_file=official/projects/mosaic/configs/experiments/mosaic_mnv35_cityscapes_tdfs_tpu.yaml
```
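If you still need to create the TPU, the creation command typically looks
like the sketch below; the zone, accelerator type, and runtime version are
placeholders to fill in from the Cloud TPU documentation, not values
prescribed by this project:

```shell
# Illustrative only; consult the Cloud TPU docs for current values.
gcloud compute tpus tpu-vm create "<tpu-name>" \
  --zone="<zone>" \
  --accelerator-type="<accelerator-type>" \
  --version="<tpu-vm-tf-runtime-version>"
```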

## Evaluation

Run the following command for evaluation:
```shell
EXP_TYPE=mosaic_mnv35_cityscapes
EXP_NAME="<experiment-name>"  # You can give any name to the experiment.
TPU_NAME="<tpu-name>"  # The name assigned while creating a Cloud TPU.
MODEL_DIR="gs://<path-to-model-directory>"
# Now launch the evaluation.
python3 -m official.projects.mosaic.train \
  --experiment=$EXP_TYPE \
  --mode=eval \
  --tpu=$TPU_NAME \
  --model_dir=$MODEL_DIR \
  --config_file=official/projects/mosaic/configs/experiments/mosaic_mnv35_cityscapes_tdfs_tpu.yaml
```
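Because MOSAIC targets on-device inference (it is an MLPerf Mobile benchmark
model), a common follow-up is converting an exported SavedModel to TFLite.
The snippet below is plain TensorFlow API rather than a project-specific
exporter, and the SavedModel path and output filename are placeholders:

```python
import tensorflow as tf

# Placeholder path: point this at a SavedModel exported from the checkpoint.
converter = tf.lite.TFLiteConverter.from_saved_model('/path/to/saved_model')
converter.optimizations = [tf.lite.Optimize.DEFAULT]  # optional post-training quantization
tflite_model = converter.convert()

with open('mosaic.tflite', 'wb') as f:
  f.write(tflite_model)
```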

## License

[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)

This project is licensed under the terms of the **Apache License 2.0**.
## Citation

If you want to cite this repository in your work, please consider citing the
paper.

```
@article{weijun2021mosaic,
  title={MOSAIC: Mobile Segmentation via decoding Aggregated Information and
         encoded Context},
  author={Weijun Wang and Andrew Howard},
  journal={arXiv preprint arXiv:2112.11623},
  year={2021},
}
```
official/projects/mosaic/configs/experiments/mosaic_mnv35_cityscapes_tdfs_tpu.yaml (87 additions, 0 deletions)
# Using TensorFlow datasets: 'cityscapes/semantic_segmentation'
# Some expected flags to use with the xmanager launcher:
# --experiment_type=mosaic_mnv35_cityscapes
# --tpu_topology=4x4
# mIoU: 77.24%
runtime:
  distribution_strategy: 'tpu'
  mixed_precision_dtype: 'float32'
task:
  model:
    num_classes: 19
    input_size: [null, null, 3]
    backbone:
      type: 'mobilenet'
      mobilenet:
        model_id: 'MobileNetMultiAVGSeg'
        output_intermediate_endpoints: true
        output_stride: 16
    neck:
      branch_filter_depths: [64, 64]
      conv_kernel_sizes: [3, 5]
      pyramid_pool_bin_nums: [1, 4, 8, 16]
      dropout_rate: 0.0
    head:
      num_classes: 19
      decoder_input_levels: ['3/depthwise', '2/depthwise']
      decoder_stage_merge_styles: ['concat_merge', 'sum_merge']
      decoder_filters: [64, 64]
      decoder_projected_filters: [19, 19]
    norm_activation:
      activation: relu
      norm_epsilon: 0.001
      norm_momentum: 0.99
      use_sync_bn: true
  init_checkpoint: 'gs://tf_model_garden/vision/mobilenet/v3.5multiavg_seg_float/'
  init_checkpoint_modules: 'backbone'
  losses:
    l2_weight_decay: 1.0e-04
  train_data:
    output_size: [1024, 2048]
    crop_size: [1024, 2048]
    input_path: ''
    tfds_name: 'cityscapes/semantic_segmentation'
    tfds_split: 'train'
    is_training: true
    global_batch_size: 32
    dtype: 'float32'
    aug_rand_hflip: true
    aug_scale_max: 2.0
    aug_scale_min: 0.5
  validation_data:
    output_size: [1024, 2048]
    input_path: ''
    tfds_name: 'cityscapes/semantic_segmentation'
    tfds_split: 'validation'
    is_training: false
    global_batch_size: 32
    dtype: 'float32'
    drop_remainder: false
    resize_eval_groundtruth: true
trainer:
  optimizer_config:
    learning_rate:
      polynomial:
        decay_steps: 100000
        initial_learning_rate: 0.1
        power: 0.9
      type: polynomial
    optimizer:
      sgd:
        momentum: 0.9
      type: sgd
    warmup:
      linear:
        name: linear
        warmup_learning_rate: 0
        warmup_steps: 925
      type: linear
  steps_per_loop: 92  # 2975 / 32 = 92
  summary_interval: 92
  train_steps: 100000
  validation_interval: 92
  validation_steps: 16  # 500 / 32 = 16
  checkpoint_interval: 92
  best_checkpoint_export_subdir: 'best_ckpt'
  best_checkpoint_eval_metric: 'mean_iou'
  best_checkpoint_metric_comp: 'higher'
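The registered experiment can also be inspected from Python. A minimal
sketch follows; it assumes the `tf-models-official` package layout and that
importing `official.projects.mosaic.train` registers the
`mosaic_mnv35_cityscapes` experiment (that registration import is an
assumption on our part, not something documented above):

```python
# Minimal sketch: load the registered experiment config and inspect it.
# Assumption: importing the project's train module registers the experiment.
from official.core import exp_factory
from official.projects.mosaic import train  # noqa: F401 (registration side effect)

config = exp_factory.get_exp_config('mosaic_mnv35_cityscapes')
print(config.task.model.neck.pyramid_pool_bin_nums)  # e.g. [1, 4, 8, 16]
print(config.trainer.train_steps)
```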
