|
| 1 | +# Copyright 2020 The TensorFlow Authors. All Rights Reserved. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | +# ============================================================================== |
| 15 | +"""Compression Scheduler for tfmot compression.""" |
| 16 | +import abc |
| 17 | +from typing import Union, Optional |
| 18 | + |
| 19 | +import tensorflow as tf |
| 20 | + |
| 21 | + |
class Scheduler(metaclass=abc.ABCMeta):
  """Interface for step-indexed value schedulers."""

  @abc.abstractmethod
  def __call__(self, step: Union[int, tf.Tensor]) -> tf.Tensor:
    """Computes the scheduled value for a given training step.

    Args:
      step: Current training-loop step; a Python int or a tf.Tensor of
        tf.int32 or tf.int64.

    Returns:
      A tf.Tensor holding the value scheduled for `step`.
    """
    raise NotImplementedError()
| 37 | + |
| 38 | + |
class PolynomialDecay(Scheduler):
  """Scheduler that decays a value along a polynomial curve.

  With end_step = begin_step + decay_steps and
  decay_term = (t - begin_step) / decay_steps, the scheduled value is:

    s(t) = start_value                                    for t < begin_step

         = end_value + (start_value - end_value)
                       * (1 - decay_term) ** exponent     for begin_step <= t
                                                              <= end_step

         = end_value                                      for t > end_step

  Note that 0 <= decay_term <= 1 exactly when begin_step <= t <= end_step,
  so the middle branch interpolates monotonically between start_value and
  end_value.
  """

  def __init__(self,
               start_value: Union[int, float],
               decay_steps: int,
               end_value: Union[int, float],
               begin_step: int = 0,
               exponent: float = 1.0,
               dtype: tf.dtypes.DType = tf.float32,
               name: Optional[str] = None):
    """Initializes PolynomialDecay.

    Args:
      start_value: Initial value of the decay. Also the value returned by
        this scheduler for any step <= begin_step.
      decay_steps: A positive Python int, the duration of the decay.
      end_value: Final value of the decay. Also the value returned by this
        scheduler for any step >= begin_step + decay_steps.
      begin_step: Step at which decaying starts. Defaults to 0, which means
        decay begins right after training starts.
      exponent: Exponent of the polynomial decay. Defaults to 1.0, a linear
        function.
      dtype: `tf.dtypes.DType` of the returned tensor. Defaults to
        tf.float32.
      name: A Python `str` used as the tf.name_scope of this scheduler.

    Raises:
      ValueError: If `decay_steps` is not positive.
    """
    if decay_steps <= 0:
      # A zero or negative duration would divide by zero (or run the decay
      # backwards) inside _during_decay.
      raise ValueError(
          "`decay_steps` must be a positive integer, got: {}".format(
              decay_steps))
    self.name = name
    self.start_value = start_value
    self.begin_step = begin_step
    self.end_value = end_value
    self.decay_steps = decay_steps
    self.end_step = self.begin_step + self.decay_steps
    self.exponent = exponent
    self.dtype = dtype
    # Scope label is fixed at construction; used by every graph op below.
    self._scope_name = name or "PolynomialDecay"

  def __call__(self, step: Union[int, tf.Tensor]) -> tf.Tensor:
    """Returns the scheduled value for `step` per the class equation.

    Args:
      step: Current training-loop step; a Python int or a tf.Tensor of
        tf.int32 or tf.int64.

    Returns:
      A scalar `tf.Tensor` of `self.dtype` holding the scheduled value.
    """
    with tf.name_scope(self._scope_name):
      # Before the decay window: hold the constant start value.
      return tf.cond(tf.math.less(step, self.begin_step),
                     lambda: tf.cast(self.start_value, dtype=self.dtype),
                     lambda: self._after_begin_step(step), name="start")

  def _after_begin_step(self, step: Union[int, tf.Tensor]) -> tf.Tensor:
    """Handles steps at or past begin_step: clamp to end_value or decay."""
    with tf.name_scope(self._scope_name):
      # Past the decay window: hold the constant end value.
      return tf.cond(tf.math.greater(step, self.end_step),
                     lambda: tf.cast(self.end_value, dtype=self.dtype),
                     lambda: self._during_decay(step), name="end")

  def _during_decay(self, step: Union[int, tf.Tensor]) -> tf.Tensor:
    """Returns the decayed value for begin_step <= step <= end_step."""
    with tf.name_scope(self._scope_name):
      # decay_term in [0, 1]; computed in float32 for a stable division.
      local_steps = tf.cast(step - self.begin_step, dtype=tf.float32)
      decay_term = tf.math.divide(local_steps,
                                  tf.cast(self.decay_steps, dtype=tf.float32))
      total_delta = tf.cast(self.start_value - self.end_value,
                            dtype=tf.float32)
      # end_value + delta * (1 - decay_term) ** exponent, cast to the
      # requested output dtype before the final add.
      target = tf.math.add(self.end_value, tf.cast(
          tf.math.multiply(total_delta, tf.pow(1 - decay_term, self.exponent)),
          dtype=self.dtype))
      # The schedule is a hyperparameter, not a trainable quantity.
      return tf.stop_gradient(target)