Enable Prune Aware QAT through QuantizationScheme.

psunn · psunn · commit c1be7be39d22 · 2020-11-18T10:31:18.000Z
Change-Id: I2cd50868696b277a94cbcc50b167c8eb1a6612d7
diff --git a/tensorflow_model_optimization/python/core/api/BUILD b/tensorflow_model_optimization/python/core/api/BUILD
@@ -12,6 +12,7 @@ py_library(
         "quantization/keras/__init__.py",
         "quantization/keras/default_8bit/__init__.py",
         "quantization/keras/quantizers/__init__.py",
+        "quantization/keras/experimental_scheme/__init__.py",
         "sparsity/__init__.py",
         "sparsity/keras/__init__.py",
     ],
diff --git a/tensorflow_model_optimization/python/core/api/quantization/keras/__init__.py b/tensorflow_model_optimization/python/core/api/quantization/keras/__init__.py
@@ -18,6 +18,7 @@
 # submodules
 from tensorflow_model_optimization.python.core.api.quantization.keras import quantizers
 from tensorflow_model_optimization.python.core.api.quantization.keras import default_8bit
+from tensorflow_model_optimization.python.core.api.quantization.keras import experimental_scheme
 
 # quantize all layers with default quantization implementation.
 from tensorflow_model_optimization.python.core.quantization.keras.quantize import quantize_model
diff --git a/tensorflow_model_optimization/python/core/api/quantization/keras/experimental_scheme/__init__.py b/tensorflow_model_optimization/python/core/api/quantization/keras/experimental_scheme/__init__.py
@@ -0,0 +1,18 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Module containing experimental Quantization schemes."""
+
+from tensorflow_model_optimization.python.core.quantization.keras.prune_preserve.default_8bit_prune_preserve_quantize_scheme import (
+    Default8BitPrunePreserveQuantizeScheme, )
diff --git a/tensorflow_model_optimization/python/core/quantization/keras/prune_preserve/BUILD b/tensorflow_model_optimization/python/core/quantization/keras/prune_preserve/BUILD
@@ -10,7 +10,9 @@ py_library(
         "__init__.py",
     ],
     srcs_version = "PY3",
-    deps = [],
+    deps = [
+        ":default_8bit_prune_preserve_quantize_scheme",
+    ],
 )
 
 py_library(
@@ -22,6 +24,7 @@ py_library(
     deps = [
         # tensorflow dep1,
         "//tensorflow_model_optimization/python/core/quantization/keras/default_8bit:default_8bit_quantizers",
+        "//tensorflow_model_optimization/python/core/quantization/keras/default_8bit:default_8bit_quantize_registry",
     ],
 )
 
@@ -37,4 +40,17 @@ py_test(
         "//tensorflow_model_optimization/python/core/sparsity/keras:prune_registry",
         "//tensorflow_model_optimization/python/core/quantization/keras/default_8bit:default_8bit_quantize_registry",
     ]
+)
+
+py_library(
+    name = "default_8bit_prune_preserve_quantize_scheme",
+    srcs = [
+        "default_8bit_prune_preserve_quantize_scheme.py",
+    ],
+    srcs_version = "PY3",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":prune_preserve_quantize_registry",
+        "//tensorflow_model_optimization/python/core/quantization/keras/default_8bit:default_8bit_quantize_scheme",
+    ],
 )
diff --git a/tensorflow_model_optimization/python/core/quantization/keras/prune_preserve/default_8bit_prune_preserve_quantize_scheme.py b/tensorflow_model_optimization/python/core/quantization/keras/prune_preserve/default_8bit_prune_preserve_quantize_scheme.py
@@ -0,0 +1,31 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Default 8 bit Prune Preserve Quantization scheme which specifies how quantization should be applied."""
+
+from tensorflow_model_optimization.python.core.quantization.keras.default_8bit import (
+    default_8bit_quantize_scheme, )
+from tensorflow_model_optimization.python.core.quantization.keras.prune_preserve import (
+    prune_preserve_quantize_registry, )
+
+
+class Default8BitPrunePreserveQuantizeScheme(
+    default_8bit_quantize_scheme.Default8BitQuantizeScheme):
+  """Default 8 bit quantization scheme that uses the prune preserve registry."""
+  def get_layout_transformer(self):  # Delegates to the default 8 bit scheme.
+    return super(Default8BitPrunePreserveQuantizeScheme, self).get_layout_transformer()
+
+  def get_quantize_registry(self):  # Uses the prune preserve registry.
+    return prune_preserve_quantize_registry.Default8bitPrunePreserveQuantizeRegistry()
+
diff --git a/tensorflow_model_optimization/python/core/quantization/keras/prune_preserve/prune_preserve_quantize_registry.py b/tensorflow_model_optimization/python/core/quantization/keras/prune_preserve/prune_preserve_quantize_registry.py
@@ -19,7 +19,9 @@
 from tensorflow_model_optimization.python.core.quantization.keras import quant_ops
 from tensorflow_model_optimization.python.core.quantization.keras import quantizers
 from tensorflow_model_optimization.python.core.quantization.keras.default_8bit import (
-    default_8bit_quantizers)
+    default_8bit_quantizers, )
+from tensorflow_model_optimization.python.core.quantization.keras.default_8bit import (
+    default_8bit_quantize_registry, )
 
 layers = tf.keras.layers
 
@@ -179,16 +181,40 @@ def apply_sparsity_preserve_quantize_config(self, layer, quantize_config):
         quantize_config.weight_quantizer = self._config_quantizer_map[
             quantize_config.__class__.__name__]
       else:
-        raise ValueError('Configuration ' +
-                         str(quantize_config.__class__.__name__) +  
-                         ' is not supported for Layer ' +
-                         str(layer.__class__) + '.')
+        raise ValueError("Configuration " +
+                         str(quantize_config.__class__.__name__) +
+                         " is not supported for Layer " +
+                         str(layer.__class__) + ".")
     else:
-      raise ValueError('Layer ' + str(layer.__class__) + ' is not supported.')
+      raise ValueError("Layer " + str(layer.__class__) + " is not supported.")
 
     return quantize_config
 
 
+class Default8bitPrunePreserveQuantizeRegistry(PrunePreserveQuantizeRegistry):
+  """Prune preserve registry built on the default 8 bit quantize configs."""
+  def __init__(self):
+    super(Default8bitPrunePreserveQuantizeRegistry, self).__init__()
+
+  def get_quantize_config(self, layer):
+    """Returns the default 8 bit quantize config made sparsity preserving.
+
+    Args:
+      layer: layer to return the quantize config for.
+
+    Returns:
+      The default 8 bit quantize config for `layer` after
+      apply_sparsity_preserve_quantize_config has been applied to it.
+    """
+    quantize_config = default_8bit_quantize_registry.QuantizeRegistry(
+    ).get_quantize_config(layer)
+    prune_aware_quantize_config = super(
+        Default8bitPrunePreserveQuantizeRegistry,
+        self).apply_sparsity_preserve_quantize_config(layer, quantize_config)
+
+    return prune_aware_quantize_config
+
+
 class PrunePerserveDefaultWeightsQuantizer(quantizers.LastValueQuantizer):
   """Quantize weights while preserve sparsity."""
   def __init__(self, num_bits, per_axis, symmetric, narrow_range):
diff --git a/tensorflow_model_optimization/python/examples/quantization_with_sparsity/keras/BUILD b/tensorflow_model_optimization/python/examples/quantization_with_sparsity/keras/BUILD
@@ -12,5 +12,6 @@ py_binary(
         # tensorflow dep1,
         # python/keras tensorflow dep2,
         "//tensorflow_model_optimization/python/core/quantization/keras:quantize",
+        "//tensorflow_model_optimization/python/core/quantization/keras/prune_preserve:default_8bit_prune_preserve_quantize_scheme",
     ],
 )
diff --git a/tensorflow_model_optimization/python/examples/quantization_with_sparsity/keras/mnist_cnn.py b/tensorflow_model_optimization/python/examples/quantization_with_sparsity/keras/mnist_cnn.py
@@ -26,6 +26,8 @@
 from tensorflow_model_optimization.python.core.sparsity.keras import pruning_callbacks
 from tensorflow_model_optimization.python.core.sparsity.keras import pruning_schedule
 from tensorflow_model_optimization.python.core.quantization.keras import quantize
+from tensorflow_model_optimization.python.core.quantization.keras.prune_preserve import (
+    default_8bit_prune_preserve_quantize_scheme, )
 
 layers = tf.keras.layers
 
@@ -118,8 +120,9 @@ def prune_preserve_quantize_model(pruned_model, train_images, train_labels):
   pruned_model = prune.strip_pruning(pruned_model)
   # Prune preserve QAT model
   quant_aware_annotate_model = quantize.quantize_annotate_model(pruned_model)
-  quant_aware_model = quantize.quantize_apply(quant_aware_annotate_model,
-                                              prune_preserve=True)
+  quant_aware_model = quantize.quantize_apply(
+    quant_aware_annotate_model,
+    scheme=default_8bit_prune_preserve_quantize_scheme.Default8BitPrunePreserveQuantizeScheme())
   quant_aware_model.summary()
 
   fit_kwargs = {

Original file line number	Diff line number	Diff line change
`@@ -12,5 +12,6 @@ py_binary(`
`12`	`12`	`# tensorflow dep1,`
`13`	`13`	`# python/keras tensorflow dep2,`
`14`	`14`	`"//tensorflow_model_optimization/python/core/quantization/keras:quantize",`
	`15`	`+ "//tensorflow_model_optimization/python/core/quantization/keras/prune_preserve:default_8bit_prune_preserve_quantize_scheme",`
`15`	`16`	`],`
`16`	`17`	`)`