Add API and command line tool to prune Keras models, without retraining.

fredrec · tensorflower-gardener · commit d942a15881fc · 2021-05-06T00:04:52.000-07:00
Quickly produces pruned models, without concern for accuracy. Useful to
evaluate the performance benefits of given pruning parameters, without
time-consuming retraining.

PiperOrigin-RevId: 372284927
diff --git a/tensorflow_model_optimization/python/core/sparsity/keras/pruning_callbacks.py b/tensorflow_model_optimization/python/core/sparsity/keras/pruning_callbacks.py
@@ -30,19 +30,6 @@
 callbacks = tf.keras.callbacks
 
 
-def _collect_prunable_layers(model):
-  """Recursively collect the prunable layers in the model."""
-  prunable_layers = []
-  for layer in model.layers:
-    # A keras model may have other models as layers.
-    if isinstance(layer, tf.keras.Model):
-      prunable_layers += _collect_prunable_layers(layer)
-    if isinstance(layer, pruning_wrapper.PruneLowMagnitude):
-      prunable_layers.append(layer)
-
-  return prunable_layers
-
-
 class UpdatePruningStep(callbacks.Callback):
   """Keras callback which updates pruning wrappers with the optimizer step.
 
@@ -63,7 +50,7 @@ def __init__(self):
 
   def on_train_begin(self, logs=None):
     # Collect all the prunable layers in the model.
-    self.prunable_layers = _collect_prunable_layers(self.model)
+    self.prunable_layers = pruning_wrapper.collect_prunable_layers(self.model)
     if not self.prunable_layers:
       return
     # If the model is newly created/initialized, set the 'pruning_step' to 0.
@@ -125,7 +112,7 @@ def on_epoch_begin(self, epoch, logs=None):
 
     pruning_logs = {}
     params = []
-    prunable_layers = _collect_prunable_layers(self.model)
+    prunable_layers = pruning_wrapper.collect_prunable_layers(self.model)
     for layer in prunable_layers:
       for _, mask, threshold in layer.pruning_vars:
         params.append(mask)
diff --git a/tensorflow_model_optimization/python/core/sparsity/keras/pruning_wrapper.py b/tensorflow_model_optimization/python/core/sparsity/keras/pruning_wrapper.py
@@ -350,3 +350,11 @@ def get_weights(self):
 
   def set_weights(self, weights):
     self.layer.set_weights(weights)
+
+
+def collect_prunable_layers(model):
+  """Returns the `PruneLowMagnitude` wrappers found in `model.submodules`.
+
+  Args:
+    model: Object exposing a `submodules` iterable, e.g. a Keras model.
+
+  Returns:
+    A list of the `PruneLowMagnitude` instances, in `submodules` order.
+  """
+  def _is_pruning_wrapper(module):
+    return isinstance(module, PruneLowMagnitude)
+
+  return list(filter(_is_pruning_wrapper, model.submodules))
diff --git a/tensorflow_model_optimization/python/core/sparsity/keras/tools/BUILD b/tensorflow_model_optimization/python/core/sparsity/keras/tools/BUILD
@@ -0,0 +1,46 @@
+load("//tensorflow_model_optimization:tensorflow_model_optimization.bzl", "py_strict_library")
+
+package(default_visibility = [
+    "//tensorflow_model_optimization:__subpackages__",
+])
+
+licenses(["notice"])
+
+# Library target for sparsity_tooling.py; publicly visible.
+py_strict_library(
+    name = "sparsity_tooling",
+    srcs = ["sparsity_tooling.py"],
+    srcs_version = "PY3",
+    visibility = ["//visibility:public"],
+    deps = [
+        # tensorflow dep1,
+        "//tensorflow_model_optimization/python/core/sparsity/keras:prune",
+        "//tensorflow_model_optimization/python/core/sparsity/keras:pruning_schedule",
+        "//tensorflow_model_optimization/python/core/sparsity/keras:pruning_wrapper",
+    ],
+)
+
+# Test target built from sparsity_tooling_test.py; depends on :sparsity_tooling.
+py_test(
+    name = "sparsity_tooling_test",
+    size = "medium",
+    srcs = ["sparsity_tooling_test.py"],
+    python_version = "PY3",
+    visibility = ["//visibility:public"],
+    deps = [
+        ":sparsity_tooling",
+        # absl/testing:parameterized dep1,
+        # tensorflow dep1,
+        "//tensorflow_model_optimization/python/core/keras:compat",
+        "//tensorflow_model_optimization/python/core/sparsity/keras:test_utils",
+    ],
+)
+
+# Command-line tool built from evaluate_pruning.py.
+py_binary(
+    name = "evaluate_pruning",
+    srcs = ["evaluate_pruning.py"],
+    python_version = "PY3",
+    deps = [
+        ":sparsity_tooling",
+        # NOTE(review): evaluate_pruning.py also imports absl `app` and `flags`,
+        # but no absl dep placeholder is listed here - confirm whether one is
+        # needed alongside the tensorflow one.
+        # tensorflow dep1,
+        "//tensorflow_model_optimization/python/core/sparsity/keras:prune",
+    ],
+)
diff --git a/tensorflow_model_optimization/python/core/sparsity/keras/tools/__init__.py b/tensorflow_model_optimization/python/core/sparsity/keras/tools/__init__.py
@@ -0,0 +1,14 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
diff --git a/tensorflow_model_optimization/python/core/sparsity/keras/tools/evaluate_pruning.py b/tensorflow_model_optimization/python/core/sparsity/keras/tools/evaluate_pruning.py
@@ -0,0 +1,145 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tool to quickly prune a Keras model for evaluation purposes.
+
+Prunes the model with the given sparsity parameters, without retraining. Will
+output a converted TFLite model for both pruned and unpruned versions.
+
+This tool is intended to produce sparsified models for evaluating the
+performance benefits (model size, inference time, …) of pruning. Since the
+sparsity is applied in one shot, without retraining, the accuracy of the
+resulting model will be severely degraded.
+"""
+
+from __future__ import print_function
+
+import os
+import tempfile
+import textwrap
+import zipfile
+
+from absl import app
+from absl import flags
+import tensorflow as tf
+
+from tensorflow_model_optimization.python.core.sparsity.keras import prune
+from tensorflow_model_optimization.python.core.sparsity.keras.tools import sparsity_tooling
+
+
+# Command-line flags. `--model` and `--output_dir` default to None and are
+# marked as required in the `__main__` guard at the bottom of this file.
+_MODEL_PATH = flags.DEFINE_string('model', None, 'Keras model file to prune')
+_OUTPUT_DIR = flags.DEFINE_string('output_dir', None, 'Output directory')
+_SPARSITY = flags.DEFINE_float(
+    'sparsity',
+    0.8,
+    'Target sparsity level, as float in [0,1] interval',
+    lower_bound=0,
+    upper_bound=1)
+# Declared as a string flag: the value is turned into a tuple of two ints by
+# `_parse_block_size_flag` and checked by the `_check_block_size` validator.
+_BLOCK_SIZE = flags.DEFINE_string(
+    'block_size', '1,1',
+    'Comma-separated dimensions (height,width) of the block sparsity pattern.'
+)
+
+
+def _parse_block_size_flag(value):
+  """Parses a `height,width` flag value into a pair of positive ints.
+
+  Args:
+    value: String holding two comma-separated integers, e.g. '1,16'.
+
+  Returns:
+    A `(height, width)` tuple of ints.
+
+  Raises:
+    ValueError: If `value` does not hold exactly two integers, or if either
+      of them is not strictly positive.
+  """
+  height_str, width_str = value.split(',')
+  height, width = int(height_str), int(width_str)
+  # A block needs at least one element along each dimension.
+  if height < 1 or width < 1:
+    raise ValueError('Block dimensions must be positive, got "%s".' % value)
+  return height, width
+
+
+@flags.validator(_BLOCK_SIZE.name)
+def _check_block_size(flag_value):
+  """Flag validator checking that `--block_size` can be parsed.
+
+  Args:
+    flag_value: Raw value of the `block_size` flag.
+
+  Returns:
+    True when `_parse_block_size_flag` accepts the value.
+
+  Raises:
+    flags.ValidationError: If the value cannot be parsed.
+  """
+  try:
+    _parse_block_size_flag(flag_value)
+  except (AttributeError, TypeError, ValueError) as error:
+    # Only parse failures are translated. A bare `except:` would also swallow
+    # KeyboardInterrupt and SystemExit.
+    raise flags.ValidationError(
+        'Invalid block size value "%s".' % flag_value) from error
+  return True
+
+
+def convert_to_tflite(keras_model, output_path):
+  """Converts the given Keras model to TFLite and writes it to a file.
+
+  Args:
+    keras_model: Keras model handed to `tf.lite.TFLiteConverter`.
+    output_path: Destination path of the serialized TFLite model.
+  """
+  converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
+  converter.optimizations = {tf.lite.Optimize.EXPERIMENTAL_SPARSITY}
+
+  # Convert before opening the destination, so that a failed conversion does
+  # not leave an empty or truncated file behind.
+  tflite_model = converter.convert()
+  with open(output_path, 'wb') as out:
+    out.write(tflite_model)
+
+
+def get_gzipped_size(model_path):
+  """Returns the size in bytes of `model_path` once deflate-compressed.
+
+  The file is archived into a temporary zip file (ZIP_DEFLATED) and the size
+  of that archive, container overhead included, is reported.
+
+  Args:
+    model_path: Path of the file to measure.
+
+  Returns:
+    Size of the temporary zip archive, in bytes.
+  """
+  with tempfile.TemporaryFile(suffix='.zip') as archive_file:
+    archive = zipfile.ZipFile(
+        archive_file, mode='w', compression=zipfile.ZIP_DEFLATED)
+    with archive:
+      archive.write(model_path)
+
+    # The offset of the end of the file is the archive size.
+    return archive_file.seek(0, os.SEEK_END)
+
+
+def pruned_model_filename(sparsity, block_size):
+  """Builds the TFLite file name that encodes the pruning parameters.
+
+  Args:
+    sparsity: Target sparsity, rendered with two decimals.
+    block_size: Tuple of two integers, rendered as `<first>x<second>`.
+
+  Returns:
+    A name such as `pruned_model_sparsity_0.80_block_1x1.tflite`.
+  """
+  block_label = '%dx%d' % block_size
+  sparsity_label = '%.2f' % (sparsity,)
+  return 'pruned_model_sparsity_{}_block_{}.tflite'.format(
+      sparsity_label, block_label)
+
+
+def run(input_model_path, output_dir, target_sparsity, block_size):
+  """Converts a Keras model to TFLite, both as-is and after one-shot pruning.
+
+  Both `.tflite` files are written to `output_dir`, then their compressed
+  sizes are printed side by side.
+
+  Args:
+    input_model_path: Path of the Keras model to load.
+    output_dir: Directory receiving the two `.tflite` files; created if missing.
+    target_sparsity: Sparsity level forwarded to `prune_for_benchmark`.
+    block_size: Pair of ints forwarded to `prune_for_benchmark` and embedded
+      in the pruned model's file name.
+  """
+  bytes_per_mib = 2.**20
+
+  print(textwrap.dedent("""\
+    Warning: The sparse models produced by this tool have poor accuracy. They
+             are not intended to be served in production, but to be used for
+             performance benchmarking."""))
+
+  original_model = tf.keras.models.load_model(input_model_path)
+
+  os.makedirs(output_dir, exist_ok=True)
+  pruned_name = pruned_model_filename(target_sparsity, block_size)
+  baseline_path = os.path.join(output_dir, 'unpruned_model.tflite')
+  sparse_path = os.path.join(output_dir, pruned_name)
+
+  # Baseline: the untouched model.
+  convert_to_tflite(original_model, baseline_path)
+
+  # Pruned variant: wrap and prune, then drop the wrappers before converting.
+  wrapped_model = sparsity_tooling.prune_for_benchmark(
+      keras_model=original_model,
+      target_sparsity=target_sparsity,
+      block_size=block_size)
+  convert_to_tflite(prune.strip_pruning(wrapped_model), sparse_path)
+
+  # Compare the compressed size of both TFLite files.
+  baseline_size = get_gzipped_size(baseline_path)
+  sparse_size = get_gzipped_size(sparse_path)
+
+  print('Size of gzipped TFLite models:')
+  print(' * Unpruned : %.2fMiB' % (baseline_size / bytes_per_mib))
+  print(' * Pruned   : %.2fMiB' % (sparse_size / bytes_per_mib))
+  print('       diff : %d%%' %
+        (100. * (sparse_size - baseline_size) / baseline_size))
+
+
+def main(argv):
+  """Entry point handed to `app.run`: reads the flags and calls `run`.
+
+  Args:
+    argv: Argument list received from `app.run`; more than one entry is
+      rejected.
+
+  Raises:
+    app.UsageError: If `argv` holds more than one entry.
+  """
+  unexpected_args = argv[1:]
+  if unexpected_args:
+    raise app.UsageError('Too many command-line arguments.')
+
+  parsed_block_size = _parse_block_size_flag(_BLOCK_SIZE.value)
+  run(
+      _MODEL_PATH.value,
+      _OUTPUT_DIR.value,
+      _SPARSITY.value,
+      parsed_block_size)
+
+
+if __name__ == '__main__':
+  # Both flags default to None, so they must be given on the command line.
+  flags.mark_flags_as_required([_MODEL_PATH.name, _OUTPUT_DIR.name])
+
+  app.run(main)
diff --git a/tensorflow_model_optimization/python/core/sparsity/keras/tools/sparsity_tooling.py b/tensorflow_model_optimization/python/core/sparsity/keras/tools/sparsity_tooling.py
@@ -0,0 +1,93 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utilities to prune without training.
+
+Quickly produces pruned models, with no concern for accuracy. Useful to
+evaluate the performance benefits of given pruning parameters, without
+time-consuming retraining.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tensorflow as tf
+
+from tensorflow_model_optimization.python.core.sparsity.keras import prune
+from tensorflow_model_optimization.python.core.sparsity.keras import pruning_schedule
+from tensorflow_model_optimization.python.core.sparsity.keras import pruning_wrapper
+
+keras = tf.keras
+
+
+class StepIndependentConstantSparsity(pruning_schedule.PruningSchedule):
+  """Pruning schedule with constant sparsity, applied at any step."""
+
+  def __init__(self, target_sparsity):
+    """Initializes a Pruning schedule with constant sparsity.
+
+    Sparsity is applied at every step.
+
+    Args:
+      target_sparsity: Target sparsity as float, in [0, 1] interval.
+
+    Raises:
+      ValueError: If `target_sparsity` is outside the [0, 1] interval.
+    """
+    # Written as a negated range check so that NaN is rejected as well.
+    if not 0.0 <= target_sparsity <= 1.0:
+      raise ValueError(
+          'target_sparsity must be in the [0, 1] interval, got %r.' %
+          (target_sparsity,))
+    self.target_sparsity = target_sparsity
+
+  def __call__(self, step):
+    """Returns the same result whatever the step.
+
+    Args:
+      step: Unused by this schedule.
+
+    Returns:
+      A `(True, sparsity)` tuple, where `sparsity` is a float32 constant
+      tensor holding `target_sparsity`.
+    """
+    return (True, tf.constant(self.target_sparsity, dtype=tf.float32))
+
+  def get_config(self):
+    """Returns a dict with the class name and the `target_sparsity` value."""
+    return {
+        'class_name': self.__class__.__name__,
+        'config': {
+            'target_sparsity': self.target_sparsity,
+        }
+    }
+
+
+def _apply_pruning(prunable_object):
+  """Calculates the masks and updates weights of layers of a wrapped model.
+
+  Args:
+    prunable_object: Object whose pruning wrappers are gathered with
+      `pruning_wrapper.collect_prunable_layers`.
+
+  Raises:
+    RuntimeError: If TensorFlow is not executing eagerly.
+  """
+  # An explicit check rather than `assert`, which `python -O` strips.
+  if not tf.executing_eagerly():
+    raise RuntimeError('_apply_pruning requires eager execution.')
+  for layer in pruning_wrapper.collect_prunable_layers(prunable_object):
+    layer.pruning_obj.conditional_mask_update()  # Create mask
+    layer.pruning_obj.weight_mask_op()  # weight = weight * mask
+
+
+def prune_for_benchmark(keras_model,
+                        target_sparsity,
+                        block_size=(1, 1)):
+  """Prunes a tf.keras model in a single step, without re-training.
+
+  This function is intended to quickly apply sparsity to a model, without
+  consideration for accuracy.
+
+  Args:
+    keras_model: A `tf.keras.Model` instance.
+    target_sparsity: Target sparsity as float, in [0, 1] interval.
+    block_size: The dimensions (height, width) for the block sparse
+      pattern in rank-2 weight tensors.
+  Returns:
+    A pruned model, modified with pruning wrappers.
+  """
+  schedule = StepIndependentConstantSparsity(target_sparsity)
+  wrapped_model = prune.prune_low_magnitude(
+      keras_model, pruning_schedule=schedule, block_size=block_size)
+
+  # Compute the masks and apply them to the weights right away.
+  _apply_pruning(wrapped_model)
+  return wrapped_model
diff --git a/tensorflow_model_optimization/python/core/sparsity/keras/tools/sparsity_tooling_test.py b/tensorflow_model_optimization/python/core/sparsity/keras/tools/sparsity_tooling_test.py