
Commit 45b15be

Changed the strategy to enforce a static sparsity mask in response to review comments; extended the MNIST test with additional checks.
1 parent e60f936 commit 45b15be

File tree

4 files changed (+53, -19 lines)


tensorflow_model_optimization/python/core/clustering/keras/BUILD

Lines changed: 1 addition & 0 deletions
@@ -229,5 +229,6 @@ py_strict_test(
         ":cluster",
         ":cluster_config",
         # tensorflow dep1,
+        "//tensorflow_model_optimization/python/core/clustering/keras/experimental",
     ],
 )

tensorflow_model_optimization/python/core/clustering/keras/cluster_integration_test.py

Lines changed: 1 addition & 1 deletion
@@ -199,7 +199,7 @@ def testSparsityIsPreservedDuringTraining(self):
         stripped_model_after_tuning, 0, 'kernel')
     # Check after sparsity-aware clustering, despite zero centroid can drift,
     # the final number of unique weights remains the same
-    self.assertEqual(nr_of_unique_weights_before, nr_of_unique_weights_after)
+    self.assertLessEqual(nr_of_unique_weights_after, nr_of_unique_weights_before)
     # Check that the null weights stayed the same before and after tuning.
     # There might be new weights that become zeros but sparsity-aware
     # clustering preserves the original null weights in the original positions
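
The relaxed assertion reflects that, with a static zero centroid, previously distinct clusters can collapse into the zero cluster during fine-tuning, so the number of unique weights may shrink but should never grow, while the original zero positions must stay zero. A minimal sketch of these two checks (the NumPy helpers below are illustrative, not the test's own utilities):

import numpy as np

def count_unique(kernel):
  # Number of distinct values in a layer's kernel after clustering.
  return len(np.unique(kernel))

def null_weights_preserved(kernel_before, kernel_after):
  # Every position that was zero before fine-tuning must still be zero after;
  # new zeros may appear, but the original sparsity pattern is never lost.
  return np.all(kernel_after[kernel_before == 0] == 0)

# Expected after sparsity-aware clustering and fine-tuning:
#   count_unique(kernel_after) <= count_unique(kernel_before)
#   null_weights_preserved(kernel_before, kernel_after) is True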

tensorflow_model_optimization/python/core/clustering/keras/cluster_wrapper.py

Lines changed: 25 additions & 9 deletions
@@ -107,6 +107,9 @@ def __init__(self,
 
     # Stores the pairs of weight names and their respective sparsity masks
     self.sparsity_masks = {}
+    self.zero_idx = {}
+
+    # Stores the pairs of weight names and the zero centroids
 
     # Map weight names to original clusterable weights variables
     # Those weights will still be updated during backpropagation
@@ -199,10 +202,32 @@ def build(self, input_shape):
                 pulling_indices, original_weight))
         self.sparsity_masks[weight_name] = (
             tf.cast(tf.math.not_equal(clustered_weights, 0), dtype=tf.float32))
+        # If the model is pruned (which we suppose), this is approximately zero
+        self.zero_idx[weight_name] = tf.argmin(
+            tf.abs(self.cluster_centroids[weight_name]), axis=-1)
 
   def update_clustered_weights_associations(self):
     for weight_name, original_weight in self.original_clusterable_weights.items(
     ):
+
+      if self.preserve_sparsity:
+        # Set the smallest centroid to zero to force sparsity
+        # and avoid extra cluster from forming
+        zero_idx_mask = (
+            tf.cast(tf.math.not_equal(
+                self.cluster_centroids[weight_name],
+                self.cluster_centroids[weight_name][self.zero_idx[weight_name]]),
+                    dtype=tf.float32)
+        )
+        self.cluster_centroids[weight_name].assign(
+            tf.math.multiply(self.cluster_centroids[weight_name],
+                             zero_idx_mask))
+        # During training, the original zero weights can drift slightly.
+        # We want to prevent this by forcing them to stay zero at the places
+        # where they were originally zero to begin with.
+        original_weight = tf.math.multiply(original_weight,
+                                           self.sparsity_masks[weight_name])
+
       # Update pulling indices (cluster associations)
       pulling_indices = (
           self.clustering_algorithms[weight_name].get_pulling_indices(
@@ -214,15 +239,6 @@ def update_clustered_weights_associations(self):
           self.clustering_algorithms[weight_name].get_clustered_weight(
               pulling_indices, original_weight))
 
-      if self.preserve_sparsity:
-        # Re-discover the sparsity masks to avoid drifting
-        self.sparsity_masks[weight_name] = (
-            tf.cast(tf.math.not_equal(clustered_weights, 0), dtype=tf.float32)
-        )
-        # Apply the sparsity mask to the clustered weights
-        clustered_weights = tf.math.multiply(clustered_weights,
-                                             self.sparsity_masks[weight_name])
-
       # Replace the weights with their clustered counterparts
       setattr(self.layer, weight_name, clustered_weights)
 
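Taken together, the wrapper changes replace the per-step rediscovery of the sparsity mask with a static scheme: the index of the smallest-magnitude centroid is recorded once at build time, that centroid is pinned to exactly zero on every association update, and the build-time sparsity mask is applied to the original weights before they are re-associated. Below is a standalone sketch of that idea with made-up centroid and weight values; the nearest-centroid association at the end stands in for the wrapper's actual clustering algorithm.

import tensorflow as tf

# Toy per-weight state; in the wrapper these live in per-weight-name dicts.
centroids = tf.Variable([-0.7, 0.02, 0.5, 1.1])    # cluster centroids
weights = tf.constant([[0.0, 0.49], [1.08, 0.0]])  # pruned original weights
sparsity_mask = tf.cast(tf.math.not_equal(weights, 0), tf.float32)

# Recorded once: index of the centroid with the smallest magnitude
# (approximately zero for a pruned model).
zero_idx = tf.argmin(tf.abs(centroids), axis=-1)

# On every association update, pin that centroid to exactly zero so no
# extra near-zero cluster can form.
zero_idx_mask = tf.cast(
    tf.math.not_equal(centroids, centroids[zero_idx]), tf.float32)
centroids.assign(centroids * zero_idx_mask)

# Apply the static sparsity mask so originally-zero weights cannot drift.
masked_weights = weights * sparsity_mask

# Re-associate each masked weight with its nearest centroid.
pulling_indices = tf.argmin(
    tf.abs(tf.expand_dims(masked_weights, -1) - centroids), axis=-1)
clustered_weights = tf.gather(centroids, pulling_indices)
print(clustered_weights.numpy())  # originally-zero positions stay exactly zero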

tensorflow_model_optimization/python/core/clustering/keras/mnist_clustering_test.py

Lines changed: 26 additions & 9 deletions
@@ -14,10 +14,12 @@
 # ==============================================================================
 """Tests for a simple convnet with clusterable layer on the MNIST dataset."""
 
+from absl.testing import parameterized
 import tensorflow as tf
 
 from tensorflow_model_optimization.python.core.clustering.keras import cluster
 from tensorflow_model_optimization.python.core.clustering.keras import cluster_config
+from tensorflow_model_optimization.python.core.clustering.keras.experimental import cluster as experimental_cluster
 
 tf.random.set_seed(42)
 
@@ -63,19 +65,21 @@ def _train_model(model):
   model.fit(x_train, y_train, epochs=EPOCHS)
 
 
-def _cluster_model(model, number_of_clusters):
+def _cluster_model(model, number_of_clusters, preserve_sparsity=False):
 
   (x_train, y_train), _ = _get_dataset()
 
   clustering_params = {
       'number_of_clusters':
           number_of_clusters,
       'cluster_centroids_init':
-          cluster_config.CentroidInitialization.KMEANS_PLUS_PLUS
+          cluster_config.CentroidInitialization.KMEANS_PLUS_PLUS,
+      'preserve_sparsity':
+          preserve_sparsity,
   }
 
   # Cluster model
-  clustered_model = cluster.cluster_weights(model, **clustering_params)
+  clustered_model = experimental_cluster.cluster_weights(model, **clustering_params)
 
   # Use smaller learning rate for fine-tuning
   # clustered model
@@ -106,13 +110,27 @@ def _get_number_of_unique_weights(stripped_model, layer_nr, weight_name):
 
   return nr_of_unique_weights
 
+def _deepcopy_model(model):
+  model_copy = keras.models.clone_model(model)
+  model_copy.set_weights(model.get_weights())
+  return model_copy
 
-class FunctionalTest(tf.test.TestCase):
+class FunctionalTest(tf.test.TestCase, parameterized.TestCase):
 
-  def testMnist(self):
-    """In this test we test that 'kernel' weights are clustered."""
+  def setUp(self):
     model = _build_model()
     _train_model(model)
+    self.model = model
+    self.dataset = _get_dataset()
+
+  @parameterized.parameters(
+      (False),
+      (True),
+  )
+  def testMnist(self, preserve_sparsity):
+    """In this test we test that 'kernel' weights are clustered."""
+    model = self.model
+    _, (x_test, y_test) = self.dataset
 
     # Checks that number of original weights('kernel') is greater than the
     # number of clusters
@@ -123,12 +141,11 @@ def testMnist(self):
     nr_of_bias_weights = _get_number_of_unique_weights(model, -1, 'bias')
     self.assertGreater(nr_of_bias_weights, NUMBER_OF_CLUSTERS)
 
-    _, (x_test, y_test) = _get_dataset()
-
     results_original = model.evaluate(x_test, y_test)
     self.assertGreater(results_original[1], 0.8)
 
-    clustered_model = _cluster_model(model, NUMBER_OF_CLUSTERS)
+    model_copy = _deepcopy_model(model)
+    clustered_model = _cluster_model(model_copy, NUMBER_OF_CLUSTERS, preserve_sparsity)
 
     results = clustered_model.evaluate(x_test, y_test)
 
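The extended test clusters a deep copy of the same trained baseline once per preserve_sparsity value, so both parameterized cases start from identical weights. A hedged usage sketch of that pattern outside the test harness (the helper name and cluster count of 8 are illustrative, not taken from the test):

import tensorflow as tf
from tensorflow_model_optimization.python.core.clustering.keras import cluster_config
from tensorflow_model_optimization.python.core.clustering.keras.experimental import (
    cluster as experimental_cluster)

def cluster_copy(trained_model, preserve_sparsity):
  # Deep-copy so each variant starts from the identical trained weights.
  model_copy = tf.keras.models.clone_model(trained_model)
  model_copy.set_weights(trained_model.get_weights())
  clustering_params = {
      'number_of_clusters': 8,  # illustrative value
      'cluster_centroids_init':
          cluster_config.CentroidInitialization.KMEANS_PLUS_PLUS,
      'preserve_sparsity': preserve_sparsity,
  }
  return experimental_cluster.cluster_weights(model_copy, **clustering_params)

# e.g. cluster_copy(trained_model, preserve_sparsity=False) and
#      cluster_copy(trained_model, preserve_sparsity=True)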
