Added example for clustering of MHA.

wwwind · wwwind · commit 1e247deaf819 · 2021-09-29T16:26:20.000+01:00
Change-Id: I163333ed3e7d4c45383c2b90b56bfa27368f7999
diff --git a/tensorflow_model_optimization/python/core/clustering/keras/cluster.py b/tensorflow_model_optimization/python/core/clustering/keras/cluster.py
@@ -310,6 +310,9 @@ def _strip_clustering_wrapper(layer):
           layer, input_tensors=None, clone_function=_strip_clustering_wrapper)
 
     elif isinstance(layer, cluster_wrapper.ClusterWeightsMHA):
+        # Update cluster associations in order to get the latest weights
+        layer.update_clustered_weights_associations()
+
         # In case of MHA layer, use the overloaded implementation
         return layer.strip_clustering()
 
diff --git a/tensorflow_model_optimization/python/core/clustering/keras/cluster_integration_test.py b/tensorflow_model_optimization/python/core/clustering/keras/cluster_integration_test.py
@@ -565,7 +565,6 @@ def testMHA(self):
       optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
       loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
       metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')])
-    clustered_model.run_eagerly = True
     clustered_model.fit(self.x_train, self.y_train, epochs=1, batch_size=100, verbose=1)
 
     stripped_model = cluster.strip_clustering(clustered_model)
diff --git a/tensorflow_model_optimization/python/examples/clustering/keras/mnist/mnist_mha.py b/tensorflow_model_optimization/python/examples/clustering/keras/mnist/mnist_mha.py
@@ -0,0 +1,106 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+# pylint: disable=missing-docstring
+"""Train a simple model with a MultiHeadAttention layer on the MNIST dataset
+and cluster it.
+"""
+import tensorflow as tf
+import tensorflow_model_optimization as tfmot
+
+import numpy as np
+
+NUMBER_OF_CLUSTERS = 3
+
+# Load MNIST dataset
+mnist = tf.keras.datasets.mnist
+(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
+
+# Normalize the input images so that each pixel value is between 0 and 1.
+train_images = train_images / 255.0
+test_images = test_images / 255.0
+
+# define model
+input = tf.keras.layers.Input(shape=(28, 28))
+x = tf.keras.layers.MultiHeadAttention(num_heads=2, key_dim=16, name="mha")(
+    query=input, value=input
+)
+x = tf.keras.layers.Flatten()(x)
+out = tf.keras.layers.Dense(10)(x)
+model = tf.keras.Model(inputs=input, outputs=out)
+
+# Train the digit classification model
+model.compile(
+    optimizer="adam",
+    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+    metrics=["accuracy"],
+)
+
+model.fit(
+    train_images, train_labels, epochs=1, validation_split=0.1,
+)
+
+score = model.evaluate(test_images, test_labels, verbose=0)
+print('Model test loss:', score[0])
+print('Model test accuracy:', score[1])
+
+# Hyperparameters used when fine-tuning the model with clustering applied.
+batch_size = 128
+epochs = 1
+validation_split = 0.1  # 10% of training set will be used for validation set.
+
+# Define model for clustering
+cluster_weights = tfmot.clustering.keras.cluster_weights
+CentroidInitialization = tfmot.clustering.keras.CentroidInitialization
+
+clustering_params = {
+    "number_of_clusters": NUMBER_OF_CLUSTERS,
+    "cluster_centroids_init": CentroidInitialization.KMEANS_PLUS_PLUS,
+}
+model_for_clustering = cluster_weights(model, **clustering_params)
+
+# `cluster_weights` requires a recompile.
+model_for_clustering.compile(
+    optimizer="adam",
+    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+    metrics=["accuracy"],
+)
+
+model_for_clustering.fit(
+    train_images,
+    train_labels,
+    batch_size=batch_size,
+    epochs=epochs,
+    validation_split=validation_split,
+)
+
+score = model_for_clustering.evaluate(test_images, test_labels, verbose=0)
+print('Clustered model test loss:', score[0])
+print('Clustered model test accuracy:', score[1])
+
+# Strip clustering from the model
+clustered_model = tfmot.clustering.keras.strip_clustering(model_for_clustering)
+clustered_model.compile(
+    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
+    optimizer='adam',
+    metrics=['accuracy'])
+
+score = clustered_model.evaluate(test_images, test_labels, verbose=0)
+print('Stripped clustered model test loss:', score[0])
+print('Stripped clustered model test accuracy:', score[1])
+
+# Check that each MHA kernel has exactly NUMBER_OF_CLUSTERS unique values.
+mha_weights = list(filter(lambda x: 'mha' in x.name and 'kernel' in x.name, clustered_model.weights))
+for x in mha_weights:
+    assert len(np.unique(x.numpy())) == NUMBER_OF_CLUSTERS