@@ -4,8 +4,6 @@
 import os
 import unittest
 
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
-
 import numpy as np
 import tensorflow as tf
 import tensorlayer as tl
@@ -14,8 +12,8 @@
 from tensorlayer.models.transformer import Transformer
 from tests.utils import CustomTestCase
 from tensorlayer.models.transformer.utils import metrics
-from tensorlayer.cost import cross_entropy_seq
 from tensorlayer.optimizers import lazyAdam as optimizer
+from tensorlayer.models.transformer.utils import attention_visualisation
 import time
 
 
@@ -51,7 +49,7 @@ class Model_SEQ2SEQ_Test(CustomTestCase):
 
     @classmethod
     def setUpClass(cls):
-        cls.batch_size = 16
+        cls.batch_size = 50
 
         cls.embedding_size = 32
         cls.dec_seq_length = 5
@@ -66,7 +64,7 @@ def setUpClass(cls):
 
         assert cls.src_len == cls.tgt_len
 
-        cls.num_epochs = 1000
+        cls.num_epochs = 20
         cls.n_step = cls.src_len // cls.batch_size
 
     @classmethod
@@ -99,8 +97,8 @@ def test_basic_simpleSeq2Seq(self):
 
                 grad = tape.gradient(loss, model_.all_weights)
                 optimizer.apply_gradients(zip(grad, model_.all_weights))
-
 
+
                 total_loss += loss
                 n_iter += 1
             print(time.time() - t)
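For context, the two gradient lines in this hunk follow the standard TF2 tf.GradientTape update step. A minimal self-contained sketch of the same pattern, using a stand-in Keras model (trainable_variables playing the role of TensorLayer's model_.all_weights, and an MSE loss standing in for the test's actual sequence loss, which is computed outside this hunk):

import tensorflow as tf

# Stand-in model and data; the real test uses a TensorLayer Transformer.
model = tf.keras.Sequential([tf.keras.layers.Dense(4)])
optimizer = tf.keras.optimizers.Adam()
x = tf.random.normal([8, 3])
y = tf.random.normal([8, 4])

with tf.GradientTape() as tape:
    loss = tf.reduce_mean(tf.square(model(x) - y))  # placeholder loss

# Same two-step update as in the hunk above:
grad = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grad, model.trainable_variables))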
@@ -115,5 +113,20 @@ def test_basic_simpleSeq2Seq(self):
             print('Epoch [{}/{}]: loss {:.4f}'.format(epoch + 1, self.num_epochs, total_loss / n_iter))
 
 
+        # visualise the self-attention weights at the encoder
+        trainX, trainY = shuffle(self.trainX, self.trainY)
+        X = [trainX[0]]
+        Y = [trainY[0]]
+        logits, weights_encoder, weights_decoder = model_(inputs=X, targets=Y)
+        attention_visualisation.plot_attention_weights(weights_encoder["layer_0"], X[0].numpy(), X[0].numpy())
+
+        # visualise the encoder-decoder attention weights at the decoder
+        trainX, trainY = shuffle(self.trainX, self.trainY)
+        X = [trainX[0]]
+        Y = [trainY[0]]
+        logits, weights_encoder, weights_decoder = model_(inputs=X, targets=Y)
+        attention_visualisation.plot_attention_weights(weights_decoder["enc_dec"]["layer_0"], X[0].numpy(), Y[0])
+
+
 if __name__ == '__main__':
     unittest.main()
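The plot_attention_weights helper added in this commit presumably renders an attention matrix as a heatmap, labelling the axes with the source and target tokens passed at the call sites above. A rough, self-contained sketch of that idea with matplotlib (the function name, signature, and the [heads, tgt_len, src_len] weight layout are inferred from the call sites, not taken from the TensorLayer source):

import numpy as np
import matplotlib.pyplot as plt

# Sketch of a plot_attention_weights(weights, x_tokens, y_tokens)-style helper:
# one heatmap per attention head, with tokens as tick labels.
def plot_attention_weights_sketch(weights, x_tokens, y_tokens):
    weights = np.asarray(weights)  # assumed shape: [heads, len(y_tokens), len(x_tokens)]
    n_heads = weights.shape[0]
    fig, axes = plt.subplots(1, n_heads, figsize=(4 * n_heads, 4))
    for head, ax in zip(range(n_heads), np.atleast_1d(axes)):
        ax.imshow(weights[head], cmap='viridis')
        ax.set_xticks(range(len(x_tokens)))
        ax.set_xticklabels(x_tokens, rotation=90)
        ax.set_yticks(range(len(y_tokens)))
        ax.set_yticklabels(y_tokens)
        ax.set_title('head {}'.format(head))
    plt.show()

# Example: 2 heads attending over a 4-token sequence (random weights).
plot_attention_weights_sketch(np.random.rand(2, 4, 4), list('abcd'), list('abcd'))

In the test above the "tokens" are integer ids (X[0].numpy() and Y[0]), which work equally well as tick labels.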