tensorflow
diff --git a/‎demo/dynamic_embedding/amazon-us-reviews-digital-video-games/video_game_model.py‎
Lines changed: 0 additions & 150 deletions b/‎demo/dynamic_embedding/amazon-us-reviews-digital-video-games/video_game_model.py‎
Lines changed: 0 additions & 150 deletions
diff --git a/‎demo/dynamic_embedding/amazon-us-reviews-digital-video-games/README.md‎ renamed to ‎demo/dynamic_embedding/amazon-video-games-keras-eager/README.md‎
Lines changed: 2 additions & 2 deletions b/‎demo/dynamic_embedding/amazon-us-reviews-digital-video-games/README.md‎ renamed to ‎demo/dynamic_embedding/amazon-video-games-keras-eager/README.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎demo/dynamic_embedding/amazon-us-reviews-digital-video-games/feature.py‎ renamed to ‎demo/dynamic_embedding/amazon-video-games-keras-eager/feature.py‎
Lines changed: 30 additions & 21 deletions b/‎demo/dynamic_embedding/amazon-us-reviews-digital-video-games/feature.py‎ renamed to ‎demo/dynamic_embedding/amazon-video-games-keras-eager/feature.py‎
Lines changed: 30 additions & 21 deletions
diff --git a/‎demo/dynamic_embedding/amazon-us-reviews-digital-video-games/main.py‎ renamed to ‎demo/dynamic_embedding/amazon-video-games-keras-eager/main.py‎
Lines changed: 46 additions & 24 deletions b/‎demo/dynamic_embedding/amazon-us-reviews-digital-video-games/main.py‎ renamed to ‎demo/dynamic_embedding/amazon-video-games-keras-eager/main.py‎
Lines changed: 46 additions & 24 deletions
@@ -13,6 +13,6 @@ It will produce a model to `export_dir`.
 
 ## Inference:
 ```bash
-python main.py --mode=test  --export_dir="export" --batch_size=10
+python main.py --mode=test  --export_dir="export" --batch_size=64
 ```
-It will print accuracy to the prediction on verified purchase of the digital video games.
+It will print the accuracy of the model's predictions of verified purchases of digital video games.
@@ -1,8 +1,7 @@
 import tensorflow as tf
 import tensorflow_datasets as tfds
-import sys
 
-ENCODDING_SEGMENT_LENGTH = 1000000
+ENCODING_SEGMENT_LENGTH = 1000000
 NON_LETTER_OR_NUMBER_PATTERN = r'[^a-zA-Z0-9]'
 
 FAETURES = [
@@ -12,6 +11,8 @@
 ]
 LABEL = 'verified_purchase'
 
+NUM_FEATURE_SLOTS = 0
+
 
 class _RawFeature(object):
   """
@@ -22,13 +23,15 @@ def __init__(self, dtype, category):
     if not isinstance(category, int):
       raise TypeError('category must be an integer.')
     self.category = category
+    global NUM_FEATURE_SLOTS
+    NUM_FEATURE_SLOTS = max(NUM_FEATURE_SLOTS, self.category)
 
   def encode(self, tensor):
     raise NotImplementedError
 
   def match_category(self, tensor):
-    min_code = self.category * ENCODDING_SEGMENT_LENGTH
-    max_code = (self.category + 1) * ENCODDING_SEGMENT_LENGTH
+    min_code = self.category * ENCODING_SEGMENT_LENGTH
+    max_code = (self.category + 1) * ENCODING_SEGMENT_LENGTH
     mask = tf.math.logical_and(tf.greater_equal(tensor, min_code),
                                tf.less(tensor, max_code))
     return mask
@@ -40,8 +43,8 @@ def __init__(self, dtype, category):
     super(_StringFeature, self).__init__(dtype, category)
 
   def encode(self, tensor):
-    tensor = tf.strings.to_hash_bucket_fast(tensor, ENCODDING_SEGMENT_LENGTH)
-    tensor += ENCODDING_SEGMENT_LENGTH * self.category
+    tensor = tf.strings.to_hash_bucket_fast(tensor, ENCODING_SEGMENT_LENGTH)
+    tensor += ENCODING_SEGMENT_LENGTH * self.category
     return tensor
 
 
@@ -53,8 +56,8 @@ def __init__(self, dtype, category):
   def encode(self, tensor):
     tensor = tf.strings.regex_replace(tensor, NON_LETTER_OR_NUMBER_PATTERN, ' ')
     tensor = tf.strings.split(tensor, sep=' ').to_tensor('')
-    tensor = tf.strings.to_hash_bucket_fast(tensor, ENCODDING_SEGMENT_LENGTH)
-    tensor += ENCODDING_SEGMENT_LENGTH * self.category
+    tensor = tf.strings.to_hash_bucket_fast(tensor, ENCODING_SEGMENT_LENGTH)
+    tensor += ENCODING_SEGMENT_LENGTH * self.category
     return tensor
 
 
@@ -65,23 +68,23 @@ def __init__(self, dtype, category):
 
   def encode(self, tensor):
     tensor = tf.as_string(tensor)
-    tensor = tf.strings.to_hash_bucket_fast(tensor, ENCODDING_SEGMENT_LENGTH)
-    tensor += ENCODDING_SEGMENT_LENGTH * self.category
+    tensor = tf.strings.to_hash_bucket_fast(tensor, ENCODING_SEGMENT_LENGTH)
+    tensor += ENCODING_SEGMENT_LENGTH * self.category
     return tensor
 
 
 FEATURE_AND_ENCODER = {
-    'customer_id': _StringFeature(tf.string, 1),
-    'helpful_votes': _IntegerFeature(tf.int32, 2),
-    'product_category': _StringFeature(tf.string, 3),
-    'product_id': _StringFeature(tf.string, 4),
-    'product_parent': _StringFeature(tf.string, 5),
-    'product_title': _TextFeature(tf.string, 6),
-    #'review_body': _TextFeature(tf.string, 7),  # bad feature
-    'review_headline': _TextFeature(tf.string, 8),
-    'review_id': _StringFeature(tf.string, 9),
-    'star_rating': _IntegerFeature(tf.int32, 10),
-    'total_votes': _IntegerFeature(tf.int32, 11),
+    'customer_id': _StringFeature(tf.string, 0),
+    'helpful_votes': _IntegerFeature(tf.int32, 1),
+    'product_category': _StringFeature(tf.string, 2),
+    'product_id': _StringFeature(tf.string, 3),
+    'product_parent': _StringFeature(tf.string, 4),
+    'product_title': _TextFeature(tf.string, 5),
+    'review_headline': _TextFeature(tf.string, 6),
+    'review_id': _StringFeature(tf.string, 7),
+    'star_rating': _IntegerFeature(tf.int32, 8),
+    'total_votes': _IntegerFeature(tf.int32, 9),
+    #'review_body': _TextFeature(tf.string, 10),  # bad feature
 }
 
 
@@ -99,6 +102,12 @@ def encode_feature(data):
   return collected_features
 
 
+@tf.function
+def get_category(tensor):
+  # The encoders in this file build each code as
+  # `hash bucket + ENCODING_SEGMENT_LENGTH * category`, with the bucket below
+  # ENCODING_SEGMENT_LENGTH, so the floor quotient is the category index.
+  return tf.math.floordiv(tensor, ENCODING_SEGMENT_LENGTH)
+
+
 def get_labels(data):
   return data['verified_purchase']
 
 
@@ -1,6 +1,7 @@
 import feature
 import video_game_model
 import tensorflow as tf
+
 from tensorflow_recommenders_addons import dynamic_embedding as de
 
 from absl import flags
@@ -11,10 +12,11 @@
 flags.DEFINE_integer('embedding_size', 4, 'Embedding size.')
 flags.DEFINE_integer('shuffle_size', 3000,
                      'Shuffle pool size for input examples.')
-flags.DEFINE_integer('reserved_features', 30000,
+flags.DEFINE_integer('max_size', 100000,
                      'Number of reserved features in embedding.')
 flags.DEFINE_string('export_dir', './export_dir', 'Directory to export model.')
 flags.DEFINE_string('mode', 'train', 'Select the running mode: train or test.')
+flags.DEFINE_string('save_format', 'keras', 'options: keras, tf')
 
 FLAGS = flags.FLAGS
 
@@ -27,6 +29,13 @@ def train(num_steps):
   # Create a model
   model = video_game_model.VideoGameDnn(batch_size=FLAGS.batch_size,
                                         embedding_size=FLAGS.embedding_size)
+  optimizer = tf.keras.optimizers.Adam(1E-3, clipnorm=None)
+  optimizer = de.DynamicEmbeddingOptimizer(optimizer)
+  auc = tf.keras.metrics.AUC(num_thresholds=1000)
+  accuracy = tf.keras.metrics.BinaryAccuracy(dtype=tf.float32)
+  model.compile(optimizer=optimizer,
+                loss='binary_crossentropy',
+                metrics=[accuracy, auc])
 
   # Get data iterator
   iterator = feature.initialize_dataset(batch_size=FLAGS.batch_size,
@@ -39,29 +48,31 @@ def train(num_steps):
   try:
     for step in range(num_steps):
       features, labels = feature.input_fn(iterator)
-      loss, auc = model.train(features, labels)
-
-      # To avoid too many features burst the memory, we restrict
-      # the model embedding layer to `reserved_features` features.
-      # And the restriction behavior will be triggered when it gets
-      # over `reserved_features * 1.2`.
-      model.embedding_store.restrict(FLAGS.reserved_features,
-                                     trigger=int(FLAGS.reserved_features * 1.2))
 
       if step % 10 == 0:
-        print('step: {}, loss: {}, var_size: {}, auc: {}'.format(
-            step, loss, model.embedding_store.size(), auc))
+        verbose = 1
+      else:
+        verbose = 0
+
+      model.fit(features, labels, steps_per_epoch=1, epochs=1, verbose=verbose)
+
+      if verbose > 0:
+        print('step: {}, size of sparse domain: {}'.format(
+            step, model.embedding_store.size()))
+      model.embedding_store.restrict(int(FLAGS.max_size * 0.8),
+                                     trigger=FLAGS.max_size)
 
   except tf.errors.OutOfRangeError:
     print('Run out the training data.')
 
-  # Set TFRA ops become legit.
-  options = tf.saved_model.SaveOptions(namespace_whitelist=['TFRA'])
-
   # Save the model for inference.
-  inference_model = video_game_model.VideoGameDnnInference(model)
-  inference_model(feature.input_fn(iterator)[0])
-  inference_model.save('export', signatures=None, options=options)
+  options = tf.saved_model.SaveOptions(namespace_whitelist=['TFRA'])
+  if FLAGS.save_format == 'tf':
+    model.save(FLAGS.export_dir, options=options)
+  elif FLAGS.save_format == 'keras':
+    tf.keras.models.save_model(model, FLAGS.export_dir, options=options)
+  else:
+    # Reject unknown formats with a real exception type; the `NotImplemented`
+    # singleton is not an exception and cannot be raised.
+    raise NotImplementedError(
+        'Unsupported save_format: {}'.format(FLAGS.save_format))
 
 
 def test(num_steps):
@@ -70,25 +81,36 @@ def test(num_steps):
   """
 
   # Load model.
-  options = tf.saved_model.SaveOptions(namespace_whitelist=['TFRA'])
-  model = tf.saved_model.load('export', tags='serve', options=options)
-  sig = model.signatures['serving_default']
+  if FLAGS.save_format == 'tf':
+    model = tf.saved_model.load(FLAGS.export_dir, tags='serve')
+
+    def model_fn(x):
+      # Call the exported serving signature and return its 'output_1' entry.
+      return model.signatures['serving_default'](x)['output_1']
+
+  elif FLAGS.save_format == 'keras':
+    model = tf.keras.models.load_model(FLAGS.export_dir)
+    model_fn = model.__call__
+
+  else:
+    # Reject unknown formats with a real exception type; the `NotImplemented`
+    # singleton is not an exception and cannot be raised.
+    raise NotImplementedError(
+        'Unsupported save_format: {}'.format(FLAGS.save_format))
 
   # Get data iterator
   iterator = feature.initialize_dataset(batch_size=FLAGS.batch_size,
                                         split='train',
                                         shuffle_size=0,
                                         skips=100000)
 
-  # Do tests.
+  # Test click-ratio
+  ctr = tf.metrics.Accuracy()
   for step in range(num_steps):
     features, labels = feature.input_fn(iterator)
-    probabilities = sig(features)['output_1']
+    probabilities = model_fn(features)
     probabilities = tf.reshape(probabilities, (-1))
     preds = tf.cast(tf.round(probabilities), dtype=tf.int32)
     labels = tf.cast(labels, dtype=tf.int32)
-    ctr = tf.metrics.Accuracy()(labels, preds)
-    print("step: {}, ctr: {}".format(step, ctr))
+    ctr.update_state(labels, preds)
+    print("step: {}, ctr: {}".format(step, ctr.result()))
 
 
 def main(argv):