
Commit aa2fe90

Lifannrhdong authored and committed
Add demo for movielens-1m with keras API
1 parent 6922595 commit aa2fe90

File tree

2 files changed: +218 lines, -0 lines

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
# movielens-1m-keras

This is a demo of using the Keras API to build a recommender system that predicts a user's rating of a movie, with support of [dynamic_embedding](https://github.com/tensorflow/recommenders-addons/blob/master/docs/api_docs/tfra/dynamic_embedding.md). A minimal sketch of the core API usage follows the commands below.

The training and testing data come from the [movielens](https://www.tensorflow.org/datasets/catalog/movielens) dataset.

```bash
# Train
python movielens-1m-keras.py --mode=train --epochs=1 --steps_per_epoch=20000

# Export the model for inference
python movielens-1m-keras.py --mode=export

# Run the test
python movielens-1m-keras.py --mode=test --test_steps=100 --test_batch=1024
```
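The essential idea is to replace a fixed-vocabulary embedding with a `dynamic_embedding` Keras layer and to wrap the Keras optimizer so the dynamic tables are trained. Below is a minimal sketch of that idea, reusing only calls that appear in the script; the embedding size of 32 matches the default `--embedding_size` flag, and the id values are purely illustrative.

```python
import tensorflow as tf
from tensorflow_recommenders_addons import dynamic_embedding as de

# A dynamic embedding layer is created with only an embedding size; there is
# no fixed vocabulary size, and int64 ids are looked up directly.
user_embedding = de.keras.layers.SquashedEmbedding(
    32,
    initializer=tf.keras.initializers.RandomNormal(0.0, 0.5),
    name='user_embedding')

# The Keras optimizer is wrapped so that it can also update the dynamic
# embedding tables during training.
optimizer = de.DynamicEmbeddingOptimizer(tf.keras.optimizers.Adam(1E-3))

# Ids are fed as int64 tensors, one id per example, as in the demo model.
user_ids = tf.constant([[1], [2], [3]], dtype=tf.int64)
user_latent = user_embedding(user_ids)
```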
Lines changed: 200 additions & 0 deletions
@@ -0,0 +1,200 @@
import os
import tensorflow as tf
import tensorflow_datasets as tfds

from absl import flags
from absl import app
from tensorflow_recommenders_addons import dynamic_embedding as de

flags.DEFINE_string('mode', 'train',
                    'Select the running mode: train, export or test.')
flags.DEFINE_string('model_dir', 'model_dir',
                    'Directory where the checkpoint is stored.')
flags.DEFINE_string('export_dir', 'export_dir',
                    'Directory where the model is saved for inference.')
flags.DEFINE_integer('steps_per_epoch', 20000, 'Number of training steps.')
flags.DEFINE_integer('epochs', 1, 'Number of training epochs.')
flags.DEFINE_integer('embedding_size', 32,
                     'Embedding size for users and movies.')
flags.DEFINE_integer('test_steps', 128, 'Number of testing steps.')
flags.DEFINE_integer('test_batch', 1024, 'Batch size for testing.')
FLAGS = flags.FLAGS

# Input signature used when exporting the serving function.
input_spec = {
    'user_id': tf.TensorSpec(shape=[None], dtype=tf.int64, name='user_id'),
    'movie_id': tf.TensorSpec(shape=[None], dtype=tf.int64, name='movie_id')
}


class DualChannelsDeepModel(tf.keras.Model):

  def __init__(self,
               user_embedding_size=1,
               movie_embedding_size=1,
               embedding_initializer=None,
               is_training=True):

    if not is_training:
      de.enable_inference_mode()

    super(DualChannelsDeepModel, self).__init__()
    self.user_embedding_size = user_embedding_size
    self.movie_embedding_size = movie_embedding_size

    if embedding_initializer is None:
      embedding_initializer = tf.keras.initializers.Zeros()

    # Dynamic embedding layers look up user/movie ids without declaring a
    # fixed vocabulary size.
    self.user_embedding = de.keras.layers.SquashedEmbedding(
        user_embedding_size,
        initializer=embedding_initializer,
        name='user_embedding')
    self.movie_embedding = de.keras.layers.SquashedEmbedding(
        movie_embedding_size,
        initializer=embedding_initializer,
        name='movie_embedding')

    self.dnn1 = tf.keras.layers.Dense(
        64,
        activation='relu',
        kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),
        bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))
    self.dnn2 = tf.keras.layers.Dense(
        16,
        activation='relu',
        kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),
        bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))
    self.dnn3 = tf.keras.layers.Dense(
        5,
        activation='softmax',
        kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),
        bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))
    self.bias_net = tf.keras.layers.Dense(
        5,
        activation='softmax',
        kernel_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1),
        bias_initializer=tf.keras.initializers.RandomNormal(0.0, 0.1))

  @tf.function
  def call(self, features):
    user_id = tf.reshape(features['user_id'], (-1, 1))
    movie_id = tf.reshape(features['movie_id'], (-1, 1))
    user_latent = self.user_embedding(user_id)
    movie_latent = self.movie_embedding(movie_id)
    latent = tf.concat([user_latent, movie_latent], axis=1)

    x = self.dnn1(latent)
    x = self.dnn2(x)
    x = self.dnn3(x)

    bias = self.bias_net(latent)
    # Blend the deep channel with the shallow bias channel to produce the
    # distribution over the 5 rating classes.
    x = 0.2 * x + 0.8 * bias
    return x


def get_dataset(batch_size=1):
  dataset = tfds.load('movielens/1m-ratings', split='train')
  # Movie and user ids are strings in the dataset; convert them to int64 keys
  # for the dynamic embedding layers.
  features = dataset.map(
      lambda x: {
          "movie_id": tf.strings.to_number(x["movie_id"], tf.int64),
          "user_id": tf.strings.to_number(x["user_id"], tf.int64),
      })
  # Ratings in [1, 5] become one-hot labels over 5 classes.
  ratings = dataset.map(
      lambda x: tf.one_hot(tf.cast(x['user_rating'] - 1, dtype=tf.int64), 5))
  dataset = tf.data.Dataset.zip((features, ratings))
  dataset = dataset.shuffle(4096, reshuffle_each_iteration=False)
  if batch_size > 1:
    dataset = dataset.batch(batch_size)

  return dataset


def train():
  dataset = get_dataset(batch_size=32)
  model = DualChannelsDeepModel(FLAGS.embedding_size, FLAGS.embedding_size,
                                tf.keras.initializers.RandomNormal(0.0, 0.5))
  optimizer = tf.keras.optimizers.Adam(1E-3)
  # Wrap the optimizer so it can update the dynamic embedding tables.
  optimizer = de.DynamicEmbeddingOptimizer(optimizer)

  auc = tf.keras.metrics.AUC(num_thresholds=1000)
  model.compile(optimizer=optimizer,
                loss=tf.keras.losses.MeanSquaredError(),
                metrics=[auc])

  if os.path.exists(FLAGS.model_dir):
    model.load_weights(FLAGS.model_dir)

  model.fit(dataset, epochs=FLAGS.epochs, steps_per_epoch=FLAGS.steps_per_epoch)

  save_options = tf.saved_model.SaveOptions(namespace_whitelist=['TFRA'])
  model.save(FLAGS.model_dir, options=save_options)


def export():
  model = DualChannelsDeepModel(FLAGS.embedding_size, FLAGS.embedding_size,
                                tf.keras.initializers.Zeros(), False)
  model.load_weights(FLAGS.model_dir)

  # Build the model with dummy data. If the model were built with explicit
  # input specs, the dummy data would not be needed.
  dummy_data = {
      'user_id': tf.zeros((16,), dtype=tf.int64),
      'movie_id': tf.zeros((16,), dtype=tf.int64)
  }
  model(dummy_data)

  save_options = tf.saved_model.SaveOptions(namespace_whitelist=['TFRA'])
  tf.keras.models.save_model(
      model,
      FLAGS.export_dir,
      options=save_options,
      include_optimizer=False,
      signatures=model.call.get_concrete_function(input_spec))


def test():
  de.enable_inference_mode()

  dataset = get_dataset(batch_size=FLAGS.test_batch)
  model = tf.keras.models.load_model(FLAGS.export_dir)
  # The serving signature is available on the loaded model; predictions below
  # call the Keras model object directly.
  signature = model.signatures['serving_default']

  def get_close_or_equal_cnt(model, features, ratings):
    preds = model(features)
    preds = tf.math.argmax(preds, axis=1)
    ratings = tf.math.argmax(ratings, axis=1)
    close_cnt = tf.reduce_sum(
        tf.cast(tf.math.abs(preds - ratings) <= 1, dtype=tf.int32))
    equal_cnt = tf.reduce_sum(
        tf.cast(tf.math.abs(preds - ratings) == 0, dtype=tf.int32))
    return close_cnt, equal_cnt

  it = iter(dataset)
  for step in range(FLAGS.test_steps):
    features, ratings = it.get_next()
    close_cnt, equal_cnt = get_close_or_equal_cnt(model, features, ratings)
    print(f'In batch prediction, step: {step}, '
          f'{close_cnt}/{FLAGS.test_batch} are closely accurate, '
          f'{equal_cnt}/{FLAGS.test_batch} are absolutely accurate.')


def main(argv):
  del argv
  if FLAGS.mode == 'train':
    train()
  elif FLAGS.mode == 'export':
    export()
  elif FLAGS.mode == 'test':
    test()
  else:
    raise ValueError('running mode only supports `train`, `export` or `test`')


if __name__ == '__main__':
  app.run(main)
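Once `--mode=export` has written the SavedModel, it can also be queried through its serving signature instead of `tf.keras.models.load_model`. The following is a minimal sketch under a few assumptions: the default `export_dir` path, the `user_id`/`movie_id` names from `input_spec`, and illustrative id values; the output key of the signature may vary, so the single output tensor is read from the returned dict rather than by name.

```python
import tensorflow as tf
from tensorflow_recommenders_addons import dynamic_embedding as de

# Mirror the demo's test() setup so the TFRA ops load in inference mode.
de.enable_inference_mode()

# Load the SavedModel produced by `--mode=export` (default --export_dir path).
loaded = tf.saved_model.load('export_dir')
serving_fn = loaded.signatures['serving_default']

# Feed int64 ids using the names declared in `input_spec`.
outputs = serving_fn(user_id=tf.constant([1], dtype=tf.int64),
                     movie_id=tf.constant([2], dtype=tf.int64))

# The signature returns a dict with a single tensor: the predicted
# distribution over the 5 rating classes.
probs = list(outputs.values())[0]
predicted_rating = tf.math.argmax(probs, axis=1) + 1
print(predicted_rating.numpy())
```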
