Skip to content

Commit 0cd3d46

Browse files
authored
Recommend sys new api (#10894)
1 parent d406c76 commit 0cd3d46

File tree

3 files changed

+273
-0
lines changed

3 files changed

+273
-0
lines changed

python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ add_subdirectory(fit_a_line)
1010
add_subdirectory(recognize_digits)
1111
add_subdirectory(image_classification)
1212
add_subdirectory(understand_sentiment)
13+
add_subdirectory(recommender_system)
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Collect every test_*.py in this directory and register each as a CTest
# target via py_test, named after the file without its .py extension.
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")

# default test
foreach(src ${TEST_OPS})
  py_test(${src} SRCS ${src}.py)
endforeach()
Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import math
16+
import sys
17+
import numpy as np
18+
import paddle
19+
import paddle.fluid as fluid
20+
import paddle.fluid.layers as layers
21+
import paddle.fluid.nets as nets
22+
23+
# Use sparse gradient updates for all embedding tables below.
IS_SPARSE = True
# Run on GPU when True; main() skips if no CUDA build is present.
USE_GPU = False
# Mini-batch size shared by the train and test readers.
BATCH_SIZE = 256
26+
27+
28+
def get_usr_combined_features():
    """Build the combined user-side feature vector.

    Embeds user id, gender, age bucket and job id, runs each embedding
    through its own fc layer, concatenates the four outputs and projects
    them to a single 200-dim tanh feature.
    """
    # FIXME(dzh) : old API integer_value(10) may have range check.
    # currently we don't have user configurated check.

    user_dict_size = paddle.dataset.movielens.max_user_id() + 1

    uid = layers.data(name='user_id', shape=[1], dtype='int64')
    uid_emb = layers.embedding(
        input=uid,
        dtype='float32',
        size=[user_dict_size, 32],
        param_attr='user_table',
        is_sparse=IS_SPARSE)
    uid_fc = layers.fc(input=uid_emb, size=32)

    gender_dict_size = 2
    gender = layers.data(name='gender_id', shape=[1], dtype='int64')
    gender_emb = layers.embedding(
        input=gender,
        size=[gender_dict_size, 16],
        param_attr='gender_table',
        is_sparse=IS_SPARSE)
    gender_fc = layers.fc(input=gender_emb, size=16)

    age_dict_size = len(paddle.dataset.movielens.age_table)
    age = layers.data(name='age_id', shape=[1], dtype="int64")
    age_emb = layers.embedding(
        input=age,
        size=[age_dict_size, 16],
        is_sparse=IS_SPARSE,
        param_attr='age_table')
    age_fc = layers.fc(input=age_emb, size=16)

    job_dict_size = paddle.dataset.movielens.max_job_id() + 1
    job = layers.data(name='job_id', shape=[1], dtype="int64")
    job_emb = layers.embedding(
        input=job,
        size=[job_dict_size, 16],
        param_attr='job_table',
        is_sparse=IS_SPARSE)
    job_fc = layers.fc(input=job_emb, size=16)

    fused = layers.concat(input=[uid_fc, gender_fc, age_fc, job_fc], axis=1)
    return layers.fc(input=fused, size=200, act="tanh")
85+
86+
87+
def get_mov_combined_features():
    """Build the combined movie-side feature vector.

    Embeds movie id, category sequence and title sequence; categories are
    sum-pooled, titles go through a conv-pool; the three parts are fused
    into a single 200-dim tanh feature.
    """
    movie_dict_size = paddle.dataset.movielens.max_movie_id() + 1

    mid = layers.data(name='movie_id', shape=[1], dtype='int64')
    mid_emb = layers.embedding(
        input=mid,
        dtype='float32',
        size=[movie_dict_size, 32],
        param_attr='movie_table',
        is_sparse=IS_SPARSE)
    mid_fc = layers.fc(input=mid_emb, size=32)

    category_dict_size = len(paddle.dataset.movielens.movie_categories())
    categories = layers.data(
        name='category_id', shape=[1], dtype='int64', lod_level=1)
    categories_emb = layers.embedding(
        input=categories, size=[category_dict_size, 32], is_sparse=IS_SPARSE)
    # Variable-length category sequence -> fixed-size vector via sum pooling.
    categories_pooled = layers.sequence_pool(
        input=categories_emb, pool_type="sum")

    title_dict_size = len(paddle.dataset.movielens.get_movie_title_dict())
    title = layers.data(
        name='movie_title', shape=[1], dtype='int64', lod_level=1)
    title_emb = layers.embedding(
        input=title, size=[title_dict_size, 32], is_sparse=IS_SPARSE)
    title_conv = nets.sequence_conv_pool(
        input=title_emb,
        num_filters=32,
        filter_size=3,
        act="tanh",
        pool_type="sum")

    fused = layers.concat(
        input=[mid_fc, categories_pooled, title_conv], axis=1)

    # FIXME(dzh) : need tanh operator
    return layers.fc(input=fused, size=200, act="tanh")
135+
136+
137+
def inference_program():
    """Predict a movie score in [0, 5] as scaled cosine similarity
    between the user and movie feature vectors."""
    user_features = get_usr_combined_features()
    movie_features = get_mov_combined_features()

    similarity = layers.cos_sim(X=user_features, Y=movie_features)
    return layers.scale(x=similarity, scale=5.0)
145+
146+
147+
def train_program():
    """Return [mean squared-error cost, predicted score] for the trainer."""
    prediction = inference_program()

    score = layers.data(name='score', shape=[1], dtype='float32')
    sq_err = layers.square_error_cost(input=prediction, label=score)
    return [layers.mean(sq_err), prediction]
156+
157+
158+
def train(use_cuda, train_program, save_path):
    """Train the recommender; save params and stop once test cost < 4.

    Args:
        use_cuda: run on CUDAPlace(0) when True, else CPUPlace.
        train_program: function returning [avg_cost, prediction].
        save_path: directory passed to trainer.save_params.
    """
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    optimizer = fluid.optimizer.SGD(learning_rate=0.2)

    trainer = fluid.Trainer(
        train_func=train_program, place=place, optimizer=optimizer)

    # Single source of truth for the feed order; previously this list was
    # duplicated as a literal in the trainer.train call below, which risked
    # the two copies drifting apart.
    feed_order = [
        'user_id', 'gender_id', 'age_id', 'job_id', 'movie_id', 'category_id',
        'movie_title', 'score'
    ]

    def event_handler(event):
        # After every step, evaluate on the test set; stop early when the
        # cost is low enough (kept small to speed up CI).
        if isinstance(event, fluid.EndStepEvent):
            test_reader = paddle.batch(
                paddle.dataset.movielens.test(), batch_size=BATCH_SIZE)
            avg_cost_set = trainer.test(
                reader=test_reader, feed_order=feed_order)

            # get avg cost
            avg_cost = np.array(avg_cost_set).mean()

            print("avg_cost: %s" % avg_cost)

            if float(avg_cost) < 4:  # Smaller value to increase CI speed
                trainer.save_params(save_path)
                trainer.stop()
            else:
                print('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1,
                                                              float(avg_cost)))
                if math.isnan(float(avg_cost)):
                    sys.exit("got NaN loss, training failed.")

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.movielens.train(), buf_size=8192),
        batch_size=BATCH_SIZE)

    trainer.train(
        num_epochs=1,
        event_handler=event_handler,
        reader=train_reader,
        feed_order=feed_order)
204+
205+
206+
def infer(use_cuda, inference_program, save_path):
    """Load trained parameters and run one inference on a hand-built sample.

    Args:
        use_cuda: run on CUDAPlace(0) when True, else CPUPlace.
        inference_program: function that builds the prediction network.
        save_path: directory the trained parameters were saved to.
    """
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    inferencer = fluid.Inferencer(
        inference_program, param_path=save_path, place=place)

    def create_lod_tensor(data, lod=None):
        """Pack nested int lists into an int64 LoDTensor of shape [N, 1]."""
        tensor = fluid.LoDTensor()
        if lod is None:
            # Dense tensor of shape [batch_size, 1]: one offset per row.
            # Replaces the former manual counter loop with the equivalent
            # offset list [0, 1, ..., len(data)].
            lod = [list(range(len(data) + 1))]
        tensor.set_lod(lod)

        flattened_data = np.concatenate(data, axis=0).astype("int64")
        flattened_data = flattened_data.reshape([len(flattened_data), 1])
        tensor.set(flattened_data, place)
        return tensor

    # Generate a random input for inference
    user_id = create_lod_tensor([[1]])
    gender_id = create_lod_tensor([[1]])
    age_id = create_lod_tensor([[0]])
    job_id = create_lod_tensor([[10]])
    movie_id = create_lod_tensor([[783]])
    category_id = create_lod_tensor([[10], [8], [9]], [[0, 3]])
    movie_title = create_lod_tensor([[1069], [4140], [2923], [710], [988]],
                                    [[0, 5]])

    results = inferencer.infer(
        {
            'user_id': user_id,
            'gender_id': gender_id,
            'age_id': age_id,
            'job_id': job_id,
            'movie_id': movie_id,
            'category_id': category_id,
            'movie_title': movie_title
        },
        return_numpy=False)

    print("infer results: ", np.array(results[0]))
251+
252+
253+
def main(use_cuda):
    """End-to-end driver: train the model, then run inference with it."""
    # Skip silently on machines without a CUDA build when GPU is requested.
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return
    model_dir = "recommender_system.inference.model"
    train(use_cuda=use_cuda, train_program=train_program, save_path=model_dir)
    infer(use_cuda=use_cuda,
          inference_program=inference_program,
          save_path=model_dir)
262+
263+
264+
# Script entry point; USE_GPU is the module-level device switch above.
if __name__ == '__main__':
    main(USE_GPU)

0 commit comments

Comments
 (0)