
Commit aaa2a1f

Merge pull request #1501 from reyoung/feature/recommendation_v2_api

Feature/recommendation v2 api

2 parents: 79e95c1 + dda02fe

6 files changed: +300 −28 lines


demo/mnist/api_train_v2.py

Lines changed: 10 additions & 15 deletions
@@ -92,12 +92,8 @@ def main():
     def event_handler(event):
         if isinstance(event, paddle.event.EndIteration):
             if event.batch_id % 1000 == 0:
-                result = trainer.test(reader=paddle.batch(
-                    paddle.dataset.mnist.test(), batch_size=256))
-
-                print "Pass %d, Batch %d, Cost %f, %s, Testing metrics %s" % (
-                    event.pass_id, event.batch_id, event.cost, event.metrics,
-                    result.metrics)
+                print "Pass %d, Batch %d, Cost %f, %s" % (
+                    event.pass_id, event.batch_id, event.cost, event.metrics)
 
             with gzip.open('params.tar.gz', 'w') as f:
                 parameters.to_tar(f)
@@ -123,17 +119,16 @@ def event_handler(event):
     print 'Best pass is %s, testing Avgcost is %s' % (best[0], best[1])
     print 'The classification accuracy is %.2f%%' % (100 - float(best[2]) * 100)
 
+    test_creator = paddle.dataset.mnist.test()
+    test_data = []
+    for item in test_creator():
+        test_data.append(item[0])
+        if len(test_data) == 100:
+            break
+
     # output is a softmax layer. It returns probabilities.
     # Shape should be (100, 10)
-    probs = paddle.infer(
-        output=predict,
-        parameters=parameters,
-        reader=paddle.batch(
-            paddle.reader.firstn(
-                paddle.reader.map_readers(lambda item: (item[0], ),
-                                          paddle.dataset.mnist.test()),
-                n=100),
-            batch_size=32))
+    probs = paddle.infer(output=predict, parameters=parameters, input=test_data)
     print probs.shape
 
 
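The substantive API change in this file is on the inference side: the `reader`/`firstn` pipeline is dropped and the test samples are passed directly through `input=`. A minimal consolidated sketch of the new pattern (Python 2, as in the demo; `predict` and `parameters` are assumed to be the network output and trained parameters built earlier in the script):

import paddle.v2 as paddle

# Collect the first 100 test images. Each MNIST sample is (image, label);
# inference only needs the image part.
test_data = []
for item in paddle.dataset.mnist.test()():
    test_data.append(item[0])
    if len(test_data) == 100:
        break

# v2-style inference: pass the list of samples via `input` instead of a reader.
probs = paddle.infer(output=predict, parameters=parameters, input=test_data)
print probs.shape  # (100, 10): one softmax row per test image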

Lines changed: 125 additions & 0 deletions (new file)
@@ -0,0 +1,125 @@
+import paddle.v2 as paddle
+import cPickle
+import copy
+
+
+def main():
+    paddle.init(use_gpu=False)
+    movie_title_dict = paddle.dataset.movielens.get_movie_title_dict()
+    uid = paddle.layer.data(
+        name='user_id',
+        type=paddle.data_type.integer_value(
+            paddle.dataset.movielens.max_user_id() + 1))
+    usr_emb = paddle.layer.embedding(input=uid, size=32)
+
+    usr_gender_id = paddle.layer.data(
+        name='gender_id', type=paddle.data_type.integer_value(2))
+    usr_gender_emb = paddle.layer.embedding(input=usr_gender_id, size=16)
+
+    usr_age_id = paddle.layer.data(
+        name='age_id',
+        type=paddle.data_type.integer_value(
+            len(paddle.dataset.movielens.age_table)))
+    usr_age_emb = paddle.layer.embedding(input=usr_age_id, size=16)
+
+    usr_job_id = paddle.layer.data(
+        name='job_id',
+        type=paddle.data_type.integer_value(paddle.dataset.movielens.max_job_id(
+        ) + 1))
+
+    usr_job_emb = paddle.layer.embedding(input=usr_job_id, size=16)
+
+    usr_combined_features = paddle.layer.fc(
+        input=[usr_emb, usr_gender_emb, usr_age_emb, usr_job_emb],
+        size=200,
+        act=paddle.activation.Tanh())
+
+    mov_id = paddle.layer.data(
+        name='movie_id',
+        type=paddle.data_type.integer_value(
+            paddle.dataset.movielens.max_movie_id() + 1))
+    mov_emb = paddle.layer.embedding(input=mov_id, size=32)
+
+    mov_categories = paddle.layer.data(
+        name='category_id',
+        type=paddle.data_type.sparse_binary_vector(
+            len(paddle.dataset.movielens.movie_categories())))
+
+    mov_categories_hidden = paddle.layer.fc(input=mov_categories, size=32)
+
+    mov_title_id = paddle.layer.data(
+        name='movie_title',
+        type=paddle.data_type.integer_value_sequence(len(movie_title_dict)))
+    mov_title_emb = paddle.layer.embedding(input=mov_title_id, size=32)
+    mov_title_conv = paddle.networks.sequence_conv_pool(
+        input=mov_title_emb, hidden_size=32, context_len=3)
+
+    mov_combined_features = paddle.layer.fc(
+        input=[mov_emb, mov_categories_hidden, mov_title_conv],
+        size=200,
+        act=paddle.activation.Tanh())
+
+    inference = paddle.layer.cos_sim(
+        a=usr_combined_features, b=mov_combined_features, size=1, scale=5)
+    cost = paddle.layer.regression_cost(
+        input=inference,
+        label=paddle.layer.data(
+            name='score', type=paddle.data_type.dense_vector(1)))
+
+    parameters = paddle.parameters.create(cost)
+
+    trainer = paddle.trainer.SGD(cost=cost,
+                                 parameters=parameters,
+                                 update_equation=paddle.optimizer.Adam(
+                                     learning_rate=1e-4))
+    feeding = {
+        'user_id': 0,
+        'gender_id': 1,
+        'age_id': 2,
+        'job_id': 3,
+        'movie_id': 4,
+        'category_id': 5,
+        'movie_title': 6,
+        'score': 7
+    }
+
+    def event_handler(event):
+        if isinstance(event, paddle.event.EndIteration):
+            if event.batch_id % 100 == 0:
+                print "Pass %d Batch %d Cost %.2f" % (
+                    event.pass_id, event.batch_id, event.cost)
+
+    trainer.train(
+        reader=paddle.batch(
+            paddle.reader.shuffle(
+                paddle.dataset.movielens.train(), buf_size=8192),
+            batch_size=256),
+        event_handler=event_handler,
+        feeding=feeding,
+        num_passes=1)
+
+    user_id = 234
+    movie_id = 345
+
+    user = paddle.dataset.movielens.user_info()[user_id]
+    movie = paddle.dataset.movielens.movie_info()[movie_id]
+
+    feature = user.value() + movie.value()
+
+    def reader():
+        yield feature
+
+    infer_dict = copy.copy(feeding)
+    del infer_dict['score']
+
+    prediction = paddle.infer(
+        output=inference,
+        parameters=parameters,
+        reader=paddle.batch(
+            reader, batch_size=32),
+        feeding=infer_dict)
+    print(prediction + 5) / 2
+
+
+if __name__ == '__main__':
+    main()
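Two details of the new script may not be obvious on a first read. The `feeding` dict maps each data layer name to the position of that field in the tuples yielded by the movielens reader, and the closing `print (prediction + 5) / 2` rescales the `cos_sim` output, which `scale=5` keeps roughly within [-5, 5], into a 0 to 5 rating range. A small sketch of the first point (assuming the reader yields 8-field tuples in the order the `feeding` dict describes):

import paddle.v2 as paddle

# Each sample from the movielens reader is a tuple whose positions line up
# with the `feeding` dict above: user_id, gender_id, age_id, job_id,
# movie_id, category_id, movie_title, score.
sample = next(paddle.dataset.movielens.train()())
print len(sample)                      # 8 fields per sample
print sample[0], sample[4], sample[7]  # user id, movie id, rating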

doc/api/v2/run_logic.rst

Lines changed: 8 additions & 0 deletions
@@ -2,6 +2,7 @@
 Trainer API
 ###########
 
+
 ==========
 Parameters
 ==========
@@ -24,3 +25,10 @@ Event
 
 .. automodule:: paddle.v2.event
     :members:
+
+
+=========
+Inference
+=========
+
+.. autofunction:: paddle.v2.infer

python/paddle/v2/data_feeder.py

Lines changed: 3 additions & 0 deletions
@@ -85,6 +85,9 @@ def __init__(self, data_types, feeding=None):
             input_types.append(each[1])
         DataProviderConverter.__init__(self, input_types)
 
+    def __len__(self):
+        return len(self.input_names)
+
     def convert(self, dat, argument=None):
         """
         :param dat: A list of mini-batch data. Each sample is a list or tuple
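The added `__len__` simply reports how many input slots the feeder was constructed with. A minimal sketch of how it could be exercised (the layer names, types, and the `DataFeeder` import path are illustrative assumptions, not part of this commit):

import paddle.v2 as paddle
from paddle.v2.data_feeder import DataFeeder

# `data_types` pairs each data layer name with its type; `feeding` maps the
# names to their positions in a sample tuple.
feeder = DataFeeder(
    data_types=[('image', paddle.data_type.dense_vector(784)),
                ('label', paddle.data_type.integer_value(10))],
    feeding={'image': 0, 'label': 1})

print len(feeder)  # 2: one entry per input slot, via the new __len__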

python/paddle/v2/dataset/movielens.py

Lines changed: 77 additions & 7 deletions
@@ -23,7 +23,12 @@
 import random
 import functools
 
-__all__ = ['train_creator', 'test_creator']
+__all__ = [
+    'train', 'test', 'get_movie_title_dict', 'max_movie_id', 'max_user_id',
+    'age_table', 'movie_categories', 'max_job_id', 'user_info', 'movie_info'
+]
+
+age_table = [1, 18, 25, 35, 45, 50, 56]
 
 
 class MovieInfo(object):
@@ -38,17 +43,32 @@ def value(self):
             [MOVIE_TITLE_DICT[w.lower()] for w in self.title.split()]
         ]
 
+    def __str__(self):
+        return "<MovieInfo id(%d), title(%s), categories(%s)>" % (
+            self.index, self.title, self.categories)
+
+    def __repr__(self):
+        return self.__str__()
+
 
 class UserInfo(object):
     def __init__(self, index, gender, age, job_id):
         self.index = int(index)
         self.is_male = gender == 'M'
-        self.age = [1, 18, 25, 35, 45, 50, 56].index(int(age))
+        self.age = age_table.index(int(age))
         self.job_id = int(job_id)
 
     def value(self):
         return [self.index, 0 if self.is_male else 1, self.age, self.job_id]
 
+    def __str__(self):
+        return "<UserInfo id(%d), gender(%s), age(%d), job(%d)>" % (
+            self.index, "M"
+            if self.is_male else "F", age_table[self.age], self.job_id)
+
+    def __repr__(self):
+        return str(self)
+
 
 MOVIE_INFO = None
 MOVIE_TITLE_DICT = None
@@ -59,7 +79,8 @@ def value(self):
 def __initialize_meta_info__():
     fn = download(
         url='http://files.grouplens.org/datasets/movielens/ml-1m.zip',
-        md5='c4d9eecfca2ab87c1945afe126590906')
+        module_name='movielens',
+        md5sum='c4d9eecfca2ab87c1945afe126590906')
     global MOVIE_INFO
     if MOVIE_INFO is None:
         pattern = re.compile(r'^(.*)\((\d+)\)$')
@@ -122,14 +143,63 @@ def __reader_creator__(**kwargs):
     return lambda: __reader__(**kwargs)
 
 
-train_creator = functools.partial(__reader_creator__, is_test=False)
-test_creator = functools.partial(__reader_creator__, is_test=True)
+train = functools.partial(__reader_creator__, is_test=False)
+test = functools.partial(__reader_creator__, is_test=True)
+
+
+def get_movie_title_dict():
+    __initialize_meta_info__()
+    return MOVIE_TITLE_DICT
+
+
+def __max_index_info__(a, b):
+    if a.index > b.index:
+        return a
+    else:
+        return b
+
+
+def max_movie_id():
+    __initialize_meta_info__()
+    return reduce(__max_index_info__, MOVIE_INFO.viewvalues()).index
+
+
+def max_user_id():
+    __initialize_meta_info__()
+    return reduce(__max_index_info__, USER_INFO.viewvalues()).index
+
+
+def __max_job_id_impl__(a, b):
+    if a.job_id > b.job_id:
+        return a
+    else:
+        return b
+
+
+def max_job_id():
+    __initialize_meta_info__()
+    return reduce(__max_job_id_impl__, USER_INFO.viewvalues()).job_id
+
+
+def movie_categories():
+    __initialize_meta_info__()
+    return CATEGORIES_DICT
+
+
+def user_info():
+    __initialize_meta_info__()
+    return USER_INFO
+
+
+def movie_info():
+    __initialize_meta_info__()
+    return MOVIE_INFO
 
 
 def unittest():
-    for train_count, _ in enumerate(train_creator()()):
+    for train_count, _ in enumerate(train()()):
         pass
-    for test_count, _ in enumerate(test_creator()()):
+    for test_count, _ in enumerate(test()()):
         pass
 
     print train_count, test_count
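The module's public surface is renamed (`train_creator`/`test_creator` become `train`/`test`) and gains the metadata helpers the recommendation demo depends on. A minimal sketch of how they fit together (Python 2, matching the module's own use of `reduce` and `viewvalues()`; concrete values depend on the downloaded ML-1M data):

import paddle.v2 as paddle

ml = paddle.dataset.movielens

# The renamed readers are creators: call once to build the reader,
# then call the reader to iterate over samples.
first_sample = next(ml.train()())
print first_sample

# Metadata helpers added in this commit, as used by the recommendation demo.
print ml.max_user_id(), ml.max_movie_id(), ml.max_job_id()
print ml.user_info()[234]   # e.g. <UserInfo id(234), gender(...), age(...), job(...)>
print ml.movie_info()[345]  # e.g. <MovieInfo id(345), title(...), categories(...)>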
