|
| 1 | +import paddle.v2 as paddle |
| 2 | +import cPickle |
| 3 | +import copy |
| 4 | + |
| 5 | + |
def main():
    """Train a MovieLens rating model with PaddlePaddle v2 and print one prediction.

    The network has a user branch and a movie branch, each ending in a
    200-unit Tanh fully-connected layer.  Their cosine similarity, scaled
    by 5, is the model output and is trained against the ``score`` field
    with a regression cost.  After a single training pass, the prediction
    for one hard-coded (user, movie) pair is printed as
    ``(prediction + 5) / 2``.

    Takes no arguments and returns nothing; all results are printed.
    """
    paddle.init(use_gpu=False)

    # Short aliases for the paddle sub-modules used throughout.
    movielens = paddle.dataset.movielens
    layer = paddle.layer
    dtype = paddle.data_type

    title_vocab = movielens.get_movie_title_dict()

    def id_embedding(name, cardinality, emb_size):
        """Declare an integer-id data layer called ``name`` and embed it."""
        ids = layer.data(name=name, type=dtype.integer_value(cardinality))
        return layer.embedding(input=ids, size=emb_size)

    # ---- user branch -------------------------------------------------
    # NOTE: the layers are created in the same order as before (user id,
    # gender, age, job); list elements are evaluated left to right.
    user_inputs = [
        id_embedding('user_id', movielens.max_user_id() + 1, 32),
        id_embedding('gender_id', 2, 16),
        id_embedding('age_id', len(movielens.age_table), 16),
        id_embedding('job_id', movielens.max_job_id() + 1, 16),
    ]
    user_features = layer.fc(
        input=user_inputs, size=200, act=paddle.activation.Tanh())

    # ---- movie branch ------------------------------------------------
    movie_emb = id_embedding('movie_id', movielens.max_movie_id() + 1, 32)

    # Categories are fed as a sparse binary vector over all categories.
    category_count = len(movielens.movie_categories())
    category_input = layer.data(
        name='category_id', type=dtype.sparse_binary_vector(category_count))
    category_hidden = layer.fc(input=category_input, size=32)

    # The title is a sequence of word ids: embed, then convolve and pool.
    title_input = layer.data(
        name='movie_title',
        type=dtype.integer_value_sequence(len(title_vocab)))
    title_emb = layer.embedding(input=title_input, size=32)
    title_conv = paddle.networks.sequence_conv_pool(
        input=title_emb, hidden_size=32, context_len=3)

    movie_features = layer.fc(
        input=[movie_emb, category_hidden, title_conv],
        size=200,
        act=paddle.activation.Tanh())

    # ---- output and cost ---------------------------------------------
    inference = layer.cos_sim(
        a=user_features, b=movie_features, size=1, scale=5)
    score_label = layer.data(name='score', type=dtype.dense_vector(1))
    cost = layer.regression_cost(input=inference, label=score_label)

    # ---- training ----------------------------------------------------
    parameters = paddle.parameters.create(cost)
    adam = paddle.optimizer.Adam(learning_rate=1e-4)
    trainer = paddle.trainer.SGD(
        cost=cost, parameters=parameters, update_equation=adam)

    # Maps each data-layer name to its position in a reader sample.
    feeding = {
        'user_id': 0,
        'gender_id': 1,
        'age_id': 2,
        'job_id': 3,
        'movie_id': 4,
        'category_id': 5,
        'movie_title': 6,
        'score': 7
    }

    def event_handler(event):
        """Print the training cost once every 100 batches."""
        if not isinstance(event, paddle.event.EndIteration):
            return
        if event.batch_id % 100 != 0:
            return
        # A parenthesised single argument prints exactly like the
        # Python 2 print statement.
        print("Pass %d Batch %d Cost %.2f" % (
            event.pass_id, event.batch_id, event.cost))

    shuffled_samples = paddle.reader.shuffle(movielens.train(), buf_size=8192)
    train_batches = paddle.batch(shuffled_samples, batch_size=256)
    trainer.train(
        reader=train_batches,
        event_handler=event_handler,
        feeding=feeding,
        num_passes=1)

    # ---- inference for one hard-coded (user, movie) pair --------------
    user_id = 234
    movie_id = 345

    user = movielens.user_info()[user_id]
    movie = movielens.movie_info()[movie_id]

    # One sample: the user's fields followed by the movie's fields.
    feature = user.value() + movie.value()

    def single_sample_reader():
        """Yield the single sample to run inference on."""
        yield feature

    # Inference gets no label, so drop 'score' from a copy of the mapping.
    infer_dict = copy.copy(feeding)
    infer_dict.pop('score')

    infer_batches = paddle.batch(single_sample_reader, batch_size=32)
    prediction = paddle.infer(
        output=inference,
        parameters=parameters,
        reader=infer_batches,
        feeding=infer_dict)

    # Presumably maps the scaled cosine output from [-5, 5] onto [0, 5]
    # -- TODO confirm against the dataset's score scaling.
    rescaled = (prediction + 5) / 2
    print(rescaled)
| 122 | + |
| 123 | + |
# Run the example only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments