Skip to content

Commit 49e669f

Browse files
committed
align
1 parent 96e3b6d commit 49e669f

File tree

9 files changed

+178
-36
lines changed

9 files changed

+178
-36
lines changed

.pre-commit-config.yaml

100644100755
File mode changed.

models/recall/ncf/config.yaml

100644100755
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,6 @@ hyper_parameters:
3535
learning_rate: 0.001
3636
num_users: 6040
3737
num_items: 3706
38-
latent_dim: 8
38+
mf_dim: 8
39+
mode: "NCF_NeuMF" # one of: NCF_NeuMF, NCF_GMF, NCF_MLP
3940
fc_layers: [64, 32, 16, 8]

models/recall/ncf/config_bigdata.yaml

100644100755
Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
runner:
1616
train_data_dir: "../../../datasets/movielens_pinterest_NCF/big_train"
1717
train_reader_path: "movielens_reader" # importlib format
18-
train_batch_size: 256
18+
train_batch_size: 1024
1919
model_save_path: "output_model_ncf"
2020

2121
use_gpu: False
@@ -35,5 +35,6 @@ hyper_parameters:
3535
learning_rate: 0.001
3636
num_users: 6040
3737
num_items: 3706
38-
latent_dim: 8
38+
mf_dim: 8
39+
mode: "NCF_NeuMF" # one of: NCF_NeuMF, NCF_GMF, NCF_MLP
3940
fc_layers: [64, 32, 16, 8]

models/recall/ncf/dygraph_model.py

100644100755
Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,16 @@ class DygraphModel():
2424
def create_model(self, config):
    """Build the NCF network selected by ``hyper_parameters.mode``.

    Reads ``num_users``, ``num_items``, ``mf_dim``, ``mode`` and
    ``fc_layers`` from the ``hyper_parameters`` section of ``config`` and
    instantiates the matching layer class from ``net``.

    Args:
        config: configuration object exposing ``get("dotted.key")``.

    Returns:
        The constructed ``net.NCF_NeuMF_Layer``, ``net.NCF_GMF_Layer`` or
        ``net.NCF_MLP_Layer`` instance.

    Raises:
        ValueError: if ``hyper_parameters.mode`` is not one of
            "NCF_NeuMF", "NCF_GMF" or "NCF_MLP".
    """
    num_users = config.get("hyper_parameters.num_users")
    num_items = config.get("hyper_parameters.num_items")
    mf_dim = config.get("hyper_parameters.mf_dim")
    mode = config.get("hyper_parameters.mode")
    layers = config.get("hyper_parameters.fc_layers")

    # An explicit else-branch replaces three independent `if`s: a missing or
    # misspelled mode previously left the model variable unbound and failed
    # with an opaque UnboundLocalError at the return statement.
    if mode == "NCF_NeuMF":
        layer_cls = net.NCF_NeuMF_Layer
    elif mode == "NCF_GMF":
        layer_cls = net.NCF_GMF_Layer
    elif mode == "NCF_MLP":
        layer_cls = net.NCF_MLP_Layer
    else:
        raise ValueError(
            "hyper_parameters.mode must be one of 'NCF_NeuMF', 'NCF_GMF', "
            "'NCF_MLP', got %r" % (mode, ))

    ncf_model = layer_cls(num_users, num_items, mf_dim, layers)
    return ncf_model
3138

3239
# define feeds which convert numpy of batch data to paddle.tensor

models/recall/ncf/evaluate.py

100644100755
File mode changed.

models/recall/ncf/net.py

100644100755
Lines changed: 147 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -19,39 +19,47 @@
1919
import math
2020

2121

22-
class NCFLayer(nn.Layer):
23-
def __init__(self, num_users, num_items, latent_dim, layers):
24-
super(NCFLayer, self).__init__()
22+
class NCF_NeuMF_Layer(nn.Layer):
23+
def __init__(self, num_users, num_items, mf_dim, layers):
24+
super(NCF_NeuMF_Layer, self).__init__()
2525

2626
self.num_users = num_users
2727
self.num_items = num_items
28-
self.latent_dim = latent_dim
28+
self.mf_dim = mf_dim
2929
self.layers = layers
3030

3131
self.MF_Embedding_User = paddle.nn.Embedding(
3232
self.num_users,
33-
self.latent_dim,
34-
sparse=True,
35-
weight_attr=nn.initializer.Normal(
36-
mean=0.0, std=0.01))
33+
self.mf_dim,
34+
sparse=False,
35+
weight_attr=paddle.ParamAttr(
36+
initializer=nn.initializer.Normal(
37+
mean=0.0, std=0.01),
38+
regularizer=paddle.regularizer.L2Decay(coeff=0)))
3739
self.MF_Embedding_Item = paddle.nn.Embedding(
3840
self.num_items,
39-
self.latent_dim,
40-
sparse=True,
41-
weight_attr=nn.initializer.Normal(
42-
mean=0.0, std=0.01))
41+
self.mf_dim,
42+
sparse=False,
43+
weight_attr=paddle.ParamAttr(
44+
initializer=nn.initializer.Normal(
45+
mean=0.0, std=0.01),
46+
regularizer=paddle.regularizer.L2Decay(coeff=0)))
4347
self.MLP_Embedding_User = paddle.nn.Embedding(
4448
self.num_users,
4549
int(self.layers[0] / 2),
46-
sparse=True,
47-
weight_attr=nn.initializer.Normal(
48-
mean=0.0, std=0.01))
50+
sparse=False,
51+
weight_attr=paddle.ParamAttr(
52+
initializer=nn.initializer.Normal(
53+
mean=0.0, std=0.01),
54+
regularizer=paddle.regularizer.L2Decay(coeff=0)))
4955
self.MLP_Embedding_Item = paddle.nn.Embedding(
5056
self.num_items,
5157
int(self.layers[0] / 2),
52-
sparse=True,
53-
weight_attr=nn.initializer.Normal(
54-
mean=0.0, std=0.01))
58+
sparse=False,
59+
weight_attr=paddle.ParamAttr(
60+
initializer=nn.initializer.Normal(
61+
mean=0.0, std=0.01),
62+
regularizer=paddle.regularizer.L2Decay(coeff=0)))
5563

5664
num_layer = len(self.layers)
5765
self.MLP_fc = []
@@ -62,7 +70,7 @@ def __init__(self, num_users, num_items, latent_dim, layers):
6270
weight_attr=paddle.ParamAttr(
6371
initializer=nn.initializer.TruncatedNormal(
6472
mean=0.0, std=1.0 / math.sqrt(self.layers[i - 1])),
65-
regularizer=paddle.regularizer.L2Decay(coeff=1e-4)),
73+
regularizer=paddle.regularizer.L2Decay(coeff=0)),
6674
name='layer_' + str(i))
6775
self.add_sublayer('layer_%d' % i, Linear)
6876
self.MLP_fc.append(Linear)
@@ -73,7 +81,8 @@ def __init__(self, num_users, num_items, latent_dim, layers):
7381
self.prediction = paddle.nn.Linear(
7482
in_features=self.layers[2],
7583
out_features=1,
76-
weight_attr=nn.initializer.KaimingUniform(fan_in=None),
84+
weight_attr=nn.initializer.KaimingUniform(fan_in=self.layers[2] *
85+
2),
7786
name='prediction')
7887
self.sigmoid = paddle.nn.Sigmoid()
7988

@@ -112,3 +121,121 @@ def forward(self, input_data):
112121
prediction = self.prediction(predict_vector)
113122
prediction = self.sigmoid(prediction)
114123
return prediction
124+
125+
126+
class NCF_GMF_Layer(nn.Layer):
    """NCF variant that scores a (user, item) pair from MF embeddings only.

    The user and item embeddings (both ``mf_dim`` wide) are multiplied
    element-wise, passed through a single linear layer with one output and
    squashed with a sigmoid.

    Args:
        num_users: number of rows in the user embedding table.
        num_items: number of rows in the item embedding table.
        mf_dim: width of the user/item embeddings.
        layers: stored on the instance but not used by this network; kept
            so the constructor signature matches the other NCF layers.
    """

    def __init__(self, num_users, num_items, mf_dim, layers):
        super(NCF_GMF_Layer, self).__init__()

        self.num_users = num_users
        self.num_items = num_items
        self.mf_dim = mf_dim
        self.layers = layers

        self.MF_Embedding_User = paddle.nn.Embedding(
            self.num_users,
            self.mf_dim,
            sparse=True,
            weight_attr=nn.initializer.Normal(
                mean=0.0, std=0.01))

        self.MF_Embedding_Item = paddle.nn.Embedding(
            self.num_items,
            self.mf_dim,
            sparse=True,
            weight_attr=nn.initializer.Normal(
                mean=0.0, std=0.01))

        # The prediction layer consumes the element-wise product of the two
        # mf_dim-wide embeddings, so its input width is mf_dim.  It was
        # previously layers[3], which only worked when fc_layers had at
        # least four entries and fc_layers[3] happened to equal mf_dim.
        self.prediction = paddle.nn.Linear(
            in_features=self.mf_dim,
            out_features=1,
            weight_attr=nn.initializer.KaimingUniform(fan_in=None),
            name='prediction')

        self.sigmoid = paddle.nn.Sigmoid()

    def forward(self, input_data):
        """Return the sigmoid score for each (user, item) pair.

        Args:
            input_data: indexable batch; element 0 holds the user ids and
                element 1 the item ids.  Any further elements (e.g. a
                label) are ignored.
                NOTE(review): the flatten over axes 1..2 implies the
                embedding output is rank 3, i.e. ids are presumably shaped
                (batch, 1) -- confirm against the reader.

        Returns:
            Tensor produced by the sigmoid over the linear prediction.
        """
        user_input = input_data[0]
        item_input = input_data[1]

        user_embedding_mf = self.MF_Embedding_User(user_input)
        mf_user_latent = paddle.flatten(
            x=user_embedding_mf, start_axis=1, stop_axis=2)
        item_embedding_mf = self.MF_Embedding_Item(item_input)
        mf_item_latent = paddle.flatten(
            x=item_embedding_mf, start_axis=1, stop_axis=2)
        mf_vector = paddle.multiply(mf_user_latent, mf_item_latent)
        prediction = self.prediction(mf_vector)
        prediction = self.sigmoid(prediction)
        return prediction
173+
174+
175+
class NCF_MLP_Layer(nn.Layer):
    """NCF variant that scores a (user, item) pair with an MLP tower only.

    User and item embeddings (each ``layers[0] / 2`` wide) are concatenated,
    passed through a stack of Linear+ReLU layers sized by ``layers``, then
    through a single-output linear layer and a sigmoid.

    Args:
        num_users: number of rows in the user embedding table.
        num_items: number of rows in the item embedding table.
        mf_dim: stored on the instance but not used by this network; kept
            so the constructor signature matches the other NCF layers.
        layers: widths of the MLP tower; ``layers[0]`` is the width of the
            concatenated embeddings and ``layers[-1]`` the tower output.
    """

    def __init__(self, num_users, num_items, mf_dim, layers):
        super(NCF_MLP_Layer, self).__init__()

        self.num_users = num_users
        self.num_items = num_items
        self.mf_dim = mf_dim
        self.layers = layers

        # Each side contributes half of the first tower width.
        self.MLP_Embedding_User = paddle.nn.Embedding(
            self.num_users,
            int(self.layers[0] / 2),
            sparse=True,
            weight_attr=nn.initializer.Normal(
                mean=0.0, std=0.01))
        self.MLP_Embedding_Item = paddle.nn.Embedding(
            self.num_items,
            int(self.layers[0] / 2),
            sparse=True,
            weight_attr=nn.initializer.Normal(
                mean=0.0, std=0.01))

        num_layer = len(self.layers)
        # Plain list of alternating Linear / ReLU layers; each one is also
        # registered through add_sublayer so it is tracked as a sublayer.
        self.MLP_fc = []
        for i in range(1, num_layer):
            Linear = paddle.nn.Linear(
                in_features=self.layers[i - 1],
                out_features=self.layers[i],
                weight_attr=paddle.ParamAttr(
                    initializer=nn.initializer.TruncatedNormal(
                        mean=0.0, std=1.0 / math.sqrt(self.layers[i - 1]))),
                name='layer_' + str(i))
            self.add_sublayer('layer_%d' % i, Linear)
            self.MLP_fc.append(Linear)
            act = paddle.nn.ReLU()
            self.add_sublayer('act_%d' % i, act)
            self.MLP_fc.append(act)

        # The tower ends at layers[-1], so that is the prediction input
        # width.  This was hard-coded as layers[3], which is the same value
        # only when fc_layers has exactly four entries.  The fan_in factor
        # of 2 is kept unchanged from the previous code.
        self.prediction = paddle.nn.Linear(
            in_features=self.layers[-1],
            out_features=1,
            weight_attr=nn.initializer.KaimingUniform(
                fan_in=self.layers[-1] * 2),
            name='prediction')

        self.sigmoid = paddle.nn.Sigmoid()

    def forward(self, input_data):
        """Return the sigmoid score for each (user, item) pair.

        Args:
            input_data: indexable batch; element 0 holds the user ids and
                element 1 the item ids.  Any further elements (e.g. a
                label) are ignored.
                NOTE(review): the flatten over axes 1..2 implies the
                embedding output is rank 3, i.e. ids are presumably shaped
                (batch, 1) -- confirm against the reader.

        Returns:
            Tensor produced by the sigmoid over the linear prediction.
        """
        user_input = input_data[0]
        item_input = input_data[1]

        user_embedding_mlp = self.MLP_Embedding_User(user_input)
        mlp_user_latent = paddle.flatten(
            x=user_embedding_mlp, start_axis=1, stop_axis=2)
        item_embedding_mlp = self.MLP_Embedding_Item(item_input)
        mlp_item_latent = paddle.flatten(
            x=item_embedding_mlp, start_axis=1, stop_axis=2)
        mlp_vector = paddle.concat(
            x=[mlp_user_latent, mlp_item_latent], axis=-1)

        for n_layer in self.MLP_fc:
            mlp_vector = n_layer(mlp_vector)

        prediction = self.prediction(mlp_vector)
        prediction = self.sigmoid(prediction)
        return prediction

models/recall/ncf/readme.md

100644100755
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ python -u infer.py -m config.yaml
6666
# 静态图训练
6767
python -u ../../../tools/static_trainer.py -m config.yaml # 全量数据运行config_bigdata.yaml
6868
# 静态图预测
69-
python -u static_infer.py -m config.yaml
69+
python -u ../../../tools/static_infer.py -m config.yaml
7070
```
7171

7272
## 模型组网
@@ -81,13 +81,13 @@ python -u static_infer.py -m config.yaml
8181

8282
| 模型 | HR@10 | NDCG@10 | batch_size | epoch_num| Time of each epoch |
8383
| :------| :------ |:------ | :------ | :------| :------ |
84-
| NCF | 0.58 | 0.33 | 256 | 20 | 约20分钟 |
84+
| NCF_NeuMF | 0.58 | 0.33 | 1024 | 20 | 约20分钟 |
8585

8686
1. 确认您当前所在目录为PaddleRec/models/recall/ncf
8787
2. 进入paddlerec/datasets/movielens_pinterest_NCF目录下,执行该脚本,会从国内源的服务器上下载我们预处理完成的movielens和pinterest全量数据集,并解压到指定文件夹。
8888
``` bash
8989
cd ../../../datasets/movielens_pinterest_NCF
90-
sh run.sh
90+
bash run.sh
9191
```
9292
3. 切回模型目录,执行命令运行全量数据
9393
```bash

models/recall/ncf/run.sh

100644100755
Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#!/bin/bash
12
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
23
#
34
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -11,11 +12,7 @@
1112
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1213
# See the License for the specific language governing permissions and
1314
# limitations under the License.
14-
#!/bin/bash
1515
echo "................run................."
16-
echo "................The training log has been redirected to the log_train.txt file................."
1716
python -u ../../../tools/trainer.py -m config_bigdata.yaml &> log_train.txt
18-
echo "................The testing log has been redirected to the result.txt file................."
1917
python -u ../../../tools/infer.py -m config_bigdata.yaml &> result.txt
20-
echo "..............evaluate..................."
2118
python3 evaluate.py

models/recall/ncf/static_model.py

100644100755
Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
import paddle
1717
import paddle.nn as nn
1818
import paddle.nn.functional as F
19-
from net import NCFLayer
19+
from net import NCF_NeuMF_Layer, NCF_GMF_Layer, NCF_MLP_Layer
2020

2121

2222
class StaticModel():
@@ -28,7 +28,8 @@ def __init__(self, config):
2828
def _init_hyper_parameters(self):
2929
self.num_users = self.config.get("hyper_parameters.num_users")
3030
self.num_items = self.config.get("hyper_parameters.num_items")
31-
self.latent_dim = self.config.get("hyper_parameters.latent_dim")
31+
self.mf_dim = self.config.get("hyper_parameters.mf_dim")
32+
self.mode = self.config.get("hyper_parameters.mode")
3233
self.layers = self.config.get("hyper_parameters.fc_layers")
3334
self.learning_rate = self.config.get(
3435
"hyper_parameters.optimizer.learning_rate")
@@ -44,8 +45,16 @@ def create_feeds(self, is_infer=False):
4445
return feeds_list
4546

4647
def net(self, input, is_infer=False):
47-
ncf_model = NCFLayer(self.num_users, self.num_items, self.latent_dim,
48-
self.layers)
48+
if self.mode == "NCF_NeuMF":
49+
ncf_model = NCF_NeuMF_Layer(self.num_users, self.num_items,
50+
self.mf_dim, self.layers)
51+
if self.mode == "NCF_GMF":
52+
ncf_model = NCF_GMF_Layer(self.num_users, self.num_items,
53+
self.mf_dim, self.layers)
54+
if self.mode == "NCF_MLP":
55+
ncf_model = NCF_MLP_Layer(self.num_users, self.num_items,
56+
self.mf_dim, self.layers)
57+
4958
prediction = ncf_model(input)
5059

5160
self.inference_target_var = prediction

0 commit comments

Comments
 (0)