Skip to content

Commit 7c7096c

Browse files
committed
year change
1 parent d7db7be commit 7c7096c

File tree

9 files changed

+127
-84
lines changed

9 files changed

+127
-84
lines changed

datasets/criteo_fgcnn/download.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
wget --no-check-certificate https://paddlerec.bj.bcebos.com/datasets/fgcnn/datapro.zip
22
unzip -o datapro.zip
3-
echo "Complete data download."
3+
echo "Complete data download."

models/rank/fgcnn/config.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
1+
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -48,4 +48,4 @@ hyper_parameters:
4848
pooling_width: [2, 2, 2, 2]
4949
stride: [1, 1]
5050
dnn_hidden_units: [100, 100, 100]
51-
dnn_dropout: 0.0
51+
dnn_dropout: 0.0

models/rank/fgcnn/config_bigdata.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
1+
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -48,4 +48,4 @@ hyper_parameters:
4848
pooling_width: [2, 2, 2, 2]
4949
stride: [1, 1]
5050
dnn_hidden_units: [1000, 1000, 1000]
51-
dnn_dropout: 0.0
51+
dnn_dropout: 0.0

models/rank/fgcnn/dygraph_model.py

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
1+
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -23,20 +23,25 @@ class DygraphModel():
2323
def create_model(self, config):
2424
sparse_input_slot = config.get('hyper_parameters.sparse_inputs_slots')
2525
dense_input_slot = config.get('hyper_parameters.dense_inputs_slots')
26-
sparse_feature_size = config.get("hyper_parameters.sparse_feature_size")
26+
sparse_feature_size = config.get(
27+
"hyper_parameters.sparse_feature_size")
2728
feature_name = config.get("hyper_parameters.feature_name")
2829
feature_dim = config.get("hyper_parameters.feature_dim", 20)
29-
conv_kernel_width = config.get("hyper_parameters.conv_kernel_width", (7, 7, 7, 7))
30-
conv_filters = config.get("hyper_parameters.conv_filters", (14, 16, 18, 20))
30+
conv_kernel_width = config.get("hyper_parameters.conv_kernel_width",
31+
(7, 7, 7, 7))
32+
conv_filters = config.get("hyper_parameters.conv_filters",
33+
(14, 16, 18, 20))
3134
new_maps = config.get("hyper_parameters.new_maps", (3, 3, 3, 3))
32-
pooling_width = config.get("hyper_parameters.pooling_width", (2, 2, 2, 2))
33-
stride = config.get("hyper_parameters.stride", (1,1))
34-
dnn_hidden_units = config.get("hyper_parameters.dnn_hidden_units", (128,))
35+
pooling_width = config.get("hyper_parameters.pooling_width",
36+
(2, 2, 2, 2))
37+
stride = config.get("hyper_parameters.stride", (1, 1))
38+
dnn_hidden_units = config.get("hyper_parameters.dnn_hidden_units",
39+
(128, ))
3540
dnn_dropout = config.get("hyper_parameters.dnn_dropout", 0.0)
36-
fgcnn_model = net.FGCNN(sparse_input_slot, sparse_feature_size,
37-
feature_name, feature_dim,dense_input_slot,
38-
conv_kernel_width, conv_filters, new_maps,
39-
pooling_width, stride, dnn_hidden_units, dnn_dropout)
41+
fgcnn_model = net.FGCNN(
42+
sparse_input_slot, sparse_feature_size, feature_name, feature_dim,
43+
dense_input_slot, conv_kernel_width, conv_filters, new_maps,
44+
pooling_width, stride, dnn_hidden_units, dnn_dropout)
4045

4146
return fgcnn_model
4247

@@ -47,9 +52,9 @@ def create_feeds(self, batch_data, config):
4752
inputs = batch_data[0]
4853
label = batch_data[1]
4954
return label, inputs
50-
5155

52-
# define loss function by predicts and label
56+
# define loss function by predicts and label
57+
5358
def create_loss(self, y_pred, label):
5459
loss = nn.functional.log_loss(
5560
y_pred, label=paddle.cast(
@@ -61,8 +66,7 @@ def create_loss(self, y_pred, label):
6166
def create_optimizer(self, dy_model, config):
6267
lr = config.get("hyper_parameters.optimizer.learning_rate", 1e-3)
6368
optimizer = paddle.optimizer.Adam(
64-
parameters=dy_model.parameters(),
65-
learning_rate=lr)
69+
parameters=dy_model.parameters(), learning_rate=lr)
6670
return optimizer
6771

6872
def create_metrics(self):
@@ -95,4 +99,4 @@ def infer_forward(self, dy_model, metrics_list, batch_data, config):
9599
metrics_list[0].update(preds=predict_2d.numpy(), labels=label.numpy())
96100
# print_dict format :{'loss': loss}
97101
print_dict = {'loss': loss}
98-
return metrics_list, print_dict
102+
return metrics_list, print_dict

models/rank/fgcnn/net.py

Lines changed: 95 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
1+
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -19,11 +19,12 @@
1919
import numpy as np
2020
import pdb
2121

22+
2223
class FGCNN(nn.Layer):
23-
def __init__(self, sparse_num_field, sparse_feature_size,
24-
feature_name, feature_dim,dense_num_field, conv_kernel_width,
25-
conv_filters, new_maps, pooling_width, stride,
26-
dnn_hidden_units, dnn_dropout):
24+
def __init__(self, sparse_num_field, sparse_feature_size, feature_name,
25+
feature_dim, dense_num_field, conv_kernel_width, conv_filters,
26+
new_maps, pooling_width, stride, dnn_hidden_units,
27+
dnn_dropout):
2728
'''
2829
Parameters
2930
vocab_size -
@@ -44,19 +45,21 @@ def __init__(self, sparse_num_field, sparse_feature_size,
4445
EmbeddingLayer(
4546
num_embeddings=self.sparse_feature_size,
4647
embedding_dim=self.feature_dim,
47-
feature_name=self.feature_name[i] + '_fg_emd'
48-
) for i in range(self.feature_num_filed)])
48+
feature_name=self.feature_name[i] + '_fg_emd')
49+
for i in range(self.feature_num_filed)
50+
])
4951
self.embedding = nn.LayerList([
5052
EmbeddingLayer(
5153
num_embeddings=self.sparse_feature_size,
5254
embedding_dim=self.feature_dim,
53-
feature_name=self.feature_name[i] + '_emd'
54-
) for i in range(self.feature_num_filed)])
55+
feature_name=self.feature_name[i] + '_emd')
56+
for i in range(self.feature_num_filed)
57+
])
5558

5659
self.fgcnn = FGCNNLayer(self.feature_num_filed, self.feature_dim,
57-
self.conv_filters, self.conv_kernel_width,
60+
self.conv_filters, self.conv_kernel_width,
5861
self.new_maps, self.pooling_width, self.stride)
59-
62+
6063
self.combined_feture_num = self.fgcnn.new_feture_num + self.feature_num_filed
6164
self.inner_product_layer = InnerProductLayer(self.combined_feture_num)
6265
self.dnn_input_dim = self.combined_feture_num * (self.combined_feture_num - 1) // 2\
@@ -66,8 +69,8 @@ def __init__(self, sparse_num_field, sparse_feature_size,
6669

6770
self.fc_linear = self.add_sublayer(
6871
name='fc_linear',
69-
sublayer=nn.Linear(in_features=dnn_hidden_units[-1], out_features=1)
70-
)
72+
sublayer=nn.Linear(
73+
in_features=dnn_hidden_units[-1], out_features=1))
7174

7275
def forward(self, inputs):
7376
# print('*************************************')
@@ -76,35 +79,39 @@ def forward(self, inputs):
7679
fg_input_list = []
7780
origin_input_list = []
7881
for i in range(self.feature_num_filed):
79-
fg_input_list.append(self.fg_embedding[i](inputs[:, i].astype('int64')).reshape((-1, 1, self.feature_dim)))
80-
origin_input_list.append(self.embedding[i](inputs[:, i].astype('int64')).reshape((-1, 1, self.feature_dim)))
82+
fg_input_list.append(self.fg_embedding[i](inputs[:, i].astype(
83+
'int64')).reshape((-1, 1, self.feature_dim)))
84+
origin_input_list.append(self.embedding[i](inputs[:, i].astype(
85+
'int64')).reshape((-1, 1, self.feature_dim)))
8186
fg_input = paddle.concat(fg_input_list, axis=1)
8287
origin_input = paddle.concat(origin_input_list, axis=1)
8388
new_features = self.fgcnn(fg_input)
8489
combined_input = paddle.concat([origin_input, new_features], axis=1)
8590
inner_product = self.inner_product_layer(combined_input)
8691
linear_signal = paddle.flatten(combined_input, start_axis=1)
87-
dnn_input = paddle.concat([linear_signal,inner_product], axis=1)
92+
dnn_input = paddle.concat([linear_signal, inner_product], axis=1)
8893
dnn_output = self.dnn(dnn_input)
8994
dnn_logit = self.fc_linear(dnn_output)
9095
y_pred = F.sigmoid(dnn_logit)
9196
return y_pred
9297

98+
9399
class EmbeddingLayer(nn.Layer):
94100
def __init__(self, num_embeddings, embedding_dim, feature_name):
95101
super(EmbeddingLayer, self).__init__()
96102
self.embedding = nn.Embedding(
97103
num_embeddings=num_embeddings,
98104
embedding_dim=embedding_dim,
99105
name=feature_name,
100-
sparse=True
101-
)
106+
sparse=True)
102107

103108
def forward(self, inputs):
104109
return self.embedding(inputs)
105-
110+
111+
106112
class FGCNNLayer(nn.Layer):
107-
def __init__(self, feature_num_field, embedding_size, filters, kernel_width, new_maps, pooling_width, stride):
113+
def __init__(self, feature_num_field, embedding_size, filters,
114+
kernel_width, new_maps, pooling_width, stride):
108115
super(FGCNNLayer, self).__init__()
109116
self.feature_num_field = feature_num_field
110117
self.embedding_size = embedding_size
@@ -114,53 +121,72 @@ def __init__(self, feature_num_field, embedding_size, filters, kernel_width, new
114121
self.pooling_width = pooling_width
115122
self.stride = stride
116123
self.init()
117-
self.conv_pooling = nn.LayerList([nn.Sequential(
124+
self.conv_pooling = nn.LayerList([
125+
nn.Sequential(
118126
nn.Conv2D(
119-
in_channels=self.in_channels_size[i],
120-
out_channels=self.filters[i],
121-
kernel_size=(self.kernel_width[i], 1),
127+
in_channels=self.in_channels_size[i],
128+
out_channels=self.filters[i],
129+
kernel_size=(self.kernel_width[i], 1),
122130
padding=(self.padding_size[i], 0),
123131
stride=self.stride),
124132
nn.BatchNorm2D(self.filters[i]),
125133
nn.Tanh(),
126134
nn.MaxPool2D(
127-
kernel_size=(self.pooling_width[i], 1),
128-
stride=(self.pooling_width[i], 1)),
129-
) for i in range(len(self.filters))])
130-
self.recombination = nn.LayerList([nn.Sequential(
135+
kernel_size=(self.pooling_width[i], 1),
136+
stride=(self.pooling_width[i], 1)), )
137+
for i in range(len(self.filters))
138+
])
139+
self.recombination = nn.LayerList([
140+
nn.Sequential(
131141
nn.Linear(
132-
in_features=self.filters[i] * self.pooling_shape[i] * self.embedding_size,
133-
out_features=self.pooling_shape[i] * self.embedding_size * self.new_maps[i],
142+
in_features=self.filters[i] * self.pooling_shape[i] *
143+
self.embedding_size,
144+
out_features=self.pooling_shape[i] * self.embedding_size *
145+
self.new_maps[i],
134146
name='fgcnn_linear_%d' % i),
135147
nn.Tanh()
136148
# nn.ReLU()
137-
) for i in range(len(self.filters))])
149+
) for i in range(len(self.filters))
150+
])
138151

139152
def forward(self, inputs):
140153
feature = inputs.unsqueeze(1)
141154
new_feature_list = []
142155
for i in range(0, len(self.filters)):
143156
feature = self.conv_pooling[i](feature)
144-
result = self.recombination[i](paddle.flatten(feature, start_axis=1))
157+
result = self.recombination[i](paddle.flatten(
158+
feature, start_axis=1))
145159
new_feature_list.append(
146-
paddle.reshape(x=result, shape=(-1, self.pooling_shape[i] * self.new_maps[i] , self.embedding_size)))
160+
paddle.reshape(
161+
x=result,
162+
shape=(-1, self.pooling_shape[i] * self.new_maps[i],
163+
self.embedding_size)))
147164
new_features = paddle.concat(new_feature_list, axis=1)
148165
return new_features
149166

150167
def init(self):
151168
# compute pooling shape
152169
self.pooling_shape = []
153-
self.pooling_shape.append(self.feature_num_field // self.pooling_width[0])
170+
self.pooling_shape.append(self.feature_num_field //
171+
self.pooling_width[0])
154172
for i in range(1, len(self.filters)):
155-
self.pooling_shape.append(self.pooling_shape[i-1] // self.pooling_width[i])
173+
self.pooling_shape.append(self.pooling_shape[i - 1] //
174+
self.pooling_width[i])
156175
# compute padding size
157176
self.padding_size = []
158-
self.padding_size.append(((self.feature_num_field - 1) * self.stride[0] + self.kernel_width[0] - self.feature_num_field) // 2)
177+
self.padding_size.append(
178+
((self.feature_num_field - 1) * self.stride[0] +
179+
self.kernel_width[0] - self.feature_num_field) // 2)
159180
for i in range(1, len(self.filters)):
160181
self.padding_size.append(
161-
((self.pooling_shape[i-1] - 1) * self.stride[0] + self.kernel_width[i] - self.pooling_shape[i-1]) // 2)
162-
self.in_channels_size = [1,] + list(self.filters)
163-
self.new_feture_num = sum([self.pooling_shape[i] * self.new_maps[i] for i in range(len(self.filters))])
182+
((self.pooling_shape[i - 1] - 1) * self.stride[0] +
183+
self.kernel_width[i] - self.pooling_shape[i - 1]) // 2)
184+
self.in_channels_size = [1, ] + list(self.filters)
185+
self.new_feture_num = sum([
186+
self.pooling_shape[i] * self.new_maps[i]
187+
for i in range(len(self.filters))
188+
])
189+
164190

165191
class DNNLayer(nn.Layer):
166192
def __init__(self, inputs_dim, hidden_units, dropout_rate):
@@ -169,18 +195,22 @@ def __init__(self, inputs_dim, hidden_units, dropout_rate):
169195
self.dropout = nn.Dropout(dropout_rate)
170196

171197
hidden_units = [inputs_dim] + list(hidden_units)
172-
self.linears = nn.LayerList([nn.Sequential(
173-
nn.Linear(
174-
in_features=hidden_units[i],
175-
out_features=hidden_units[i + 1],
176-
weight_attr=nn.initializer.Normal(mean=0, std=1e-4),
177-
name='dnn_%d' % i),
178-
nn.BatchNorm(hidden_units[i+1])
179-
# nn.ReLU(hidden_units[i + 1],name='relu_%d' % i)
180-
) for i in range(len(hidden_units) - 1)])
181-
182-
self.activation_layers = nn.LayerList(
183-
[nn.ReLU(name='relu_%d'%i) for i in range(len(hidden_units) - 1)])
198+
self.linears = nn.LayerList([
199+
nn.Sequential(
200+
nn.Linear(
201+
in_features=hidden_units[i],
202+
out_features=hidden_units[i + 1],
203+
weight_attr=nn.initializer.Normal(
204+
mean=0, std=1e-4),
205+
name='dnn_%d' % i),
206+
nn.BatchNorm(hidden_units[i + 1])
207+
# nn.ReLU(hidden_units[i + 1],name='relu_%d' % i)
208+
) for i in range(len(hidden_units) - 1)
209+
])
210+
211+
self.activation_layers = nn.LayerList([
212+
nn.ReLU(name='relu_%d' % i) for i in range(len(hidden_units) - 1)
213+
])
184214

185215
# @paddle.jit.to_static
186216
def forward(self, inputs):
@@ -190,24 +220,31 @@ def forward(self, inputs):
190220
inputs = self.dropout(inputs)
191221
return inputs
192222

223+
193224
class InnerProductLayer(nn.Layer):
194225
""" output: product_sum_pooling (bs x 1),
195226
Bi_interaction_pooling (bs * dim),
196227
inner_product (bs x f2/2),
197228
elementwise_product (bs x f2/2 x emb_dim)
198229
"""
230+
199231
def __init__(self, num_fields=None):
200232
super(InnerProductLayer, self).__init__()
201233
if num_fields is None:
202234
raise ValueError("num_fields is required")
203235
else:
204236
self.num_fields = num_fields
205237
self.interaction_units = int(num_fields * (num_fields - 1) / 2)
206-
238+
207239
def forward(self, feature_emb):
208-
onemask = paddle.ones(shape=[feature_emb.shape[0],self.num_fields, self.num_fields],dtype='int32')
209-
tri = paddle.triu(onemask,1)
210-
upper_triange_mask = paddle.cast(tri,'bool')
211-
inner_product_matrix = paddle.bmm(feature_emb, paddle.transpose(feature_emb, perm=[0, 2, 1]))
212-
flat_upper_triange = paddle.masked_select(inner_product_matrix, upper_triange_mask)
213-
return flat_upper_triange.reshape([-1, self.interaction_units])
240+
onemask = paddle.ones(
241+
shape=[feature_emb.shape[0], self.num_fields, self.num_fields],
242+
dtype='int32')
243+
tri = paddle.triu(onemask, 1)
244+
upper_triange_mask = paddle.cast(tri, 'bool')
245+
inner_product_matrix = paddle.bmm(feature_emb,
246+
paddle.transpose(
247+
feature_emb, perm=[0, 2, 1]))
248+
flat_upper_triange = paddle.masked_select(inner_product_matrix,
249+
upper_triange_mask)
250+
return flat_upper_triange.reshape([-1, self.interaction_units])

models/rank/fgcnn/reader.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
1+
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -18,6 +18,7 @@
1818
from paddle.io import IterableDataset
1919
import h5py
2020

21+
2122
class RecDataset(IterableDataset):
2223
def __init__(self, file_list, config):
2324
super(RecDataset, self).__init__()
@@ -34,4 +35,3 @@ def __iter__(self):
3435
output_list.append(l[0:39].astype('int64'))
3536
output_list.append(l[39:].astype('int64'))
3637
yield output_list
37-

0 commit comments

Comments (0)