Skip to content

Commit 44122e5

Browse files
committed
modify
1 parent 32d5e0c commit 44122e5

File tree

12 files changed

+741
-0
lines changed

12 files changed

+741
-0
lines changed

datasets/criteo-fgcnn/download.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Download and unpack the FGCNN dataset archive (datapro.zip) into the
# current directory.
#
# Abort on the first failing command so that the completion message below is
# only printed after both the download and the extraction succeeded.
set -e

# NOTE(review): --no-check-certificate disables TLS verification of the
# download host. It is kept for backward compatibility; drop the flag once
# the host's certificate chain validates in the target environment.
wget --no-check-certificate https://paddlerec.bj.bcebos.com/datasets/fgcnn/datapro.zip

# -o overwrites files left behind by a previous run without prompting.
unzip -o datapro.zip

echo "Complete data download."

models/rank/fgcnn/config.yaml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# global settings
16+
17+
runner:
18+
train_data_dir: "data/trainlite"
19+
train_reader_path: "reader" # importlib format
20+
use_gpu: True
21+
use_auc: True
22+
train_batch_size: 10
23+
epochs: 1
24+
print_interval: 10
25+
# model_init_path: "output_model_all_fgcnn/1" # init model
26+
model_save_path: "output_model_sample_fgcnn"
27+
test_data_dir: "data/testlite"
28+
infer_reader_path: "reader" # importlib format
29+
infer_batch_size: 10
30+
infer_load_path: "output_model_sample_fgcnn"
31+
infer_start_epoch: 0
32+
infer_end_epoch: 1
33+
34+
# hyper parameters of user-defined network
35+
hyper_parameters:
36+
# optimizer config
37+
optimizer:
38+
class: Adam
39+
learning_rate: 0.001
40+
sparse_inputs_slots: 26
41+
sparse_feature_size: 1000000
42+
feature_name: ['I1','I2','I3','I4','I5','I6','I7','I8','I9','I10','I11','I12','I13','C1','C2','C3','C4','C5','C6','C7','C8','C9','C10','C11','C12','C13','C14','C15','C16','C17', 'C18','C19', 'C20', 'C21', 'C22','C23', 'C24', 'C25', 'C26']
43+
dense_inputs_slots: 13
44+
feature_dim: 20
45+
conv_kernel_width: [ 9, 9, 9, 9]
46+
conv_filters: [38, 40, 42, 44]
47+
new_maps: [3, 3, 3, 3]
48+
pooling_width: [2, 2, 2, 2]
49+
stride: [1, 1]
50+
dnn_hidden_units: [100, 100, 100]
51+
dnn_dropout: 0.0
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# global settings
16+
17+
runner:
18+
train_data_dir: "data/train"
19+
train_reader_path: "reader" # importlib format
20+
use_gpu: True
21+
use_auc: True
22+
train_batch_size: 2000
23+
epochs: 2
24+
print_interval: 2000
25+
# model_init_path: "output_model_all_fgcnn/1" # init model
26+
model_save_path: "output_model_all_fgcnn"
27+
test_data_dir: "data/test"
28+
infer_reader_path: "reader" # importlib format
29+
infer_batch_size: 5000
30+
infer_load_path: "output_model_all_fgcnn"
31+
infer_start_epoch: 0
32+
infer_end_epoch: 2
33+
34+
# hyper parameters of user-defined network
35+
hyper_parameters:
36+
# optimizer config
37+
optimizer:
38+
class: Adam
39+
learning_rate: 0.001
40+
sparse_inputs_slots: 26
41+
sparse_feature_size: 1000000
42+
feature_name: ['I1','I2','I3','I4','I5','I6','I7','I8','I9','I10','I11','I12','I13','C1','C2','C3','C4','C5','C6','C7','C8','C9','C10','C11','C12','C13','C14','C15','C16','C17', 'C18','C19', 'C20', 'C21', 'C22','C23', 'C24', 'C25', 'C26']
43+
dense_inputs_slots: 13
44+
feature_dim: 20
45+
conv_kernel_width: [ 9, 9, 9, 9]
46+
conv_filters: [38, 40, 42, 44]
47+
new_maps: [3, 3, 3, 3]
48+
pooling_width: [2, 2, 2, 2]
49+
stride: [1, 1]
50+
dnn_hidden_units: [1000, 1000, 1000]
51+
dnn_dropout: 0.0
17.6 KB
Binary file not shown.
33.3 KB
Binary file not shown.

models/rank/fgcnn/dygraph_model.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import paddle
16+
import paddle.nn as nn
17+
import net
18+
import numpy as np
19+
20+
21+
class DygraphModel():
    """Dygraph glue code for the FGCNN ranking model.

    Bundles the pieces a training/inference loop needs: model construction
    from the config, batch unpacking, loss, optimizer, metrics and the
    per-batch forward passes.
    """

    # define model
    def create_model(self, config):
        """Build a ``net.FGCNN`` instance from the ``hyper_parameters`` config.

        Args:
            config: Mapping-like object queried through ``config.get`` with
                dotted keys (e.g. ``"hyper_parameters.feature_dim"``).

        Returns:
            The constructed ``net.FGCNN`` model.
        """
        # These four keys have no fallback: ``config.get`` returns whatever
        # it yields for a missing key.
        sparse_input_slot = config.get('hyper_parameters.sparse_inputs_slots')
        dense_input_slot = config.get('hyper_parameters.dense_inputs_slots')
        sparse_feature_size = config.get(
            "hyper_parameters.sparse_feature_size")
        feature_name = config.get("hyper_parameters.feature_name")

        # The remaining keys fall back to the defaults given here.
        feature_dim = config.get("hyper_parameters.feature_dim", 20)
        conv_kernel_width = config.get("hyper_parameters.conv_kernel_width",
                                       (7, 7, 7, 7))
        conv_filters = config.get("hyper_parameters.conv_filters",
                                  (14, 16, 18, 20))
        new_maps = config.get("hyper_parameters.new_maps", (3, 3, 3, 3))
        pooling_width = config.get("hyper_parameters.pooling_width",
                                   (2, 2, 2, 2))
        stride = config.get("hyper_parameters.stride", (1, 1))
        dnn_hidden_units = config.get("hyper_parameters.dnn_hidden_units",
                                      (128, ))
        dnn_dropout = config.get("hyper_parameters.dnn_dropout", 0.0)

        # Arguments are positional; the order must match net.FGCNN.__init__.
        fgcnn_model = net.FGCNN(
            sparse_input_slot, sparse_feature_size, feature_name, feature_dim,
            dense_input_slot, conv_kernel_width, conv_filters, new_maps,
            pooling_width, stride, dnn_hidden_units, dnn_dropout)

        return fgcnn_model

    # define feeds which split a batch into label and model inputs
    def create_feeds(self, batch_data, config):
        """Split one batch into ``(label, inputs)``.

        Args:
            batch_data: Indexable batch; element 0 is the model input and
                element 1 is the label.
            config: Unused; kept for interface compatibility.

        Returns:
            Tuple ``(label, inputs)``, both passed through unchanged.
        """
        # The batch elements are forwarded as-is. A previous tensor
        # conversion of batch_data[0] was immediately overwritten and has
        # been removed because its result was never used.
        inputs = batch_data[0]
        label = batch_data[1]
        return label, inputs

    # define loss function by predicts and label
    def create_loss(self, y_pred, label):
        """Return the mean log loss between ``y_pred`` and ``label``.

        Args:
            y_pred: Model predictions passed to ``log_loss`` as its input.
            label: Ground-truth labels; cast to ``float32`` before use.

        Returns:
            The batch-averaged loss tensor.
        """
        loss = nn.functional.log_loss(
            y_pred, label=paddle.cast(
                label, dtype="float32"))
        avg_cost = paddle.mean(x=loss)
        return avg_cost

    # define optimizer
    def create_optimizer(self, dy_model, config):
        """Create an Adam optimizer over all parameters of ``dy_model``.

        The learning rate is read from
        ``hyper_parameters.optimizer.learning_rate`` (default ``1e-3``).
        """
        lr = config.get("hyper_parameters.optimizer.learning_rate", 1e-3)
        optimizer = paddle.optimizer.Adam(
            parameters=dy_model.parameters(), learning_rate=lr)
        return optimizer

    def create_metrics(self):
        """Return ``(metrics_list, metrics_list_name)`` with a single ROC AUC."""
        metrics_list_name = ["auc"]
        auc_metric = paddle.metric.Auc("ROC")
        metrics_list = [auc_metric]
        return metrics_list, metrics_list_name

    def _forward_and_update(self, dy_model, metrics_list, batch_data, config):
        """Run one forward pass, update ``metrics_list[0]`` and return the loss.

        Shared by ``train_forward`` and ``infer_forward`` so both phases
        compute the loss and feed the metric in exactly the same way.
        """
        label, inputs = self.create_feeds(batch_data, config)
        pred = dy_model.forward(inputs)
        loss = self.create_loss(pred, label)
        # update metrics: the metric is fed two columns, [1 - pred, pred].
        # NOTE(review): assumes pred has shape (batch, 1) - confirm in net.
        predict_2d = paddle.concat(x=[1 - pred, pred], axis=1)
        metrics_list[0].update(
            preds=predict_2d.numpy(), labels=label.numpy())
        return loss

    # construct train forward phase
    def train_forward(self, dy_model, metrics_list, batch_data, config):
        """Run a training step's forward pass.

        Returns:
            Tuple ``(loss, metrics_list, print_dict)`` where ``print_dict``
            has the form ``{'loss': loss}``.
        """
        loss = self._forward_and_update(dy_model, metrics_list, batch_data,
                                        config)
        # print_dict format :{'loss': loss}
        print_dict = {'loss': loss}
        return loss, metrics_list, print_dict

    def infer_forward(self, dy_model, metrics_list, batch_data, config):
        """Run an inference step's forward pass.

        Returns:
            Tuple ``(metrics_list, print_dict)`` where ``print_dict`` has the
            form ``{'loss': loss}``.
        """
        loss = self._forward_and_update(dy_model, metrics_list, batch_data,
                                        config)
        # print_dict format :{'loss': loss}
        print_dict = {'loss': loss}
        return metrics_list, print_dict

0 commit comments

Comments
 (0)