Skip to content

Commit 5a04d25

Browse files
authored
Merge pull request #540 from thinkall/master
feat: add deepFEFM model
2 parents ac3eb7d + ab39618 commit 5a04d25

File tree

14 files changed

+1085
-0
lines changed

14 files changed

+1085
-0
lines changed

models/rank/deepfefm/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

models/rank/deepfefm/config.yaml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
runner:
17+
train_data_dir: "data/sample_data/train"
18+
train_reader_path: "criteo_reader" # importlib format
19+
use_gpu: False
20+
use_auc: True
21+
train_batch_size: 16
22+
epochs: 2
23+
print_interval: 2
24+
#model_init_path: "output_model/0" # init model
25+
model_save_path: "output_model_deepfefm"
26+
test_data_dir: "data/sample_data/train"
27+
infer_reader_path: "criteo_reader" # importlib format
28+
infer_batch_size: 16
29+
infer_load_path: "output_model_deepfefm"
30+
infer_start_epoch: 0
31+
infer_end_epoch: 2
32+
33+
# hyperparameters of the user-defined network
34+
hyper_parameters:
35+
# optimizer config
36+
optimizer:
37+
class: Adam
38+
learning_rate: 0.001
39+
strategy: async
40+
# user-defined <key, value> pairs
41+
sparse_inputs_slots: 27
42+
sparse_feature_number: 1100005
43+
sparse_feature_dim: 9
44+
dense_input_dim: 13
45+
fc_sizes: [512, 256, 128, 32]
46+
distributed_embedding: 0
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# global settings
16+
17+
runner:
18+
train_data_dir: "../../../datasets/criteo/slot_train_data_full"
19+
train_reader_path: "criteo_reader" # importlib format
20+
use_gpu: True
21+
use_auc: False
22+
train_batch_size: 5120
23+
epochs: 1
24+
print_interval: 100
25+
#model_init_path: "output_model/0" # init model
26+
model_save_path: "output_model_all_deepfefm"
27+
test_data_dir: "../../../datasets/criteo/slot_test_data_full"
28+
infer_reader_path: "criteo_reader" # importlib format
29+
infer_batch_size: 5120
30+
infer_load_path: "output_model_all_deepfefm"
31+
infer_start_epoch: 0
32+
infer_end_epoch: 1
33+
34+
# hyperparameters of the user-defined network
35+
hyper_parameters:
36+
# optimizer config
37+
optimizer:
38+
class: Adam
39+
learning_rate: 0.0005
40+
strategy: async
41+
# user-defined <key, value> pairs
42+
sparse_inputs_slots: 27
43+
sparse_feature_number: 1100005
44+
sparse_feature_dim: 48
45+
dense_input_dim: 13
46+
fc_sizes: [1024, 1024, 1024]
47+
distributed_embedding: 0
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import print_function
16+
import numpy as np
17+
18+
from paddle.io import IterableDataset
19+
20+
21+
class RecDataset(IterableDataset):
    """Stream training samples parsed from slot-formatted text files.

    Every input line is a space-separated list of ``slot:value`` tokens.
    The recognised slots are the sparse slots ``click`` and ``1`` .. ``26``
    (integer values) followed by the single dense slot ``dense_feature``
    (float values, 13 expected per line).

    Iterating yields one list per non-blank line holding one ``int64``
    array per sparse slot (in slot order) followed by one ``float32``
    array with the dense values.
    """

    def __init__(self, file_list, config):
        """Store the input files and build the slot tables.

        Args:
            file_list: iterable of paths of the text files to read.
            config: accepted for interface compatibility; not used here.
        """
        super(RecDataset, self).__init__()
        self.file_list = file_list
        self.init()

    def init(self):
        """Initialise slot names, the slot-to-position map and padding."""
        self.padding = 0
        self.sparse_slots = ["click"] + [str(i) for i in range(1, 27)]
        self.dense_slots = ["dense_feature"]
        # Number of values expected for each entry of ``dense_slots``.
        self.dense_slots_shape = [13]
        # Sparse slots first, dense slots last: ``__iter__`` relies on the
        # dense slot being the final entry.
        self.slots = self.sparse_slots + self.dense_slots
        self.slot2index = {
            slot: index for index, slot in enumerate(self.slots)
        }
        # No longer used for parsing; kept so that code reading this
        # attribute keeps working. Every entry stays False.
        self.visit = dict.fromkeys(self.slots, False)

    def _parse_line(self, tokens):
        """Convert the ``slot:value`` tokens of one line into arrays.

        Args:
            tokens: list of ``slot:value`` strings taken from one line.

        Returns:
            A list with one ``int64`` array per sparse slot followed by one
            ``float32`` array for the dense slot.
        """
        num_sparse = len(self.sparse_slots)
        values = [[] for _ in self.slots]

        for token in tokens:
            fields = token.split(":")
            index = self.slot2index.get(fields[0])
            if index is None:
                # Unknown slot name (or an empty token): ignore it.
                continue
            if index < num_sparse:
                feasign = int(fields[1])
            else:
                # Dense values are clamped to the range [0.0, 1.0].
                feasign = max(0.0, min(1.0, float(fields[1])))
            values[index].append(feasign)

        # Pad every slot that did not occur on this line.
        for index, slot_values in enumerate(values):
            if slot_values:
                continue
            if index >= num_sparse:
                # ``dense_slots_shape`` is indexed by the position within
                # ``dense_slots``, not by the position within ``slots``;
                # using the latter raised IndexError.
                width = self.dense_slots_shape[index - num_sparse]
                slot_values.extend([self.padding] * width)
            else:
                slot_values.append(self.padding)

        sample = [
            np.array(slot_values).astype('int64')
            for slot_values in values[:-1]
        ]
        sample.append(np.array(values[-1]).astype("float32"))
        return sample

    def __iter__(self):
        """Yield one parsed sample per non-blank line of every input file.

        Paths containing ``.DS_Store`` are skipped.
        """
        self.data = []
        for path in self.file_list:
            if '.DS_Store' in path:
                continue
            with open(path, "r") as rf:
                for raw_line in rf:
                    stripped = raw_line.strip()
                    if not stripped:
                        # A blank line carries no label and no features;
                        # skip it rather than emit an all-padding sample.
                        continue
                    yield self._parse_line(stripped.split(" "))

0 commit comments

Comments
 (0)