Skip to content

Commit 851cb3e

Browse files
committed
xdeepfm2.0
1 parent 169d989 commit 851cb3e

File tree

13 files changed

+856
-0
lines changed

13 files changed

+856
-0
lines changed

models/rank/xdeepfm/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

models/rank/xdeepfm/config.yaml

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
runner:
17+
train_data_dir: "data/sample_data/train"
18+
train_reader_path: "criteo_reader" # importlib format
19+
use_gpu: True
20+
use_auc: True
21+
train_batch_size: 5
22+
epochs: 1
23+
print_interval: 2
24+
#model_init_path: "output_model/0" # init model
25+
model_save_path: "output_model_deepfm"
26+
test_data_dir: "data/sample_data/train"
27+
infer_reader_path: "criteo_reader" # importlib format
28+
infer_batch_size: 5
29+
infer_load_path: "output_model_deepfm"
30+
infer_start_epoch: 0
31+
infer_end_epoch: 3
32+
33+
34+
# hyperparameters of the user-defined network
35+
hyper_parameters:
36+
# optimizer config
37+
optimizer:
38+
class: Adam
39+
learning_rate: 0.001
40+
strategy: async
41+
# user-defined <key, value> pairs
42+
sparse_inputs_slots: 27
43+
sparse_feature_number: 1000001
44+
sparse_feature_dim: 9
45+
dense_input_dim: 13
46+
fc_sizes: [512, 256, 128, 32]
47+
layer_sizes_dnn: [512, 256, 128]
48+
layer_sizes_cin: [128, 32]
49+
distributed_embedding: 0
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# global settings
16+
17+
runner:
18+
train_data_dir: "../../../datasets/criteo/slot_train_data_full"
19+
train_reader_path: "criteo_reader" # importlib format
20+
use_gpu: True
21+
use_auc: False
22+
train_batch_size: 4096
23+
epochs: 1
24+
print_interval: 10
25+
#model_init_path: "output_model/0" # init model
26+
model_save_path: "output_model_bigdata_deepfm_dy"
27+
test_data_dir: "../../../datasets/criteo/slot_test_data_full"
28+
infer_reader_path: "criteo_reader" # importlib format
29+
infer_batch_size: 4096
30+
infer_load_path: "output_model_bigdata_deepfm_dy"
31+
infer_start_epoch: 0
32+
infer_end_epoch: 1
33+
34+
# hyperparameters of the user-defined network
35+
hyper_parameters:
36+
# optimizer config
37+
optimizer:
38+
class: Adam
39+
learning_rate: 0.001
40+
strategy: async
41+
# user-defined <key, value> pairs
42+
sparse_inputs_slots: 27
43+
sparse_feature_number: 1000001
44+
sparse_feature_dim: 9
45+
dense_input_dim: 13
46+
fc_sizes: [400, 400, 400]
47+
distributed_embedding: 0
48+
layer_sizes_dnn: [400, 400]
49+
layer_sizes_cin: [200, 200, 200]
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import print_function
16+
import numpy as np
17+
18+
from paddle.io import IterableDataset
19+
20+
21+
class RecDataset(IterableDataset):
    """Iterable dataset that streams slot-formatted samples from text files.

    Every input line is a space-separated list of ``slot:value`` tokens.
    Slots listed in ``self.sparse_slots`` are parsed as integers, slots in
    ``self.dense_slots`` as floats, and tokens whose slot is unknown are
    ignored. Each line yields one sample: a list with one ``int64`` array
    per sparse slot (in ``self.sparse_slots`` order) followed by a single
    ``float32`` array for the dense slot.
    """

    def __init__(self, file_list, config):
        """Store the input files and build the slot tables.

        Args:
            file_list: Iterable of paths to the text files to read.
            config: Accepted for interface compatibility; not read here.
        """
        super(RecDataset, self).__init__()
        self.file_list = file_list
        self.init()

    def init(self):
        """Define the slot names, their output positions and the padding."""
        padding = 0
        sparse_slots = "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
        self.sparse_slots = sparse_slots.strip().split(" ")
        self.dense_slots = ["dense_feature"]
        self.dense_slots_shape = [13]
        self.slots = self.sparse_slots + self.dense_slots
        self.slot2index = {}
        # Kept as an attribute for backward compatibility; __iter__ tracks
        # the slots seen on each line locally and no longer mutates it.
        self.visit = {}
        for index, slot in enumerate(self.slots):
            self.slot2index[slot] = index
            self.visit[slot] = False
        self.padding = padding

    def __iter__(self):
        """Yield one parsed sample per input line.

        Slots that do not occur on a line are filled with ``self.padding``:
        one value for a sparse slot, and as many values as the slot's entry
        in ``self.dense_slots_shape`` for a dense slot.

        Yields:
            list of numpy arrays: ``int64`` arrays for the sparse slots,
            then one ``float32`` array for the dense slot.
        """
        # Kept so the attribute is reset exactly as before; it is not
        # otherwise used by this reader.
        self.data = []
        sparse_slots = set(self.sparse_slots)
        # Width of each dense slot, keyed by slot name. Indexing
        # dense_slots_shape with the slot's position in self.slots (27 for
        # "dense_feature") would run past the end of the one-element list.
        dense_width = dict(zip(self.dense_slots, self.dense_slots_shape))
        for path in self.file_list:
            with open(path, "r") as rf:
                for raw_line in rf:
                    output = [(slot, []) for slot in self.slots]
                    # Local per-line bookkeeping, so a parse error on one
                    # line cannot leak "seen" state into a later iteration.
                    seen = set()
                    for token in raw_line.strip().split(" "):
                        slot_feasign = token.split(":")
                        slot = slot_feasign[0]
                        if slot not in self.slot2index:
                            continue
                        if slot in sparse_slots:
                            feasign = int(slot_feasign[1])
                        else:
                            feasign = float(slot_feasign[1])
                        output[self.slot2index[slot]][1].append(feasign)
                        seen.add(slot)
                    for slot in self.slots:
                        if slot in seen:
                            continue
                        width = dense_width.get(slot, 1)
                        output[self.slot2index[slot]][1].extend(
                            [self.padding] * width)
                    # sparse: every slot except the trailing dense one
                    output_list = [
                        np.array(values).astype("int64")
                        for _, values in output[:-1]
                    ]
                    # dense: the last slot in self.slots
                    output_list.append(
                        np.array(output[-1][1]).astype("float32"))
                    yield output_list

0 commit comments

Comments
 (0)