Skip to content

Commit f24bf7e

Browse files
authored
Merge pull request #384 from Luoyufeichen/wz38-stuff
ffm迁移
2 parents ed5bba6 + 25ec1cb commit f24bf7e

File tree

12 files changed

+718
-0
lines changed

12 files changed

+718
-0
lines changed

models/rank/ffm/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.

models/rank/ffm/config.yaml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
runner:
16+
train_data_dir: "data/sample_data/train"
17+
train_reader_path: "criteo_reader" # importlib format
18+
use_gpu: False
19+
use_auc: True
20+
train_batch_size: 2
21+
epochs: 3
22+
print_interval: 2
23+
#model_init_path: "output_model/0" # init model
24+
model_save_path: "output_model_ffm"
25+
test_data_dir: "data/sample_data/train"
26+
infer_reader_path: "criteo_reader" # importlib format
27+
infer_batch_size: 5
28+
infer_load_path: "output_model_ffm"
29+
infer_start_epoch: 0
30+
infer_end_epoch: 3
31+
32+
# hyper parameters of user-defined network
33+
hyper_parameters:
34+
# optimizer config
35+
optimizer:
36+
class: Adam
37+
learning_rate: 0.001
38+
strategy: async
39+
# user-defined <key, value> pairs
40+
sparse_inputs_slots: 27
41+
sparse_feature_number: 1000001
42+
sparse_feature_dim: 9
43+
dense_input_dim: 13
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
runner:
15+
train_data_dir: "../../../datasets/criteo/slot_train_data_full"
16+
train_reader_path: "criteo_reader" # importlib format
17+
use_gpu: True
18+
use_auc: True
19+
train_batch_size: 4096
20+
epochs: 10
21+
print_interval: 10
22+
#model_init_path: "output_model/0" # init model
23+
model_save_path: "output_model_all_ffm"
24+
test_data_dir: "../../../datasets/criteo/slot_test_data_full"
25+
infer_reader_path: "criteo_reader" # importlib format
26+
infer_batch_size: 512
27+
infer_load_path: "output_model_all_ffm"
28+
infer_start_epoch: 0
29+
infer_end_epoch: 4
30+
31+
hyper_parameters:
32+
# optimizer config
33+
optimizer:
34+
class: Adam
35+
learning_rate: 0.001
36+
strategy: async
37+
# user-defined <key, value> pairs
38+
sparse_inputs_slots: 27
39+
sparse_feature_number: 1000001
40+
sparse_feature_dim: 9
41+
dense_input_dim: 13

models/rank/ffm/criteo_reader.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import print_function
16+
import numpy as np
17+
18+
from paddle.io import IterableDataset
19+
20+
21+
class RecDataset(IterableDataset):
    """Streaming reader for slot-formatted Criteo text files.

    Every input line is a space-separated list of ``slot:feasign`` tokens.
    Tokens whose slot name is not listed in ``self.slots`` are ignored.
    For each line the reader yields a list holding one ``int64`` array per
    entry of ``self.sparse_slots`` (in that order) followed by a single
    ``float32`` array for the dense slot.
    """

    def __init__(self, file_list, config):
        """Create the dataset.

        Args:
            file_list: iterable of paths to the text files to read.
            config: accepted to keep the reader interface; it is not read
                by this class.
        """
        super(RecDataset, self).__init__()
        self.file_list = file_list
        self.init()

    def init(self):
        """Define the slot layout and the bookkeeping tables built from it."""
        padding = 0
        sparse_slots = "click 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26"
        self.sparse_slots = sparse_slots.strip().split(" ")
        self.dense_slots = ["dense_feature"]
        # Padding width of each dense slot, aligned with self.dense_slots.
        self.dense_slots_shape = [13]
        self.slots = self.sparse_slots + self.dense_slots
        self.slot2index = {}
        # Kept for backward compatibility; __iter__ tracks the slots seen
        # on a line in a local set, so every entry here stays False.
        self.visit = {}
        for index, slot in enumerate(self.slots):
            self.slot2index[slot] = index
            self.visit[slot] = False
        self.padding = padding

    def __iter__(self):
        """Yield one list of numpy arrays per line of every input file.

        A slot that does not occur on a line is filled with
        ``self.padding``: one value for a sparse slot, and the width given
        in ``self.dense_slots_shape`` for a dense slot.

        Raises:
            ValueError: if a feasign cannot be parsed as int (sparse slot)
                or float (dense slot).
            IndexError: if a token for a known slot has no ``:value`` part.
        """
        # Attribute kept because the original reader reset it here; it is
        # not used by this class.
        self.data = []
        slot2index = self.slot2index
        # Hash-based lookups instead of scanning lists for every token.
        sparse_slots = set(self.sparse_slots)
        # dense_slots_shape is aligned with dense_slots, NOT with
        # self.slots, so the padding width must be looked up by dense-slot
        # name rather than by the slot's index in self.slots.
        dense_shape = dict(zip(self.dense_slots, self.dense_slots_shape))
        for path in self.file_list:
            with open(path, "r") as rf:
                for raw_line in rf:
                    values = [[] for _ in self.slots]
                    # Local per-line state keeps independent iterators over
                    # the same dataset from interfering with each other.
                    seen = set()
                    for token in raw_line.strip().split(" "):
                        slot_feasign = token.split(":")
                        slot = slot_feasign[0]
                        if slot not in slot2index:
                            continue
                        if slot in sparse_slots:
                            feasign = int(slot_feasign[1])
                        else:
                            feasign = float(slot_feasign[1])
                        values[slot2index[slot]].append(feasign)
                        seen.add(slot)
                    for slot in self.slots:
                        if slot in seen:
                            continue
                        if slot in dense_shape:
                            pad_len = dense_shape[slot]
                        else:
                            pad_len = 1
                        values[slot2index[slot]].extend(
                            [self.padding] * pad_len)
                    # sparse
                    output_list = [
                        np.array(value).astype('int64')
                        for value in values[:-1]
                    ]
                    # dense
                    output_list.append(
                        np.array(values[-1]).astype("float32"))
                    # list
                    yield output_list

0 commit comments

Comments
 (0)