Skip to content

Commit 23639e0

Browse files
authored
Merge pull request #714 from wangzhen38/autofis_fix
autofis fix
2 parents 5e733be + 630d773 commit 23639e0

File tree

8 files changed

+844
-32
lines changed

8 files changed

+844
-32
lines changed

datasets/criteo_autofis/Criteo.py

Lines changed: 544 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import numpy as np
16+
import os
17+
from Criteo import Criteo
18+
from tqdm import tqdm
19+
20+
batch_size = 1024

# Options passed to Criteo.batch_generator for the training split.
train_data_param = {
    'gen_type': 'train',
    'random_sample': True,
    'batch_size': batch_size,
    'split_fields': False,
    'on_disk': True,
    'squeeze_output': True,
}
# Options for the test split; identical except for 'gen_type' and
# 'random_sample', which is turned off here.
test_data_param = {
    'gen_type': 'test',
    'random_sample': False,
    'batch_size': batch_size,
    'split_fields': False,
    'on_disk': True,
    'squeeze_output': True,
}

dataset = Criteo(initialized=True)
train_gen = dataset.batch_generator(train_data_param)
test_gen = dataset.batch_generator(test_data_param)

output_dir = 'data/whole_data'


def _export_split(batch_gen, split):
    """Drain ``batch_gen`` and dump it as text under ``output_dir/<split>``.

    Every ``(x, y)`` batch yielded by ``batch_gen`` is collected, the batches
    are concatenated along axis 0, and the two resulting arrays are written
    as ``<split>_x.txt`` and ``<split>_y.txt`` using integer formatting.

    Args:
        batch_gen: iterable yielding ``(x, y)`` pairs of array-likes.
        split: name of the split ('train' or 'test'); used both as the
            sub-directory name and as the file-name prefix.

    Raises:
        ValueError: from ``np.concatenate`` if ``batch_gen`` yields nothing.
    """
    xs = []
    ys = []
    for x, y in tqdm(batch_gen):
        xs.append(x)
        ys.append(y)

    x = np.concatenate(xs, 0)
    y = np.concatenate(ys, 0)
    print(x.shape)

    split_dir = os.path.join(output_dir, split)
    # np.savetxt does not create missing directories; make sure the target
    # exists so the export cannot fail after the whole dataset was read.
    os.makedirs(split_dir, exist_ok=True)
    np.savetxt(os.path.join(split_dir, split + '_x.txt'), x, fmt='%d')
    np.savetxt(os.path.join(split_dir, split + '_y.txt'), y, fmt='%d')


# Writes train/train_x.txt and train/train_y.txt.
_export_split(train_gen, 'train')
# Writes test/test_x.txt and test/test_y.txt (the label file was previously
# misspelled 'text_y.txt').
_export_split(test_gen, 'test')

models/rank/autofis/config.yaml

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -12,41 +12,39 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
# global settings
1516

1617
runner:
17-
train_data_dir: "data/sample_data/train"
18+
train_data_dir: "data/sample_data"
1819
train_reader_path: "criteo_reader" # importlib format
1920
use_gpu: False
20-
use_auc: True
21+
use_auc: False
2122
train_batch_size: 2
22-
epochs: 3
23-
print_interval: 2
24-
# model_init_path: "output_model_deepfm/2" # init model
25-
model_save_path: "output_model_deepfm"
26-
test_data_dir: "data/sample_data/train"
23+
epochs: 1
24+
print_interval: 1
25+
#model_init_path: "output_model/0" # init model
26+
model_save_path: "output_model_autofis"
27+
test_data_dir: "data/sample_data"
2728
infer_reader_path: "criteo_reader" # importlib format
28-
infer_batch_size: 5
29-
infer_load_path: "output_model_deepfm"
29+
infer_batch_size: 2
30+
infer_load_path: "output_model_autofis"
3031
infer_start_epoch: 0
31-
infer_end_epoch: 3
32-
#use inference save model
33-
use_inference: False
34-
save_inference_feed_varnames: ["C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11","C12","C13","C14","C15","C16","C17","C18","C19","C20","C21","C22","C23","C24","C25","C26","dense_input"]
35-
save_inference_fetch_varnames: ["sigmoid_0.tmp_0"]
36-
# use fleet
37-
use_fleet: False
38-
32+
infer_end_epoch: 1
33+
3934
# hyper parameters of user-defined network
4035
hyper_parameters:
4136
# optimizer config
4237
optimizer:
4338
class: Adam
4439
learning_rate: 0.001
45-
strategy: async
40+
gamma: 0.7
4641
# user-defined <key, value> pairs
47-
sparse_inputs_slots: 27
48-
sparse_feature_number: 1000001
49-
sparse_feature_dim: 9
50-
dense_input_dim: 13
51-
fc_sizes: [512, 256, 128, 32]
52-
distributed_embedding: 0
42+
num_inputs: 39
43+
input_size: 1178909
44+
embedding_size: 40
45+
width: 700
46+
depth: 5
47+
n_col: 741
48+
grad_c: 0.0005
49+
grad_mu: 0.8
50+
pairs: 741

models/rank/autofis/config_bigdata.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,11 @@ runner:
2323
epochs: 1
2424
print_interval: 50
2525
#model_init_path: "output_model/0" # init model
26-
model_save_path: "output_model_all_autodeepfm"
26+
model_save_path: "output_model_autofis_all"
2727
test_data_dir: "../../../datasets/criteo_autofis/data/whole_data/test"
2828
infer_reader_path: "criteo_reader" # importlib format
29-
infer_batch_size: 2000
30-
infer_load_path: "output_model_all_autodeepfm"
29+
infer_batch_size: 200
30+
infer_load_path: "output_model_autofis_all"
3131
infer_start_epoch: 0
3232
infer_end_epoch: 1
3333

models/rank/autofis/criteo_reader.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,10 @@ class RecDataset(Dataset):
2222
def __init__(self, file_list, config):
2323
super(RecDataset, self).__init__()
2424
for file in file_list:
25-
if file.endswith('x.npy'):
26-
self.x = np.load(file)
27-
elif file.endswith('y.npy'):
28-
self.y = np.load(file)
25+
if file.endswith('x.txt'):
26+
self.x = np.loadtxt(file, dtype=np.int64)
27+
elif file.endswith('y.txt'):
28+
self.y = np.loadtxt(file, dtype=np.int64)
2929

3030
def __getitem__(self, item):
3131
return self.x[item], self.y[item]

0 commit comments

Comments
 (0)