Skip to content

Commit f1d786f

Browse files
authored
Merge branch 'PaddlePaddle:master' into mind_fix
2 parents 89570ad + 32d5e0c commit f1d786f

File tree

4 files changed

+36
-28
lines changed

4 files changed

+36
-28
lines changed

models/multitask/mmoe/net.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,15 @@ def __init__(self, feature_size, expert_num, expert_size, tower_size,
2828
self.gate_num = gate_num
2929

3030
self._param_expert = []
31+
expert_init = [pow(10, -i) for i in range(1, self.expert_num + 1)]
3132
for i in range(0, self.expert_num):
3233
linear = self.add_sublayer(
3334
name='expert_' + str(i),
3435
sublayer=nn.Linear(
3536
feature_size,
3637
expert_size,
37-
#initialize the weight randly
38-
weight_attr=nn.initializer.XavierUniform(),
38+
#initialize each expert respectively
39+
weight_attr=nn.initializer.Constant(value=expert_init[i]),
3940
bias_attr=nn.initializer.Constant(value=0.1),
4041
#bias_attr=paddle.ParamAttr(learning_rate=1.0),
4142
name='expert_' + str(i)))
@@ -44,14 +45,15 @@ def __init__(self, feature_size, expert_num, expert_size, tower_size,
4445
self._param_gate = []
4546
self._param_tower = []
4647
self._param_tower_out = []
48+
gate_init = [pow(10, -i) for i in range(1, self.gate_num + 1)]
4749
for i in range(0, self.gate_num):
4850
linear = self.add_sublayer(
4951
name='gate_' + str(i),
5052
sublayer=nn.Linear(
5153
feature_size,
5254
expert_num,
53-
#initialize the weight randly
54-
weight_attr=nn.initializer.XavierUniform(),
55+
#initialize every gate respectively
56+
weight_attr=nn.initializer.Constant(value=gate_init[i]),
5557
bias_attr=nn.initializer.Constant(value=0.1),
5658
#bias_attr=paddle.ParamAttr(learning_rate=1.0),
5759
name='gate_' + str(i)))
@@ -62,8 +64,8 @@ def __init__(self, feature_size, expert_num, expert_size, tower_size,
6264
sublayer=nn.Linear(
6365
expert_size,
6466
tower_size,
65-
#initialize the weight randly
66-
weight_attr=nn.initializer.XavierUniform(),
67+
#initialize each tower respectively
68+
weight_attr=nn.initializer.Constant(value=gate_init[i]),
6769
bias_attr=nn.initializer.Constant(value=0.1),
6870
#bias_attr=paddle.ParamAttr(learning_rate=1.0),
6971
name='tower_' + str(i)))
@@ -74,8 +76,8 @@ def __init__(self, feature_size, expert_num, expert_size, tower_size,
7476
sublayer=nn.Linear(
7577
tower_size,
7678
2,
77-
#initialize the weight randly
78-
weight_attr=nn.initializer.XavierUniform(),
79+
#initialize each tower output respectively
80+
weight_attr=nn.initializer.Constant(value=gate_init[i]),
7981
bias_attr=nn.initializer.Constant(value=0.1),
8082
name='tower_out_' + str(i)))
8183
self._param_tower_out.append(linear)

models/multitask/ple/net.py

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
1+
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
22
#
33
# Licensed under the Apache License, Version 2.0 (the "License");
44
# you may not use this file except in compliance with the License.
@@ -52,14 +52,15 @@ def __init__(self, feature_size, task_num, exp_per_task, shared_num,
5252
# task tower
5353
self._param_tower = []
5454
self._param_tower_out = []
55+
task_init = [pow(10, -i) for i in range(1, self.task_num + 1)]
5556
for i in range(0, self.task_num):
5657
linear = self.add_sublayer(
5758
name='tower_' + str(i),
5859
sublayer=nn.Linear(
5960
expert_size,
6061
tower_size,
61-
#initialize the weight randly
62-
weight_attr=nn.initializer.XavierUniform(),
62+
#initialize each task respectively
63+
weight_attr=nn.initializer.Constant(value=task_init[i]),
6364
bias_attr=nn.initializer.Constant(value=0.1),
6465
#bias_attr=paddle.ParamAttr(learning_rate=1.0),
6566
name='tower_' + str(i)))
@@ -70,8 +71,8 @@ def __init__(self, feature_size, task_num, exp_per_task, shared_num,
7071
sublayer=nn.Linear(
7172
tower_size,
7273
2,
73-
#initialize the weight randly
74-
weight_attr=nn.initializer.XavierUniform(),
74+
#initialize each task respectively
75+
weight_attr=nn.initializer.Constant(value=task_init[i]),
7576
bias_attr=nn.initializer.Constant(value=0.1),
7677
name='tower_out_' + str(i)))
7778
self._param_tower_out.append(linear)
@@ -113,43 +114,49 @@ def __init__(self, input_feature_size, task_num, exp_per_task, shared_num,
113114

114115
self._param_expert = []
115116
# task-specific expert part
117+
step = self.exp_per_task
116118
for i in range(0, self.task_num):
119+
exp_init = [
120+
pow(10, -k) for k in range(1 + i * step, step * (i + 1) + 1)
121+
]
117122
for j in range(0, self.exp_per_task):
118123
linear = self.add_sublayer(
119124
name=level_name + "_exp_" + str(i) + "_" + str(j),
120125
sublayer=nn.Linear(
121126
input_feature_size,
122127
expert_size,
123-
#initialize the weight randly
124-
weight_attr=nn.initializer.XavierUniform(),
128+
#initialize each expert respectively
129+
weight_attr=nn.initializer.Constant(value=exp_init[j]),
125130
bias_attr=nn.initializer.Constant(value=0.1),
126131
name=level_name + "_exp_" + str(i) + "_" + str(j)))
127132
self._param_expert.append(linear)
128-
133+
shared_exp_init = [pow(10, -i) for i in range(1, self.shared_num + 1)]
129134
# shared expert part
130135
for i in range(0, self.shared_num):
131136
linear = self.add_sublayer(
132137
name=level_name + "_exp_shared_" + str(i),
133138
sublayer=nn.Linear(
134139
input_feature_size,
135140
expert_size,
136-
#initialize the weight randly
137-
weight_attr=nn.initializer.XavierUniform(),
141+
#initialize each shared expert respectively
142+
weight_attr=nn.initializer.Constant(
143+
value=shared_exp_init[i]),
138144
bias_attr=nn.initializer.Constant(value=0.1),
139145
name=level_name + "_exp_shared_" + str(i)))
140146
self._param_expert.append(linear)
141147

142148
# task gate part
143149
self._param_gate = []
144150
cur_expert_num = self.exp_per_task + self.shared_num
151+
gate_init = [pow(10, -i) for i in range(1, self.task_num + 1)]
145152
for i in range(0, self.task_num):
146153
linear = self.add_sublayer(
147154
name=level_name + "_gate_" + str(i),
148155
sublayer=nn.Linear(
149156
input_feature_size,
150157
cur_expert_num,
151-
#initialize the weight randly
152-
weight_attr=nn.initializer.XavierUniform(),
158+
#initialize each gate respectively
159+
weight_attr=nn.initializer.Constant(value=gate_init[i]),
153160
bias_attr=nn.initializer.Constant(value=0.1),
154161
name=level_name + "_gate_" + str(i)))
155162
self._param_gate.append(linear)
@@ -162,8 +169,7 @@ def __init__(self, input_feature_size, task_num, exp_per_task, shared_num,
162169
sublayer=nn.Linear(
163170
input_feature_size,
164171
cur_expert_num,
165-
#initialize the weight randly
166-
weight_attr=nn.initializer.XavierUniform(),
172+
weight_attr=nn.initializer.Constant(value=0.1),
167173
bias_attr=nn.initializer.Constant(value=0.1),
168174
name=level_name + "_gate_shared_"))
169175
self._param_gate_shared = linear

models/rank/deeprec/infer.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,18 @@
1919
import sys
2020
from math import sqrt
2121

22+
__dir__ = os.path.dirname(os.path.abspath(__file__))
23+
print(os.path.abspath('/'.join(__dir__.split('/')[:-3])))
24+
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
25+
sys.path.append(os.path.abspath('/'.join(__dir__.split('/')[:-3])))
26+
2227
from tools.utils.utils_single import load_yaml, load_dy_model_class, \
2328
get_abs_model
2429
from tools.utils.save_load import load_model
2530
from paddle.io import DataLoader
2631
import argparse
2732
from importlib import import_module
2833

29-
__dir__ = os.path.dirname(os.path.abspath(__file__))
30-
print(os.path.abspath('/'.join(__dir__.split('/')[:-3])))
31-
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
32-
sys.path.append(os.path.abspath('/'.join(__dir__.split('/')[:-3])))
33-
3434
logging.basicConfig(
3535
format='%(asctime)s - %(levelname)s - %(message)s', level=logging.INFO)
3636
logger = logging.getLogger(__name__)

models/rank/fat_deepffm/config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ runner:
2626
print_interval: 10
2727

2828
model_save_path: "output_model_fat_deepffm"
29-
infer_batch_size: 1000
29+
infer_batch_size: 1
3030
infer_reader_path: "criteo_reader" # importlib format
3131
test_data_dir: "data/sample_data/train"
3232

0 commit comments

Comments
 (0)