Skip to content

Commit 2c5a6ac

Browse files
reyoung authored and qingqing01 committed
Optional fields to shrink generated proto size (#93)
* remove unnecessary field set in ParameterConfig, Evaluators, etc
1 parent 04876d0 commit 2c5a6ac

File tree

8 files changed

+99
-65
lines changed

8 files changed

+99
-65
lines changed

paddle/gserver/layers/CRFLayer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ bool CRFLayer::init(const LayerMap& layerMap,
3131
}
3232

3333
// coeff only affect bp, keep consistent with CostLayer
34-
coeff_ = config_.has_coeff() ? config_.coeff() : real(1.0);
34+
coeff_ = config_.coeff();
3535
if (inputLayers_.size() == 3) {
3636
weightLayer_ = inputLayers_[2];
3737
}

paddle/gserver/layers/CostLayer.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,7 @@ namespace paddle {
2626
bool CostLayer::init(const LayerMap& layerMap,
2727
const ParameterMap& parameterMap) {
2828
bool ret = Layer::init(layerMap, parameterMap);
29-
if (config_.has_coeff()) {
30-
coeff_ = config_.coeff(); // coeff only affact bp
31-
} else {
32-
coeff_ = real(1.0);
33-
}
29+
coeff_ = config_.coeff();
3430
if (!ret) return ret;
3531
CHECK_GE(inputLayers_.size(), 2UL);
3632
CHECK_LE(inputLayers_.size(), 3UL);

paddle/trainer/tests/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
dump_text.test
22
test_pydata_provider_wrapper.json
3+
*proto.bin

proto/ModelConfig.proto.m4

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ sinclude(`ModelConfigLayer.proto.m4')
299299
optional bool norm_by_times = 25;
300300

301301
// for CostLayers
302-
optional real coeff = 26;
302+
optional real coeff = 26 [default = 1.0];
303303

304304
// for AverageLayer
305305
// can be set to: 'average', 'sum' or 'squarerootn'

proto/ParameterConfig.proto.m4

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ message ParameterUpdaterHookConfig {
3131
message ParameterConfig {
3232
required string name = 1;
3333
required uint64 size = 2;
34-
required real learning_rate = 3;
35-
required real momentum = 4;
34+
optional real learning_rate = 3 [default = 1.0];
35+
optional real momentum = 4 [default = 0.0];
3636
optional real initial_mean = 5 [default = 0.0];
3737
optional real initial_std = 6 [default = 0.01];
3838
// use L2-regularization if decay_rate set and decay_rate_l1 not set
@@ -54,8 +54,8 @@ message ParameterConfig {
5454
optional int32 num_batches_regularization = 13 [default = 1];
5555
// if is_sparse is true, para is sparse, else para is dense
5656
optional bool is_sparse = 14[default = false];
57-
// if para is sparse, format should be "csc" or "csr"
58-
optional string format = 15[default = "csr"];
57+
// if para is sparse, format should be "csc" or "csr", empty means is not sparse
58+
optional string format = 15 [default = ""];
5959
// sparse remote update or not
6060
optional bool sparse_remote_update = 16 [default = false];
6161
// gradient clipping threshold, no clipping by default

python/paddle/trainer/config_parser.py

Lines changed: 70 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -114,15 +114,15 @@
114114
# Initialize global variables. We use this function so that we can
115115
# call parse_config() multiple times
116116
def init_config_environment(
117-
g_default_momentum = 0.,
118-
g_default_decay_rate = 0.,
117+
g_default_momentum = None,
118+
g_default_decay_rate = None,
119119
g_default_initial_mean = 0.,
120120
g_default_initial_std = 0.01,
121-
g_default_num_batches_regularization = 1,
121+
g_default_num_batches_regularization = None,
122122
g_default_initial_strategy = 0,
123123
g_default_initial_smart = False,
124-
g_default_gradient_clipping_threshold = 0.,
125-
g_default_device = -1,
124+
g_default_gradient_clipping_threshold = None,
125+
g_default_device = None,
126126
g_default_update_hooks = None,
127127
g_default_compact_func = None,
128128

@@ -1099,12 +1099,12 @@ def Evaluator(
10991099
inputs,
11001100
chunk_scheme = None,
11011101
num_chunk_types = None,
1102-
classification_threshold = 0.5,
1103-
positive_label = -1,
1104-
dict_file = "",
1105-
result_file = "",
1106-
num_results = 1,
1107-
delimited = True,
1102+
classification_threshold = None,
1103+
positive_label = None,
1104+
dict_file = None,
1105+
result_file = None,
1106+
num_results = None,
1107+
delimited = None,
11081108
):
11091109
evaluator = g_config.model_config.evaluators.add()
11101110
evaluator.type = type
@@ -1120,12 +1120,19 @@ def Evaluator(
11201120
evaluator.num_chunk_types = num_chunk_types
11211121
g_current_submodel.evaluator_names.append(evaluator.name)
11221122

1123-
evaluator.classification_threshold = classification_threshold
1124-
evaluator.positive_label = positive_label
1125-
evaluator.dict_file = dict_file
1126-
evaluator.result_file = result_file
1127-
evaluator.num_results = num_results
1128-
evaluator.delimited = delimited
1123+
if classification_threshold is not None:
1124+
evaluator.classification_threshold = classification_threshold
1125+
if positive_label is not None:
1126+
evaluator.positive_label = positive_label
1127+
if dict_file is not None:
1128+
evaluator.dict_file = dict_file
1129+
1130+
if result_file is not None:
1131+
evaluator.result_file = result_file
1132+
if num_results is not None:
1133+
evaluator.num_results = num_results
1134+
if delimited is not None:
1135+
evaluator.delimited = delimited
11291136

11301137
class LayerBase(object):
11311138
def __init__(
@@ -1137,7 +1144,7 @@ def __init__(
11371144
device=None,
11381145
active_type="",
11391146
drop_rate=0.,
1140-
coeff=1.):
1147+
coeff=None):
11411148
config_assert('@' not in name,
11421149
"layer name: %s contain special character @" % name)
11431150
global g_current_submodel
@@ -1155,18 +1162,20 @@ def __init__(
11551162
self.inputs = [self.inputs]
11561163

11571164
self.config = g_config.model_config.layers.add()
1165+
assert isinstance(self.config, LayerConfig)
11581166
self.config.name = name
11591167
self.config.type = type
11601168
self.config.active_type = active_type
1161-
self.config.coeff = coeff
1169+
if coeff is not None:
1170+
self.config.coeff = float(coeff)
11621171
if size != 0:
11631172
self.config.size = size
11641173
if drop_rate != 0:
11651174
self.config.drop_rate = drop_rate
11661175

11671176
if device is not None:
11681177
self.config.device = device
1169-
else:
1178+
elif g_default_device is not None:
11701179
self.config.device = g_default_device
11711180

11721181
for input_index in xrange(len(self.inputs)):
@@ -1236,10 +1245,12 @@ def create_bias_parameter(
12361245
if bias.parameter_name is None:
12371246
bias.parameter_name = gen_bias_parameter_name(self.config.name)
12381247
if bias.parameter_name not in g_parameter_map:
1248+
assert isinstance(self.config, LayerConfig)
1249+
12391250
Parameter(
12401251
bias.parameter_name,
12411252
size,
1242-
self.config.device,
1253+
self.config.device if self.config.HasField('device') else None,
12431254
dims,
12441255
bias.learning_rate,
12451256
bias.momentum,
@@ -1265,7 +1276,7 @@ def create_input_parameter(
12651276
input_index,
12661277
size,
12671278
dims=None,
1268-
sparse = False,
1279+
sparse = None,
12691280
format = "csr"):
12701281
if dims is None:
12711282
# TODO(yuyang18): print warning and callstack here!
@@ -1293,7 +1304,7 @@ def create_input_parameter(
12931304
Parameter(
12941305
input_config.parameter_name,
12951306
size,
1296-
self.config.device,
1307+
self.config.device if self.config.HasField("device") else None,
12971308
dims,
12981309
input_config.learning_rate,
12991310
input_config.momentum,
@@ -1353,6 +1364,8 @@ def __init__(
13531364

13541365
if sparse:
13551366
psize = self.inputs[input_index].nnz
1367+
else:
1368+
sparse = None
13561369

13571370
self.create_input_parameter(input_index, psize, dims, sparse, format)
13581371
self.create_bias_parameter(bias, self.config.size)
@@ -2836,27 +2849,44 @@ def Parameter(
28362849
para = g_config.model_config.parameters.add()
28372850
para.name = name
28382851
para.size = size
2839-
para.device = device
2840-
para.dims.extend(dims);
2841-
para.learning_rate = default(learning_rate, 1.)
2842-
para.momentum = default(momentum, g_default_momentum)
2852+
if device is not None:
2853+
para.device = int(device)
2854+
para.dims.extend(dims)
2855+
2856+
if learning_rate is not None:
2857+
para.learning_rate = float(learning_rate)
2858+
2859+
momentum = default(momentum, g_default_momentum)
2860+
if momentum is not None:
2861+
para.momentum = float(momentum)
2862+
28432863
config_assert(not momentum or not decay_rate_l1,
28442864
"momentum and decay_rate_l1 cannot both be non-zero")
2845-
para.decay_rate = default(decay_rate, g_default_decay_rate)
2865+
2866+
decay_rate = default(decay_rate, g_default_decay_rate)
2867+
if decay_rate is not None:
2868+
para.decay_rate = decay_rate
2869+
28462870
if decay_rate_l1 is not None:
28472871
para.decay_rate_l1 = decay_rate_l1
28482872
para.initial_std = default(initial_std, g_default_initial_std)
28492873
para.initial_mean = default(initial_mean, g_default_initial_mean)
2850-
para.num_batches_regularization = default(
2874+
2875+
num_batches_regularization = default(
28512876
num_batches_regularization, g_default_num_batches_regularization)
2877+
if num_batches_regularization is not None:
2878+
para.num_batches_regularization = int(num_batches_regularization)
2879+
28522880
if sparse_remote_update is not None:
28532881
para.sparse_remote_update = sparse_remote_update
28542882
if sparse_remote_update:
28552883
g_config.opt_config.use_sparse_remote_updater = True
28562884
if sparse_update is not None:
28572885
para.sparse_update = sparse_update
2858-
para.gradient_clipping_threshold = default(
2859-
gradient_clipping_threshold, g_default_gradient_clipping_threshold);
2886+
gradient_clipping_threshold = default(
2887+
gradient_clipping_threshold, g_default_gradient_clipping_threshold)
2888+
if gradient_clipping_threshold is not None:
2889+
para.gradient_clipping_threshold = gradient_clipping_threshold
28602890
para.initial_strategy = default(initial_strategy, g_default_initial_strategy)
28612891
para.initial_smart = default(initial_smart, g_default_initial_smart)
28622892
if para.initial_smart:
@@ -2869,15 +2899,19 @@ def Parameter(
28692899
para.initial_std = 1. / math.sqrt(para.size)
28702900
if g_default_compact_func is not None:
28712901
sparse, format, need_compact = g_default_compact_func(para.name)
2872-
para.is_sparse = default(sparse, False)
2873-
para.format = default(format, "")
2874-
para.need_compact = default(need_compact, False)
2902+
2903+
if sparse is not None:
2904+
para.is_sparse = sparse
2905+
if format is not None:
2906+
para.format = format
2907+
if need_compact is not None:
2908+
para.need_compact = need_compact
28752909
if is_static is not None:
28762910
para.is_static = is_static
28772911
config_assert(not para.sparse_remote_update or not para.is_static,
28782912
"sparse_remote_update and is_static cannot both be true")
2879-
2880-
para.is_shared = default(is_shared, False)
2913+
if is_shared is not None:
2914+
para.is_shared = is_shared
28812915

28822916
update_hooks = default(update_hooks, g_default_update_hooks)
28832917

python/paddle/trainer_config_helpers/evaluators.py

Lines changed: 20 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -65,12 +65,12 @@ def evaluator_base(
6565
name=None,
6666
chunk_scheme=None,
6767
num_chunk_types=None,
68-
classification_threshold=0.5,
69-
positive_label=-1,
70-
dict_file="",
71-
result_file="",
72-
num_results=1,
73-
delimited=True):
68+
classification_threshold=None,
69+
positive_label=None,
70+
dict_file=None,
71+
result_file=None,
72+
num_results=None,
73+
delimited=None):
7474
"""
7575
Evaluator will evaluate the network status while training/testing.
7676
@@ -105,9 +105,10 @@ def evaluator_base(
105105
:type weight: LayerOutput.
106106
"""
107107
# inputs type assertions.
108-
assert isinstance(classification_threshold, float)
109-
assert isinstance(positive_label, int)
110-
assert isinstance(num_results, int)
108+
assert classification_threshold is None or isinstance(
109+
classification_threshold, float)
110+
assert positive_label is None or isinstance(positive_label, int)
111+
assert num_results is None or isinstance(num_results, int)
111112

112113
if not isinstance(input, list):
113114
input = [input]
@@ -136,7 +137,7 @@ def classification_error_evaluator(
136137
label,
137138
name=None,
138139
weight=None,
139-
threshold=0.5):
140+
threshold=None):
140141
"""
141142
Classification Error Evaluator. It will print error rate for classification.
142143
@@ -253,7 +254,7 @@ def pnpair_evaluator(
253254
def precision_recall_evaluator(
254255
input,
255256
label,
256-
positive_label=-1,
257+
positive_label=None,
257258
weight=None,
258259
name=None,
259260
):
@@ -494,7 +495,7 @@ def gradient_printer_evaluator(
494495
@wrap_name_default()
495496
def maxid_printer_evaluator(
496497
input,
497-
num_results=1,
498+
num_results=None,
498499
name=None,
499500
):
500501
"""
@@ -518,13 +519,14 @@ def maxid_printer_evaluator(
518519
"""
519520
evaluator_base(name=name,
520521
type="max_id_printer",
521-
input=input)
522+
input=input,
523+
num_results=num_results)
522524

523525
@evaluator(EvaluatorAttribute.FOR_PRINT)
524526
@wrap_name_default()
525527
def maxframe_printer_evaluator(
526528
input,
527-
num_results=1,
529+
num_results=None,
528530
name=None,
529531
):
530532
"""
@@ -556,9 +558,9 @@ def maxframe_printer_evaluator(
556558
@wrap_name_default()
557559
def seqtext_printer_evaluator(
558560
input,
559-
dict_file="",
560-
result_file="",
561-
delimited=True,
561+
result_file,
562+
dict_file=None,
563+
delimited=None,
562564
name=None,
563565
):
564566
"""
@@ -616,6 +618,7 @@ def seqtext_printer_evaluator(
616618
:param name: Evaluator name.
617619
:type name: None|basestring
618620
"""
621+
assert isinstance(result_file, basestring)
619622
evaluator_base(name=name,
620623
type="seq_text_printer",
621624
input=input,

python/paddle/trainer_config_helpers/optimizers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def to_setting_kwargs(self):
7979
'learning_method': 'momentum'
8080
}
8181

82-
def __init__(self, momentum=1e-3):
82+
def __init__(self, momentum=None):
8383
self.momentum = momentum
8484

8585

0 commit comments

Comments (0)