37
37
mix_hidden_lr = 1e-3
38
38
39
39
IS_SPARSE = True
40
- PASS_NUM = 10
40
+ PASS_NUM = 100
41
41
BATCH_SIZE = 10
42
42
43
43
embedding_name = 'emb'
@@ -77,7 +77,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
77
77
emb_layers .append (mark_embedding )
78
78
79
79
hidden_0_layers = [
80
- fluid .layers .fc (input = emb , size = hidden_dim ) for emb in emb_layers
80
+ fluid .layers .fc (input = emb , size = hidden_dim , act = 'tanh' )
81
+ for emb in emb_layers
81
82
]
82
83
83
84
hidden_0 = fluid .layers .sums (input = hidden_0_layers )
@@ -94,8 +95,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
94
95
95
96
for i in range (1 , depth ):
96
97
mix_hidden = fluid .layers .sums (input = [
97
- fluid .layers .fc (input = input_tmp [0 ], size = hidden_dim ),
98
- fluid .layers .fc (input = input_tmp [1 ], size = hidden_dim )
98
+ fluid .layers .fc (input = input_tmp [0 ], size = hidden_dim , act = 'tanh' ),
99
+ fluid .layers .fc (input = input_tmp [1 ], size = hidden_dim , act = 'tanh' )
99
100
])
100
101
101
102
lstm = fluid .layers .dynamic_lstm (
@@ -109,8 +110,8 @@ def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
109
110
input_tmp = [mix_hidden , lstm ]
110
111
111
112
feature_out = fluid .layers .sums (input = [
112
- fluid .layers .fc (input = input_tmp [0 ], size = label_dict_len ),
113
- fluid .layers .fc (input = input_tmp [1 ], size = label_dict_len )
113
+ fluid .layers .fc (input = input_tmp [0 ], size = label_dict_len , act = 'tanh' ),
114
+ fluid .layers .fc (input = input_tmp [1 ], size = label_dict_len , act = 'tanh' )
114
115
])
115
116
116
117
return feature_out
@@ -171,7 +172,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
171
172
# Check other optimizers, and check why the output becomes NaN
172
173
sgd_optimizer = fluid .optimizer .SGD (
173
174
learning_rate = fluid .layers .exponential_decay (
174
- learning_rate = 0.0001 ,
175
+ learning_rate = 0.01 ,
175
176
decay_steps = 100000 ,
176
177
decay_rate = 0.5 ,
177
178
staircase = True ))
@@ -233,7 +234,7 @@ def train_loop(main_program):
233
234
print ("second per batch: " + str ((time .time (
234
235
) - start_time ) / batch_id ))
235
236
# Set the threshold low to speed up the CI test
236
- if float (pass_precision ) > 0.05 :
237
+ if float (pass_precision ) > 0.01 :
237
238
if save_dirname is not None :
238
239
# TODO(liuyiqun): Change the target to crf_decode
239
240
fluid .io .save_inference_model (save_dirname , [
0 commit comments