 import paddle
 import paddle.fluid as fluid
-import numpy
+import numpy as np
+import math  # assumed needed for math.isnan below; lines 1-15 of the file are not shown
+import sys   # assumed needed for sys.exit below

 WORD_DICT, VERB_DICT, LABEL_DICT = paddle.dataset.conll05.get_dict()
 WORD_DICT_LEN = len(WORD_DICT)
 LABEL_DICT_LEN = len(LABEL_DICT)
 PRED_DICT_LEN = len(VERB_DICT)
 MARK_DICT_LEN = 2
+IS_SPARSE = True
+BATCH_SIZE = 10
+EMBEDDING_NAME = 'emb'
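For orientation: `paddle.dataset.conll05.get_dict()` loads the word, verb (predicate), and label vocabularies of the CoNLL-2005 SRL dataset, and `MARK_DICT_LEN` is 2 because the mark feature is a binary flag for whether a token lies in the predicate's context region. A quick, illustrative sanity check of the sizes these constants feed into the layers below (not part of the commit):

    # Illustrative only: the vocabulary sizes that size the embeddings below.
    print("words=%d verbs=%d labels=%d" %
          (WORD_DICT_LEN, PRED_DICT_LEN, LABEL_DICT_LEN))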


-def lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark):
+def lstm_net():
     WORD_DIM = 32
     MARK_DIM = 5
     HIDDEN_DIM = 512
     DEPTH = 8
-    EMBEDDING_NAME = 'emb'

     # Data definitions
     word = fluid.layers.data(
@@ -69,8 +71,9 @@ def lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark):
         fluid.layers.embedding(
             size=[WORD_DICT_LEN, WORD_DIM],
             input=x,
-            param_attr=fluid.ParamAttr(
-                name=EMBEDDING_NAME, trainable=False)) for x in word_input
+            param_attr=fluid.ParamAttr(name=EMBEDDING_NAME))
+        for x in word_input
+        #name=EMBEDDING_NAME, trainable=False)) for x in word_input
     ]
     emb_layers.append(predicate_embedding)
     emb_layers.append(mark_embedding)
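All of the word and context inputs are embedded through a single shared table: each `fluid.layers.embedding` call points `fluid.ParamAttr` at the same name, `EMBEDDING_NAME`, so fluid creates the `emb` parameter once and reuses it. A minimal sketch of that sharing pattern, with hypothetical inputs `a` and `b`:

    # Sketch: two lookups that name the same parameter share one table.
    a = fluid.layers.data(name='a', shape=[1], dtype='int64', lod_level=1)
    b = fluid.layers.data(name='b', shape=[1], dtype='int64', lod_level=1)
    emb_a = fluid.layers.embedding(
        input=a, size=[WORD_DICT_LEN, WORD_DIM],
        param_attr=fluid.ParamAttr(name=EMBEDDING_NAME))
    emb_b = fluid.layers.embedding(
        input=b, size=[WORD_DICT_LEN, WORD_DIM],
        param_attr=fluid.ParamAttr(name=EMBEDDING_NAME))  # reuses 'emb'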
@@ -116,21 +119,16 @@ def lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark):
     return feature_out


-def inference_network():
-    predict = lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
-                       mark)
+def inference_program():
+    predict = lstm_net()

-    crf_decode = fluid.layers.crf_decoding(
-        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))
+    return predict

-    return crf_decode

-
-def train_network():
+def train_program():
     MIX_HIDDEN_LR = 1e-3

-    predict = lstm_net(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2,
-                       mark)
+    predict = lstm_net()
     target = fluid.layers.data(
         name='target', shape=[1], dtype='int64', lod_level=1)
     crf_cost = fluid.layers.linear_chain_crf(
@@ -140,44 +138,66 @@ def train_network():
             name='crfw', learning_rate=MIX_HIDDEN_LR))
     avg_cost = fluid.layers.mean(crf_cost)

-    return avg_cost
+    return [avg_cost]
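Note the asymmetry this rewrite introduces: `train_program` scores sequences with `linear_chain_crf` against the gold `target`, while `inference_program` now returns the raw emission `feature_out` instead of a decoded label path. To get decoded labels at inference time, the removed `crf_decoding` call could be reattached to the same learned `'crfw'` transitions, roughly:

    # Sketch: Viterbi decoding over the emissions, reusing the 'crfw'
    # transition parameters learned by linear_chain_crf during training.
    crf_decode = fluid.layers.crf_decoding(
        input=predict, param_attr=fluid.ParamAttr(name='crfw'))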


-def train(use_cuda, save_path):
-    BATCH_SIZE = 128
-    EPOCH_NUM = 1
+def train(use_cuda, train_program, save_path):
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    optimizer = fluid.optimizer.SGD(learning_rate=0.01)

-    train_reader = paddle.batch(
-        paddle.reader.shuffle(
-            paddle.dataset.conll05.train(), buf_size=8192),
-        batch_size=BATCH_SIZE)
-    test_reader = paddle.batch(
-        paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)
+    trainer = fluid.Trainer(
+        train_func=train_program, place=place, optimizer=optimizer)

-    def event_handler(event):
-        if isinstance(event, fluid.EndIteration):
-            if (event.batch_id % 10) == 0:
-                avg_cost = trainer.test(reader=test_reader)
+    feed_order = [
+        'word_data', 'ctx_n2_data', 'ctx_n1_data', 'ctx_0_data', 'ctx_p1_data',
+        'ctx_p2_data', 'verb_data', 'mark_data', 'target'
+    ]

-                print('BatchID {0:04}, Loss {1:2.2}'.format(event.batch_id + 1,
-                                                            avg_cost))
+    #embedding_param = fluid.global_scope().find_var(
+    #    EMBEDDING_NAME).get_tensor()
+    #embedding_param.set(
+    #    load_parameter(conll05.get_embedding(), WORD_DICT_LEN, WORD_DIM),
+    #    place)

-                if avg_cost > 0.01:  # Low threshold for speeding up CI
-                    trainer.save_params(save_path)
-                    return
+    def event_handler(event):
+        if isinstance(event, fluid.EndEpochEvent):
+            test_reader = paddle.batch(
+                paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)
+            avg_cost_set = trainer.test(
+                reader=test_reader, feed_order=feed_order)
+
+            # get avg cost
+            avg_cost = np.array(avg_cost_set).mean()
+
+            print("avg_cost: %s" % avg_cost)
+
+            if float(avg_cost) < 100.0:  # Large value to increase CI speed
+                trainer.save_params(save_path)
+            else:
+                print('BatchID {0}, Test Loss {1:0.2}'.format(event.epoch + 1,
+                                                              float(avg_cost)))
+                if math.isnan(float(avg_cost)):
+                    sys.exit("got NaN loss, training failed.")
+
+        elif isinstance(event, fluid.EndStepEvent):
+            print("Step {0}, Epoch {1} Metrics {2}".format(
+                event.step, event.epoch, map(np.array, event.metrics)))
+            if event.step == 1:  # Run 2 iterations to speed CI
+                trainer.save_params(save_path)
+                trainer.stop()
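For readers new to fluid's high-level API: `fluid.Trainer` calls `event_handler` with a `fluid.EndStepEvent` after every mini-batch and a `fluid.EndEpochEvent` after every full pass, which is why the handler above logs step metrics frequently but only runs the test reader once per epoch. The callback contract, as a minimal sketch:

    # Minimal sketch of the callback contract assumed by trainer.train() below.
    def minimal_handler(event):
        if isinstance(event, fluid.EndStepEvent):
            pass  # per-batch work: event.step, event.epoch, event.metrics
        elif isinstance(event, fluid.EndEpochEvent):
            pass  # per-pass work: evaluate, save_params, or trainer.stop()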

-    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
-    sgd_optimizer = fluid.optimizer.SGD(
-        learning_rate=fluid.layers.exponential_decay(
-            learning_rate=0.01,
-            decay_steps=100000,
-            decay_rate=0.5,
-            staircase=True))
-    trainer = fluid.Trainer(train_network, optimizer=sgd_optimizer, place=place)
-    trainer.train(train_reader, EPOCH_NUM, event_handler=event_handler)
+    train_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.conll05.test(), buf_size=8192),
+        batch_size=BATCH_SIZE)
+    trainer.train(
+        num_epochs=1,
+        event_handler=event_handler,
+        reader=train_reader,
+        feed_order=feed_order)
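Note that the shuffled training reader above draws from `paddle.dataset.conll05.test()` rather than the train split, presumably another of this file's CI shortcuts. A full training run would swap in the train split, along the lines of the reader this commit removed:

    # Presumed full-training variant (assumption; the commit itself reads
    # the test split, apparently to keep CI fast).
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.train(), buf_size=8192),
        batch_size=BATCH_SIZE)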


-def infer(use_cuda, save_path):
+def infer(use_cuda, inference_program, save_path):
     place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
     inferencer = fluid.Inferencer(
         inference_program, param_path=save_path, place=place)
@@ -201,26 +221,28 @@ def create_random_lodtensor(lod, place, low, high):
     ctx_p2 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
     mark = create_random_lodtensor(lod, place, low=0, high=MARK_DICT_LEN - 1)

-    results = inferencer.infer({
-        'word_data': word,
-        'verb_data': pred,
-        'ctx_n2_data': ctx_n2,
-        'ctx_n1_data': ctx_n1,
-        'ctx_0_data': ctx_0,
-        'ctx_p1_data': ctx_p1,
-        'ctx_p2_data': ctx_p2,
-        'mark_data': mark
-    })
+    results = inferencer.infer(
+        {
+            'word_data': word,
+            'verb_data': pred,
+            'ctx_n2_data': ctx_n2,
+            'ctx_n1_data': ctx_n1,
+            'ctx_0_data': ctx_0,
+            'ctx_p1_data': ctx_p1,
+            'ctx_p2_data': ctx_p2,
+            'mark_data': mark
+        },
+        return_numpy=False)

-    print("infer results: ", results)
+    print("infer results: ", np.array(results[0]))


 def main(use_cuda):
     if use_cuda and not fluid.core.is_compiled_with_cuda():
         return
     save_path = "label_semantic_roles.inference.model"
-    train(use_cuda, save_path)
-    infer(use_cuda, save_path)
+    train(use_cuda, train_program, save_path)
+    infer(use_cuda, inference_program, save_path)


 if __name__ == '__main__':