Commit 8a8ae9c

Add label semantic examples with new Fluid api (#10368)
* Add label semantic examples with new api
* Address review comments
* Address review comment
1 parent 889c919 commit 8a8ae9c

1 file changed: 228 additions, 0 deletions

@@ -0,0 +1,228 @@
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import paddle
import paddle.fluid as fluid
import numpy as np

WORD_DICT, VERB_DICT, LABEL_DICT = paddle.dataset.conll05.get_dict()
WORD_DICT_LEN = len(WORD_DICT)
LABEL_DICT_LEN = len(LABEL_DICT)
PRED_DICT_LEN = len(VERB_DICT)
MARK_DICT_LEN = 2
IS_SPARSE = True  # use sparse updates for the embedding tables


# Stacked bidirectional LSTMs over eight input features, producing
# per-token label scores for the CRF layers below.
def lstm_net():
    WORD_DIM = 32
    MARK_DIM = 5
    HIDDEN_DIM = 512
    DEPTH = 8
    EMBEDDING_NAME = 'emb'

    # Data definitions
    word = fluid.layers.data(
        name='word_data', shape=[1], dtype='int64', lod_level=1)
    predicate = fluid.layers.data(
        name='verb_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n2 = fluid.layers.data(
        name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n1 = fluid.layers.data(
        name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_0 = fluid.layers.data(
        name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p1 = fluid.layers.data(
        name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p2 = fluid.layers.data(
        name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
    mark = fluid.layers.data(
        name='mark_data', shape=[1], dtype='int64', lod_level=1)

    # 8 features
    predicate_embedding = fluid.layers.embedding(
        input=predicate,
        size=[PRED_DICT_LEN, WORD_DIM],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr='vemb')

    mark_embedding = fluid.layers.embedding(
        input=mark,
        size=[MARK_DICT_LEN, MARK_DIM],
        dtype='float32',
        is_sparse=IS_SPARSE)

    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        fluid.layers.embedding(
            size=[WORD_DICT_LEN, WORD_DIM],
            input=x,
            param_attr=fluid.ParamAttr(
                name=EMBEDDING_NAME, trainable=False)) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    hidden_0_layers = [
        fluid.layers.fc(input=emb, size=HIDDEN_DIM, act='tanh')
        for emb in emb_layers
    ]

    hidden_0 = fluid.layers.sums(input=hidden_0_layers)

    lstm_0 = fluid.layers.dynamic_lstm(
        input=hidden_0,
        size=HIDDEN_DIM,
        candidate_activation='relu',
        gate_activation='sigmoid',
        cell_activation='sigmoid')

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, DEPTH):
        mix_hidden = fluid.layers.sums(input=[
            fluid.layers.fc(input=input_tmp[0], size=HIDDEN_DIM, act='tanh'),
            fluid.layers.fc(input=input_tmp[1], size=HIDDEN_DIM, act='tanh')
        ])

        lstm = fluid.layers.dynamic_lstm(
            input=mix_hidden,
            size=HIDDEN_DIM,
            candidate_activation='relu',
            gate_activation='sigmoid',
            cell_activation='sigmoid',
            is_reverse=((i % 2) == 1))

        input_tmp = [mix_hidden, lstm]

    feature_out = fluid.layers.sums(input=[
        fluid.layers.fc(input=input_tmp[0], size=LABEL_DICT_LEN, act='tanh'),
        fluid.layers.fc(input=input_tmp[1], size=LABEL_DICT_LEN, act='tanh')
    ])

    return feature_out


def inference_network():
    predict = lstm_net()

    crf_decode = fluid.layers.crf_decoding(
        input=predict, param_attr=fluid.ParamAttr(name='crfw'))

    return crf_decode


def train_network():
    MIX_HIDDEN_LR = 1e-3

    predict = lstm_net()
    target = fluid.layers.data(
        name='target', shape=[1], dtype='int64', lod_level=1)
    crf_cost = fluid.layers.linear_chain_crf(
        input=predict,
        label=target,
        param_attr=fluid.ParamAttr(
            name='crfw', learning_rate=MIX_HIDDEN_LR))
    avg_cost = fluid.layers.mean(crf_cost)

    return avg_cost


def train(use_cuda, save_path):
    BATCH_SIZE = 128
    EPOCH_NUM = 1

    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.train(), buf_size=8192),
        batch_size=BATCH_SIZE)
    test_reader = paddle.batch(
        paddle.dataset.conll05.test(), batch_size=BATCH_SIZE)

    def event_handler(event):
        if isinstance(event, fluid.EndIteration):
            if (event.batch_id % 10) == 0:
                avg_cost = trainer.test(reader=test_reader)

                print('BatchID {0:04}, Loss {1:2.2}'.format(event.batch_id + 1,
                                                            avg_cost))

                if avg_cost > 0.01:  # Low threshold for speeding up CI
                    trainer.save_params(save_path)
                    return

    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    sgd_optimizer = fluid.optimizer.SGD(
        learning_rate=fluid.layers.exponential_decay(
            learning_rate=0.01,
            decay_steps=100000,
            decay_rate=0.5,
            staircase=True))
    trainer = fluid.Trainer(train_network, optimizer=sgd_optimizer, place=place)
    trainer.train(train_reader, EPOCH_NUM, event_handler=event_handler)


def infer(use_cuda, save_path):
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
    inferencer = fluid.Inferencer(
        inference_network, param_path=save_path, place=place)

    def create_random_lodtensor(lod, place, low, high):
        data = np.random.random_integers(low, high,
                                         [lod[-1], 1]).astype("int64")
        res = fluid.LoDTensor()
        res.set(data, place)
        res.set_lod([lod])
        return res

    # Create an input example
    lod = [0, 4, 10]
    word = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
    pred = create_random_lodtensor(lod, place, low=0, high=PRED_DICT_LEN - 1)
    ctx_n2 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
    ctx_n1 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
    ctx_0 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
    ctx_p1 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
    ctx_p2 = create_random_lodtensor(lod, place, low=0, high=WORD_DICT_LEN - 1)
    mark = create_random_lodtensor(lod, place, low=0, high=MARK_DICT_LEN - 1)

    results = inferencer.infer({
        'word_data': word,
        'verb_data': pred,
        'ctx_n2_data': ctx_n2,
        'ctx_n1_data': ctx_n1,
        'ctx_0_data': ctx_0,
        'ctx_p1_data': ctx_p1,
        'ctx_p2_data': ctx_p2,
        'mark_data': mark
    })

    print("infer results: ", results)


def main(use_cuda):
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return
    save_path = "label_semantic_roles.inference.model"
    train(use_cuda, save_path)
    infer(use_cuda, save_path)


if __name__ == '__main__':
    for use_cuda in (False, True):
        main(use_cuda=use_cuda)
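
A note on the `lod = [0, 4, 10]` input above: Fluid's LoDTensor uses an offset-based level-of-detail convention, so this describes a batch of two sequences occupying rows 0-4 and 4-10 of the [10, 1] data tensor, i.e. lengths 4 and 6. A minimal numpy sketch of that bookkeeping (the `offsets_to_lengths` helper is illustrative only, not part of the Paddle API):

import numpy as np

def offsets_to_lengths(lod):
    # Illustrative helper: offset-style LoD -> per-sequence lengths,
    # e.g. [0, 4, 10] -> [4, 6].
    return [b - a for a, b in zip(lod[:-1], lod[1:])]

lod = [0, 4, 10]
data = np.random.randint(0, 100, size=(lod[-1], 1)).astype("int64")

# Recover the individual sequences by slicing the flat batch at the offsets.
sequences = [data[a:b] for a, b in zip(lod[:-1], lod[1:])]
assert [len(s) for s in sequences] == offsets_to_lengths(lod)  # [4, 6]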
