Skip to content

Commit bfc68f2

Browse files
authored
Merge pull request #7425 from helinwang/distributed_label_semantic_role
Add distributed label semantic role book chapter
2 parents 1baca7f + 6c07236 commit bfc68f2

File tree

1 file changed

+225
-0
lines changed

1 file changed

+225
-0
lines changed
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
import math
import os
import sys
import time

import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
import paddle.v2.fluid as fluid
9+
10+
# Vocabularies shipped with the CoNLL-05 dataset module; their sizes fix the
# embedding table shapes and the width of the final tag projection below.
word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)

# Network hyper-parameters.
mark_dict_len = 2  # number of rows in the mark embedding table
word_dim = 32  # width of the word/context and predicate embeddings
mark_dim = 5  # width of the mark embedding
hidden_dim = 512  # `size` given to the hidden fc and dynamic_lstm layers
depth = 8  # db_lstm builds LSTM layer 0 plus layers 1..depth-1
mix_hidden_lr = 1e-3  # learning rate attached to the CRF parameter 'crfw'

# Training configuration.
IS_SPARSE = True  # passed as is_sparse to the predicate and mark embeddings
PASS_NUM = 10  # number of passes over the training reader
BATCH_SIZE = 20  # samples per mini-batch

# Name of the shared word-embedding parameter; main() looks the parameter up
# by this name to overwrite it with the pre-trained embedding file.
embedding_name = 'emb'
27+
28+
29+
def load_parameter(file_name, h, w):
    """Load a float32 matrix from a binary parameter file.

    The first 16 bytes of the file are treated as a header and skipped; the
    remainder is read as a flat array of float32 values.

    Args:
        file_name: Path of the binary parameter file.
        h: Number of rows of the returned matrix.
        w: Number of columns of the returned matrix.

    Returns:
        A numpy float32 array of shape (h, w).

    Raises:
        ValueError: If the payload after the header does not contain exactly
            h * w values (raised by ``reshape``).
    """
    header_bytes = 16
    with open(file_name, 'rb') as f:
        # Skip the header explicitly instead of reading and discarding it.
        f.seek(header_bytes)
        return np.fromfile(f, dtype=np.float32).reshape(h, w)
33+
34+
35+
def db_lstm(word, predicate, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, mark,
            **ignored):
    """Build the stacked LSTM feature network for semantic role labeling.

    Every input is embedded, projected to ``hidden_dim`` and summed; the sum
    feeds a stack of ``depth`` LSTM layers whose direction alternates. The
    last layer's input and output are projected to ``label_dict_len`` and
    summed to give per-token tag scores.

    Args:
        word: Input layer holding the word ids.
        predicate: Input layer holding the predicate (verb) ids.
        ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2: Input layers holding the five
            context-word ids; they share the word embedding table.
        mark: Input layer holding the mark ids.
        **ignored: Extra keyword arguments are accepted and discarded.

    Returns:
        The layer holding the summed tag-score projections, of width
        ``label_dict_len``.
    """
    # 8 features: six word/context inputs plus predicate and mark.
    predicate_embedding = fluid.layers.embedding(
        input=predicate,
        size=[pred_len, word_dim],
        dtype='float32',
        is_sparse=IS_SPARSE,
        param_attr='vemb')

    mark_embedding = fluid.layers.embedding(
        input=mark,
        size=[mark_dict_len, mark_dim],
        dtype='float32',
        is_sparse=IS_SPARSE)

    # All six word-like inputs use the single parameter named
    # `embedding_name`; it is marked non-trainable here.
    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        fluid.layers.embedding(
            size=[word_dict_len, word_dim],
            input=x,
            param_attr=fluid.ParamAttr(
                name=embedding_name, trainable=False)) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    # One fc projection per feature, summed into a single hidden input.
    hidden_0_layers = [
        fluid.layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
    ]

    hidden_0 = fluid.layers.sums(input=hidden_0_layers)

    lstm_0 = fluid.layers.dynamic_lstm(
        input=hidden_0,
        size=hidden_dim,
        candidate_activation='relu',
        gate_activation='sigmoid',
        cell_activation='sigmoid')

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, depth):
        # Each layer sees both the previous layer's input and its output.
        mix_hidden = fluid.layers.sums(input=[
            fluid.layers.fc(input=input_tmp[0], size=hidden_dim),
            fluid.layers.fc(input=input_tmp[1], size=hidden_dim)
        ])

        # Odd-numbered layers are built with is_reverse=True, so the
        # direction alternates from layer to layer.
        lstm = fluid.layers.dynamic_lstm(
            input=mix_hidden,
            size=hidden_dim,
            candidate_activation='relu',
            gate_activation='sigmoid',
            cell_activation='sigmoid',
            is_reverse=((i % 2) == 1))

        input_tmp = [mix_hidden, lstm]

    # Project the last layer's input and output to one score per label.
    feature_out = fluid.layers.sums(input=[
        fluid.layers.fc(input=input_tmp[0], size=label_dict_len),
        fluid.layers.fc(input=input_tmp[1], size=label_dict_len)
    ])

    return feature_out
100+
101+
102+
def to_lodtensor(data, place):
    """Pack a batch of variable-length id sequences into one LoDTensor.

    Args:
        data: Non-empty list of sequences; they are concatenated along
            axis 0 and cast to int64.
        place: Device place handed to ``LoDTensor.set``.

    Returns:
        A ``fluid.LoDTensor`` holding the concatenated ids as a column of
        shape (total_length, 1), with a single-level LoD made of the
        cumulative sequence offsets (starting at 0).
    """
    # Offsets are the running sum of sequence lengths: [0, l0, l0 + l1, ...].
    offsets = [0]
    for seq in data:
        offsets.append(offsets[-1] + len(seq))

    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])

    res = fluid.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([offsets])
    return res
115+
116+
117+
def main():
    """Build the SRL model and run this process as a trainer or a pserver.

    The network, CRF cost, SGD optimizer and chunk evaluator are defined
    first; the program is then transpiled for distributed training and the
    process acts according to its role.

    Environment variables:
        PSERVERS: Parameter-server endpoints, passed to the transpiler as
            ``pservers``. Required.
        SERVER_ENDPOINT: Endpoint served by this process. Required when the
            role is PSERVER.
        TRAINING_ROLE: ``"TRAINER"`` (default) or ``"PSERVER"``.
        TRAINERS: Number of trainers passed to the transpiler; defaults
            to 2.

    Exits with status 1 when a required variable is missing or the role is
    not recognised.
    """
    # define network topology
    word = fluid.layers.data(
        name='word_data', shape=[1], dtype='int64', lod_level=1)
    predicate = fluid.layers.data(
        name='verb_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n2 = fluid.layers.data(
        name='ctx_n2_data', shape=[1], dtype='int64', lod_level=1)
    ctx_n1 = fluid.layers.data(
        name='ctx_n1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_0 = fluid.layers.data(
        name='ctx_0_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p1 = fluid.layers.data(
        name='ctx_p1_data', shape=[1], dtype='int64', lod_level=1)
    ctx_p2 = fluid.layers.data(
        name='ctx_p2_data', shape=[1], dtype='int64', lod_level=1)
    mark = fluid.layers.data(
        name='mark_data', shape=[1], dtype='int64', lod_level=1)

    # Pass the eight inputs explicitly rather than via **locals(), so adding
    # a local variable above cannot silently change what the network gets.
    feature_out = db_lstm(
        word=word,
        predicate=predicate,
        ctx_n2=ctx_n2,
        ctx_n1=ctx_n1,
        ctx_0=ctx_0,
        ctx_p1=ctx_p1,
        ctx_p2=ctx_p2,
        mark=mark)

    target = fluid.layers.data(
        name='target', shape=[1], dtype='int64', lod_level=1)
    crf_cost = fluid.layers.linear_chain_crf(
        input=feature_out,
        label=target,
        param_attr=fluid.ParamAttr(
            name='crfw', learning_rate=mix_hidden_lr))
    avg_cost = fluid.layers.mean(x=crf_cost)

    # TODO(qiao)
    # check other optimizers and check why out will be NAN
    sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.0001)
    optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost)

    # TODO(qiao)
    # add dependency track and move this config before optimizer
    # Decoding refers to the same 'crfw' parameter name as the CRF cost.
    crf_decode = fluid.layers.crf_decoding(
        input=feature_out, param_attr=fluid.ParamAttr(name='crfw'))

    chunk_evaluator = fluid.evaluator.ChunkEvaluator(
        input=crf_decode,
        label=target,
        chunk_scheme="IOB",
        num_chunk_types=int(math.ceil((label_dict_len - 1) / 2.0)))

    # NOTE(review): the training reader is built from conll05.test();
    # presumably because it is the split the dataset module exposes - confirm.
    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.test(), buf_size=8192),
        batch_size=BATCH_SIZE)
    place = fluid.CPUPlace()
    # NOTE(review): feed_list order presumably mirrors the field order of
    # each reader sample - verify against the conll05 reader.
    feeder = fluid.DataFeeder(
        feed_list=[
            word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2, predicate, mark, target
        ],
        place=place)
    exe = fluid.Executor(place)

    t = fluid.DistributeTranspiler()
    pserver_endpoints = os.getenv("PSERVERS")
    # server endpoint for current node
    current_endpoint = os.getenv("SERVER_ENDPOINT")
    # run as trainer or parameter server
    training_role = os.getenv(
        "TRAINING_ROLE", "TRAINER")  # get the training role: trainer/pserver
    # Trainer count; stays at the previous hard-coded 2 unless overridden.
    trainers = int(os.getenv("TRAINERS", "2"))

    if not pserver_endpoints:
        # Fail with a clear message instead of an error inside transpile().
        print("need env PSERVERS")
        sys.exit(1)

    t.transpile(
        optimize_ops,
        params_grads,
        pservers=pserver_endpoints,
        trainers=trainers)

    if training_role == "PSERVER":
        if not current_endpoint:
            print("need env SERVER_ENDPOINT")
            sys.exit(1)
        pserver_prog = t.get_pserver_program(current_endpoint, optimize_ops)
        exe.run(fluid.default_startup_program())
        exe.run(pserver_prog)
    elif training_role == "TRAINER":
        trainer_prog = t.get_trainer_program()
        start_time = time.time()
        batch_id = 0
        exe.run(fluid.default_startup_program())

        # Overwrite the word-embedding parameter with the embedding file
        # provided by the dataset module.
        embedding_param = fluid.global_scope().find_var(
            embedding_name).get_tensor()
        embedding_param.set(
            load_parameter(conll05.get_embedding(), word_dict_len, word_dim),
            place)

        for pass_id in range(PASS_NUM):
            chunk_evaluator.reset(exe)
            for data in train_data():
                cost, precision, recall, f1_score = exe.run(
                    trainer_prog,
                    feed=feeder.feed(data),
                    fetch_list=[avg_cost] + chunk_evaluator.metrics)
                pass_precision, pass_recall, pass_f1_score = (
                    chunk_evaluator.eval(exe))

                if batch_id % 10 == 0:
                    print("avg_cost:" + str(cost) +
                          " precision:" + str(precision) +
                          " recall:" + str(recall) +
                          " f1_score:" + str(f1_score) +
                          " pass_precision:" + str(pass_precision) +
                          " pass_recall:" + str(pass_recall) +
                          " pass_f1_score:" + str(pass_f1_score))
                    if batch_id != 0:
                        # batch_id and start_time are never reset, so this
                        # is the average over all batches run so far.
                        print("second per batch: " + str(
                            (time.time() - start_time) / batch_id))

                batch_id = batch_id + 1
    else:
        # An unrecognised role used to do nothing and exit successfully.
        print("unknown TRAINING_ROLE: " + str(training_role))
        sys.exit(1)
222+
223+
224+
if __name__ == '__main__':
    # Run only when executed as a script, not when imported as a module.
    main()

0 commit comments

Comments
 (0)