
Commit 53bd51e

07/Label semantic roles (#5798)
* init label_semantic_roles.py
* add linear_chain_crf and test
* complete test_linear_chain_crf
* correct last layer of db_lstm
* update optimizer and initializer
* update param_initializer of embedding_layer
* support load pre trained embedding
* rm unused parameter
* optimize code
* clean code
* fix test
* add todo
1 parent 778b981 commit 53bd51e

File tree

7 files changed: +270 −13 lines changed

* paddle/operators/linear_chain_crf_op.h
* python/paddle/v2/fluid/layer_helper.py
* python/paddle/v2/fluid/layers.py
* python/paddle/v2/fluid/optimizer.py
* label_semantic_roles.py (new file)
* python/paddle/v2/fluid/tests/test_layers.py
* python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py

paddle/operators/linear_chain_crf_op.h

Lines changed: 2 additions & 2 deletions
@@ -271,7 +271,7 @@ class LinearChainCRFOpKernel : public framework::OpKernel<T> {
     ll -= std::log(sum);
     // Now ll is equal to -log(Z).

-    const int* lbl = label.data<int>();
+    const int64_t* lbl = label.data<int64_t>();
     PADDLE_ENFORCE_LT(
         static_cast<size_t>(*std::max_element(lbl, lbl + seq_length)), tag_num,
         "An invalid tag label that execesses the largest tag number.");

@@ -449,7 +449,7 @@ class LinearChainCRFGradOpKernel : public framework::OpKernel<T> {
       Tensor* emission_grad) const {
     const T* w_exps = transition_exps.data<T>();
     const T* x_exps = emission_exps.data<T>();
-    const int* label_value = label.data<int>();
+    const int64_t* label_value = label.data<int64_t>();
     T* beta_value = beta->data<T>();

     auto x_dims = emission_exps.dims();
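Since both kernels now read labels through label.data<int64_t>(), any label tensor fed to this op must be created as int64; the unit-test fix at the bottom of this commit makes the same switch. A minimal sketch (TAG_NUM and seq_length are hypothetical values):

    import numpy as np

    TAG_NUM = 17     # hypothetical tag-set size
    seq_length = 10  # hypothetical sequence length
    # dtype must match label.data<int64_t>() in the kernel
    labels = np.random.randint(
        low=0, high=TAG_NUM, size=(seq_length, 1), dtype="int64")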

python/paddle/v2/fluid/layer_helper.py

Lines changed: 4 additions & 1 deletion
@@ -126,7 +126,10 @@ def create_parameter(self, attr, shape, dtype, suffix='w',
         self.startup_program.global_block().create_parameter(
             dtype=dtype, shape=shape, **attr_copy)
         return self.main_program.global_block().create_parameter(
-            name=attr_copy['name'], dtype=dtype, shape=shape)
+            name=attr_copy['name'],
+            dtype=dtype,
+            shape=shape,
+            trainable=attr_copy.get('trainable', True))

     def create_tmp_variable(self, dtype):
         return self.main_program.current_block().create_var(
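With this change, a trainable flag passed in param_attr is forwarded onto the parameter created in the main program. A hedged fragment showing the flow from a layer's point of view (vocab_size and emb_dim are placeholders; helper is the layer's LayerHelper, as in layers.embedding below):

    w = helper.create_parameter(
        attr=helper.param_attr,  # e.g. {'name': 'emb', 'trainable': False}
        shape=[vocab_size, emb_dim],
        dtype='float32')
    # w.trainable is now False; when the key is absent it defaults to True.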

python/paddle/v2/fluid/layers.py

Lines changed: 44 additions & 1 deletion
@@ -112,6 +112,7 @@ def _get_default_bias_initializer():
 def embedding(input,
               size,
               is_sparse=False,
+              param_initializer=None,
               param_attr=None,
               data_type='float32',
               main_program=None,
@@ -136,9 +137,16 @@ def embedding(input,
     to the LayerHelper constructor.

     """
+
+    def _get_default_param_initializer():
+        return XavierInitializer()
+
     helper = LayerHelper('embedding', **locals())
     w = helper.create_parameter(
-        attr=helper.param_attr, shape=size, dtype=data_type)
+        attr=helper.param_attr,
+        shape=size,
+        dtype=data_type,
+        initializer=param_initializer or _get_default_param_initializer())
     tmp = helper.create_tmp_variable(data_type)
     helper.append_op(
         type='lookup_table',
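A usage sketch for the new param_initializer argument, assuming XavierInitializer can be imported from paddle.v2.fluid.initializer (as layers.py itself does); leaving it as None (the default) falls back to the same Xavier scheme through _get_default_param_initializer:

    import paddle.v2.fluid.layers as layers
    from paddle.v2.fluid.initializer import XavierInitializer

    word = layers.data(name='word', shape=[1], data_type='int64')
    emb = layers.embedding(
        input=word,
        size=[10000, 32],  # [vocab_size, emb_dim], placeholder values
        data_type='float32',
        param_initializer=XavierInitializer())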
@@ -460,6 +468,41 @@ def sums(input, main_program=None, startup_program=None):
     return out


+def linear_chain_crf(input,
+                     label,
+                     param_attr=None,
+                     param_initializer=None,
+                     main_program=None,
+                     startup_program=None):
+    def _get_default_param_initializer():
+        return XavierInitializer()
+
+    helper = LayerHelper('linear_chain_crf', **locals())
+    size = input.shape[1]
+    transition = helper.create_parameter(
+        attr=helper.param_attr,
+        shape=[size + 2, size],
+        dtype=helper.input_dtype(),
+        initializer=param_initializer or _get_default_param_initializer())
+    alpha = helper.create_tmp_variable(dtype=helper.input_dtype())
+    emission_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
+    transition_exps = helper.create_tmp_variable(dtype=helper.input_dtype())
+    log_likelihood = helper.create_tmp_variable(dtype=helper.input_dtype())
+    helper.append_op(
+        type='linear_chain_crf',
+        inputs={"Emission": [input],
+                "Transition": transition,
+                "Label": label},
+        outputs={
+            "Alpha": [alpha],
+            "EmissionExps": [emission_exps],
+            "TransitionExps": transition_exps,
+            "LogLikelihood": log_likelihood
+        })
+
+    return log_likelihood
+
+
 def assign(input, output, main_program=None, startup_program=None):
     helper = LayerHelper('assign', **locals())
     helper.append_op(
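Usage, as in the demo added by this commit: input carries per-tag emission scores (so the tag count is taken from input.shape[1]), label holds int64 tag ids, and the layer learns a [size + 2, size] transition matrix (in this op the two extra rows hold the start and end transition weights). The LogLikelihood output is used directly as the training cost:

    # feature_out: emission scores from the network; target: int64 tag ids
    crf_cost = layers.linear_chain_crf(
        input=feature_out,
        label=target,
        param_attr={'name': 'crfw', 'learning_rate': 1e-3})
    avg_cost = layers.mean(x=crf_cost)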

python/paddle/v2/fluid/optimizer.py

Lines changed: 2 additions & 1 deletion
@@ -170,7 +170,8 @@ def create_optimization_pass(self,

         optimize_ops = []
         for param_and_grad in parameters_and_grads:
-            if param_and_grad[1] is not None:
+            if param_and_grad[0].trainable is True and param_and_grad[
+                    1] is not None:
                 optimize_op = self._append_optimize_op(loss.block,
                                                        param_and_grad)
                 optimize_ops.append(optimize_op)
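The end-to-end effect, sketched with the APIs touched elsewhere in this commit: a parameter created with trainable=False no longer gets an optimize op appended, which is what lets the demo below load a pre-trained embedding and keep it frozen:

    import paddle.v2.fluid.layers as layers
    from paddle.v2.fluid.optimizer import SGDOptimizer

    word = layers.data(name='word', shape=[1], data_type='int64')
    emb = layers.embedding(
        input=word,
        size=[10000, 32],  # placeholder sizes
        param_attr={'name': 'emb', 'trainable': False})  # frozen
    cost = layers.mean(x=layers.fc(input=emb, size=10))
    # create_optimization_pass skips 'emb'; the fc weights still train
    SGDOptimizer(learning_rate=1e-4).minimize(cost)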
label_semantic_roles.py (new file)

Lines changed: 192 additions & 0 deletions
@@ -0,0 +1,192 @@
import numpy as np
import paddle.v2 as paddle
import paddle.v2.dataset.conll05 as conll05
import paddle.v2.fluid.core as core
import paddle.v2.fluid.framework as framework
import paddle.v2.fluid.layers as layers
from paddle.v2.fluid.executor import Executor, g_scope
from paddle.v2.fluid.optimizer import SGDOptimizer

word_dict, verb_dict, label_dict = conll05.get_dict()
word_dict_len = len(word_dict)
label_dict_len = len(label_dict)
pred_len = len(verb_dict)

mark_dict_len = 2
word_dim = 32
mark_dim = 5
hidden_dim = 512
depth = 8
mix_hidden_lr = 1e-3

IS_SPARSE = True
PASS_NUM = 10
BATCH_SIZE = 20

embedding_name = 'emb'


def load_parameter(file_name, h, w):
    with open(file_name, 'rb') as f:
        f.read(16)  # skip header.
        return np.fromfile(f, dtype=np.float32).reshape(h, w)


def db_lstm():
    # 8 features
    word = layers.data(name='word_data', shape=[1], data_type='int64')
    predicate = layers.data(name='verb_data', shape=[1], data_type='int64')
    ctx_n2 = layers.data(name='ctx_n2_data', shape=[1], data_type='int64')
    ctx_n1 = layers.data(name='ctx_n1_data', shape=[1], data_type='int64')
    ctx_0 = layers.data(name='ctx_0_data', shape=[1], data_type='int64')
    ctx_p1 = layers.data(name='ctx_p1_data', shape=[1], data_type='int64')
    ctx_p2 = layers.data(name='ctx_p2_data', shape=[1], data_type='int64')
    mark = layers.data(name='mark_data', shape=[1], data_type='int64')

    predicate_embedding = layers.embedding(
        input=predicate,
        size=[pred_len, word_dim],
        data_type='float32',
        is_sparse=IS_SPARSE,
        param_attr={'name': 'vemb'})

    mark_embedding = layers.embedding(
        input=mark,
        size=[mark_dict_len, mark_dim],
        data_type='float32',
        is_sparse=IS_SPARSE)

    word_input = [word, ctx_n2, ctx_n1, ctx_0, ctx_p1, ctx_p2]
    emb_layers = [
        layers.embedding(
            size=[word_dict_len, word_dim],
            input=x,
            param_attr={'name': embedding_name,
                        'trainable': False}) for x in word_input
    ]
    emb_layers.append(predicate_embedding)
    emb_layers.append(mark_embedding)

    hidden_0_layers = [
        layers.fc(input=emb, size=hidden_dim) for emb in emb_layers
    ]

    hidden_0 = layers.sums(input=hidden_0_layers)

    lstm_0 = layers.dynamic_lstm(
        input=hidden_0,
        size=hidden_dim,
        candidate_activation='relu',
        gate_activation='sigmoid',
        cell_activation='sigmoid')

    # stack L-LSTM and R-LSTM with direct edges
    input_tmp = [hidden_0, lstm_0]

    for i in range(1, depth):
        mix_hidden = layers.sums(input=[
            layers.fc(input=input_tmp[0], size=hidden_dim),
            layers.fc(input=input_tmp[1], size=hidden_dim)
        ])

        lstm = layers.dynamic_lstm(
            input=mix_hidden,
            size=hidden_dim,
            candidate_activation='relu',
            gate_activation='sigmoid',
            cell_activation='sigmoid',
            is_reverse=((i % 2) == 1))

        input_tmp = [mix_hidden, lstm]

    feature_out = layers.sums(input=[
        layers.fc(input=input_tmp[0], size=label_dict_len),
        layers.fc(input=input_tmp[1], size=label_dict_len)
    ])

    return feature_out


def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = core.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res


def main():
    # define network topology
    feature_out = db_lstm()
    target = layers.data(name='target', shape=[1], data_type='int64')
    crf_cost = layers.linear_chain_crf(
        input=feature_out,
        label=target,
        param_attr={"name": 'crfw',
                    "learning_rate": mix_hidden_lr})
    avg_cost = layers.mean(x=crf_cost)
    # TODO(qiao)
    # 1. add crf_decode_layer and evaluator
    # 2. use other optimizer and check why out will be NAN
    sgd_optimizer = SGDOptimizer(learning_rate=0.0001)
    opts = sgd_optimizer.minimize(avg_cost)

    train_data = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.conll05.test(), buf_size=8192),
        batch_size=BATCH_SIZE)
    place = core.CPUPlace()
    exe = Executor(place)

    exe.run(framework.default_startup_program())

    embedding_param = g_scope.find_var(embedding_name).get_tensor()
    embedding_param.set(
        load_parameter(conll05.get_embedding(), word_dict_len, word_dim), place)

    batch_id = 0
    for pass_id in xrange(PASS_NUM):
        for data in train_data():
            word_data = to_lodtensor(map(lambda x: x[0], data), place)
            ctx_n2_data = to_lodtensor(map(lambda x: x[1], data), place)
            ctx_n1_data = to_lodtensor(map(lambda x: x[2], data), place)
            ctx_0_data = to_lodtensor(map(lambda x: x[3], data), place)
            ctx_p1_data = to_lodtensor(map(lambda x: x[4], data), place)
            ctx_p2_data = to_lodtensor(map(lambda x: x[5], data), place)
            verb_data = to_lodtensor(map(lambda x: x[6], data), place)
            mark_data = to_lodtensor(map(lambda x: x[7], data), place)
            target = to_lodtensor(map(lambda x: x[8], data), place)

            outs = exe.run(framework.default_main_program(),
                           feed={
                               'word_data': word_data,
                               'ctx_n2_data': ctx_n2_data,
                               'ctx_n1_data': ctx_n1_data,
                               'ctx_0_data': ctx_0_data,
                               'ctx_p1_data': ctx_p1_data,
                               'ctx_p2_data': ctx_p2_data,
                               'verb_data': verb_data,
                               'mark_data': mark_data,
                               'target': target
                           },
                           fetch_list=[avg_cost])
            avg_cost_val = np.array(outs[0])

            if batch_id % 10 == 0:
                print("avg_cost=" + str(avg_cost_val))

            # exit early for CI
            exit(0)

            batch_id = batch_id + 1


if __name__ == '__main__':
    main()
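An aside, not part of the commit: the level-0 LoD that to_lodtensor builds is just the prefix sum of the sequence lengths. For example:

    # three sequences of lengths 3, 1, 2
    seqs = [[1, 2, 3], [4], [5, 6]]
    lod = [0]
    for l in [len(s) for s in seqs]:
        lod.append(lod[-1] + l)
    # lod == [0, 3, 4, 6]; the flattened data has shape (6, 1)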

python/paddle/v2/fluid/tests/test_layers.py

Lines changed: 25 additions & 7 deletions
@@ -1,8 +1,8 @@
+import unittest
+
 import paddle.v2.fluid.layers as layers
 import paddle.v2.fluid.nets as nets
 from paddle.v2.fluid.framework import Program
-import paddle.v2.fluid.core as core
-import unittest


 class TestBook(unittest.TestCase):
@@ -20,7 +20,8 @@ def test_fit_a_line(self):
         avg_cost = layers.mean(x=cost, main_program=program)
         self.assertIsNotNone(avg_cost)
         program.append_backward(avg_cost)
-        print str(program)
+
+        # print str(program)

     def test_recognize_digits_mlp(self):
         program = Program()
@@ -49,7 +50,7 @@ def test_recognize_digits_mlp(self):
             input=predict, label=label, main_program=program)
         avg_cost = layers.mean(x=cost, main_program=program)
         self.assertIsNotNone(avg_cost)
-        print str(program)
+        # print str(program)

     def test_simple_conv2d(self):
         program = Program()
@@ -64,7 +65,7 @@ def test_simple_conv2d(self):
             filter_size=[4, 4],
             main_program=program)

-        print str(program)
+        # print str(program)

     def test_recognize_digits_conv(self):
         program = Program()
@@ -103,7 +104,7 @@ def test_recognize_digits_conv(self):

         program.append_backward(avg_cost)

-        print str(program)
+        # print str(program)

     def test_word_embedding(self):
         program = Program()
@@ -164,7 +165,24 @@ def test_word_embedding(self):
         avg_cost = layers.mean(x=cost, main_program=program)
         self.assertIsNotNone(avg_cost)

-        print str(program)
+        # print str(program)
+
+    def test_linear_chain_crf(self):
+        program = Program()
+
+        # Change g_program, so the rest layers use `g_program`
+        images = layers.data(
+            name='pixel',
+            shape=[784],
+            data_type='float32',
+            main_program=program)
+        label = layers.data(
+            name='label', shape=[1], data_type='int32', main_program=program)
+        hidden = layers.fc(input=images, size=128, main_program=program)
+        crf = layers.linear_chain_crf(
+            input=hidden, label=label, main_program=program)
+
+        # print str(program)


 if __name__ == '__main__':

python/paddle/v2/fluid/tests/test_linear_chain_crf_op.py

Lines changed: 1 addition & 1 deletion
@@ -104,7 +104,7 @@ def set_test_data(self):
         transition_exps = np.exp(transition)

         labels = np.random.randint(
-            low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int32")
+            low=0, high=TAG_NUM, size=(lod[-1][-1], 1), dtype="int64")

         self.inputs = {
             "Emission": (emission, lod),