
Commit a4bd414

Merge pull request #1555 from jacquesqiao/refine-import
optimize import of seqToseq_net_v2 for book
2 parents 3fd13e7 + 8fa09b8 commit a4bd414
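The refactor works because a package's __init__ can import its own submodules, making them reachable as attributes of a single top-level import. The standard library's os package uses the same layout; a minimal sketch to illustrate the mechanism (unrelated to Paddle's code, and presumably how paddle/v2/__init__.py exposes layer, networks, attr, activation, and data_type):

    # os/__init__ imports os.path internally, so one top-level import
    # exposes the submodule as an attribute -- no separate `import os.path`
    # is needed, just as `import paddle.v2 as paddle` below replaces five
    # per-submodule imports.
    import os

    print(os.path.join("demo", "seqToseq"))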

File tree

2 files changed: 52 additions & 46 deletions

demo/seqToseq/api_train_v2.py

Lines changed: 13 additions & 9 deletions
@@ -72,31 +72,35 @@ def main():
     # define network topology
     cost = seqToseq_net_v2(source_dict_dim, target_dict_dim)
     parameters = paddle.parameters.create(cost)
-    optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
-
-    def event_handler(event):
-        if isinstance(event, paddle.event.EndIteration):
-            if event.batch_id % 10 == 0:
-                print "Pass %d, Batch %d, Cost %f, %s" % (
-                    event.pass_id, event.batch_id, event.cost, event.metrics)
 
+    # define optimize method and trainer
+    optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
     trainer = paddle.trainer.SGD(cost=cost,
                                  parameters=parameters,
                                  update_equation=optimizer)
 
+    # define data reader
     reader_dict = {
         'source_language_word': 0,
         'target_language_word': 1,
         'target_language_next_word': 2
     }
 
-    trn_reader = paddle.reader.batched(
+    wmt14_reader = paddle.reader.batched(
         paddle.reader.shuffle(
             train_reader("data/pre-wmt14/train/train"), buf_size=8192),
         batch_size=5)
 
+    # define event_handler callback
+    def event_handler(event):
+        if isinstance(event, paddle.event.EndIteration):
+            if event.batch_id % 10 == 0:
+                print "Pass %d, Batch %d, Cost %f, %s" % (
+                    event.pass_id, event.batch_id, event.cost, event.metrics)
+
+    # start to train
     trainer.train(
-        reader=trn_reader,
+        reader=wmt14_reader,
         event_handler=event_handler,
         num_passes=10000,
         reader_dict=reader_dict)
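The reordered script follows one pattern throughout: the trainer owns the loop and reports progress through a user-supplied callback. A minimal, Paddle-free sketch of that event-callback pattern (every name here is a hypothetical stand-in, not Paddle's API; the real paddle.trainer.SGD runs actual forward/backward passes):

    # Hypothetical stand-in for paddle.event.EndIteration's
    # pass_id/batch_id/cost fields.
    class EndIteration(object):
        def __init__(self, pass_id, batch_id, cost):
            self.pass_id = pass_id
            self.batch_id = batch_id
            self.cost = cost

    # The trainer drives the loop and notifies the callback after each batch.
    def train(num_passes, batches_per_pass, event_handler):
        for pass_id in range(num_passes):
            for batch_id in range(batches_per_pass):
                cost = 1.0 / (1 + pass_id * batches_per_pass + batch_id)  # dummy cost
                event_handler(EndIteration(pass_id, batch_id, cost))

    # The callback filters events, just as event_handler does above.
    def event_handler(event):
        if isinstance(event, EndIteration):
            if event.batch_id % 10 == 0:
                print("Pass %d, Batch %d, Cost %f" %
                      (event.pass_id, event.batch_id, event.cost))

    train(num_passes=2, batches_per_pass=20, event_handler=event_handler)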

demo/seqToseq/seqToseq_net_v2.py

Lines changed: 39 additions & 37 deletions
@@ -1,8 +1,4 @@
-import paddle.v2.activation as activation
-import paddle.v2.attr as attr
-import paddle.v2.data_type as data_type
-import paddle.v2.layer as layer
-import paddle.v2.networks as networks
+import paddle.v2 as paddle
 
 
 def seqToseq_net_v2(source_dict_dim, target_dict_dim):
@@ -12,79 +8,85 @@ def seqToseq_net_v2(source_dict_dim, target_dict_dim):
     encoder_size = 512  # dimension of hidden unit in GRU Encoder network
 
     #### Encoder
-    src_word_id = layer.data(
+    src_word_id = paddle.layer.data(
         name='source_language_word',
-        type=data_type.integer_value_sequence(source_dict_dim))
-    src_embedding = layer.embedding(
+        type=paddle.data_type.integer_value_sequence(source_dict_dim))
+    src_embedding = paddle.layer.embedding(
         input=src_word_id,
         size=word_vector_dim,
-        param_attr=attr.ParamAttr(name='_source_language_embedding'))
-    src_forward = networks.simple_gru(input=src_embedding, size=encoder_size)
-    src_backward = networks.simple_gru(
+        param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
+    src_forward = paddle.networks.simple_gru(
+        input=src_embedding, size=encoder_size)
+    src_backward = paddle.networks.simple_gru(
         input=src_embedding, size=encoder_size, reverse=True)
-    encoded_vector = layer.concat(input=[src_forward, src_backward])
+    encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
 
     #### Decoder
-    with layer.mixed(size=decoder_size) as encoded_proj:
-        encoded_proj += layer.full_matrix_projection(input=encoded_vector)
+    with paddle.layer.mixed(size=decoder_size) as encoded_proj:
+        encoded_proj += paddle.layer.full_matrix_projection(
+            input=encoded_vector)
 
-    backward_first = layer.first_seq(input=src_backward)
+    backward_first = paddle.layer.first_seq(input=src_backward)
 
-    with layer.mixed(size=decoder_size, act=activation.Tanh()) as decoder_boot:
-        decoder_boot += layer.full_matrix_projection(input=backward_first)
+    with paddle.layer.mixed(
+            size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
+        decoder_boot += paddle.layer.full_matrix_projection(
+            input=backward_first)
 
     def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
 
-        decoder_mem = layer.memory(
+        decoder_mem = paddle.layer.memory(
             name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
 
-        context = networks.simple_attention(
+        context = paddle.networks.simple_attention(
             encoded_sequence=enc_vec,
             encoded_proj=enc_proj,
             decoder_state=decoder_mem)
 
-        with layer.mixed(size=decoder_size * 3) as decoder_inputs:
-            decoder_inputs += layer.full_matrix_projection(input=context)
-            decoder_inputs += layer.full_matrix_projection(input=current_word)
+        with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
+            decoder_inputs += paddle.layer.full_matrix_projection(input=context)
+            decoder_inputs += paddle.layer.full_matrix_projection(
+                input=current_word)
 
-        gru_step = layer.gru_step(
+        gru_step = paddle.layer.gru_step(
            name='gru_decoder',
            input=decoder_inputs,
            output_mem=decoder_mem,
            size=decoder_size)
 
-        with layer.mixed(
-                size=target_dict_dim, bias_attr=True,
-                act=activation.Softmax()) as out:
-            out += layer.full_matrix_projection(input=gru_step)
+        with paddle.layer.mixed(
+                size=target_dict_dim,
+                bias_attr=True,
+                act=paddle.activation.Softmax()) as out:
+            out += paddle.layer.full_matrix_projection(input=gru_step)
         return out
 
     decoder_group_name = "decoder_group"
-    group_input1 = layer.StaticInputV2(input=encoded_vector, is_seq=True)
-    group_input2 = layer.StaticInputV2(input=encoded_proj, is_seq=True)
+    group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
+    group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
     group_inputs = [group_input1, group_input2]
 
-    trg_embedding = layer.embedding(
-        input=layer.data(
+    trg_embedding = paddle.layer.embedding(
+        input=paddle.layer.data(
             name='target_language_word',
-            type=data_type.integer_value_sequence(target_dict_dim)),
+            type=paddle.data_type.integer_value_sequence(target_dict_dim)),
         size=word_vector_dim,
-        param_attr=attr.ParamAttr(name='_target_language_embedding'))
+        param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
     group_inputs.append(trg_embedding)
 
     # For a decoder equipped with an attention mechanism, in training,
     # the target embedding (the ground truth) is the data input,
     # while the encoded source sequence is accessed as an unbounded memory.
     # Here, the StaticInput defines a read-only memory
    # for the recurrent_group.
-    decoder = layer.recurrent_group(
+    decoder = paddle.layer.recurrent_group(
        name=decoder_group_name,
        step=gru_decoder_with_attention,
        input=group_inputs)
 
-    lbl = layer.data(
+    lbl = paddle.layer.data(
        name='target_language_next_word',
-        type=data_type.integer_value_sequence(target_dict_dim))
-    cost = layer.classification_cost(input=decoder, label=lbl)
+        type=paddle.data_type.integer_value_sequence(target_dict_dim))
+    cost = paddle.layer.classification_cost(input=decoder, label=lbl)
 
     return cost
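After the refactor, callers of this module need only the single top-level import. A hypothetical usage sketch (the dictionary sizes are made up, and the paddle.init call is an assumption about the v2 API's standard initialization; api_train_v2.py derives the real dictionary sizes from the WMT-14 data):

    import paddle.v2 as paddle

    from seqToseq_net_v2 import seqToseq_net_v2

    # Assumption: standard v2-style initialization before building the graph.
    paddle.init(use_gpu=False, trainer_count=1)

    # Hypothetical dictionary sizes, for illustration only.
    cost = seqToseq_net_v2(source_dict_dim=30000, target_dict_dim=30000)
    parameters = paddle.parameters.create(cost)  # as in api_train_v2.py above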
