@@ -1,8 +1,4 @@
-import paddle.v2.activation as activation
-import paddle.v2.attr as attr
-import paddle.v2.data_type as data_type
-import paddle.v2.layer as layer
-import paddle.v2.networks as networks
+import paddle.v2 as paddle


 def seqToseq_net_v2(source_dict_dim, target_dict_dim):
@@ -12,79 +8,85 @@ def seqToseq_net_v2(source_dict_dim, target_dict_dim):
     encoder_size = 512  # dimension of hidden unit in GRU Encoder network

     #### Encoder
-    src_word_id = layer.data(
+    src_word_id = paddle.layer.data(
         name='source_language_word',
-        type=data_type.integer_value_sequence(source_dict_dim))
-    src_embedding = layer.embedding(
+        type=paddle.data_type.integer_value_sequence(source_dict_dim))
+    src_embedding = paddle.layer.embedding(
         input=src_word_id,
         size=word_vector_dim,
-        param_attr=attr.ParamAttr(name='_source_language_embedding'))
-    src_forward = networks.simple_gru(input=src_embedding, size=encoder_size)
-    src_backward = networks.simple_gru(
+        param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
+    src_forward = paddle.networks.simple_gru(
+        input=src_embedding, size=encoder_size)
+    src_backward = paddle.networks.simple_gru(
         input=src_embedding, size=encoder_size, reverse=True)
-    encoded_vector = layer.concat(input=[src_forward, src_backward])
+    encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])

     #### Decoder
-    with layer.mixed(size=decoder_size) as encoded_proj:
-        encoded_proj += layer.full_matrix_projection(input=encoded_vector)
+    with paddle.layer.mixed(size=decoder_size) as encoded_proj:
+        encoded_proj += paddle.layer.full_matrix_projection(
+            input=encoded_vector)

-    backward_first = layer.first_seq(input=src_backward)
+    backward_first = paddle.layer.first_seq(input=src_backward)

-    with layer.mixed(size=decoder_size, act=activation.Tanh()) as decoder_boot:
-        decoder_boot += layer.full_matrix_projection(input=backward_first)
+    with paddle.layer.mixed(
+            size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
+        decoder_boot += paddle.layer.full_matrix_projection(
+            input=backward_first)

     def gru_decoder_with_attention(enc_vec, enc_proj, current_word):

-        decoder_mem = layer.memory(
+        decoder_mem = paddle.layer.memory(
             name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)

-        context = networks.simple_attention(
+        context = paddle.networks.simple_attention(
             encoded_sequence=enc_vec,
             encoded_proj=enc_proj,
             decoder_state=decoder_mem)

-        with layer.mixed(size=decoder_size * 3) as decoder_inputs:
-            decoder_inputs += layer.full_matrix_projection(input=context)
-            decoder_inputs += layer.full_matrix_projection(input=current_word)
+        with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
+            decoder_inputs += paddle.layer.full_matrix_projection(input=context)
+            decoder_inputs += paddle.layer.full_matrix_projection(
+                input=current_word)

-        gru_step = layer.gru_step(
+        gru_step = paddle.layer.gru_step(
             name='gru_decoder',
             input=decoder_inputs,
             output_mem=decoder_mem,
             size=decoder_size)

-        with layer.mixed(
-                size=target_dict_dim, bias_attr=True,
-                act=activation.Softmax()) as out:
-            out += layer.full_matrix_projection(input=gru_step)
+        with paddle.layer.mixed(
+                size=target_dict_dim,
+                bias_attr=True,
+                act=paddle.activation.Softmax()) as out:
+            out += paddle.layer.full_matrix_projection(input=gru_step)
         return out

     decoder_group_name = "decoder_group"
-    group_input1 = layer.StaticInputV2(input=encoded_vector, is_seq=True)
-    group_input2 = layer.StaticInputV2(input=encoded_proj, is_seq=True)
+    group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
+    group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
     group_inputs = [group_input1, group_input2]

-    trg_embedding = layer.embedding(
-        input=layer.data(
+    trg_embedding = paddle.layer.embedding(
+        input=paddle.layer.data(
             name='target_language_word',
-            type=data_type.integer_value_sequence(target_dict_dim)),
+            type=paddle.data_type.integer_value_sequence(target_dict_dim)),
         size=word_vector_dim,
-        param_attr=attr.ParamAttr(name='_target_language_embedding'))
+        param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
     group_inputs.append(trg_embedding)

     # For a decoder equipped with an attention mechanism, in training,
     # the target embedding (the ground truth) is the data input,
     # while the encoded source sequence is accessed as an unbounded memory.
     # Here, the StaticInput defines a read-only memory
     # for the recurrent_group.
-    decoder = layer.recurrent_group(
+    decoder = paddle.layer.recurrent_group(
         name=decoder_group_name,
         step=gru_decoder_with_attention,
         input=group_inputs)

-    lbl = layer.data(
+    lbl = paddle.layer.data(
         name='target_language_next_word',
-        type=data_type.integer_value_sequence(target_dict_dim))
-    cost = layer.classification_cost(input=decoder, label=lbl)
+        type=paddle.data_type.integer_value_sequence(target_dict_dim))
+    cost = paddle.layer.classification_cost(input=decoder, label=lbl)

     return cost
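
Read as the post-change code, the hunk builds the whole encoder-decoder network under the single `paddle` namespace and returns its classification cost. Below is a minimal training sketch for how that cost would typically be wired up, assuming the paddle.v2 trainer API of the same release (paddle.init, paddle.parameters.create, paddle.trainer.SGD) and the bundled wmt14 reader; the dictionary size, optimizer, and batch settings are illustrative and not taken from this commit.

import paddle.v2 as paddle

dict_size = 30000  # illustrative vocabulary size for both languages

# PaddlePaddle must be initialized before any layer is constructed.
paddle.init(use_gpu=False, trainer_count=1)

# Build the network defined above and create its parameters.
cost = seqToseq_net_v2(dict_size, dict_size)
parameters = paddle.parameters.create(cost)

# Illustrative optimizer settings.
optimizer = paddle.optimizer.Adam(learning_rate=5e-5)
trainer = paddle.trainer.SGD(
    cost=cost, parameters=parameters, update_equation=optimizer)

# Map the data layer names declared in seqToseq_net_v2 to reader columns.
feeding = {
    'source_language_word': 0,
    'target_language_word': 1,
    'target_language_next_word': 2
}

trainer.train(
    reader=paddle.batch(paddle.dataset.wmt14.train(dict_size), batch_size=5),
    num_passes=1,
    feeding=feeding)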