@@ -1,59 +1,62 @@
 import numpy as np
 import paddle.v2 as paddle
-import paddle.v2.dataset.conll05 as conll05
+import paddle.v2.fluid as fluid
 import paddle.v2.fluid.core as core
 import paddle.v2.fluid.framework as framework
 import paddle.v2.fluid.layers as layers
-from paddle.v2.fluid.executor import Executor, g_scope
-from paddle.v2.fluid.optimizer import SGDOptimizer
-import paddle.v2.fluid as fluid
-import paddle.v2.fluid.layers as pd
+from paddle.v2.fluid.executor import Executor
 
 dict_size = 30000
 source_dict_dim = target_dict_dim = dict_size
 src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size)
-hidden_dim = 512
-word_dim = 512
+hidden_dim = 32
+word_dim = 16
 IS_SPARSE = True
-batch_size = 50
+batch_size = 10
 max_length = 50
 topk_size = 50
 trg_dic_size = 10000
 
-src_word_id = layers.data(name="src_word_id", shape=[1], dtype='int64')
-src_embedding = layers.embedding(
-    input=src_word_id,
-    size=[dict_size, word_dim],
-    dtype='float32',
-    is_sparse=IS_SPARSE,
-    param_attr=fluid.ParamAttr(name='vemb'))
-
-
-def encoder():
-
-    lstm_hidden0, lstm_0 = layers.dynamic_lstm(
-        input=src_embedding,
-        size=hidden_dim,
-        candidate_activation='sigmoid',
-        cell_activation='sigmoid')
-
-    lstm_hidden1, lstm_1 = layers.dynamic_lstm(
-        input=src_embedding,
-        size=hidden_dim,
-        candidate_activation='sigmoid',
-        cell_activation='sigmoid',
-        is_reverse=True)
-
-    bidirect_lstm_out = layers.concat([lstm_hidden0, lstm_hidden1], axis=0)
-
-    return bidirect_lstm_out
-
-
-def decoder_trainer(context):
-    '''
-    decoder with trainer
-    '''
-    pass
+decoder_size = hidden_dim
+
+
+def encoder_decoder():
+    # encoder
+    src_word_id = layers.data(
+        name="src_word_id", shape=[1], dtype='int64', lod_level=1)
+    src_embedding = layers.embedding(
+        input=src_word_id,
+        size=[dict_size, word_dim],
+        dtype='float32',
+        is_sparse=IS_SPARSE,
+        param_attr=fluid.ParamAttr(name='vemb'))
+
+    fc1 = fluid.layers.fc(input=src_embedding, size=hidden_dim * 4, act='tanh')
+    lstm_hidden0, lstm_0 = layers.dynamic_lstm(input=fc1, size=hidden_dim * 4)
+    encoder_out = layers.sequence_pool(input=lstm_hidden0, pool_type="last")
+
+    # decoder
+    trg_language_word = layers.data(
+        name="target_language_word", shape=[1], dtype='int64', lod_level=1)
+    trg_embedding = layers.embedding(
+        input=trg_language_word,
+        size=[dict_size, word_dim],
+        dtype='float32',
+        is_sparse=IS_SPARSE,
+        param_attr=fluid.ParamAttr(name='vemb'))
+
+    rnn = fluid.layers.DynamicRNN()
+    with rnn.block():
+        current_word = rnn.step_input(trg_embedding)
+        mem = rnn.memory(init=encoder_out)
+        fc1 = fluid.layers.fc(input=[current_word, mem],
+                              size=decoder_size,
+                              act='tanh')
+        out = fluid.layers.fc(input=fc1, size=target_dict_dim, act='softmax')
+        rnn.update_memory(mem, fc1)
+        rnn.output(out)
+
+    return rnn()
 
 
 def to_lodtensor(data, place):
@@ -72,13 +75,18 @@ def to_lodtensor(data, place):
 
 
 def main():
-    encoder_out = encoder()
-    # TODO(jacquesqiao) call here
-    decoder_trainer(encoder_out)
+    rnn_out = encoder_decoder()
+    label = layers.data(
+        name="target_language_next_word", shape=[1], dtype='int64', lod_level=1)
+    cost = layers.cross_entropy(input=rnn_out, label=label)
+    avg_cost = fluid.layers.mean(x=cost)
+
+    optimizer = fluid.optimizer.Adagrad(learning_rate=1e-4)
+    optimizer.minimize(avg_cost)
 
     train_data = paddle.batch(
         paddle.reader.shuffle(
-            paddle.dataset.wmt14.train(8000), buf_size=1000),
+            paddle.dataset.wmt14.train(dict_size), buf_size=1000),
         batch_size=batch_size)
 
     place = core.CPUPlace()
@@ -88,15 +96,23 @@ def main():
 
     batch_id = 0
     for pass_id in xrange(2):
-        print 'pass_id', pass_id
         for data in train_data():
-            print 'batch', batch_id
-            batch_id += 1
-            if batch_id > 10: break
             word_data = to_lodtensor(map(lambda x: x[0], data), place)
+            trg_word = to_lodtensor(map(lambda x: x[1], data), place)
+            trg_word_next = to_lodtensor(map(lambda x: x[2], data), place)
             outs = exe.run(framework.default_main_program(),
-                           feed={'src_word_id': word_data, },
-                           fetch_list=[encoder_out])
+                           feed={
+                               'src_word_id': word_data,
+                               'target_language_word': trg_word,
+                               'target_language_next_word': trg_word_next
+                           },
+                           fetch_list=[avg_cost])
+            avg_cost_val = np.array(outs[0])
+            print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) +
+                  " avg_cost=" + str(avg_cost_val))
+            if batch_id > 3:
+                exit(0)
+            batch_id += 1
 
 
 if __name__ == '__main__':
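
Note: the body of to_lodtensor is not touched by this change, so it is elided between the hunks above, even though the new training loop feeds its output for all three data columns (source words, target words, next target words from the wmt14 reader). The sketch below shows a typical implementation of this helper as found in sibling fluid examples of this era; it is an assumption for readability, not necessarily the exact code in this file.

# Assumed helper (not part of this diff): flatten a batch of variable-length
# integer sequences into a single LoDTensor whose LoD records the boundary
# offsets of each sequence.
def to_lodtensor(data, place):
    seq_lens = [len(seq) for seq in data]
    cur_len = 0
    lod = [cur_len]
    for l in seq_lens:
        cur_len += l
        lod.append(cur_len)
    flattened_data = np.concatenate(data, axis=0).astype("int64")
    flattened_data = flattened_data.reshape([len(flattened_data), 1])
    res = core.LoDTensor()
    res.set(flattened_data, place)
    res.set_lod([lod])
    return res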