@@ -139,4 +139,81 @@ def numpy2tensors(npx, npy, dev, inputs=None, labels=None):
         inputs_.append(x)
     if not inputs:
         inputs = inputs_
-    return inputs, labels
+    return inputs, labels
+
+
+def convert(batch,
+            batch_size,
+            seq_length,
+            vocab_size,
+            dev,
+            inputs=None,
+            labels=None):
+    '''convert a batch of data into a sequence of input tensors'''
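+    # labels are the inputs shifted left by one: the target at step t is the
+    # character at position t + 1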
+    y = batch[:, 1:]
+    x1 = batch[:, :seq_length]
+    x = np.zeros((batch_size, seq_length, vocab_size), dtype=np.float32)
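+    # one-hot encode the inputs: x[b, t, c] = 1 iff sample b has char id c at step t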
+    for b in range(batch_size):
+        for t in range(seq_length):
+            c = x1[b, t]
+            x[b, t, c] = 1
+    return numpy2tensors(x, y, dev, inputs, labels)
+
+
+def sample(model, data, dev, nsamples=100, use_max=False):
+    while True:
+        cmd = input('Do you want to sample text from the model [y/n]')
+        if cmd == 'n':
+            return
+        else:
+            seed = input('Please input some seeding text, e.g., #include <c: ')
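+            # one-hot encode each seed character as a (1, vocab_size) tensor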
+            inputs = []
+            for c in seed:
+                x = np.zeros((1, data.vocab_size), dtype=np.float32)
+                x[0, data.char_to_idx[c]] = 1
+                tx = tensor.from_numpy(x)
+                tx.to_device(dev)
+                inputs.append(tx)
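+            # reset the hidden state, then feed the whole seed through the model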
+            model.reset_states(dev)
+            outputs = model(inputs)
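+            # softmax over the last output gives a distribution over the next character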
+            y = tensor.softmax(outputs[-1])
+            sys.stdout.write(seed)
+            for i in range(nsamples):
+                prob = tensor.to_numpy(y)[0]
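+                # pick the next character greedily (argmax) or by sampling
+                # from the predicted distribution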
+                if use_max:
+                    cur = np.argmax(prob)
+                else:
+                    cur = np.random.choice(data.vocab_size, 1, p=prob)[0]
+                sys.stdout.write(data.idx_to_char[cur])
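+                # feed the generated character back in as the next input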
+                x = np.zeros((1, data.vocab_size), dtype=np.float32)
+                x[0, cur] = 1
+                tx = tensor.from_numpy(x)
+                tx.to_device(dev)
+                outputs = model([tx])
+                y = tensor.softmax(outputs[-1])
+
+
+def evaluate(model, data, batch_size, seq_length, dev, inputs, labels):
+    model.eval()
+    val_loss = 0.0
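+    # accumulate the softmax cross-entropy over all validation batches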
+    for b in range(data.num_test_batch):
+        batch = data.val_dat[b * batch_size:(b + 1) * batch_size]
+        inputs, labels = convert(batch, batch_size, seq_length, data.vocab_size,
+                                 dev, inputs, labels)
+        model.reset_states(dev)
+        y = model(inputs)
+        loss = autograd.softmax_cross_entropy(y, labels)[0]
+        val_loss += tensor.to_numpy(loss)[0]
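+    # report the average loss per batch and per unrolled step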
+    print(' validation loss is %f' %
+          (val_loss / data.num_test_batch / seq_length))
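
For reference, the one-hot conversion that convert() performs can be checked
with plain numpy, independent of the SINGA tensor/device runtime. The snippet
below is a minimal sketch with made-up sizes, not part of the commit:

    import numpy as np

    # toy dimensions, chosen only for illustration
    batch_size, seq_length, vocab_size = 2, 4, 5
    # each sample carries seq_length + 1 character ids, so that batch[:, 1:]
    # can serve as the labels shifted one step ahead of batch[:, :seq_length]
    batch = np.random.randint(0, vocab_size, (batch_size, seq_length + 1))

    y = batch[:, 1:]            # labels: inputs shifted left by one
    x1 = batch[:, :seq_length]  # inputs
    x = np.zeros((batch_size, seq_length, vocab_size), dtype=np.float32)
    for b in range(batch_size):
        for t in range(seq_length):
            x[b, t, x1[b, t]] = 1   # one-hot per character

    # decoding the one-hot tensor recovers the original character ids
    assert (x.argmax(axis=2) == x1).all()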