 #!/bin/python
 # -*- coding: utf-8 -*-
 
+import profile
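+# ('profile' is the stdlib pure-Python profiler; cProfile offers the same
+# interface and is faster, so it would be a drop-in alternative here)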
+
 import time
 import os
 import codecs
@@ -114,7 +116,7 @@ def train(args):
         print "\t\tReduce smoothing coefficient activated."
 
     alpha = 0.5  # smoothing coefficient for the weight adjustments
-    graph_ids = sparse_graphs.keys()  # list of dict keys, needed when shuffling tokens after every epoch
+    graph_ids = sparse_graphs.keys()  # list of dict keys, needed when shuffling tokens after every epoch
 
     for epoch in range(1, int(args.epochs) + 1):
 
@@ -128,7 +130,8 @@ def train(args):
             weight_vector, correct, errors = structured_perceptron(deepcopy(sparse_graphs[graph_id]), feat_map, rev_feat_map, weight_vector, correct, errors, "train", alpha)
             total += 1
             if total % 500 == 0:
-                print "\t\t\tInstance Nr. " + str(total) + ", Correct: " + str(correct) + ", Errors: " + str(errors)
+                print "\t\t\tInstance Nr. " + str(total) + ", Correct: " + str(correct) + " (" \
+                    + str((correct * 100) / total) + "%), Errors: " + str(errors)
             # print "\t\t\tCurrent weight vector:"
             # print "\t\t\t" + str(weight_vector)
 
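+                # note: (correct * 100) / total is Python 2 integer division,
+                # so the running accuracy prints as a whole-number percentage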
@@ -164,47 +167,32 @@ def test(args):
     stop = time.time()
     print "\t\t" + str(len(feat_map)) + " features loaded"
     print "\t\tDone, " + str(stop - start) + " sec."
-
     start = time.time()
-    print "\tCreating graph representation of every sentence..."
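+    # first pass over the input: count the sentences so the progress report
+    # below can show the total number of instances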
+    sentence_count = 0
+    for sentence in sentences(codecs.open(args.in_file, encoding='utf-8')):
+        sentence_count += 1
 
-    full_graphs = {}
+    print "\tStart annotating the test file, Total Instances: " + str(sentence_count)
 
-    empty_feat_vec = 0
+    total = 0
+    errors = 0
 
     for sentence in sentences(codecs.open(args.in_file, encoding='utf-8')):
 
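+        # sentences are now processed one at a time instead of first collecting
+        # every graph in the old full_graphs dict, so all graphs never need to
+        # be held in memory at once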
+        # create complete, directed graph representation of the sentence
         full_graph = CompleteFullGraph(sentence).heads
 
-        # add feature vec to every graph
+        # add feature vec
         full_graph = add_feat_vec_to_full_graph(full_graph, feat_map)
 
-        # feat_vec sanity
-
-        for head in full_graph:
-            for arc in full_graph[head]:
-                if not arc.feat_vec:
-                    empty_feat_vec += 1
-
-        full_graphs[len(full_graphs)] = full_graph
-
-    stop = time.time()
-    print "\t\tNumber of sentences: " + str(len(full_graphs)) + ", Number of arcs with empty feature vectors: " + str(empty_feat_vec)
-    print "\t\tDone, " + str(stop - start) + " sec"
-
-    start = time.time()
-    print "\tStart annotating the test file, Total Instances: " + str(len(full_graphs))
-
-    total = 0
-    errors = 0
-    for graph_id in full_graphs:
         tmp_errors = errors
-        predicted_graph, errors = structured_perceptron(deepcopy(full_graphs[graph_id]), feat_map, rev_feat_map, weight_vector, 0, errors, "test")
 
-        if tmp_errors == errors:
+        predicted_graph, errors = structured_perceptron(deepcopy(full_graph), feat_map, rev_feat_map, weight_vector, 0, errors, "test")
+
+        if tmp_errors == errors:  # no error occurred during prediction
             write_graph_to_file(predicted_graph, args.out_file)
-        else:
-            write_graph_to_file(full_graphs[graph_id], args.out_file, "error")
+        else:  # an error occurred during prediction
+            write_graph_to_file(full_graph, args.out_file, "error")
 
         total += 1
         if total % 500 == 0:
@@ -248,7 +236,7 @@ def write_to_file(token, file_obj):
 else:
     if arguments.train:
         print "Running in training mode\n"
-        train(arguments)
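+        # profile.run() exec()s a statement string under the profiler and
+        # prints a timing report to stdout when the call returns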
+        profile.run('train(arguments)')
 
     elif arguments.test:
         print "Running in test mode\n"