
Commit a7d3230

Optimizing testing, fix errors in CLE, minor stuff
1 parent 3488007 commit a7d3230

8 files changed: +244, -187 lines changed

create_screens.sh

Lines changed: 4 additions & 5 deletions
@@ -1,8 +1,7 @@
 #!/bin/bash
 
 screen -dmS "cle_1" ./start-cle_automated.sh 1 m_e-1 p_e-1 e_e-1
-screen -dmS "cle_2" ./start-cle_automated.sh 1 m_e-1_ss p_e-1_st e_e-1_st -shuffle-sentences
-screen -dmS "cle_3" ./start-cle_automated.sh 10 m_e-10 p_e-10 e_e-10
-screen -dmS "cle_4" ./start-cle_automated.sh 10 m_e-10_da p_e-10_da e_e-10_da -decrease-alpha
-screen -dmS "cle_5" ./start-cle_automated.sh 10 m_e-10_ss p_e-10_ss e_e-10_ss -shuffle-sentences
-screen -dmS "cle_6" ./start-cle_automated.sh 10 m_e-10_da_ss p_e-10_da_ss e_e-10_da_ss -decrease-alpha -shuffle-sentences
+screen -dmS "cle_2" ./start-cle_automated.sh 10 m_e-10 p_e-10 e_e-10
+screen -dmS "cle_3" ./start-cle_automated.sh 10 m_e-10_da p_e-10_da e_e-10_da -decrease-alpha
+screen -dmS "cle_4" ./start-cle_automated.sh 10 m_e-10_ss p_e-10_ss e_e-10_ss -shuffle-sentences
+screen -dmS "cle_5" ./start-cle_automated.sh 10 m_e-10_da_ss p_e-10_da_ss e_e-10_da_ss -decrease-alpha -shuffle-sentences

dependency-parser.py

Lines changed: 19 additions & 31 deletions
@@ -1,6 +1,8 @@
 # !/bin/python
 # -*- coding: utf-8 -*-
 
+import profile
+
 import time
 import os
 import codecs
@@ -114,7 +116,7 @@ def train(args):
 print "\t\tReduce smoothing coefficient activated."
 
 alpha = 0.5 # smoothing coefficient for the weight adjustments
-graph_ids = sparse_graphs.keys() #list of dict keys, needed when shuffeling tokens after every epoch
+graph_ids = sparse_graphs.keys() # list of dict keys, needed when shuffeling tokens after every epoch
 
 for epoch in range(1, int(args.epochs) + 1):
 
@@ -128,7 +130,8 @@ def train(args):
 weight_vector, correct, errors = structured_perceptron(deepcopy(sparse_graphs[graph_id]), feat_map, rev_feat_map, weight_vector, correct, errors, "train", alpha)
 total += 1
 if total % 500 == 0:
-print "\t\t\tInstance Nr. " + str(total) + ", Correct: " + str(correct) + ", Errors: " + str(errors)
+print "\t\t\tInstance Nr. " + str(total) + ", Correct: " + str(correct) + " (" \
++ str((correct*100)/total) + "%), Errors: " + str(errors)
 # print "\t\t\tCurrent weight vector:"
 # print "\t\t\t" + str(weight_vector)
 
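Side note on the new progress line: both operands of (correct*100)/total are Python 2 ints, so the division floors and the reported percentage is always a whole number. A tiny standalone sketch of that behaviour (the values are made up for illustration):

correct, total = 437, 500
print str((correct * 100) / total) + "%"     # "87%"  (integer floor division)
print str((correct * 100.0) / total) + "%"   # "87.4%" if a fractional percentage were wanted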
@@ -164,47 +167,32 @@ def test(args):
 stop = time.time()
 print "\t\t" + str(len(feat_map)) + " features loaded"
 print "\t\tDone, " + str(stop - start) + " sec."
-
 start = time.time()
-print "\tCreating graph representation of every sentence..."
+sentence_count = 0
+for sentence in sentences(codecs.open(args.in_file, encoding='utf-8')):
+sentence_count += 1
 
-full_graphs = {}
+print "\tStart annotating the test file, Total Instances: " + str(sentence_count)
 
-empty_feat_vec = 0
+total = 0
+errors = 0
 
 for sentence in sentences(codecs.open(args.in_file, encoding='utf-8')):
 
+# create complete, directed graph representation of sentence
 full_graph = CompleteFullGraph(sentence).heads
 
-# add feature vec to every graph
+# add feature vec
 full_graph = add_feat_vec_to_full_graph(full_graph, feat_map)
 
-# feat_vec sanity
-
-for head in full_graph:
-for arc in full_graph[head]:
-if not arc.feat_vec:
-empty_feat_vec += 1
-
-full_graphs[len(full_graphs)] = full_graph
-
-stop = time.time()
-print "\t\tNumber of sentences: " + str(len(full_graphs)) + ", Number of arcs with empty feature vectors: " + str(empty_feat_vec)
-print "\t\tDone, " + str(stop - start) + " sec"
-
-start = time.time()
-print "\tStart annotating the test file, Total Instances: " + str(len(full_graphs))
-
-total = 0
-errors = 0
-for graph_id in full_graphs:
 tmp_errors = errors
-predicted_graph, errors = structured_perceptron(deepcopy(full_graphs[graph_id]), feat_map, rev_feat_map, weight_vector, 0, errors, "test")
 
-if tmp_errors == errors:
+predicted_graph, errors = structured_perceptron(deepcopy(full_graph), feat_map, rev_feat_map, weight_vector, 0, errors, "test")
+
+if tmp_errors == errors: # no error occured during prediction
 write_graph_to_file(predicted_graph, args.out_file)
-else:
-write_graph_to_file(full_graphs[graph_id], args.out_file, "error")
+else: # a error occured during prediction
+write_graph_to_file(full_graph, args.out_file, "error")
 
 total += 1
 if total % 500 == 0:
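The rewritten test() no longer builds the full_graphs dict and the empty-feature-vector sanity counter up front; it counts the sentences in one cheap pass and then annotates them one at a time, so only a single graph is held in memory. As a rough standalone illustration of that first counting pass, assuming blank-line-separated CoNLL-style input (the actual sentences() reader is not part of this diff):

import codecs

def count_sentences(path):
    # First pass: count blank-line-separated sentence blocks without
    # building any graph structures (CoNLL-style layout is an assumption).
    count, in_sentence = 0, False
    for line in codecs.open(path, encoding='utf-8'):
        if line.strip():
            in_sentence = True
        elif in_sentence:
            count += 1
            in_sentence = False
    if in_sentence:  # file may not end with a blank line
        count += 1
    return count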
@@ -248,7 +236,7 @@ def write_to_file(token, file_obj):
 else:
 if arguments.train:
 print "Running in training mode\n"
-train(arguments)
+profile.run(train(arguments))
 
 elif arguments.test:
 print "Running in test mode\n"

modules/cle.py

Lines changed: 5 additions & 5 deletions
@@ -1,14 +1,12 @@
-from graphs import highest_scoring_heads, cycle, reverse_head_graph
+from graphs import highest_scoring_heads, cycle, reverse_head_graph, check_graph_sanity
 from copy import deepcopy
 
 def chu_liu_edmonds(graph):
 
 g = deepcopy(graph)
 g_a = highest_scoring_heads(deepcopy(g))
-c = cycle(g_a, [], sorted(g_a.keys())[0])
+c = cycle(g_a)
 
-if c is None:
-print "The cycle function returned 'None'."
 if not c:
 return g_a
 else:
@@ -66,6 +64,7 @@ def chu_liu_edmonds(graph):
 y[head_of_cycle] = new_dependents
 
 # adding arcs from inside cycle to inside cycle except the one pointing to cycle_head
+
 for node in c:
 for head in g_a:
 if head == node:
@@ -76,7 +75,6 @@ def chu_liu_edmonds(graph):
 y[head].append(arc)
 else:
 y[head] = [arc]
-return y
 
 def contract(g, g_a, c, t_c):
 
@@ -230,6 +228,8 @@ def contract(g, g_a, c, t_c):
 g_c[head].append(arc)
 else:
 g_c[head] = [arc]
+else:
+g_c[head] = [arc]
 
 return g_c, t_c
 
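The entry point now calls cycle(g_a) without the extra accumulator and start-node arguments, and the dead "returned 'None'" branch is gone. The graphs module is not part of this diff, so purely as an illustration of the idea, here is one standalone way to find a cycle when every node is mapped to its single chosen head; the head_of layout and the function name are assumptions, not the repo's implementation:

def find_cycle(head_of):
    # head_of: {dependent: head}, one chosen head per node (assumed layout).
    # Follow head pointers from every node; revisiting a node that is already
    # on the current path means the nodes from there onward form a cycle.
    visited = set()
    for start in head_of:
        path, on_path = [], {}
        node = start
        while node in head_of and node not in visited:
            if node in on_path:
                return path[on_path[node]:]  # the cycle, in walk order
            on_path[node] = len(path)
            path.append(node)
            node = head_of[node]
        visited.update(path)
    return []  # no cycle

For example, find_cycle({1: 2, 2: 3, 3: 1, 4: 0}) returns the nodes of the cycle 1 -> 2 -> 3 -> 1 (starting from whichever of them is reached first), while an acyclic head map returns [].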