Skip to content

Commit 9b8d9b3

Browse files
committed
Performance optimization
1 parent a7d3230 commit 9b8d9b3

File tree

5 files changed

+164
-196
lines changed

5 files changed

+164
-196
lines changed

dependency-parser.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,6 @@
11
# !/bin/python
22
# -*- coding: utf-8 -*-
33

4-
import profile
5-
64
import time
75
import os
86
import codecs
@@ -130,8 +128,8 @@ def train(args):
130128
weight_vector, correct, errors = structured_perceptron(deepcopy(sparse_graphs[graph_id]), feat_map, rev_feat_map, weight_vector, correct, errors, "train", alpha)
131129
total += 1
132130
if total % 500 == 0:
133-
print "\t\t\tInstance Nr. " + str(total) + ", Correct: " + str(correct) + " (" \
134-
+ str((correct*100)/total) + "%), Errors: " + str(errors)
131+
print "\t\t\tInstance Nr. " + str(total) + "\tCorrect: " + str(correct) + "\t(" \
132+
+ str((correct*100)/total) + "%)\tErrors: " + str(errors)
135133
# print "\t\t\tCurrent weight vector:"
136134
# print "\t\t\t" + str(weight_vector)
137135

@@ -196,7 +194,7 @@ def test(args):
196194

197195
total += 1
198196
if total % 500 == 0:
199-
print "\t\tInstance Nr. " + str(total) + ", Errors: " + str(errors)
197+
print "\t\tInstance Nr. " + str(total) + "\tErrors: " + str(errors)
200198
# print "\t\t\tCurrent weight vector:"
201199
# print "\t\t\t" + str(weight_vector)
202200
stop = time.time()
@@ -236,7 +234,7 @@ def write_to_file(token, file_obj):
236234
else:
237235
if arguments.train:
238236
print "Running in training mode\n"
239-
profile.run(train(arguments))
237+
train(arguments)
240238

241239
elif arguments.test:
242240
print "Running in test mode\n"

modules/cle.py

Lines changed: 114 additions & 135 deletions
Original file line numberDiff line numberDiff line change
@@ -35,201 +35,180 @@ def chu_liu_edmonds(graph):
3535

3636
# adding arcs from inside cycle to outside cycle, then delete c
3737
if t_c in y:
38-
for arc in y[t_c]:
39-
if arc.dependent not in c:
40-
for head in g_a:
41-
if head == arc.former_head:
42-
for arc2 in g_a[head]:
43-
if arc2.dependent == arc.dependent:
44-
if head in y:
45-
y[head].append(arc2)
46-
else:
47-
y[head] = [arc2]
38+
for arc in (arc for arc in y[t_c] if arc.dependent not in c):
39+
# dependents of t_c which are not in c
40+
for head in (head for head in g_a if head == arc.former_head):
41+
# heads in g_a which are former head of arc
42+
for arc2 in (arc2 for arc2 in g_a[head] if arc2.dependent == arc.dependent):
43+
# dependent of head which has the same id as arc
44+
try:
45+
y[head].append(arc2)
46+
except KeyError:
47+
y[head] = [arc2]
4848

4949
del y[t_c]
5050

5151
# adding arcs from outside cycle to inside cycle, adding the arc from outside to C
5252
new_dependents = []
5353
for arc in y[head_of_cycle]:
5454
if arc.dependent == t_c:
55-
for head in g:
56-
if head == head_of_cycle:
57-
for arc2 in g[head]:
58-
if arc2.dependent == arc.former_dependent:
59-
cycle_head = arc.former_dependent
60-
new_dependents.append(arc2)
61-
55+
# arc is t_c in y and dependent of head_of_cycle
56+
for head in (head for head in g if head == head_of_cycle):
57+
# head in g which is head_of_cycle
58+
for arc2 in (arc2 for arc2 in g[head] if arc2.dependent == arc.former_dependent):
59+
# arc in g[head] which has the same id as the former_dependent of t_c
60+
cycle_head = arc.former_dependent
61+
new_dependents.append(arc2)
6262
else:
6363
new_dependents.append(arc)
6464
y[head_of_cycle] = new_dependents
6565

6666
# adding arcs from inside cycle to inside cycle except the one pointing to cycle_head
6767

68-
for node in c:
69-
for head in g_a:
70-
if head == node:
71-
for arc in g_a[head]:
72-
if arc.dependent in c:
73-
if not arc.dependent == cycle_head:
74-
if head in y:
75-
y[head].append(arc)
76-
else:
77-
y[head] = [arc]
68+
for head in (head for head in g_a if head in c):
69+
# every head in g_a that is in c
70+
for arc in (arc for arc in g_a[head] if arc.dependent in c and not arc.dependent == cycle_head):
71+
# every dependent of head in g_a if it is in c but not cycle_head
72+
try:
73+
y[head].append(arc)
74+
except KeyError:
75+
y[head] = [arc]
76+
77+
return y
7878

7979
def contract(g, g_a, c, t_c):
8080

8181
g_c = deepcopy(g_a)
8282

8383
# delete all nodes that are in c out of g_c
84-
for node in c:
85-
if node in g_c:
86-
del g_c[node]
84+
for node in (node for node in c if node in g_c):
85+
del g_c[node]
8786

8887
# remove all dependents out of c of every head they are in, keep dependents that are not in c
8988
for head in g_c:
9089
new_dependents = []
91-
for arc in g_c[head]:
92-
if arc.dependent not in c:
93-
new_dependents.append(arc)
90+
for arc in (arc for arc in g_c[head] if arc.dependent not in c):
91+
new_dependents.append(arc)
9492
g_c[head] = new_dependents
9593

9694
# if there are heads with no dependents, remove them completely
9795
tmp_g_c = deepcopy(g_c)
98-
for head in g_c:
99-
if not g_c[head]:
100-
del tmp_g_c[head]
96+
for head in (head for head in g_c if not g_c[head]):
97+
del tmp_g_c[head]
10198
g_c = tmp_g_c
10299

103100
# Arcs leaving C
104101
# (add all dependents out of g_a that had a head out of C)
105-
for head in c:
106-
for arc in g_a[head]:
107-
if arc.dependent not in c:
108-
if t_c in g_c:
109-
if g_c[t_c]:
110-
found_dependent = False
111-
new_dependents = []
112-
for arc2 in g_c[t_c]:
113-
if arc2.dependent == arc.dependent:
114-
found_dependent = True
115-
if arc.score > arc2.score:
116-
new_arc = deepcopy(arc)
117-
new_arc.head = t_c
118-
new_arc.former_head = head
119-
new_arc.feat_vec = []
120-
121-
new_dependents.append(new_arc)
122-
else:
123-
new_dependents.append(arc2)
124-
else:
125-
new_dependents.append(arc2)
126-
127-
if not found_dependent:
128-
102+
for head in (head for head in g_a if head in c):
103+
for arc in (arc for arc in g_a[head] if arc.dependent not in c):
104+
try:
105+
found_dependent = False
106+
new_dependents = []
107+
for arc2 in g_c[t_c]:
108+
if arc2.dependent == arc.dependent:
109+
found_dependent = True
110+
if arc.score > arc2.score:
129111
new_arc = deepcopy(arc)
130112
new_arc.head = t_c
131113
new_arc.former_head = head
132114
new_arc.feat_vec = []
133115

134116
new_dependents.append(new_arc)
135-
136-
g_c[t_c] = new_dependents
137-
117+
else:
118+
new_dependents.append(arc2)
138119
else:
139-
new_arc = deepcopy(arc)
140-
new_arc.head = t_c
141-
new_arc.former_head = head
142-
new_arc.feat_vec = []
120+
new_dependents.append(arc2)
121+
122+
if not found_dependent:
143123

144-
g_c[t_c] = [new_arc]
145-
else:
146124
new_arc = deepcopy(arc)
147125
new_arc.head = t_c
148126
new_arc.former_head = head
149127
new_arc.feat_vec = []
150128

151-
g_c[t_c] = [new_arc]
129+
new_dependents.append(new_arc)
130+
131+
g_c[t_c] = new_dependents
132+
133+
except KeyError:
134+
new_arc = deepcopy(arc)
135+
new_arc.head = t_c
136+
new_arc.former_head = head
137+
new_arc.feat_vec = []
138+
139+
g_c[t_c] = [new_arc]
152140

153141
# compute s(C), the score of the cycle
154142
s_c = 0.0
155143
for head in c:
156-
for arc in g_a[head]:
157-
if arc.dependent in c:
158-
s_c += arc.score
144+
for arc in (arc for arc in g_a[head] if arc.dependent in c):
145+
s_c += arc.score
159146

160147
# Arcs entering C
161-
for head in g:
162-
if head not in c:
163-
for arc in g[head]:
164-
# check all arcs the go into c, save the highest
165-
if arc.dependent in c:
166-
167-
# compute s(th, td), the score of the arc from outside the cycle to inside the cycle
168-
s_th_td = arc.score
169-
170-
# compute s(h(td), td), the score of the head of the arc INSIDE the cycle
171-
s_htd_td = 0.0
172-
173-
for head2 in c:
174-
for arc2 in g_a[head2]:
175-
if arc2.dependent == arc.dependent:
176-
s_htd_td = arc2.score
177-
178-
s = s_th_td + s_c - s_htd_td
179-
180-
if head in g_c:
181-
if g_c[head]:
182-
found_dependent = False
183-
new_dependents = []
184-
for arc2 in g_c[head]:
185-
if arc2.dependent == t_c:
186-
found_dependent = True
187-
if s > arc2.score:
188-
new_arc = deepcopy(arc)
189-
new_arc.score = s
190-
new_arc.dependent = t_c
191-
new_arc.former_dependent = arc.dependent
192-
new_dependents.append(new_arc)
193-
else:
194-
new_dependents.append(arc2)
195-
else:
196-
new_dependents.append(arc2)
197-
198-
if not found_dependent:
148+
for head in (head for head in g if head not in c):
149+
for arc in g[head]:
150+
# check all arcs that go into c, save the highest
151+
if arc.dependent in c:
152+
153+
# compute s(th, td), the score of the arc from outside the cycle to inside the cycle
154+
s_th_td = arc.score
155+
156+
# compute s(h(td), td), the score of the head of the arc INSIDE the cycle
157+
s_htd_td = 0.0
158+
159+
for head2 in c:
160+
for arc2 in g_a[head2]:
161+
if arc2.dependent == arc.dependent:
162+
s_htd_td = arc2.score
163+
164+
s = s_th_td + s_c - s_htd_td
165+
166+
try:
167+
found_dependent = False
168+
new_dependents = []
169+
for arc2 in g_c[head]:
170+
if arc2.dependent == t_c:
171+
found_dependent = True
172+
if s > arc2.score:
199173
new_arc = deepcopy(arc)
200174
new_arc.score = s
201175
new_arc.dependent = t_c
202176
new_arc.former_dependent = arc.dependent
203177
new_dependents.append(new_arc)
204-
205-
g_c[head] = new_dependents
206-
178+
else:
179+
new_dependents.append(arc2)
207180
else:
208-
new_arc = deepcopy(arc)
209-
new_arc.score = s
210-
new_arc.dependent = t_c
211-
new_arc.former_dependent = arc.dependent
212-
g_c[head].append(new_arc)
213-
else:
181+
new_dependents.append(arc2)
182+
183+
if not found_dependent:
214184
new_arc = deepcopy(arc)
215185
new_arc.score = s
216186
new_arc.dependent = t_c
217187
new_arc.former_dependent = arc.dependent
218-
g_c[head] = [new_arc]
219-
else:
220-
# find all arcs that are outside of c, add them if not there
221-
if head in g_c:
222-
arc_found = False
223-
for arc2 in g_c[head]:
224-
if arc2.dependent == arc.dependent:
225-
arc_found = True
226-
if not arc_found:
227-
if g_c[head]:
228-
g_c[head].append(arc)
229-
else:
230-
g_c[head] = [arc]
231-
else:
232-
g_c[head] = [arc]
188+
new_dependents.append(new_arc)
189+
190+
g_c[head] = new_dependents
191+
192+
except KeyError:
193+
new_arc = deepcopy(arc)
194+
new_arc.score = s
195+
new_arc.dependent = t_c
196+
new_arc.former_dependent = arc.dependent
197+
g_c[head] = [new_arc]
198+
else:
199+
# find all arcs that are outside of c, add them if not there
200+
try:
201+
arc_found = False
202+
for arc2 in g_c[head]:
203+
if arc2.dependent == arc.dependent:
204+
arc_found = True
205+
if not arc_found:
206+
try:
207+
g_c[head].append(arc)
208+
except KeyError:
209+
g_c[head] = [arc]
210+
except KeyError:
211+
g_c[head] = [arc]
233212

234213
return g_c, t_c
235214

0 commit comments

Comments
 (0)