
Commit a7d3230

Optimizing testing, fix errors in CLE, minor stuff
1 parent 3488007 commit a7d3230

8 files changed: +244, -187 lines changed

create_screens.sh

Lines changed: 4 additions & 5 deletions
@@ -1,8 +1,7 @@
 #!/bin/bash
 
 screen -dmS "cle_1" ./start-cle_automated.sh 1 m_e-1 p_e-1 e_e-1
-screen -dmS "cle_2" ./start-cle_automated.sh 1 m_e-1_ss p_e-1_st e_e-1_st -shuffle-sentences
-screen -dmS "cle_3" ./start-cle_automated.sh 10 m_e-10 p_e-10 e_e-10
-screen -dmS "cle_4" ./start-cle_automated.sh 10 m_e-10_da p_e-10_da e_e-10_da -decrease-alpha
-screen -dmS "cle_5" ./start-cle_automated.sh 10 m_e-10_ss p_e-10_ss e_e-10_ss -shuffle-sentences
-screen -dmS "cle_6" ./start-cle_automated.sh 10 m_e-10_da_ss p_e-10_da_ss e_e-10_da_ss -decrease-alpha -shuffle-sentences
+screen -dmS "cle_2" ./start-cle_automated.sh 10 m_e-10 p_e-10 e_e-10
+screen -dmS "cle_3" ./start-cle_automated.sh 10 m_e-10_da p_e-10_da e_e-10_da -decrease-alpha
+screen -dmS "cle_4" ./start-cle_automated.sh 10 m_e-10_ss p_e-10_ss e_e-10_ss -shuffle-sentences
+screen -dmS "cle_5" ./start-cle_automated.sh 10 m_e-10_da_ss p_e-10_da_ss e_e-10_da_ss -decrease-alpha -shuffle-sentences

dependency-parser.py

Lines changed: 19 additions & 31 deletions
@@ -1,6 +1,8 @@
 # !/bin/python
 # -*- coding: utf-8 -*-
 
+import profile
+
 import time
 import os
 import codecs
@@ -114,7 +116,7 @@ def train(args):
 print "\t\tReduce smoothing coefficient activated."
 
 alpha = 0.5 # smoothing coefficient for the weight adjustments
-graph_ids = sparse_graphs.keys() #list of dict keys, needed when shuffeling tokens after every epoch
+graph_ids = sparse_graphs.keys() # list of dict keys, needed when shuffeling tokens after every epoch
 
 for epoch in range(1, int(args.epochs) + 1):
 
@@ -128,7 +130,8 @@ def train(args):
 weight_vector, correct, errors = structured_perceptron(deepcopy(sparse_graphs[graph_id]), feat_map, rev_feat_map, weight_vector, correct, errors, "train", alpha)
 total += 1
 if total % 500 == 0:
-print "\t\t\tInstance Nr. " + str(total) + ", Correct: " + str(correct) + ", Errors: " + str(errors)
+print "\t\t\tInstance Nr. " + str(total) + ", Correct: " + str(correct) + " (" \
++ str((correct*100)/total) + "%), Errors: " + str(errors)
 # print "\t\t\tCurrent weight vector:"
 # print "\t\t\t" + str(weight_vector)
 
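Side note on the new progress line: both operands of (correct*100)/total are Python 2 ints, so the division floors and the reported percentage is always a whole number. A tiny standalone sketch of that behaviour (the values are made up for illustration):

correct, total = 437, 500
print str((correct * 100) / total) + "%"     # "87%"  (integer floor division)
print str((correct * 100.0) / total) + "%"   # "87.4%" if a fractional percentage were wanted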
@@ -164,47 +167,32 @@ def test(args):
 stop = time.time()
 print "\t\t" + str(len(feat_map)) + " features loaded"
 print "\t\tDone, " + str(stop - start) + " sec."
-
 start = time.time()
-print "\tCreating graph representation of every sentence..."
+sentence_count = 0
+for sentence in sentences(codecs.open(args.in_file, encoding='utf-8')):
+sentence_count += 1
 
-full_graphs = {}
+print "\tStart annotating the test file, Total Instances: " + str(sentence_count)
 
-empty_feat_vec = 0
+total = 0
+errors = 0
 
 for sentence in sentences(codecs.open(args.in_file, encoding='utf-8')):
 
+# create complete, directed graph representation of sentence
 full_graph = CompleteFullGraph(sentence).heads
 
-# add feature vec to every graph
+# add feature vec
 full_graph = add_feat_vec_to_full_graph(full_graph, feat_map)
 
-# feat_vec sanity
-
-for head in full_graph:
-for arc in full_graph[head]:
-if not arc.feat_vec:
-empty_feat_vec += 1
-
-full_graphs[len(full_graphs)] = full_graph
-
-stop = time.time()
-print "\t\tNumber of sentences: " + str(len(full_graphs)) + ", Number of arcs with empty feature vectors: " + str(empty_feat_vec)
-print "\t\tDone, " + str(stop - start) + " sec"
-
-start = time.time()
-print "\tStart annotating the test file, Total Instances: " + str(len(full_graphs))
-
-total = 0
-errors = 0
-for graph_id in full_graphs:
 tmp_errors = errors
-predicted_graph, errors = structured_perceptron(deepcopy(full_graphs[graph_id]), feat_map, rev_feat_map, weight_vector, 0, errors, "test")
 
-if tmp_errors == errors:
+predicted_graph, errors = structured_perceptron(deepcopy(full_graph), feat_map, rev_feat_map, weight_vector, 0, errors, "test")
+
+if tmp_errors == errors: # no error occured during prediction
 write_graph_to_file(predicted_graph, args.out_file)
-else:
-write_graph_to_file(full_graphs[graph_id], args.out_file, "error")
+else: # a error occured during prediction
+write_graph_to_file(full_graph, args.out_file, "error")
 
 total += 1
 if total % 500 == 0:
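The rewritten test() no longer builds the full_graphs dict and the empty-feature-vector sanity counter up front; it counts the sentences in one cheap pass and then annotates them one at a time, so only a single graph is held in memory. As a rough standalone illustration of that first counting pass, assuming blank-line-separated CoNLL-style input (the actual sentences() reader is not part of this diff):

import codecs

def count_sentences(path):
    # First pass: count blank-line-separated sentence blocks without
    # building any graph structures (CoNLL-style layout is an assumption).
    count, in_sentence = 0, False
    for line in codecs.open(path, encoding='utf-8'):
        if line.strip():
            in_sentence = True
        elif in_sentence:
            count += 1
            in_sentence = False
    if in_sentence:  # file may not end with a blank line
        count += 1
    return count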
@@ -248,7 +236,7 @@ def write_to_file(token, file_obj):
 else:
 if arguments.train:
 print "Running in training mode\n"
-train(arguments)
+profile.run(train(arguments))
 
 elif arguments.test:
 print "Running in test mode\n"

modules/cle.py

Lines changed: 5 additions & 5 deletions
@@ -1,14 +1,12 @@
-from graphs import highest_scoring_heads, cycle, reverse_head_graph
+from graphs import highest_scoring_heads, cycle, reverse_head_graph, check_graph_sanity
 from copy import deepcopy
 
 def chu_liu_edmonds(graph):
 
 g = deepcopy(graph)
 g_a = highest_scoring_heads(deepcopy(g))
-c = cycle(g_a, [], sorted(g_a.keys())[0])
+c = cycle(g_a)
 
-if c is None:
-print "The cycle function returned 'None'."
 if not c:
 return g_a
 else:
@@ -66,6 +64,7 @@ def chu_liu_edmonds(graph):
 y[head_of_cycle] = new_dependents
 
 # adding arcs from inside cycle to inside cycle except the one pointing to cycle_head
+
 for node in c:
 for head in g_a:
 if head == node:
@@ -76,7 +75,6 @@ def chu_liu_edmonds(graph):
 y[head].append(arc)
 else:
 y[head] = [arc]
-return y
 
 def contract(g, g_a, c, t_c):
 
@@ -230,6 +228,8 @@ def contract(g, g_a, c, t_c):
 g_c[head].append(arc)
 else:
 g_c[head] = [arc]
+else:
+g_c[head] = [arc]
 
 return g_c, t_c
 
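The entry point now calls cycle(g_a) without the extra accumulator and start-node arguments, and the dead "returned 'None'" branch is gone. The graphs module is not part of this diff, so purely as an illustration of the idea, here is one standalone way to find a cycle when every node is mapped to its single chosen head; the head_of layout and the function name are assumptions, not the repo's implementation:

def find_cycle(head_of):
    # head_of: {dependent: head}, one chosen head per node (assumed layout).
    # Follow head pointers from every node; revisiting a node that is already
    # on the current path means the nodes from there onward form a cycle.
    visited = set()
    for start in head_of:
        path, on_path = [], {}
        node = start
        while node in head_of and node not in visited:
            if node in on_path:
                return path[on_path[node]:]  # the cycle, in walk order
            on_path[node] = len(path)
            path.append(node)
            node = head_of[node]
        visited.update(path)
    return []  # no cycle

For example, find_cycle({1: 2, 2: 3, 3: 1, 4: 0}) returns the nodes of the cycle 1 -> 2 -> 3 -> 1 (starting from whichever of them is reached first), while an acyclic head map returns [].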