@@ -6,24 +6,24 @@ def give_features(hform, hlemma, hpos, dform, dlemma, dpos, hrform, hrpos, hlfor
66 direction , distance ):
77 # generator that yields features based on the following information:
88
9- # 1 = hform
10- # 2 = hpos
11- # 3 = dform
12- # 4 = dpos
13- # 5 = hlemma
14- # 6 = dlemma
15-
16- # 7 = hrform
17- # 8 = hrpos
18- # 9 = hlform
19- # 10 = hlpos
20- # 11 = drform
21- # 12 = drpos
22- # 13 = dlform
23- # 14 = dlpos
24-
25- # 15 = direction
26- # 16 = distance
9+ # 1 = hform = form of the head
10+ # 2 = hpos = pos of the head
11+ # 3 = dform = form of the dependent
12+ # 4 = dpos = pos of the dependent
13+ # 5 = hlemma = lemma of the head
14+ # 6 = dlemma = lemma of the dependent
15+
16+ # 7 = hrform = form of the right neighbour of the head
17+ # 8 = hrpos = pos of the right neighbour of the head
18+ # 9 = hlform = form of the left neighbour of the head
19+ # 10 = hlpos = pos of the left neighbour of the head
20+ # 11 = drform = form of the right neighbour of the dependent
21+ # 12 = drpos = pos of the right neighbour of the dependent
22+ # 13 = dlform = form of the left neighbour of the dependent
23+ # 14 = dlpos = pos of the left neighbour of the dependent
24+
25+ # 15 = direction = whether the head is to the right or to the left of the dependent in the sentence
26+ # 16 = distance = the distance between head and dependent
2727
2828 yield u'1,15,16:{0},{1},{2}' .format (hform , direction , distance )
2929 yield u'2,15,16:{0},{1},{2}' .format (hpos , direction , distance )
@@ -91,6 +91,9 @@ def give_features(hform, hlemma, hpos, dform, dlemma, dpos, hrform, hrpos, hlfor
9191
9292
9393def give_distance (id1 , id2 , direction ):
94+
95+ # returns the distance between head and dependent in the sentence as a bucketed feature
96+
9497 if direction == "right" :
9598 d = id1 - id2
9699 else :
@@ -120,6 +123,9 @@ def give_distance(id1, id2, direction):
120123
121124
122125def give_direction (id1 , id2 ):
126+
127+ # returns the direction of the head (left or right of the dependent)
128+
123129 if id2 < id1 :
124130 direction = "right"
125131 else :
@@ -129,6 +135,9 @@ def give_direction(id1, id2):
129135
130136
131137def give_surrounding_information (sentence , id1 , id2 ):
138+
139+ # returns the form and pos of the left and right neighbours of the head and of the dependent
140+
132141 hrform = "__NULL__"
133142 hrpos = "__NULL__"
134143 hlform = "__NULL__"
@@ -138,6 +147,7 @@ def give_surrounding_information(sentence, id1, id2):
138147 drpos = "__NULL__"
139148 dlform = "__NULL__"
140149 dlpos = "__NULL__"
150+
141151 if id1 not in [0 , 1 , len (sentence )]:
142152 hrform = sentence [id1 ].form
143153 hrpos = sentence [id1 ].pos
@@ -178,6 +188,7 @@ def fm(infile):
178188
179189 for sentence in sentences (codecs .open (infile , encoding = 'utf-8' )):
180190 for token1 in sentence :
191+
181192 direction = "left"
182193 distance = give_distance (0 , token1 .id , direction )
183194 hrform = hrpos = hlform = hlpos = drform = drpos = dlform = dlpos = "__NULL__"
@@ -186,7 +197,7 @@ def fm(infile):
186197 for feature in give_features ("__ROOT__" , "__ROOT__" , "__ROOT__" , token1 .form , token1 .lemma , token1 .pos ,
187198 hrform , hrpos , hlform , hlpos , drform , drpos , dlform , dlpos , direction ,
188199 distance ):
189- #print feature
200+
190201 if feature not in feat_map :
191202 feat_map [feature ] = index
192203 index += 1
@@ -203,7 +214,7 @@ def fm(infile):
203214 for feature in give_features (token1 .form , token1 .lemma , token1 .pos , token2 .form , token2 .lemma ,
204215 token2 .pos , hrform , hrpos , hlform , hlpos , drform , drpos , dlform , dlpos ,
205216 direction , distance ):
206- #print feature
217+
207218 if feature not in feat_map :
208219 feat_map [feature ] = index
209220 index += 1
0 commit comments