@@ -53,7 +53,8 @@ def parse_ann_line(self, ann_line):
5353 parts = ann_line .strip ().split ('\t ' )
5454 res = {
5555 'annotation_id_s' : parts [0 ],
56- 'source' : 'brat' ,
56+ #'source': 'brat',
57+ 'source' : 'reviewed' ,
5758 }
5859 if parts [0 ][0 ] == 'T' : # anchors (for targets, components, events)
5960 args = parts [1 ].split ()[1 :]
@@ -162,13 +163,18 @@ def read_records(self, in_file):
162163 targets_anns = ch .get ('targets_ss' , [])
163164 cont_anns = ch .get ('cont_ss' , [])
164165 ch ['target_ids_ss' ] = list (map (lambda t : index [t ]['id' ], targets_anns ))
166+ ch ['target_ann_ids_ss' ] = list (map (lambda t : index [t ]['annotation_id_s' ], targets_anns ))
165167 ch ['target_names_ss' ] = list (map (lambda t : index [t ]['name' ], targets_anns ))
166168 ch ['cont_ids_ss' ] = list (map (lambda c : index [c ]['id' ], cont_anns ))
167169 ch ['cont_names_ss' ] = list (map (lambda c : index [c ]['name' ], cont_anns ))
168170 # extract excerpt from anchor annotation
169171 anc_doc = index [ch ['anchor_s' ]]
170172 ch ['excerpt_t' ] = self .extract_excerpt (txt , anc_doc )
171173
174+ # Track aliases
175+ targets = [a for a in children if a .get ('type' ) == 'target' ]
176+ aliases = [a for a in children if a .get ('type' ) == 'alias' ]
177+
172178 # Extract references
173179 references = extract_references (txt )
174180
@@ -188,14 +194,20 @@ def read_records(self, in_file):
188194 for child in children :
189195 if 'name' in child :
190196 if child ['type' ] == 'target' :
191- child ['can_name' ] = canonical_target_name (child ['name' ])
197+ child ['can_name' ] = \
198+ canonical_target_name (child ['name' ],
199+ child ['annotation_id_s' ],
200+ targets , aliases )
192201 else :
193202 child ['can_name' ] = canonical_name (child ['name' ])
194203 if 'target_names_ss' in child :
195- child ['target_names_ss' ] = map (canonical_target_name ,
196- child ['target_names_ss' ])
204+ child ['target_names_ss' ] = \
205+ [canonical_target_name (t , i , targets , aliases ) \
206+ for (t ,i ) in zip (child ['target_names_ss' ],
207+ child ['target_ann_ids_ss' ])]
197208 if 'cont_names_ss' in child :
198- child ['cont_names_ss' ] = map (canonical_name , child ['cont_names_ss' ])
209+ child ['cont_names_ss' ] = \
210+ [canonical_name (c ) for c in child ['cont_names_ss' ]]
199211 yield child
200212
201213 def index (self , solr_url , in_file ):
0 commit comments