@@ -158,7 +158,7 @@ def parse_gpi(gpi_file):
158158
159159
160160class AssocExtractor :
161- def __init__ (self , gpad_file , parser_config : AssocParserConfig = None ):
161+ def __init__ (self , gpad_file , gpi_file , parser_config : AssocParserConfig = None ):
162162 if parser_config :
163163 gpad_parser = GpadParser (config = parser_config )
164164 else :
@@ -173,13 +173,15 @@ def __init__(self, gpad_file, parser_config: AssocParserConfig = None):
173173 length = lines ) as associations :
174174 self .assocs = self .extract_properties_from_assocs (associations )
175175
176+ self .entity_parents = self .parse_gpi_parents (gpi_file )
177+
176178 def group_assocs (self ):
177179 assocs_by_gene = {}
178180 for a in self .assocs :
179- # validation function
180- # if not self.assoc_filter.validate_line(a):
181- # continue
182181 subject_id = a ["subject" ]["id" ]
182+ # If entity has parent, assign to parent entity model
183+ if subject_id in self .entity_parents :
184+ subject_id = self .entity_parents [subject_id ]
183185 if subject_id in assocs_by_gene :
184186 assocs_by_gene [subject_id ].append (a )
185187 else :
@@ -193,6 +195,19 @@ def extract_properties_from_assocs(assocs):
193195 new_assoc_list .append (extract_properties (a ))
194196 return new_assoc_list
195197
198+ @staticmethod
199+ def parse_gpi_parents (gpi_file ):
200+ if gpi_file is None :
201+ return None
202+ parser = GpiParser ()
203+ entity_parents = {}
204+ entities = parser .parse (gpi_file )
205+ for entity in entities :
206+ entity_id = entity ['id' ]
207+ if len (entity ['parents' ]) > 0 :
208+ entity_parents [entity_id ] = entity ['parents' ][0 ] # There may only be one
209+ return entity_parents
210+
196211
197212def unzip (filepath ):
198213 input_file = gzip .GzipFile (filepath , "rb" )
0 commit comments