Skip to content

Commit bd9c7a7

Browse files
committed
Grouping annotations by entity GPI parent ID; issue #83
1 parent 02ef984 commit bd9c7a7

File tree

2 files changed

+20
-5
lines changed

2 files changed

+20
-5
lines changed

bin/validate.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -558,7 +558,7 @@ def gpad2gocams(ctx, gpad_path, gpi_path, target, ontology):
558558
unzip(gpad_path, unzipped)
559559
gpad_path = unzipped
560560
# NOTE: Validation on GPAD not included here since it's currently baked into produce() above.
561-
extractor = AssocExtractor(gpad_path)
561+
extractor = AssocExtractor(gpad_path, gpi_path)
562562
assocs_by_gene = extractor.group_assocs()
563563

564564
absolute_target = os.path.abspath(target)

ontobio/rdfgen/gocamgen/gocam_builder.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def parse_gpi(gpi_file):
158158

159159

160160
class AssocExtractor:
161-
def __init__(self, gpad_file, parser_config: AssocParserConfig = None):
161+
def __init__(self, gpad_file, gpi_file, parser_config: AssocParserConfig = None):
162162
if parser_config:
163163
gpad_parser = GpadParser(config=parser_config)
164164
else:
@@ -173,13 +173,15 @@ def __init__(self, gpad_file, parser_config: AssocParserConfig = None):
173173
length=lines) as associations:
174174
self.assocs = self.extract_properties_from_assocs(associations)
175175

176+
self.entity_parents = self.parse_gpi_parents(gpi_file)
177+
176178
def group_assocs(self):
177179
assocs_by_gene = {}
178180
for a in self.assocs:
179-
# validation function
180-
# if not self.assoc_filter.validate_line(a):
181-
# continue
182181
subject_id = a["subject"]["id"]
182+
# If entity has parent, assign to parent entity model
183+
if subject_id in self.entity_parents:
184+
subject_id = self.entity_parents[subject_id]
183185
if subject_id in assocs_by_gene:
184186
assocs_by_gene[subject_id].append(a)
185187
else:
@@ -193,6 +195,19 @@ def extract_properties_from_assocs(assocs):
193195
new_assoc_list.append(extract_properties(a))
194196
return new_assoc_list
195197

198+
@staticmethod
def parse_gpi_parents(gpi_file):
    """Map each GPI entity ID to the ID of its parent entity.

    :param gpi_file: GPI file handed to ``GpiParser.parse``, or ``None``
        when no GPI file is available.
    :return: dict of ``entity id -> parent id`` holding only entities that
        list at least one parent. Always a dict (empty when ``gpi_file`` is
        ``None``) so callers such as ``group_assocs`` can safely run
        ``subject_id in self.entity_parents`` without a ``None`` check.
    """
    entity_parents = {}
    if gpi_file is None:
        # Previously returned None, which made the membership test in
        # group_assocs raise TypeError when no GPI file was supplied.
        return entity_parents
    parser = GpiParser()
    for entity in parser.parse(gpi_file):
        parents = entity['parents']
        if parents:
            # There may only be one parent, so the first entry is used.
            entity_parents[entity['id']] = parents[0]
    return entity_parents
210+
196211

197212
def unzip(filepath):
198213
input_file = gzip.GzipFile(filepath, "rb")

0 commit comments

Comments
 (0)