Skip to content

Commit 3bec89a

Browse files
authored
Merge pull request #60 from ali-yz/fix/og-bug
fix: issue #59 nonunique gene names
2 parents: ad519b1 + 4b7a63c; commit: 3bec89a

File tree

1 file changed

+7
-7
lines changed

1 file changed

+7
-7
lines changed

FastOMA/collect_subhogs.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -218,12 +218,12 @@ def write_group_files(orthoxml: Path, roothog_folder: Path, output_file_og_tsv=N
218218
with open(output_file_og_tsv, 'w') as tsv:
219219
tsv.write("Group\tProtein\n")
220220
for grp, meta in extract_marker_groups_at_level(orthoxml, protein_attribute="protId", callback=callback_group_and_omamer):
221-
group_members = {g.xref for g in grp}
221+
group_members = {(g.xref, g.species) for g in grp}
222222
group_name = meta['group_id'].replace("HOG:", "OG_")
223223
nr_prot_in_groups += len(grp)
224224
nr_groups += 1
225-
for gene in group_members:
226-
tsv.write(f"{group_name}\t{gene}\n")
225+
for gene_xref, gene_species in group_members:
226+
tsv.write(f"{group_name}\t{gene_xref}\n")
227227

228228
_write_group_fasta(fasta_format, group_members, group_name, id_transformer, meta, output_fasta_groups,
229229
roothog_folder)
@@ -245,14 +245,14 @@ def write_roothogs(orthoxml: Path, roothog_folder: Path, output_file_roothog_tsv
245245
with open(output_file_roothog_tsv, 'wt') as tsv:
246246
tsv.write("RootHOG\tProtein\tOMAmerRootHOG\n")
247247
for grp, meta in extract_flat_groups_at_level(orthoxml, callback=callback_group_and_omamer):
248-
group_members = {g.xref for g in grp}
248+
group_members = {(g.xref, g.species) for g in grp}
249249
group_name = meta['group_id']
250250
# this is the id of the merged roothogs from the placement step
251251
omamer_roothog = meta['omamer_roothog']
252252
nr_prot_in_groups += len(grp)
253253
nr_groups += 1
254-
for gene in group_members:
255-
tsv.write(f"{group_name}\t{gene}\t{omamer_roothog}\n")
254+
for gene_xref, _ in group_members:
255+
tsv.write(f"{group_name}\t{gene_xref}\t{omamer_roothog}\n")
256256

257257
_write_group_fasta(fasta_format, group_members, group_name.replace(":", ""), id_transformer, meta, output_fasta_groups,
258258
roothog_folder)
@@ -268,7 +268,7 @@ def _write_group_fasta(fasta_format, group_members, group_name, id_transformer,
268268
for rec in SeqIO.parse(rhog_fasta, "fasta"):
269269
orig_id, sp, *rest = rec.id.split("||")
270270
protid = id_transformer.transform(orig_id)
271-
if protid in group_members:
271+
if (protid, sp) in group_members:
272272
rec.id = protid
273273
rec.description += " [" + sp + "]"
274274
group_seqs.append(rec)

0 commit comments

Comments
 (0)