@@ -218,12 +218,12 @@ def write_group_files(orthoxml: Path, roothog_folder: Path, output_file_og_tsv=N
218218 with open (output_file_og_tsv , 'w' ) as tsv :
219219 tsv .write ("Group\t Protein\n " )
220220 for grp , meta in extract_marker_groups_at_level (orthoxml , protein_attribute = "protId" , callback = callback_group_and_omamer ):
221- group_members = {g .xref for g in grp }
221+ group_members = {( g .xref , g . species ) for g in grp }
222222 group_name = meta ['group_id' ].replace ("HOG:" , "OG_" )
223223 nr_prot_in_groups += len (grp )
224224 nr_groups += 1
225- for gene in group_members :
226- tsv .write (f"{ group_name } \t { gene } \n " )
225+ for gene_xref , gene_species in group_members :
226+ tsv .write (f"{ group_name } \t { gene_xref } \n " )
227227
228228 _write_group_fasta (fasta_format , group_members , group_name , id_transformer , meta , output_fasta_groups ,
229229 roothog_folder )
@@ -245,14 +245,14 @@ def write_roothogs(orthoxml: Path, roothog_folder: Path, output_file_roothog_tsv
245245 with open (output_file_roothog_tsv , 'wt' ) as tsv :
246246 tsv .write ("RootHOG\t Protein\t OMAmerRootHOG\n " )
247247 for grp , meta in extract_flat_groups_at_level (orthoxml , callback = callback_group_and_omamer ):
248- group_members = {g .xref for g in grp }
248+ group_members = {( g .xref , g . species ) for g in grp }
249249 group_name = meta ['group_id' ]
250250 # this is the id of the merged roothogs from the placement step
251251 omamer_roothog = meta ['omamer_roothog' ]
252252 nr_prot_in_groups += len (grp )
253253 nr_groups += 1
254- for gene in group_members :
255- tsv .write (f"{ group_name } \t { gene } \t { omamer_roothog } \n " )
254+ for gene_xref , _ in group_members :
255+ tsv .write (f"{ group_name } \t { gene_xref } \t { omamer_roothog } \n " )
256256
257257 _write_group_fasta (fasta_format , group_members , group_name .replace (":" , "" ), id_transformer , meta , output_fasta_groups ,
258258 roothog_folder )
@@ -268,7 +268,7 @@ def _write_group_fasta(fasta_format, group_members, group_name, id_transformer,
268268 for rec in SeqIO .parse (rhog_fasta , "fasta" ):
269269 orig_id , sp , * rest = rec .id .split ("||" )
270270 protid = id_transformer .transform (orig_id )
271- if protid in group_members :
271+ if ( protid , sp ) in group_members :
272272 rec .id = protid
273273 rec .description += " [" + sp + "]"
274274 group_seqs .append (rec )
0 commit comments