Skip to content

Commit d471371

Browse files
committed
[FT] use full seqs at every level for alignments
1 parent d513978 commit d471371

File tree

4 files changed

+21
-2
lines changed

4 files changed

+21
-2
lines changed

FastOMA.nf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,7 @@ process hog_big{
372372
--gap-ratio-row ${params.filter_gap_ratio_row} \
373373
--gap-ratio-col ${params.filter_gap_ratio_col} \
374374
--number-of-samples-per-hog ${params.nr_repr_per_hog} \
375+
--align-subhogs-seqs full-seqs \
375376
${ params.write_msas ? "--msa-write" : ""} \
376377
${ params.write_genetrees ? "--gene-trees-write" : ""}
377378
"""
@@ -404,6 +405,7 @@ process hog_rest{
404405
--gap-ratio-row ${params.filter_gap_ratio_row} \
405406
--gap-ratio-col ${params.filter_gap_ratio_col} \
406407
--number-of-samples-per-hog ${params.nr_repr_per_hog} \
408+
--align-subhogs-seqs full-seqs \
407409
${ params.write_msas ? "--msa-write" : ""} \
408410
${ params.write_genetrees ? "--gene-trees-write" : ""}
409411
"""

FastOMA/_infer_subhog.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -387,8 +387,21 @@ def write_msa_or_tree_if_necessary(self, elem, fn_suffix="", features=[]):
387387
for member in rep_val.representative.get_subelements():
388388
writer.writerow([n.name, member])
389389

390+
def get_sequences_from_subhogs(self) -> list:
    """Collect the per-subhog inputs that ``align_subhogs`` merges.

    The strategy is selected by ``self.conf.align_subhogs_seqs``:

    * ``"msa"``: return each subhog's existing MSA (``hog.get_msa()``),
      skipping subhogs whose MSA is empty.
    * ``"full-seqs"``: wrap the record of every representative of every
      subhog in its own single-record ``MultipleSeqAlignment``.

    Returns:
        A list of alignment objects, one entry per non-empty subhog MSA
        (``"msa"``) or one entry per representative (``"full-seqs"``).

    Raises:
        ValueError: if ``self.conf.align_subhogs_seqs`` is neither
            ``"msa"`` nor ``"full-seqs"``.
    """
    mode = self.conf.align_subhogs_seqs
    if mode == "msa":
        # Fetch each MSA once, then drop the empty ones.
        msas = (hog.get_msa() for hog in self.subhogs.values())
        return [msa for msa in msas if len(msa) > 0]
    if mode == "full-seqs":
        seqs = [
            MultipleSeqAlignment([rep.get_record()])
            for hog in self.subhogs.values()
            for rep in hog.get_representatives()
        ]
        # Sanity check: one full sequence per row of the subhog MSAs.
        n_msa_rows = sum(len(hog.get_msa()) for hog in self.subhogs.values())
        assert len(seqs) == n_msa_rows, (
            f"{len(seqs)} representative sequences but {n_msa_rows} MSA rows"
        )
        return seqs
    raise ValueError(f"Unknown align_subhogs_seqs value: {mode!r}")
402+
390403
def align_subhogs(self):
391-
sub_msas = [hog.get_msa() for hog in self.subhogs.values() if len(hog.get_msa()) > 0]
404+
sub_msas = self.get_sequences_from_subhogs()
392405
logger.debug(f"Merging {len(sub_msas)} MSAs for rhog: {self.rhogid}, level: {self.node_species_tree.name}")
393406
if len(sub_msas) == 0:
394407
logger.info(

FastOMA/_wrappers.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def merge_msa(list_msas):
4141
# wrapper_mafft_merge.options['--merge'].active = False
4242

4343
#mafft --auto Automatically selects an appropriate strategy from L-INS-i, FFT-NS-i and FFT-NS-2, according to data size. Default: off (always FFT-NS-2)
44-
wrapper_mafft_merge.options['--auto'].set_value(False) # todo we can make it as an argument in fastoma-infer-subhogs.
44+
wrapper_mafft_merge.options['--auto'].set_value(False) # todo we can make it as an argument in fastoma-infer-subhogs.
4545
# wrapper_mafft_merge.options['--anysymbol'].active = True
4646
wrapper_mafft_merge.options['--anysymbol'].set_value(True)
4747
wrapper_mafft_merge.options['--thread'].set_value(-1) # -1 uses a largely appropriate number of threads in each step, after automatically counting the number of physical cores the computer has.

FastOMA/infer_subhogs.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ def fastoma_infer_subhogs():
4949
help="For trimming the MSA, the threshold of ratio of gaps for each column.")
5050
parser.add_argument("--min-col-trim", required=False, type=int, default=50, # todo min rows trim
5151
help="min no. columns in msa to consider for filtering")
52+
parser.add_argument("--align-subhogs-seqs", default="msa", choices=("msa", "full-seqs"),
53+
help="Sequences used in ancestral nodes to build MSA from. 'msa' uses the trimmed, "
54+
"aligned sequences from the more recent taxonomic level, where as 'full-seqs' "
55+
"uses always the initial full protein sequences.")
5256
parser.add_argument('-v', action="count", default=0, help="Increase verbosity to info/debug")
5357
conf_infer_subhhogs = parser.parse_args()
5458

0 commit comments

Comments
 (0)