diff --git a/tools/amas/.shed.yml b/tools/amas/.shed.yml new file mode 100644 index 00000000000..5753399e060 --- /dev/null +++ b/tools/amas/.shed.yml @@ -0,0 +1,19 @@ +categories: + - Phylogenetics + - Sequence Analysis + - Statistics +description: AMAS high-throughput alignment manipulation and summaries for phylogenomics +homepage_url: https://github.com/marekborowiec/AMAS +long_description: Handle expansive phylogenomic data sets by concatenating, removing, + replicating, splitting, and summarising large nucleotide or amino acid alignments. +name: amas +owner: iuc +remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/amas +auto_tool_repositories: + name_template: "{{ tool_id }}" + description_template: "Wrapper for amas functions: {{ tool_name }}." +suite: + name: "suite_amas" + description: "A suite of tools that brings the amas project into Galaxy." + long_description: Handle expansive phylogenomic data sets by concatenating, removing, + replicating, splitting, and summarising large nucleotide or amino acid alignments. 
\ No newline at end of file diff --git a/tools/amas/amas_concat.xml b/tools/amas/amas_concat.xml new file mode 100644 index 00000000000..91ec49f438a --- /dev/null +++ b/tools/amas/amas_concat.xml @@ -0,0 +1,147 @@ + + concatenate multiple alignments + + + macros.xml + + + + amas + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tools/amas/amas_remove.xml b/tools/amas/amas_remove.xml new file mode 100644 index 00000000000..c8b755ec549 --- /dev/null +++ b/tools/amas/amas_remove.xml @@ -0,0 +1,102 @@ + + remove taxa from multiple alignments + + + macros.xml + + + + amas + + + + + + + + + + + + + [A-Za-z0-9_.\-]+(\s+[A-Za-z0-9_.\-]+)* + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tools/amas/amas_replicate.xml b/tools/amas/amas_replicate.xml new file mode 100644 index 00000000000..ba58b05590d --- /dev/null +++ b/tools/amas/amas_replicate.xml @@ -0,0 +1,98 @@ + + replicate multiple alignments + + + macros.xml + + + + amas + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tools/amas/amas_split.xml b/tools/amas/amas_split.xml new file mode 100644 index 00000000000..69714760cd2 --- /dev/null +++ b/tools/amas/amas_split.xml @@ -0,0 +1,108 @@ + + split multiple alignments + + + macros.xml + + + + amas + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tools/amas/amas_summary.xml b/tools/amas/amas_summary.xml new file mode 100644 index 00000000000..9ae15fd920c --- /dev/null +++ b/tools/amas/amas_summary.xml @@ -0,0 +1,105 @@ + + summarise multiple alignments + + + macros.xml + + + + amas + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git 
"""
Helper script to check if AMAS input files are interleaved.

Prints the AMAS format name matching the given input files: the plain
format name (e.g. ``phylip``) for sequential input, or the ``-int``
variant (e.g. ``phylip-int``) when the input is interleaved.
"""
import argparse
import re
import sys

# NEXUS permits whitespace around "=" (e.g. "NTAX = 3").
_NTAX_RE = re.compile(r'\bntax\s*=\s*(\d+)')
# A bare INTERLEAVE or INTERLEAVE=YES switches interleaving on, whereas
# INTERLEAVE=NO explicitly switches it off and must not be treated as a hit.
_INTERLEAVE_RE = re.compile(r'\binterleave\b(?!\s*=\s*no\b)')
# Blocks whose MATRIX command holds the alignment.
_MATRIX_BLOCKS = ('data', 'characters')


def check_phylip_interleaved(filepath):
    """Check if PHYLIP file is interleaved.

    The first line must be the ``ntax nchar`` header.  The file is reported
    as interleaved as soon as more than ``ntax`` non-blank lines follow it.
    Blank lines are ignored, so a blank separator after the header does not
    skew the count.

    NOTE: this is a heuristic; a sequential file whose sequences are wrapped
    over several lines also has more than ``ntax`` data lines and is
    reported as interleaved.

    Raises:
        ValueError: if the file is empty or the header does not start with
            an integer taxon count.
    """
    with open(filepath, encoding='utf-8') as f:
        # First line is header: ntax nchar
        header = next(f, '').split()
        try:
            ntax = int(header[0])
        except (IndexError, ValueError):
            raise ValueError(
                f"{filepath}: missing or malformed PHYLIP header line"
            ) from None

        data_lines = 0
        for line in f:
            if not line.strip():
                continue
            data_lines += 1
            if data_lines > ntax:
                return True

    return False


def check_nexus_interleaved(filepath):
    """Check if NEXUS file is interleaved.

    A file is interleaved when the FORMAT command of its DATA/CHARACTERS
    block enables INTERLEAVE (bare keyword or ``INTERLEAVE=YES``), or,
    failing that, when the matrix holds more non-blank rows than the
    declared number of taxa.  ``NTAX`` is taken from the first DIMENSIONS
    command that declares it, which also covers files that declare it in a
    separate TAXA block.
    """
    in_data_block = False
    in_matrix = False
    ntax = None
    seq_lines = 0

    with open(filepath, encoding='utf-8') as f:
        for line in f:
            # NEXUS commands are case-insensitive.
            content = line.strip().lower()
            if not content:
                continue

            if in_matrix:
                if content in ('end;', 'endblock;'):
                    # Preserved from the original logic: a row count that
                    # differs from NTAX is reported as interleaved.
                    return bool(ntax) and seq_lines != ntax

                # A lone ";" only terminates the MATRIX command.
                if content != ';':
                    seq_lines += 1
                    if ntax and seq_lines > ntax:
                        return True
                continue

            if ntax is None and content.startswith('dimensions'):
                match = _NTAX_RE.search(content)
                if match:
                    ntax = int(match.group(1))
                continue

            if not in_data_block:
                if content.startswith('begin'):
                    words = content.split()
                    in_data_block = (
                        len(words) > 1
                        and words[1].startswith(_MATRIX_BLOCKS)
                    )
                continue

            if content.startswith('format'):
                if _INTERLEAVE_RE.search(content):
                    return True
            elif content.startswith('matrix'):
                in_matrix = True

    return False


def check_fasta_interleaved(filepath):
    """FASTA files are not interleaved."""
    return False


# Format name (as accepted by --format) -> detection function.
_CHECKERS = {
    'fasta': check_fasta_interleaved,
    'phylip': check_phylip_interleaved,
    'nexus': check_nexus_interleaved,
}


def main(argv=None):
    """Print the AMAS format name for the given input files.

    Args:
        argv: argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        0 on success.  Exits with an error message (status 1) when the
        inputs are a mix of interleaved and sequential files.
    """
    parser = argparse.ArgumentParser(
        description='Check if AMAS input files are interleaved'
    )
    parser.add_argument('input_files', nargs='+', help='Input sequence files')
    parser.add_argument('--format', required=True,
                        choices=sorted(_CHECKERS),
                        help='Input format')

    args = parser.parse_args(argv)

    checker = _CHECKERS[args.format]
    statuses = {checker(filepath) for filepath in args.input_files}

    if len(statuses) > 1:
        # A user-input problem: report it without a traceback.
        sys.exit(
            "Error: Input files are a mix of interleaved/sequential formats"
        )

    if statuses.pop():
        print(f"{args.format}-int")
    else:
        print(args.format)

    return 0


if __name__ == '__main__':
    sys.exit(main())
b/tools/amas/test-data/inputs/concat_2.fasta new file mode 100644 index 00000000000..ff5e4cc8f61 --- /dev/null +++ b/tools/amas/test-data/inputs/concat_2.fasta @@ -0,0 +1,6 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU10 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT \ No newline at end of file diff --git a/tools/amas/test-data/inputs/concat_int_1.nex b/tools/amas/test-data/inputs/concat_int_1.nex new file mode 100644 index 00000000000..a70889bfe2a --- /dev/null +++ b/tools/amas/test-data/inputs/concat_int_1.nex @@ -0,0 +1,22 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=3 NCHAR=300; + FORMAT INTERLEAVE DATATYPE=DNA GAP = - MISSING = ?; + MATRIX +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + + +; + +END; \ No newline at end of file diff --git a/tools/amas/test-data/inputs/concat_int_1.phylip 
b/tools/amas/test-data/inputs/concat_int_1.phylip new file mode 100644 index 00000000000..d65b3f611d0 --- /dev/null +++ b/tools/amas/test-data/inputs/concat_int_1.phylip @@ -0,0 +1,12 @@ +3 300 +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + +ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT + +ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCAT +GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGT +TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCT \ No newline at end of file diff --git a/tools/amas/test-data/inputs/concat_int_2.nex b/tools/amas/test-data/inputs/concat_int_2.nex new file mode 100644 index 00000000000..c8291be9700 --- /dev/null +++ b/tools/amas/test-data/inputs/concat_int_2.nex @@ -0,0 +1,19 @@ +#NEXUS + +Begin data; + Dimensions Nchar=300 Ntax=3; + Format Datatype=DNA Gap = - Missing = ? 
Interleave=yes; + matrix +Taxon_A AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +Taxon_B GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +Taxon_C TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG + +Taxon_A GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +Taxon_B CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +Taxon_C AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + +Taxon_A AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +Taxon_B GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +Taxon_C TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG +; +End; \ No newline at end of file diff --git a/tools/amas/test-data/inputs/concat_int_2.phylip b/tools/amas/test-data/inputs/concat_int_2.phylip new file mode 100644 index 00000000000..4a2c0545524 --- /dev/null +++ b/tools/amas/test-data/inputs/concat_int_2.phylip @@ -0,0 +1,12 @@ +3 300 +Taxon_A AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +Taxon_B GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +Taxon_C TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG + +GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + 
+AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG \ No newline at end of file diff --git a/tools/amas/test-data/inputs/concat_int_3.nex b/tools/amas/test-data/inputs/concat_int_3.nex new file mode 100644 index 00000000000..fa3883a5db3 --- /dev/null +++ b/tools/amas/test-data/inputs/concat_int_3.nex @@ -0,0 +1,16 @@ +#NEXUS + +begin data; + dimensions nchar=200 ntax=3; + format datatype=DNA gap = - missing = ?; + matrix +Taxon_A AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +Taxon_B GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +Taxon_C TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG + +Taxon_A GGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +Taxon_B CCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +Taxon_C AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + +; +end; \ No newline at end of file diff --git a/tools/amas/test-data/inputs/concat_result.phylip b/tools/amas/test-data/inputs/concat_result.phylip new file mode 100644 index 00000000000..5ce3ce2d664 --- /dev/null +++ b/tools/amas/test-data/inputs/concat_result.phylip @@ -0,0 +1,4 @@ +10 200 +OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU10 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT \ No newline at end of file diff --git a/tools/amas/test-data/inputs/fasta1.fas b/tools/amas/test-data/inputs/fasta1.fas new file mode 100644 index 00000000000..3f7183c557b --- /dev/null +++ b/tools/amas/test-data/inputs/fasta1.fas @@ -0,0 +1,20 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU3 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU4 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU5 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU6 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU7 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU8 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU9 +ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU10 +ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT diff --git a/tools/amas/test-data/inputs/partitions_concat.nex b/tools/amas/test-data/inputs/partitions_concat.nex new file mode 100644 index 00000000000..37c2c77c26d --- /dev/null +++ b/tools/amas/test-data/inputs/partitions_concat.nex @@ -0,0 +1,6 
@@ +#NEXUS + +Begin sets; + charset p1_concat_1 = 1-100; + charset p2_concat_2 = 101-200; +End; \ No newline at end of file diff --git a/tools/amas/test-data/inputs/partitions_concat_unspecified.txt b/tools/amas/test-data/inputs/partitions_concat_unspecified.txt new file mode 100644 index 00000000000..0e23d953c9b --- /dev/null +++ b/tools/amas/test-data/inputs/partitions_concat_unspecified.txt @@ -0,0 +1,2 @@ +p1_concat_1 = 1-100 +p2_concat_2 = 101-200 diff --git a/tools/amas/test-data/inputs/remove_input.nex b/tools/amas/test-data/inputs/remove_input.nex new file mode 100644 index 00000000000..002c144c2b0 --- /dev/null +++ b/tools/amas/test-data/inputs/remove_input.nex @@ -0,0 +1,20 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=10 NCHAR=100; + FORMAT DATATYPE=DNA GAP = - MISSING = ?; + MATRIX + OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU10 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU3 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU4 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU5 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU6 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU7 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU8 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU9 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + +; + +END; \ No newline at end of file diff --git a/tools/amas/test-data/outputs/expected_concat.phylip 
b/tools/amas/test-data/outputs/expected_concat.phylip new file mode 100644 index 00000000000..6b03683296f --- /dev/null +++ b/tools/amas/test-data/outputs/expected_concat.phylip @@ -0,0 +1,4 @@ +3 200 +OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU10 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT diff --git a/tools/amas/test-data/outputs/expected_concat_fasta.fas b/tools/amas/test-data/outputs/expected_concat_fasta.fas new file mode 100644 index 00000000000..6992ad7318c --- /dev/null +++ b/tools/amas/test-data/outputs/expected_concat_fasta.fas @@ -0,0 +1,12 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTC +CAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +>OTU10 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCTACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTC +CAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT diff --git a/tools/amas/test-data/outputs/expected_concat_int.nex b/tools/amas/test-data/outputs/expected_concat_int.nex new file mode 100644 index 00000000000..009f0421886 --- /dev/null +++ b/tools/amas/test-data/outputs/expected_concat_int.nex @@ -0,0 +1,18 @@ 
+#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=3 NCHAR=600; + FORMAT INTERLEAVE DATATYPE=DNA GAP = - MISSING = ?; + MATRIX +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + +AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC +GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAA +TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGG + + +; + +END; \ No newline at end of file 
diff --git a/tools/amas/test-data/outputs/expected_concat_int_multi.nex b/tools/amas/test-data/outputs/expected_concat_int_multi.nex new file mode 100644 index 00000000000..ed23b62e3e0 --- /dev/null +++ b/tools/amas/test-data/outputs/expected_concat_int_multi.nex @@ -0,0 +1,18 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=3 NCHAR=800; + FORMAT INTERLEAVE DATATYPE=DNA GAP = - MISSING = ?; + MATRIX +Taxon_A ATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATATGCATGCATAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +Taxon_B GCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGCTAGCTAGTGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +Taxon_C TTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTAGCTAGCTTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + 
+AAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTT +GGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAAGGGGGAAAAACCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTTCCCCCTTTTT +TTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGTTTTTGGGGGAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCCAAAAACCCCC + + +; + +END; \ No newline at end of file diff --git a/tools/amas/test-data/outputs/expected_partitions.nex b/tools/amas/test-data/outputs/expected_partitions.nex new file mode 100644 index 00000000000..37c2c77c26d --- /dev/null +++ b/tools/amas/test-data/outputs/expected_partitions.nex @@ -0,0 +1,6 @@ +#NEXUS + +Begin sets; + charset p1_concat_1 = 1-100; + charset p2_concat_2 = 101-200; +End; \ No newline at end of file diff --git a/tools/amas/test-data/outputs/expected_partitions_int.txt b/tools/amas/test-data/outputs/expected_partitions_int.txt new file mode 100644 index 00000000000..167a478b3c1 --- /dev/null +++ b/tools/amas/test-data/outputs/expected_partitions_int.txt @@ -0,0 +1,2 @@ +p1_concat_int_1 = 1-300 +p2_concat_int_2 = 301-600 diff --git a/tools/amas/test-data/outputs/expected_partitions_int_multi.txt b/tools/amas/test-data/outputs/expected_partitions_int_multi.txt new file mode 100644 index 00000000000..5eb6fc53fe3 --- /dev/null +++ b/tools/amas/test-data/outputs/expected_partitions_int_multi.txt @@ -0,0 +1,3 @@ +p1_concat_int_1 = 1-300 +p2_concat_int_2 = 
301-600 +p3_concat_int_3 = 601-800 diff --git a/tools/amas/test-data/outputs/expected_partitions_raxml.txt b/tools/amas/test-data/outputs/expected_partitions_raxml.txt new file mode 100644 index 00000000000..41adfcf7a76 --- /dev/null +++ b/tools/amas/test-data/outputs/expected_partitions_raxml.txt @@ -0,0 +1,2 @@ +DNA, p1_concat_1 = 1-100 +DNA, p2_concat_2 = 101-200 diff --git a/tools/amas/test-data/outputs/expected_remove_filtered.int-nex b/tools/amas/test-data/outputs/expected_remove_filtered.int-nex new file mode 100644 index 00000000000..964dd0b604a --- /dev/null +++ b/tools/amas/test-data/outputs/expected_remove_filtered.int-nex @@ -0,0 +1,19 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=8 NCHAR=100; + FORMAT INTERLEAVE DATATYPE=DNA GAP = - MISSING = ?; + MATRIX +OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU3 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU4 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU5 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU6 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU7 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT +OTU8 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + + +; + +END; \ No newline at end of file diff --git a/tools/amas/test-data/outputs/expected_replicate1.nex b/tools/amas/test-data/outputs/expected_replicate1.nex new file mode 100644 index 00000000000..002c144c2b0 --- /dev/null +++ b/tools/amas/test-data/outputs/expected_replicate1.nex @@ -0,0 +1,20 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=10 NCHAR=100; + 
FORMAT DATATYPE=DNA GAP = - MISSING = ?; + MATRIX + OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU10 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU3 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU4 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU5 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU6 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU7 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU8 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU9 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + +; + +END; \ No newline at end of file diff --git a/tools/amas/test-data/outputs/expected_replicate2.nex b/tools/amas/test-data/outputs/expected_replicate2.nex new file mode 100644 index 00000000000..002c144c2b0 --- /dev/null +++ b/tools/amas/test-data/outputs/expected_replicate2.nex @@ -0,0 +1,20 @@ +#NEXUS + +BEGIN DATA; + DIMENSIONS NTAX=10 NCHAR=100; + FORMAT DATATYPE=DNA GAP = - MISSING = ?; + MATRIX + OTU1 ?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU10 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU2 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU3 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU4 
ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU5 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU6 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU7 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU8 ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + OTU9 ACACATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCAACTGCTTAGCTCCACTCGCT + +; + +END; \ No newline at end of file diff --git a/tools/amas/test-data/outputs/expected_split_partition1.fas b/tools/amas/test-data/outputs/expected_split_partition1.fas new file mode 100644 index 00000000000..205a34b8042 --- /dev/null +++ b/tools/amas/test-data/outputs/expected_split_partition1.fas @@ -0,0 +1,9 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT +>OTU10 +AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA +AAAAAAAAAAAAAAAAAAAA +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT diff --git a/tools/amas/test-data/outputs/expected_split_partition2.fas b/tools/amas/test-data/outputs/expected_split_partition2.fas new file mode 100644 index 00000000000..d16ff20f2f2 --- /dev/null +++ b/tools/amas/test-data/outputs/expected_split_partition2.fas @@ -0,0 +1,9 @@ +>OTU1 +?ACCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT +>OTU10 +TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT +TTTTTTTTTTTTTTTTTTTT +>OTU2 +ACCCATGTTTGTTGTAGGAGTCAACTTAGAAGCTTATGACCCATCCTATAAAGTCATCTCCAATGCCTCGTGCACAACCA +ACTGCTTAGCTCCACTCGCT diff --git a/tools/amas/test-data/outputs/expected_summary.txt 
b/tools/amas/test-data/outputs/expected_summary.txt new file mode 100644 index 00000000000..6eef1b748f6 --- /dev/null +++ b/tools/amas/test-data/outputs/expected_summary.txt @@ -0,0 +1,2 @@ +Alignment_name No_of_taxa Alignment_length Total_matrix_cells Undetermined_characters Missing_percent No_variable_sites Proportion_variable_sites Parsimony_informative_sites Proportion_parsimony_informative AT_content GC_content A C G T K M R Y S W B V H D X N O - ? +fasta1.fas 10 100 1000 1 0.1 2 0.02 1 0.01 0.543 0.457 262 297 160 280 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 \ No newline at end of file diff --git a/tools/amas/test-data/outputs/expected_taxa_summary.txt b/tools/amas/test-data/outputs/expected_taxa_summary.txt new file mode 100644 index 00000000000..16d6e91e103 --- /dev/null +++ b/tools/amas/test-data/outputs/expected_taxa_summary.txt @@ -0,0 +1,11 @@ +Alignment_name Taxon_name Sequence_length Undetermined_characters Missing_percent AT_content GC_content A C G T K M R Y S W B V H D X N O - ? +fasta1.fas OTU1 100 1 1.0 0.545 0.455 26 29 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 +fasta1.fas OTU10 100 0 0.0 0.55 0.45 27 29 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU2 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU3 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU4 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU5 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU6 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU7 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU8 100 0 0.0 0.54 0.46 26 30 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 +fasta1.fas OTU9 100 0 0.0 0.55 0.45 27 29 16 28 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 \ No newline at end of file