galaxyproject · jchchiu · Nov 6, 2025 · Nov 6, 2025 · Nov 6, 2025 · Nov 6, 2025
diff --git a/tools/amas/.shed.yml b/tools/amas/.shed.yml
@@ -0,0 +1,22 @@
+# Tool Shed repository metadata for the AMAS tool wrappers.
+categories:
+  - Phylogenetics
+  - Sequence Analysis
+  - Statistics
+description: AMAS high-throughput alignment manipulation and summaries for phylogenomics
+homepage_url: https://github.com/marekborowiec/AMAS
+long_description: Handle expansive phylogenomic data sets by concatenating, removing,
+  replicating, splitting, and summarising large nucleotide or amino acid alignments.
+name: amas
+owner: iuc
+remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/amas
+# Naming/description templates for the per-tool repositories.
+auto_tool_repositories:
+  name_template: "{{ tool_id }}"
+  description_template: "Wrapper for amas functions: {{ tool_name }}."
+# Suite entry grouping the AMAS tools.
+suite:
+  name: "suite_amas"
+  description: "A suite of tools that brings the amas project into Galaxy."
+  long_description: Handle expansive phylogenomic data sets by concatenating, removing,
+    replicating, splitting, and summarising large nucleotide or amino acid alignments.
diff --git a/tools/amas/amas_concat.xml b/tools/amas/amas_concat.xml
@@ -0,0 +1,121 @@
+<tool id="amas_concat" name="AMAS concat" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>concatenate multiple alignments</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <xrefs>
+        <xref type="bio.tools">amas</xref>
+    </xrefs>
+
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+        set -eu;
+
+        @SYMLINK_INPUTS@
+
+        ## Fixed output names: the outputs section collects concatenated.out and partitions.txt via from_work_dir.
+        ## GALAXY_SLOTS is escaped so the shell, not Cheetah, expands it; it falls back to 1 when unset or empty.
+        python -m amas.AMAS
+            concat
+            --concat-part partitions.txt
+            --concat-out concatenated.out
+            --part-format $part_format
+            --out-format $out_format
+            --in-files
+                @INPUT_FILENAMES@
+            --in-format $in_format
+            --data-type $data_type
+            --cores "\${GALAXY_SLOTS:-1}"
+            $check_align
+        ]]></command>
+
+    <inputs>
+        <!-- in_format, out_format, data_type and check_align are not declared in this file; presumably they come from the macros expanded below (confirm in macros.xml). -->
+        <param name="input_files" type="data" format="fasta,phylip,nex" label="Sequences to concatenate" multiple="true"
+               help="Provide pre-aligned FASTA/PHYLIP/NEXUS files (DNA or protein); mixes of unaligned reads or contigs will produce meaningless results." />
+        <expand macro="input_format" />
+        <expand macro="output_format" label="Select output format for concatenated alignment" />
+        <param name="part_format" type="select" label="Format of the partitions file"
+               help="A file defining how the concatenated alignment is split into separate gene/locus regions. Each line specifies a partition name and its position range (e.g., 'gene1 = 1-500' or 'DNA, gene1 = 1-500' for RAxML format).">
+            <option value="nexus">nexus</option>
+            <option value="raxml">raxml</option>
+            <option value="unspecified" selected="true">unspecified</option>
+        </param>
+        <expand macro="data_type" />
+        <expand macro="check_align" />
+    </inputs>
+
+    <outputs>
+        <!-- The alignment datatype defaults to txt and is switched to match the selected out_format value. -->
+        <data name="output" from_work_dir="concatenated.out" format="txt" label="${tool.name} on ${on_string} (Concatenated alignment)">
+            <change_format>
+                <when input="out_format" value="fasta" format="fasta" />
+                <when input="out_format" value="phylip" format="phylip" />
+                <when input="out_format" value="phylip-int" format="phylip" />
+                <when input="out_format" value="nexus" format="nex" />
+                <when input="out_format" value="nexus-int" format="nex" />
+            </change_format>
+        </data>
+        <data name="partitions_out" from_work_dir="partitions.txt" format="txt" label="${tool.name} on ${on_string} (Partition file)" />
+    </outputs>
+
+    <tests>
+        <test expect_num_outputs="2">
+            <param name="input_files" value="inputs/concat_1.fasta,inputs/concat_2.fasta" />
+            <param name="out_format" value="phylip" />
+            <param name="part_format" value="nexus" />
+            <param name="in_format" value="fasta" />
+            <param name="data_type" value="dna" />
+            <param name="check_align" value="false" />
+            <output name="output" file="outputs/expected_concat.phylip" ftype="phylip" compare="sim_size" />
+            <output name="partitions_out" file="outputs/expected_partitions.txt" ftype="txt" />
+        </test>
+        <test expect_num_outputs="2">
+            <param name="input_files" value="inputs/concat_1.fasta,inputs/concat_2.fasta" />
+            <param name="out_format" value="fasta" />
+            <param name="part_format" value="raxml" />
+            <param name="in_format" value="fasta" />
+            <param name="data_type" value="dna" />
+            <param name="check_align" value="false" />
+            <output name="output" file="outputs/expected_concat_fasta.fas" ftype="fasta" compare="sim_size" />
+            <output name="partitions_out" file="outputs/expected_partitions_raxml.txt" ftype="txt" />
+        </test>
+    </tests>
+
+    <help><![CDATA[
+        **What it does**
+
+        AMAS Concat combines multiple sequence alignments into a single concatenated alignment, commonly used in phylogenomic analyses.
+
+        **Inputs**
+
+        - **Multiple alignment files**: Select 2 or more pre-aligned sequence files (FASTA, PHYLIP, or NEXUS format)
+        - **Input format**: Specify the format of your input files
+        - **Data type**: Choose DNA for nucleotide sequences or Protein for amino acid sequences
+        - **Output format**: Select the desired format for the concatenated alignment
+
+        **Outputs**
+
+        1. **Concatenated alignment**: A single file containing all input alignments joined end-to-end
+        2. **Partitions file**: Defines the boundaries of each original alignment within the concatenated file
+
+        @PARTITIONS_HELP@
+
+        **Use cases**
+
+        - **Multi-locus phylogenomics**: Combine hundreds of genes for species tree inference
+        - **Partitioned phylogenetic analysis**: Apply different evolutionary models to different genes using tools like RAxML or IQ-TREE
+        - **Supermatrix construction**: Create dataset for concatenation-based phylogenetic methods
+        - **Increased phylogenetic signal**: Leverage information from multiple loci to resolve difficult nodes
+        - **Comparative analyses**: Prepare datasets for testing hypotheses across multiple genomic regions
+
+        @AMAS_SHARED_HELP@
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>
diff --git a/tools/amas/amas_remove.xml b/tools/amas/amas_remove.xml
@@ -0,0 +1,103 @@
+<tool id="amas_remove" name="AMAS remove" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>remove taxa from multiple alignments</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <xrefs>
+        <xref type="bio.tools">amas</xref>
+    </xrefs>
+
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+        set -eu;
+
+        @SYMLINK_INPUTS@
+
+        ## Taxon names are split on whitespace and each one is passed as a separate single-quoted argument.
+        python -m amas.AMAS
+        remove
+        --taxa-to-remove
+        #for $taxon in $taxa_to_remove.split()
+            '$taxon'
+        #end for
+        --out-format $out_format
+        --in-files
+            @INPUT_FILENAMES@
+        --in-format $in_format
+        --data-type $data_type
+        --cores "\${GALAXY_SLOTS:-1}"
+        $check_align
+        ]]></command>
+
+    <inputs>
+        <param name="input_files" type="data" format="fasta,phylip,nex" label="Sequence(s) to remove taxa" multiple="true"
+               help="Provide pre-aligned FASTA/PHYLIP/NEXUS files (DNA or protein); mixes of unaligned reads or contigs will produce meaningless results." />
+        <expand macro="input_format" />
+        <expand macro="output_format" label="Select output format for alignment(s) with taxa removed"/>
+        <param name="taxa_to_remove" type="text" label="Taxa to remove"
+               help="Space-separated list of taxon names to remove (e.g., 'OTU9 OTU10 Sample_A'). Note: AMAS converts spaces to underscores and strips quotes from sequence names, so use 'Species_1' to remove a taxon named 'Species 1'.">
+            <!-- Anchored with ^ and $ because Galaxy applies regex validators with re.match, which only anchors at the start; unanchored, any value beginning with a valid character would pass. Surrounding whitespace is tolerated since the command splits on whitespace. -->
+            <validator type="regex" message="Please provide at least one taxon name (alphanumeric, underscores, hyphens, and dots allowed)">^\s*[A-Za-z0-9_.\-]+(\s+[A-Za-z0-9_.\-]+)*\s*$</validator>
+        </param>
+        <expand macro="data_type" />
+        <expand macro="check_align" />
+    </inputs>
+
+    <outputs>
+        <!-- NOTE(review): the test expects a collection named reduced_alignments_nexus, so the macro presumably emits one collection per output format; confirm in macros.xml. -->
+        <expand macro="collection_outputs" name="reduced_alignments" />
+    </outputs>
+
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input_files" value="inputs/remove_input.nex" />
+            <param name="taxa_to_remove" value="OTU9 OTU10" />
+            <param name="out_format" value="nexus-int" />
+            <param name="in_format" value="nexus" />
+            <param name="data_type" value="dna" />
+            <param name="check_align" value="false" />
+            <output_collection name="reduced_alignments_nexus" type="list">
+                <element name="reduced_remove_input.nex-out.int-nex" file="outputs/expected_remove_filtered.int-nex" ftype="nex" />
+            </output_collection>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+        **What it does**
+
+        AMAS Remove excludes specified taxa (sequences) from one or more alignments. This is useful for removing problematic sequences, outgroups, or creating taxon subsets for comparative analyses.
+
+        **Inputs**
+
+        - **Alignment files**: One or more pre-aligned sequence files (FASTA, PHYLIP, or NEXUS format)
+        - **Taxa to remove**: Space-separated list of sequence names to exclude (e.g., 'OTU9 OTU10 Sample_A')
+
+          **Important**: AMAS converts spaces to underscores and strips quotes from sequence names during processing. If your input file contains a taxon named 'Species 1' or '"Species 1"', you must specify it as 'Species_1' in the taxa to remove list.
+
+        - **Input format**: Specify the format of your input files
+        - **Data type**: Choose DNA for nucleotide sequences or Protein for amino acid sequences
+        - **Output format**: Select the desired format for the reduced alignments
+
+        **Outputs**
+
+        A collection of alignment files with specified taxa removed. Each output file contains the same alignment as the input, minus the excluded sequences.
+
+        **Tip:** You may want to realign your files after taxon removal.
+
+        **Use cases**
+
+        - Remove sequences with excessive missing data
+        - Exclude contaminated or mis-identified samples
+        - Create taxon subsets for sensitivity analyses
+        - Remove outgroups after tree rooting
+
+        @AMAS_SHARED_HELP@
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>
diff --git a/tools/amas/amas_replicate.xml b/tools/amas/amas_replicate.xml
@@ -0,0 +1,102 @@
+<tool id="amas_replicate" name="AMAS replicate" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>replicate multiple alignments</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <xrefs>
+        <xref type="bio.tools">amas</xref>
+    </xrefs>
+
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+        set -eu;
+
+        @SYMLINK_INPUTS@
+
+        ## The replicate count is passed first, followed by the number of loci per replicate.
+        python -m amas.AMAS
+            replicate
+            --rep-aln $replicate_replicates $replicate_loci
+            --out-format $out_format
+            --in-files
+                @INPUT_FILENAMES@
+            --in-format $in_format
+            --data-type $data_type
+            --cores "\${GALAXY_SLOTS:-1}"
+            $check_align
+        ]]></command>
+
+    <inputs>
+        <param name="input_files" type="data" format="fasta,phylip,nex" multiple="true"
+               label="Sequence(s) to replicate"
+               help="Provide pre-aligned FASTA/PHYLIP/NEXUS files (DNA or protein); mixes of unaligned reads or contigs will produce meaningless results." />
+        <expand macro="input_format" />
+        <expand macro="output_format" label="Select output format for replicated alignment(s)" />
+        <!-- Both integers are handed to the rep-aln option of the command, in this order. -->
+        <param name="replicate_replicates" type="integer" min="1" value="10"
+               label="Number of replicate datasets to build" />
+        <param name="replicate_loci" type="integer" min="1" value="2"
+               label="Number of loci per replicate" />
+        <expand macro="data_type" />
+        <expand macro="check_align" />
+    </inputs>
+
+    <outputs>
+        <expand macro="collection_outputs" name="replicate_alignments" />
+    </outputs>
+
+    <tests>
+        <!-- One input alignment with one locus per replicate; two replicate files are expected. -->
+        <test expect_num_outputs="1">
+            <param name="input_files" value="inputs/fasta1.fas" />
+            <param name="in_format" value="fasta" />
+            <param name="out_format" value="nexus" />
+            <param name="replicate_replicates" value="2" />
+            <param name="replicate_loci" value="1" />
+            <param name="data_type" value="dna" />
+            <param name="check_align" value="false" />
+            <output_collection name="replicate_alignments_nexus" type="list">
+                <element name="replicate1_1-loci-out.nex" file="outputs/expected_replicate1.nex" ftype="nex" />
+                <element name="replicate2_1-loci-out.nex" file="outputs/expected_replicate2.nex" ftype="nex" />
+            </output_collection>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+        **What it does**
+
+        AMAS Replicate generates jackknife or bootstrap replicates by randomly sampling loci (genes) from your dataset. This is used to assess phylogenetic signal distribution and node support across different genomic regions.
+
+        **Inputs**
+
+        - **Alignment files**: Multiple pre-aligned sequence files, one per locus/gene (FASTA, PHYLIP, or NEXUS format)
+        - **Number of replicates**: How many replicate datasets to generate
+        - **Loci per replicate**: How many loci to include in each replicate
+        - **Input format**: Specify the format of your input files
+        - **Data type**: Choose DNA for nucleotide sequences or Protein for amino acid sequences
+        - **Output format**: Select the desired format for the replicate alignments
+
+        **Outputs**
+
+        A collection of replicate alignment files. Each replicate contains a random subset of the input loci concatenated together.
+
+        **Use cases**
+
+        - **Phylogenetic jackknifing**: Assess whether phylogenetic signal is driven by specific loci
+        - **Node support evaluation**: Test robustness of tree topology across different gene combinations
+        - **Signal heterogeneity**: Identify whether conflicting signals come from particular genomic regions
+
+        **Example**
+
+        From 100 input genes, create 10 replicates each containing 50 randomly sampled genes. Each replicate can then be used to build a phylogenetic tree, and consistency across replicates indicates robust phylogenetic signal.
+
+        @AMAS_SHARED_HELP@
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>