galaxyproject
diff --git a/‎tools/fastq_groupmerge/.shed.yml‎
Lines changed: 13 additions & 0 deletions b/‎tools/fastq_groupmerge/.shed.yml‎
Lines changed: 13 additions & 0 deletions
diff --git a/‎tools/fastq_groupmerge/fastq_groupmerge.xml‎
Lines changed: 229 additions & 0 deletions b/‎tools/fastq_groupmerge/fastq_groupmerge.xml‎
Lines changed: 229 additions & 0 deletions
diff --git a/‎tools/fastq_groupmerge/test-data/A1_R1.fastq.gz‎
214 Bytes b/‎tools/fastq_groupmerge/test-data/A1_R1.fastq.gz‎
214 Bytes
diff --git a/‎tools/fastq_groupmerge/test-data/A1_R2.fastq.gz‎
213 Bytes b/‎tools/fastq_groupmerge/test-data/A1_R2.fastq.gz‎
213 Bytes
diff --git a/‎tools/fastq_groupmerge/test-data/A1_forward.fastq.gz‎
219 Bytes b/‎tools/fastq_groupmerge/test-data/A1_forward.fastq.gz‎
219 Bytes
diff --git a/‎tools/fastq_groupmerge/test-data/A1_reverse.fastq.gz‎
218 Bytes b/‎tools/fastq_groupmerge/test-data/A1_reverse.fastq.gz‎
218 Bytes
diff --git a/‎tools/fastq_groupmerge/test-data/A2_R1.fastq‎
Lines changed: 4 additions & 0 deletions b/‎tools/fastq_groupmerge/test-data/A2_R1.fastq‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎tools/fastq_groupmerge/test-data/A2_R2.fastq‎
Lines changed: 4 additions & 0 deletions b/‎tools/fastq_groupmerge/test-data/A2_R2.fastq‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎tools/fastq_groupmerge/test-data/B1_forward.fastq‎
Lines changed: 4 additions & 0 deletions b/‎tools/fastq_groupmerge/test-data/B1_forward.fastq‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎tools/fastq_groupmerge/test-data/B1_reverse.fastq‎
Lines changed: 4 additions & 0 deletions b/‎tools/fastq_groupmerge/test-data/B1_reverse.fastq‎
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,13 @@
+name: fastq_groupmerge
+owner: iuc
+description: A tool for merging fastq reads by metadata
+homepage_url: https://github.com/SantaMcCloud/fastq-groupmerge
+long_description: |
+  This tool takes multiple fastq read files as input and merges them based on
+  the provided metadata file (multiple groupings are allowed).
+  It is designed to support workflows that require specific sample grouping,
+  such as co-assembly and group assembly, or any other use case where merging reads is necessary.
+remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/fastq_groupmerge
+type: unrestricted
+categories: 
+- Metagenomics
@@ -0,0 +1,229 @@
+<tool id="fastq_groupmerge" name="Fastq groupmerge" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <macros>
+        <!-- Version of the wrapped fastq-groupmerge package; also used as the version of the requirement below. -->
+        <token name="@TOOL_VERSION@">1.0.1</token>
+        <!-- Wrapper revision, appended to the tool version after "+galaxy" in the tool element. -->
+        <token name="@VERSION_SUFFIX@">0</token>
+        <!-- Value substituted into the profile attribute of the tool element. -->
+        <token name="@PROFILE@">25.0</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">fastq-groupmerge</requirement>
+    </requirements>
+    <command detect_errors="exit_code">
+        <![CDATA[
+            ## Stage the inputs in 'samples/' under identifier-based file names; 'output/' receives the results.
+            mkdir 'output' 'samples' &&
+            ## 'samples' is defined inside the 'input' conditional, so it is addressed fully qualified.
+            #if $input.is_select == 'pair':
+                #for $sample in $input.samples:
+                    ln -s '${sample.forward}' 'samples/${sample.element_identifier}_forward.${sample.forward.ext}' &&
+                    ln -s '${sample.reverse}' 'samples/${sample.element_identifier}_reverse.${sample.reverse.ext}' &&
+                #end for
+            #else:
+                #for $sample in $input.samples:
+                    ln -s '${sample}' 'samples/${sample.element_identifier}.${sample.ext}' &&
+                #end for
+            #end if
+            ## The staging directory and the output directory are passed positionally, in that order.
+            fastq_groupmerge.py
+            'samples'
+            'output'
+            #if $metadata:
+                --metadata '$metadata'
+                --group_col '$group_col'
+                #if $metadata.ext == 'csv':
+                    --sep ","
+                #else:
+                    --sep "\t"
+                #end if
+            #end if
+            #if $input.is_select == 'pair':
+                --forward_suffix '_forward'
+                --reverse_suffix '_reverse'
+            #else:
+                --single_reads
+            #end if
+
+        ]]>
+    </command>
+    <inputs>
+        <!-- Library layout: selects which collection type is requested (flat list or list of pairs). -->
+        <conditional name="input">
+            <param name="is_select"
+                   type="select"
+                   label="Check type of fastq read library">
+                <option value="single">Single reads</option>
+                <option value="pair" selected="true">Paired reads</option>
+            </param>
+            <when value="single">
+                <param name="samples"
+                       type="data_collection"
+                       collection_type="list"
+                       format="fastq,fastq.gz"
+                       label="Input single sample(s) read(s)"/>
+            </when>
+            <when value="pair">
+                <param name="samples"
+                       type="data_collection"
+                       collection_type="list:paired"
+                       format="fastq,fastq.gz"
+                       label="Input paired sample(s) read(s) collection"/>
+            </when>
+        </conditional>
+        <!-- Optional grouping table; the command section only passes the metadata, group_col and sep options when it is set. -->
+        <param argument="--metadata"
+               type="data"
+               multiple="false"
+               format="tabular,csv,tsv"
+               optional="true"
+               label="Metadata table file"
+               help="Metadata file with first column sample name and another column with group ID. Multiple grouping is allowed, see the help section. If no metadata table is provided, this tool will merge all samples!"/>
+        <!-- Header name of the grouping column inside the metadata table. -->
+        <param argument="--group_col"
+               type="text"
+               value="group"
+               label="Input the column name of the `group` column"
+               help="The metadata file should contain two columns, one with the sample names and one with sample group ID. Use the same ID for samples that should be grouped. Look at the help section for more information!"/>
+    </inputs>
+    <outputs>
+        <!-- Exactly one of the two collections is produced, chosen by the library type selected in the 'input' conditional. -->
+        <!-- Paired mode: the list identifier is everything before the trailing '_forward' / '_reverse' suffix that the command section passes to the script, so group IDs may contain underscores themselves. The dots of the extension are escaped so they only match literal dots. -->
+        <collection name="merged_samples_pairs" type="list:paired" label="${tool.name} on ${on_string}: Merged samples (pairs)">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_(?P&lt;identifier_1&gt;forward|reverse)\.fastq\.gz" ext="fastq.gz" directory="output"/>
+            <filter>input['is_select'] == 'pair'</filter>
+        </collection>
+        <!-- Single mode: the list identifier is the file name without its '.fastq.gz' extension (underscores allowed). -->
+        <collection name="merged_samples_single" type="list" label="${tool.name} on ${on_string}: Merged samples (single)">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)\.fastq\.gz" ext="fastq.gz" directory="output"/>
+            <filter>input['is_select'] == 'single'</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <!-- Test 1: paired input mixing gzipped and plain FASTQ, metadata declared as tabular, custom group column; two merged groups expected. -->
+        <test expect_num_outputs="1">
+            <conditional name="input">
+                <param name="is_select" value="pair"/>
+                <param name="samples">
+                    <collection type="list:paired">
+                        <element name="A1">
+                            <collection type="paired">
+                                <element name="forward" ftype="fastq.gz" value="A1_forward.fastq.gz"/>
+                                <element name="reverse" ftype="fastq.gz" value="A1_reverse.fastq.gz"/>
+                            </collection>
+                        </element>
+                        <element name="B1">
+                            <collection type="paired">
+                                <element name="forward" ftype="fastq" value="B1_forward.fastq"/>
+                                <element name="reverse" ftype="fastq" value="B1_reverse.fastq"/>
+                            </collection>
+                        </element>
+                    </collection>
+                </param>
+            </conditional>
+            <param name="metadata" ftype="tabular" value="metadata_1.csv"/>
+            <param name="group_col" value="TEST_COLUMN"/>
+            <output_collection name="merged_samples_pairs" type="list:paired" count="2">
+                <element name="control">
+                    <element name="forward" ftype="fastq.gz" value="control_forward.fastq.gz" compare="sim_size"/>
+                    <element name="reverse" ftype="fastq.gz" value="control_reverse.fastq.gz" compare="sim_size"/>
+                </element>
+                <element name="single">
+                    <element name="forward" ftype="fastq.gz" value="single_forward.fastq.gz" compare="sim_size"/>
+                    <element name="reverse" ftype="fastq.gz" value="single_reverse.fastq.gz" compare="sim_size"/>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Test 2: paired plain FASTQ input, csv metadata, default group column; one merged group expected. -->
+        <test expect_num_outputs="1">
+            <conditional name="input">
+                <param name="is_select" value="pair"/>
+                <param name="samples">
+                    <collection type="list:paired">
+                        <element name="A2">
+                            <collection type="paired">
+                                <element name="forward" ftype="fastq" value="A2_R1.fastq"/>
+                                <element name="reverse" ftype="fastq" value="A2_R2.fastq"/>
+                            </collection>
+                        </element>
+                        <element name="B2">
+                            <collection type="paired">
+                                <element name="forward" ftype="fastq" value="B2_R1.fastq"/>
+                                <element name="reverse" ftype="fastq" value="B2_R2.fastq"/>
+                            </collection>
+                        </element>
+                    </collection>
+                </param>
+            </conditional>
+            <param name="metadata" ftype="csv" value="metadata_2.csv"/>
+            <output_collection name="merged_samples_pairs" type="list:paired" count="1">
+                <element name="treatment">
+                    <element name="forward" ftype="fastq.gz" value="treatment_forward.fastq.gz" compare="sim_size"/>
+                    <element name="reverse" ftype="fastq.gz" value="treatment_reverse.fastq.gz" compare="sim_size"/>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Test 3: single-read mode on a flat list, csv metadata; one merged file expected. -->
+        <test expect_num_outputs="1">
+            <conditional name="input">
+                <param name="is_select" value="single"/>
+                <param name="samples">
+                    <collection type="list">
+                        <element name="A1_forward" ftype="fastq.gz" value="A1_forward.fastq.gz"/>
+                        <element name="A1_reverse" ftype="fastq.gz" value="A1_reverse.fastq.gz"/>
+                        <element name="B1_forward" ftype="fastq" value="B1_forward.fastq"/>
+                        <element name="B1_reverse" ftype="fastq" value="B1_reverse.fastq"/>
+                    </collection>
+                </param>
+            </conditional>
+            <param name="metadata" ftype="csv" value="metadata_single.csv"/>
+            <output_collection name="merged_samples_single" type="list" count="1">
+                <element name="Test" ftype="fastq.gz" value="Test.fastq.gz" compare="sim_size"/>
+            </output_collection>
+        </test>
+        <!-- Test 4: paired input without a metadata file; a single pair named 'merged' is expected. -->
+        <test expect_num_outputs="1">
+            <conditional name="input">
+                <param name="is_select" value="pair"/>
+                <param name="samples">
+                    <collection type="list:paired">
+                        <element name="A1">
+                            <collection type="paired">
+                                <element name="forward" ftype="fastq.gz" value="A1_forward.fastq.gz"/>
+                                <element name="reverse" ftype="fastq.gz" value="A1_reverse.fastq.gz"/>
+                            </collection>
+                        </element>
+                        <element name="B1">
+                            <collection type="paired">
+                                <element name="forward" ftype="fastq" value="B1_forward.fastq"/>
+                                <element name="reverse" ftype="fastq" value="B1_reverse.fastq"/>
+                            </collection>
+                        </element>
+                    </collection>
+                </param>
+            </conditional>
+            <output_collection name="merged_samples_pairs" type="list:paired" count="1">
+                <element name="merged">
+                    <element name="forward" ftype="fastq.gz" value="merged_forward.fastq.gz" compare="sim_size"/>
+                    <element name="reverse" ftype="fastq.gz" value="merged_reverse.fastq.gz" compare="sim_size"/>
+                </element>
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+
+            **What this tool does**
+
+            This tool merges the fastq reads of several samples into groups that are defined in a metadata file.
+            It can be used to support grouped assembly. In some cases you want to group the samples in multiple ways, e.g. merge technical replicates but also merge samples of a similar origin (e.g. all from the gut). To this end you can provide multiple groupings.
+
+            **Input**
+
+            - A collection of paired reads (or a list of single reads), which can be in fastq or fastq.gz format
+            - OPTIONAL BUT RECOMMENDED: a metadata file, either tab-separated (format: tabular/tsv) or comma-separated (format: csv)
+
+            The metadata file can look like this, for example:
+
+            ::
+
+                sample_id,group
+                A1,control
+                B1,control
+                A1,A1
+                Test,
+                ,Test
+            
+            Important notes:
+
+            - The metadata file is required to have a column 'sample_id' with the sample names. When using the paired collection option, these are the names of the pairs: if a pair in the collection is called 'A1', then 'A1' has to be written in the 'sample_id' column.
+            - The column 'group' can be called anything. All samples with the same ID will be merged together in the output file. In the example file, the output 'control_forward.fastq.gz' will contain the forward reads from 'A1' and 'B1'.
+            - When there is an empty entry in any column, this line will be ignored!
+            - When using the single read option, note that the 'sample_id' column has to state the complete file name; for example, for the input 'test_read.fastq' the entry in the metadata table has to be 'test_read'.
+            - If a metadata file is given, only the sample reads stated in this file will be taken into account. The input collection may therefore contain other sample reads as well; they will be ignored if they are not stated in the metadata file!
+
+            **Output**
+
+            - For each group stated in the 'group' column, a forward file [{group_name}_{forward_suffix}.fastq.gz] and a reverse file [{group_name}_{reverse_suffix}.fastq.gz] will be created.
+            - When no metadata is given, all inputs that match the 'forward_suffix' and the 'reverse_suffix' will be merged together into one file each for forward and reverse!
+
+        ]]>
+    </help>
+    <citations>
+        <!-- BibTeX entry pointing at the upstream GitHub repository of fastq-groupmerge. -->
+        <citation type="bibtex">@misc{BibEntry2025Oct,
+            title = {{fastq-groupmerge}},
+            author = {Santino Faack (SantaMcCloud)},
+            journal = {GitHub},
+            year = {2025},
+            month = oct,
+            url = {https://github.com/SantaMcCloud/fastq-groupmerge}
+        }</citation>
+    </citations> 
+</tool>
@@ -0,0 +1,4 @@
+@S0R226/1
+GGTTTACCAATTGAAAATGCAATTCAAAAATTAGGTGTCAATCGTAAAGAAATGCCGACATACGAATTTAGAGCACTTTGTGAGAAATATGCGCGTGAACAAGTTGCAATTCAAATCGGTGATTTTAAATCGTTAGGTAAAAGTGGGGAT
++
+DAAGGGCGBIEHHKKGHKGKFKKKJGJKIGIIJJ@KHDHHKIJE@GGE=IKCEJKJKEEEEIG8EGJEICJGEE0EIE;3$HCDEA6EEDEF?E$DCE4<@$EEEEEEDEEDEEED$E)D@FDCDDEECEEEEEDCE=$;$EDECA;C4:
@@ -0,0 +1,4 @@
+@S0R226/2
+ATGCCCTTCATCGCCATTACCGCAAATACACGAATTTGTGTTGCTTCAAAATCTTTATTGAGTGTTAAATAAGGATTCTTATAATCCCCAATTTTACATAACTATTTAAAATAAGCCATTTGAATTGCAATTTATTTATGAGCAGATTGC
++
+<CAGGG@GIIIIEJKK9J$CK=KHKHKCKKJKIKJGKJKICJBFAJ=BA>CJA:<0EJADIGCAEIE@EEEE:G$BE=EEBEEDEEEBEEEBDFEEE$9CDE$EE$E@;B$C$EEEAC;E6E1DEECEED$;C$E$$EA;$BE5$D$@$=
@@ -0,0 +1,4 @@
+@S0R15658/1
+AATGGGAAGATGATTTTAGGGAATATCTCAAAATGCTGGATGAGACTAAACCTGTAGTCTTATGTGGGGACTTAAGCGTCGCTCATAAAGAGATTGACTTGAAAAATCCTTCAGCGAATCGTAAAAACCCTGGCTTTAGTTATCAAGAAC
++
+AADGGGEEIIHII<JCKKJKKKFFJFJKIJIJKKKII@JHKKKDKEEJKJJHEKJK$CIEH>JEKGIEGDGE@EE$K$EIEEEADC?DEEE?EEEECDC1E9EEEDEEDEE=)CCCEDE1BC??FE7?7CAEDEDAAC3E$EE$;;CEE$
@@ -0,0 +1,4 @@
+@S0R11796/2
+AACCCCGTTGCTCACGACGGAATCGTTGTAAGTGATCTTTCTTAAGTTTTGTAATATTTTGACCAGTTACACCAATGGTAAGGCTGGTCACCGAGTCAATTGTGGAAATGCCATTAAGAAGCGTCGTTTTACCAGAACCTTATGGTCCCA
++
+ADD>E$3E=I=?IHHJKKKJK$GKKFHKJGFKKK*KK:FKJGKCJCIH8I:KHDKDF>B$DEK<B$848DKEFIGD;EG@)$:ECGGE6?EDEEEEEE$$:FEEEDA$$??$?BEEAECCFAEE=DD4@D6CDEBCAECB$EC9$=EA$;