diff --git a/CITATIONS.md b/CITATIONS.md index d7f75e84..61332768 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -12,6 +12,8 @@ - [Aspera CLI](https://github.com/IBM/aspera-cli) +- [AWS CLI](https://aws.amazon.com/cli/) + - [Python](http://www.python.org) - [Requests](https://docs.python-requests.org/) @@ -20,6 +22,10 @@ ## Pipeline resources +- [AWS Open Data Program - SRA](https://registry.opendata.aws/ncbi-sra/) + + > The Sequence Read Archive (SRA) is mirrored on AWS S3 as part of the AWS Open Data Program, providing free access to SRA data. + - [ENA](https://pubmed.ncbi.nlm.nih.gov/33175160/) > Harrison PW, Ahamed A, Aslam R, Alako BTF, Burgin J, Buso N, Courtot M, Fan J, Gupta D, Haseeb M, Holt S, Ibrahim T, Ivanov E, Jayathilaka S, Kadhirvelu VB, Kumar M, Lopez R, Kay S, Leinonen R, Liu X, O'Cathail C, Pakseresht A, Park Y, Pesant S, Rahman N, Rajan J, Sokolov A, Vijayaraja S, Waheed Z, Zyoud A, Burdett T, Cochrane G. The European Nucleotide Archive in 2020. Nucleic Acids Res. 2021 Jan 8;49(D1):D82-D85. doi: 10.1093/nar/gkaa1028. PubMed PMID: 33175160; PubMed Central PMCID: PMC7778925. diff --git a/docs/usage.md b/docs/usage.md index 248363de..d461a287 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -72,7 +72,10 @@ See [issue #260](https://github.com/nf-core/fetchngs/issues/260) for more detail ### Primary options for downloading data -If the appropriate download links are available, the pipeline uses FTP by default to download FastQ files by setting the `--download_method ftp` parameter. If you are having issues and prefer to use sra-tools or Aspera instead, you can set the [`--download_method`](https://nf-co.re/fetchngs/parameters#download_method) parameter to `--download_method sratools` or `--download_method aspera`, respectively. +If the appropriate download links are available, the pipeline uses FTP by default to download FastQ files by setting the `--download_method ftp` parameter. 
If you are having issues and prefer to use alternative methods, you can set the [`--download_method`](https://nf-co.re/fetchngs/parameters#download_method) parameter to:
+- `--download_method sratools`: Uses NCBI's sra-tools to download SRA files and convert to FastQ
+- `--download_method aspera`: Uses Aspera CLI for faster downloads from ENA
+- `--download_method aws`: Downloads SRA files from the AWS S3 Open Data Program mirror and converts to FastQ
 
 ### Downloading dbGAP data with JWT
 
diff --git a/modules/local/sra_aws_download/environment.yml b/modules/local/sra_aws_download/environment.yml
new file mode 100644
index 00000000..7d00cd7b
--- /dev/null
+++ b/modules/local/sra_aws_download/environment.yml
@@ -0,0 +1,7 @@
+name: sra_aws_download
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - conda-forge::awscli=2.15.0
\ No newline at end of file
diff --git a/modules/local/sra_aws_download/main.nf b/modules/local/sra_aws_download/main.nf
new file mode 100644
index 00000000..a849e43c
--- /dev/null
+++ b/modules/local/sra_aws_download/main.nf
@@ -0,0 +1,60 @@
+process SRA_AWS_DOWNLOAD {
+    tag "$meta.id"
+    label 'process_low'
+    label 'error_retry'
+
+    // NOTE(review): environment.yml pins awscli 2.15.0 while both container images
+    // below are tagged awscli 1.8.3, so the CLI version differs between the conda
+    // and container profiles -- confirm which version is intended and align the two.
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/awscli:1.8.3--py35_0' :
+        'quay.io/biocontainers/awscli:1.8.3--py35_0' }"
+
+    input:
+    // meta          : sample metadata map (only meta.id is read here, for the task tag)
+    // run_accession : run identifier, used both as S3 key and as default output prefix
+    tuple val(meta), val(run_accession)
+
+    output:
+    tuple val(meta), path("*.sra"), emit: sra
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${run_accession}"
+    """
+    # Unsigned (anonymous) copy of the run object from the sra-pub-run-odp bucket
+    aws s3 cp \\
+        --region us-east-1 \\
+        --no-sign-request \\
+        ${args} \\
+        s3://sra-pub-run-odp/sra/${run_accession}/${run_accession} \\
+        "${prefix}.sra"
+
+    # Verify download: -s also rejects a zero-byte file, which -f would accept
+    if [ ! -s "${prefix}.sra" ]; then
+        echo "ERROR: Failed to download ${run_accession} from AWS S3" >&2
+        exit 1
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        aws-cli: \$(aws --version 2>&1 | sed 's/aws-cli\\///; s/ Python.*//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${run_accession}"
+    """
+    touch "${prefix}.sra"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        aws-cli: \$(aws --version 2>&1 | sed 's/aws-cli\\///; s/ Python.*//')
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/modules/local/sra_aws_download/nextflow.config b/modules/local/sra_aws_download/nextflow.config
new file mode 100644
index 00000000..dfe95467
--- /dev/null
+++ b/modules/local/sra_aws_download/nextflow.config
@@ -0,0 +1,8 @@
+process {
+    withName: 'SRA_AWS_DOWNLOAD' {
+        publishDir = [
+            path: { "${params.outdir}/sra" },
+            enabled: false
+        ]
+    }
+}
\ No newline at end of file
diff --git a/modules/local/sra_aws_download/tests/main.nf.test b/modules/local/sra_aws_download/tests/main.nf.test
new file mode 100644
index 00000000..aaaa6ff3
--- /dev/null
+++ b/modules/local/sra_aws_download/tests/main.nf.test
@@ -0,0 +1,56 @@
+nextflow_process {
+
+    name "Test Process SRA_AWS_DOWNLOAD"
+    script "../main.nf"
+    process "SRA_AWS_DOWNLOAD"
+    tag "modules"
+    tag "modules_local"
+    tag "sra_aws_download"
+
+    test("Should download SRA file from AWS") {
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ],
+                    'DRR028935'
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    test("Should download SRA file from AWS - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:false ],
+                    'DRR028935'
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
\ No newline at end of file
diff --git
a/modules/local/sra_aws_download/tests/main.nf.test.snap b/modules/local/sra_aws_download/tests/main.nf.test.snap new file mode 100644 index 00000000..742d6f64 --- /dev/null +++ b/modules/local/sra_aws_download/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "Should download SRA file from AWS": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "DRR028935.sra:md5,bc88b59c510081d85448416f05094ed5" + ] + ], + "1": [ + "versions.yml:md5,ce0676c62bd6864661cf98777e7c2896" + ], + "sra": [ + [ + { + "id": "test", + "single_end": false + }, + "DRR028935.sra:md5,bc88b59c510081d85448416f05094ed5" + ] + ], + "versions": [ + "versions.yml:md5,ce0676c62bd6864661cf98777e7c2896" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-14T14:59:02.578113" + }, + "Should download SRA file from AWS - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "DRR028935.sra:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,ce0676c62bd6864661cf98777e7c2896" + ], + "sra": [ + [ + { + "id": "test", + "single_end": false + }, + "DRR028935.sra:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,ce0676c62bd6864661cf98777e7c2896" + ] + } + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-14T14:59:07.021124" + } +} \ No newline at end of file diff --git a/nextflow_schema.json b/nextflow_schema.json index d4d87227..42176427 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -51,9 +51,9 @@ "type": "string", "default": "ftp", "fa_icon": "fas fa-download", - "enum": ["aspera", "ftp", "sratools"], - "description": "Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'.", - "help_text": "FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ." 
+                    "enum": ["aspera", "ftp", "sratools", "aws"],
+                    "description": "Method to download FastQ files. Available options are 'aspera', 'ftp', 'sratools', or 'aws'. Default is 'ftp'.",
+                    "help_text": "FTP and Aspera CLI download FastQ files directly from the ENA FTP. sratools uses sra-tools to download *.sra files and convert to FastQ. aws uses AWS CLI to download *.sra files from the SRA mirror on AWS S3 Open Data Program and convert to FastQ."
                 },
                 "skip_fastq_download": {
                     "type": "boolean",
diff --git a/subworkflows/local/fastq_download_aws_sratools/main.nf b/subworkflows/local/fastq_download_aws_sratools/main.nf
new file mode 100644
index 00000000..85ad5d25
--- /dev/null
+++ b/subworkflows/local/fastq_download_aws_sratools/main.nf
@@ -0,0 +1,45 @@
+include { CUSTOM_SRATOOLSNCBISETTINGS } from '../../../modules/nf-core/custom/sratoolsncbisettings/main'
+include { SRA_AWS_DOWNLOAD            } from '../../../modules/local/sra_aws_download/main'
+include { SRATOOLS_FASTERQDUMP        } from '../../../modules/nf-core/sratools/fasterqdump/main'
+
+//
+// Fetch SRA runs from the AWS S3 SRA mirror and convert them to FASTQ reads
+//
+workflow FASTQ_DOWNLOAD_AWS_SRATOOLS {
+    take:
+    ch_sra_ids   // channel: [ val(meta), val(id) ]
+    ch_dbgap_key // channel: [ path(dbgap_key) ]
+
+    main:
+
+    //
+    // Pick up existing NCBI user settings, or have new ones created.
+    //
+    CUSTOM_SRATOOLSNCBISETTINGS ( ch_sra_ids.collect() )
+
+    //
+    // Pull the SRA file of every run from AWS S3.
+    //
+    SRA_AWS_DOWNLOAD ( ch_sra_ids )
+
+    //
+    // Turn each SRA file into one or more compressed FASTQ files.
+    //
+    SRATOOLS_FASTERQDUMP (
+        SRA_AWS_DOWNLOAD.out.sra,
+        CUSTOM_SRATOOLSNCBISETTINGS.out.ncbi_settings,
+        ch_dbgap_key
+    )
+
+    //
+    // Collect versions: all from the settings step, the first from each per-run step.
+    //
+    ch_tool_versions = Channel.empty()
+        .mix(CUSTOM_SRATOOLSNCBISETTINGS.out.versions)
+        .mix(SRA_AWS_DOWNLOAD.out.versions.first())
+        .mix(SRATOOLS_FASTERQDUMP.out.versions.first())
+
+    emit:
+    reads    = SRATOOLS_FASTERQDUMP.out.reads // channel: [ val(meta), [ reads ] ]
+    versions = ch_tool_versions               // channel: [ versions.yml ]
+}
\ No newline at end of file
diff --git a/subworkflows/local/fastq_download_aws_sratools/tests/main.nf.test b/subworkflows/local/fastq_download_aws_sratools/tests/main.nf.test
new file mode 100644
index 00000000..d4477df0
--- /dev/null
+++ b/subworkflows/local/fastq_download_aws_sratools/tests/main.nf.test
@@ -0,0 +1,41 @@
+nextflow_workflow {
+
+    name "Test workflow: fastq_download_aws_sratools/main.nf"
+    script "../main.nf"
+    workflow "FASTQ_DOWNLOAD_AWS_SRATOOLS"
+
+    tag "CUSTOM_SRATOOLSNCBISETTINGS"
+    tag "SRA_AWS_DOWNLOAD"
+    tag "SRATOOLS_FASTERQDUMP"
+
+    test("Parameters: default") {
+
+        when {
+            workflow {
+                """
+                input[0] = Channel.of(
+                    [[ id:'test_single_end', single_end:true ], 'DRR000774'],
+                    [[ id:'test_paired_end', single_end:false ], 'SRR11140744']
+                )
+                input[1] = []
+                """
+            }
+        }
+
+        then {
+            def pelines1 = path(workflow.out.reads[0][1][0]).linesGzip
+            def pelines2 = path(workflow.out.reads[0][1][1]).linesGzip
+            def selines = path(workflow.out.reads[1][1]).linesGzip
+            assertAll(
+                { assert workflow.success },
+                { assert snapshot(pelines1[0..5]).match("test_pe_reads_1_lines") },
+                { assert snapshot(pelines1.size()).match("test_pe_reads_1_size") },
+                { assert snapshot(pelines2[0..5]).match("test_pe_reads_2_lines") },
+                { assert snapshot(pelines2.size()).match("test_pe_reads_2_size") },
+                { assert snapshot(selines[0..5]).match("test_se_reads_lines") },
+                { assert snapshot(selines.size()).match("test_se_reads_size") },
+                { assert snapshot(workflow.out.versions).match("versions") }
+            )
+        }
+    }
+}
\ No newline at end of file
diff --git a/subworkflows/local/fastq_download_aws_sratools/tests/main.nf.test.snap
b/subworkflows/local/fastq_download_aws_sratools/tests/main.nf.test.snap new file mode 100644 index 00000000..f16b5987 --- /dev/null +++ b/subworkflows/local/fastq_download_aws_sratools/tests/main.nf.test.snap @@ -0,0 +1,97 @@ +{ + "test_se_reads_size": { + "content": [ + 19996 + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-14T15:00:55.000747" + }, + "test_pe_reads_2_lines": { + "content": [ + [ + "@SRR11140744.1 M01472:285:000000000-CYHNP:1:1101:12117:3295 length=251", + "ACAGGACACGAGTAACTCGTCTATCTTCTGCTGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAA", + "+SRR11140744.1 M01472:285:000000000-CYHNP:1:1101:12117:3295 length=251", + "ABAAAFBFFBDBGGGGGGGGGGHHHHHHHHHHCHGHGGGHHHGGHGGHGHGGGHFHHHHHHHHGGGGGHHHHHHHHHFHHHHGHHHGHGGGGGEFGDGHHGFGGGHHHHHGHHGGHHFHHHHGHHHHHHHHHHHHHHGFFGGHHHHHHGGHHGGHHHHHEGHHHHHHHGHHGHHFHHHHHGGGGGGGGGGGGAGGG9BEFFFFFFFFFFFFFFEEFFFFFFFA.FFFFFFFEFEFFFFFFF.BFFFFFFFB", + "@SRR11140744.2 M01472:285:000000000-CYHNP:1:1101:20752:3564 length=238", + "GTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACG" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-14T15:00:54.994204" + }, + "test_pe_reads_2_size": { + "content": [ + 2011460 + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-14T15:00:54.996252" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,4146bec4feafc4feada81bcd86180836", + "versions.yml:md5,44c44e0430f2f8aff8aef894c79ae2c8", + "versions.yml:md5,9c64ac49745ab1738b7edeecee34f559" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + 
"timestamp": "2025-08-14T15:00:55.003931" + }, + "test_pe_reads_1_size": { + "content": [ + 2013376 + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-14T15:00:54.990405" + }, + "test_se_reads_lines": { + "content": [ + [ + "@DRR000774.1 1 length=421", + "ACGCAGGTGCCAGCAGCCGCGGTAATACGTAGGATCCGAGCGTTGTCCGGATTTATTGGGCGTAAAGGGTGCGTAGGCGGCTTGTCAAGTCTCATGTGAAATCTCCCGGCTCAACTGGGAGGGTCATGGGAAACTGATGAGCTCGAGGGCAGTAGAGGGAAGCGGAATTCCGAGAGTAGTGGTGAAATGCGTAGATACTCGGAGGAACACCAGTGGCGAAAGCGGCTTCCTGGACTGTACCTGACGCTGAGGCACGAAAGCGTGGGGAGCAAACCGGATTAGATACCCGGGTAGTCCACGCCCTAAACGATGGATACTAGATATAGGGGGTATCGACCCTCTGTGTCGAAGCTAACGCATTAAGTATCCCGCCTGAGGAGTACGGCCGCAAGGCTAAAACTTAAGGAATTGACGGCTGCGT", + "+DRR000774.1 1 length=421", + "FFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:88FFF888???DBBBBB666F222ADDDFFF::;FFFFFFFFFFFFFFFFFFFFFFFFFFFF9:::FFFFCCCFFFFDDDFFFFF<<<<<8888886623//38><83238@B@@<;855557,,,,,,,0/0;;8:==DDDDDDDDD9:", + "@DRR000774.2 2 length=126", + "ACGCAGGTGCCAGCAGCCGCGGTAATACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTTCAAGTCAGGGGTGGAAATACCCGGGGCCGTCAACCCGACCG" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-14T15:00:54.998236" + }, + "test_pe_reads_1_lines": { + "content": [ + [ + "@SRR11140744.1 M01472:285:000000000-CYHNP:1:1101:12117:3295 length=251", + "ACATAGGGCTGTTCAAGTTGAGGCAAAACGCCTTTTTCAACTTCTACTAAGCCACAAGTGCCATCTTTAAGATGTTGACGTGCCTCTGATAAGACCTCCTCCACGGAGTCTCCAAAGCCACGTACGAGCACGTCGCGAACCTGTAAAACAGGCAAACTGAGTTGGACGTGTGTTTTCTCGTTGAAACCAGGGACAAGGCTCTCCATCTTACCTTTCGGTCACACCCGGACGAAACCTAGATGTGCTGATGA", + "+SRR11140744.1 M01472:285:000000000-CYHNP:1:1101:12117:3295 length=251", + 
"BCCCCFFFFFCFGGGGGGGGGGHGGHHHHGGGHGHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHGGGHHHHHGHHGHHHHHHHHHHHHHGGGGGHHHHHHHHHHHHGHHHGGGGGHGHHGGGGGGGHHHHHHHHHHHGGHHHHHFHHHHHHHGGGHHHHHHHHHGGGHHHHHHHHGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGFFFFFFFFFDFFFFFFFFFFFFFFFFFFFFB", + "@SRR11140744.2 M01472:285:000000000-CYHNP:1:1101:20752:3564 length=238", + "CGTACGAGCACGTCGCGAACCTGTAAAACAGGCAAACTGAGTTGGACGTGTGTTTTCTCGTTGAAACCAGGGACAAGGCTCTCCATCTTACCTTTCGGTCACACCCGGACGAAACCTAGATGTGCTGATGATCGGCTGCAACACGGACGAAACCGTAAGCAGCCTGCAGAAGATAGACGAGTTACTCGTGTCCTGTCAACGACAGTAATTAGTTATTAATTATACTGCGTGAGTGCAC" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-14T15:00:54.985177" + } +} \ No newline at end of file diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 0c8cac0c..e837088a 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -11,6 +11,7 @@ include { SRA_RUNINFO_TO_FTP } from '../../modules/local/sra_runinfo_to_ftp include { ASPERA_CLI } from '../../modules/local/aspera_cli' include { SRA_TO_SAMPLESHEET } from '../../modules/local/sra_to_samplesheet' include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline' +include { FASTQ_DOWNLOAD_AWS_SRATOOLS } from '../../subworkflows/local/fastq_download_aws_sratools' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -76,6 +77,9 @@ workflow SRA { if (meta.fastq_aspera && params.download_method == 'aspera') { download_method = 'aspera' } + if (params.download_method == 'aws') { + download_method = 'aws' + } if ((!meta.fastq_aspera && !meta.fastq_1) || params.download_method == 'sratools') { download_method = 'sratools' } @@ -86,6 +90,8 @@ workflow SRA { return [ meta, [ meta.fastq_1, meta.fastq_2 ] ] sratools: download_method == 'sratools' return [ meta, meta.run_accession ] + aws: download_method == 'aws' + return [ meta, meta.run_accession ] } .set { ch_sra_reads } @@ -115,12 +121,22 @@ workflow SRA { ) 
ch_versions = ch_versions.mix(ASPERA_CLI.out.versions.first()) + // + // SUBWORKFLOW: Download sequencing reads from AWS S3 SRA mirror + // + FASTQ_DOWNLOAD_AWS_SRATOOLS ( + ch_sra_reads.aws, + params.dbgap_key ? file(params.dbgap_key, checkIfExists: true) : [] + ) + ch_versions = ch_versions.mix(FASTQ_DOWNLOAD_AWS_SRATOOLS.out.versions.first()) + // Isolate FASTQ channel which will be added to emit block SRA_FASTQ_FTP .out .fastq .mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.reads) .mix(ASPERA_CLI.out.fastq) + .mix(FASTQ_DOWNLOAD_AWS_SRATOOLS.out.reads) .map { meta, fastq -> def reads = fastq instanceof List ? fastq.flatten() : [ fastq ] diff --git a/workflows/sra/nextflow.config b/workflows/sra/nextflow.config index d242c238..8f3a65a5 100644 --- a/workflows/sra/nextflow.config +++ b/workflows/sra/nextflow.config @@ -4,5 +4,6 @@ includeConfig "../../modules/local/sra_fastq_ftp/nextflow.config" includeConfig "../../modules/local/sra_ids_to_runinfo/nextflow.config" includeConfig "../../modules/local/sra_runinfo_to_ftp/nextflow.config" includeConfig "../../modules/local/sra_to_samplesheet/nextflow.config" +includeConfig "../../modules/local/sra_aws_download/nextflow.config" includeConfig "../../modules/nf-core/sratools/prefetch/nextflow.config" includeConfig "../../subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config" diff --git a/workflows/sra/tests/sra_download_method_aws.nf.test b/workflows/sra/tests/sra_download_method_aws.nf.test new file mode 100644 index 00000000..973e2746 --- /dev/null +++ b/workflows/sra/tests/sra_download_method_aws.nf.test @@ -0,0 +1,42 @@ +nextflow_workflow { + + name "Test workflow: sra/main.nf" + script "../main.nf" + workflow "SRA" + tag "SRA_DOWNLOAD_METHOD_AWS" + + // Dependencies + tag "FASTQ_DOWNLOAD_AWS_SRATOOLS" + tag "SRA_IDS_TO_RUNINFO" + tag "SRA_RUNINFO_TO_FTP" + tag "SRA_TO_SAMPLESHEET" + tag "MULTIQC_MAPPINGS_CONFIG" + + test("Parameters: --download_method aws") { + + when { + params { + 
outdir = "$outputDir" + download_method = 'aws' + } + workflow { + """ + input[0] = Channel.from("DRX026011", "ERX1234253", "SRX6725035") + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + file(workflow.out.samplesheet[0]).name, + workflow.out.mappings, + workflow.out.sample_mappings, + workflow.out.sra_metadata, + workflow.out.versions + ).match() } + ) + } + } +} \ No newline at end of file diff --git a/workflows/sra/tests/sra_download_method_aws.nf.test.snap b/workflows/sra/tests/sra_download_method_aws.nf.test.snap new file mode 100644 index 00000000..822c52b0 --- /dev/null +++ b/workflows/sra/tests/sra_download_method_aws.nf.test.snap @@ -0,0 +1,143 @@ +{ + "Parameters: --download_method aws": { + "content": [ + "samplesheet.csv", + [ + "id_mappings.csv:md5,3e41ce6ab19feb76f2b20fa77a910ad3" + ], + [ + "multiqc_config.yml:md5,1ac06bb95b503703430e74660bbdd768" + ], + [ + { + "base_count": "194930", + "experiment_accession": "DRX026011", + "experiment_alias": "DRX026011", + "experiment_title": "Illumina HiSeq 2500 paired end sequencing: Illumina HiSeq 2500 paired end sequencing of SAMD00024405", + "fastq_1": "DRX026011_DRR028935_1.fastq.gz:md5,1c3a691ea99767f25de2492440a02cb7", + "fastq_2": "DRX026011_DRR028935_2.fastq.gz:md5,6fa02d3e52613cfe3464cc7a29f227d4", + "fastq_aspera": "fasp.sra.ebi.ac.uk:/vol1/fastq/DRR028/DRR028935/DRR028935_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/DRR028/DRR028935/DRR028935_2.fastq.gz", + "fastq_bytes": "60275;61610", + "fastq_ftp": "ftp.sra.ebi.ac.uk/vol1/fastq/DRR028/DRR028935/DRR028935_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/DRR028/DRR028935/DRR028935_2.fastq.gz", + "fastq_galaxy": "ftp.sra.ebi.ac.uk/vol1/fastq/DRR028/DRR028935/DRR028935_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/DRR028/DRR028935/DRR028935_2.fastq.gz", + "fastq_md5": "cc01df82a9354bb6b3be93483b20c35a;941c9998d746416dad53c94c480ddf30", + "id": "DRX026011_DRR028935", + "instrument_model": "Illumina HiSeq 2500", + 
"instrument_platform": "ILLUMINA", + "library_layout": "PAIRED", + "library_name": "day0_BbSQE-I", + "library_selection": "cDNA", + "library_source": "TRANSCRIPTOMIC", + "library_strategy": "RNA-Seq", + "md5_1": "cc01df82a9354bb6b3be93483b20c35a", + "md5_2": "941c9998d746416dad53c94c480ddf30", + "read_count": "965", + "run_accession": "DRR028935", + "run_alias": "DRR028935", + "sample_accession": "SAMD00024405", + "sample_alias": "SAMD00024405", + "sample_description": "Liquid culture", + "sample_title": "Botryococcus braunii Showa at day 0 after inoculation into fresh culture medium", + "scientific_name": "Botryococcus braunii Showa", + "secondary_sample_accession": "DRS019431", + "secondary_study_accession": "DRP002616", + "single_end": false, + "study_accession": "PRJDB3420", + "study_alias": "DRP002616", + "study_title": "Liquid culture of Botryococcus braunii, race B, Showa", + "submission_accession": "DRA002949", + "tax_id": "1202541" + }, + { + "base_count": "1996273", + "experiment_accession": "SRX6725035", + "experiment_alias": "Emb289P1_bin131", + "experiment_title": "Illumina HiSeq 2500 sequencing: Binning of metagenomic reads from the P1 gut compartment of Embiratermes neotenicus", + "fastq_1": "SRX6725035_SRR9984183.fastq.gz:md5,aadf8ac0a6a3282b52404aa4dd14497c", + "fastq_2": "", + "fastq_aspera": "fasp.sra.ebi.ac.uk:/vol1/fastq/SRR998/003/SRR9984183/SRR9984183.fastq.gz", + "fastq_bytes": "605358", + "fastq_ftp": "ftp.sra.ebi.ac.uk/vol1/fastq/SRR998/003/SRR9984183/SRR9984183.fastq.gz", + "fastq_galaxy": "ftp.sra.ebi.ac.uk/vol1/fastq/SRR998/003/SRR9984183/SRR9984183.fastq.gz", + "fastq_md5": "0b512d2dc31685983456bd56fd836544", + "id": "SRX6725035_SRR9984183", + "instrument_model": "Illumina HiSeq 2500", + "instrument_platform": "ILLUMINA", + "library_layout": "SINGLE", + "library_name": "Emb289P1_bin131", + "library_selection": "RANDOM", + "library_source": "METAGENOMIC", + "library_strategy": "WGS", + "md5_1": "0b512d2dc31685983456bd56fd836544", + 
"md5_2": "", + "read_count": "58", + "run_accession": "SRR9984183", + "run_alias": "Emb289P1_bin131.fastq", + "sample_accession": "SAMN12581720", + "sample_alias": "Emb289P1_bin131", + "sample_description": "Keywords: GSC:MIxS MIMAG:5.0", + "sample_title": "MIMAG Metagenome-assembled Genome sample from Defluviitaleaceae bacterium", + "scientific_name": "Defluviitaleaceae bacterium", + "secondary_sample_accession": "SRS5277011", + "secondary_study_accession": "SRP218535", + "single_end": true, + "study_accession": "PRJNA560329", + "study_alias": "PRJNA560329", + "study_title": "Phylogenomic analysis of 589 metagenome-assembled genomes encompassing all major prokaryotic lineages from the gut of higher termites", + "submission_accession": "SRA942061", + "tax_id": "2660712" + }, + { + "base_count": "35658", + "experiment_accession": "ERX1234253", + "experiment_alias": "qiita_ptid_1263:10317.BLANK.93.3E.r22", + "experiment_title": "Illumina HiSeq 2500 sequencing: qiita_ptid_1263:10317.BLANK.93.3E.r22", + "fastq_1": "ERX1234253_ERR1160846.fastq.gz:md5,98515d664854f1c96f55ac836fb671b9", + "fastq_2": "", + "fastq_aspera": "fasp.sra.ebi.ac.uk:/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz", + "fastq_bytes": "18077", + "fastq_ftp": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz", + "fastq_galaxy": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz", + "fastq_md5": "5924f20ef547ebdfed7cad795bbab6e6", + "id": "ERX1234253_ERR1160846", + "instrument_model": "Illumina HiSeq 2500", + "instrument_platform": "ILLUMINA", + "library_layout": "SINGLE", + "library_name": "10317.BLANK.93.3E.r22", + "library_selection": "PCR", + "library_source": "METAGENOMIC", + "library_strategy": "AMPLICON", + "md5_1": "5924f20ef547ebdfed7cad795bbab6e6", + "md5_2": "", + "read_count": "283", + "run_accession": "ERR1160846", + "run_alias": "qiita_ppdid_706:10317.BLANK.93.3E.r22", + "sample_accession": "SAMEA3687214", + "sample_alias": 
"qiita_sid_10317:10317.BLANK.93.3E.r22", + "sample_description": "American Gut control", + "sample_title": "10317.BLANK.93.3E.r22", + "scientific_name": "metagenome", + "secondary_sample_accession": "ERS994363", + "secondary_study_accession": "ERP012803", + "single_end": true, + "study_accession": "PRJEB11419", + "study_alias": "qiita_sid_10317", + "study_title": "American Gut Project", + "submission_accession": "ERA541392", + "tax_id": "256318" + } + ], + [ + "versions.yml:md5,08f737bc2e21b301336285511488a41e", + "versions.yml:md5,1496d1cbc9041e07ab8a0c25f0b054d9", + "versions.yml:md5,9b17045ca8bdc272cb3f9d349a81d206", + "versions.yml:md5,b52279f7d6b891a6523d9321f3f85b47" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "25.04.6" + }, + "timestamp": "2025-08-14T23:08:09.974937" + } +} \ No newline at end of file