Skip to content

Commit 293476b

Browse files
committed
Merge branch 'remote-test-files' into dev
2 parents 4743c94 + 8fbba89 commit 293476b

File tree

8 files changed

+228
-193
lines changed

8 files changed

+228
-193
lines changed

FastOMA.nf

Lines changed: 159 additions & 162 deletions
Large diffs are not rendered by default.

FastOMA/fastoma_notebook_stat.ipynb

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,8 @@
8989
},
9090
"source": [
9191
"output_folder = \"Output\"\n",
92-
"input_folder = \"testdata/in_folder\"\n",
93-
"proteome_folder = input_folder + \"/proteome\"\n",
92+
"input = \"testdata/in_folder\"\n",
93+
"proteome_folder = input + \"/proteome\"\n",
9494
"min_sequence_length = 50"
9595
],
9696
"outputs": [],

README.md

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ any installation steps given the system supports running either docker container
5858
installed.
5959

6060
```bash
61-
nextflow run dessimozlab/FastOMA -profile docker --input_folder /path/to/in_folder --output_folder /path/to/out_folder
61+
nextflow run dessimozlab/FastOMA -profile docker --input /path/to/in_folder --output_folder /path/to/out_folder
6262
```
6363
You can also select a specific version by adding `-r v0.4.0` to the command line. Without any `-r` argument,
6464
always the latest available release will be used. With `-r dev` the latest development release can be used.
@@ -85,7 +85,7 @@ git checkout version, you can specify this in the following way:
8585
```bash
8686
nextflow run FastOMA.nf -profile docker \
8787
--container_version "sha-$(git rev-list --max-count=1 --abbrev-commit HEAD)" \
88-
--input_folder testdata/in_folder \
88+
--input testdata/in_folder \
8989
--output_folder myresult/
9090
```
9191

@@ -126,7 +126,7 @@ nextflow run FastOMA.nf -profile docker --container_version "sha-$(git rev-list
126126

127127
- run the pipeline with some test data (for more details, see the section [How to run FastOMA on the test data](https://github.com/DessimozLab/fastoma?tab=readme-ov-file#how-to-run-fastoma-on-the-test-data))
128128
```bash
129-
nextflow run FastOMA.nf -profile standard --input_folder testdata/in_folder --output_folder output -with-report
129+
nextflow run FastOMA.nf -profile standard --input testdata/in_folder --output_folder output -with-report
130130
```
131131

132132

@@ -172,7 +172,7 @@ mamba activate FastOMA
172172
Afterwards, you can run the workflow using nextflow (which is installed as part of the conda environment)
173173

174174
```
175-
nextflow run FastOMA.nf -profile standard|slurm --input_folder /path/to/input_folder --output_folder /path/to/output
175+
nextflow run FastOMA.nf -profile standard|slurm --input /path/to/input --output_folder /path/to/output
176176
```
177177
Note that you should use either the profile `standard` or `slurm` so that the nextflow executor will use the activated environment.
178178

@@ -191,7 +191,7 @@ One can select the desired container via the `--container_version` argument
191191
```
192192
nextflow run FastOMA.nf -profile docker \
193193
--container_version "sha-$(git rev-list --max-count=1 --abbrev-commit HEAD)" \
194-
--input_folder testdata/in_folder \
194+
--input testdata/in_folder \
195195
--output_folder myresult/
196196
```
197197
This will use the container that is tagged with the current commit id. Similarly, one could also use
@@ -251,7 +251,7 @@ Finally, run the package using nextflow as below:
251251
```
252252
# cd FastOMA/testdata
253253
nextflow run ../FastOMA.nf \
254-
--input_folder in_folder \
254+
--input in_folder \
255255
--omamer_db in_folder/omamerdb.h5 \
256256
--output_folder out_folder \
257257
--report \
@@ -421,7 +421,7 @@ For running on a SLURM cluster, you can add the slurm profile argument: `-profi
421421
# ls ../FastOMA.nf
422422
423423
nextflow ../FastOMA.nf -profile slurm \
424-
--input_folder in_folder \
424+
--input in_folder \
425425
--output_folder out_folder
426426
```
427427

@@ -468,6 +468,7 @@ Citation: Majidian, Sina, Yannis Nevers, Ali Yazdizadeh Kharrazi, Alex Warwick
468468

469469
## Change log
470470
- Update v0.5dev (not released yet):
471+
- renamed the `input_folder` parameter to `input`; `input` now also accepts (remote) archive tarball files.
471472
- better configuration setup (close to nf-core)
472473
- improved resource allocation for nextflow
473474
- improved handling of alternative splicing variants in reporting

conf/test-fungi.config

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// Test configuration for the fungi-30 dataset
2+
3+
params {
4+
test_data_url = "https://zenodo.org/records/17434495/files/fungi-30.tgz?download=1"
5+
report = true
6+
omamer_db = "${projectDir}/testdata/test.h5"
7+
}

conf/test-mammalia.config

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// Test configuration for the mammalia-22 dataset
2+
3+
params {
4+
test_data_url = "https://zenodo.org/records/17434495/files/mammalia-22.tgz?download=1"
5+
report = true
6+
}

conf/test.config

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@ process {
99

1010
params {
1111
omamer_db = "${projectDir}/testdata/test.h5"
12-
input_folder = "${projectDir}/testdata/in_folder"
12+
input = "${projectDir}/testdata/in_folder"
1313
report = true
1414

1515
// derived parameters
16-
proteome_folder = "${params.input_folder}/proteome"
17-
hogmap_in = "${params.input_folder}/hogmap_in"
18-
splice_folder = "${params.input_folder}/splice"
19-
species_tree = "${params.input_folder}/species_tree.nwk"
16+
proteome_folder = "${params.input}/proteome"
17+
hogmap_in = "${params.input}/hogmap_in"
18+
splice_folder = "${params.input}/splice"
19+
species_tree = "${params.input}/species_tree.nwk"
2020
}

nextflow.config

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,16 @@ manifest {
3030
params {
3131
// default parameters for test run
3232
// these can be overridden by the user on the command line
33-
input_folder = null
33+
input = null
3434
// input sub-folders, can also be somewhere else
35-
proteome_folder = "${params.input_folder}/proteome"
36-
hogmap_in = "${params.input_folder}/hogmap_in"
37-
splice_folder = "${params.input_folder}/splice"
38-
species_tree = "${params.input_folder}/species_tree.nwk"
35+
proteome_folder = "${params.input}/proteome"
36+
hogmap_in = "${params.input}/hogmap_in"
37+
splice_folder = "${params.input}/splice"
38+
species_tree = "${params.input}/species_tree.nwk"
39+
// cache path for (remote) archive input files
40+
test_data_cache = null
41+
// Keep deprecated parameter for backward compatibility
42+
input_folder = null
3943

4044
// main output folder
4145
output_folder = "Output"
@@ -144,8 +148,10 @@ profiles {
144148
slurm {
145149
includeConfig 'conf/slurm_basic.config'
146150
}
147-
test { includeConfig 'conf/test.config' }
148-
large { includeConfig 'conf/base_large.config' }
151+
test { includeConfig 'conf/test.config' }
152+
large { includeConfig 'conf/base_large.config' }
153+
mammalia { includeConfig 'conf/test-mammalia.config' }
154+
fungi { includeConfig 'conf/test-fungi.config' }
149155
}
150156

151157
// Capture exit codes from upstream processes when piping
@@ -183,4 +189,4 @@ dag {
183189
enabled = !params.help && params.report
184190
file = "${params.statsdir}/pipeline_dag_${params.trace_report_suffix}.html"
185191
overwrite = true
186-
}
192+
}

nextflow_schema.json

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,19 +9,24 @@
99
"title": "Input options",
1010
"type": "object",
1111
"description": "Define where the pipeline should find input data ",
12-
"required": ["input_folder", "species_tree"],
12+
"required": ["input", "species_tree"],
1313
"properties": {
14-
"input_folder": {
14+
"input": {
1515
"type": "string",
16-
"format": "directory-path",
17-
"description": "Path to input directory containing proteomes and species tree",
18-
"fa_icon": "fas fa-folder-open"
16+
"description": "Input data source: local directory, archive file, or remote URL",
17+
"help": "Can be: (1) Path to a local directory containing proteome/ subfolder and species_tree.nwk file, (2) Path to a local archive file (.tar.gz, .tgz, .zip), or (3) HTTP/HTTPS URL to download an archive. Archives will be automatically extracted and cached.",
18+
"fa_icon": "fas fa-folder-open",
19+
"examples": [
20+
"/path/to/dataset/",
21+
"/path/to/dataset.tar.gz",
22+
"https://zenodo.org/records/12345/files/dataset.tar.gz"
23+
]
1924
},
2025
"proteome_folder": {
2126
"type": "string",
2227
"format": "directory-path",
2328
"description": "Path to input directory containing the proteome files in fasta format",
24-
"help": "If not provided, the proteomes are asumed to be in the input_folder/proteomes directory.",
29+
"help": "Override the default proteome folder location. Only used when input is a local directory. If not specified, defaults to input/proteome/.",
2530
"fa_icon": "fas fa-folder-open"
2631
},
2732
"hogmap_in": {
@@ -34,17 +39,17 @@
3439
"species_tree": {
3540
"type": "string",
3641
"format": "file-path",
37-
"description": "Path to species tree file",
42+
"description": "Path to species tree file in Newick format",
3843
"fa_icon": "fas fa-tree",
3944
"pattern": "^\\S+\\.(nhx|nh|nwk)",
40-
"help": "The species tree should be in Newick or NHX format. By default, the pipeline looks for a file named species_tree.nwk in the input_folder."
45+
"help": "Override the default species tree location. Only used when input is a local directory. If not specified, defaults to input/species_tree.nwk."
4146
},
4247
"splice_folder": {
4348
"type": "string",
4449
"format": "directory-path",
4550
"description": "Path to input directory containing the splice files",
4651
"fa_icon": "fas fa-folder-open",
47-
"help": "If provided, FastOMA will use the splice files to identify and handle alternative splicing isoforms in the proteomes and select the best representative isoform for each gene."
52+
"help": "If provided, FastOMA will use splice variant information to select representative isoforms for each gene. Only used when input is a local directory."
4853
},
4954
"omamer_db": {
5055
"type": "string",
@@ -53,6 +58,19 @@
5358
"fa_icon": "fas fa-database",
5459
"help": "If not provided, the default OMAmer database (LUCA) will be used.",
5560
"default": "https://omabrowser.org/All/LUCA.h5"
61+
},
62+
"test_data_cache": {
63+
"type": "string",
64+
"format": "directory-path",
65+
"description": "Path where (remote) input archives will be stored and permanently cached",
66+
"fa_ison": "fas fa-folder-open"
67+
},
68+
"input_folder": {
69+
"type": "string",
70+
"hidden": true,
71+
"format": "directory-path",
72+
"description": "DEPRECATED: Use --input instead",
73+
"help_text": "This parameter has been renamed to --input. Please update your command line or configuration files."
5674
}
5775
}
5876
},

0 commit comments

Comments
 (0)