Skip to content

Commit 4dc6158

Browse files
committed
Merge branch 'inputs' into main
2 parents 9dfcb9b + bc2e1d8 commit 4dc6158

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

50 files changed

+2399
-747
lines changed
Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,16 @@
77
"items": {
88
"type": "object",
99
"properties": {
10-
"sample": {
11-
"type": "string",
12-
"pattern": "^\\S+$",
13-
"errorMessage": "Sample name must be provided and cannot contain spaces"
14-
},
1510
"fasta": {
1611
"type": "string",
1712
"format": "file-path",
18-
"pattern": "^\\S+\\.f(a|asta|na|as)\\.gz$",
19-
"errorMessage": "FastA file must be provided, cannot contain spaces and must have extension '.fa.gz', '.fasta.gz', '.fna.gz', or '.fas.gz'",
13+
"pattern": ".*",
14+
"errorMessage": "Path to a FASTA file, cannot contain spaces",
2015
"exists": true
2116
}
17+
2218
},
2319
"required": [
24-
"sample",
2520
"fasta"
2621
]
2722
}

assets/schema_genbank.json

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-07/schema",
3+
"$id": "https://raw.githubusercontent.com/socialgene/sgnf/master/assets/schema_input.json",
4+
"title": "socialgene/sgnf pipeline - params.input schema",
5+
"description": "Schema for the file provided with params.input",
6+
"type": "array",
7+
"items": {
8+
"type": "object",
9+
"properties": {
10+
"fasta": {
11+
"type": "string",
12+
"format": "file-path",
13+
"pattern": ".*",
14+
"errorMessage": "Path to a FASTA file, cannot contain spaces",
15+
"exists": true
16+
}
17+
18+
},
19+
"required": [
20+
"fasta"
21+
]
22+
}
23+
}

conf/base.config

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,6 @@ process {
6060
errorStrategy = 'retry'
6161
maxRetries = 2
6262
}
63-
withName:CUSTOM_DUMPSOFTWAREVERSIONS {
64-
cache = false
65-
}
6663

6764
withName:PROCESS_GENBANK_FILES {
6865
cpus = { check_max (1 * task.attempt, 'cpus' ) }

conf/examples/input_examples/by_taxa/actinomycetota.config

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ process {
8989
}
9090
withLabel:process_high_memory {
9191
memory = { check_max( 400.GB * task.attempt, 'memory' ) }
92-
}
92+
}
9393
withName:ANTISMASH {
9494
cpus = 2
9595
memory = { check_max (3.GB * task.attempt, 'memory' ) }
@@ -98,6 +98,6 @@ process {
9898
}
9999
withName:'MMSEQS2_CLUSTER'{
100100
// https://github.com/soedinglab/mmseqs2/wiki#how-to-set-the-right-alignment-coverage-to-cluster
101-
ext.args2 = '-c 0.7 --cov-mode 0'
101+
ext.args = '-c 0.7 --cov-mode 0'
102102
}
103103
}

conf/examples/input_examples/input_examples.config

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@ process {
6161
}
6262
withName:'MMSEQS2_CLUSTER'{
6363
// https://github.com/soedinglab/mmseqs2/wiki#how-to-set-the-right-alignment-coverage-to-cluster
64-
ext.args = '--single-step-clustering 1'
65-
ext.args2 = '--min-seq-id 0.5 -c 0.7 --cov-mode 0'
64+
ext.args = '--single-step-clustering 1 --min-seq-id 0.5 -c 0.7 --cov-mode 0'
6665
}
6766
}

conf/examples/input_examples/protein_databases/swissprot.config

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ params {
2020
// Use the downloaded BGC0000001 as input
2121
local_faa = "/tmp/uni/seqs.fasta"
2222

23-
crabhash_path = '/tmp/github/kwan_lab/crabhash/target/release'
2423

2524
/*
2625
////////////////////////

conf/modules.config

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,6 @@
1212

1313
process {
1414

15-
withName: CUSTOM_DUMPSOFTWAREVERSIONS {
16-
publishDir = [
17-
path: { "${params.tracedir}" },
18-
mode: 'copy',
19-
pattern: '*_versions.yml'
20-
]
21-
}
2215

2316
withName:'HMMER_HMMSEARCH'{
2417
ext.args = ""
@@ -31,7 +24,7 @@ process {
3124

3225
withName:'MMSEQS2_CLUSTER'{
3326
// https://github.com/soedinglab/mmseqs2/wiki#how-to-set-the-right-alignment-coverage-to-cluster
34-
ext.args2 = '-c 0.7 --cov-mode 0'
27+
ext.args = '-c 0.7 --cov-mode 0'
3528
}
3629

3730
withName:'MMSEQS2_CREATEDB'{
@@ -71,4 +64,7 @@ process {
7164
]
7265

7366
}
67+
68+
69+
7470
}

conf/modules2.config

Lines changed: 26 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ process {
1515
withName:'DIAMOND_BLASTP'{
1616
publishDir = [
1717
path: { "${params.outdir_neo4j}/import/${task.process.tokenize(':')[-1].toLowerCase()}" },
18-
mode: 'copy',
18+
mode: params.publish_dir_mode,
1919
]
2020
}
2121

@@ -28,12 +28,12 @@ process {
2828
publishDir = [
2929
[
3030
path: { "${params.outdir_neo4j}/import/${task.process.tokenize(':')[-1].toLowerCase()}" },
31-
mode: 'copy',
31+
mode: params.publish_dir_mode,
3232
pattern: '*mmseqs2_results_cluster.tsv.gz'
3333
],
3434
[
3535
path: { "${params.outdir_per_run}/mmseqs_databases" },
36-
mode: 'copy',
36+
mode: params.publish_dir_mode,
3737
pattern: 'mmseqs_*'
3838
],
3939

@@ -43,43 +43,43 @@ process {
4343
withName:'MMSEQS2_CREATEDB'{
4444
publishDir = [
4545
path: {"${params.outdir_per_run}/mmseqs_databases"},
46-
mode: 'copy',
46+
mode: params.publish_dir_mode,
4747
]
4848
}
4949

5050

5151
withName: 'NEO4J_HEADERS|TAXDUMP_PROCESS|HMM_TSV_PARSE' {
5252
publishDir = [
5353
path: { "${params.outdir_neo4j}/import/${task.process.tokenize(':')[-1].toLowerCase()}" },
54-
mode: 'copy',
54+
mode: params.publish_dir_mode,
5555
]
5656
}
5757

5858
withName: 'DEDUPLICATE_GENOMIC_INFO' {
5959
publishDir = [
6060
[
6161
path: { "${params.outdir_neo4j}/import/genomic_info" },
62-
mode: 'copy',
62+
mode: params.publish_dir_mode,
6363
pattern: '*locus_to_protein.gz'
6464
],
6565
[
6666
path: { "${params.outdir_neo4j}/import/genomic_info" },
67-
mode: 'copy',
67+
mode: params.publish_dir_mode,
6868
pattern: '*assembly_to_locus.gz'
6969
],
7070
[
7171
path: { "${params.outdir_neo4j}/import/genomic_info" },
72-
mode: 'copy',
72+
mode: params.publish_dir_mode,
7373
pattern: '*assembly_to_taxid.gz'
7474
],
7575
[
7676
path: { "${params.outdir_neo4j}/import/genomic_info" },
77-
mode: 'copy',
77+
mode: params.publish_dir_mode,
7878
pattern: '*loci.gz'
7979
],
8080
[
8181
path: { "${params.outdir_neo4j}/import/genomic_info" },
82-
mode: 'copy',
82+
mode: params.publish_dir_mode,
8383
pattern: '*assemblies.gz'
8484
]
8585
]
@@ -89,17 +89,17 @@ process {
8989
publishDir = [
9090
[
9191
path: { "${params.outdir_neo4j}/import/protein_info" },
92-
mode: 'copy',
92+
mode: params.publish_dir_mode,
9393
pattern: '*protein_info.gz'
9494
],
9595
[
9696
path: { "${params.outdir_neo4j}/import/protein_info" },
97-
mode: 'copy',
97+
mode: params.publish_dir_mode,
9898
pattern: '*protein_ids.gz'
9999
],
100100
[
101101
path: { "${params.outdir_neo4j}/import/protein_info" },
102-
mode: 'copy',
102+
mode: params.publish_dir_mode,
103103
pattern: '*protein_to_go.gz'
104104
]
105105

@@ -109,7 +109,7 @@ process {
109109
withName: 'NEO4J_HEADERS|TAXDUMP_PROCESS|HMM_TSV_PARSE' {
110110
publishDir = [
111111
path: { "${params.outdir_neo4j}/import/${task.process.tokenize(':')[-1].toLowerCase()}" },
112-
mode: 'copy',
112+
mode: params.publish_dir_mode,
113113
]
114114
}
115115

@@ -118,17 +118,17 @@ process {
118118
[
119119
[
120120
path: { "${params.outdir_neo4j}/import/hmm_info" },
121-
mode: 'copy',
121+
mode: params.publish_dir_mode,
122122
pattern: '*hmminfo'
123123
],
124124
[
125125
path: { "${params.outdir_neo4j}/import/hmm_info" },
126-
mode: 'copy',
126+
mode: params.publish_dir_mode,
127127
pattern: '*sg_hmm_nodes'
128128
],
129129
[
130130
path: { "${params.outdir_per_run}/hmm_cache" },
131-
mode: 'copy',
131+
mode: params.publish_dir_mode,
132132
pattern: 'socialgene_nr_hmms_file_*'
133133
]
134134
]
@@ -138,30 +138,20 @@ process {
138138
withName:'TIGRFAM_ROLES|TIGRFAM_TO_GO|TIGRFAM_TO_ROLE'{
139139
publishDir = [
140140
path: { "${params.outdir_neo4j}/import/tigrfam_info" },
141-
mode: 'copy',
141+
mode: params.publish_dir_mode,
142142
]
143143
}
144144
withName:'PARAMETER_EXPORT_FOR_NEO4J'{
145145
publishDir = [
146146
path: { "${params.outdir_neo4j}/import/parameters" },
147-
mode: 'copy',
148-
]
149-
}
150-
151-
withName: 'CRABHASH' {
152-
publishDir = [
153-
path: {"${params.outdir_neo4j}/import/protein_info"},
154-
mode: 'copy',
155-
overwrite: false,
156-
pattern: "*.protein_info.gz"
157-
147+
mode: params.publish_dir_mode,
158148
]
159149
}
160150

161151
withName: 'DOWNLOAD_CHEMBL_DATA' {
162152
publishDir = [
163153
path: {"${params.outdir_neo4j}/import/chembl"},
164-
mode: 'copy',
154+
mode: params.publish_dir_mode,
165155
overwrite: false
166156
]
167157
}
@@ -175,7 +165,7 @@ process {
175165
withName: 'DOWNLOAD_GOTERMS' {
176166
publishDir = [
177167
path: { "${params.outdir_neo4j}/import/goterms" },
178-
mode: 'copy',
168+
mode: params.publish_dir_mode,
179169
]
180170
}
181171

@@ -205,21 +195,21 @@ process {
205195
withName: 'DEDUPLICATE_AND_INDEX_FASTA' {
206196
publishDir = [
207197
path: { "${params.outdir_per_run}/nonredundant_fasta" },
208-
mode: 'copy',
198+
mode: params.publish_dir_mode,
209199
]
210200
}
211201

212202
withName: 'NEO4J_ADMIN_IMPORT' {
213203
publishDir = [
214204
path: {"${params.outdir_neo4j}"},
215-
mode: 'copy',
205+
mode: params.publish_dir_mode,
216206
]
217207
}
218208

219209
withName: 'NEO4J_ADMIN_IMPORT_DRYRUN' {
220210
publishDir = [
221211
path: {"${params.outdir_neo4j}"},
222-
mode: 'copy',
212+
mode: params.publish_dir_mode,
223213
]
224214
}
225215

@@ -240,16 +230,11 @@ process {
240230
withName:'MERGE_PARSED_DOMTBLOUT'{
241231
publishDir = [
242232
path: { "${params.outdir_neo4j}/import/parsed_domtblout" },
243-
mode: 'copy',
233+
mode: params.publish_dir_mode,
244234
]
245235
}
246236

247-
withName:'MULTIQC'{
248-
publishDir = [
249-
path: { "${params.tracedir}/${task.process.tokenize(':')[-1].toLowerCase()}" },
250-
mode: 'copy',
251-
]
252-
}
237+
253238

254239

255240
}

0 commit comments

Comments
 (0)