Skip to content

Commit 03c5e2a

Browse files
authored
Merge pull request #20 from OpenMined/madhava/fix_failures
Hardened bioscript's handling of input files during processing and added a combine command
2 parents 2354601 + eacb136 commit 03c5e2a

File tree

14 files changed

+174
-81
lines changed

14 files changed

+174
-81
lines changed

examples/apol1/apol1-classifier/pipeline.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: apol1-classifier
2-
description: Classification of APOL1 genotypes (G0, G1, G2) for kidney disease risk assessment.
2+
version: 0.1.1
33
inputs:
44
samplesheet: List[GenotypeRecord]
55
steps:

examples/apol1/apol1-classifier/project.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ template: dynamic-nextflow
55
version: 0.1.1
66
assets:
77
- classify_apol1.py
8+
description: Classification of APOL1 genotypes (G0, G1, G2) for kidney disease risk assessment.
89
inputs:
910
- name: participants
1011
type: List[GenotypeRecord]

examples/apol1/apol1-classifier/workflow.nf

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// BioVault workflow export v0.1.1
2+
13
nextflow.enable.dsl=2
24

35
workflow USER {
@@ -39,6 +41,8 @@ process apol1_classifier {
3941
container 'ghcr.io/openmined/bioscript:0.1.4'
4042
publishDir params.results_dir, mode: 'copy', overwrite: true, pattern: 'result_APOL1_*.tsv'
4143
tag { participant_id }
44+
errorStrategy { params.nextflow.error_strategy }
45+
maxRetries { params.nextflow.max_retries }
4246

4347
input:
4448
tuple path(assets_dir), val(participant_id), path(genotype_file)
@@ -47,9 +51,10 @@ process apol1_classifier {
4751
path "result_APOL1_${participant_id}.tsv"
4852

4953
script:
50-
def filename = genotype_file.name
54+
def genoFileName = genotype_file.getName()
5155
"""
52-
bioscript classify "${{assets_dir}}/classify_apol1.py" --file "${filename}" --participant_id "${{participant_id}}"
56+
GENO_FILE=\$(printf '%q' "${genoFileName}")
57+
bioscript classify "${assets_dir}/classify_apol1.py" --file \$GENO_FILE --participant_id "${participant_id}"
5358
"""
5459
}
5560

@@ -64,13 +69,9 @@ process aggregate_results {
6469
path "result_APOL1.tsv"
6570

6671
script:
72+
def manifestContent = individual_results.collect { it.toString() }.join('\n') + '\n'
6773
"""
68-
# Extract header from first file
69-
head -n 1 ${individual_results[0]} > result_APOL1.tsv
70-
71-
# Append all data rows (skip headers)
72-
for file in ${individual_results}; do
73-
tail -n +2 "\$file" >> result_APOL1.tsv
74-
done
74+
cat <<'EOF' > results.list\n${manifestContent}EOF
75+
bioscript combine --list results.list --output result_APOL1.tsv
7576
"""
7677
}

examples/apol1/apol1_dev.ipynb

Lines changed: 3 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,8 @@
318318
" 'path': 'result_APOL1.tsv',\n",
319319
" },\n",
320320
" ],\n",
321+
" version=\"0.1.1\",\n",
322+
" description=\"Classification of APOL1 genotypes (G0, G1, G2) for kidney disease risk assessment.\",\n",
321323
")\n",
322324
"project\n"
323325
]
@@ -356,6 +358,7 @@
356358
" },\n",
357359
" ),\n",
358360
" ],\n",
361+
" version=\"0.1.1\",\n",
359362
")\n",
360363
"pipeline\n"
361364
]
@@ -544,27 +547,6 @@
544547
"!bioscript classify classify_apol1.py --file apol1_headerless.txt --participant_id=\"HEADERLESS\"\n",
545548
"!cat result_APOL1_HEADERLESS.tsv\n"
546549
]
547-
},
548-
{
549-
"cell_type": "code",
550-
"execution_count": null,
551-
"metadata": {},
552-
"outputs": [],
553-
"source": []
554-
},
555-
{
556-
"cell_type": "code",
557-
"execution_count": null,
558-
"metadata": {},
559-
"outputs": [],
560-
"source": []
561-
},
562-
{
563-
"cell_type": "code",
564-
"execution_count": null,
565-
"metadata": {},
566-
"outputs": [],
567-
"source": []
568550
}
569551
],
570552
"metadata": {

examples/brca/brca-classifier/pipeline.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: brca-classifier
2-
description: Classification of BRCA1 and BRCA2 variants from ClinVar database.
2+
version: 0.1.1
33
inputs:
44
samplesheet: List[GenotypeRecord]
55
steps:

examples/brca/brca-classifier/project.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ assets:
77
- classify_brca.py
88
- brca2_clinvar.tsv
99
- brca1_clinvar.tsv
10+
description: Classification of BRCA variants using ClinVar reference data for hereditary cancer risk.
1011
inputs:
1112
- name: participants
1213
type: List[GenotypeRecord]

examples/brca/brca-classifier/workflow.nf

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
// BioVault workflow export v0.1.1
2+
13
nextflow.enable.dsl=2
24

35
workflow USER {
@@ -39,6 +41,8 @@ process brca_classifier {
3941
container 'ghcr.io/openmined/bioscript:0.1.4'
4042
publishDir params.results_dir, mode: 'copy', overwrite: true, pattern: 'result_BRCA_*.tsv'
4143
tag { participant_id }
44+
errorStrategy { params.nextflow.error_strategy }
45+
maxRetries { params.nextflow.max_retries }
4246

4347
input:
4448
tuple path(assets_dir), val(participant_id), path(genotype_file)
@@ -47,9 +51,10 @@ process brca_classifier {
4751
path "result_BRCA_${participant_id}.tsv"
4852

4953
script:
50-
def filename = genotype_file.name
54+
def genoFileName = genotype_file.getName()
5155
"""
52-
bioscript classify "${{assets_dir}}/classify_brca.py" --file "${filename}" --participant_id "${{participant_id}}"
56+
GENO_FILE=\$(printf '%q' "${genoFileName}")
57+
bioscript classify "${assets_dir}/classify_brca.py" --file \$GENO_FILE --participant_id "${participant_id}"
5358
"""
5459
}
5560

@@ -64,13 +69,9 @@ process aggregate_results {
6469
path "result_BRCA.tsv"
6570

6671
script:
72+
def manifestContent = individual_results.collect { it.toString() }.join('\n') + '\n'
6773
"""
68-
# Extract header from first file
69-
head -n 1 ${individual_results[0]} > result_BRCA.tsv
70-
71-
# Append all data rows (skip headers)
72-
for file in ${individual_results}; do
73-
tail -n +2 "\$file" >> result_BRCA.tsv
74-
done
74+
cat <<'EOF' > results.list\n${manifestContent}EOF
75+
bioscript combine --list results.list --output result_BRCA.tsv
7576
"""
7677
}

examples/brca/brca_dev.ipynb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -345,6 +345,8 @@
345345
" 'path': 'result_BRCA.tsv',\n",
346346
" },\n",
347347
" ],\n",
348+
" version=\"0.1.1\",\n",
349+
" description=\"Classification of BRCA variants using ClinVar reference data for hereditary cancer risk.\",\n",
348350
")\n",
349351
"project\n"
350352
]
@@ -383,6 +385,7 @@
383385
" },\n",
384386
" ),\n",
385387
" ],\n",
388+
" version=\"0.1.1\",\n",
386389
")\n",
387390
"pipeline\n"
388391
]

examples/herc2/herc2-classifier/pipeline.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
name: herc2-classifier
2-
description: Classification of HERC2 gene variant (rs12913832) for eye color prediction.
2+
version: 0.1.1
33
inputs:
44
samplesheet: List[GenotypeRecord]
55
steps:

examples/herc2/herc2-classifier/project.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ template: dynamic-nextflow
55
version: 0.1.1
66
assets:
77
- classify_herc2.py
8+
description: Classification of HERC2 genotypes for eye color prediction.
89
inputs:
910
- name: participants
1011
type: List[GenotypeRecord]

0 commit comments

Comments
 (0)