Skip to content

Commit 3201f51

Browse files
committed
Added exclude.txt to exclude bad samples from the joint genotyping.
1 parent 529eccf commit 3201f51

File tree

4 files changed

+28
-4
lines changed

4 files changed

+28
-4
lines changed

Dockstore.cwl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ dct:creator:
2727

2828
requirements:
2929
- class: DockerRequirement
30-
dockerPull: "quay.io/zek12/dockerized_gatk_pipeline:0.1.0"
30+
dockerPull: "quay.io/zek12/dockerized_gatk_pipeline:0.2.0"
3131

3232
hints:
3333
- class: ResourceRequirement
@@ -67,6 +67,13 @@ inputs:
6767
position: 4
6868
shellQuote: true
6969

70+
exclude:
71+
type: File
72+
doc: "A text file listing the gVCF files (bad samples) to exclude from the joint genotyping. List one file name per line, each ending with .g.vcf.gz"
73+
inputBinding:
74+
position: 5
75+
shellQuote: true
76+
7077

7178
outputs:
7279

Dockstore.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@
99
"path": "http://ftp.sanger.ac.uk/pub/cancer/dockstore/human/core_ref_GRCh37d5.tar.gz",
1010
"class": "File"
1111
},
12+
"exclude": {
13+
"path": "exclude.txt",
14+
"class": "File"
15+
},
1216
"vcf": {
13-
"path": "/tmp/joint_chrY.vcf",
17+
"path": "joint_chrY.vcf",
1418
"class": "File"
1519
}
1620
}

exclude.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
sample1.g.vcf.gz
2+
sample2.g.vcf.gz

scripts/part3_joint_genotyping.sh

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,20 @@ path_logs="."
1515
path_output_vcf="."
1616
mem=$3 # = 32
1717
ref_genome=$4
18+
exclude=$5
19+
20+
# strip a single trailing slash from the gVCF input directory, if present
21+
path_to_gvcfs=${path_to_gvcfs%/}
22+
1823

1924
gatk_ref_bundle_dbsnp="/opt/dbsnp_138.b37.vcf.gz"
2025
logfile=${path_logs}/log.log
2126

2227

28+
# exclude bad samples: list all gVCF file names, then drop the ones named in $exclude (whole-line, fixed-string match)
29+
# ls -1a $path_to_gvcfs/*.g.vcf.gz | sed -e 's/.*\///g' | sed -e 's/\..*//g' > all_samples.txt
30+
ls -1a $path_to_gvcfs/*.g.vcf.gz | sed -e 's/.*\///g' > all_samples.txt
31+
grep -F -x -v -f $exclude all_samples.txt > passed.txt
2332

2433

2534
# prepare ref genome
@@ -47,8 +56,10 @@ if [ ! -f $path_logs/part_3_GenotypeGVCFs_finished_chr$chr.txt ]; then
4756

4857
echo "$(date '+%d/%m/%y_%H:%M:%S'),---Starting GenotypeGVCFs: joint genotyping of chromosome $chr---" >> "$logfile"
4958

50-
gvcf_paths=$(ls $path_to_gvcfs/*.g.vcf.gz)
51-
gvcf_array=$(for i in $gvcf_paths; do echo "--variant $i"; done)
59+
# gvcf_paths=$(ls $path_to_gvcfs/*.g.vcf.gz)
60+
# gvcf_array=$(for i in $gvcf_paths; do echo "--variant $i"; done)
61+
gvcf_paths=$(cat passed.txt)
62+
gvcf_array=$(for i in $gvcf_paths; do echo "--variant $path_to_gvcfs/$i"; done)
5263

5364
time ($this_gatk \
5465
-T GenotypeGVCFs \

0 commit comments

Comments
 (0)