File tree Expand file tree Collapse file tree 4 files changed +28
-4
lines changed
Expand file tree Collapse file tree 4 files changed +28
-4
lines changed Original file line number Diff line number Diff line change @@ -27,7 +27,7 @@ dct:creator:
2727
2828requirements :
2929 - class: DockerRequirement
30- dockerPull: "quay.io/zek12/dockerized_gatk_pipeline:0.1 .0"
30+ dockerPull: "quay.io/zek12/dockerized_gatk_pipeline:0.2 .0"
3131
3232hints :
3333 - class: ResourceRequirement
@@ -67,6 +67,13 @@ inputs:
6767 position : 4
6868 shellQuote : true
6969
70+ exclude:
71+ type : File
72+ doc : "A text file listing the gVCFs to exclude (bad samples) from the joint genotyping. List one sample per line; each entry must end with .g.vcf.gz"
73+ inputBinding :
74+ position : 5
75+ shellQuote : true
76+
7077
7178outputs :
7279
Original file line number Diff line number Diff line change 99 "path" : " http://ftp.sanger.ac.uk/pub/cancer/dockstore/human/core_ref_GRCh37d5.tar.gz" ,
1010 "class" : " File"
1111 },
12+ "exclude" : {
13+ "path" : " exclude.txt" ,
14+ "class" : " File"
15+ },
1216 "vcf" : {
13- "path" : " /tmp/ joint_chrY.vcf" ,
17+ "path" : " joint_chrY.vcf" ,
1418 "class" : " File"
1519 }
1620}
Original file line number Diff line number Diff line change 1+ sample1.g.vcf.gz
2+ sample2.g.vcf.gz
Original file line number Diff line number Diff line change @@ -15,11 +15,20 @@ path_logs="."
1515path_output_vcf=" ."
1616mem=$3 # = 32
1717ref_genome=$4
18+ exclude=$5
19+
20+ # strip one trailing slash from path_to_gvcfs, if present
21+ path_to_gvcfs=${path_to_gvcfs%/ }
22+
1823
1924gatk_ref_bundle_dbsnp=" /opt/dbsnp_138.b37.vcf.gz"
2025logfile=${path_logs} /log.log
2126
2227
28+ # exclude bad samples: list every gVCF file name, then drop the names that appear in $exclude
29+ # ls -1a $path_to_gvcfs/*.g.vcf.gz | sed -e 's/.*\///g' | sed -e 's/\..*//g' > all_samples.txt
30+ ls -1a $path_to_gvcfs /* .g.vcf.gz | sed -e ' s/.*\///g' > all_samples.txt
31+ grep -F -x -v -f $exclude all_samples.txt > passed.txt
2332
2433
2534# prepare ref genome
@@ -47,8 +56,10 @@ if [ ! -f $path_logs/part_3_GenotypeGVCFs_finished_chr$chr.txt ]; then
4756
4857 echo " $( date ' +%d/%m/%y_%H:%M:%S' ) ,---Starting GenotypeGVCFs: joint genotyping of chromosome $chr ---" >> " $logfile "
4958
50- gvcf_paths=$( ls $path_to_gvcfs /* .g.vcf.gz)
51- gvcf_array=$( for i in $gvcf_paths ; do echo " --variant $i " ; done)
59+ # gvcf_paths=$(ls $path_to_gvcfs/*.g.vcf.gz)
60+ # gvcf_array=$(for i in $gvcf_paths; do echo "--variant $i"; done)
61+ gvcf_paths=$( cat passed.txt)
62+ gvcf_array=$( for i in $gvcf_paths ; do echo " --variant $path_to_gvcfs /$i " ; done)
5263
5364 time ($this_gatk \
5465 -T GenotypeGVCFs \
You can’t perform that action at this time.
0 commit comments