Skip to content

Commit 30cf486

Browse files
committed
Added script for integration test case on NA12878/NA12889 chr20.
Updated submodule.
1 parent 8fc5696 commit 30cf486

File tree

3 files changed

+145
-2
lines changed

3 files changed

+145
-2
lines changed

VarDict

src/main/java/com/astrazeneca/vardict/modules/SAMFileParser.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -647,7 +647,7 @@ && isHasAndEquals(querySequence.charAt(n), ref, start)
647647

648648
// subCnt(getVariation(hash, inspos, ref.get(inspos).toString()), dir, tp, tmpq,
649649
// Qmean, nm, conf);
650-
if (inspos > record.getAlignmentStart()) {
650+
if (inspos > position) {
651651
Variation tv = getVariation(hash, inspos, String.valueOf(querySequence.charAt(n - 1 - (start - 1 - inspos))));
652652
//Substract count.
653653
subCnt(tv, dir, tp, queryQuality.charAt(n - 1 - (start -1 - inspos)) - 33,
Lines changed: 143 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
#!/bin/bash
2+
#Paths to VarDict java and perl. Change VARDICTJAVA_HOME and VARDICTPERL_HOME to your paths
3+
VARDICTJAVA_HOME="$HOME/IdeaProjects/VarDictJava/build/install"
4+
VARDICTJAVA="$VARDICTJAVA_HOME/bin/VarDict"
5+
6+
VARDICTPERL_HOME="$HOME/IdeaProjects/VarDictJava/VarDict"
7+
VARDICTPERL="$VARDICTPERL_HOME/vardict"
8+
VARDICTPERL_R_PAIRED="$VARDICTPERL_HOME/testsomatic.R"
9+
VARDICTPERL_VAR_PAIRED="$VARDICTPERL_HOME/var2vcf_paired.R"
10+
11+
# Parameters for Vardict
12+
JAVA_THREADS=8
13+
PARAMETERS="-c 1 -S 2 -E 3 -g 4 -f 0.001 -N abc"
14+
15+
# Multiallelic confirmed variants that aren't supported by Perl
16+
CONFIRMED_DIFFERENCES=\
17+
"20\t24993259\t24993259\tG\tA|"\
18+
"20\t31682933\t31682933\tG\tT|"\
19+
"20\t31829197\t31829197\tC\tT|"\
20+
"20\t44669022\t44669024\tTCC\tCTT|"\
21+
"20\t36869637\t36869637\tC\tA|"\
22+
"20\t50244187\t50244188\tGA\tAC"
23+
24+
#File names and paths
25+
DIR_INPUT="input"
26+
DIR_OUTPUT="output"
27+
28+
FASTA_URL="http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/phase2_reference_assembly_sequence/hs37d5.fa"
29+
FASTA=$(echo $FASTA_URL | sed 's#.*/##')
30+
FASTA_PATH="../$DIR_INPUT/$FASTA"
31+
32+
NORMAL_BAM_URL="http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/NA12878/exome_alignment/NA12878.chrom20.ILLUMINA.bwa.CEU.exome.20121211.bam"
33+
NORMAL_BAM=$(echo $NORMAL_BAM_URL | sed 's#.*/##')
34+
NORMAL_BAM_PATH="../$DIR_INPUT/$NORMAL_BAM"
35+
36+
TUMOR_BAM_URL="http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/phase3/data/NA12889/exome_alignment/NA12889.chrom20.ILLUMINA.bwa.CEU.exome.20121211.bam"
37+
TUMOR_BAM=$(echo $TUMOR_BAM_URL | sed 's#.*/##')
38+
TUMOR_BAM_PATH="../$DIR_INPUT/$TUMOR_BAM"
39+
40+
BED_URL="http://ftp.1000genomes.ebi.ac.uk/vol1/ftp/technical/reference/exome_pull_down_targets_phases1_and_2/20120518.consensus.annotation.bed"
41+
BED=$(echo $BED_URL | sed 's#.*/##')
42+
CHR="chr20"
43+
BED_SPLIT=$BED.$CHR
44+
BED_PATH="../$DIR_INPUT/$BED"
45+
BED_SPLIT_PATH="../$DIR_INPUT/$BED_SPLIT"
46+
47+
echo Creating input directory
48+
mkdir $DIR_INPUT
49+
cd $DIR_INPUT
50+
51+
# Fasta downloading. Will rewrite existing files. Can be commented if fasta is downloaded already. Then change $FASTA_PATH to actual location of fasta.
52+
echo Downloading fasta files
53+
wget $FASTA_URL.gz -O $FASTA_PATH.gz
54+
wget $FASTA_URL.gz.fai -O $FASTA_PATH.gz.fai
55+
56+
# Fasta unzipping. Can be commented if fasta is downloaded already. Then change $FASTA_PATH to actual location of fasta. Will overwrite existing Fasta file.
57+
echo Unzipping fasta files
58+
gzip -df $FASTA.gz
59+
gzip -df $FASTA.gz.fai
60+
mv $FASTA.gz.fai $FASTA.fai
61+
62+
# BAM and BAI download
63+
# Normal
64+
echo Downloading normal BAM
65+
wget $NORMAL_BAM_URL -O $NORMAL_BAM_PATH
66+
wget $NORMAL_BAM_URL.bai -O $NORMAL_BAM_PATH.bai
67+
# Tumor
68+
echo Downloading tumor BAM
69+
wget $TUMOR_BAM_URL -O $TUMOR_BAM_PATH
70+
wget $TUMOR_BAM_URL.bai -O $TUMOR_BAM_PATH.bai
71+
72+
# BED download
73+
echo Downloading BED
74+
wget $BED_URL -O $BED_PATH
75+
76+
# Splitting BED file on chr20 for better performance in VarDict Perl
77+
cat $BED | grep $CHR > $BED_SPLIT
78+
rm $BED
79+
80+
echo Creating output directory
81+
cd ..
82+
mkdir $DIR_OUTPUT
83+
cd $DIR_OUTPUT
84+
85+
# Run VarDict
86+
echo Running VarDict java
87+
time $VARDICTJAVA \
88+
-G $FASTA_PATH \
89+
$PARAMETERS \
90+
-th $JAVA_THREADS \
91+
-b "$TUMOR_BAM_PATH|$NORMAL_BAM_PATH" \
92+
$BED_SPLIT_PATH | sort > java.var
93+
94+
#-F 0x504 flag can be deleted after Perl fix for filter unmapped reads by default
95+
echo Running VarDict perl
96+
time $VARDICTPERL \
97+
-G $FASTA_PATH \
98+
-$PARAMETERS \
99+
-b "$TUMOR_BAM_PATH|$NORMAL_BAM_PATH" \
100+
-F 0x504 \
101+
$BED_SPLIT_PATH | sort > perl.var
102+
103+
# Check if var files aren't empty
104+
if [ ! -s "perl.var" ] || [ ! -s "java.var" ]; then
105+
echo " Var files are empty!"
106+
exit 1;
107+
fi
108+
109+
# Run differences comparing
110+
echo Running differences raw VARs perl and java
111+
cat java.var | grep -Pv $CONFIRMED_DIFFERENCES > java_confirmed.var
112+
113+
diff_var=$(diff perl.var java_confirmed.var > diff_var.txt)
114+
ret1=$?
115+
if [ "$ret1" = "0" ]; then
116+
echo " Raw VAR diff OK (no differences)";
117+
else
118+
echo " Raw VAR files have differences!"
119+
exit 1;
120+
fi
121+
122+
#This part can be uncommented when .R and .pl scripts in vardict repositories will be updated.
123+
#echo Running R script
124+
#cat java.var | $VARDICTPERL_R_PAIRED > java_r.var
125+
#cat perl.var | $VARDICTPERL_R_PAIRED > perl_r.var
126+
127+
#if [ ! -s "perl_r.var" ] || [ ! -s "java_r.var" ]; then
128+
# echo " Var files after R script are empty!"
129+
# exit 1;
130+
#fi
131+
#echo Running Var2VCF script
132+
#cat java_r.var | $VARDICTPERL_VAR_PAIRED > java.vcf
133+
#cat perl_r.var | $VARDICTPERL_VAR_PAIRED > perl.vcf
134+
135+
#echo Running differences VCFs perl and java
136+
#diff_vcf=$(diff perl.vcf java.vcf > diff_vcf.txt)
137+
#ret2=$?
138+
#if ["$ret2" = "0"]; then
139+
# echo " VCF diff OK (no differences)";
140+
#else
141+
# echo " VCF files have differences!"
142+
# exit 1;
143+
#fi

0 commit comments

Comments
 (0)