Skip to content

Commit 477ba8f

Browse files
author
hiyoothere
committed
initial
1 parent e2cdf72 commit 477ba8f

File tree

5 files changed

+217
-0
lines changed

5 files changed

+217
-0
lines changed

1.A.pipe_Align_Preprocess.sh

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
#!/bin/bash
#$ -cwd
#$ -S /bin/bash
#
# Align paired-end reads with bwa mem and pre-process the resulting BAM:
#   SortSam -> AddOrReplaceReadGroups -> MarkDuplicates -> LeftAlignIndels
#   -> RealignerTargetCreator / IndelRealigner -> FixMateInformation
#   -> two rounds of BaseRecalibrator / ApplyBQSR -> cleanup.
#
# Usage:
#   1.A.pipe_Align_Preprocess.sh DIR SAMPLE REF DataPath FASTQ1 FASTQ2 PICARD \
#       [AlignedPath] [dbSNP]
#
# Arguments:
#   $1 DIR         project directory; $DIR/out/tmp/ is used as the temp dir
#   $2 SAMPLE      sample name (output file prefix and RGSM read-group field)
#   $3 REF         reference FASTA
#   $4 DataPath    prefix prepended verbatim to FASTQ1/FASTQ2 (include the
#                  trailing "/")
#   $5 FASTQ1      first-mate FASTQ file name
#   $6 FASTQ2      second-mate FASTQ file name
#   $7 PICARD      directory containing picard.jar
#   $8 AlignedPath optional; prefix prepended verbatim to every output file
#                  (include the trailing "/"). Falls back to the AlignedPath
#                  environment variable, then to "" (current directory).
#   $9 dbSNP       optional; known-sites VCF for BQSR. Falls back to the dbSNP
#                  environment variable. One of the two must be provided.
#
# The script previously read AlignedPath and dbSNP without ever assigning them,
# so they could only arrive through the environment; that still works.

# Abort on the first failing step so that later steps (and the cleanup at the
# end) never run against missing or truncated inputs.
set -euo pipefail

usage='usage: 1.A.pipe_Align_Preprocess.sh DIR SAMPLE REF DataPath FASTQ1 FASTQ2 PICARD [AlignedPath] [dbSNP]'

# basic argument
DIR=${1:?$usage}
SAMPLE=${2:?$usage}
REF=${3:?$usage}
DataPath=${4:?$usage}
FASTQ1=${5:?$usage}
FASTQ2=${6:?$usage}
PICARD=${7:?$usage}
AlignedPath=${8:-${AlignedPath:-}}
dbSNP=${9:-${dbSNP:-}}
OPT=''

# Fail before the (long) alignment rather than at step 7.
if [[ -z "$dbSNP" ]]; then
  printf '%s\n' 'error: dbSNP (known-sites VCF) must be given as argument 9 or via the dbSNP environment variable' >&2
  exit 2
fi

readonly GATK4=/data/project/MRS/Resource/gatk-4.1.5.0/gatk
readonly GATK3_JAR=/opt/Yonsei/GATK/3.8-1/GenomeAnalysisTK.jar

tmp_dir=$DIR/out/tmp/
prefix=$AlignedPath$SAMPLE$OPT   # common stem of every file written below

mkdir -p -- "$tmp_dir"

date

# 1. Actual alignment. The -I option (Illumina 1.3+ qualities) is not needed
#    with current bwa versions.
bwa mem -t 4 -M "$REF" "$DataPath$FASTQ1" "$DataPath$FASTQ2" \
  | java -jar "$PICARD/picard.jar" SortSam \
      SORT_ORDER=coordinate \
      INPUT=/dev/stdin \
      OUTPUT="${prefix}.bam" \
      VALIDATION_STRINGENCY=LENIENT \
      CREATE_INDEX=true \
      TMP_DIR="$tmp_dir"

# 2.1. Add or replace read groups
java -jar "$PICARD/picard.jar" AddOrReplaceReadGroups \
  INPUT="${prefix}.bam" \
  OUTPUT="${prefix}.RGadded.bam" \
  SORT_ORDER=coordinate \
  RGLB='MRS' \
  RGPL='il' \
  RGPU='WES' \
  RGSM="$SAMPLE" \
  CREATE_INDEX=true \
  VALIDATION_STRINGENCY=LENIENT \
  TMP_DIR="$tmp_dir"

# 3. Marking PCR duplicates (duplicates are removed, not just flagged)
java -jar "$PICARD/picard.jar" MarkDuplicates \
  INPUT="${prefix}.RGadded.bam" \
  OUTPUT="${prefix}.RGadded.marked.bam" \
  METRICS_FILE="${prefix}.metrics" \
  CREATE_INDEX=true \
  REMOVE_DUPLICATES=true \
  VALIDATION_STRINGENCY=LENIENT \
  TMP_DIR="$tmp_dir"

# 3-1. LeftAlignIndels
"$GATK4" LeftAlignIndels \
  -R "$REF" \
  -I "${prefix}.RGadded.marked.bam" \
  -O "${prefix}.RGadded.marked.LA.bam"

# 4. Local realignment around indels
# [step1] Create a table of possible indel targets
java -jar "$GATK3_JAR" \
  -T RealignerTargetCreator \
  -R "$REF" \
  -I "${prefix}.RGadded.marked.LA.bam" \
  -o "${prefix}.RGadded.marked.LA.forIndelRealigner.list"

# 5. [step2] Realign reads around the indel targets
java -jar "$GATK3_JAR" \
  -T IndelRealigner \
  -R "$REF" \
  -I "${prefix}.RGadded.marked.LA.bam" \
  -targetIntervals "${prefix}.RGadded.marked.LA.forIndelRealigner.list" \
  -o "${prefix}.RGadded.marked.LA.realigned.bam"

# 6. The mate information must be fixed after realignment
java -jar "$PICARD/picard.jar" FixMateInformation \
  INPUT="${prefix}.RGadded.marked.LA.realigned.bam" \
  OUTPUT="${prefix}.RGadded.marked.LA.realigned.fixed.bam" \
  SO=coordinate \
  VALIDATION_STRINGENCY=LENIENT \
  CREATE_INDEX=true \
  TMP_DIR="$tmp_dir"

# 7. Base quality score recalibration.
# 7.1) BaseRecalibrator, 1st pass
gatk BaseRecalibrator \
  -R "$REF" \
  -I "${prefix}.RGadded.marked.LA.realigned.fixed.bam" \
  --known-sites "$dbSNP" \
  -O "${prefix}.recal_pass1.table" \
  --tmp-dir "$tmp_dir"

# 7.2) ApplyBQSR
gatk ApplyBQSR \
  -R "$REF" \
  -I "${prefix}.RGadded.marked.LA.realigned.fixed.bam" \
  --bqsr-recal-file "${prefix}.recal_pass1.table" \
  -O "${prefix}.RGadded.marked.LA.realigned.fixed.pre_recal.bam" \
  --tmp-dir "$tmp_dir"

# 7.3) BaseRecalibrator, 2nd pass
gatk BaseRecalibrator \
  -R "$REF" \
  -I "${prefix}.RGadded.marked.LA.realigned.fixed.pre_recal.bam" \
  --known-sites "$dbSNP" \
  -O "${prefix}.recal_pass2.table" \
  --tmp-dir "$tmp_dir"

# 7.4) ApplyBQSR
gatk ApplyBQSR \
  -R "$REF" \
  -I "${prefix}.RGadded.marked.LA.realigned.fixed.pre_recal.bam" \
  --bqsr-recal-file "${prefix}.recal_pass2.table" \
  -O "${prefix}.RGadded.marked.LA.realigned.fixed.recal.bam" \
  --tmp-dir "$tmp_dir"

# 8. Remove intermediate files.
# The last three names carry the ".LA" infix that steps 5-7 actually write;
# without it the rm targets never existed and the files were left behind.
rm -f -- \
  "${prefix}.bam" \
  "${prefix}.RGadded.bam" \
  "${prefix}.RGadded.marked.bam" \
  "${prefix}.RGadded.marked.LA.realigned.bam" \
  "${prefix}.RGadded.marked.LA.realigned.fixed.bam" \
  "${prefix}.RGadded.marked.LA.realigned.fixed.pre_recal.bam"

date
130+
131+
132+
133+

1.A.pipe_CNVkit.sh

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
#!/bin/bash
#$ -S /bin/bash
#$ -cwd
#
# CNVkit copy-number workflow for one sample:
#   coverage (targets + antitargets) -> reference -> fix -> segment -> scatter.
#
# Usage:
#   1.A.pipe_CNVkit.sh DIR REF Inputpath [Analysispath ID [PRI]]
#
# Arguments:
#   $1 DIR          accepted for interface compatibility; not used below
#   $2 REF          reference FASTA passed to `cnvkit.py reference -f`
#   $3 Inputpath    directory containing the input BAM
#   $4 Analysispath optional; directory holding my_targets.bed /
#                   antitargets.bed and receiving all outputs
#   $5 ID           optional; sample identifier used in output file names
#   $6 PRI          optional; BAM file name inside Inputpath (falls back to
#                   the PRI environment variable)
#
# Legacy behaviour: the script used to read Analysispath from $2 and ID from
# $3, i.e. the same positions as REF and Inputpath. When $4/$5 are omitted the
# old positions are still used so existing invocations behave as before.

# shellcheck disable=SC2034  # DIR is part of the positional interface only
DIR=$1
REF=$2
Inputpath=$3
Analysispath=${4:-$2}
ID=${5:-$3}
PRI=${6:-${PRI:-}}

if [[ -z "$PRI" ]]; then
  printf '%s\n' 'warning: PRI (input BAM file name) is empty; pass it as argument 6 or export PRI' >&2
fi

# Per-sample read depth over target and antitarget regions.
cnvkit.py coverage "$Inputpath/$PRI" \
  "$Analysispath/my_targets.bed" \
  -o "$Analysispath/$ID.targetcoverage.cnn"

cnvkit.py coverage "$Inputpath/$PRI" \
  "$Analysispath/antitargets.bed" \
  -o "$Analysispath/$ID.antitargetcoverage.cnn"

# Pooled reference built from the five control samples RF-1..RF-5.
# NOTE(review): the output name is fixed to FOR_RF6 while the fix step below
# reads FOR_<ID>.reference.cnn — these only line up for one specific ID;
# confirm the intended naming.
cnvkit.py reference \
  "$Analysispath/FASTP_RF-1_RF_il_WES.RGadded.marked.realigned.fixed.targetcoverage.cnn" \
  "$Analysispath/FASTP_RF-2_RF_il_WES.RGadded.marked.realigned.fixed.targetcoverage.cnn" \
  "$Analysispath/FASTP_RF-3_RF_il_WES.RGadded.marked.realigned.fixed.targetcoverage.cnn" \
  "$Analysispath/FASTP_RF-4_RF_il_WES.RGadded.marked.realigned.fixed.targetcoverage.cnn" \
  "$Analysispath/FASTP_RF-5_RF_il_WES.RGadded.marked.realigned.fixed.targetcoverage.cnn" \
  -f "$REF" \
  -o "$Analysispath/FOR_RF6.reference.cnn"

# Correct the sample's coverage against the reference.
# NOTE(review): the input names here (FASTP_<ID>_RF_il_WES...) differ from the
# <ID>.targetcoverage.cnn / <ID>.antitargetcoverage.cnn written by the
# coverage steps above — verify which set of files is meant to be used.
cnvkit.py fix \
  "$Analysispath/FASTP_${ID}_RF_il_WES.RGadded.marked.realigned.fixed.targetcoverage.cnn" \
  "$Analysispath/FASTP_${ID}_RF_il_WES.antitargetcoverage.cnn" \
  "$Analysispath/FOR_${ID}.reference.cnn" \
  -o "$Analysispath/$ID.cnr"

# Legacy invocations switch to the fixed project directory for the remaining
# steps. When the caller supplied Analysispath explicitly ($4) it is kept, so
# segment/scatter read the .cnr that the fix step just wrote.
if [[ -z "${4:-}" ]]; then
  Analysispath=/data/project/MRS/0.Genotype/4.analysis/CNVkit
fi

cnvkit.py segment "$Analysispath/$ID.cnr" \
  -o "$Analysispath/$ID.cns"

cnvkit.py scatter "$Analysispath/$ID.cnr" \
  -s "$Analysispath/$ID.cns" \
  -o "$Analysispath/$ID.pdf" \
  --y-max 4 \
  --y-min -4

1.A.pipe_DeepVariant.sh

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#!/bin/bash
#$ -cwd
#
# Run DeepVariant (WES model) for one sample inside Docker.
#
# Usage:
#   1.A.pipe_DeepVariant.sh ID REF INTERVAL
#
# Arguments:
#   $1 ID        sample prefix; the input BAM is
#                /input/<ID>.RGadded.marked.realigned.fixed.recal.LA.bam
#   $2 REF       reference FASTA, passed unchanged to --ref
#                NOTE(review): presumably has to be a path visible inside the
#                container (e.g. under /reference) — confirm against callers
#   $3 INTERVAL  regions specification, passed unchanged to --regions
#
# Host directories mounted into the container:
#   /home/hiyoothere/MRS/3.aligned -> /input
#   /home/hiyoothere/MRS/DV        -> /output
#   /home/hiyoothere/reference     -> /reference

set -euo pipefail

usage='usage: 1.A.pipe_DeepVariant.sh ID REF INTERVAL'

ID=${1:?$usage}
REF=${2:?$usage}
INTERVAL=${3:?$usage}

# Single source of truth for the image tag (previously defined but unused
# while the tag was hard-coded a second time in the docker command).
readonly BIN_VERSION="1.0.0"

sudo docker run \
  -v "/home/hiyoothere/MRS/3.aligned":"/input" \
  -v "/home/hiyoothere/MRS/DV":"/output" \
  -v "/home/hiyoothere/reference":"/reference" \
  "google/deepvariant:${BIN_VERSION}" \
  /opt/deepvariant/bin/run_deepvariant \
  --model_type=WES \
  --ref="$REF" \
  --reads="/input/${ID}.RGadded.marked.realigned.fixed.recal.LA.bam" \
  --output_vcf="/output/${ID}.Target.DV.vcf" \
  --output_gvcf="/output/${ID}.Target.DV.gvcf" \
  --regions="$INTERVAL" \
  --num_shards=4

1.A.pipe_Strelka2.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
#!/bin/bash
#
# Configure and run the Strelka2 germline workflow for one exome sample.
#
# Usage:
#   1.A.pipe_Strelka2.sh ID REF AlignedPath INTERVAL
#
# Arguments:
#   $1 ID          sample prefix; also the name of the run directory
#   $2 REF         reference FASTA
#   $3 AlignedPath prefix prepended verbatim to the BAM file name (include the
#                  trailing "/")
#   $4 INTERVAL    call-regions file path WITHOUT the ".gz" suffix; ".gz" is
#                  appended before it is passed to --callRegions

# Stop if configuration fails, so runWorkflow.py is never launched from a
# missing or stale run directory.
set -euo pipefail

usage='usage: 1.A.pipe_Strelka2.sh ID REF AlignedPath INTERVAL'

ID=${1:?$usage}
REF=${2:?$usage}
AlignedPath=${3:?$usage}
INTERVAL=${4:?$usage}

readonly StrelkaPath=/opt/Yonsei/Strelka2/2.9.10/bin
readonly AnalysisPath=/data/project/MRS/0.Genotype/4.analysis/Strelka

run_dir=$AnalysisPath/$ID

# Step 1: generate the workflow script inside the per-sample run directory.
"$StrelkaPath/configureStrelkaGermlineWorkflow.py" \
  --bam="${AlignedPath}${ID}.RGadded.marked.realigned.fixed.recal.LA.bam" \
  --referenceFasta="$REF" \
  --exome --disableSequenceErrorEstimation \
  --callRegions="${INTERVAL}.gz" \
  --runDir="$run_dir"

# Step 2: execute the generated workflow locally with 20 parallel jobs.
"$run_dir/runWorkflow.py" -m local -j 20

Controls.tar.gz

92.2 MB
Binary file not shown.

0 commit comments

Comments
 (0)