@@ -9,18 +9,18 @@ helpFunction()
99 echo -e " \t-o String, Path to output directory, required"
1010 echo -e " \t-i String, Path to bwa index of target transcriptome, required"
1111 echo -e " \t-g String, Path to transcript,gene and gene type dictionary file in csv format, required"
12+ echo -e " \t-j String, Job ID to be prepended to the output files and directories, optional, default=PROPERseq"
1213 echo -e " \t-t Int, Number of working threads, optional, default=2"
1314 echo -e " \t-r Char (T or F), remove intermediate files or not, optional, default=T"
1415 echo -e " \t-p Float, false discovery rate used to identify protein-protein interactions, optional, default=0.05"
1516 echo -e " \t-d Float, odds ratio cutoff used to identify protein-protein interactions, optional, default=1"
1617 echo -e " \t-c Float, read count cutoff coefficient used to identify protein-protein interactions, optional, default=4"
1718 echo -e " \t-h Print usage message"
1819 echo
19- echo
2020 exit 1 # Exit script after printing help
2121}
2222
23- while getopts " a:b:t:o:i:h:r:p:d:c:g:" opt
23+ while getopts " a:b:t:o:i:h:r:p:d:c:g:j: " opt
2424do
2525 case " $opt " in
2626 a ) read1=" $OPTARG " ;;
3333 d ) oddsCutoff=" $OPTARG " ;;
3434 c ) rcCutoff=" $OPTARG " ;;
3535 g ) geneDic=" $OPTARG " ;;
36+ j ) jobId=" $OPTARG " ;;
3637 h ) helpFunction ;;
3738 ? ) helpFunction ;; # Print helpFunction in case parameter is non-existent
3839 esac
@@ -90,82 +91,107 @@ then
9091fi
9192wait
9293
94+ if [ ! -z " $jobId " ]
95+ then
96+ jobId=${jobId} _
97+ else
98+ jobId=PROPERseq_
99+ fi
100+ wait
101+
102+
93103mkdir $outputDir
94- mkdir $outputDir /processedFastq
95- mkdir $outputDir /intermediateFiles
96- mkdir $outputDir /chimericReadPairs
97- mkdir $outputDir /alignment/
104+ mkdir $outputDir /${jobId} processedFastq
105+ mkdir $outputDir /${jobId} intermediateFiles
106+ mkdir $outputDir /${jobId} alignment/
98107wait
99108
100- python getCurrentDateTime_pub.py $outputDir 2> $outputDir /errorLog.txt
109+ python getCurrentDateTime_pub.py $outputDir $jobId
101110wait
102111
103- cutadapt -j $numT -a TGACCAAGACGCCAAAAACATAAAGAAAGGCCCGGCGCCATTGGTCA -a TGACCAATGGCGCCGGGCCTTTCTTTATGTTTTTGGCGTCTTGGTCA -g TTCACTGGAGGGGGGCTCACGAGTAAGGAGGATCCAACATG -g CATGTTGGATCCTCCTTACTCGTGAGCCCCCCTCCAGTGAA -O 23 $read1 > $outputDir /intermediateFiles/R1.cutadapt.fastq 2> $outputDir /intermediateFiles/R1.linkers.txt &
104112
105- cutadapt -j $numT -a TGACCAAGACGCCAAAAACATAAAGAAAGGCCCGGCGCCATTGGTCA -a TGACCAATGGCGCCGGGCCTTTCTTTATGTTTTTGGCGTCTTGGTCA -g TTCACTGGAGGGGGGCTCACGAGTAAGGAGGATCCAACATG -g CATGTTGGATCCTCCTTACTCGTGAGCCCCCCTCCAGTGAA -O 23 $read2 > $outputDir /intermediateFiles/R2.cutadapt.fastq 2> $outputDir /intermediateFiles/R2.linkers.txt &
113+ cat $outputDir /${jobId} intermediateFiles/runStart.txt > $outputDir /${jobId} proteinProteinInteractions.csv
114+ cat $outputDir /${jobId} intermediateFiles/runStart.txt > $outputDir /${jobId} errorLog.txt
115+ cat $outputDir /${jobId} intermediateFiles/runStart.txt > $outputDir /${jobId} chimericReadPairs.csv
116+ cat $outputDir /${jobId} intermediateFiles/runStart.txt > $outputDir /${jobId} summary.csv
106117wait
107118
108119
109- python processFastq_pub.py $outputDir /intermediateFiles/R1.cutadapt.lengthFiltered.fastq $outputDir /intermediateFiles/R1.cutadapt.fastq $outputDir yes 2>> $outputDir /errorLog.txt &
110- python processFastq_pub.py $outputDir /intermediateFiles/R2.cutadapt.lengthFiltered.fastq $outputDir /intermediateFiles/R2.cutadapt.fastq $outputDir no 2>> $outputDir /errorLog.txt &
120+ cutadapt -j $numT -a TGACCAAGACGCCAAAAACATAAAGAAAGGCCCGGCGCCATTGGTCA -a TGACCAATGGCGCCGGGCCTTTCTTTATGTTTTTGGCGTCTTGGTCA -g TTCACTGGAGGGGGGCTCACGAGTAAGGAGGATCCAACATG -g CATGTTGGATCCTCCTTACTCGTGAGCCCCCCTCCAGTGAA -O 23 $read1 > $outputDir /${jobId} intermediateFiles/R1.cutadapt.fastq 2> $outputDir /${jobId} intermediateFiles/R1.linkers.txt &
121+
122+ cutadapt -j $numT -a TGACCAAGACGCCAAAAACATAAAGAAAGGCCCGGCGCCATTGGTCA -a TGACCAATGGCGCCGGGCCTTTCTTTATGTTTTTGGCGTCTTGGTCA -g TTCACTGGAGGGGGGCTCACGAGTAAGGAGGATCCAACATG -g CATGTTGGATCCTCCTTACTCGTGAGCCCCCCTCCAGTGAA -O 23 $read2 > $outputDir /${jobId} intermediateFiles/R2.cutadapt.fastq 2> $outputDir /${jobId} intermediateFiles/R2.linkers.txt &
111123wait
112124
113- fastp -w $numT -i $outputDir /intermediateFiles/R1.cutadapt.lengthFiltered.fastq -I $outputDir /intermediateFiles/R2.cutadapt.lengthFiltered.fastq -o $outputDir /processedFastq/R1.cutadapt.fastp.fastq -O $outputDir /processedFastq/R2.cutadapt.fastp.fastq -h $outputDir /intermediateFiles/fastp.html -j $outputDir /intermediateFiles/fastp.json 2>> $outputDir /errorLog.txt
125+
126+ python processFastq_pub.py $outputDir /${jobId} intermediateFiles/R1.cutadapt.lengthFiltered.fastq $outputDir /${jobId} intermediateFiles/R1.cutadapt.fastq $outputDir yes ${jobId} 2>> $outputDir /${jobId} errorLog.txt &
127+ python processFastq_pub.py $outputDir /${jobId} intermediateFiles/R2.cutadapt.lengthFiltered.fastq $outputDir /${jobId} intermediateFiles/R2.cutadapt.fastq $outputDir no ${jobId} 2>> $outputDir /${jobId} errorLog.txt &
114128wait
115129
116- python writeNumReadPairs_pub.py $outputDir 2>> $outputDir /errorLog.txt
130+ fastp -w $numT -i $outputDir / ${jobId} intermediateFiles/R1.cutadapt.lengthFiltered.fastq -I $outputDir / ${jobId} intermediateFiles/R2.cutadapt.lengthFiltered.fastq -o $outputDir / ${jobId} processedFastq/R1.cutadapt.fastp.fastq -O $outputDir / ${jobId} processedFastq/R2.cutadapt.fastp.fastq -h $outputDir / ${jobId} intermediateFiles/fastp.html -j $outputDir / ${jobId} intermediateFiles/fastp.json 2>> $outputDir /${jobId} errorLog.txt
117131wait
118132
119- mkdir $outputDir /alignment/read1_tx
120- mkdir $outputDir /alignment/read2_tx
133+ python writeNumReadPairs_pub.py $outputDir $jobId 2>> $outputDir /${jobId} errorLog.txt
121134wait
122135
123- source= $outputDir /processedFastq
124- target= $outputDir /alignment/
136+ mkdir $outputDir /${jobId} alignment/read1_tx
137+ mkdir $outputDir /${jobId} alignment/read2_tx
125138wait
126139
140+ source=$outputDir /${jobId} processedFastq
141+ target=$outputDir /${jobId} alignment
142+ wait
127143
128- bwa mem -a -t $numT $bwaIndex $source /R1.cutadapt.fastp.fastq > $target /read1_tx/alignment.sam 2>> $outputDir /errorLog.txt &
129- bwa mem -a -t $numT $bwaIndex $source /R2.cutadapt.fastp.fastq > $target /read2_tx/alignment.sam 2>> $outputDir /errorLog.txt &
144+ half=$(( numT/ 2 ))
145+ bwa mem -a -t $half $bwaIndex $source /R1.cutadapt.fastp.fastq > $target /read1_tx/alignment.sam 2>> $outputDir /${jobId} errorLog.txt &
146+ bwa mem -a -t $half $bwaIndex $source /R2.cutadapt.fastp.fastq > $target /read2_tx/alignment.sam 2>> $outputDir /${jobId} errorLog.txt &
130147wait
131148
132- samtools view -H $target /read1_tx/alignment.sam > $target /read1_tx/header.sam 2>> $outputDir /errorLog.txt &
133- samtools view -H $target /read2_tx/alignment.sam > $target /read2_tx/header.sam 2>> $outputDir /errorLog.txt &
149+ samtools view -H $target /read1_tx/alignment.sam > $target /read1_tx/header.sam 2>> $outputDir /${jobId} errorLog.txt &
150+ samtools view -H $target /read2_tx/alignment.sam > $target /read2_tx/header.sam 2>> $outputDir /${jobId} errorLog.txt &
134151wait
135152
136- samtools view -F 4 $target /read1_tx/alignment.sam | cat $target /read1_tx/header.sam - | samtools view -b - > $target /read1_tx/mapped.bam 2>> $outputDir /errorLog.txt &
137- samtools view -F 4 $target /read2_tx/alignment.sam | cat $target /read2_tx/header.sam - | samtools view -b - > $target /read2_tx/mapped.bam 2>> $outputDir /errorLog.txt &
153+ samtools view -F 4 $target /read1_tx/alignment.sam | cat $target /read1_tx/header.sam - | samtools view -b - > $target /read1_tx/mapped.bam 2>> $outputDir /${jobId} errorLog.txt &
154+ samtools view -F 4 $target /read2_tx/alignment.sam | cat $target /read2_tx/header.sam - | samtools view -b - > $target /read2_tx/mapped.bam 2>> $outputDir /${jobId} errorLog.txt &
138155wait
139156
140- half= $(( numT / 2 ))
141- samtools sort -n -@ $half -o $target /read1_tx/mapped.sorted.bam $target /read1_tx/mapped.bam 2>> $outputDir /errorLog.txt &
142- samtools sort -n -@ $half -o $target /read2_tx/mapped.sorted.bam $target /read2_tx/mapped.bam 2>> $outputDir /errorLog.txt &
157+
158+ samtools sort -n -@ $half -o $target /read1_tx/mapped.sorted.bam $target /read1_tx/mapped.bam 2>> $outputDir /${jobId} errorLog.txt &
159+ samtools sort -n -@ $half -o $target /read2_tx/mapped.sorted.bam $target /read2_tx/mapped.bam 2>> $outputDir /${jobId} errorLog.txt &
143160wait
144161
145- bedtools bamtobed -cigar -i $target /read1_tx/mapped.sorted.bam > $target /read1_tx/mapped.sorted.bed 2>> $outputDir /errorLog.txt &
146- bedtools bamtobed -cigar -i $target /read2_tx/mapped.sorted.bam > $target /read2_tx/mapped.sorted.bed 2>> $outputDir /errorLog.txt &
162+ bedtools bamtobed -cigar -i $target /read1_tx/mapped.sorted.bam > $target /read1_tx/mapped.sorted.bed 2>> $outputDir /${jobId} errorLog.txt &
163+ bedtools bamtobed -cigar -i $target /read2_tx/mapped.sorted.bam > $target /read2_tx/mapped.sorted.bed 2>> $outputDir /${jobId} errorLog.txt &
147164wait
148165
149- python runBedFileSplit_pub.py $target 2>> $outputDir /errorLog.txt
166+ python runBedFileSplit_pub.py $target 2>> $outputDir /${jobId} errorLog.txt
150167wait
151168
152169for file in $target /read1_tx/mapped.sorted.bed_chunk*
153170do
154171i=${file#* chunk}
155- python chimericIdentification_pub .py $outputDir ${i} $geneDic $outputDir /intermediateFiles/chimStats_ ${i} .txt 2>> $outputDir /errorLog.txt
172+ python writeMappedReadPairs_pub .py $outputDir ${i} $geneDic $outputDir /${jobId} intermediateFiles/mappedStats_ ${i} .txt ${jobId} 2>> $outputDir /${jobId} errorLog.txt
156173done
157174wait
158175
159- python runDeduplication_pub.py $outputDir 2>> $outputDir /errorLog.txt
176+ cat $outputDir /${jobId} intermediateFiles/mappedReadPairs_all_bwa.header $outputDir /${jobId} intermediateFiles/mappedReadPairs_all_bwa.csv_* > $target /mappedReadPairs.csv
177+ wait
178+
179+ python runDeduplication_pub.py $outputDir $jobId 2>> $outputDir /${jobId} errorLog.txt
180+ wait
160181
161182rm $target /read1_tx/mapped.sorted.bed_chunk*
162183rm $target /read2_tx/mapped.sorted.bed_chunk*
163184wait
164185
165- python callPPIs_pub.py $outputDir $pCutoff $oddsCutoff $rcCutoff 2>> $outputDir /errorLog.txt
166-
167-
168-
186+ python callPPIs_pub.py $outputDir $pCutoff $oddsCutoff $rcCutoff $jobId 2>> $outputDir /${jobId} errorLog.txt
187+ wait
169188
170189
190+ if [ $removeFlag == ' T' ]
191+ then
192+ rm -r $outputDir /${jobId} intermediateFiles
193+ fi
194+ wait
171195
196+ gzip $outputDir /${jobId} processedFastq/R2.cutadapt.fastp.fastq &
197+ gzip $outputDir /${jobId} processedFastq/R1.cutadapt.fastp.fastq &
0 commit comments