Skip to content

Commit 02c6494

Browse files
committed
mapped read pairs added
1 parent 44506a8 commit 02c6494

File tree

6 files changed

+109
-75
lines changed

6 files changed

+109
-75
lines changed

bin/PROPERseqTools

Lines changed: 61 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,18 @@ helpFunction()
99
echo -e "\t-o String, Path to output directory, required"
1010
echo -e "\t-i String, Path to bwa index of target transcriptome, required"
1111
echo -e "\t-g String, Path to transcript,gene and gene type dictionary file in csv format, required"
12+
echo -e "\t-j String, Job ID to be prepended to the output files and directories, optional, default=PROPERseq"
1213
echo -e "\t-t Int, Number of working threads, optional, default=2"
1314
echo -e "\t-r Char (T or F), remove intermediate files or not, optional, default=T"
1415
echo -e "\t-p Float, false discovery rate used to identify protein-protein interactions, optional, default=0.05"
1516
echo -e "\t-d Float, odds ratio cutoff used to identify protein-protein interactions, optional, default=1"
1617
echo -e "\t-c Float, read count cutoff coefficient used to identify protein-protein interactions, optional, default=4"
1718
echo -e "\t-h Print usage message"
1819
echo
19-
echo
2020
exit 1 # Exit script after printing help
2121
}
2222

23-
while getopts "a:b:t:o:i:h:r:p:d:c:g:" opt
23+
while getopts "a:b:t:o:i:h:r:p:d:c:g:j:" opt
2424
do
2525
case "$opt" in
2626
a ) read1="$OPTARG" ;;
@@ -33,6 +33,7 @@ do
3333
d ) oddsCutoff="$OPTARG" ;;
3434
c ) rcCutoff="$OPTARG" ;;
3535
g ) geneDic="$OPTARG" ;;
36+
j ) jobId="$OPTARG" ;;
3637
h ) helpFunction ;;
3738
? ) helpFunction ;; # Print helpFunction in case parameter is non-existent
3839
esac
@@ -90,82 +91,107 @@ then
9091
fi
9192
wait
9293

94+
if [ ! -z "$jobId" ]
95+
then
96+
jobId=${jobId}_
97+
else
98+
jobId=PROPERseq_
99+
fi
100+
wait
101+
102+
93103
mkdir $outputDir
94-
mkdir $outputDir/processedFastq
95-
mkdir $outputDir/intermediateFiles
96-
mkdir $outputDir/chimericReadPairs
97-
mkdir $outputDir/alignment/
104+
mkdir $outputDir/${jobId}processedFastq
105+
mkdir $outputDir/${jobId}intermediateFiles
106+
mkdir $outputDir/${jobId}alignment/
98107
wait
99108

100-
python getCurrentDateTime_pub.py $outputDir 2>$outputDir/errorLog.txt
109+
python getCurrentDateTime_pub.py $outputDir $jobId
101110
wait
102111

103-
cutadapt -j $numT -a TGACCAAGACGCCAAAAACATAAAGAAAGGCCCGGCGCCATTGGTCA -a TGACCAATGGCGCCGGGCCTTTCTTTATGTTTTTGGCGTCTTGGTCA -g TTCACTGGAGGGGGGCTCACGAGTAAGGAGGATCCAACATG -g CATGTTGGATCCTCCTTACTCGTGAGCCCCCCTCCAGTGAA -O 23 $read1 > $outputDir/intermediateFiles/R1.cutadapt.fastq 2> $outputDir/intermediateFiles/R1.linkers.txt &
104112

105-
cutadapt -j $numT -a TGACCAAGACGCCAAAAACATAAAGAAAGGCCCGGCGCCATTGGTCA -a TGACCAATGGCGCCGGGCCTTTCTTTATGTTTTTGGCGTCTTGGTCA -g TTCACTGGAGGGGGGCTCACGAGTAAGGAGGATCCAACATG -g CATGTTGGATCCTCCTTACTCGTGAGCCCCCCTCCAGTGAA -O 23 $read2 > $outputDir/intermediateFiles/R2.cutadapt.fastq 2> $outputDir/intermediateFiles/R2.linkers.txt &
113+
cat $outputDir/${jobId}intermediateFiles/runStart.txt > $outputDir/${jobId}proteinProteinInteractions.csv
114+
cat $outputDir/${jobId}intermediateFiles/runStart.txt > $outputDir/${jobId}errorLog.txt
115+
cat $outputDir/${jobId}intermediateFiles/runStart.txt > $outputDir/${jobId}chimericReadPairs.csv
116+
cat $outputDir/${jobId}intermediateFiles/runStart.txt > $outputDir/${jobId}summary.csv
106117
wait
107118

108119

109-
python processFastq_pub.py $outputDir/intermediateFiles/R1.cutadapt.lengthFiltered.fastq $outputDir/intermediateFiles/R1.cutadapt.fastq $outputDir yes 2>> $outputDir/errorLog.txt &
110-
python processFastq_pub.py $outputDir/intermediateFiles/R2.cutadapt.lengthFiltered.fastq $outputDir/intermediateFiles/R2.cutadapt.fastq $outputDir no 2>> $outputDir/errorLog.txt &
120+
cutadapt -j $numT -a TGACCAAGACGCCAAAAACATAAAGAAAGGCCCGGCGCCATTGGTCA -a TGACCAATGGCGCCGGGCCTTTCTTTATGTTTTTGGCGTCTTGGTCA -g TTCACTGGAGGGGGGCTCACGAGTAAGGAGGATCCAACATG -g CATGTTGGATCCTCCTTACTCGTGAGCCCCCCTCCAGTGAA -O 23 $read1 > $outputDir/${jobId}intermediateFiles/R1.cutadapt.fastq 2> $outputDir/${jobId}intermediateFiles/R1.linkers.txt &
121+
122+
cutadapt -j $numT -a TGACCAAGACGCCAAAAACATAAAGAAAGGCCCGGCGCCATTGGTCA -a TGACCAATGGCGCCGGGCCTTTCTTTATGTTTTTGGCGTCTTGGTCA -g TTCACTGGAGGGGGGCTCACGAGTAAGGAGGATCCAACATG -g CATGTTGGATCCTCCTTACTCGTGAGCCCCCCTCCAGTGAA -O 23 $read2 > $outputDir/${jobId}intermediateFiles/R2.cutadapt.fastq 2> $outputDir/${jobId}intermediateFiles/R2.linkers.txt &
111123
wait
112124

113-
fastp -w $numT -i $outputDir/intermediateFiles/R1.cutadapt.lengthFiltered.fastq -I $outputDir/intermediateFiles/R2.cutadapt.lengthFiltered.fastq -o $outputDir/processedFastq/R1.cutadapt.fastp.fastq -O $outputDir/processedFastq/R2.cutadapt.fastp.fastq -h $outputDir/intermediateFiles/fastp.html -j $outputDir/intermediateFiles/fastp.json 2>> $outputDir/errorLog.txt
125+
126+
python processFastq_pub.py $outputDir/${jobId}intermediateFiles/R1.cutadapt.lengthFiltered.fastq $outputDir/${jobId}intermediateFiles/R1.cutadapt.fastq $outputDir yes ${jobId} 2>> $outputDir/${jobId}errorLog.txt &
127+
python processFastq_pub.py $outputDir/${jobId}intermediateFiles/R2.cutadapt.lengthFiltered.fastq $outputDir/${jobId}intermediateFiles/R2.cutadapt.fastq $outputDir no ${jobId} 2>> $outputDir/${jobId}errorLog.txt &
114128
wait
115129

116-
python writeNumReadPairs_pub.py $outputDir 2>> $outputDir/errorLog.txt
130+
fastp -w $numT -i $outputDir/${jobId}intermediateFiles/R1.cutadapt.lengthFiltered.fastq -I $outputDir/${jobId}intermediateFiles/R2.cutadapt.lengthFiltered.fastq -o $outputDir/${jobId}processedFastq/R1.cutadapt.fastp.fastq -O $outputDir/${jobId}processedFastq/R2.cutadapt.fastp.fastq -h $outputDir/${jobId}intermediateFiles/fastp.html -j $outputDir/${jobId}intermediateFiles/fastp.json 2>> $outputDir/${jobId}errorLog.txt
117131
wait
118132

119-
mkdir $outputDir/alignment/read1_tx
120-
mkdir $outputDir/alignment/read2_tx
133+
python writeNumReadPairs_pub.py $outputDir $jobId 2>> $outputDir/${jobId}errorLog.txt
121134
wait
122135

123-
source=$outputDir/processedFastq
124-
target=$outputDir/alignment/
136+
mkdir $outputDir/${jobId}alignment/read1_tx
137+
mkdir $outputDir/${jobId}alignment/read2_tx
125138
wait
126139

140+
source=$outputDir/${jobId}processedFastq
141+
target=$outputDir/${jobId}alignment
142+
wait
127143

128-
bwa mem -a -t $numT $bwaIndex $source/R1.cutadapt.fastp.fastq > $target/read1_tx/alignment.sam 2>> $outputDir/errorLog.txt &
129-
bwa mem -a -t $numT $bwaIndex $source/R2.cutadapt.fastp.fastq > $target/read2_tx/alignment.sam 2>> $outputDir/errorLog.txt &
144+
half=$((numT/2))
145+
bwa mem -a -t $half $bwaIndex $source/R1.cutadapt.fastp.fastq > $target/read1_tx/alignment.sam 2>> $outputDir/${jobId}errorLog.txt &
146+
bwa mem -a -t $half $bwaIndex $source/R2.cutadapt.fastp.fastq > $target/read2_tx/alignment.sam 2>> $outputDir/${jobId}errorLog.txt &
130147
wait
131148

132-
samtools view -H $target/read1_tx/alignment.sam > $target/read1_tx/header.sam 2>> $outputDir/errorLog.txt &
133-
samtools view -H $target/read2_tx/alignment.sam > $target/read2_tx/header.sam 2>> $outputDir/errorLog.txt &
149+
samtools view -H $target/read1_tx/alignment.sam > $target/read1_tx/header.sam 2>> $outputDir/${jobId}errorLog.txt &
150+
samtools view -H $target/read2_tx/alignment.sam > $target/read2_tx/header.sam 2>> $outputDir/${jobId}errorLog.txt &
134151
wait
135152

136-
samtools view -F 4 $target/read1_tx/alignment.sam | cat $target/read1_tx/header.sam - | samtools view -b - > $target/read1_tx/mapped.bam 2>> $outputDir/errorLog.txt &
137-
samtools view -F 4 $target/read2_tx/alignment.sam | cat $target/read2_tx/header.sam - | samtools view -b - > $target/read2_tx/mapped.bam 2>> $outputDir/errorLog.txt &
153+
samtools view -F 4 $target/read1_tx/alignment.sam | cat $target/read1_tx/header.sam - | samtools view -b - > $target/read1_tx/mapped.bam 2>> $outputDir/${jobId}errorLog.txt &
154+
samtools view -F 4 $target/read2_tx/alignment.sam | cat $target/read2_tx/header.sam - | samtools view -b - > $target/read2_tx/mapped.bam 2>> $outputDir/${jobId}errorLog.txt &
138155
wait
139156

140-
half=$((numT/2))
141-
samtools sort -n -@ $half -o $target/read1_tx/mapped.sorted.bam $target/read1_tx/mapped.bam 2>> $outputDir/errorLog.txt &
142-
samtools sort -n -@ $half -o $target/read2_tx/mapped.sorted.bam $target/read2_tx/mapped.bam 2>> $outputDir/errorLog.txt &
157+
158+
samtools sort -n -@ $half -o $target/read1_tx/mapped.sorted.bam $target/read1_tx/mapped.bam 2>> $outputDir/${jobId}errorLog.txt &
159+
samtools sort -n -@ $half -o $target/read2_tx/mapped.sorted.bam $target/read2_tx/mapped.bam 2>> $outputDir/${jobId}errorLog.txt &
143160
wait
144161

145-
bedtools bamtobed -cigar -i $target/read1_tx/mapped.sorted.bam > $target/read1_tx/mapped.sorted.bed 2>> $outputDir/errorLog.txt &
146-
bedtools bamtobed -cigar -i $target/read2_tx/mapped.sorted.bam > $target/read2_tx/mapped.sorted.bed 2>> $outputDir/errorLog.txt &
162+
bedtools bamtobed -cigar -i $target/read1_tx/mapped.sorted.bam > $target/read1_tx/mapped.sorted.bed 2>> $outputDir/${jobId}errorLog.txt &
163+
bedtools bamtobed -cigar -i $target/read2_tx/mapped.sorted.bam > $target/read2_tx/mapped.sorted.bed 2>> $outputDir/${jobId}errorLog.txt &
147164
wait
148165

149-
python runBedFileSplit_pub.py $target 2>> $outputDir/errorLog.txt
166+
python runBedFileSplit_pub.py $target 2>> $outputDir/${jobId}errorLog.txt
150167
wait
151168

152169
for file in $target/read1_tx/mapped.sorted.bed_chunk*
153170
do
154171
i=${file#*chunk}
155-
python chimericIdentification_pub.py $outputDir ${i} $geneDic $outputDir/intermediateFiles/chimStats_${i}.txt 2>> $outputDir/errorLog.txt
172+
python writeMappedReadPairs_pub.py $outputDir ${i} $geneDic $outputDir/${jobId}intermediateFiles/mappedStats_${i}.txt ${jobId} 2>> $outputDir/${jobId}errorLog.txt
156173
done
157174
wait
158175

159-
python runDeduplication_pub.py $outputDir 2>> $outputDir/errorLog.txt
176+
cat $outputDir/${jobId}intermediateFiles/mappedReadPairs_all_bwa.header $outputDir/${jobId}intermediateFiles/mappedReadPairs_all_bwa.csv_* > $target/mappedReadPairs.csv
177+
wait
178+
179+
python runDeduplication_pub.py $outputDir $jobId 2>> $outputDir/${jobId}errorLog.txt
180+
wait
160181

161182
rm $target/read1_tx/mapped.sorted.bed_chunk*
162183
rm $target/read2_tx/mapped.sorted.bed_chunk*
163184
wait
164185

165-
python callPPIs_pub.py $outputDir $pCutoff $oddsCutoff $rcCutoff 2>> $outputDir/errorLog.txt
166-
167-
168-
186+
python callPPIs_pub.py $outputDir $pCutoff $oddsCutoff $rcCutoff $jobId 2>> $outputDir/${jobId}errorLog.txt
187+
wait
169188

170189

190+
if [ $removeFlag == 'T' ]
191+
then
192+
rm -r $outputDir/${jobId}intermediateFiles
193+
fi
194+
wait
171195

196+
gzip $outputDir/${jobId}processedFastq/R2.cutadapt.fastp.fastq &
197+
gzip $outputDir/${jobId}processedFastq/R1.cutadapt.fastp.fastq &

bin/callPPIs_pub.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
from collections import defaultdict
2+
import scipy
23
import glob
34
import scipy.stats as stats
45
from rpy2.robjects.packages import importr
56
from rpy2.robjects.vectors import FloatVector
67
import sys
78

8-
targetFile=open('%s/proteinProteinInteractions.csv'%(sys.argv[1]),'w')
9+
targetFile=open('%s/%sproteinProteinInteractions.csv'%(sys.argv[1],sys.argv[5]),'a')
910

1011
def getIntCount(filePath):
1112
dicIntCount_positive=defaultdict(int)
@@ -28,10 +29,6 @@ def getIntCount(filePath):
2829
sorted_x_positive = sorted(dicIntCount_positive.items(), key=lambda kv: kv[1],reverse=True)
2930
return dicIntCount_positive,dicProteinCount_positive,sorted_x_positive
3031

31-
dicIntCount_positive_8b,dicProteinCount_positive_8b,sorted_x_positive_8b=getIntCount(
32-
'%s/chimericReadPairs.csv'%(sys.argv[1]))
33-
34-
3532
def identifyPPIs_chimericAdj(sorted_x_positive1_9,dicIntCount_positive1_9,dicProteinCount_positive1_9,coEff,pCutOff,oddsCutoff):
3633
factor=sum([x[1] for x in sorted_x_positive1_9])/len(sorted_x_positive1_9)
3734
chimTotal=sum([x[1] for x in sorted_x_positive1_9])
@@ -84,27 +81,32 @@ def identifyPPIs_chimericAdj(sorted_x_positive1_9,dicIntCount_positive1_9,dicPro
8481
rcList1.append(rcc)
8582
chiSig.append(chichi)
8683
print (len(set(list1)))
87-
return list1,rcList1,orSig_1,chiSig,pvalueSig
84+
return list1,rcList1,orSig_1,chiSig,pvalueSig_1
85+
86+
87+
dicIntCount_positive_8b,dicProteinCount_positive_8b,sorted_x_positive_8b=getIntCount(
88+
'%s/%schimericReadPairs.csv'%(sys.argv[1],sys.argv[5]))
8889

8990

9091
list_PPI,rcList_PPI,orList_PPI,chiList_PPI,pvalueList_PPI=identifyPPIs_chimericAdj(
9192
sorted_x_positive_8b,dicIntCount_positive_8b,dicProteinCount_positive_8b,float(sys.argv[4]),float(sys.argv[2]),float(sys.argv[3]))
9293

9394
#write into the file
9495
targetFile.write('Protein1,Protein2,ReadCount,FDR,oddsRatio,chiSquareStat\n')
95-
for ha in list_super:
96+
for i in range(len(list_PPI)):
97+
ha=list_PPI[i]
9698
[gene1,gene2]=ha.split(';')
9799
oddsRatio=str(orList_PPI[i])
98100
chichi=str(chiList_PPI[i])
99101
pp=str(pvalueList_PPI[i])
100-
rc=str(dicProteinCount_positive_8b[ha])
102+
rc=str(dicIntCount_positive_8b[ha])
101103
infoList=','.join([gene1,gene2,rc,pp,oddsRatio,chichi])
102104
targetFile.write(infoList)
103105
targetFile.write('\n')
104106

105107
targetFile.close()
106108

107109

108-
targetFile==open('%s/summary.csv'%(sys.argv[1]),'a')
110+
targetFile=open('%s/%ssummary.csv'%(sys.argv[1],sys.argv[5]),'a')
109111
targetFile.write('#protein-protein_interactions,%d\n'%(len(list_PPI)))
110112
targetFile.close()

bin/getCurrentDateTime_pub.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33

44
# get current date
55
datetime_object = datetime.now()
6-
targetFile=open('%s/intermediateFiles/runStart.txt'%(sys.argv[1]),'w')
7-
targetFile.write('#Run starts at %s. Job ID:\n'%(str(datetime_object)))
6+
targetFile=open('%s/%sintermediateFiles/runStart.txt'%(sys.argv[1],sys.argv[2]),'w')
7+
targetFile.write('#Run starts at %s. Job ID:%s\n'%(str(datetime_object),sys.argv[2][:-1]))
88
targetFile.close()

bin/processFastq_pub.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,6 @@
2525
targetFile.close()
2626
total=total/4
2727
if sys.argv[4]=='yes':
28-
targetFile=open('%s/summary.csv'%(sys.argv[3]),'w')
28+
targetFile=open('%s/%ssummary.csv'%(sys.argv[3],sys.argv[5]),'a')
2929
targetFile.write('#input_read_pairs,%d\n'%(total))
3030
targetFile.close()

bin/runDeduplication_pub.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,11 @@
22
from collections import defaultdict
33
import sys
44
chimNum=0
5-
targetFile=open('%s/chimericReadPairs.csv'%(sys.argv[1]),'w')
5+
targetFile=open('%s/%schimericReadPairs.csv'%(sys.argv[1],sys.argv[2]),'w')
66
targetFile.write('readId,R1Tx,R1start,R1end,R1Gene,R1Cigar,R2Tx,R2start,R2end,R2Gene,R2Cigar\n')
77
dicMapInfo_count=defaultdict(int)
88
for i in [1]:
9-
fileList=glob.glob('%s/intermediateFiles/chimericReadPairs_all_bwa.csv_*'%(sys.argv[1]))
9+
fileList=glob.glob('%s/%sintermediateFiles/chimericReadPairs_all_bwa.csv_*'%(sys.argv[1],sys.argv[2]))
1010
for file in fileList:
1111
with open(file,'r') as f:
1212
next(f)
@@ -28,14 +28,14 @@
2828

2929

3030
mapSum=0
31-
fileList=glob.glob('%s/intermediateFiles/chimStats_*.txt'%(sys.argv[1]))
31+
fileList=glob.glob('%s/%sintermediateFiles/mappedStats_*.txt'%(sys.argv[1],sys.argv[2]))
3232
for file in fileList:
3333
with open(file,'r') as f:
3434
for line in f:
3535
splitLine=line.strip().split(',')
3636
mapSum+=int(splitLine[0])
3737

38-
targetFile=open('%s/summary.csv'%(sys.argv[1]),'a')
38+
targetFile=open('%s/%ssummary.csv'%(sys.argv[1],sys.argv[2]),'a')
3939
targetFile.write('#protein-coding_gene_mapped_read_pairs,%d\n'%(mapSum))
4040
targetFile.write('#chimeric_read_pairs,%d\n'%(chimNum))
4141
targetFile.close()
Lines changed: 30 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
1+
import glob
12
from collections import defaultdict
23
import sys
34
from cigar import Cigar
45

6+
targetFile=open('%s/%sintermediateFiles/mappedReadPairs_all_bwa.header'%(sys.argv[1],sys.argv[5]),'w')
7+
targetFile.write('ReadId,Read1Gene,Read2Gene,R1transcript,R1Start,R1End,Read1Cigar,Read1GeneType,Read1LesserGenes,R2transcript,R2Start,R2End,Read2Cigar,Read2GeneType,Read2LesserGenes\n')
8+
targetFile.close()
59

610
#read in refseq dic
711
dicIdGeneName={}
@@ -11,9 +15,7 @@
1115
splitLine=line.strip().split(',')
1216
dicIdGeneName[splitLine[0]]=splitLine[1]
1317
dicIdGeneType[splitLine[0]]=splitLine[2]
14-
15-
16-
18+
1719
#read in read1 and read2 file
1820
dicReadIdGene1=defaultdict(list)
1921
dicReadIdGene2=defaultdict(list)
@@ -24,7 +26,7 @@
2426

2527
#Count protein-coding mapped read pairs
2628
dicRead1_count=defaultdict(int)
27-
with open('%s/alignment/read1_tx/mapped.sorted.bed_chunk%s'%(sys.argv[1],sys.argv[2]),'r') as f:
29+
with open('%s/%salignment/read1_tx/mapped.sorted.bed_chunk%s'%(sys.argv[1],sys.argv[5],sys.argv[2]),'r') as f:
2830
for line in f:
2931
splitLine=line.strip().split('\t')
3032
readId=splitLine[3]
@@ -37,7 +39,7 @@
3739
dicIdtoCigar1[readId].append(cigar1)
3840

3941
idList=[]
40-
with open('%s/alignment/read2_tx/mapped.sorted.bed_chunk%s'%(sys.argv[1],sys.argv[2]),'r') as f:
42+
with open('%s/%salignment/read2_tx/mapped.sorted.bed_chunk%s'%(sys.argv[1],sys.argv[5],sys.argv[2]),'r') as f:
4143
for line in f:
4244
splitLine=line.strip().split('\t')
4345
readId=splitLine[3]
@@ -49,19 +51,24 @@
4951
dicReadIdPos2[splitLine[3]].append(splitLine[:3])
5052
dicIdtoCigar2[readId].append(cigar2)
5153

52-
idList=list(set(idList))
53-
#identify chimeric reads
54-
targetFile=open('%s/intermediateFiles/chimericReadPairs_all_bwa.csv_%s'%(sys.argv[1],sys.argv[2]),'w')
55-
targetFile.write('readId,R1Tx,R1start,R1end,R1Gene,R1Cigar,R2Tx,R2start,R2end,R2Gene,R2Cigar\n')
56-
count=0
54+
idList=list(set(idList))
55+
56+
targetFile1=open('%s/%sintermediateFiles/mappedReadPairs_all_bwa.csv_%s'%(sys.argv[1],sys.argv[5],sys.argv[2]),'a')
57+
targetFile2=open('%s/%sintermediateFiles/chimericReadPairs_all_bwa.csv_%s'%(sys.argv[1],sys.argv[5],sys.argv[2]),'a')
58+
targetFile2.write('readId,R1Tx,R1start,R1end,R1Gene,R1Cigar,R2Tx,R2start,R2end,R2Gene,R2Cigar\n')
5759
for readId in idList:
58-
geneList1=dicReadIdGene1[readId]
59-
geneList2=dicReadIdGene2[readId]
60-
#no common genes
60+
geneList1=';'.join(list(dicReadIdGene1[readId]))
61+
geneList2=';'.join(list(dicReadIdGene2[readId]))
62+
cigar1,cigar2=Cigar(dicIdtoCigar1[readId][0]),Cigar(dicIdtoCigar2[readId][0])
63+
[txId1,start1,end1]=dicReadIdPos1[readId][0]
64+
[txId2,start2,end2]=dicReadIdPos2[readId][0]
65+
gene1,gene2=dicIdGeneName[txId1],dicIdGeneName[txId2]
66+
type1,type2=dicIdGeneType[txId1],dicIdGeneType[txId2]
67+
targetFile1.write('%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n'
68+
%(readId,gene1,gene2,txId1,start1,end1,str(cigar1),type1,geneList1,txId2,start2,end2,str(cigar2),type2,geneList2))
69+
6170
if len(set(geneList1)&set(geneList2))==0:
62-
[txId1,start1,end1]=dicReadIdPos1[readId][0]
63-
[txId2,start2,end2]=dicReadIdPos2[readId][0]
64-
if dicIdGeneType[txId1]=='mRNA' and dicIdGeneType[txId2]=='mRNA':
71+
if type1=='mRNA' and type2=='mRNA':
6572
#check cigar string
6673
cigar1,cigar2=Cigar(dicIdtoCigar1[readId][0]),Cigar(dicIdtoCigar2[readId][0])
6774
cigar1List=list(cigar1.items())
@@ -83,15 +90,14 @@
8390
if flag1 and flag2:
8491
gene1,gene2=dicIdGeneName[txId1],dicIdGeneName[txId2]
8592
#write file
86-
targetFile.write('%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n'
93+
targetFile2.write('%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s\n'
8794
%(readId,txId1,start1,end1,gene1,str(cigar1),txId2,start2,end2,gene2,str(cigar2)))
88-
count+=1
89-
90-
targetFile.close()
9195

92-
targetFile=open(sys.argv[4],'w')
93-
targetFile.write('%d,%d'%(len(idList),count))
94-
targetFile.close()
95-
96+
97+
targetFile1.close()
98+
targetFile2.close()
9699

97100

101+
targetFile=open(sys.argv[4],'w')
102+
targetFile.write('%d'%(len(idList)))
103+
targetFile.close()

0 commit comments

Comments
 (0)