Skip to content

Commit 2e5570c

Browse files
authored
Merge pull request #1 from DKFZ-ODCF/fix-yet-another-bug-with-bad-data
Fix yet another bug with bad data
2 parents 79c194d + 3816c69 commit 2e5570c

16 files changed

+869
-793
lines changed

SophiaWorkflow.jar

1 Byte
Binary file not shown.

buildversion.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
 2.2
-0
+1

resources/analysisTools/sophiaworkflow/RNAdecontaminationStep1.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,24 +16,25 @@
1616
# along with this program; if not, write to the Free Software
1717
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
1818

19-
from sys import argv,stderr
19+
from sys import argv, stderr
2020
from itertools import product
# Every (leftPad, rightPad) combination with each pad in 0..50. Each event's
# interval is emitted once per combination, widened by these pads.
broadProd = list(product(range(51), range(51)))
# NOTE(review): narrowProd is not referenced in the visible part of this
# script; it is kept in case code outside the shown hunk relies on it.
narrowProd = list(product(range(10), range(10)))


def iterIntervalRows(lines):
    """Yield tab-separated interval rows for intrachromosomal TRA/DEL events.

    Args:
        lines: iterable of BEDPE text lines. Lines starting with '#' are
            ignored and do not advance the line index.

    Yields:
        Strings of the form ``chrom<TAB>start<TAB>end<TAB>lineIndex`` where
        ``start = min(col1, col4) - leftPad`` and
        ``end = max(col1, col4) - 1 + rightPad`` for every pad pair in
        ``broadProd`` that satisfies ``start < end``.

    Raises:
        IndexError: if a non-comment line has fewer than the columns read
            here (index 8 always; index 9 for matching events).
        ValueError: if columns 1, 4 or 9 of a matching event are not integers.
    """
    wantedTypes = {"TRA", "DEL"}
    lineIndex = 0
    for line in lines:
        if line.startswith('#'):
            continue
        lineChunks = line.rstrip().split('\t')
        eventType = lineChunks[8]
        if lineChunks[0] == lineChunks[3] and eventType in wantedTypes:
            firstPos = int(lineChunks[1])
            secondPos = int(lineChunks[4])
            minPos = min(firstPos, secondPos)
            maxPos = max(firstPos, secondPos)
            # NOTE(review): eventScore is never used in the visible code; the
            # conversion is kept because it still rejects non-integer scores.
            eventScore = int(lineChunks[9])
            for leftPad, rightPad in broadProd:
                start = minPos - leftPad
                end = maxPos - 1 + rightPad
                if start < end:
                    yield '\t'.join(
                        str(field)
                        for field in (lineChunks[0], start, end, lineIndex)
                    )
        # NOTE(review): the diff rendering lost indentation. The index is
        # assumed to advance once per non-comment line, which matches how
        # RNAdecontaminationStep2.py numbers lines - confirm against the file.
        lineIndex += 1


def main(preFilteredBedpe):
    """Print the interval rows for the BEDPE file at *preFilteredBedpe*."""
    with open(preFilteredBedpe) as inputHandle:
        for row in iterIntervalRows(inputHandle):
            print(row)


if __name__ == "__main__":
    main(argv[1])

resources/analysisTools/sophiaworkflow/RNAdecontaminationStep2.py

Lines changed: 37 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -17,56 +17,59 @@
1717
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
1818

1919

20-
from sys import argv,stderr
20+
from sys import argv, stderr
2121
from itertools import product
# Command-line arguments: argv[1] is the BEDPE file that gets filtered and
# argv[2] is a text file holding one integer line index per line.
preFilteredBedpe = argv[1]
lineIndicesFile = argv[2]
25+
2426

2527
def getIntronExonIndex(geneRawName):
    """Split an annotation such as ``GENE_exon4`` into name, index and flag.

    The input is split on '_'. The last chunk is treated as the feature tag
    and everything before it, re-joined with '_', as the gene name. If the
    first chunk is '.', the gene name is '.'.

    Args:
        geneRawName: annotation string, e.g. ``"TP53_intron2"`` or ``"."``.

    Returns:
        A list ``[geneName, index, intronStatus]`` where ``index`` is the
        integer following the last ``intron``/``exon`` in the feature tag
        (1 when the tag contains neither) and ``intronStatus`` is True when
        the tag contains ``"intron"`` or the gene name is ``"."``.

    Raises:
        ValueError: if the text after ``intron``/``exon`` is not an integer.
    """
    geneChunks = geneRawName.split('_')
    featureTag = geneChunks[-1]
    geneName = "." if geneChunks[0] == "." else '_'.join(geneChunks[:-1])
    tagIsIntron = "intron" in featureTag
    index = 1
    # A tag of "." contains neither keyword, so it keeps the default index.
    if tagIsIntron:
        index = int(featureTag.rpartition('intron')[2])
    elif "exon" in featureTag:
        index = int(featureTag.rpartition('exon')[2])
    intronStatus = tagIsIntron or geneName == "."
    return [geneName, index, intronStatus]
40+
3841

def readLineIndices(path):
    """Return the set of integers stored one per line in the file at *path*.

    Raises:
        ValueError: if a line is not an integer after stripping whitespace.
    """
    with open(path) as f:
        return {int(line.rstrip()) for line in f}


def parseGeneColumn(geneRaw):
    """Parse a comma-separated gene column into getIntronExonIndex results.

    For each comma-separated entry only the text before the first ';' is
    used, and entries without an '_' are dropped.
    """
    names = (entry.split(';')[0] for entry in geneRaw.split(','))
    return [getIntronExonIndex(name) for name in names if '_' in name]


def isSkippedGenePair(eventType, annotation1, annotation2):
    """Tell whether one pair of annotations causes the line to be dropped.

    Args:
        eventType: value of column 8 of the BEDPE line.
        annotation1, annotation2: ``[geneName, index, intronStatus]`` lists
            as returned by getIntronExonIndex.

    Returns:
        False unless both annotations carry the same gene name and that name
        is not '.'. Otherwise: if either annotation has intronStatus set,
        True when the indices differ by less than 2; if neither has it, True
        only for a "DEL" event whose indices differ by exactly 1.
    """
    name1, index1, intron1 = annotation1
    name2, index2, intron2 = annotation2
    if name1 != name2 or name1 == ".":
        return False
    distance = abs(index1 - index2)
    # NOTE(review): the diff rendering lost indentation. The original `else:`
    # is assumed to pair with the intron-status test - confirm.
    if intron1 or intron2:
        return distance < 2
    return eventType == "DEL" and distance == 1


def iterKeptLines(lines, lineIndices):
    """Yield the right-stripped BEDPE lines that survive filtering.

    Lines starting with '#' are neither counted nor yielded. Non-comment
    lines are numbered from 0; a line whose number is in *lineIndices* is
    dropped. The remaining lines are dropped when columns 0 and 3 are equal,
    column 11 is not "INV", and any pairing of the annotations in columns 20
    and 30 satisfies isSkippedGenePair.

    Raises:
        IndexError: if a processed line lacks one of the columns read here.
        ValueError: if column 9 of a processed line is not an integer.
    """
    lineIndex = -1
    for line in lines:
        if line.startswith('#'):
            continue
        lineIndex += 1
        if lineIndex in lineIndices:
            continue
        lineChunks = line.rstrip().split('\t')
        eventType = lineChunks[8]
        # NOTE(review): eventScore is never used in the visible code; the
        # conversion is kept because it still rejects non-integer scores.
        eventScore = int(lineChunks[9])
        skipLine = False
        if lineChunks[0] == lineChunks[3] and lineChunks[11] != "INV":
            gene1ListPre = parseGeneColumn(lineChunks[20])
            gene2ListPre = parseGeneColumn(lineChunks[30])
            skipLine = any(
                isSkippedGenePair(eventType, first, second)
                for first, second in product(gene1ListPre, gene2ListPre)
            )
        # NOTE(review): the output step is assumed to sit inside the
        # "not in lineIndices" branch; otherwise listed lines would still be
        # printed - confirm against the real file.
        if not skipLine:
            yield line.rstrip()


def main(bedpePath, indicesPath):
    """Print the lines of *bedpePath* kept after applying both filters."""
    lineIndices = readLineIndices(indicesPath)
    with open(bedpePath) as inputHandle:
        for keptLine in iterKeptLines(inputHandle, lineIndices):
            print(keptLine)


if __name__ == "__main__":
    main(preFilteredBedpe, lineIndicesFile)

0 commit comments

Comments
 (0)