|
17 | 17 | # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. |
18 | 18 |
|
19 | 19 |
|
from sys import argv, stderr
from itertools import product

# Positional command-line arguments:
#   argv[1] - path of the pre-filtered BEDPE file that is streamed and filtered
#   argv[2] - path of a text file holding one integer line index per line
# Both are required; a missing argument raises IndexError at start-up.
preFilteredBedpe = argv[1]
lineIndicesFile = argv[2]

24 | 26 |
|
def getIntronExonIndex(geneRawName):
    """Split a raw gene annotation into gene name, feature index and intron flag.

    The annotation is split on ``_``. The last chunk is treated as the
    feature tag (for example ``intron3`` or ``exon12``) and all chunks before
    it, re-joined with ``_``, as the gene name. When the first chunk is
    ``"."`` the gene name is ``"."``.

    Args:
        geneRawName: Annotation string such as ``"TP53_intron3"`` or ``"."``.

    Returns:
        A three-element list ``[geneName, index, intronStatus]`` where

        * ``geneName`` is the name described above,
        * ``index`` is the integer following ``intron``/``exon`` in the
          feature tag, or ``1`` when the tag contains neither word,
        * ``intronStatus`` is ``True`` when the tag contains ``"intron"`` or
          the gene name is ``"."``.

    Raises:
        ValueError: If the text after ``intron``/``exon`` in the feature tag
            is not a valid integer (for example an empty suffix).
    """
    geneChunks = geneRawName.split('_')
    featureTag = geneChunks[-1]

    if geneChunks[0] == ".":
        geneName = "."
    else:
        geneName = '_'.join(geneChunks[:-1])

    intronStatus = "intron" in featureTag or geneName == "."

    # A bare "." tag contains neither keyword, so it falls through to the
    # default index of 1 without needing a separate guard.
    index = 1
    if "intron" in featureTag:
        index = int(featureTag.split('intron')[-1])
    elif "exon" in featureTag:
        index = int(featureTag.split('exon')[-1])

    return [geneName, index, intronStatus]


| 40 | + |
38 | 41 |
|
# Load the set of record indices listed in lineIndicesFile. Records whose
# index is in this set bypass the gene-based filter below and are always
# printed.
lineIndices = set()
with open(lineIndicesFile) as f:
    for line in f:
        lineIndices.add(int(line.rstrip()))

# Stream the BEDPE file. lineIndex counts only non-comment lines, starting
# at 0, so it can be matched against the indices loaded above.
# NOTE(review): the nesting below was reconstructed from a diff view that had
# lost its indentation; confirm the placement of the `else` branch and of the
# final print against the repository copy.
lineIndex = -1
with open(preFilteredBedpe) as inputHandle:
    for line in inputHandle:
        if line[0] == '#':
            # Comment/header lines are neither counted nor printed.
            continue
        lineIndex += 1
        skipLine = False
        if lineIndex not in lineIndices:
            lineChunks = line.rstrip().split('\t')
            eventType = lineChunks[8]
            # Not used further in this script; kept so that a non-integer
            # value in this column still raises as before.
            eventScore = int(lineChunks[9])
            # Only records whose columns 0 and 3 match (presumably the two
            # breakpoint chromosomes - verify) and that are not tagged "INV"
            # are subject to the gene-based filter.
            if lineChunks[0] == lineChunks[3] and lineChunks[11] != "INV":
                gene1Raw = lineChunks[20]
                gene2Raw = lineChunks[30]
                # Each column is a comma-separated list; only the part before
                # the first ';' of every entry is used, and entries without
                # an underscore are ignored.
                gene1ListPre = [getIntronExonIndex(x)
                                for x in (y.split(';')[0] for y in gene1Raw.split(','))
                                if '_' in x]
                gene2ListPre = [getIntronExonIndex(x)
                                for x in (y.split(';')[0] for y in gene2Raw.split(','))
                                if '_' in x]
                for eventPair in product(gene1ListPre, gene2ListPre):
                    sameGene = eventPair[0][0] == eventPair[1][0]
                    if not sameGene or eventPair[0][0] == ".":
                        continue
                    indexDistance = abs(eventPair[0][1] - eventPair[1][1])
                    if eventPair[0][2] or eventPair[1][2]:
                        # At least one side is intronic: drop the record when
                        # both sides have the same or adjacent feature index.
                        if indexDistance < 2:
                            skipLine = True
                            break
                    else:
                        # Both sides exonic: drop only deletions between
                        # features with adjacent indices.
                        if eventType == "DEL" and indexDistance == 1:
                            skipLine = True
                            break
        if not skipLine:
            print(line.rstrip())
|
0 commit comments