Skip to content

Commit 8b5f841

Browse files
committed
Parsing bugs
1 parent 885c858 commit 8b5f841

File tree

1 file changed

+39
-37
lines changed

1 file changed

+39
-37
lines changed

agfusion/parsers.py

Lines changed: 39 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,9 @@ def __init__(self,infile,logger):
3737
for line in fin.readlines():
3838
if re.findall('^#',line):
3939
line = line.rstrip().split('\t')
40-
assert line[0]=='#FusionName', 'Unrecognized STAR-Fusion input'
40+
if line[0]!='#FusionName' and line[0]!='#fusion_name':
41+
raise AssertionError('Unrecognized STAR-Fusion input for first column in header. Should be #FusionName or #fusion_name.')
42+
4143
assert line[4]=='LeftGene', 'Unrecognized STAR-Fusion input'
4244
assert line[5]=='LeftBreakpoint', 'Unrecognized STAR-Fusion input'
4345
assert line[6]=='RightGene', 'Unrecognized STAR-Fusion input'
@@ -54,12 +56,12 @@ def __init__(self,infile,logger):
5456
gene_3prime_junction = int(line[7].split(':')[1])
5557
self.fusions.append(
5658
{
57-
'ensembl_5prime':gene_5prime,
58-
'ensembl_3prime':gene_3prime,
59+
'gene5prime':gene_5prime,
60+
'gene3prime':gene_3prime,
5961
'alternative_name_5prime':gene_5prime_name,
6062
'alternative_name_3prime':gene_3prime_name,
61-
'junction_5prime':gene_5prime_junction,
62-
'junction_3prime':gene_3prime_junction
63+
'gene5prime_junction':gene_5prime_junction,
64+
'gene3prime_junction':gene_3prime_junction
6365
}
6466
)
6567
fin.close()
@@ -85,12 +87,12 @@ def __init__(self,infile,logger):
8587
gene_3prime_junction = int(line[6])
8688
self.fusions.append(
8789
{
88-
'ensembl_5prime':None,
89-
'ensembl_3prime':None,
90+
'gene5prime':None,
91+
'gene3prime':None,
9092
'alternative_name_5prime':gene_5prime_name,
9193
'alternative_name_3prime':gene_3prime_name,
92-
'junction_5prime':gene_5prime_junction,
93-
'junction_3prime':gene_3prime_junction
94+
'gene5prime_junction':gene_5prime_junction,
95+
'gene3prime_junction':gene_3prime_junction
9496
}
9597
)
9698
fin.close()
@@ -114,12 +116,12 @@ def __init__(self,infile,logger):
114116
line = line.strip().split('\t')
115117
self.fusions.append(
116118
{
117-
'ensembl_5prime':line[10],
118-
'ensembl_3prime':line[11],
119+
'gene5prime':line[10],
120+
'gene3prime':line[11],
119121
'alternative_name_5prime':line[0],
120122
'alternative_name_3prime':line[1],
121-
'junction_5prime':int(line[8].split(':')[1]),
122-
'junction_3prime':int(line[9].split(':')[1])
123+
'gene5prime_junction':int(line[8].split(':')[1]),
124+
'gene3prime_junction':int(line[9].split(':')[1])
123125
}
124126
)
125127
fin.close()
@@ -141,12 +143,12 @@ def __init__(self,infile,logger):
141143
if gene1 is not None and gene2 is not None:
142144
self.fusions.append(
143145
{
144-
'ensembl_5prime':None,
145-
'ensembl_3prime':None,
146+
'gene5prime':None,
147+
'gene3prime':None,
146148
'alternative_name_5prime':gene1,
147149
'alternative_name_3prime':gene2,
148-
'junction_5prime':int(gene1_junction),
149-
'junction_3prime':int(gene2_junction)
150+
'gene5prime_junction':int(gene1_junction),
151+
'gene3prime_junction':int(gene2_junction)
150152
}
151153
)
152154

@@ -160,12 +162,12 @@ def __init__(self,infile,logger):
160162
if gene1 is not None and gene2 is not None:
161163
self.fusions.append(
162164
{
163-
'ensembl_5prime':None,
164-
'ensembl_3prime':None,
165+
'gene5prime':None,
166+
'gene3prime':None,
165167
'alternative_name_5prime':gene1,
166168
'alternative_name_3prime':gene2,
167-
'junction_5prime':int(gene1_junction),
168-
'junction_3prime':int(gene2_junction)
169+
'gene5prime_junction':int(gene1_junction),
170+
'gene3prime_junction':int(gene2_junction)
169171
}
170172
)
171173

@@ -193,12 +195,12 @@ def __init__(self,infile,logger):
193195
if gene1 is not None and gene2 is not None:
194196
self.fusions.append(
195197
{
196-
'ensembl_5prime':None,
197-
'ensembl_3prime':None,
198+
'gene5prime':None,
199+
'gene3prime':None,
198200
'alternative_name_5prime':gene1,
199201
'alternative_name_3prime':gene2,
200-
'junction_5prime':int(gene1_junction),
201-
'junction_3prime':int(gene2_junction)
202+
'gene5prime_junction':int(gene1_junction),
203+
'gene3prime_junction':int(gene2_junction)
202204
}
203205
)
204206

@@ -224,12 +226,12 @@ def __init__(self,infile,logger):
224226
gene_3prime_junction = int(line[8])
225227
self.fusions.append(
226228
{
227-
'ensembl_5prime':None,
228-
'ensembl_3prime':None,
229+
'gene5prime':None,
230+
'gene3prime':None,
229231
'alternative_name_5prime':gene_5prime_name,
230232
'alternative_name_3prime':gene_3prime_name,
231-
'junction_5prime':gene_5prime_junction,
232-
'junction_3prime':gene_3prime_junction
233+
'gene5prime_junction':gene_5prime_junction,
234+
'gene3prime_junction':gene_3prime_junction
233235
}
234236
)
235237
fin.close()
@@ -255,12 +257,12 @@ def __init__(self,infile,logger):
255257
gene_3prime_junction = int(line[2])
256258
self.fusions.append(
257259
{
258-
'ensembl_5prime':None,
259-
'ensembl_3prime':None,
260+
'gene5prime':None,
261+
'gene3prime':None,
260262
'alternative_name_5prime':gene_5prime_name,
261263
'alternative_name_3prime':gene_3prime_name,
262-
'junction_5prime':gene_5prime_junction,
263-
'junction_3prime':gene_3prime_junction
264+
'gene5prime_junction':gene_5prime_junction,
265+
'gene3prime_junction':gene_3prime_junction
264266
}
265267
)
266268
fin.close()
@@ -283,12 +285,12 @@ def __init__(self,infile,logger):
283285
gene_3prime_junction = int(line[6])
284286
self.fusions.append(
285287
{
286-
'ensembl_5prime':gene_5prime,
287-
'ensembl_3prime':gene_3prime,
288+
'gene5prime':gene_5prime,
289+
'gene3prime':gene_3prime,
288290
'alternative_name_5prime':gene_5prime_name,
289291
'alternative_name_3prime':gene_3prime_name,
290-
'junction_5prime':gene_5prime_junction,
291-
'junction_3prime':gene_3prime_junction
292+
'gene5prime_junction':gene_5prime_junction,
293+
'gene3prime_junction':gene_3prime_junction
292294
}
293295
)
294296
fin.close()

0 commit comments

Comments (0)