Skip to content

Commit 1d72daf

Browse files
authored
Merge pull request #87 from Hoeze/master
Resolve int conversion bug when inferring UTR from CDS
2 parents e67fab6 + 64c3fd6 commit 1d72daf

File tree

2 files changed

+14
-10
lines changed

2 files changed

+14
-10
lines changed

kipoiseq/extractors/gtf.py

Lines changed: 13 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -346,23 +346,27 @@ def get_utr_from_gtf(
346346
"""
347347
if infer_from_cds:
348348
# get start and end of cds for each transcript
349-
cds = CDSFetcher.get_cds_from_gtf(df=df, on_error_warn=on_error_warn) \
350-
.groupby('transcript_id') \
349+
cds = (
350+
CDSFetcher.get_cds_from_gtf(df=df, on_error_warn=on_error_warn)
351+
.groupby('transcript_id')
351352
.agg({'Start': min, 'End': max})
353+
)
352354

353355
# join cds start and end to utr df
354-
utr_df = df.query("Feature == 'transcript'") \
355-
.set_index('transcript_id') \
356-
.join(cds, rsuffix="_cds") \
356+
utr_df = (
357+
df.query("Feature == 'transcript'")
358+
.set_index('transcript_id')
359+
.join(cds, rsuffix="_cds")
357360
.dropna(subset=['Start_cds', 'End_cds'], axis=0)
361+
)
358362

359363
if feature_type.upper() == "5UTR":
360-
utr_df['Start'] = np.where(utr_df['Strand'] == '+', int(utr_df['Start']), int(utr_df['End_cds']))
361-
utr_df['End'] = np.where(utr_df['Strand'] == '+', int(utr_df['Start_cds']), int(utr_df['End']))
364+
utr_df['Start'] = np.where(utr_df['Strand'] == '+', utr_df['Start'].astype("int"), utr_df['End_cds'].astype("int"))
365+
utr_df['End'] = np.where(utr_df['Strand'] == '+', utr_df['Start_cds'].astype("int"), utr_df['End'].astype("int"))
362366
utr_df['Feature'] = pd.Categorical("5UTR", categories = utr_df['Feature'])
363367
if feature_type.upper() == "3UTR":
364-
utr_df['Start'] = np.where(utr_df['Strand'] == '+', int(utr_df['End_cds']), int(utr_df['Start']))
365-
utr_df['End'] = np.where(utr_df['Strand'] == '+', int(utr_df['End']), int(utr_df['Start_cds']))
368+
utr_df['Start'] = np.where(utr_df['Strand'] == '+', utr_df['End_cds'].astype("int"), utr_df['Start'].astype("int"))
369+
utr_df['End'] = np.where(utr_df['Strand'] == '+', utr_df['End'].astype("int"), utr_df['Start_cds'].astype("int"))
366370
utr_df['Feature'] = pd.Categorical("3UTR", categories = utr_df['Feature'])
367371

368372
utr_df.drop(['Start_cds', 'End_cds'], axis=1, inplace=True)

kipoiseq/utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@ def to_scalar(obj):
1616
"""Convert numpy scalar to native scalar
1717
"""
1818
if isinstance(obj, np.generic):
19-
return np.asscalar(obj)
19+
return obj.item()
2020
else:
2121
return obj
2222

0 commit comments

Comments
 (0)