@@ -346,23 +346,27 @@ def get_utr_from_gtf(
346346 """
347347 if infer_from_cds :
348348 # get start and end of cds for each transcript
349- cds = CDSFetcher .get_cds_from_gtf (df = df , on_error_warn = on_error_warn ) \
350- .groupby ('transcript_id' ) \
349+ cds = (
350+ CDSFetcher .get_cds_from_gtf (df = df , on_error_warn = on_error_warn )
351+ .groupby ('transcript_id' )
351352 .agg ({'Start' : min , 'End' : max })
353+ )
352354
353355 # join cds start and end to utr df
354- utr_df = df .query ("Feature == 'transcript'" ) \
355- .set_index ('transcript_id' ) \
356- .join (cds , rsuffix = "_cds" ) \
356+ utr_df = (
357+ df .query ("Feature == 'transcript'" )
358+ .set_index ('transcript_id' )
359+ .join (cds , rsuffix = "_cds" )
357360 .dropna (subset = ['Start_cds' , 'End_cds' ], axis = 0 )
361+ )
358362
359363 if feature_type .upper () == "5UTR" :
360- utr_df ['Start' ] = np .where (utr_df ['Strand' ] == '+' , int ( utr_df ['Start' ]), int ( utr_df ['End_cds' ]))
361- utr_df ['End' ] = np .where (utr_df ['Strand' ] == '+' , int ( utr_df ['Start_cds' ]), int ( utr_df ['End' ]))
364+ utr_df ['Start' ] = np .where (utr_df ['Strand' ] == '+' , utr_df ['Start' ]. astype ( "int" ), utr_df ['End_cds' ]. astype ( "int" ))
365+ utr_df ['End' ] = np .where (utr_df ['Strand' ] == '+' , utr_df ['Start_cds' ]. astype ( "int" ), utr_df ['End' ]. astype ( "int" ))
362366 utr_df ['Feature' ] = pd .Categorical ("5UTR" , categories = utr_df ['Feature' ])
363367 if feature_type .upper () == "3UTR" :
364- utr_df ['Start' ] = np .where (utr_df ['Strand' ] == '+' , int ( utr_df ['End_cds' ]), int ( utr_df ['Start' ]))
365- utr_df ['End' ] = np .where (utr_df ['Strand' ] == '+' , int ( utr_df ['End' ]), int ( utr_df ['Start_cds' ]))
368+ utr_df ['Start' ] = np .where (utr_df ['Strand' ] == '+' , utr_df ['End_cds' ]. astype ( "int" ), utr_df ['Start' ]. astype ( "int" ))
369+ utr_df ['End' ] = np .where (utr_df ['Strand' ] == '+' , utr_df ['End' ]. astype ( "int" ), utr_df ['Start_cds' ]. astype ( "int" ))
366370 utr_df ['Feature' ] = pd .Categorical ("3UTR" , categories = utr_df ['Feature' ])
367371
368372 utr_df .drop (['Start_cds' , 'End_cds' ], axis = 1 , inplace = True )
0 commit comments