77from Bio .Seq import Seq
88from Bio .SeqRecord import SeqRecord
99try :
10- import pgzip as gzip_module
11- print ("\n Using pgzip for gzip operations.\n " )
10+ import pgzip as gzip_module
11+ print (f "\n Using pgzip for gzip operations.\n " )
1212except ImportError :
13- import gzip as gzip_module
14- print ("\n pgzip not found, falling back to gzip.\n " )
13+ import gzip as gzip_module
14+ print (f "\n pgzip not found, falling back to gzip.\n " )
1515
1616
1717def main ():
@@ -42,7 +42,7 @@ def main():
4242 add_new_chrom_seq (in_arg , index , prev_fasta_path , prev_gff_path , iterations )
4343
4444 print ("------------------------------------------" )
45- print ("Reform Complete" )
45+ print (f "Reform Complete" )
4646 print ("------------------------------------------" )
4747 print (f"New .fa file created: { os .path .realpath (new_fasta )} " )
4848 print (f"New { annotation_ext } file created: { os .path .realpath (new_gff_path )} " )
@@ -71,9 +71,8 @@ def modify_existing_chrom_seq(in_arg, index, prev_fasta_path, prev_modifications
7171 length_changed = len (str (record .seq )) - (down_position - position - 1 )
7272 prev_modifications .append ((position ,length_changed ))
7373 if position != down_position :
74- print ("Removing nucleotides from position {} - {}" .format (position , down_position ))
75- print ("Proceeding to insert sequence '{}' from {} at position {} on chromsome {}"
76- .format (record .description , in_arg .in_fasta [index ], position , in_arg .chrom ))
74+ print (f"Removing nucleotides from position { position } - { down_position } " )
75+ print (f"Proceeding to insert sequence '{ record .description } ' from { in_arg .in_fasta [index ]} at position { position } on chromosome { in_arg .chrom } " )
7776 ## Build the new chromosome sequence with the inserted_seq
7877 ## If the chromosome sequence length is in the header, replace it with new length
7978 new_seq = existing_seq_str [:position ] + str (record .seq ) + existing_seq_str [down_position :]
@@ -207,7 +206,7 @@ def read_fasta(in_arg, index, prev_fasta_path):
207206 raise ValueError (f"Error: { filename_fa } is not a valid FASTA file." )
208207 except Exception as e :
209208 raise ValueError (f"Error parsing FASTA file: { str (e )} " )
210- print ("Preparing to create new FASTA file" )
209+ print (f "Preparing to create new FASTA file" )
211210 print (f"Original FASTA: { real_path_fa } " )
212211 ## Generate index of sequences from the reference FASTA
213212 if prev_fasta_path :
@@ -290,14 +289,14 @@ def valid_gff_line(line_elements):
290289 '''
291290 if not line_elements [0 ].startswith ("##sequence-region" ):
292291 if len (line_elements ) != 9 :
293- print ("** ERROR: in_gff file does not have 9 columns, it has" , len (line_elements ))
292+ print (f "** ERROR: in_gff file does not have 9 columns, it has { len (line_elements )} " )
294293 print (line_elements )
295294 return False
296295 else :
297296 ## Check that the ##sequence-region line has 4 columns. We compare against 5 here because the last element is
298297 ## the splitting format indicator.
299298 if len (line_elements ) != 5 :
300- print ("** ERROR: ##sequence-region line does not have 4 columns, it has" , len (line_elements ) - 1 )
299+ print (f "** ERROR: ##sequence-region line does not have 4 columns, it has { len (line_elements ) - 1 } " )
301300 print (line_elements )
302301 return False
303302 return True
@@ -376,7 +375,7 @@ def get_position(index, positions, upstream, downstream, chrom, seq_str, prev_mo
376375 position += lc
377376 if position > len (seq_str ):
378377 print ("** ERROR: Position greater than length of chromosome." )
379- print ("Chromosome: {}\Chromosome length: {}\n Position: \n {}" . format ( chrom , len ( seq_str ), position ) )
378+ print (f "Chromosome: { chrom } \n Chromosome length: { len ( seq_str ) } \n Position: { position } " )
380379 exit ()
381380 elif position == - 1 :
382381 position = len (seq_str )
@@ -403,8 +402,8 @@ def get_position(index, positions, upstream, downstream, chrom, seq_str, prev_mo
403402 else :
404403 print ("** ERROR: The upstream and downstream target sequences must be present and unique in the specified chromosome." )
405404 print ("Chromosome: {}\n " .format (chrom ))
406- print ("Upstream sequence found {} times" . format ( upstream_seq_count ) )
407- print ("Downstream sequence found {} times" . format ( downstream_seq_count ) )
405+ print (f "Upstream sequence found { upstream_seq_count } times" )
406+ print (f "Downstream sequence found { downstream_seq_count } times" )
408407 exit ()
409408 else :
410409 print ("** ERROR: You must specify a valid position or upstream and downstream sequences." )
@@ -602,8 +601,7 @@ def create_new_gff(new_gff_name, ref_gff, in_gff_lines, position, down_position,
602601 elif gff_feat_start <= position and gff_feat_end <= down_position :
603602 # Which side of the feature depends on the strand (we add this as a comment)
604603 x = "3" if gff_feat_strand == "+" else "5"
605- print ("Feature cut off - {} prime side of feature cut off ({} strand)"
606- .format (x , gff_feat_strand ))
604+ print (f"Feature cut off - { x } prime side of feature cut off ({ gff_feat_strand } strand)" )
607605 new_comment = format_comment (
608606 "{} prime side of feature cut-off by inserted sequence" .format (x ),
609607 gff_ext
@@ -631,8 +629,7 @@ def create_new_gff(new_gff_name, ref_gff, in_gff_lines, position, down_position,
631629 and gff_feat_start <= down_position
632630 and gff_feat_end > down_position ):
633631 x = "5" if gff_feat_strand == "+" else "3"
634- print ("Feature cut off - {} prime side of feature cut off ({} strand)"
635- .format (x , gff_feat_strand ))
632+ print (f"Feature cut off - { x } prime side of feature cut off ({ gff_feat_strand } strand)" )
636633 new_comment = format_comment (
637634 "{} prime side of feature cut-off by inserted sequence" .format (x ),
638635 gff_ext
@@ -656,7 +653,7 @@ def create_new_gff(new_gff_name, ref_gff, in_gff_lines, position, down_position,
656653 gff_out .write (modified_line )
657654
658655 else :
659- print ("** Error: Unknown case for GFF modification. Exiting " + str ( line_elements ) )
656+ print (f "** Error: Unknown case for GFF modification. Exiting { line_elements } " )
660657 exit ()
661658
662659 # If we've iterated over the entire original gff
@@ -738,7 +735,7 @@ def format_comment(comment, ext):
738735 elif ext .lower ().startswith ('gff' ):
739736 new_comment = "; reform_comment={}" .format (comment )
740737 else :
741- print ("** Error: Unrecognized extension {} in format_comment(). Exiting" . format ( ext ) )
738+ print (f "** Error: Unrecognized extension { ext } in format_comment(). Exiting" )
742739 exit ()
743740 return new_comment
744741
@@ -750,15 +747,15 @@ def rename_id(line):
750747 attributes = line .split ('\t ' )[8 ].strip ()
751748 elements = attributes .split (';' )
752749 if elements [0 ].startswith ("ID=" ):
753- print ("Renaming split feature {} --> {}_split" . format ( elements [0 ], elements [ 0 ]) )
750+ print (f "Renaming split feature { elements [ 0 ] } --> { elements [0 ]} _split" )
754751 return ("{}_split;{}" .format (elements [0 ], ';' .join (elements [1 :])))
755752 elif elements [0 ].startswith ("gene_id " ):
756753 gene_id = re .match (r'gene_id \"(.+)\"' , elements [0 ])[1 ]
757- print (' Renaming split feature {} --> {}_split' . format ( gene_id , gene_id ) )
754+ print (f" Renaming split feature { gene_id } --> { gene_id } _split" )
758755 return ('gene _id "{}_split";{}' .format (gene_id , ';' .join (elements [1 :])))
759756
760757 else :
761- print ("This feature will not be renamed because it does not has an ID/gene_id attribute:\n " , line )
758+ print (f "This feature will not be renamed because it does not have an ID/gene_id attribute:\n { line } " )
762759 return attributes
763760
764761def get_input_args ():
0 commit comments