Skip to content

Commit 2ce3c1c

Browse files
Merge pull request #36 from YuWei-CH/printing-formats
Printing formats
2 parents 8806778 + df04901 commit 2ce3c1c

File tree

1 file changed

+20
-23
lines changed

1 file changed

+20
-23
lines changed

reform.py

Lines changed: 20 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -7,11 +7,11 @@
77
from Bio.Seq import Seq
88
from Bio.SeqRecord import SeqRecord
99
try:
10-
import pgzip as gzip_module
11-
print("\nUsing pgzip for gzip operations.\n")
10+
import pgzip as gzip_module
11+
print(f"\nUsing pgzip for gzip operations.\n")
1212
except ImportError:
13-
import gzip as gzip_module
14-
print("\npgzip not found, falling back to gzip.\n")
13+
import gzip as gzip_module
14+
print(f"\npgzip not found, falling back to gzip.\n")
1515

1616

1717
def main():
@@ -42,7 +42,7 @@ def main():
4242
add_new_chrom_seq(in_arg, index, prev_fasta_path, prev_gff_path, iterations)
4343

4444
print("------------------------------------------")
45-
print("Reform Complete")
45+
print(f"Reform Complete")
4646
print("------------------------------------------")
4747
print(f"New .fa file created: {os.path.realpath(new_fasta)}")
4848
print(f"New {annotation_ext} file created: {os.path.realpath(new_gff_path)}")
@@ -71,9 +71,8 @@ def modify_existing_chrom_seq(in_arg, index, prev_fasta_path, prev_modifications
7171
length_changed = len(str(record.seq)) - (down_position - position - 1)
7272
prev_modifications.append((position,length_changed))
7373
if position != down_position:
74-
print("Removing nucleotides from position {} - {}".format(position, down_position))
75-
print("Proceeding to insert sequence '{}' from {} at position {} on chromsome {}"
76-
.format(record.description, in_arg.in_fasta[index], position, in_arg.chrom))
74+
print(f"Removing nucleotides from position {position} - {down_position}")
75+
print(f"Proceeding to insert sequence '{record.description}' from {in_arg.in_fasta[index]} at position {position} on chromosome {in_arg.chrom}")
7776
## Build the new chromosome sequence with the inserted_seq
7877
## If the chromosome sequence length is in the header, replace it with new length
7978
new_seq = existing_seq_str[:position] + str(record.seq) + existing_seq_str[down_position:]
@@ -207,7 +206,7 @@ def read_fasta(in_arg, index, prev_fasta_path):
207206
raise ValueError(f"Error: {filename_fa} is not a valid FASTA file.")
208207
except Exception as e:
209208
raise ValueError(f"Error parsing FASTA file: {str(e)}")
210-
print("Preparing to create new FASTA file")
209+
print(f"Preparing to create new FASTA file")
211210
print(f"Original FASTA: {real_path_fa}")
212211
## Generate index of sequences from ref reference fasta
213212
if prev_fasta_path:
@@ -290,14 +289,14 @@ def valid_gff_line(line_elements):
290289
'''
291290
if not line_elements[0].startswith("##sequence-region"):
292291
if len(line_elements) != 9:
293-
print("** ERROR: in_gff file does not have 9 columns, it has", len(line_elements))
292+
print(f"** ERROR: in_gff file does not have 9 columns, it has {len(line_elements)}")
294293
print(line_elements)
295294
return False
296295
else:
297296
## Check if ##sequence-region line has 4 columns, the reason why use 5 here is because last element is
298297
## spliting format indicator.
299298
if len(line_elements) != 5:
300-
print("** ERROR: ##sequence-region line does not have 4 columns, it has", len(line_elements) - 1)
299+
print(f"** ERROR: ##sequence-region line does not have 4 columns, it has {len(line_elements) - 1}")
301300
print(line_elements)
302301
return False
303302
return True
@@ -376,7 +375,7 @@ def get_position(index, positions, upstream, downstream, chrom, seq_str, prev_mo
376375
position += lc
377376
if position > len(seq_str):
378377
print("** ERROR: Position greater than length of chromosome.")
379-
print("Chromosome: {}\Chromosome length: {}\nPosition: \n{}".format(chrom, len(seq_str), position))
378+
print(f"Chromosome: {chrom}\nChromosome length: {len(seq_str)}\nPosition: {position}")
380379
exit()
381380
elif position == -1:
382381
position = len(seq_str)
@@ -403,8 +402,8 @@ def get_position(index, positions, upstream, downstream, chrom, seq_str, prev_mo
403402
else:
404403
print("** ERROR: The upstream and downstream target sequences must be present and unique in the specified chromosome.")
405404
print("Chromosome: {}\n".format(chrom))
406-
print("Upstream sequence found {} times".format(upstream_seq_count))
407-
print("Downstream sequence found {} times".format(downstream_seq_count))
405+
print(f"Upstream sequence found {upstream_seq_count} times")
406+
print(f"Downstream sequence found {downstream_seq_count} times")
408407
exit()
409408
else:
410409
print("** ERROR: You must specify a valid position or upstream and downstream sequences.")
@@ -602,8 +601,7 @@ def create_new_gff(new_gff_name, ref_gff, in_gff_lines, position, down_position,
602601
elif gff_feat_start <= position and gff_feat_end <= down_position:
603602
# Which side of the feature depends on the strand (we add this as a comment)
604603
x = "3" if gff_feat_strand == "+" else "5"
605-
print("Feature cut off - {} prime side of feature cut off ({} strand)"
606-
.format(x, gff_feat_strand))
604+
print(f"Feature cut off - {x} prime side of feature cut off ({gff_feat_strand} strand)")
607605
new_comment = format_comment(
608606
"{} prime side of feature cut-off by inserted sequence".format(x),
609607
gff_ext
@@ -631,8 +629,7 @@ def create_new_gff(new_gff_name, ref_gff, in_gff_lines, position, down_position,
631629
and gff_feat_start <= down_position
632630
and gff_feat_end > down_position):
633631
x = "5" if gff_feat_strand == "+" else "3"
634-
print("Feature cut off - {} prime side of feature cut off ({} strand)"
635-
.format(x, gff_feat_strand))
632+
print(f"Feature cut off - {x} prime side of feature cut off ({gff_feat_strand} strand)")
636633
new_comment = format_comment(
637634
"{} prime side of feature cut-off by inserted sequence".format(x),
638635
gff_ext
@@ -656,7 +653,7 @@ def create_new_gff(new_gff_name, ref_gff, in_gff_lines, position, down_position,
656653
gff_out.write(modified_line)
657654

658655
else:
659-
print("** Error: Unknown case for GFF modification. Exiting " + str(line_elements))
656+
print(f"** Error: Unknown case for GFF modification. Exiting {line_elements}")
660657
exit()
661658

662659
# If we've iterated over the entire original gff
@@ -738,7 +735,7 @@ def format_comment(comment, ext):
738735
elif ext.lower().startswith('gff'):
739736
new_comment = "; reform_comment={}".format(comment)
740737
else:
741-
print("** Error: Unrecognized extension {} in format_comment(). Exiting".format(ext))
738+
print(f"** Error: Unrecognized extension {ext} in format_comment(). Exiting")
742739
exit()
743740
return new_comment
744741

@@ -750,15 +747,15 @@ def rename_id(line):
750747
attributes = line.split('\t')[8].strip()
751748
elements = attributes.split(';')
752749
if elements[0].startswith("ID="):
753-
print("Renaming split feature {} --> {}_split".format(elements[0], elements[0]))
750+
print(f"Renaming split feature {elements[0]} --> {elements[0]}_split")
754751
return ("{}_split;{}".format(elements[0], ';'.join(elements[1:])))
755752
elif elements[0].startswith("gene_id "):
756753
gene_id = re.match(r'gene_id \"(.+)\"', elements[0])[1]
757-
print('Renaming split feature {} --> {}_split'.format(gene_id, gene_id))
754+
print(f"Renaming split feature {gene_id} --> {gene_id}_split")
758755
return ('gene _id "{}_split";{}'.format(gene_id, ';'.join(elements[1:])))
759756

760757
else:
761-
print("This feature will not be renamed because it does not has an ID/gene_id attribute:\n", line)
758+
print(f"This feature will not be renamed because it does not have an ID/gene_id attribute:\n{line}")
762759
return attributes
763760

764761
def get_input_args():

0 commit comments

Comments
 (0)