@@ -14,22 +14,22 @@ from codoff import codoff
# Version string of the installed "codoff" distribution, resolved through
# pkg_resources when this module is loaded.
version = pkg_resources.require("codoff")[0].version
1515
def create_parser(argv=None):
    """Define the command-line interface and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            the behavior existing callers rely on.

    Returns:
        argparse.Namespace with the attributes ``antismash_dir``,
        ``output_dir``, ``version`` and ``processors``.
    """
    parser = argparse.ArgumentParser(description="""
    Program: codoff
    Author: Rauf Salamzade
    Affiliation: Kalan Lab, UW Madison, Department of Medical Microbiology and Immunology

    Simple wrapper of codoff for analysis of antiSMASH results for a single genome.
    """, formatter_class=argparse.RawTextHelpFormatter)

    # The value is consumed as a directory of antiSMASH GenBank outputs,
    # so the help text describes a results directory rather than a single file.
    parser.add_argument('-a', '--antismash-dir',
                        help="Path to antiSMASH results directory for a single genome.",
                        required=True)
    parser.add_argument('-o', '--output-dir',
                        help="Result directory.",
                        required=True)
    parser.add_argument('-v', '--version', action='store_true',
                        help="Print version and exit",
                        required=False, default=False)
    parser.add_argument('-p', '--processors', type=int,
                        help="Number of codoff jobs to run at once [Default is 1].",
                        required=False, default=1)

    return parser.parse_args(argv)
3333
3434def antismash_bgc_region_process (inputs ):
3535 try :
@@ -43,75 +43,75 @@ def antismash_bgc_region_process(inputs):
4343 sys .stderr .write ('Unable to process codoff inputs.\n ' )
4444 sys .stderr .write (traceback .format_exc () + '\n ' )
4545 sys .exit (1 )
46-
46+
def _find_antismash_gbks(antismash_dir):
    """Sort the ``.gbk`` files of an antiSMASH directory into genome and BGC files.

    Args:
        antismash_dir: Directory to scan (non-recursive).

    Returns:
        Tuple ``(genome_gbk, bgc_gbks)``: ``genome_gbk`` is the path of a
        ``.gbk`` file whose name does not contain ``.region`` (``None`` if
        there is none), and ``bgc_gbks`` is the list of paths of ``.gbk``
        files whose name contains ``.region``.
    """
    genome_gbk = None
    bgc_gbks = []
    for fname in os.listdir(antismash_dir):
        if not fname.endswith('.gbk'):
            continue
        path = os.path.join(antismash_dir, fname)
        if '.region' in fname:
            bgc_gbks.append(path)
        else:
            # If several non-region GenBank files are present, the last one
            # returned by os.listdir is the one that is kept.
            genome_gbk = path
    return genome_gbk, bgc_gbks


def _build_codoff_inputs(bgc_gbks, genome_gbk, output_dir):
    """Build one ``[bgc_gbk, genome_gbk, plot_file, out_file]`` job per BGC file.

    Output names are derived from the BGC file name up to its first ``.gbk``.
    """
    codoff_inputs = []
    for bgc_gbk in bgc_gbks:
        bgc_gbk_name = os.path.basename(bgc_gbk).split('.gbk')[0]
        bgc_outfile = os.path.join(output_dir, bgc_gbk_name + '.txt')
        bgc_plotfile = os.path.join(output_dir, bgc_gbk_name + '.svg')
        codoff_inputs.append([bgc_gbk, genome_gbk, bgc_plotfile, bgc_outfile])
    return codoff_inputs


def main():
    """Run the primary workflow of the program.

    Parses the command line, validates the input and output directories,
    collects the genome and BGC region GenBank files from the antiSMASH
    directory and processes every BGC region through a worker pool.
    Exits with status 1 after writing a message to stderr when validation
    fails, and with status 0 when only the version was requested.
    """
    sys.stderr.write('Running version ' + str(version) + ' of codoff!\n')
    # Handle the version flag before argparse runs, so it works without the
    # otherwise required -a/-o options.
    if len(sys.argv) > 1 and ('-v' in sys.argv or '--version' in sys.argv):
        sys.exit(0)

    # PARSE INPUTS
    myargs = create_parser()

    antismash_dir = os.path.abspath(myargs.antismash_dir) + '/'
    output_dir = os.path.abspath(myargs.output_dir) + '/'
    processors = myargs.processors

    # Explicit check instead of assert: asserts are removed under `python -O`.
    if not os.path.isdir(antismash_dir):
        sys.stderr.write('Error: Unable to validate antiSMASH directory exists.\n')
        sys.exit(1)

    if os.path.isdir(output_dir):
        sys.stderr.write('Error: Output directory already exists. Please provide a different output directory.\n')
        sys.exit(1)
    os.makedirs(output_dir)

    # START WORKFLOW

    # Process antiSMASH results directory
    genome_gbk, bgc_gbks = _find_antismash_gbks(antismash_dir)

    if genome_gbk is None:
        sys.stderr.write('Unable to find full genome GenBank file in antiSMASH results directory.\n')
        sys.exit(1)

    if not bgc_gbks:
        sys.stderr.write('Unable to find any BGC GenBank files in antiSMASH results directory.\n')
        sys.exit(1)

    codoff_inputs = _build_codoff_inputs(bgc_gbks, genome_gbk, output_dir)

    msg = "Running codoff on %d BGC regions in %s" % (len(codoff_inputs), antismash_dir)
    sys.stdout.write(msg + '\n')

    # The context manager releases the worker processes even if iteration
    # raises; all results have been consumed by the time the block exits.
    with multiprocessing.Pool(processes=processors) as pool:
        for _ in tqdm.tqdm(pool.imap_unordered(antismash_bgc_region_process, codoff_inputs),
                           total=len(codoff_inputs)):
            pass
# Script entry point: run the workflow only when executed directly,
# not when the module is imported.
if __name__ == "__main__":
    main()
0 commit comments