@@ -14,11 +14,13 @@ class GetAmrFinderPlusDataManager:
1414 Create the json file with database information for galaxy data manager
1515 """
1616
17- def __init__ (self ,
18- amrfinderplus_database = "amrfinderplus_versioned_database" ,
19- db_name = "amrfinderplus-db" ,
20- amrfinderplus_version = "latest" ,
21- date_version = None ):
17+ def __init__ (
18+ self ,
19+ amrfinderplus_database = "amrfinderplus_versioned_database" ,
20+ db_name = "amrfinderplus-db" ,
21+ amrfinderplus_version = "latest" ,
22+ date_version = None ,
23+ ):
2224 self .data_table_name = amrfinderplus_database
2325 self ._db_name = db_name
2426 self ._amrfinderplus_version = amrfinderplus_version
@@ -31,11 +33,7 @@ def get_data_table_format(self):
3133 Skeleton of a data_table format
3234 return: a data table formatted for json output
3335 """
34- self .data_table_entry = {
35- "data_tables" : {
36- self .data_table_name : {}
37- }
38- }
36+ self .data_table_entry = {"data_tables" : {self .data_table_name : {}}}
3937 return self .data_table_entry
4038
4139 def get_data_manager (self ):
@@ -44,14 +42,19 @@ def get_data_manager(self):
4442 return: The data table with database information
4543 """
4644 self .amrfinderplus_table_list = self .get_data_table_format ()
47- amrfinderplus_value = f"amrfinderplus_V{ self ._amrfinderplus_version } " \
48- f"_{ self ._amrfinderplus_date_version } "
49- amrfinderplus_name = f"V{ self ._amrfinderplus_version } " \
50- f"-{ self ._amrfinderplus_date_version } "
51- data_info = dict (value = amrfinderplus_value ,
52- name = amrfinderplus_name ,
53- db_version = self ._amrfinderplus_version ,
54- path = self ._db_name )
45+ amrfinderplus_value = (
46+ f"amrfinderplus_V{ self ._amrfinderplus_version } "
47+ f"_{ self ._amrfinderplus_date_version } "
48+ )
49+ amrfinderplus_name = (
50+ f"V{ self ._amrfinderplus_version } " f"-{ self ._amrfinderplus_date_version } "
51+ )
52+ data_info = dict (
53+ value = amrfinderplus_value ,
54+ name = amrfinderplus_name ,
55+ db_version = self ._amrfinderplus_version ,
56+ path = self ._db_name ,
57+ )
5558 self .amrfinderplus_table_list ["data_tables" ][self .data_table_name ] = [data_info ]
5659 return self .amrfinderplus_table_list
5760
@@ -63,24 +66,28 @@ class DownloadAmrFinderPlusDatabase(GetAmrFinderPlusDataManager):
6366 Build the data manager infos for galaxy
6467 """
6568
66- def __init__ (self ,
67- output_dir = Path .cwd (),
68- ncbi_url = "ftp.ncbi.nlm.nih.gov" ,
69- ftp_login = "anonymous" ,
70- ftp_password = "anonymous" ,
71- amrfinderplus_database = "amrfinderplus_database" ,
72- db_name = "amrfinderplus-db" ,
73- amrfinderplus_version = "latest" ,
74- json_file_path = None ,
75- date_version = None ,
76- amrfinderplus_db_path = None ,
77- test_mode = False ):
69+ def __init__ (
70+ self ,
71+ output_dir = Path .cwd (),
72+ ncbi_url = "ftp.ncbi.nlm.nih.gov" ,
73+ ftp_login = "anonymous" ,
74+ ftp_password = "anonymous" ,
75+ amrfinderplus_database = "amrfinderplus_database" ,
76+ db_name = "amrfinderplus-db" ,
77+ amrfinderplus_version = "latest" ,
78+ json_file_path = None ,
79+ date_version = None ,
80+ amrfinderplus_db_path = None ,
81+ test_mode = False ,
82+ ):
7883
7984 super ().__init__ ()
8085 self .json_file_path = json_file_path
8186 self ._output_dir = output_dir
8287 self ._ncbi_ftp_url = ncbi_url
83- self ._ncbi_database_path = "pathogen/Antimicrobial_resistance/AMRFinderPlus/database"
88+ self ._ncbi_database_path = (
89+ "pathogen/Antimicrobial_resistance/AMRFinderPlus/database"
90+ )
8491 self ._login = ftp_login
8592 self ._password = ftp_password
8693 self ._amrfinderplus_database = amrfinderplus_database
@@ -103,40 +110,61 @@ def subprocess_cmd(command, *args):
103110 [cmd .append (i ) for i in args ]
104111 proc = sp .run (cmd , stdout = sp .PIPE , stderr = sp .PIPE )
105112 if proc .returncode != 0 :
106- print (f' Error type { proc .returncode } with : \n { proc } ' )
113+ print (f" Error type { proc .returncode } with : \n { proc } " )
107114
def download_amrfinderplus_db(self):
    """
    Download the amrfinderplus database from the ncbi ftp server

    Method of DownloadAmrFinderPlusDatabase. Builds the local database
    directory path from the output directory and the database name, creates
    that directory, and fetches the database with ``wget`` through
    ``self.subprocess_cmd``.

    When the requested version is "latest", ``get_amrfinderplus_version`` is
    called first so the FTP path is built from the values it sets.

    In test mode only four files are downloaded, one ``wget -O`` call per
    file; otherwise the whole remote directory is fetched with a single
    ``wget -P`` call.

    Side effects: sets ``self.amrfinderplus_db_path`` and creates that
    directory on disk.
    """
    self.amrfinderplus_db_path = f"{self._output_dir}/{self._db_name}"
    # exist_ok=True: an already existing target directory (e.g. a re-run)
    # must not abort the download with FileExistsError.
    os.makedirs(self.amrfinderplus_db_path, exist_ok=True)

    if self._amrfinderplus_version == "latest":
        self.get_amrfinderplus_version()

    amrfinderplus_ftp_path = (
        f"ftp://{self._login}:"
        f"{self._password}@{self._ncbi_ftp_url}/"
        f"{self._ncbi_database_path}/"
        f"{self._amrfinderplus_version}/"
        f"{self._amrfinderplus_date_version}"
    )

    # File names differ between database releases: 3.12 uses names without
    # the .tsv/.fa extensions.
    # NOTE(review): the CLI help mentions versions such as 3.10 and 3.8,
    # which this check routes to the new-style names - confirm that is
    # intended. Kept as "== 3.12" to stay consistent with the same check
    # in extract_filelist_makeblast and make_blastdb.
    if self._amrfinderplus_version == "3.12":
        taxa_group_file = "taxgroup.tab"
        test_dna_fasta = "AMR_DNA-Escherichia"
    else:
        taxa_group_file = "taxgroup.tsv"
        test_dna_fasta = "AMR_DNA-Escherichia.fa"

    # Arguments shared by every wget invocation below.
    wget_command = ("wget", "-nd", "-np", "-r")

    if self.test_mode is True:
        file_list = [
            test_dna_fasta,
            "version.txt",
            taxa_group_file,
            "database_format_version.txt",
        ]
        # -O writes each remote file to an explicit local file path.
        output_option = "-O"
        for file in file_list:
            self.subprocess_cmd(
                *wget_command,
                f"{amrfinderplus_ftp_path}/{file}",
                output_option,
                f"{self.amrfinderplus_db_path}/{file}",
            )
    else:
        # -P sets the local directory that receives the downloaded files.
        output_option = "-P"
        self.subprocess_cmd(
            *wget_command,
            amrfinderplus_ftp_path,
            output_option,
            self.amrfinderplus_db_path,
        )
140168
141169 def make_hmm_profile (self ):
142170 """
@@ -153,27 +181,48 @@ def extract_filelist_makeblast(self):
153181 Extract le list of species which have file in the database
154182 return: a filtered species list of available species in the database
155183 """
156- taxa_group_path = Path (f"{ self .amrfinderplus_db_path } /taxgroup.tab" )
184+ if self ._amrfinderplus_version == "3.12" :
185+ taxa_group_file = "taxgroup.tab"
186+ else :
187+ taxa_group_file = "taxgroup.tsv"
188+ taxa_group_path = Path (f"{ self .amrfinderplus_db_path } /{ taxa_group_file } " )
157189 if Path .exists (taxa_group_path ):
158190 taxa_table = pd .read_table (taxa_group_path )
159- taxa_table .columns = ["taxgroup" , "gpipe_taxgroup" , "number_of_nucl_ref_genes" ]
160- taxa_df = taxa_table [taxa_table .number_of_nucl_ref_genes > 0 ].filter (items = ["taxgroup" ], axis = 1 )
191+ taxa_table .columns = [
192+ "taxgroup" ,
193+ "gpipe_taxgroup" ,
194+ "number_of_nucl_ref_genes" ,
195+ ]
196+ taxa_df = taxa_table [taxa_table .number_of_nucl_ref_genes > 0 ].filter (
197+ items = ["taxgroup" ], axis = 1
198+ )
161199 if self .test_mode is True :
162200 taxa_df = taxa_df [taxa_df .taxgroup == "Escherichia" ].taxgroup
163201 else :
164202 taxa_df = taxa_df .taxgroup
165203 self .species_list = list (taxa_df )
166204 else :
167- print ("taxgroup.tab file is missing to list available species" )
205+ print (f" { taxa_group_file } file is missing to list available species" )
168206
169207 def make_blastdb (self ):
170208 """
171209 Index fasta file for blast
172210 """
173211 self .extract_filelist_makeblast ()
174- nucl_file_db_list = [f'{ self .amrfinderplus_db_path } /AMR_DNA-{ specie } ' for specie in self .species_list ]
175- amr_dna = f'{ self .amrfinderplus_db_path } /AMR_CDS'
176- amr_prot = f'{ self .amrfinderplus_db_path } /AMRProt'
212+ if self ._amrfinderplus_version == "3.12" :
213+ nucl_file_db_list = [
214+ f"{ self .amrfinderplus_db_path } /AMR_DNA-{ specie } "
215+ for specie in self .species_list
216+ ]
217+ amr_dna = f"{ self .amrfinderplus_db_path } /AMR_CDS"
218+ amr_prot = f"{ self .amrfinderplus_db_path } /AMRProt"
219+ else :
220+ nucl_file_db_list = [
221+ f"{ self .amrfinderplus_db_path } /AMR_DNA-{ specie } .fa"
222+ for specie in self .species_list
223+ ]
224+ amr_dna = f"{ self .amrfinderplus_db_path } /AMR_CDS.fa"
225+ amr_prot = f"{ self .amrfinderplus_db_path } /AMRProt.fa"
177226 os .chdir (self .amrfinderplus_db_path )
178227 if Path (amr_dna ).exists ():
179228 nucl_file_db_list .append (amr_dna )
@@ -183,10 +232,16 @@ def make_blastdb(self):
183232 self .subprocess_cmd ("makeblastdb" , "-in" , amr_prot , "-dbtype" , "prot" )
184233 else :
185234 print ("No file AMRProt detected for indexing" )
186- [self .subprocess_cmd ("makeblastdb" , "-in" , file , "-dbtype" , "nucl" ) for file in nucl_file_db_list ]
235+ [
236+ self .subprocess_cmd ("makeblastdb" , "-in" , file , "-dbtype" , "nucl" )
237+ for file in nucl_file_db_list
238+ ]
187239
188- def get_amrfinderplus_version (self , version_file = "version.txt" ,
189- database_version_file = "database_format_version.txt" ):
240+ def get_amrfinderplus_version (
241+ self ,
242+ version_file = "version.txt" ,
243+ database_version_file = "database_format_version.txt" ,
244+ ):
190245 """
191246 Check the version when latest if provided and update the number
192247 param version_file: name of the file containing version information
@@ -197,27 +252,30 @@ def get_amrfinderplus_version(self, version_file="version.txt",
197252 ftp .cwd (f"{ self ._ncbi_database_path } /{ self ._amrfinderplus_version } " )
198253 db_version = BytesIO ()
199254 db_date_version = BytesIO ()
200- ftp .retrbinary (f'RETR { version_file } ' , db_version .write )
201- ftp .retrbinary (f'RETR { database_version_file } ' , db_date_version .write )
202- self ._amrfinderplus_date_version = db_version .getvalue ().decode ("utf-8" ).splitlines ()[0 ]
203- self ._amrfinderplus_version = '.' .join (
204- db_date_version .getvalue ().decode ("utf-8" ).splitlines ()[0 ].split ("." )[:2 ])
255+ ftp .retrbinary (f"RETR { version_file } " , db_version .write )
256+ ftp .retrbinary (f"RETR { database_version_file } " , db_date_version .write )
257+ self ._amrfinderplus_date_version = (
258+ db_version .getvalue ().decode ("utf-8" ).splitlines ()[0 ]
259+ )
260+ self ._amrfinderplus_version = "." .join (
261+ db_date_version .getvalue ().decode ("utf-8" ).splitlines ()[0 ].split ("." )[:2 ]
262+ )
205263
def read_json_input_file(self):
    """
    Import the json file

    Reads the json file at ``self.json_file_path``, takes the
    ``extra_files_path`` of the first ``output_data`` entry, creates that
    directory and stores it as the output directory of this instance.
    """
    with open(self.json_file_path) as json_handle:
        galaxy_params = json.load(json_handle)
    first_output = galaxy_params["output_data"][0]
    extra_files_dir = first_output["extra_files_path"]
    # Create the directory before recording it as the download target.
    os.makedirs(extra_files_dir)
    self._output_dir = extra_files_dir
215273
def write_json_infos(self):
    """
    Write in the imported json file

    Overwrites the file at ``self.json_file_path`` with the structure
    returned by ``self.get_data_manager()``, serialized with sorted keys.
    """
    with open(self.json_file_path, "w") as json_handle:
        data_manager_table = self.get_data_manager()
        json.dump(data_manager_table, json_handle, sort_keys=True)
222280
223281
@@ -228,29 +286,38 @@ def parse_arguments():
228286 """
229287 # parse options and arguments
230288 arg_parser = argparse .ArgumentParser ()
231- arg_parser .add_argument ("data_manager_json" ,
232- help = "json file from galaxy" )
233- arg_parser .add_argument ("--db_version" , default = "latest" ,
234- help = "select the major version of the database (e.g. 3.10, 3.8), default is latest" )
235- arg_parser .add_argument ("--db_date" ,
236- help = "select the date into the database version (e.g. 2022-10-11.2)" )
237- arg_parser .add_argument ("--test" , action = 'store_true' ,
238- help = "option to test the script with an lighted database" )
289+ arg_parser .add_argument ("data_manager_json" , help = "json file from galaxy" )
290+ arg_parser .add_argument (
291+ "--db_version" ,
292+ default = "latest" ,
293+ help = "select the major version of the database (e.g. 3.10, 3.8), default is latest" ,
294+ )
295+ arg_parser .add_argument (
296+ "--db_date" ,
297+ help = "select the date into the database version (e.g. 2022-10-11.2)" ,
298+ )
299+ arg_parser .add_argument (
300+ "--test" ,
301+ action = "store_true" ,
302+ help = "option to test the script with an lighted database" ,
303+ )
239304 return arg_parser .parse_args ()
240305
241306
def main():
    """
    Run the whole data manager workflow from the command line arguments

    Steps, in order: read the json input file, download the database,
    build the hmm profile, index the fasta files for blast, then write the
    data manager information back to the json file.
    """
    cli_args = parse_arguments()
    downloader = DownloadAmrFinderPlusDatabase(
        json_file_path=cli_args.data_manager_json,
        amrfinderplus_version=cli_args.db_version,
        date_version=cli_args.db_date,
        test_mode=cli_args.test,
    )
    # The order matters: each step relies on state set by the previous ones.
    workflow_steps = (
        downloader.read_json_input_file,
        downloader.download_amrfinderplus_db,
        downloader.make_hmm_profile,
        downloader.make_blastdb,
        downloader.write_json_infos,
    )
    for run_step in workflow_steps:
        run_step()


if __name__ == "__main__":
    main()
0 commit comments