@@ -34,28 +34,33 @@ def __init__(self, database, term, outfile, gui):
3434 self .terminated = False
3535 super (Downloader , self ).__init__ ()
3636
37-
3837 def ncbi_search (self , database , term ):
3938 """
4039 Submit search to NCBI and return the records.
4140 """
4241 self .handle = Entrez .esearch (db = database , term = term , usehistory = "y" ,
43- retmax = 100000000 , idtype = "acc" )
42+ retmax = 10 , idtype = "acc" )
4443 self .record = Entrez .read (self .handle )
4544 self .handle .close ()
4645
4746 return self .record
4847
49-
50- def record_processor (self , record ):
48+ def record_processor (self , record , database ):
5149 """
5250 Splits the record returned by Entrez into separate variables and returns
5351 them.
5452 """
5553 count = int (record ["Count" ]) # Int
56- IDs = record ["IdList" ] # List
5754 webenv = record ["WebEnv" ] # String
5855 query_key = record ["QueryKey" ] # String
56+ IDs = []
57+
58+ for i in range (0 , count , 10000 ):
59+ iter_handle = Entrez .efetch (db = database , webenv = webenv ,
60+ query_key = query_key , retmax = 10000 ,
61+ rettype = "acc" , retstart = i )
62+ IDs += [x .rstrip () for x in iter_handle ]
63+ iter_handle .close ()
5964
6065 assert count == len (IDs )
6166
@@ -68,7 +73,6 @@ def record_processor(self, record):
6873
6974 return count , IDs , webenv , query_key
7075
71-
7276 def main_organizer (self , count , IDs , webenv , query_key , b_size , Run ):
7377 """
7478 Defines what tasks need to be performed, handles NCBI server errors and
@@ -132,7 +136,6 @@ def main_organizer(self, count, IDs, webenv, query_key, b_size, Run):
132136 if self .terminated is False :
133137 self .re_downloader (IDs , webenv , query_key , b_size )
134138
135-
136139 def re_downloader (self , IDs , webenv , query_key , b_size ):
137140 """
138141 Checks for missing sequences.
@@ -162,7 +165,6 @@ def re_downloader(self, IDs, webenv, query_key, b_size):
162165 self .main_organizer (numb_missing , IDs , webenv , query_key ,
163166 b_size , 2 )
164167
165-
166168 def error_finder (self , target_file ):
167169 """
168170 Looks for errors in the output fasta and returns a list of necessary
@@ -179,7 +181,6 @@ def error_finder(self, target_file):
179181 target_handle .close ()
180182 return verified_ids
181183
182-
183184 def fetch_by_id (self , IDs , b_size ):
184185 """
185186 Fetches NCBI data based on the IDs, rather than a search query. Returns
@@ -195,7 +196,6 @@ def fetch_by_id(self, IDs, b_size):
195196
196197 return data
197198
198-
199199 def fetch_by_history (self , start , b_size , webenv , query_key ):
200200 """
201201 Fetches NCBI data based on the provided search query. Returns the data
@@ -213,7 +213,6 @@ def fetch_by_history(self, start, b_size, webenv, query_key):
213213
214214 return data
215215
216-
217216 def translate_genome (self , acclist ):
218217 """
219218 Translates genome query IDs into a nucleotide query IDs, since NCBI has
@@ -236,7 +235,6 @@ def translate_genome(self, acclist):
236235
237236 return nuc_acc_list
238237
239-
240238 def run_everything (self ):
241239 """
242240 Run the functions in order.
@@ -248,7 +246,8 @@ def run_everything(self):
248246
249247 rec = self .ncbi_search (self .database , self .term )
250248 try :
251- count , IDs , webenv , query_key = self .record_processor (rec )
249+ count , IDs , webenv , query_key = self .record_processor (rec ,
250+ self .database )
252251 except TypeError :
253252 return None
254253 if self .database == "genome" :
0 commit comments