@@ -37,14 +37,18 @@ def batch_iterator(iterable: Iterable[T], batch_size: int = 10) -> Iterator[List
3737
3838
def _get_entrez_gene_summary(id_list, max_attempts=3, retry_delay=1.0):
    """Fetch NCBI Gene document summaries for a batch of gene IDs.

    The IDs are uploaded with ``Entrez.epost`` and the resulting
    WebEnv/QueryKey pair is passed to ``Entrez.esummary``. The whole
    round trip is retried when any step raises.

    Args:
        id_list: Iterable of gene ID strings; joined with commas for epost.
        max_attempts: Total number of attempts before giving up.
        retry_delay: Seconds to sleep before the first retry; doubled for
            each subsequent retry.

    Returns:
        The ``["DocumentSummarySet"]["DocumentSummary"]`` entry of the
        parsed esummary response.

    Raises:
        ValueError: If ``max_attempts`` is smaller than 1.
        RuntimeError: If every attempt failed; chained to the last error
            so the failure is reported here instead of surfacing later as
            an unexpected ``None`` in the caller.
    """
    # Local import: the file's top-level import block is not visible here.
    import time

    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    # Join once so a one-shot iterator is not exhausted by the first attempt.
    id_string = ",".join(id_list)

    last_error = None
    for attempt in range(1, max_attempts + 1):
        try:
            request = Entrez.epost("gene", id=id_string)
            result = Entrez.read(request)
            web_env = result["WebEnv"]
            query_key = result["QueryKey"]
            data = Entrez.esummary(db="gene", webenv=web_env, query_key=query_key)
            # Need recent BioPython
            document = Entrez.read(data, ignore_errors=True, validate=False)
            return document["DocumentSummarySet"]["DocumentSummary"]
        except Exception as e:  # deliberate retry boundary: log, then retry or re-raise below
            last_error = e
            logging.warning(e)
            if attempt < max_attempts:
                logging.warning("Trying again...")
                # Back off before retrying instead of hitting the service again immediately.
                time.sleep(retry_delay * 2 ** (attempt - 1))

    raise RuntimeError(
        "Entrez gene summary request failed after %d attempts" % max_attempts
    ) from last_error
4852
4953def iter_entrez_ids (reader ):
5054 for gi in reader :
@@ -57,7 +61,8 @@ def main():
5761 start_date = datetime .now ().isoformat ()
5862
5963 # 10k limit of return data from NCBI
60- NCBI_BATCH_SIZE = 10000
64+ # NCBI_BATCH_SIZE = 10000
65+ NCBI_BATCH_SIZE = 1000
6166
6267 gene_info = {}
6368 with gzip .open (args .gene_info , "rt" ) as f :
0 commit comments