Skip to content

Commit 4630da8

Browse files
committed
Handle HTTP 503 (Service Unavailable) gateway errors better. PubChem ignore_chems is now appended to only on 404s. build_all retry delays set to 3 and 10 minutes.
1 parent b347513 commit 4630da8

File tree

2 files changed

+15
-17
lines changed

2 files changed

+15
-17
lines changed

build/build_all.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ def run_docker_cmd(cmd_arr,filename):
6060
Essentially a wrapper for 'docker run'. Also provides output.
6161
'''
6262
retries=3
63+
delays = [3 * 60, 10 * 60] # 3 minutes, 10 minutes
6364
print('running...'+filename)
6465
env = os.environ.copy()
6566
if 'SYNAPSE_AUTH_TOKEN' not in env.keys():
@@ -80,21 +81,13 @@ def run_docker_cmd(cmd_arr,filename):
8081
else:
8182
print(f"[{filename}] failed (exit {res.returncode}).")
8283
if attempt < retries:
83-
print("Retrying...")
84+
delay = delays[attempt - 1]
85+
print(f"[{filename}] waiting {delay//60} minutes before retrying...")
8486
print(cmd)
87+
time.sleep(delay)
8588
attempt += 1
8689
raise RuntimeError(f"{filename} failed after {retries} attempts")
87-
88-
89-
# cmd = docker_run+cmd_arr
90-
# print(cmd)
91-
# # res = subprocess.run(cmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
92-
# res = subprocess.run(cmd, stdout=sys.stdout, stderr=sys.stderr)
93-
# if res.returncode !=0:
94-
# print(res.stderr)
95-
# exit(filename+' file failed')
96-
# else:
97-
# print(filename+' retrieved')
90+
9891

9992

10093
def process_docker(datasets):

build/utils/pubchem_retrieval.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,9 +55,10 @@ def fetch_url(url, retries=3, backoff_factor=1):
5555
try:
5656
response = requests.get(url, timeout=10)
5757
if response.status_code == 200:
58-
return response.json()
59-
else:
60-
raise Exception(f"Failed to fetch {url}, Status Code: {response.status_code}")
58+
return response.json()
59+
if response.status_code == 404: # permanent, no existing CID/name
60+
raise FileNotFoundError("404")
61+
raise Exception(f"Failed to fetch {url}, Status Code: {response.status_code}")
6162
except Exception as exc:
6263
if attempt < retries:
6364
wait = backoff_factor * (2 ** attempt)
@@ -104,11 +105,15 @@ def retrieve_drug_info(compound, ignore_chems, isname=True):
104105
try:
105106
data = future.result()
106107
results[key] = data
107-
except Exception as exc:
108-
print(f'{compound} generated an exception: {exc}')
108+
109+
except FileNotFoundError: # only 404s are added
110+
print(f"{compound} not found in PubChem. Adding to ignore list.")
109111
with open(ignore_chems, "a") as f:
110112
f.write(f"{compound}\n")
111113
return None
114+
except Exception as exc: # transient error, don't blacklist
115+
print(f"{compound} generated a transient exception: {exc}")
116+
return None
112117

113118
if all(key in results for key in ["properties", "synonyms"]):
114119
properties = results["properties"]['PropertyTable']['Properties'][0]

0 commit comments

Comments
 (0)