Skip to content

Commit b822577

Browse files
committed
handle 429 correctly
1 parent f5a6c45 commit b822577

File tree

1 file changed

+7
-4
lines changed

1 file changed

+7
-4
lines changed

datapackage_pipelines_migdar/flows/broken_links.py

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -48,23 +48,26 @@ def func(row):
4848
error = None
4949
backoff = 10
5050
try:
51-
print(datetime.datetime.now().isoformat(), 'Checking', row['url'])
52-
while True:
51+
print('%s:CHECK:%s' % (datetime.datetime.now().isoformat(), row["url"]))
52+
for _ in range(5):
5353
resp = requests.get(row['url'], allow_redirects=True, headers=HEADERS, timeout=10, stream=True)
5454
if resp.status_code == 429:
5555
time.sleep(backoff)
5656
backoff *= 2
57+
error = 'Server Overload'
5758
continue
58-
if resp.status_code >= 300:
59+
elif resp.status_code >= 300:
5960
error = '%s: %s' % (resp.status_code, resp.reason)
61+
else:
62+
error = None
6063
time.sleep(1)
6164
break
6265
except requests.exceptions.RequestException as e:
6366
error = str(e.__class__.__name__)
6467
except requests.exceptions.BaseHTTPError as e:
6568
error = str(e.__class__.__name__)
6669
if error:
67-
print(datetime.datetime.now().isoformat(), 'ERROR', row['url'], error)
70+
print('%s:ERROR:%s: %s' % (datetime.datetime.now().isoformat(), row['url'], error))
6871
row['error'] = error
6972
return func
7073

0 commit comments

Comments
 (0)