Skip to content

Commit 4d2513e

Browse files
Be a bit more tolerant of timeouts here
1 parent 67685b7 commit 4d2513e

File tree

1 file changed

+19
-1
lines changed

1 file changed

+19
-1
lines changed

lib/wayback_machine_downloader/archive_api.rb

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,10 @@ def get_raw_list_from_api(url, page_index, http)
1616
params = [["output", "json"], ["url", url]] + parameters_for_api(page_index)
1717
request_url.query = URI.encode_www_form(params)
1818

19+
retries = 0
20+
max_retries = (@max_retries || 3)
21+
delay = WaybackMachineDownloader::RETRY_DELAY rescue 2
22+
1923
begin
2024
response = http.get(request_url)
2125
body = response.body.to_s.strip
@@ -26,7 +30,21 @@ def get_raw_list_from_api(url, page_index, http)
2630
json.shift if json.first == ["timestamp", "original"]
2731
json
2832
rescue JSON::ParserError => e
29-
warn "Failed to fetch data from API: #{e.message}"
33+
warn "Failed to parse JSON from API for #{url}: #{e.message}"
34+
[]
35+
rescue Net::ReadTimeout, Net::OpenTimeout => e
36+
if retries < max_retries
37+
retries += 1
38+
warn "Timeout talking to Wayback CDX API (#{e.class}: #{e.message}) for #{url}, retry #{retries}/#{max_retries}..."
39+
sleep(delay * retries)
40+
retry
41+
else
42+
warn "Giving up on Wayback CDX API for #{url} after #{max_retries} timeouts."
43+
[]
44+
end
45+
rescue StandardError => e
46+
# treat any other transient-ish error similarly, though without retries for now
47+
warn "Error fetching CDX data for #{url}: #{e.message}"
3048
[]
3149
end
3250
end

0 commit comments

Comments
 (0)