@@ -16,6 +16,10 @@ def get_raw_list_from_api(url, page_index, http)
1616 params = [ [ "output" , "json" ] , [ "url" , url ] ] + parameters_for_api ( page_index )
1717 request_url . query = URI . encode_www_form ( params )
1818
19+ retries = 0
20+ max_retries = ( @max_retries || 3 )
21+ delay = WaybackMachineDownloader ::RETRY_DELAY rescue 2
22+
1923 begin
2024 response = http . get ( request_url )
2125 body = response . body . to_s . strip
@@ -26,7 +30,21 @@ def get_raw_list_from_api(url, page_index, http)
2630 json . shift if json . first == [ "timestamp" , "original" ]
2731 json
2832 rescue JSON ::ParserError => e
29- warn "Failed to fetch data from API: #{ e . message } "
33+ warn "Failed to parse JSON from API for #{ url } : #{ e . message } "
34+ [ ]
35+ rescue Net ::ReadTimeout , Net ::OpenTimeout => e
36+ if retries < max_retries
37+ retries += 1
38+ warn "Timeout talking to Wayback CDX API (#{ e . class } : #{ e . message } ) for #{ url } , retry #{ retries } /#{ max_retries } ..."
39+ sleep ( delay * retries )
40+ retry
41+ else
42+ warn "Giving up on Wayback CDX API for #{ url } after #{ max_retries } timeouts."
43+ [ ]
44+ end
45+ rescue StandardError => e
46+ # treat any other transient-ish error similarly, though without retries for now
47+ warn "Error fetching CDX data for #{ url } : #{ e . message } "
3048 [ ]
3149 end
3250 end
0 commit comments