Skip to content

Commit afab72c

Browse files
committed
Construct the cdx API query using a URI object
This avoids problems related to URL encoding. Obsoletes: #116
1 parent e6707a9 commit afab72c

File tree

1 file changed

+12
-11
lines changed

1 file changed

+12
-11
lines changed

lib/wayback_machine_downloader/archive_api.rb

Lines changed: 12 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1,28 +1,29 @@
1+
require 'uri'
2+
13
module ArchiveAPI
24

35
def get_raw_list_from_api url, page_index
4-
request_url = "https://web.archive.org/cdx/search/xd?url="
5-
request_url += url
6-
request_url += parameters_for_api page_index
6+
request_url = URI("https://web.archive.org/cdx/search/xd")
7+
params = [["url", url]]
8+
params += parameters_for_api page_index
9+
request_url.query = URI.encode_www_form(params)
710

811
URI.open(request_url).read
912
end
1013

1114
def parameters_for_api page_index
12-
parameters = "&fl=timestamp,original&collapse=digest&gzip=false"
13-
if @all
14-
parameters += ""
15-
else
16-
parameters += "&filter=statuscode:200"
15+
parameters = [["fl", "timestamp,original"], ["collapse", "digest"], ["gzip", "false"]]
16+
if !@all
17+
parameters.push(["filter", "statuscode:200"])
1718
end
1819
if @from_timestamp and @from_timestamp != 0
19-
parameters += "&from=" + @from_timestamp.to_s
20+
parameters.push(["from", @from_timestamp.to_s])
2021
end
2122
if @to_timestamp and @to_timestamp != 0
22-
parameters += "&to=" + @to_timestamp.to_s
23+
parameters.push(["to", @to_timestamp.to_s])
2324
end
2425
if page_index
25-
parameters += "&page=#{page_index}"
26+
parameters.push(["page", page_index])
2627
end
2728
parameters
2829
end

0 commit comments

Comments
 (0)