Skip to content

Commit 6e3986b

Browse files
committed
Merge branch 'pr/82'
2 parents 63941f9 + 4830913 commit 6e3986b

File tree

2 files changed

+16
-1
lines changed

2 files changed

+16
-1
lines changed

lib/wayback_machine_downloader.rb

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -201,7 +201,8 @@ def structure_dir_path dir_path
201 201
end
202 202

203 203
def download_file file_remote_info
204-
file_url = file_remote_info[:file_url]
204+
current_encoding = "".encoding
205+
file_url = file_remote_info[:file_url].encode(current_encoding)
205 206
file_id = file_remote_info[:file_id]
206 207
file_timestamp = file_remote_info[:timestamp]
207 208
file_path_elements = file_id.split('/')

test/test_wayback_machine_downloader.rb

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -89,5 +89,19 @@ def test_file_list_exclude_filter_with_a_regex
89 89
@wayback_machine_downloader.all = true
90 90
assert_equal 69, @wayback_machine_downloader.get_file_list_curated.size
91 91
end
92+
93+
# Testing encoding conflicts needs a different base_url
94+
def test_nonascii_suburls_download
95+
@wayback_machine_downloader = WaybackMachineDownloader.new base_url: 'https://en.wikipedia.org/wiki/%C3%84'
96+
# Once just for the downloading...
97+
@wayback_machine_downloader.download_files
98+
end
99+
100+
def test_nonascii_suburls_already_present
101+
@wayback_machine_downloader = WaybackMachineDownloader.new base_url: 'https://en.wikipedia.org/wiki/%C3%84'
102+
# ... twice to test the "is already present" case
103+
@wayback_machine_downloader.download_files
104+
@wayback_machine_downloader.download_files
105+
end
92 106

93 107
end

0 commit comments

Comments
 (0)