diff --git a/lib/wayback_machine_downloader.rb b/lib/wayback_machine_downloader.rb index 730714a..98b2a17 100644 --- a/lib/wayback_machine_downloader.rb +++ b/lib/wayback_machine_downloader.rb @@ -6,6 +6,8 @@ require 'fileutils' require 'cgi' require 'json' +require 'zlib' +require 'stringio' require_relative 'wayback_machine_downloader/tidy_bytes' require_relative 'wayback_machine_downloader/to_regex' require_relative 'wayback_machine_downloader/archive_api' @@ -269,7 +271,14 @@ def download_file file_remote_info open(file_path, "wb") do |file| begin URI("https://web.archive.org/web/#{file_timestamp}id_/#{file_url}").open("Accept-Encoding" => "plain") do |uri| - file.write(uri.read) + content = uri.read + + if uri.content_encoding.include? 'gzip' + gz = Zlib::GzipReader.new(StringIO.new(content)) + content = gz.read + end + + file.write(content) end rescue OpenURI::HTTPError => e puts "#{file_url} # #{e}"