Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions bin/wayback_machine_downloader
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ option_parser = OptionParser.new do |opts|
options[:all] = true
end

# -n/--delay: seconds to pause between page/file downloads.
# Integer makes OptionParser validate and coerce the argument (as the
# --concurrency option does), so non-numeric input is rejected at parse time
# instead of being passed on as a raw string that String#to_i would turn into 0.
opts.on("-n", "--delay DELAY", Integer, "A configurable delay between page/file downloads (in seconds) to combat rate limiting. Default is 4 seconds") do |seconds|
options[:delay] = seconds
end

# -c/--concurrency: OptionParser coerces the argument to Integer and the
# result is stored in options[:threads_count].
opts.on(
"-c", "--concurrency NUMBER", Integer,
"Number of multiple files to download at a time",
"Default is one file at a time (ie. 20)"
) { |count| options[:threads_count] = count }
Expand Down
9 changes: 8 additions & 1 deletion lib/wayback_machine_downloader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ class WaybackMachineDownloader

attr_accessor :base_url, :exact_url, :directory, :all_timestamps,
:from_timestamp, :to_timestamp, :only_filter, :exclude_filter,
:all, :maximum_pages, :threads_count
:all, :maximum_pages, :threads_count, :delay

def initialize params
@base_url = params[:base_url]
Expand All @@ -30,8 +30,11 @@ def initialize params
@only_filter = params[:only_filter]
@exclude_filter = params[:exclude_filter]
@all = params[:all]
# maximum page default is 100
@maximum_pages = params[:maximum_pages] ? params[:maximum_pages].to_i : 100
@threads_count = params[:threads_count].to_i
# default delay is 4 seconds
@delay = params[:delay] ? params[:delay].to_i : 4
end

def backup_name
Expand Down Expand Up @@ -89,6 +92,8 @@ def get_all_snapshots_to_consider
print "."
unless @exact_url
@maximum_pages.times do |page_index|
# wait before fetching individual snapshots
sleep(@delay)
snapshot_list = get_raw_list_from_api(@base_url + '/*', page_index)
break if snapshot_list.empty?
snapshot_list_to_consider += snapshot_list
Expand Down Expand Up @@ -209,6 +214,8 @@ def download_files
threads << Thread.new do
until file_queue.empty?
file_remote_info = file_queue.pop(true) rescue nil
# delay start of download operation for configurable amount of time
sleep(@delay)
download_file(file_remote_info) if file_remote_info
end
end
Expand Down