From d926f965f9ce914d9bc14381c54254c7b241f7c7 Mon Sep 17 00:00:00 2001 From: Oleg Pudeyev Date: Wed, 15 Mar 2017 17:58:05 -0400 Subject: [PATCH] Add exact_match option. With this option set, Wayback Machine Downloader will only look for snapshots matching the exact base_url passed in rather than base_url and its children. This is useful when trying to download a single file rather than mirroring a site. --- lib/wayback_machine_downloader.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lib/wayback_machine_downloader.rb b/lib/wayback_machine_downloader.rb index 0e79623..9f466df 100644 --- a/lib/wayback_machine_downloader.rb +++ b/lib/wayback_machine_downloader.rb @@ -16,11 +16,13 @@ class WaybackMachineDownloader VERSION = "1.1.5" - attr_accessor :base_url, :directory, :from_timestamp, :to_timestamp, + attr_accessor :base_url, :exact_match, :directory, + :from_timestamp, :to_timestamp, :only_filter, :exclude_filter, :all, :maximum_pages, :threads_count def initialize params @base_url = params[:base_url] + @exact_match = params[:exact_match] @directory = params[:directory] @from_timestamp = params[:from_timestamp].to_i @to_timestamp = params[:to_timestamp].to_i @@ -84,8 +86,10 @@ class WaybackMachineDownloader snapshot_list_to_consider = "" snapshot_list_to_consider += get_raw_list_from_api(@base_url, nil) print "." - snapshot_list_to_consider += get_raw_list_from_api(@base_url + '/*', nil) - print "." + unless @exact_match + snapshot_list_to_consider += get_raw_list_from_api(@base_url + '/*', nil) + print "." + end @maximum_pages.times do |page_index| snapshot_list = get_raw_list_from_api(@base_url + '/*', page_index) break if snapshot_list.empty?