Add an -a/--all command-line option.

When the option is given, options[:all] is set to true and stored on the
downloader as @all. get_file_list_curated then no longer appends
"&filter=statuscode:200" to the query parameters, so captures with other
status codes are listed as well. The base parameter string also drops
"fastLatest=true". A test covers the size of the curated list with `all`
enabled.

diff --git a/bin/wayback_machine_downloader b/bin/wayback_machine_downloader
index c466de7..8cc0730 100755
--- a/bin/wayback_machine_downloader
+++ b/bin/wayback_machine_downloader
@@ -30,6 +30,10 @@ option_parser = OptionParser.new do |opts|
     options[:exclude_filter] = t
   end
 
+  opts.on("-a", "--all", "Expand downloading to error files (40x and 50x) and redirections (30x)") do |t|
+    options[:all] = true
+  end
+
   opts.on("-v", "--version", "Display version") do |t|
     options[:version] = t
   end
diff --git a/lib/wayback_machine_downloader.rb b/lib/wayback_machine_downloader.rb
index 49883e0..c0c74a8 100644
--- a/lib/wayback_machine_downloader.rb
+++ b/lib/wayback_machine_downloader.rb
@@ -10,7 +10,7 @@ class WaybackMachineDownloader
 
   VERSION = "0.4.1"
 
-  attr_accessor :base_url, :from_timestamp, :to_timestamp, :only_filter, :exclude_filter
+  attr_accessor :base_url, :from_timestamp, :to_timestamp, :only_filter, :exclude_filter, :all
 
   def initialize params
     @base_url = params[:base_url]
@@ -18,6 +18,7 @@ class WaybackMachineDownloader
     @to_timestamp = params[:to_timestamp].to_i
     @only_filter = params[:only_filter]
     @exclude_filter = params[:exclude_filter]
+    @all = params[:all]
   end
 
   def backup_name
@@ -55,7 +56,10 @@ class WaybackMachineDownloader
   end
 
   def get_file_list_curated
-    parameters_for_wayback_machine_api = "&fl=timestamp,original&fastLatest=true&filter=statuscode:200&collapse=original"
+    parameters_for_wayback_machine_api = "&fl=timestamp,original&collapse=original"
+    unless @all
+      parameters_for_wayback_machine_api += "&filter=statuscode:200"
+    end
     if @from_timestamp and @from_timestamp != 0
       parameters_for_wayback_machine_api += "&from=" + @from_timestamp.to_s
     end
diff --git a/test/test_wayback_machine_downloader.rb b/test/test_wayback_machine_downloader.rb
index 717b0e9..9dfc25c 100644
--- a/test/test_wayback_machine_downloader.rb
+++ b/test/test_wayback_machine_downloader.rb
@@ -76,4 +76,9 @@ class WaybackMachineDownloaderTest < Minitest::Test
     assert_nil @wayback_machine_downloader.get_file_list_curated["linux.htm"]
   end
 
+  def test_all_get_file_list_curated_size
+    @wayback_machine_downloader.all = true
+    assert_equal 69, @wayback_machine_downloader.get_file_list_curated.size
+  end
+
 end