Add option to only list files without downloading

This commit is contained in:
hartator 2016-08-03 14:23:35 -05:00
parent 4ea8505394
commit 32156db551
3 changed files with 32 additions and 3 deletions

View File

@ -33,6 +33,7 @@ It will download the last version of every file present on Wayback Machine to `.
-o, --only ONLY_FILTER Restrict downloading to urls that match this filter (use // notation for the filter to be treated as a regex) -o, --only ONLY_FILTER Restrict downloading to urls that match this filter (use // notation for the filter to be treated as a regex)
-x, --exclude EXCLUDE_FILTER Skip downloading of urls that match this filter (use // notation for the filter to be treated as a regex) -x, --exclude EXCLUDE_FILTER Skip downloading of urls that match this filter (use // notation for the filter to be treated as a regex)
-a, --all Expand downloading to error files (40x and 50x) and redirections (30x) -a, --all Expand downloading to error files (40x and 50x) and redirections (30x)
-l, --list Only list file urls in a JSON format with the archived timestamps. Won't download anything.
-v, --version Display version -v, --version Display version
## From Timestamp ## From Timestamp
@ -95,6 +96,16 @@ Example:
wayback_machine_downloader http://example.com --all wayback_machine_downloader http://example.com --all
## Only list files without downloading
-l, --list
It will only display the files that would be downloaded, along with their snapshot timestamps and URLs. The output format is JSON. Nothing is downloaded. This is useful for debugging or for feeding the file list to another application.
Example:
wayback_machine_downloader http://example.com --list
## Using the Docker image ## Using the Docker image
As an alternative installation way, we have a Docker image! Retrieve the wayback-machine-downloader Docker image this way: As an alternative installation way, we have a Docker image! Retrieve the wayback-machine-downloader Docker image this way:

View File

@ -34,6 +34,10 @@ option_parser = OptionParser.new do |opts|
options[:all] = true options[:all] = true
end end
# --list: record that the caller only wants the file listing, not a download.
opts.on("-l", "--list", "Only list file urls in a JSON format with the archived timestamps. Won't download anything.") { |_flag| options[:list] = true }
opts.on("-v", "--version", "Display version") do |t| opts.on("-v", "--version", "Display version") do |t|
options[:version] = t options[:version] = t
end end
@ -42,7 +46,11 @@ end.parse!
if (base_url = ARGV[-1]) if (base_url = ARGV[-1])
options[:base_url] = base_url options[:base_url] = base_url
wayback_machine_downloader = WaybackMachineDownloader.new options wayback_machine_downloader = WaybackMachineDownloader.new options
wayback_machine_downloader.download_files if wayback_machine_downloader.list
wayback_machine_downloader.list_files
else
wayback_machine_downloader.download_files
end
elsif options[:version] elsif options[:version]
puts WaybackMachineDownloader::VERSION puts WaybackMachineDownloader::VERSION
else else

View File

@ -3,14 +3,15 @@
require 'open-uri' require 'open-uri'
require 'fileutils' require 'fileutils'
require 'cgi' require 'cgi'
require 'json'
require_relative 'wayback_machine_downloader/tidy_bytes' require_relative 'wayback_machine_downloader/tidy_bytes'
require_relative 'wayback_machine_downloader/to_regex' require_relative 'wayback_machine_downloader/to_regex'
class WaybackMachineDownloader class WaybackMachineDownloader
VERSION = "0.4.3" VERSION = "0.4.4"
attr_accessor :base_url, :from_timestamp, :to_timestamp, :only_filter, :exclude_filter, :all attr_accessor :base_url, :from_timestamp, :to_timestamp, :only_filter, :exclude_filter, :all, :list
def initialize params def initialize params
@base_url = params[:base_url] @base_url = params[:base_url]
@ -19,6 +20,7 @@ class WaybackMachineDownloader
@only_filter = params[:only_filter] @only_filter = params[:only_filter]
@exclude_filter = params[:exclude_filter] @exclude_filter = params[:exclude_filter]
@all = params[:all] @all = params[:all]
@list = params[:list]
end end
def backup_name def backup_name
@ -106,6 +108,14 @@ class WaybackMachineDownloader
end end
end end
# Prints the file list returned by get_file_list_by_timestamp to stdout as a
# JSON array, one element per line. Nothing is downloaded.
#
# Elements are separated by commas, with no comma after the last element, so
# the output can be parsed by a strict JSON parser (a trailing comma before
# the closing bracket is not valid JSON).
def list_files
  # Fetch and serialize everything before printing the opening bracket.
  # NOTE(review): presumably the retrieval may print its own progress output;
  # doing it first keeps any such output outside the JSON array -- confirm.
  entries = get_file_list_by_timestamp.map { |file| file.to_json }
  puts "["
  # An empty list prints just the brackets, i.e. an empty JSON array.
  puts entries.join(",\n") unless entries.empty?
  puts "]"
end
def download_files def download_files
puts "Downloading #{@base_url} to #{backup_path} from Wayback Machine..." puts "Downloading #{@base_url} to #{backup_path} from Wayback Machine..."
puts puts