diff --git a/README.md b/README.md index 5f13605..70742c9 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,7 @@ It will download the last version of every file present on Wayback Machine to `. -o, --only ONLY_FILTER Restrict downloading to urls that match this filter (use // notation for the filter to be treated as a regex) -x, --exclude EXCLUDE_FILTER Skip downloading of urls that match this filter (use // notation for the filter to be treated as a regex) -a, --all Expand downloading to error files (40x and 50x) and redirections (30x) + -l, --list Only list file urls in a JSON format with the archived timestamps. Won't download anything. -v, --version Display version ## From Timestamp @@ -95,6 +96,16 @@ Example: wayback_machine_downloader http://example.com --all +## Only list files without downloading + + -l, --list + +It will just display the files to be downloaded with their snapshot timestamps and urls. The output format is JSON. It won't download anything. It's useful for debugging or to connect to another application. + +Example: + + wayback_machine_downloader http://example.com --list + ## Using the Docker image As an alternative installation way, we have a Docker image! Retrieve the wayback-machine-downloader Docker image this way: diff --git a/bin/wayback_machine_downloader b/bin/wayback_machine_downloader index 8cc0730..fc36739 100755 --- a/bin/wayback_machine_downloader +++ b/bin/wayback_machine_downloader @@ -34,6 +34,10 @@ option_parser = OptionParser.new do |opts| options[:all] = true end + opts.on("-l", "--list", "Only list file urls in a JSON format with the archived timestamps. Won't download anything.") do |t| + options[:list] = true + end + opts.on("-v", "--version", "Display version") do |t| options[:version] = t end @@ -42,7 +46,11 @@ end.parse! 
# Prints the file list returned by get_file_list_by_timestamp to stdout as a
# JSON array, one element per line, instead of downloading anything.
#
# Elements are separated by commas *between* entries only: emitting a comma
# after the last entry (as a naive "element + ','" loop does) produces a
# trailing comma, which strict JSON parsers reject.
#
# An empty file list is printed as an empty array ("[" and "]" on two lines).
def list_files
  puts "["
  entries = get_file_list_by_timestamp.map(&:to_json)
  # Skip the body entirely when there is nothing to list so no blank line
  # is emitted between the brackets.
  puts entries.join(",\n") unless entries.empty?
  puts "]"
end