mirror of
https://github.com/StrawberryMaster/wayback-machine-downloader.git
synced 2025-12-17 17:56:44 +00:00
Add option to only list files without downloading
This commit is contained in:
parent
4ea8505394
commit
32156db551
11
README.md
11
README.md
@ -33,6 +33,7 @@ It will download the last version of every file present on Wayback Machine to `.
|
||||
-o, --only ONLY_FILTER Restrict downloading to urls that match this filter (use // notation for the filter to be treated as a regex)
|
||||
-x, --exclude EXCLUDE_FILTER Skip downloading of urls that match this filter (use // notation for the filter to be treated as a regex)
|
||||
-a, --all Expand downloading to error files (40x and 50x) and redirections (30x)
|
||||
-l, --list Only list file urls in a JSON format with the archived timestamps. Won't download anything.
|
||||
-v, --version Display version
|
||||
|
||||
## From Timestamp
|
||||
@ -95,6 +96,16 @@ Example:
|
||||
|
||||
wayback_machine_downloader http://example.com --all
|
||||
|
||||
## Only list files without downloading
|
||||
|
||||
-l, --list
|
||||
|
||||
It will only list the files that would be downloaded, along with their snapshot timestamps and URLs. The output is in JSON format, and nothing is downloaded. This is useful for debugging or for feeding the file list to another application.
|
||||
|
||||
Example:
|
||||
|
||||
wayback_machine_downloader http://example.com --list
|
||||
|
||||
## Using the Docker image
|
||||
|
||||
As an alternative way to install, we have a Docker image! Retrieve the wayback-machine-downloader Docker image this way:
|
||||
|
||||
@ -34,6 +34,10 @@ option_parser = OptionParser.new do |opts|
|
||||
options[:all] = true
|
||||
end
|
||||
|
||||
opts.on("-l", "--list", "Only list file urls in a JSON format with the archived timestamps. Won't download anything.") do |t|
|
||||
options[:list] = true
|
||||
end
|
||||
|
||||
opts.on("-v", "--version", "Display version") do |t|
|
||||
options[:version] = t
|
||||
end
|
||||
@ -42,7 +46,11 @@ end.parse!
|
||||
if (base_url = ARGV[-1])
|
||||
options[:base_url] = base_url
|
||||
wayback_machine_downloader = WaybackMachineDownloader.new options
|
||||
wayback_machine_downloader.download_files
|
||||
if wayback_machine_downloader.list
|
||||
wayback_machine_downloader.list_files
|
||||
else
|
||||
wayback_machine_downloader.download_files
|
||||
end
|
||||
elsif options[:version]
|
||||
puts WaybackMachineDownloader::VERSION
|
||||
else
|
||||
|
||||
@ -3,14 +3,15 @@
|
||||
require 'open-uri'
|
||||
require 'fileutils'
|
||||
require 'cgi'
|
||||
require 'json'
|
||||
require_relative 'wayback_machine_downloader/tidy_bytes'
|
||||
require_relative 'wayback_machine_downloader/to_regex'
|
||||
|
||||
class WaybackMachineDownloader
|
||||
|
||||
VERSION = "0.4.3"
|
||||
VERSION = "0.4.4"
|
||||
|
||||
attr_accessor :base_url, :from_timestamp, :to_timestamp, :only_filter, :exclude_filter, :all
|
||||
attr_accessor :base_url, :from_timestamp, :to_timestamp, :only_filter, :exclude_filter, :all, :list
|
||||
|
||||
def initialize params
|
||||
@base_url = params[:base_url]
|
||||
@ -19,6 +20,7 @@ class WaybackMachineDownloader
|
||||
@only_filter = params[:only_filter]
|
||||
@exclude_filter = params[:exclude_filter]
|
||||
@all = params[:all]
|
||||
@list = params[:list]
|
||||
end
|
||||
|
||||
def backup_name
|
||||
@ -106,6 +108,14 @@ class WaybackMachineDownloader
|
||||
end
|
||||
end
|
||||
|
||||
# Prints the file list to stdout as a JSON array, one element per line,
# without downloading anything.
#
# Each element yielded by get_file_list_by_timestamp is serialized with
# #to_json. Elements are separated by commas, with no comma after the last
# one, so the complete output parses as valid JSON. An empty list prints
# "[" and "]" only.
#
# NOTE(review): assumes get_file_list_by_timestamp returns an Enumerable of
# JSON-serializable objects; confirm against its definition.
def list_files
  entries = get_file_list_by_timestamp.map(&:to_json)
  puts "["
  # Join with ",\n" so separators appear only between elements; a trailing
  # comma before "]" would make the output invalid JSON.
  puts entries.join(",\n") unless entries.empty?
  puts "]"
end
|
||||
|
||||
def download_files
|
||||
puts "Downloading #{@base_url} to #{backup_path} from Wayback Machine..."
|
||||
puts
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user