Added a new option for regex acceptance, so you don't have to download the entire site when looking for a specific path

This commit is contained in:
Gil Cohen
2015-11-06 13:11:26 -05:00
parent ed7948a372
commit 432ca1d5b5
3 changed files with 41 additions and 4 deletions

View File

@@ -2,6 +2,7 @@
require_relative '../lib/wayback_machine_downloader'
require 'optparse'
require 'pp'
options = {}
option_parser = OptionParser.new do |opts|
@@ -11,19 +12,26 @@ option_parser = OptionParser.new do |opts|
opts.separator "Download any website from the Wayback Machine."
opts.separator ""
opts.separator "Optional option:"
opts.separator "Optional options:"
opts.on("-t", "--timestamp TIMESTAMP", Integer, "Only files on or before timestamp supplied (ie. 20150806225358)") do |t|
options[:timestamp] = t
end
opts.on("--accept-regex [ACCEPT_REGEX]", String,"Specify a regular expression to download. If a path doesn't meet this regex, it won't get downloaded.") do |accept_regex|
options[:accept_regex] = accept_regex
end
opts.on("-v", "--version", "Display version") do |t|
options[:version] = t
end
end.parse!
if base_url = ARGV[0]
wayback_machine_downloader = WaybackMachineDownloader.new base_url: base_url, timestamp: options[:timestamp]
# This index used to be 0 (ARGV[0]); we want to look at the /last/ argument instead.
#
# TODO: This argument needs to be handled better; argument handling is somewhat messy.
if base_url = ARGV[-1]
wayback_machine_downloader = WaybackMachineDownloader.new base_url: base_url, timestamp: options[:timestamp], accept_regex: options[:accept_regex]
wayback_machine_downloader.download_files
elsif options[:version]
puts WaybackMachineDownloader::VERSION