mirror of
https://github.com/StrawberryMaster/wayback-machine-downloader.git
synced 2025-12-17 17:56:44 +00:00
Added ability to download rewritten Wayback Archive files
This commit is contained in:
parent
3fff7daf35
commit
9283f04a57
@ -58,6 +58,10 @@ option_parser = OptionParser.new do |opts|
|
||||
options[:list] = true
|
||||
end
|
||||
|
||||
opts.on("-r", "--rewritten", "Downloads the rewritten Wayback Machine files instead of the original files") do |t|
|
||||
options[:rewritten] = t
|
||||
end
|
||||
|
||||
opts.on("-v", "--version", "Display version") do |t|
|
||||
options[:version] = t
|
||||
end
|
||||
|
||||
@ -136,6 +136,7 @@ class WaybackMachineDownloader
|
||||
@all = params[:all]
|
||||
@maximum_pages = params[:maximum_pages] ? params[:maximum_pages].to_i : 100
|
||||
@threads_count = [params[:threads_count].to_i, 1].max
|
||||
@rewritten = params[:rewritten]
|
||||
@timeout = params[:timeout] || DEFAULT_TIMEOUT
|
||||
@logger = setup_logger
|
||||
@failed_downloads = Concurrent::Array.new
|
||||
@ -428,7 +429,13 @@ class WaybackMachineDownloader
|
||||
def download_with_retry(file_path, file_url, file_timestamp, connection)
|
||||
retries = 0
|
||||
begin
|
||||
request = Net::HTTP::Get.new(URI("https://web.archive.org/web/#{file_timestamp}id_/#{file_url}"))
|
||||
wayback_url = if @rewritten
|
||||
"https://web.archive.org/web/#{file_timestamp}/#{file_url}"
|
||||
else
|
||||
"https://web.archive.org/web/#{file_timestamp}id_/#{file_url}"
|
||||
end
|
||||
|
||||
request = Net::HTTP::Get.new(URI(wayback_url))
|
||||
request["Connection"] = "keep-alive"
|
||||
request["User-Agent"] = "WaybackMachineDownloader/#{VERSION}"
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user