mirror of
https://github.com/StrawberryMaster/wayback-machine-downloader.git
synced 2025-12-29 16:16:06 +00:00
Minor cleanup
This commit is contained in:
@@ -201,7 +201,7 @@ class WaybackMachineDownloader
|
|||||||
puts "Downloading #{@base_url} to #{backup_path} from Wayback Machine archives."
|
puts "Downloading #{@base_url} to #{backup_path} from Wayback Machine archives."
|
||||||
puts
|
puts
|
||||||
|
|
||||||
if file_list_by_timestamp.count == 0
|
if file_list_by_timestamp.empty?
|
||||||
puts "No files to download."
|
puts "No files to download."
|
||||||
puts "Possible reasons:"
|
puts "Possible reasons:"
|
||||||
puts "\t* Site is not in Wayback Machine Archive."
|
puts "\t* Site is not in Wayback Machine Archive."
|
||||||
@@ -215,18 +215,23 @@ class WaybackMachineDownloader
|
|||||||
puts "#{file_list_by_timestamp.count} files to download:"
|
puts "#{file_list_by_timestamp.count} files to download:"
|
||||||
|
|
||||||
threads = []
|
threads = []
|
||||||
|
mutex = Mutex.new
|
||||||
@processed_file_count = 0
|
@processed_file_count = 0
|
||||||
@threads_count = 1 unless @threads_count != 0
|
@threads_count = 1 unless @threads_count != 0
|
||||||
@threads_count.times do
|
@threads_count.times do
|
||||||
http = Net::HTTP.new("web.archive.org", 443)
|
|
||||||
http.use_ssl = true
|
|
||||||
http.start()
|
|
||||||
threads << Thread.new do
|
threads << Thread.new do
|
||||||
until file_queue.empty?
|
http = Net::HTTP.new("web.archive.org", 443)
|
||||||
file_remote_info = file_queue.pop(true) rescue nil
|
http.use_ssl = true
|
||||||
download_file(file_remote_info, http) if file_remote_info
|
|
||||||
|
begin
|
||||||
|
until file_queue.empty?
|
||||||
|
file_remote_info = nil
|
||||||
|
mutex.synchronize { file_remote_info = file_queue.pop(true) rescue nil }
|
||||||
|
download_file(file_remote_info, http) if file_remote_info
|
||||||
|
end
|
||||||
|
ensure
|
||||||
|
http.finish if http.started?
|
||||||
end
|
end
|
||||||
http.finish()
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -265,7 +270,7 @@ class WaybackMachineDownloader
|
|||||||
file_id = file_remote_info[:file_id]
|
file_id = file_remote_info[:file_id]
|
||||||
file_timestamp = file_remote_info[:timestamp]
|
file_timestamp = file_remote_info[:timestamp]
|
||||||
file_path_elements = file_id.split('/')
|
file_path_elements = file_id.split('/')
|
||||||
original_file_mtime = nil
|
|
||||||
if file_id == ""
|
if file_id == ""
|
||||||
dir_path = backup_path
|
dir_path = backup_path
|
||||||
file_path = backup_path + 'index.html'
|
file_path = backup_path + 'index.html'
|
||||||
@@ -287,11 +292,6 @@ class WaybackMachineDownloader
|
|||||||
begin
|
begin
|
||||||
http.get(URI("https://web.archive.org/web/#{file_timestamp}id_/#{file_url}")) do |body|
|
http.get(URI("https://web.archive.org/web/#{file_timestamp}id_/#{file_url}")) do |body|
|
||||||
file.write(body)
|
file.write(body)
|
||||||
|
|
||||||
if uri.meta.has_key?("x-archive-orig-last-modified")
|
|
||||||
original_file_mtime = Time.parse(uri.meta["x-archive-orig-last-modified"])
|
|
||||||
end
|
|
||||||
|
|
||||||
end
|
end
|
||||||
rescue OpenURI::HTTPError => e
|
rescue OpenURI::HTTPError => e
|
||||||
puts "#{file_url} # #{e}"
|
puts "#{file_url} # #{e}"
|
||||||
|
|||||||
Reference in New Issue
Block a user