Add timestamp to lock backup to a specific version

This commit is contained in:
hartator 2015-08-09 21:26:43 -05:00
parent e334851eb4
commit 0f1d77bf67

View File

@@ -3,10 +3,11 @@ require 'fileutils'
class WaybackMachineDownloader class WaybackMachineDownloader
attr_accessor :base_url attr_accessor :base_url, :timestamp
def initialize params def initialize params
@base_url = params[:base_url] @base_url = params[:base_url]
@timestamp = params[:timestamp]
end end
def backup_name def backup_name
@@ -17,7 +18,7 @@ class WaybackMachineDownloader
'websites/' + backup_name + '/' 'websites/' + backup_name + '/'
end end
def file_list_curated def get_file_list_curated
file_list_raw = open "http://web.archive.org/cdx/search/xd?url=#{@base_url}/*" file_list_raw = open "http://web.archive.org/cdx/search/xd?url=#{@base_url}/*"
file_list_curated = Hash.new file_list_curated = Hash.new
file_list_raw.each_line do |line| file_list_raw.each_line do |line|
@@ -38,6 +39,9 @@ class WaybackMachineDownloader
end end
def download_files def download_files
puts "Downlading #{@base_url} from Wayback Machine..."
puts
file_list_curated = get_file_list_curated
file_list_curated.each do |file_id, file_remote_info| file_list_curated.each do |file_id, file_remote_info|
timestamp = file_remote_info[:timestamp] timestamp = file_remote_info[:timestamp]
file_url = file_remote_info[:file_url] file_url = file_remote_info[:file_url]
@@ -45,7 +49,7 @@ class WaybackMachineDownloader
if file_id == "" if file_id == ""
dir_path = backup_path dir_path = backup_path
file_path = backup_path + 'index.html' file_path = backup_path + 'index.html'
elsif file_url[-1] == '/' elsif file_url[-1] == '/' or not file_path_elements[-1].include? '.'
dir_path = backup_path + file_path_elements[0..-1].join('/') dir_path = backup_path + file_path_elements[0..-1].join('/')
file_path = backup_path + file_path_elements[0..-1].join('/') + 'index.html' file_path = backup_path + file_path_elements[0..-1].join('/') + 'index.html'
else else
@@ -60,7 +64,7 @@ class WaybackMachineDownloader
file.write(uri.read) file.write(uri.read)
end end
rescue OpenURI::HTTPError => e rescue OpenURI::HTTPError => e
puts "#{file_url} # 404" puts "#{file_url} # #{e}"
file.write(e.io.read) file.write(e.io.read)
end end
end end
@@ -69,6 +73,8 @@ class WaybackMachineDownloader
puts "#{file_url} # #{file_path} already exists." puts "#{file_url} # #{file_path} already exists."
end end
end end
puts
puts "Download complete, saved in #{backup_path}. (#{file_list_curated.size} files downloaded.)"
end end
end end