Make file_list_curated aware of timestamp option

This commit is contained in:
hartator 2015-08-09 22:33:32 -05:00
parent b6aa2da916
commit fa366c14a9

View File

@@ -7,7 +7,7 @@ class WaybackMachineDownloader
def initialize params def initialize params
@base_url = params[:base_url] @base_url = params[:base_url]
@timestamp = params[:timestamp] @timestamp = params[:timestamp].to_i
end end
def backup_name def backup_name
@@ -23,16 +23,18 @@ class WaybackMachineDownloader
file_list_curated = Hash.new file_list_curated = Hash.new
file_list_raw.each_line do |line| file_list_raw.each_line do |line|
line = line.split(' ') line = line.split(' ')
timestamp = line[1].to_i file_timestamp = line[1].to_i
file_url = line[2] file_url = line[2]
file_id = file_url.split('/')[3..-1].join('/') file_id = file_url.split('/')[3..-1].join('/')
file_id = URI.unescape file_id file_id = URI.unescape file_id
if @timestamp == 0 or file_timestamp <= @timestamp
if file_list_curated[file_id] if file_list_curated[file_id]
unless file_list_curated[file_id][:timestamp] > timestamp unless file_list_curated[file_id][:timestamp] > file_timestamp
file_list_curated[file_id] = {file_url: file_url, timestamp: timestamp} file_list_curated[file_id] = {file_url: file_url, timestamp: file_timestamp}
end end
else else
file_list_curated[file_id] = {file_url: file_url, timestamp: timestamp} file_list_curated[file_id] = {file_url: file_url, timestamp: file_timestamp}
end
end end
end end
file_list_curated file_list_curated
@@ -43,7 +45,6 @@ class WaybackMachineDownloader
puts puts
file_list_curated = get_file_list_curated file_list_curated = get_file_list_curated
file_list_curated.each do |file_id, file_remote_info| file_list_curated.each do |file_id, file_remote_info|
timestamp = file_remote_info[:timestamp]
file_url = file_remote_info[:file_url] file_url = file_remote_info[:file_url]
file_path_elements = file_id.split('/') file_path_elements = file_id.split('/')
if file_id == "" if file_id == ""
@@ -74,7 +75,7 @@ class WaybackMachineDownloader
end end
end end
puts puts
puts "Download complete, saved in #{backup_path}. (#{file_list_curated.size} files downloaded.)" puts "Download complete, saved in #{backup_path}. (#{file_list_curated.size} files downloaded)"
end end
end end