mirror of
https://github.com/StrawberryMaster/wayback-machine-downloader.git
synced 2025-12-17 17:56:44 +00:00
Make file_list_curated aware of timestamp option
This commit is contained in:
parent
b6aa2da916
commit
fa366c14a9
@ -7,7 +7,7 @@ class WaybackMachineDownloader
|
|||||||
|
|
||||||
def initialize params
|
def initialize params
|
||||||
@base_url = params[:base_url]
|
@base_url = params[:base_url]
|
||||||
@timestamp = params[:timestamp]
|
@timestamp = params[:timestamp].to_i
|
||||||
end
|
end
|
||||||
|
|
||||||
def backup_name
|
def backup_name
|
||||||
@ -23,16 +23,18 @@ class WaybackMachineDownloader
|
|||||||
file_list_curated = Hash.new
|
file_list_curated = Hash.new
|
||||||
file_list_raw.each_line do |line|
|
file_list_raw.each_line do |line|
|
||||||
line = line.split(' ')
|
line = line.split(' ')
|
||||||
timestamp = line[1].to_i
|
file_timestamp = line[1].to_i
|
||||||
file_url = line[2]
|
file_url = line[2]
|
||||||
file_id = file_url.split('/')[3..-1].join('/')
|
file_id = file_url.split('/')[3..-1].join('/')
|
||||||
file_id = URI.unescape file_id
|
file_id = URI.unescape file_id
|
||||||
|
if @timestamp == 0 or file_timestamp <= @timestamp
|
||||||
if file_list_curated[file_id]
|
if file_list_curated[file_id]
|
||||||
unless file_list_curated[file_id][:timestamp] > timestamp
|
unless file_list_curated[file_id][:timestamp] > file_timestamp
|
||||||
file_list_curated[file_id] = {file_url: file_url, timestamp: timestamp}
|
file_list_curated[file_id] = {file_url: file_url, timestamp: file_timestamp}
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
file_list_curated[file_id] = {file_url: file_url, timestamp: timestamp}
|
file_list_curated[file_id] = {file_url: file_url, timestamp: file_timestamp}
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
file_list_curated
|
file_list_curated
|
||||||
@ -43,7 +45,6 @@ class WaybackMachineDownloader
|
|||||||
puts
|
puts
|
||||||
file_list_curated = get_file_list_curated
|
file_list_curated = get_file_list_curated
|
||||||
file_list_curated.each do |file_id, file_remote_info|
|
file_list_curated.each do |file_id, file_remote_info|
|
||||||
timestamp = file_remote_info[:timestamp]
|
|
||||||
file_url = file_remote_info[:file_url]
|
file_url = file_remote_info[:file_url]
|
||||||
file_path_elements = file_id.split('/')
|
file_path_elements = file_id.split('/')
|
||||||
if file_id == ""
|
if file_id == ""
|
||||||
@ -74,7 +75,7 @@ class WaybackMachineDownloader
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
puts
|
puts
|
||||||
puts "Download complete, saved in #{backup_path}. (#{file_list_curated.size} files downloaded.)"
|
puts "Download complete, saved in #{backup_path}. (#{file_list_curated.size} files downloaded)"
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user