Avoid requesting the raw file list twice

This commit is contained in:
hartator
2015-11-05 16:19:03 -06:00
parent 355f4c1be7
commit 5d17e3e631
2 changed files with 6 additions and 7 deletions

View File

@@ -49,7 +49,7 @@ class WaybackMachineDownloader
file_list_curated file_list_curated
end end
def file_list_by_timestamp def get_file_list_by_timestamp
file_list_curated = get_file_list_curated file_list_curated = get_file_list_curated
file_list_curated = file_list_curated.sort_by { |k,v| v[:timestamp] }.reverse file_list_curated = file_list_curated.sort_by { |k,v| v[:timestamp] }.reverse
file_list_curated.map do |file_remote_info| file_list_curated.map do |file_remote_info|
@@ -61,7 +61,7 @@ class WaybackMachineDownloader
def download_files def download_files
puts "Downloading #{@base_url} to #{backup_path} from Wayback Machine..." puts "Downloading #{@base_url} to #{backup_path} from Wayback Machine..."
puts puts
file_list_curated = get_file_list_curated file_list_by_timestamp = get_file_list_by_timestamp
count = 0 count = 0
file_list_by_timestamp.each do |file_remote_info| file_list_by_timestamp.each do |file_remote_info|
count += 1 count += 1
@@ -84,7 +84,6 @@ class WaybackMachineDownloader
structure_dir_path dir_path structure_dir_path dir_path
open(file_path, "wb") do |file| open(file_path, "wb") do |file|
begin begin
open("http://web.archive.org/web/#{timestamp}id_/#{file_url}") do |uri|
open("http://web.archive.org/web/#{file_timestamp}id_/#{file_url}") do |uri| open("http://web.archive.org/web/#{file_timestamp}id_/#{file_url}") do |uri|
file.write(uri.read) file.write(uri.read)
end end
@@ -98,13 +97,13 @@ class WaybackMachineDownloader
rescue StandardError => e rescue StandardError => e
puts "#{file_url} # #{e}" puts "#{file_url} # #{e}"
end end
puts "#{file_url} -> #{file_path} (#{count}/#{file_list_curated.size})" puts "#{file_url} -> #{file_path} (#{count}/#{file_list_by_timestamp.size})"
else else
puts "#{file_url} # #{file_path} already exists. (#{count}/#{file_list_curated.size})" puts "#{file_url} # #{file_path} already exists. (#{count}/#{file_list_by_timestamp.size})"
end end
end end
puts puts
puts "Download complete, saved in #{backup_path} (#{file_list_curated.size} files)" puts "Download complete, saved in #{backup_path} (#{file_list_by_timestamp.size} files)"
end end
def structure_dir_path dir_path def structure_dir_path dir_path

View File

@@ -27,7 +27,7 @@ class WaybackMachineDownloaderTest < Minitest::Test
file_url: "http://www.onlyfreegames.net:80/Fs-06.jpg", file_url: "http://www.onlyfreegames.net:80/Fs-06.jpg",
timestamp: 20060716125343 timestamp: 20060716125343
} }
assert_equal file_expected, @wayback_machine_downloader.file_list_by_timestamp[-1] assert_equal file_expected, @wayback_machine_downloader.get_file_list_by_timestamp[-1]
end end
def test_file_download def test_file_download