mirror of
https://github.com/StrawberryMaster/wayback-machine-downloader.git
synced 2025-12-29 16:16:06 +00:00
Avoid requesting the raw file list twice
This commit is contained in:
@@ -49,7 +49,7 @@ class WaybackMachineDownloader
|
|||||||
file_list_curated
|
file_list_curated
|
||||||
end
|
end
|
||||||
|
|
||||||
def file_list_by_timestamp
|
def get_file_list_by_timestamp
|
||||||
file_list_curated = get_file_list_curated
|
file_list_curated = get_file_list_curated
|
||||||
file_list_curated = file_list_curated.sort_by { |k,v| v[:timestamp] }.reverse
|
file_list_curated = file_list_curated.sort_by { |k,v| v[:timestamp] }.reverse
|
||||||
file_list_curated.map do |file_remote_info|
|
file_list_curated.map do |file_remote_info|
|
||||||
@@ -61,7 +61,7 @@ class WaybackMachineDownloader
|
|||||||
def download_files
|
def download_files
|
||||||
puts "Downloading #{@base_url} to #{backup_path} from Wayback Machine..."
|
puts "Downloading #{@base_url} to #{backup_path} from Wayback Machine..."
|
||||||
puts
|
puts
|
||||||
file_list_curated = get_file_list_curated
|
file_list_by_timestamp = get_file_list_by_timestamp
|
||||||
count = 0
|
count = 0
|
||||||
file_list_by_timestamp.each do |file_remote_info|
|
file_list_by_timestamp.each do |file_remote_info|
|
||||||
count += 1
|
count += 1
|
||||||
@@ -84,7 +84,6 @@ class WaybackMachineDownloader
|
|||||||
structure_dir_path dir_path
|
structure_dir_path dir_path
|
||||||
open(file_path, "wb") do |file|
|
open(file_path, "wb") do |file|
|
||||||
begin
|
begin
|
||||||
open("http://web.archive.org/web/#{timestamp}id_/#{file_url}") do |uri|
|
|
||||||
open("http://web.archive.org/web/#{file_timestamp}id_/#{file_url}") do |uri|
|
open("http://web.archive.org/web/#{file_timestamp}id_/#{file_url}") do |uri|
|
||||||
file.write(uri.read)
|
file.write(uri.read)
|
||||||
end
|
end
|
||||||
@@ -98,13 +97,13 @@ class WaybackMachineDownloader
|
|||||||
rescue StandardError => e
|
rescue StandardError => e
|
||||||
puts "#{file_url} # #{e}"
|
puts "#{file_url} # #{e}"
|
||||||
end
|
end
|
||||||
puts "#{file_url} -> #{file_path} (#{count}/#{file_list_curated.size})"
|
puts "#{file_url} -> #{file_path} (#{count}/#{file_list_by_timestamp.size})"
|
||||||
else
|
else
|
||||||
puts "#{file_url} # #{file_path} already exists. (#{count}/#{file_list_curated.size})"
|
puts "#{file_url} # #{file_path} already exists. (#{count}/#{file_list_by_timestamp.size})"
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
puts
|
puts
|
||||||
puts "Download complete, saved in #{backup_path} (#{file_list_curated.size} files)"
|
puts "Download complete, saved in #{backup_path} (#{file_list_by_timestamp.size} files)"
|
||||||
end
|
end
|
||||||
|
|
||||||
def structure_dir_path dir_path
|
def structure_dir_path dir_path
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ class WaybackMachineDownloaderTest < Minitest::Test
|
|||||||
file_url: "http://www.onlyfreegames.net:80/Fs-06.jpg",
|
file_url: "http://www.onlyfreegames.net:80/Fs-06.jpg",
|
||||||
timestamp: 20060716125343
|
timestamp: 20060716125343
|
||||||
}
|
}
|
||||||
assert_equal file_expected, @wayback_machine_downloader.file_list_by_timestamp[-1]
|
assert_equal file_expected, @wayback_machine_downloader.get_file_list_by_timestamp[-1]
|
||||||
end
|
end
|
||||||
|
|
||||||
def test_file_download
|
def test_file_download
|
||||||
|
|||||||
Reference in New Issue
Block a user