mirror of
https://github.com/StrawberryMaster/wayback-machine-downloader.git
synced 2025-12-17 17:56:44 +00:00
Add more precise error handling for malformed UTF-8 URLs
This commit is contained in:
parent
be6fe01cf0
commit
c0024cbcb5
@ -1,5 +1,6 @@
|
||||
require 'open-uri'
|
||||
require 'fileutils'
|
||||
require_relative 'tidy_bytes'
|
||||
|
||||
class WaybackMachineDownloader
|
||||
|
||||
@ -31,7 +32,10 @@ class WaybackMachineDownloader
|
||||
file_url = line[2]
|
||||
file_id = file_url.split('/')[3..-1].join('/')
|
||||
file_id = URI.unescape file_id
|
||||
if @timestamp == 0 or file_timestamp <= @timestamp
|
||||
file_id = file_id.tidy_bytes unless file_id == ""
|
||||
if file_id.nil?
|
||||
puts "Malformed file url, ignoring: #{file_url}"
|
||||
elsif @timestamp == 0 or file_timestamp <= @timestamp
|
||||
if file_list_curated[file_id]
|
||||
unless file_list_curated[file_id][:timestamp] > file_timestamp
|
||||
file_list_curated[file_id] = {file_url: file_url, timestamp: file_timestamp}
|
||||
@ -55,7 +59,7 @@ class WaybackMachineDownloader
|
||||
end
|
||||
|
||||
def download_files
|
||||
puts "Downlading #{@base_url} from Wayback Machine..."
|
||||
puts "Downlading #{@base_url} to #{backup_path} from Wayback Machine..."
|
||||
puts
|
||||
file_list_curated = get_file_list_curated
|
||||
count = 0
|
||||
@ -84,7 +88,7 @@ class WaybackMachineDownloader
|
||||
rescue OpenURI::HTTPError => e
|
||||
puts "#{file_url} # #{e}"
|
||||
file.write(e.io.read)
|
||||
rescue Exception => e
|
||||
rescue StandardError => e
|
||||
puts "#{file_url} # #{e}"
|
||||
end
|
||||
end
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user