mirror of
https://github.com/StrawberryMaster/wayback-machine-downloader.git
synced 2025-12-18 02:06:35 +00:00
Clean up data earlier from UTF-8 bad bytes
This commit is contained in:
parent
cfa236a4d1
commit
7b69f3c236
@@ -78,12 +78,11 @@ class WaybackMachineDownloader
|
||||
file_list_curated = Hash.new
|
||||
[index_file_list_raw, all_file_list_raw].each do |file|
|
||||
file.each_line do |line|
|
||||
- line = line.split(' ')
|
||||
+ line = line.tidy_bytes.split(' ')
|
||||
file_timestamp = line[0].to_i
|
||||
file_url = line[1]
|
||||
file_id = file_url.split('/')[3..-1].join('/')
|
||||
file_id = CGI::unescape file_id
|
||||
- file_id = file_id.tidy_bytes unless file_id == ""
|
||||
if file_id.nil?
|
||||
puts "Malformed file url, ignoring: #{file_url}"
|
||||
else
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user