Ignore line with erroneous UTF-8 characters

This commit is contained in:
hartator 2015-11-05 16:38:52 -06:00
parent 95655b1263
commit 44924d0f5c

View File

@@ -28,6 +28,10 @@ class WaybackMachineDownloader
[index_file_list_raw, all_file_list_raw].each do |file| [index_file_list_raw, all_file_list_raw].each do |file|
file.each_line do |line| file.each_line do |line|
line = line.tidy_bytes line = line.tidy_bytes
unless line
puts "Malformed line, ignoring."
next
end
line = line.split(' ') line = line.split(' ')
file_timestamp = line[1].to_i file_timestamp = line[1].to_i
file_url = line[2] file_url = line[2]