From 44924d0f5cde43069c3ba1d04bf46bb16890dfee Mon Sep 17 00:00:00 2001 From: hartator Date: Thu, 5 Nov 2015 16:38:52 -0600 Subject: [PATCH] Ignore line with erroneous UTF-8 characters --- lib/wayback_machine_downloader.rb | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/wayback_machine_downloader.rb b/lib/wayback_machine_downloader.rb index a975570..ac43178 100644 --- a/lib/wayback_machine_downloader.rb +++ b/lib/wayback_machine_downloader.rb @@ -28,6 +28,10 @@ class WaybackMachineDownloader [index_file_list_raw, all_file_list_raw].each do |file| file.each_line do |line| line = line.tidy_bytes + unless line + puts "Malformed line, ignoring." + next + end line = line.split(' ') file_timestamp = line[1].to_i file_url = line[2]