Set file id to nil when encountering UTF-8 issues

This commit is contained in:
hartator 2016-09-16 15:52:18 -05:00
parent 94acdd6a7d
commit bdf611bce9
2 changed files with 3 additions and 2 deletions

2
.gitignore vendored
View File

@@ -24,3 +24,5 @@ tmp
## RUBINIUS ## RUBINIUS
*.rbc *.rbc
test.rb

View File

@@ -78,10 +78,9 @@ class WaybackMachineDownloader
file_list_curated = Hash.new file_list_curated = Hash.new
[index_file_list_raw, all_file_list_raw].each do |file| [index_file_list_raw, all_file_list_raw].each do |file|
file.each_line do |line| file.each_line do |line|
next if line.size < 20
file_timestamp = line[0..13].to_i file_timestamp = line[0..13].to_i
file_url = line[15..-2] file_url = line[15..-2]
file_id = file_url.split('/')[3..-1].join('/') file_id = file_url.split('/')[3..-1].join('/') rescue nil
file_id = CGI::unescape file_id file_id = CGI::unescape file_id
file_id = file_id.tidy_bytes unless file_id == "" file_id = file_id.tidy_bytes unless file_id == ""
if file_id.nil? if file_id.nil?