mirror of
https://github.com/StrawberryMaster/wayback-machine-downloader.git
synced 2025-12-17 17:56:44 +00:00
Set file id to nil when encountering UTF-8 issues
This commit is contained in:
parent
94acdd6a7d
commit
bdf611bce9
2
.gitignore
vendored
2
.gitignore
vendored
@ -24,3 +24,5 @@ tmp
|
|||||||
|
|
||||||
## RUBINIUS
|
## RUBINIUS
|
||||||
*.rbc
|
*.rbc
|
||||||
|
|
||||||
|
test.rb
|
||||||
|
|||||||
@ -78,10 +78,9 @@ class WaybackMachineDownloader
|
|||||||
file_list_curated = Hash.new
|
file_list_curated = Hash.new
|
||||||
[index_file_list_raw, all_file_list_raw].each do |file|
|
[index_file_list_raw, all_file_list_raw].each do |file|
|
||||||
file.each_line do |line|
|
file.each_line do |line|
|
||||||
next if line.size < 20
|
|
||||||
file_timestamp = line[0..13].to_i
|
file_timestamp = line[0..13].to_i
|
||||||
file_url = line[15..-2]
|
file_url = line[15..-2]
|
||||||
file_id = file_url.split('/')[3..-1].join('/')
|
file_id = file_url.split('/')[3..-1].join('/') rescue nil
|
||||||
file_id = CGI::unescape file_id
|
file_id = CGI::unescape file_id
|
||||||
file_id = file_id.tidy_bytes unless file_id == ""
|
file_id = file_id.tidy_bytes unless file_id == ""
|
||||||
if file_id.nil?
|
if file_id.nil?
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user