Merge branch 'pr/82'

This commit is contained in:
hartator 2017-02-17 12:55:20 -06:00
commit 6e3986b875
2 changed files with 16 additions and 1 deletions

View File

@@ -201,7 +201,8 @@ class WaybackMachineDownloader
end end
def download_file file_remote_info def download_file file_remote_info
file_url = file_remote_info[:file_url] current_encoding = "".encoding
file_url = file_remote_info[:file_url].encode(current_encoding)
file_id = file_remote_info[:file_id] file_id = file_remote_info[:file_id]
file_timestamp = file_remote_info[:timestamp] file_timestamp = file_remote_info[:timestamp]
file_path_elements = file_id.split('/') file_path_elements = file_id.split('/')

View File

@@ -89,5 +89,19 @@ class WaybackMachineDownloaderTest < Minitest::Test
@wayback_machine_downloader.all = true @wayback_machine_downloader.all = true
assert_equal 69, @wayback_machine_downloader.get_file_list_curated.size assert_equal 69, @wayback_machine_downloader.get_file_list_curated.size
end end
# Encoding-conflict check: this needs its own base_url, one whose path
# contains a percent-encoded non-ASCII character.
def test_nonascii_suburls_download
  @wayback_machine_downloader = WaybackMachineDownloader.new(
    base_url: 'https://en.wikipedia.org/wiki/%C3%84'
  )
  # A single pass, exercising only the download itself.
  @wayback_machine_downloader.download_files
end
def test_nonascii_suburls_already_present
  @wayback_machine_downloader = WaybackMachineDownloader.new(
    base_url: 'https://en.wikipedia.org/wiki/%C3%84'
  )
  # Two consecutive passes: the second one is there to test the
  # "is already present" case.
  2.times { @wayback_machine_downloader.download_files }
end
end end