Switch to the JSON output format for easier parsing

This commit is contained in:
Paul Wise
2021-05-03 16:48:49 +08:00
parent afab72c894
commit cd29f79fd0
2 changed files with 17 additions and 12 deletions

View File

@@ -1,14 +1,23 @@
require 'json'
require 'uri'
module ArchiveAPI
# Query the archive.org CDX search endpoint for +url+ and return the parsed
# JSON rows.
#
# The request asks for JSON output (+output=json+) and appends whatever extra
# query pairs +parameters_for_api+ yields for +page_index+ (presumably an
# array of [key, value] pairs, since it is concatenated onto one -- confirm
# against its definition).
#
# @param url the value sent as the +url+ query parameter
# @param page_index forwarded unchanged to +parameters_for_api+
# @return the parsed JSON document with a leading ["timestamp", "original"]
#   header row removed when present, or an empty array when the response
#   body is not valid JSON
def get_raw_list_from_api(url, page_index)
  request_url = URI("https://web.archive.org/cdx/search/xd")
  params = [["output", "json"], ["url", url]]
  params += parameters_for_api(page_index)
  request_url.query = URI.encode_www_form(params)

  begin
    # Fetch exactly once; the body is only useful after parsing.
    json = JSON.parse(URI.open(request_url).read)
    # The first row may be a column-name header rather than data; drop it so
    # callers only ever see data rows.
    json.shift if json[0] == ["timestamp", "original"]
    json
  rescue JSON::ParserError
    # An unparsable body is treated as "no results".
    []
  end
end
def parameters_for_api page_index