2021-05-03 16:48:49 +08:00
|
|
|
require 'json'
require 'open-uri'
require 'uri'
|
|
|
|
|
|
2016-09-17 13:37:13 -05:00
|
|
|
# Mixin with helpers for querying the Internet Archive CDX search endpoint.
#
# The methods read three instance variables from the including object:
# @all, @from_timestamp and @to_timestamp. URI.open is supplied by the
# open-uri standard library, which must be loaded before
# get_raw_list_from_api is called.
module ArchiveAPI
  # Requests one page of results for +url+ and returns the parsed JSON rows.
  #
  # @param url [String] value sent as the "url" query parameter
  # @param page_index [Integer, nil] page to request; the "page" parameter is
  #   omitted when this is nil or false
  # @return [Array] the parsed rows, without the leading
  #   ["timestamp", "original"] header row when the response starts with one;
  #   an empty array when the response body is not valid JSON. A JSON document
  #   that is not an array is returned as parsed.
  def get_raw_list_from_api(url, page_index)
    # NOTE(review): the public CDX documentation names the path
    # /cdx/search/cdx; the "xd" path is kept exactly as it was — confirm it
    # is an intentional alias.
    request_url = URI("https://web.archive.org/cdx/search/xd")
    params = [["output", "json"], ["url", url]] + parameters_for_api(page_index)
    request_url.query = URI.encode_www_form(params)

    # Block form: open-uri closes the underlying stream once the block
    # returns, instead of leaving it open until garbage collection.
    body = URI.open(request_url, &:read)
    rows = JSON.parse(body)

    # The first row echoes the requested field list ("fl" parameter); drop it
    # so callers only see data rows.
    rows.shift if rows.is_a?(Array) && rows.first == ["timestamp", "original"]
    rows
  rescue JSON::ParserError
    []
  end

  # Builds the query parameters that depend on the including object's state.
  #
  # @param page_index [Integer, nil] appended as ["page", page_index] when
  #   truthy
  # @return [Array<Array>] ordered [name, value] pairs, ready to be passed to
  #   URI.encode_www_form
  def parameters_for_api(page_index)
    parameters = [["fl", "timestamp,original"], ["collapse", "digest"], ["gzip", "false"]]

    # Unless @all is set, a status-code filter for 200 is added.
    parameters.push(["filter", "statuscode:200"]) unless @all

    # A bound that is nil, false or 0 is treated as "not set".
    parameters.push(["from", @from_timestamp.to_s]) if @from_timestamp && @from_timestamp != 0
    parameters.push(["to", @to_timestamp.to_s]) if @to_timestamp && @to_timestamp != 0

    parameters.push(["page", page_index]) if page_index
    parameters
  end
end
|