mirror of
https://github.com/StrawberryMaster/wayback-machine-downloader.git
synced 2025-12-18 10:16:47 +00:00
Refactoring the archive API
This commit is contained in:
parent
86b1abc646
commit
466228fee4
@@ -3,37 +3,30 @@ require 'uri'
|
||||
|
||||
module ArchiveAPI
|
||||
|
||||
# Fetches one page of snapshot rows for +url+ from the web.archive.org CDX
# search endpoint and returns them as parsed JSON.
#
# @param url [String] value sent as the "url" query parameter
# @param page_index [Object, nil] page number forwarded to parameters_for_api
# @param http [#get] client whose #get(uri) returns an object responding to
#   #body (presumably a reusable Net::HTTP connection; confirm at the call site)
# @return [Array] the parsed rows, with the ["timestamp", "original"] header
#   row removed when present; [] when the request or the parsing fails
def get_raw_list_from_api(url, page_index, http)
  request_url = URI("https://web.archive.org/cdx/search/xd")
  params = [["output", "json"], ["url", url]] + parameters_for_api(page_index)
  request_url.query = URI.encode_www_form(params)

  begin
    response = http.get(request_url)
    json = JSON.parse(response.body)

    # Valid JSON that is not a list of rows (e.g. null or an object) is
    # useless to callers, so report it and behave like any other failure.
    unless json.is_a?(Array)
      warn "Failed to fetch data from API: unexpected response format"
      return []
    end

    # Drop the column-header row the API prepends to JSON output.
    json.shift if json.first == ["timestamp", "original"]
    json
  rescue StandardError => e
    # JSON::ParserError is a StandardError, so this single clause covers both
    # malformed payloads and transport-level failures.
    warn "Failed to fetch data from API: #{e.message}"
    []
  end
end
|
||||
|
||||
# Builds the query parameters that accompany every CDX API request.
#
# Reads @all, @from_timestamp and @to_timestamp from the object this module
# is mixed into.
#
# @param page_index [Object, nil] value for the "page" parameter; the
#   parameter is left out when this is nil or false
# @return [Array<Array>] key/value pairs in the form URI.encode_www_form accepts
def parameters_for_api(page_index)
  parameters = [["fl", "timestamp,original"], ["collapse", "digest"], ["gzip", "false"]]
  # Unless @all is set, restrict results to captures with HTTP status 200.
  parameters.push(["filter", "statuscode:200"]) unless @all
  # A timestamp of 0 is treated the same as "not set".
  parameters.push(["from", @from_timestamp.to_s]) if @from_timestamp && @from_timestamp != 0
  parameters.push(["to", @to_timestamp.to_s]) if @to_timestamp && @to_timestamp != 0
  parameters.push(["page", page_index]) if page_index
  parameters
end
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user