Refactor archive API calls to own module

This commit is contained in:
hartator
2016-09-17 13:37:13 -05:00
parent 59b379b9c6
commit 95eaa91715
3 changed files with 32 additions and 13 deletions

View File

@@ -6,9 +6,12 @@ require 'cgi'
require 'json'
require_relative 'wayback_machine_downloader/tidy_bytes'
require_relative 'wayback_machine_downloader/to_regex'
require_relative 'wayback_machine_downloader/archive_api'
class WaybackMachineDownloader
include ArchiveAPI
VERSION = "0.5.4"
attr_accessor :base_url, :directory, :from_timestamp, :to_timestamp, :only_filter, :exclude_filter, :all, :list, :threads_count
@@ -72,18 +75,8 @@ class WaybackMachineDownloader
end
def get_file_list_curated
parameters_for_wayback_machine_api = "&fl=timestamp,original&collapse=original"
unless @all
parameters_for_wayback_machine_api += "&filter=statuscode:200"
end
if @from_timestamp and @from_timestamp != 0
parameters_for_wayback_machine_api += "&from=" + @from_timestamp.to_s
end
if @to_timestamp and @to_timestamp != 0
parameters_for_wayback_machine_api += "&to=" + @to_timestamp.to_s
end
index_file_list_raw = open("http://web.archive.org/cdx/search/xd?url=#{@base_url}" + parameters_for_wayback_machine_api)
all_file_list_raw = open("http://web.archive.org/cdx/search/xd?url=#{@base_url}/*" + parameters_for_wayback_machine_api)
index_file_list_raw = get_raw_list_from_api(@base_url)
all_file_list_raw = get_raw_list_from_api(@base_url + '/*')
file_list_curated = Hash.new
[index_file_list_raw, all_file_list_raw].each do |file|
file.each_line do |line|

View File

@@ -0,0 +1,26 @@
require 'net/http'
require 'uri'

# Helpers for querying the Wayback Machine CDX search endpoint.
#
# Intended to be mixed into a class. The query options are read from the
# including object's @all, @from_timestamp and @to_timestamp instance
# variables.
module ArchiveAPI
  # Requests the CDX listing for +url+ and returns the raw response body.
  #
  # The value of +url+ is form-encoded before being placed in the query
  # string, so characters such as "&", "?", "#", spaces or non-ASCII text
  # in the target URL cannot be mistaken for extra API parameters or make
  # URI.parse raise URI::InvalidURIError.
  #
  # NOTE(review): Net::HTTP.get_response does not raise on non-2xx
  # statuses, so an error page body is returned as-is - confirm callers
  # are fine with that.
  #
  # @param url [String] URL (optionally ending in "/*") to look up
  # @return [String] body of the HTTP response
  def get_raw_list_from_api(url)
    encoded_url = URI.encode_www_form_component(url)
    request_uri = URI.parse(
      "http://web.archive.org/cdx/search/xd?url=#{encoded_url}#{parameters_for_api}"
    )
    Net::HTTP.get_response(request_uri).body
  end

  # Builds the query-string suffix appended to every CDX request.
  #
  # Always asks for the timestamp and original fields collapsed on
  # original; adds the statuscode:200 filter unless @all is set, and adds
  # from/to bounds when the matching timestamp is set and non-zero.
  #
  # @return [String] parameters, each prefixed with "&"
  def parameters_for_api
    parameters = "&fl=timestamp,original&collapse=original"
    parameters += "&filter=statuscode:200" unless @all
    parameters += "&from=#{@from_timestamp}" if @from_timestamp && @from_timestamp != 0
    parameters += "&to=#{@to_timestamp}" if @to_timestamp && @to_timestamp != 0
    parameters
  end
end