mirror of
https://github.com/StrawberryMaster/wayback-machine-downloader.git
synced 2025-12-29 16:16:06 +00:00
Refactor archive API calls to own module
This commit is contained in:
@@ -6,9 +6,12 @@ require 'cgi'
|
||||
require 'json'
|
||||
require_relative 'wayback_machine_downloader/tidy_bytes'
|
||||
require_relative 'wayback_machine_downloader/to_regex'
|
||||
require_relative 'wayback_machine_downloader/archive_api'
|
||||
|
||||
class WaybackMachineDownloader
|
||||
|
||||
include ArchiveAPI
|
||||
|
||||
VERSION = "0.5.4"
|
||||
|
||||
attr_accessor :base_url, :directory, :from_timestamp, :to_timestamp, :only_filter, :exclude_filter, :all, :list, :threads_count
|
||||
@@ -72,18 +75,8 @@ class WaybackMachineDownloader
|
||||
end
|
||||
|
||||
def get_file_list_curated
|
||||
parameters_for_wayback_machine_api = "&fl=timestamp,original&collapse=original"
|
||||
unless @all
|
||||
parameters_for_wayback_machine_api += "&filter=statuscode:200"
|
||||
end
|
||||
if @from_timestamp and @from_timestamp != 0
|
||||
parameters_for_wayback_machine_api += "&from=" + @from_timestamp.to_s
|
||||
end
|
||||
if @to_timestamp and @to_timestamp != 0
|
||||
parameters_for_wayback_machine_api += "&to=" + @to_timestamp.to_s
|
||||
end
|
||||
index_file_list_raw = open("http://web.archive.org/cdx/search/xd?url=#{@base_url}" + parameters_for_wayback_machine_api)
|
||||
all_file_list_raw = open("http://web.archive.org/cdx/search/xd?url=#{@base_url}/*" + parameters_for_wayback_machine_api)
|
||||
index_file_list_raw = get_raw_list_from_api(@base_url)
|
||||
all_file_list_raw = get_raw_list_from_api(@base_url + '/*')
|
||||
file_list_curated = Hash.new
|
||||
[index_file_list_raw, all_file_list_raw].each do |file|
|
||||
file.each_line do |line|
|
||||
|
||||
26
lib/wayback_machine_downloader/archive_api.rb
Normal file
26
lib/wayback_machine_downloader/archive_api.rb
Normal file
@@ -0,0 +1,26 @@
|
||||
# Mixin that queries the Wayback Machine CDX search endpoint.
#
# The including object supplies the filters through its @all,
# @from_timestamp and @to_timestamp instance variables.
module ArchiveAPI

  # Fetches the raw capture listing for +url+ from the CDX search endpoint.
  #
  # The target is percent-encoded before it is placed in the query string, so
  # characters such as "&", "?", "#", spaces or non-ASCII text inside +url+
  # can neither inject extra API parameters nor make URI.parse raise.
  #
  # NOTE(review): the HTTP status is not inspected and redirects are not
  # followed; whatever body the server sends back is returned as-is.
  #
  # @param url [String] target to look up (the caller may append "/*")
  # @return [String, nil] the response body exactly as returned by Net::HTTP
  def get_raw_list_from_api(url)
    encoded_target = URI.encode_www_form_component(url)
    request_uri = URI.parse(
      "http://web.archive.org/cdx/search/xd?url=#{encoded_target}#{parameters_for_api}"
    )
    Net::HTTP.get_response(request_uri).body
  end

  # Builds the query-string suffix shared by every CDX request.
  #
  # Always asks for the timestamp and original fields collapsed on original.
  # Adds the statuscode:200 filter unless @all is truthy, and adds from / to
  # bounds only when the matching timestamp is set and not 0.
  #
  # @return [String] suffix beginning with "&", ready to append after "url="
  def parameters_for_api
    parameters = ["&fl=timestamp,original&collapse=original"]
    parameters << "&filter=statuscode:200" unless @all
    parameters << "&from=#{@from_timestamp}" if @from_timestamp && @from_timestamp != 0
    parameters << "&to=#{@to_timestamp}" if @to_timestamp && @to_timestamp != 0
    parameters.join
  end

end
|
||||
Reference in New Issue
Block a user