mirror of
https://github.com/StrawberryMaster/wayback-machine-downloader.git
synced 2025-12-17 17:56:44 +00:00
gzip support
This commit is contained in:
parent
576298dca8
commit
27dd619aa4
@ -9,6 +9,8 @@ require 'json'
|
|||||||
require 'time'
|
require 'time'
|
||||||
require 'concurrent-ruby'
|
require 'concurrent-ruby'
|
||||||
require 'logger'
|
require 'logger'
|
||||||
|
require 'zlib'
|
||||||
|
require 'stringio'
|
||||||
require_relative 'wayback_machine_downloader/tidy_bytes'
|
require_relative 'wayback_machine_downloader/tidy_bytes'
|
||||||
require_relative 'wayback_machine_downloader/to_regex'
|
require_relative 'wayback_machine_downloader/to_regex'
|
||||||
require_relative 'wayback_machine_downloader/archive_api'
|
require_relative 'wayback_machine_downloader/archive_api'
|
||||||
@ -478,23 +480,33 @@ class WaybackMachineDownloader
|
|||||||
begin
|
begin
|
||||||
wayback_url = if @rewritten
|
wayback_url = if @rewritten
|
||||||
"https://web.archive.org/web/#{file_timestamp}/#{file_url}"
|
"https://web.archive.org/web/#{file_timestamp}/#{file_url}"
|
||||||
else
|
else
|
||||||
"https://web.archive.org/web/#{file_timestamp}id_/#{file_url}"
|
"https://web.archive.org/web/#{file_timestamp}id_/#{file_url}"
|
||||||
end
|
end
|
||||||
|
|
||||||
request = Net::HTTP::Get.new(URI(wayback_url))
|
request = Net::HTTP::Get.new(URI(wayback_url))
|
||||||
request["Connection"] = "keep-alive"
|
request["Connection"] = "keep-alive"
|
||||||
request["User-Agent"] = "WaybackMachineDownloader/#{VERSION}"
|
request["User-Agent"] = "WaybackMachineDownloader/#{VERSION}"
|
||||||
|
request["Accept-Encoding"] = "gzip, deflate"
|
||||||
|
|
||||||
response = connection.request(request)
|
response = connection.request(request)
|
||||||
|
|
||||||
case response
|
case response
|
||||||
when Net::HTTPSuccess
|
when Net::HTTPSuccess
|
||||||
File.open(file_path, "wb") do |file|
|
File.open(file_path, "wb") do |file|
|
||||||
if block_given?
|
body = response.body
|
||||||
yield(response, file)
|
if response['content-encoding'] == 'gzip' && body && !body.empty?
|
||||||
|
begin
|
||||||
|
gz = Zlib::GzipReader.new(StringIO.new(body))
|
||||||
|
decompressed_body = gz.read
|
||||||
|
gz.close
|
||||||
|
file.write(decompressed_body)
|
||||||
|
rescue Zlib::GzipFile::Error => e
|
||||||
|
@logger.warn("Failure decompressing gzip file #{file_url}: #{e.message}")
|
||||||
|
file.write(body)
|
||||||
|
end
|
||||||
else
|
else
|
||||||
file.write(response.body)
|
file.write(body) if body
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
when Net::HTTPRedirection
|
when Net::HTTPRedirection
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user