gzip support

This commit is contained in:
Felipe 2025-04-19 13:07:07 +00:00 committed by GitHub
parent 576298dca8
commit 27dd619aa4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -9,6 +9,8 @@ require 'json'
require 'time' require 'time'
require 'concurrent-ruby' require 'concurrent-ruby'
require 'logger' require 'logger'
require 'zlib'
require 'stringio'
require_relative 'wayback_machine_downloader/tidy_bytes' require_relative 'wayback_machine_downloader/tidy_bytes'
require_relative 'wayback_machine_downloader/to_regex' require_relative 'wayback_machine_downloader/to_regex'
require_relative 'wayback_machine_downloader/archive_api' require_relative 'wayback_machine_downloader/archive_api'
@@ -485,16 +487,26 @@ class WaybackMachineDownloader
request = Net::HTTP::Get.new(URI(wayback_url)) request = Net::HTTP::Get.new(URI(wayback_url))
request["Connection"] = "keep-alive" request["Connection"] = "keep-alive"
request["User-Agent"] = "WaybackMachineDownloader/#{VERSION}" request["User-Agent"] = "WaybackMachineDownloader/#{VERSION}"
request["Accept-Encoding"] = "gzip, deflate"
response = connection.request(request) response = connection.request(request)
case response case response
when Net::HTTPSuccess when Net::HTTPSuccess
File.open(file_path, "wb") do |file| File.open(file_path, "wb") do |file|
if block_given? body = response.body
yield(response, file) if response['content-encoding'] == 'gzip' && body && !body.empty?
begin
gz = Zlib::GzipReader.new(StringIO.new(body))
decompressed_body = gz.read
gz.close
file.write(decompressed_body)
rescue Zlib::GzipFile::Error => e
@logger.warn("Failure decompressing gzip file #{file_url}: #{e.message}")
file.write(body)
end
else else
file.write(response.body) file.write(body) if body
end end
end end
when Net::HTTPRedirection when Net::HTTPRedirection