From 509d7034a10c9971af68c4e8b3a12de2b34bc9cd Mon Sep 17 00:00:00 2001
From: Felipe <41008398+StrawberryMaster@users.noreply.github.com>
Date: Wed, 26 Jun 2024 19:52:12 +0000
Subject: [PATCH] Setting file modified time to value reported by Wayback Machine

Implements 937306712c564e5757d898feacc14fbabd10722d, fixes Maintain original creation/modified dates of files while downloading #174
---
NOTE(review): `requite 'time'` in the first hunk was a typo and is corrected
to `require 'time'` below; without it the file fails to load and `Time.parse`
is unavailable.
NOTE(review): in the third hunk the enclosing block parameter is `body`; no
`uri` binding is visible in this patch -- confirm it exists in the surrounding
method, otherwise read the header from the HTTP response object instead.
NOTE(review): `original_file_mtime` is assigned but none of the 7 inserted
lines applies it to the written file (e.g. via `File.utime`) -- confirm that
happens elsewhere, or add it in a follow-up.
NOTE(review): leading whitespace of the hunk lines was reconstructed with
conventional 2-space Ruby indentation -- verify against the target file (or
apply with `git apply --ignore-whitespace`).

 lib/wayback_machine_downloader.rb | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/lib/wayback_machine_downloader.rb b/lib/wayback_machine_downloader.rb
index 8ff8f0f..c9193c6 100644
--- a/lib/wayback_machine_downloader.rb
+++ b/lib/wayback_machine_downloader.rb
@@ -6,6 +6,7 @@ require 'open-uri'
 require 'fileutils'
 require 'cgi'
 require 'json'
+require 'time'
 require_relative 'wayback_machine_downloader/tidy_bytes'
 require_relative 'wayback_machine_downloader/to_regex'
 require_relative 'wayback_machine_downloader/archive_api'
@@ -264,6 +265,7 @@ class WaybackMachineDownloader
     file_id = file_remote_info[:file_id]
     file_timestamp = file_remote_info[:timestamp]
     file_path_elements = file_id.split('/')
+    original_file_mtime = nil
     if file_id == ""
       dir_path = backup_path
       file_path = backup_path + 'index.html'
@@ -285,6 +287,11 @@ class WaybackMachineDownloader
           begin
             http.get(URI("https://web.archive.org/web/#{file_timestamp}id_/#{file_url}")) do |body|
               file.write(body)
+
+              if uri.meta.has_key?("x-archive-orig-last-modified")
+                original_file_mtime = Time.parse(uri.meta["x-archive-orig-last-modified"])
+              end
+
             end
           rescue OpenURI::HTTPError => e
             puts "#{file_url} # #{e}"