wayback-machine-downloader/lib/wayback_machine_downloader.rb

# encoding: UTF-8

require 'net/http'
require 'open-uri'
require 'fileutils'
require 'cgi'
require 'json'
require_relative 'wayback_machine_downloader/tidy_bytes'
require_relative 'wayback_machine_downloader/to_regex'
require_relative 'wayback_machine_downloader/archive_api'

class WaybackMachineDownloader

  include ArchiveAPI

  VERSION = "1.1.1"

  attr_accessor :base_url, :directory, :from_timestamp, :to_timestamp, :only_filter, :exclude_filter, :all, :list, :maximum_pages, :threads_count

  # Builds a downloader from an options hash.
  #
  # Keys read from +params+: :base_url, :directory, :from_timestamp,
  # :to_timestamp, :only_filter, :exclude_filter, :all, :list,
  # :maximum_pages and :threads_count.
  def initialize(params)
    page_limit = params[:maximum_pages]

    # Options stored exactly as given.
    @base_url       = params[:base_url]
    @directory      = params[:directory]
    @only_filter    = params[:only_filter]
    @exclude_filter = params[:exclude_filter]
    @all            = params[:all]
    @list           = params[:list]

    # Numeric options are coerced with #to_i, so nil becomes 0.
    @from_timestamp = params[:from_timestamp].to_i
    @to_timestamp   = params[:to_timestamp].to_i
    @threads_count  = params[:threads_count].to_i

    # The page limit is the only option with a non-zero fallback.
    @maximum_pages = page_limit ? page_limit.to_i : 100
  end

  # Name used for the default backup folder.
  #
  # When the base URL contains '//', the third '/'-separated part is
  # returned (the host in "scheme://host/..."); otherwise the base URL is
  # returned unchanged.
  def backup_name
    return @base_url unless @base_url.include?('//')

    # "scheme://host/path" splits into ["scheme:", "", "host", "path"].
    url_parts = @base_url.split('/')
    url_parts[2]
  end

  # Directory (always ending in '/') that downloaded files are written to.
  #
  # Uses @directory when one was given, otherwise falls back to
  # "websites/<backup_name>/".
  def backup_path
    return 'websites/' + backup_name + '/' unless @directory

    @directory.end_with?('/') ? @directory : @directory + '/'
  end

  # Tells whether +file_url+ passes the "only" filter.
  #
  # Returns true when no filter is set. When String#to_regex (defined
  # outside this file) yields a regex for the filter, the result of
  # `regex =~ file_url` is returned (match index or nil); otherwise a
  # case-insensitive substring test is used.
  def match_only_filter(file_url)
    return true unless @only_filter

    pattern = @only_filter.to_regex
    return pattern =~ file_url if pattern

    file_url.downcase.include?(@only_filter.downcase)
  end

  # Tells whether +file_url+ is rejected by the "exclude" filter.
  #
  # Returns false when no filter is set. When String#to_regex (defined
  # outside this file) yields a regex for the filter, the result of
  # `regex =~ file_url` is returned (match index or nil); otherwise a
  # case-insensitive substring test is used.
  def match_exclude_filter(file_url)
    return false unless @exclude_filter

    pattern = @exclude_filter.to_regex
    return pattern =~ file_url if pattern

    file_url.downcase.include?(@exclude_filter.downcase)
  end

  # Collects the raw snapshot listing for the base URL as one String.
  #
  # Two unpaged queries are made first (the base URL, then base URL + '/*'),
  # followed by up to @maximum_pages paged queries for base URL + '/*',
  # stopping at the first empty page. A progress dot is printed per
  # successful query.
  #
  # Note: passing a page index lets us retrieve more snapshots, but from a
  # less fresh index.
  def get_all_snapshots_to_consider
    print "Getting snapshot pages"
    collected = ""

    collected += get_raw_list_from_api(@base_url, nil)
    print "."
    collected += get_raw_list_from_api(@base_url + '/*', nil)
    print "."

    0.upto(@maximum_pages - 1) do |page_index|
      page = get_raw_list_from_api(@base_url + '/*', page_index)
      break if page.empty?
      collected += page
      print "."
    end

    line_count = collected.lines.count
    puts " found #{line_count} snaphots to consider."
    puts
    collected
  end

  # Builds the de-duplicated list of files to fetch from the raw snapshot
  # listing returned by get_all_snapshots_to_consider.
  #
  # Each usable line is read as a 14-character timestamp, one separator
  # character, then the original URL. The file id is the URL path after
  # "scheme://host/", CGI-unescaped and passed through String#tidy_bytes
  # (defined outside this file). Lines whose URL has no such path part are
  # reported as malformed and skipped instead of aborting the whole run.
  #
  # Returns a Hash of file id => { file_url:, timestamp: }, keeping for each
  # id the snapshot with the highest timestamp.
  def get_file_list_curated
    file_list_curated = Hash.new
    get_all_snapshots_to_consider.each_line do |line|
      next unless line.include?('/')
      file_timestamp = line[0..13].to_i
      # chomp rather than dropping the last character, so a final line
      # without a trailing newline keeps its complete URL.
      file_url = line.chomp[15..-1]
      # nil when the line is too short or the URL has fewer than three
      # '/'-separated parts (e.g. no "scheme://" prefix).
      path_segments = file_url && file_url.split('/')[3..-1]
      if path_segments.nil?
        puts "Malformed file url, ignoring: #{file_url}"
        next
      end
      file_id = CGI::unescape(path_segments.join('/'))
      file_id = file_id.tidy_bytes unless file_id == ""
      if match_exclude_filter(file_url)
        puts "File url matches exclude filter, ignoring: #{file_url}"
      elsif !match_only_filter(file_url)
        puts "File url doesn't match only filter, ignoring: #{file_url}"
      else
        known = file_list_curated[file_id]
        # Keep the newest snapshot; on equal timestamps the later line wins.
        unless known && known[:timestamp] > file_timestamp
          file_list_curated[file_id] = {file_url: file_url, timestamp: file_timestamp}
        end
      end
    end
    file_list_curated
  end

  # Returns the curated files as an Array of info hashes, highest timestamp
  # first. Each hash from get_file_list_curated gains a :file_id entry
  # holding the key it was stored under.
  def get_file_list_by_timestamp
    newest_first = get_file_list_curated.sort_by { |_file_id, info| info[:timestamp] }.reverse
    newest_first.map do |file_id, info|
      info[:file_id] = file_id
      info
    end
  end

  # Prints the file list (newest first) to standard output as a JSON array,
  # one element per line.
  #
  # Elements are separated by commas with no comma after the last one, so the
  # output can be fed directly to a JSON parser.
  def list_files
    entries = get_file_list_by_timestamp.map(&:to_json)
    puts "["
    puts entries.join(",\n") unless entries.empty?
    puts "]"
  end

  # Downloads every file in file_list_by_timestamp into backup_path using
  # @threads_count worker threads, then prints a summary.
  #
  # When the list is empty, the likely reasons are printed and nothing is
  # downloaded. A thread count that is zero, negative or not numeric is
  # replaced by 1 so that the queue is always drained.
  def download_files
    start_time = Time.now
    puts "Downloading #{@base_url} to #{backup_path} from Wayback Machine archives."
    puts

    if file_list_by_timestamp.count == 0
      puts "No files to download."
      puts "Possible reasons:"
      puts "\t* Site is not in Wayback Machine Archive."
      puts "\t* From timestamp too much in the future." if @from_timestamp && @from_timestamp != 0
      puts "\t* To timestamp too much in the past." if @to_timestamp && @to_timestamp != 0
      puts "\t* Only filter too restrictive (#{only_filter.to_s})" if @only_filter
      puts "\t* Exclude filter too wide (#{exclude_filter.to_s})" if @exclude_filter
      return
    end

    puts "#{file_list_by_timestamp.count} files to download:"

    @processed_file_count = 0
    # `n.times` with n <= 0 starts no worker at all, which used to end in a
    # "Download completed" message without a single file being fetched.
    @threads_count = @threads_count.to_i
    @threads_count = 1 unless @threads_count > 0

    # Touch the lazily memoized queue and mutex here, on the calling thread:
    # `||=` is not atomic, so letting the workers trigger the first
    # initialisation could create more than one queue or mutex.
    pending_files = file_queue
    semaphore

    threads = Array.new(@threads_count) do
      Thread.new do
        loop do
          begin
            file_remote_info = pending_files.pop(true)
          rescue ThreadError
            # Non-blocking pop on an empty queue: nothing left to do.
            break
          end
          download_file(file_remote_info) if file_remote_info
        end
      end
    end

    threads.each(&:join)
    end_time = Time.now
    puts
    puts "Download completed in #{(end_time - start_time).round(2)}s, saved in #{backup_path} (#{file_list_by_timestamp.size} files)"
  end

  # Makes sure +dir_path+ exists as a directory, creating parents as needed.
  #
  # If a regular file sits where a directory is required (at +dir_path+ or
  # at one of its parents), that file is moved to "<path>/index.html" inside
  # a newly created directory of the same name, and the creation is retried.
  #
  # Raises RuntimeError when the Errno::EEXIST message has an unknown format
  # and the blocking path cannot be extracted from it.
  def structure_dir_path dir_path
    begin
      # File.directory? rather than an existence test: a plain file already
      # at dir_path must go through the restructuring below, not be mistaken
      # for a usable directory. (File.exists? was also removed in Ruby 3.2.)
      FileUtils::mkdir_p dir_path unless File.directory? dir_path
    rescue Errno::EEXIST => e
      error_to_string = e.to_s
      puts "# #{error_to_string}"
      # The message wording differs between Ruby versions; both carry the
      # blocking path after the last " - ".
      if error_to_string.include? "File exists @ dir_s_mkdir - "
        file_already_existing = error_to_string.split("File exists @ dir_s_mkdir - ")[-1]
      elsif error_to_string.include? "File exists - "
        file_already_existing = error_to_string.split("File exists - ")[-1]
      else
        raise "Unhandled directory restructure error # #{error_to_string}"
      end
      file_already_existing_temporary = file_already_existing + '.temp'
      file_already_existing_permanent = file_already_existing + '/index.html'
      # Move the file aside, create the directory in its place, then put the
      # file back as that directory's index.html.
      FileUtils::mv file_already_existing, file_already_existing_temporary
      FileUtils::mkdir_p file_already_existing
      FileUtils::mv file_already_existing_temporary, file_already_existing_permanent
      puts "#{file_already_existing} -> #{file_already_existing_permanent}"
      # Retry: further path components may be blocked as well.
      structure_dir_path dir_path
    end
  end

  # Downloads one archived file and stores it under backup_path.
  #
  # +file_remote_info+ is a Hash with :file_url (original URL), :file_id
  # (relative path used on disk) and :timestamp (snapshot timestamp).
  #
  # URLs ending in '/' or whose last path part has no '.' are stored as
  # "<path>/index.html". Files already present on disk are not fetched
  # again. Download errors are printed, not raised; an HTTP error body is
  # saved only when @all is set, and an empty result file is removed unless
  # @all is set. Progress is reported under the shared mutex.
  def download_file file_remote_info
    file_url = file_remote_info[:file_url]
    file_id = file_remote_info[:file_id]
    file_timestamp = file_remote_info[:timestamp]
    file_path_elements = file_id.split('/')
    if file_id == ""
      dir_path = backup_path
      file_path = backup_path + 'index.html'
    # to_s: an id made only of slashes splits to [], whose last element is nil.
    elsif file_url[-1] == '/' || !file_path_elements[-1].to_s.include?('.')
      dir_path = backup_path + file_path_elements.join('/')
      file_path = backup_path + file_path_elements.join('/') + '/index.html'
    else
      dir_path = backup_path + file_path_elements[0..-2].join('/')
      file_path = backup_path + file_path_elements.join('/')
    end
    if Gem.win_platform?
      # Percent-encode the matched characters on Windows.
      file_path = file_path.gsub(/[:*?&=<>\\|]/) {|s| '%' + s.ord.to_s(16) }
    end

    # File.exist? - the File.exists? alias was removed in Ruby 3.2.
    if File.exist? file_path
      semaphore.synchronize do
        @processed_file_count += 1
        puts "#{file_url} # #{file_path} already exists. (#{@processed_file_count}/#{file_list_by_timestamp.size})"
      end
      return
    end

    begin
      structure_dir_path dir_path
      File.open(file_path, "wb") do |file|
        begin
          # OpenURI.open_uri is used explicitly: since Ruby 3.0 Kernel#open no
          # longer opens URLs, while this entry point exists in old and new
          # versions of open-uri alike.
          OpenURI.open_uri("http://web.archive.org/web/#{file_timestamp}id_/#{file_url}", "Accept-Encoding" => "plain") do |uri|
            file.write(uri.read)
          end
        rescue OpenURI::HTTPError => e
          puts "#{file_url} # #{e}"
          if @all
            # Keep the error page body when every response is wanted.
            file.write(e.io.read)
            puts "#{file_path} saved anyway."
          end
        rescue StandardError => e
          puts "#{file_url} # #{e}"
        end
      end
    rescue StandardError => e
      puts "#{file_url} # #{e}"
    ensure
      if !@all && File.exist?(file_path) && File.size(file_path) == 0
        File.delete(file_path)
        puts "#{file_path} was empty and was removed."
      end
    end
    semaphore.synchronize do
      @processed_file_count += 1
      puts "#{file_url} -> #{file_path} (#{@processed_file_count}/#{file_list_by_timestamp.size})"
    end
  end

  # Memoized Queue pre-filled with every entry of file_list_by_timestamp,
  # in list order.
  def file_queue
    return @file_queue if @file_queue

    queue = Queue.new
    file_list_by_timestamp.each { |file_info| queue << file_info }
    @file_queue = queue
  end

  # Memoized result of get_file_list_by_timestamp; the list is built on the
  # first call and reused afterwards.
  def file_list_by_timestamp
    @file_list_by_timestamp = get_file_list_by_timestamp unless @file_list_by_timestamp
    @file_list_by_timestamp
  end

  # Memoized Mutex used to serialize progress counting and output.
  def semaphore
    return @semaphore if @semaphore

    @semaphore = Mutex.new
  end
end
Add default encoding to UTF-8 for Ruby < 2 and Ruby 2015-11-05 16:44:41 -06:00			`# encoding: UTF-8`

Load early net/http library for Ruby 1.9.x 2016-09-17 14:06:43 -05:00			`require 'net/http'`
Add file list retrieval and file list downloading 2015-08-08 16:15:14 -05:00			`require 'open-uri'`
			`require 'fileutils'`
add --exclude filter - add exclude filter. Has precedence over the --only filter. - bumped the version. - tests: I modified the tests, but.. the whole suite is hopelessly broken. And when I say hopeless, I'm talking about myself; my Ruby is so rudimentary I was looking up how to create a variable. It really needs a static list of the website 'contents' mocked into it. I have a branch in my repo showing my hopeless work. 2016-06-28 23:27:36 -07:00			`require 'cgi'`
Add option to only list files without downloading 2016-08-03 14:23:35 -05:00			`require 'json'`
Move TidyBytes to fix executable issue #3 2015-08-19 12:02:08 -05:00			`require_relative 'wayback_machine_downloader/tidy_bytes'`
Improve only_filter to accept both strings and regexes 2015-11-19 15:28:02 -06:00			`require_relative 'wayback_machine_downloader/to_regex'`
Refactor archive API calls to own module 2016-09-17 13:37:13 -05:00			`require_relative 'wayback_machine_downloader/archive_api'`
Add file list retrieval and file list downloading 2015-08-08 16:15:14 -05:00
Add test file for gem deployment 2015-07-25 18:44:37 -05:00			`class WaybackMachineDownloader`
Add file list retrieval and file list downloading 2015-08-08 16:15:14 -05:00
Refactor archive API calls to own module 2016-09-17 13:37:13 -05:00			`include ArchiveAPI`

Bump Gem version 2016-09-24 10:21:50 -07:00			`VERSION = "1.1.1"`
Add better way to handle complex directory structure 2015-08-10 01:13:59 -05:00
Add maximum snapshot page option 2016-09-24 10:06:27 -07:00			`attr_accessor :base_url, :directory, :from_timestamp, :to_timestamp, :only_filter, :exclude_filter, :all, :list, :maximum_pages, :threads_count`
Add file list retrieval and file list downloading 2015-08-08 16:15:14 -05:00
			`def initialize params`
			`@base_url = params[:base_url]`
Add possibility to save files to an user specified directory 2016-09-17 12:49:48 -05:00			`@directory = params[:directory]`
Add to timestamp option 2016-07-30 14:08:01 -05:00			`@from_timestamp = params[:from_timestamp].to_i`
			`@to_timestamp = params[:to_timestamp].to_i`
Improve only_filter to accept both strings and regexes 2015-11-19 15:28:02 -06:00			`@only_filter = params[:only_filter]`
add --exclude filter - add exclude filter. Has precedence over the --only filter. - bumped the version. - tests: I modified the tests, but.. the whole suite is hopelessly broken. And when I say hopeless, I'm talking about myself; my Ruby is so rudimentary I was looking up how to create a variable. It really needs a static list of the website 'contents' mocked into it. I have a branch in my repo showing my hopeless work. 2016-06-28 23:27:36 -07:00			`@exclude_filter = params[:exclude_filter]`
Add all file types download option 2016-07-31 09:51:27 -05:00			`@all = params[:all]`
Add option to only list files without downloading 2016-08-03 14:23:35 -05:00			`@list = params[:list]`
Add maximum snapshot page option 2016-09-24 10:06:27 -07:00			`@maximum_pages = params[:maximum_pages] ? params[:maximum_pages].to_i : 100`
Add downloading in multi threads 2016-09-04 23:38:38 +03:00			`@threads_count = params[:threads_count].to_i`
Add test file for gem deployment 2015-07-25 18:44:37 -05:00			`end`
Add file list retrieval and file list downloading 2015-08-08 16:15:14 -05:00
			`def backup_name`
Add support for raw domain names as the input URL 2016-08-16 11:47:47 -05:00			`if @base_url.include? '//'`
			`@base_url.split('/')[2]`
			`else`
			`@base_url`
			`end`
Add file list retrieval and file list downloading 2015-08-08 16:15:14 -05:00			`end`

			`def backup_path`
Add possibility to save files to an user specified directory 2016-09-17 12:49:48 -05:00			`if @directory`
			`if @directory[-1] == '/'`
			`@directory`
			`else`
			`@directory + '/'`
			`end`
			`else`
			`'websites/' + backup_name + '/'`
			`end`
Add file list retrieval and file list downloading 2015-08-08 16:15:14 -05:00			`end`

Improve only_filter to accept both strings and regexes 2015-11-19 15:28:02 -06:00			`def match_only_filter file_url`
			`if @only_filter`
			`only_filter_regex = @only_filter.to_regex`
			`if only_filter_regex`
			`only_filter_regex =~ file_url`
			`else`
			`file_url.downcase.include? @only_filter.downcase`
			`end`
			`else`
			`true`
			`end`
			`end`

add --exclude filter - add exclude filter. Has precedence over the --only filter. - bumped the version. - tests: I modified the tests, but.. the whole suite is hopelessly broken. And when I say hopeless, I'm talking about myself; my Ruby is so rudimentary I was looking up how to create a variable. It really needs a static list of the website 'contents' mocked into it. I have a branch in my repo showing my hopeless work. 2016-06-28 23:27:36 -07:00			`def match_exclude_filter file_url`
			`if @exclude_filter`
			`exclude_filter_regex = @exclude_filter.to_regex`
			`if exclude_filter_regex`
			`exclude_filter_regex =~ file_url`
			`else`
			`file_url.downcase.include? @exclude_filter.downcase`
			`end`
			`else`
Fix behavior when no exclude filter is supplied 2016-07-28 17:58:54 -05:00			`false`
add --exclude filter - add exclude filter. Has precedence over the --only filter. - bumped the version. - tests: I modified the tests, but.. the whole suite is hopelessly broken. And when I say hopeless, I'm talking about myself; my Ruby is so rudimentary I was looking up how to create a variable. It really needs a static list of the website 'contents' mocked into it. I have a branch in my repo showing my hopeless work. 2016-06-28 23:27:36 -07:00			`end`
			`end`

Add maximum snapshot page option 2016-09-24 10:06:27 -07:00			`def get_all_snapshots_to_consider`
Add an additional request to archive.org API to get fresher snapshots 2016-09-24 10:21:17 -07:00			`# Note: Passing a page index parameter allow us to get more snapshot, but from a less fresh index`
Add maximum snapshot page option 2016-09-24 10:06:27 -07:00			`print "Getting snapshot pages"`
			`snapshot_list_to_consider = ""`
			`snapshot_list_to_consider += get_raw_list_from_api(@base_url, nil)`
			`print "."`
Add an additional request to archive.org API to get fresher snapshots 2016-09-24 10:21:17 -07:00			`snapshot_list_to_consider += get_raw_list_from_api(@base_url + '/*', nil)`
			`print "."`
Add maximum snapshot page option 2016-09-24 10:06:27 -07:00			`@maximum_pages.times do \|page_index\|`
			`snapshot_list = get_raw_list_from_api(@base_url + '/*', page_index)`
			`break if snapshot_list.empty?`
			`snapshot_list_to_consider += snapshot_list`
			`print "."`
			`end`
			`puts " found #{snapshot_list_to_consider.lines.count} snaphots to consider."`
			`puts`
			`snapshot_list_to_consider`
			`end`

Add timestamp to lock backup to a specific version 2015-08-09 21:26:43 -05:00			`def get_file_list_curated`
Add file list retrieval and file list downloading 2015-08-08 16:15:14 -05:00			`file_list_curated = Hash.new`
Add maximum snapshot page option 2016-09-24 10:06:27 -07:00			`get_all_snapshots_to_consider.each_line do \|line\|`
			`next unless line.include?('/')`
			`file_timestamp = line[0..13].to_i`
			`file_url = line[15..-2]`
			`file_id = file_url.split('/')[3..-1].join('/')`
			`file_id = CGI::unescape file_id`
			`file_id = file_id.tidy_bytes unless file_id == ""`
			`if file_id.nil?`
			`puts "Malformed file url, ignoring: #{file_url}"`
			`else`
			`if match_exclude_filter(file_url)`
			`puts "File url matches exclude filter, ignoring: #{file_url}"`
			`elsif not match_only_filter(file_url)`
			`puts "File url doesn't match only filter, ignoring: #{file_url}"`
			`elsif file_list_curated[file_id]`
			`unless file_list_curated[file_id][:timestamp] > file_timestamp`
Make file_list_curated aware of timestamp option 2015-08-09 22:33:32 -05:00			`file_list_curated[file_id] = {file_url: file_url, timestamp: file_timestamp}`
			`end`
Add maximum snapshot page option 2016-09-24 10:06:27 -07:00			`else`
			`file_list_curated[file_id] = {file_url: file_url, timestamp: file_timestamp}`
Add file list retrieval and file list downloading 2015-08-08 16:15:14 -05:00			`end`
			`end`
			`end`
Improve only_filter to accept both strings and regexes 2015-11-19 15:28:02 -06:00			`file_list_curated`
Add file list retrieval and file list downloading 2015-08-08 16:15:14 -05:00			`end`

Avoid making 2 times the request to get raw file list 2015-11-05 16:19:03 -06:00			`def get_file_list_by_timestamp`
Ensure latest version in case of name conflict 2015-08-15 15:37:37 -05:00			`file_list_curated = get_file_list_curated`
			`file_list_curated = file_list_curated.sort_by { \|k,v\| v[:timestamp] }.reverse`
			`file_list_curated.map do \|file_remote_info\|`
			`file_remote_info[1][:file_id] = file_remote_info[0]`
			`file_remote_info[1]`
			`end`
			`end`

Add option to only list files without downloading 2016-08-03 14:23:35 -05:00			`def list_files`
			`puts "["`
			`get_file_list_by_timestamp.each do \|file\|`
			`puts file.to_json + ","`
			`end`
			`puts "]"`
			`end`

Add file list retrieval and file list downloading 2015-08-08 16:15:14 -05:00			`def download_files`
Add downloading in multi threads 2016-09-04 23:38:38 +03:00			`start_time = Time.now`
Add maximum snapshot page option 2016-09-24 10:06:27 -07:00			`puts "Downloading #{@base_url} to #{backup_path} from Wayback Machine archives."`
Add timestamp to lock backup to a specific version 2015-08-09 21:26:43 -05:00			`puts`
Add downloading in multi threads 2016-09-04 23:38:38 +03:00
added new option for regex acceptance you dont have to download entire site when looking for specific path 2015-11-06 13:11:26 -05:00			`if file_list_by_timestamp.count == 0`
Improve user output 2016-07-28 17:59:59 -05:00			`puts "No files to download."`
Fix typo #50 2016-08-01 17:01:39 -05:00			`puts "Possible reasons:"`
Improve user output 2016-07-28 17:59:59 -05:00			`puts "\t* Site is not in Wayback Machine Archive."`
Add downloading in multi threads 2016-09-04 23:38:38 +03:00			`puts "\t* From timestamp too much in the future." if @from_timestamp and @from_timestamp != 0`
			`puts "\t* To timestamp too much in the past." if @to_timestamp and @to_timestamp != 0`
Improve user output 2016-07-28 17:59:59 -05:00			`puts "\t* Only filter too restrictive (#{only_filter.to_s})" if @only_filter`
			`puts "\t* Exclude filter too wide (#{exclude_filter.to_s})" if @exclude_filter`
added new option for regex acceptance you dont have to download entire site when looking for specific path 2015-11-06 13:11:26 -05:00			`return`
			`end`
Add maximum snapshot page option 2016-09-24 10:06:27 -07:00
			`puts "#{file_list_by_timestamp.count} files to download:"`
Add downloading in multi threads 2016-09-04 23:38:38 +03:00
			`threads = []`
Make instance variable definitions more explicit 2016-09-15 19:59:42 -05:00			`@processed_file_count = 0`
			`@threads_count = 1 unless @threads_count != 0`
			`@threads_count.times do`
Add downloading in multi threads 2016-09-04 23:38:38 +03:00			`threads << Thread.new do`
			`until file_queue.empty?`
			`file_remote_info = file_queue.pop(true) rescue nil`
			`download_file(file_remote_info) if file_remote_info`
Add file list retrieval and file list downloading 2015-08-08 16:15:14 -05:00			`end`
			`end`
			`end`
Add downloading in multi threads 2016-09-04 23:38:38 +03:00
			`threads.each(&:join)`
			`end_time = Time.now`
Add timestamp to lock backup to a specific version 2015-08-09 21:26:43 -05:00			`puts`
Format seconds to avoid too long output 2016-09-15 20:00:35 -05:00			`puts "Download completed in #{(end_time - start_time).round(2)}s, saved in #{backup_path} (#{file_list_by_timestamp.size} files)"`
Add better way to handle complex directory structure 2015-08-10 01:13:59 -05:00			`end`

			`def structure_dir_path dir_path`
			`begin`
			`FileUtils::mkdir_p dir_path unless File.exists? dir_path`
			`rescue Errno::EEXIST => e`
Handle different Errno::EEXIST exceptions from different ruby versions 2015-09-10 00:35:48 -05:00			`error_to_string = e.to_s`
			`puts "# #{error_to_string}"`
			`if error_to_string.include? "File exists @ dir_s_mkdir - "`
Clarify error variable names 2015-09-10 00:43:21 -05:00			`file_already_existing = error_to_string.split("File exists @ dir_s_mkdir - ")[-1]`
Handle different Errno::EEXIST exceptions from different ruby versions 2015-09-10 00:35:48 -05:00			`elsif error_to_string.include? "File exists - "`
Clarify error variable names 2015-09-10 00:43:21 -05:00			`file_already_existing = error_to_string.split("File exists - ")[-1]`
Handle different Errno::EEXIST exceptions from different ruby versions 2015-09-10 00:35:48 -05:00			`else`
Clarify error variable names 2015-09-10 00:43:21 -05:00			`raise "Unhandled directory restructure error # #{error_to_string}"`
Handle different Errno::EEXIST exceptions from different ruby versions 2015-09-10 00:35:48 -05:00			`end`
Add better way to handle complex directory structure 2015-08-10 01:13:59 -05:00			`file_already_existing_temporary = file_already_existing + '.temp'`
			`file_already_existing_permanent = file_already_existing + '/index.html'`
			`FileUtils::mv file_already_existing, file_already_existing_temporary`
			`FileUtils::mkdir_p file_already_existing`
			`FileUtils::mv file_already_existing_temporary, file_already_existing_permanent`
Ignore urls with malformed base directories #5 2015-08-24 18:00:36 -05:00			`puts "#{file_already_existing} -> #{file_already_existing_permanent}"`
Add better way to handle complex directory structure 2015-08-10 01:13:59 -05:00			`structure_dir_path dir_path`
			`end`
Add file list retrieval and file list downloading 2015-08-08 16:15:14 -05:00			`end`

Add downloading in multi threads 2016-09-04 23:38:38 +03:00			`def download_file file_remote_info`
			`file_url = file_remote_info[:file_url]`
			`file_id = file_remote_info[:file_id]`
			`file_timestamp = file_remote_info[:timestamp]`
			`file_path_elements = file_id.split('/')`
			`if file_id == ""`
			`dir_path = backup_path`
			`file_path = backup_path + 'index.html'`
			`elsif file_url[-1] == '/' or not file_path_elements[-1].include? '.'`
			`dir_path = backup_path + file_path_elements[0..-1].join('/')`
			`file_path = backup_path + file_path_elements[0..-1].join('/') + '/index.html'`
			`else`
			`dir_path = backup_path + file_path_elements[0..-2].join('/')`
			`file_path = backup_path + file_path_elements[0..-1].join('/')`
			`end`
			`if Gem.win_platform?`
			`file_path = file_path.gsub(/[:*?&=<>\\\|]/) {\|s\| '%' + s.ord.to_s(16) }`
			`end`
			`unless File.exists? file_path`
			`begin`
			`structure_dir_path dir_path`
			`open(file_path, "wb") do \|file\|`
			`begin`
			`open("http://web.archive.org/web/#{file_timestamp}id_/#{file_url}", "Accept-Encoding" => "plain") do \|uri\|`
			`file.write(uri.read)`
			`end`
			`rescue OpenURI::HTTPError => e`
			`puts "#{file_url} # #{e}"`
			`if @all`
			`file.write(e.io.read)`
			`puts "#{file_path} saved anyway."`
			`end`
			`rescue StandardError => e`
			`puts "#{file_url} # #{e}"`
			`end`
			`end`
			`rescue StandardError => e`
			`puts "#{file_url} # #{e}"`
			`ensure`
			`if not @all and File.exists?(file_path) and File.size(file_path) == 0`
			`File.delete(file_path)`
			`puts "#{file_path} was empty and was removed."`
			`end`
			`end`
			`semaphore.synchronize do`
			`@processed_file_count += 1`
			`puts "#{file_url} -> #{file_path} (#{@processed_file_count}/#{file_list_by_timestamp.size})"`
			`end`
			`else`
			`semaphore.synchronize do`
			`@processed_file_count += 1`
			`puts "#{file_url} # #{file_path} already exists. (#{@processed_file_count}/#{file_list_by_timestamp.size})"`
			`end`
			`end`
			`end`

			`def file_queue`
			`@file_queue \|\|= file_list_by_timestamp.each_with_object(Queue.new) { \|file_info, q\| q << file_info }`
			`end`

			`def file_list_by_timestamp`
			`@file_list_by_timestamp \|\|= get_file_list_by_timestamp`
			`end`

			`def semaphore`
			`@semaphore \|\|= Mutex.new`
			`end`
Add test file for gem deployment 2015-07-25 18:44:37 -05:00			`end`