Workaround for the API returning only HTML files for some domains

See https://github.com/StrawberryMaster/wayback-machine-downloader/issues/6
2025-12-29 16:16:06 +00:00 · 2025-05-30 12:50:48 -04:00
parent f38756dd76
commit 1681a12579
1 changed files with 7 additions and 0 deletions
--- a/lib/wayback_machine_downloader/archive_api.rb
+++ b/lib/wayback_machine_downloader/archive_api.rb
@@ -4,6 +4,13 @@ require 'uri'
 module ArchiveAPI

  def get_raw_list_from_api(url, page_index, http)
+    # Automatically append /* unless the URL starts with http(s):// and contains a "/" after the scheme
+    # This is a workaround for an issue with the API and *some* domains.
+    # See https://github.com/StrawberryMaster/wayback-machine-downloader/issues/6
+    if url && !url.match(/^https?:\/\/.*\//i)
+      url = "#{url}/*"
+    end
+
    request_url = URI("https://web.archive.org/cdx/search/cdx")
    params = [["output", "json"], ["url", url]] + parameters_for_api(page_index)
    request_url.query = URI.encode_www_form(params)