3 Commits

Author SHA1 Message Date
Felipe
917f4f8798 Bumping version 2025-04-30 13:05:30 +00:00
Felipe
787bc2e535 Added missing configs 2025-04-30 13:05:21 +00:00
Felipe
4db13a7792 Fix --all-timestamps
We were accidentally removing the timestamp prefix from `file_id`, rendering that option useless in 2.3.4. This should work again now. This will fix #4
2025-04-30 13:01:29 +00:00
2 changed files with 10 additions and 10 deletions

View File

@@ -81,14 +81,15 @@ services:
## ⚙️ Configuration ## ⚙️ Configuration
There are a few constants that can be edited in the `wayback_machine_downloader.rb` file for your convenience. The default values may be conservative, so you can adjust them to your needs. They are: There are a few constants that can be edited in the `wayback_machine_downloader.rb` file for your convenience. The default values may be conservative, so you can adjust them to your needs. They are:
```ruby ```ruby
DEFAULT_TIMEOUT = 30 # HTTP timeout (in seconds) DEFAULT_TIMEOUT = 30 # HTTP timeout (in seconds)
MAX_RETRIES = 3 # Failed request retries MAX_RETRIES = 3 # Number of times to retry failed requests
RETRY_DELAY = 2 # Wait between retries RETRY_DELAY = 2 # Wait time between retries (seconds)
RATE_LIMIT = 0.25 # Throttle between requests RATE_LIMIT = 0.25 # Throttle between requests (seconds)
CONNECTION_POOL_SIZE = 10 # No. of simultaneous connections CONNECTION_POOL_SIZE = 10 # Maximum simultaneous connections
MEMORY_BUFFER_SIZE = 16384 # Size of download buffer MEMORY_BUFFER_SIZE = 16384 # Download buffer size (bytes)
STATE_CDX_FILENAME = '.cdx.json' # Stores snapshot listing
STATE_DB_FILENAME = '.downloaded.txt' # Tracks completed downloads
``` ```
## 🛠️ Advanced usage ## 🛠️ Advanced usage

View File

@@ -113,7 +113,7 @@ class WaybackMachineDownloader
include ArchiveAPI include ArchiveAPI
VERSION = "2.3.4" VERSION = "2.3.5"
DEFAULT_TIMEOUT = 30 DEFAULT_TIMEOUT = 30
MAX_RETRIES = 3 MAX_RETRIES = 3
RETRY_DELAY = 2 RETRY_DELAY = 2
@@ -538,10 +538,9 @@ class WaybackMachineDownloader
file_url = file_remote_info[:file_url].encode(current_encoding) file_url = file_remote_info[:file_url].encode(current_encoding)
file_id = file_remote_info[:file_id] file_id = file_remote_info[:file_id]
file_timestamp = file_remote_info[:timestamp] file_timestamp = file_remote_info[:timestamp]
original_file_id = @all_timestamps ? file_id.split('/', 2)[1] : file_id file_path_elements = file_id.split('/')
file_path_elements = original_file_id.split('/')
if original_file_id == "" if file_id == ""
dir_path = backup_path dir_path = backup_path
file_path = backup_path + 'index.html' file_path = backup_path + 'index.html'
elsif file_url[-1] == '/' or not file_path_elements[-1].include? '.' elsif file_url[-1] == '/' or not file_path_elements[-1].include? '.'