mirror of
https://github.com/StrawberryMaster/wayback-machine-downloader.git
synced 2025-12-29 16:16:06 +00:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
917f4f8798 | ||
|
|
787bc2e535 | ||
|
|
4db13a7792 |
13
README.md
13
README.md
@@ -81,14 +81,15 @@ services:
|
||||
|
||||
## ⚙️ Configuration
|
||||
A few constants in the `wayback_machine_downloader.rb` file can be edited for your convenience. The default values may be conservative, so feel free to adjust them to suit your needs. They are:
|
||||
|
||||
```ruby
|
||||
DEFAULT_TIMEOUT = 30 # HTTP timeout (in seconds)
|
||||
MAX_RETRIES = 3 # Failed request retries
|
||||
RETRY_DELAY = 2 # Wait between retries
|
||||
RATE_LIMIT = 0.25 # Throttle between requests
|
||||
CONNECTION_POOL_SIZE = 10 # No. of simultaneous connections
|
||||
MEMORY_BUFFER_SIZE = 16384 # Size of download buffer
|
||||
MAX_RETRIES = 3 # Number of times to retry failed requests
|
||||
RETRY_DELAY = 2 # Wait time between retries (seconds)
|
||||
RATE_LIMIT = 0.25 # Throttle between requests (seconds)
|
||||
CONNECTION_POOL_SIZE = 10 # Maximum simultaneous connections
|
||||
MEMORY_BUFFER_SIZE = 16384 # Download buffer size (bytes)
|
||||
STATE_CDX_FILENAME = '.cdx.json' # Stores snapshot listing
|
||||
STATE_DB_FILENAME = '.downloaded.txt' # Tracks completed downloads
|
||||
```
|
||||
|
||||
## 🛠️ Advanced usage
|
||||
|
||||
@@ -113,7 +113,7 @@ class WaybackMachineDownloader
|
||||
|
||||
include ArchiveAPI
|
||||
|
||||
VERSION = "2.3.4"
|
||||
VERSION = "2.3.5"
|
||||
DEFAULT_TIMEOUT = 30
|
||||
MAX_RETRIES = 3
|
||||
RETRY_DELAY = 2
|
||||
@@ -538,10 +538,9 @@ class WaybackMachineDownloader
|
||||
file_url = file_remote_info[:file_url].encode(current_encoding)
|
||||
file_id = file_remote_info[:file_id]
|
||||
file_timestamp = file_remote_info[:timestamp]
|
||||
original_file_id = @all_timestamps ? file_id.split('/', 2)[1] : file_id
|
||||
file_path_elements = original_file_id.split('/')
|
||||
file_path_elements = file_id.split('/')
|
||||
|
||||
if original_file_id == ""
|
||||
if file_id == ""
|
||||
dir_path = backup_path
|
||||
file_path = backup_path + 'index.html'
|
||||
elsif file_url[-1] == '/' or not file_path_elements[-1].include? '.'
|
||||
|
||||
Reference in New Issue
Block a user