mirror of
https://github.com/StrawberryMaster/wayback-machine-downloader.git
synced 2025-12-29 16:16:06 +00:00
Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | 917f4f8798 | | |
| | 787bc2e535 | | |
| | 4db13a7792 | | |
13
README.md
13
README.md
@@ -81,14 +81,15 @@ services:
|
|||||||
|
|
||||||
## ⚙️ Configuration
|
## ⚙️ Configuration
|
||||||
There are a few constants that can be edited in the `wayback_machine_downloader.rb` file for your convenience. The default values may be conservative, so you can adjust them to your needs. They are:
|
There are a few constants that can be edited in the `wayback_machine_downloader.rb` file for your convenience. The default values may be conservative, so you can adjust them to your needs. They are:
|
||||||
|
|
||||||
```ruby
|
```ruby
|
||||||
DEFAULT_TIMEOUT = 30 # HTTP timeout (in seconds)
|
DEFAULT_TIMEOUT = 30 # HTTP timeout (in seconds)
|
||||||
MAX_RETRIES = 3 # Failed request retries
|
MAX_RETRIES = 3 # Number of times to retry failed requests
|
||||||
RETRY_DELAY = 2 # Wait between retries
|
RETRY_DELAY = 2 # Wait time between retries (seconds)
|
||||||
RATE_LIMIT = 0.25 # Throttle between requests
|
RATE_LIMIT = 0.25 # Throttle between requests (seconds)
|
||||||
CONNECTION_POOL_SIZE = 10 # No. of simultaneous connections
|
CONNECTION_POOL_SIZE = 10 # Maximum simultaneous connections
|
||||||
MEMORY_BUFFER_SIZE = 16384 # Size of download buffer
|
MEMORY_BUFFER_SIZE = 16384 # Download buffer size (bytes)
|
||||||
|
STATE_CDX_FILENAME = '.cdx.json' # Stores snapshot listing
|
||||||
|
STATE_DB_FILENAME = '.downloaded.txt' # Tracks completed downloads
|
||||||
```
|
```
|
||||||
|
|
||||||
## 🛠️ Advanced usage
|
## 🛠️ Advanced usage
|
||||||
|
|||||||
@@ -113,7 +113,7 @@ class WaybackMachineDownloader
|
|||||||
|
|
||||||
include ArchiveAPI
|
include ArchiveAPI
|
||||||
|
|
||||||
VERSION = "2.3.4"
|
VERSION = "2.3.5"
|
||||||
DEFAULT_TIMEOUT = 30
|
DEFAULT_TIMEOUT = 30
|
||||||
MAX_RETRIES = 3
|
MAX_RETRIES = 3
|
||||||
RETRY_DELAY = 2
|
RETRY_DELAY = 2
|
||||||
@@ -538,10 +538,9 @@ class WaybackMachineDownloader
|
|||||||
file_url = file_remote_info[:file_url].encode(current_encoding)
|
file_url = file_remote_info[:file_url].encode(current_encoding)
|
||||||
file_id = file_remote_info[:file_id]
|
file_id = file_remote_info[:file_id]
|
||||||
file_timestamp = file_remote_info[:timestamp]
|
file_timestamp = file_remote_info[:timestamp]
|
||||||
original_file_id = @all_timestamps ? file_id.split('/', 2)[1] : file_id
|
file_path_elements = file_id.split('/')
|
||||||
file_path_elements = original_file_id.split('/')
|
|
||||||
|
|
||||||
if original_file_id == ""
|
if file_id == ""
|
||||||
dir_path = backup_path
|
dir_path = backup_path
|
||||||
file_path = backup_path + 'index.html'
|
file_path = backup_path + 'index.html'
|
||||||
elsif file_url[-1] == '/' or not file_path_elements[-1].include? '.'
|
elsif file_url[-1] == '/' or not file_path_elements[-1].include? '.'
|
||||||
|
|||||||
Reference in New Issue
Block a user