Merge pull request #90 from BlessedRebuS/fix/backup-job-enabled-flag

Add parameter to disable backup
This commit is contained in:
Lorenzo Venerandi
2026-02-22 16:04:40 +01:00
committed by GitHub
6 changed files with 20 additions and 1 deletions

View File

@@ -213,6 +213,7 @@ Krawl uses a **configuration hierarchy** in which **environment variables take p
| `KRAWL_EXPORTS_PATH` | Path where firewalls rule sets are exported | `exports` | | `KRAWL_EXPORTS_PATH` | Path where firewalls rule sets are exported | `exports` |
| `KRAWL_BACKUPS_PATH` | Path where database dumps are saved | `backups` | | `KRAWL_BACKUPS_PATH` | Path where database dumps are saved | `backups` |
| `KRAWL_BACKUPS_CRON` | cron expression to control backup job schedule | `*/30 * * * *` | | `KRAWL_BACKUPS_CRON` | cron expression to control backup job schedule | `*/30 * * * *` |
| `KRAWL_BACKUPS_ENABLED` | Boolean to enable the database dump job | `false` |
| `KRAWL_DATABASE_RETENTION_DAYS` | Days to retain data in database | `30` | | `KRAWL_DATABASE_RETENTION_DAYS` | Days to retain data in database | `30` |
| `KRAWL_HTTP_RISKY_METHODS_THRESHOLD` | Threshold for risky HTTP methods detection | `0.1` | | `KRAWL_HTTP_RISKY_METHODS_THRESHOLD` | Threshold for risky HTTP methods detection | `0.1` |
| `KRAWL_VIOLATED_ROBOTS_THRESHOLD` | Threshold for robots.txt violations | `0.1` | | `KRAWL_VIOLATED_ROBOTS_THRESHOLD` | Threshold for robots.txt violations | `0.1` |
@@ -334,7 +335,20 @@ Alternatively, you can create a bunch of different "interesting" looking domains
Additionally, you may configure your reverse proxy to forward all non-existing subdomains (e.g. nonexistent.example.com) to one of these domains so that any crawlers that are guessing domains at random will automatically end up at your Krawl instance. Additionally, you may configure your reverse proxy to forward all non-existing subdomains (e.g. nonexistent.example.com) to one of these domains so that any crawlers that are guessing domains at random will automatically end up at your Krawl instance.
## Enable database dump job for backups
To enable the database dump job, set the following variables (*config file example*):
```yaml
backups:
path: "backups" # where backup will be saved
cron: "*/30 * * * *" # frequency of the cronjob
enabled: true
```
## Customizing the Canary Token ## Customizing the Canary Token
To create a custom canary token, visit https://canarytokens.org To create a custom canary token, visit https://canarytokens.org
and generate a “Web bug” canary token. and generate a “Web bug” canary token.

View File

@@ -28,6 +28,7 @@ dashboard:
backups: backups:
path: "backups" path: "backups"
cron: "*/30 * * * *" cron: "*/30 * * * *"
enabled: false
exports: exports:
path: "exports" path: "exports"

View File

@@ -25,6 +25,7 @@ data:
backups: backups:
path: {{ .Values.config.backups.path | quote }} path: {{ .Values.config.backups.path | quote }}
cron: {{ .Values.config.backups.cron | quote }} cron: {{ .Values.config.backups.cron | quote }}
enabled: {{ .Values.config.backups.enabled | quote }}
exports: exports:
path: {{ .Values.config.exports.path | quote }} path: {{ .Values.config.exports.path | quote }}
database: database:

View File

@@ -86,6 +86,7 @@ config:
secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard" secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard"
backups: backups:
path: "backups" path: "backups"
enabled: true
cron: "*/30 * * * *" cron: "*/30 * * * *"
exports: exports:
path: "exports" path: "exports"

View File

@@ -42,6 +42,7 @@ class Config:
# backup job settings # backup job settings
backups_path: str = "backups" backups_path: str = "backups"
backups_enabled: bool = False
backups_cron: str = "*/30 * * * *" backups_cron: str = "*/30 * * * *"
# Database settings # Database settings
database_path: str = "data/krawl.db" database_path: str = "data/krawl.db"
@@ -195,6 +196,7 @@ class Config:
probability_error_codes=behavior.get("probability_error_codes", 0), probability_error_codes=behavior.get("probability_error_codes", 0),
exports_path=exports.get("path"), exports_path=exports.get("path"),
backups_path=backups.get("path"), backups_path=backups.get("path"),
backups_enabled=backups.get("enabled", False),
backups_cron=backups.get("cron"), backups_cron=backups.get("cron"),
database_path=database.get("path", "data/krawl.db"), database_path=database.get("path", "data/krawl.db"),
database_retention_days=database.get("retention_days", 30), database_retention_days=database.get("retention_days", 30),

View File

@@ -16,7 +16,7 @@ app_logger = get_app_logger()
TASK_CONFIG = { TASK_CONFIG = {
"name": "dump-krawl-data", "name": "dump-krawl-data",
"cron": f"{config.backups_cron}", "cron": f"{config.backups_cron}",
"enabled": True, "enabled": config.backups_enabled,
"run_when_loaded": True, "run_when_loaded": True,
} }