From 76219326021f6ab9532c80607f760e49459a6dff Mon Sep 17 00:00:00 2001 From: carnivuth Date: Thu, 5 Feb 2026 17:26:06 +0100 Subject: [PATCH 1/3] added task dump krawl data and adjusted configuration files --- .gitignore | 3 +- config.yaml | 4 ++ docker-compose.yaml | 1 + helm/templates/configmap.yaml | 3 ++ helm/values.yaml | 3 ++ src/config.py | 7 +++ src/tasks/db_dump.py | 93 +++++++++++++++++++++++++++++++++++ 7 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 src/tasks/db_dump.py diff --git a/.gitignore b/.gitignore index ed1f3d9..109cf28 100644 --- a/.gitignore +++ b/.gitignore @@ -68,6 +68,7 @@ data/ *.db *.sqlite *.sqlite3 +backups/ # Temporary files *.tmp @@ -83,4 +84,4 @@ personal-values.yaml /src/exports/* # tmux config -.tmux.conf \ No newline at end of file +.tmux.conf diff --git a/config.yaml b/config.yaml index 40246db..08f9fcc 100644 --- a/config.yaml +++ b/config.yaml @@ -25,6 +25,10 @@ dashboard: # secret_path: super-secret-dashboard-path secret_path: test +backups: + path: "backups" + cron: "*/30 * * * *" + exports: path: "exports" diff --git a/docker-compose.yaml b/docker-compose.yaml index 44b534d..17680de 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -18,6 +18,7 @@ services: - ./logs:/app/logs - ./exports:/app/exports - ./data:/app/data + - ./backups:/app/backups restart: unless-stopped develop: watch: diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index 4d503ab..176ef21 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -22,6 +22,9 @@ data: token_tries: {{ .Values.config.canary.token_tries }} dashboard: secret_path: {{ .Values.config.dashboard.secret_path | toYaml }} + backups: + path: {{ .Values.config.backups.path | quote }} + cron: {{ .Values.config.backups.cron | quote }} exports: path: {{ .Values.config.exports.path | quote }} database: diff --git a/helm/values.yaml b/helm/values.yaml index b9fd375..91615f2 100644 --- a/helm/values.yaml +++ 
b/helm/values.yaml @@ -84,6 +84,9 @@ config: token_tries: 10 dashboard: secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard" + backups: + path: "backups" + cron: "*/30 * * * *" exports: path: "exports" database: diff --git a/src/config.py b/src/config.py index b17df7e..a3968e3 100644 --- a/src/config.py +++ b/src/config.py @@ -39,6 +39,10 @@ class Config: # exporter settings exports_path: str = "exports" + + # backup job settings + backups_path: str = "backups" + backups_cron: str = "*/30 * * * *" # Database settings database_path: str = "data/krawl.db" database_retention_days: int = 30 @@ -153,6 +157,7 @@ class Config: dashboard = data.get("dashboard", {}) api = data.get("api", {}) exports = data.get("exports", {}) + backups = data.get("backups", {}) database = data.get("database", {}) behavior = data.get("behavior", {}) analyzer = data.get("analyzer") or {} @@ -189,6 +194,8 @@ class Config: dashboard_secret_path=dashboard_path, probability_error_codes=behavior.get("probability_error_codes", 0), exports_path=exports.get("path"), + backups_path=backups.get("path"), + backups_cron=backups.get("cron"), database_path=database.get("path", "data/krawl.db"), database_retention_days=database.get("retention_days", 30), http_risky_methods_threshold=analyzer.get( diff --git a/src/tasks/db_dump.py b/src/tasks/db_dump.py new file mode 100644 index 0000000..14dbe66 --- /dev/null +++ b/src/tasks/db_dump.py @@ -0,0 +1,93 @@ +# tasks/db_dump.py + +from logger import get_app_logger +from database import get_database +from config import get_config +from sqlalchemy import MetaData, inspect +from sqlalchemy.schema import CreateTable +import os + +config = get_config() +app_logger = get_app_logger() + +# ---------------------- +# TASK CONFIG +# ---------------------- +TASK_CONFIG = { + "name": "dump-krawl-data", + "cron": f"{config.backups_cron}", + "enabled": True, + "run_when_loaded": True, +} + +# ---------------------- +# TASK LOGIC +# 
---------------------- +def main(): + """ + Dump krawl database to a sql file for backups + """ + task_name = TASK_CONFIG.get("name") + app_logger.info(f"[Background Task] {task_name} starting...") + + try: + db = get_database() + engine = db._engine + + metadata = MetaData() + + # Reflect the database structure + metadata.reflect(bind=engine) + output_file = os.path.join(config.backups_path,"db_dump.sql") + + with open(output_file, 'w') as f: + # Write header + app_logger.info(f"[Background Task] {task_name} started database dump") + + # Get inspector for additional metadata + inspector = inspect(engine) + + # Dump schema (CREATE TABLE statements) + f.write("-- Schema\n") + f.write("-- " + "="*70 + "\n\n") + + for table_name in metadata.tables: + table = metadata.tables[table_name] + app_logger.info(f"[Background Task] {task_name} dumping {table} table schema") + + # Create table statement + create_stmt = str(CreateTable(table).compile(engine)) + f.write(f"{create_stmt};\n\n") + + f.write("\n-- Data\n") + f.write("-- " + "="*70 + "\n\n") + + with engine.connect() as conn: + for table_name in metadata.tables: + table = metadata.tables[table_name] + + f.write(f"-- Table: {table_name}\n") + + # Select all data from table + result = conn.execute(table.select()) + rows = result.fetchall() + + if rows: + app_logger.info(f"[Background Task] {task_name} dumping {table} content") + for row in rows: + # Build INSERT statement + columns = ', '.join([col.name for col in table.columns]) + values = ', '.join([repr(value) for value in row]) + f.write(f"INSERT INTO {table_name} ({columns}) VALUES ({values});\n") + + f.write("\n") + else: + f.write(f"-- No data in {table_name}\n\n") + app_logger.info(f"[Background Task] {task_name} no data in {table}") + + app_logger.info(f"[Background Task] {task_name} Database dump completed: {output_file}") + + except Exception as e: + app_logger.error(f"[Background Task] {task_name} failed: {e}") + finally: + db.close_session() From 
9c5bcab200781c32b792a3787b778f050136b8b0 Mon Sep 17 00:00:00 2001 From: carnivuth Date: Thu, 5 Feb 2026 17:57:29 +0100 Subject: [PATCH 2/3] updated cron with configuration variables --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 70b23ce..038d023 100644 --- a/README.md +++ b/README.md @@ -211,6 +211,8 @@ Krawl uses a **configuration hierarchy** in which **environment variables take p | `KRAWL_PROBABILITY_ERROR_CODES` | Error response probability (0-100%) | `0` | | `KRAWL_DATABASE_PATH` | Database file location | `data/krawl.db` | | `KRAWL_EXPORTS_PATH` | Path where firewalls rule sets are exported | `exports` | +| `KRAWL_BACKUPS_PATH` | Path where database dumps are saved | `backups` | +| `KRAWL_BACKUPS_CRON` | Cron expression to control backup job schedule | `*/30 * * * *` | | `KRAWL_DATABASE_RETENTION_DAYS` | Days to retain data in database | `30` | | `KRAWL_HTTP_RISKY_METHODS_THRESHOLD` | Threshold for risky HTTP methods detection | `0.1` | | `KRAWL_VIOLATED_ROBOTS_THRESHOLD` | Threshold for robots.txt violations | `0.1` | From 789d77c7846feb4f96df1108f725254ba5f7f63a Mon Sep 17 00:00:00 2001 From: carnivuth Date: Thu, 5 Feb 2026 17:57:38 +0100 Subject: [PATCH 3/3] linted code --- src/tasks/db_dump.py | 33 +++++++++++++++++++---------- src/templates/dashboard_template.py | 9 ++------ 2 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/tasks/db_dump.py b/src/tasks/db_dump.py index 14dbe66..b1644d6 100644 --- a/src/tasks/db_dump.py +++ b/src/tasks/db_dump.py @@ -20,6 +20,7 @@ TASK_CONFIG = { "run_when_loaded": True, } + # ---------------------- # TASK LOGIC # ---------------------- @@ -38,9 +39,9 @@ def main(): # Reflect the database structure metadata.reflect(bind=engine) - output_file = os.path.join(config.backups_path,"db_dump.sql") + output_file = os.path.join(config.backups_path, "db_dump.sql") - with open(output_file, 'w') as f: + with open(output_file, "w") as f: # Write header
app_logger.info(f"[Background Task] {task_name} started database dump") @@ -49,18 +50,20 @@ def main(): # Dump schema (CREATE TABLE statements) f.write("-- Schema\n") - f.write("-- " + "="*70 + "\n\n") + f.write("-- " + "=" * 70 + "\n\n") for table_name in metadata.tables: table = metadata.tables[table_name] - app_logger.info(f"[Background Task] {task_name} dumping {table} table schema") + app_logger.info( + f"[Background Task] {task_name} dumping {table} table schema" + ) # Create table statement create_stmt = str(CreateTable(table).compile(engine)) f.write(f"{create_stmt};\n\n") f.write("\n-- Data\n") - f.write("-- " + "="*70 + "\n\n") + f.write("-- " + "=" * 70 + "\n\n") with engine.connect() as conn: for table_name in metadata.tables: @@ -73,19 +76,27 @@ def main(): rows = result.fetchall() if rows: - app_logger.info(f"[Background Task] {task_name} dumping {table} content") + app_logger.info( + f"[Background Task] {task_name} dumping {table} content" + ) for row in rows: # Build INSERT statement - columns = ', '.join([col.name for col in table.columns]) - values = ', '.join([repr(value) for value in row]) - f.write(f"INSERT INTO {table_name} ({columns}) VALUES ({values});\n") + columns = ", ".join([col.name for col in table.columns]) + values = ", ".join([repr(value) for value in row]) + f.write( + f"INSERT INTO {table_name} ({columns}) VALUES ({values});\n" + ) f.write("\n") else: f.write(f"-- No data in {table_name}\n\n") - app_logger.info(f"[Background Task] {task_name} no data in {table}") + app_logger.info( + f"[Background Task] {task_name} no data in {table}" + ) - app_logger.info(f"[Background Task] {task_name} Database dump completed: {output_file}") + app_logger.info( + f"[Background Task] {task_name} Database dump completed: {output_file}" + ) except Exception as e: app_logger.error(f"[Background Task] {task_name} failed: {e}") diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py index a31f929..30628c7 100644 --- 
a/src/templates/dashboard_template.py +++ b/src/templates/dashboard_template.py @@ -50,9 +50,7 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str: # Generate suspicious accesses rows with clickable IPs suspicious_rows = ( - "\n".join( - [ - f""" + "\n".join([f""" {_escape(log["ip"])} {_escape(log["path"])} {_escape(log["user_agent"][:60])} @@ -64,10 +62,7 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
Loading stats...
- """ - for log in stats["recent_suspicious"][-10:] - ] - ) + """ for log in stats["recent_suspicious"][-10:]]) or 'No suspicious activity detected' )