From 76219326021f6ab9532c80607f760e49459a6dff Mon Sep 17 00:00:00 2001 From: carnivuth Date: Thu, 5 Feb 2026 17:26:06 +0100 Subject: [PATCH] added task dump krawl data and adjusted configuration files --- .gitignore | 3 +- config.yaml | 4 ++ docker-compose.yaml | 1 + helm/templates/configmap.yaml | 3 ++ helm/values.yaml | 3 ++ src/config.py | 7 +++ src/tasks/db_dump.py | 93 +++++++++++++++++++++++++++++++++++ 7 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 src/tasks/db_dump.py diff --git a/.gitignore b/.gitignore index ed1f3d9..109cf28 100644 --- a/.gitignore +++ b/.gitignore @@ -68,6 +68,7 @@ data/ *.db *.sqlite *.sqlite3 +backups/ # Temporary files *.tmp @@ -83,4 +84,4 @@ personal-values.yaml /src/exports/* # tmux config -.tmux.conf \ No newline at end of file +.tmux.conf diff --git a/config.yaml b/config.yaml index 40246db..08f9fcc 100644 --- a/config.yaml +++ b/config.yaml @@ -25,6 +25,10 @@ dashboard: # secret_path: super-secret-dashboard-path secret_path: test +backups: + path: "backups" + cron: "*/30 * * * *" + exports: path: "exports" diff --git a/docker-compose.yaml b/docker-compose.yaml index 44b534d..17680de 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -18,6 +18,7 @@ services: - ./logs:/app/logs - ./exports:/app/exports - ./data:/app/data + - ./backups:/app/backups restart: unless-stopped develop: watch: diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index 4d503ab..176ef21 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -22,6 +22,9 @@ data: token_tries: {{ .Values.config.canary.token_tries }} dashboard: secret_path: {{ .Values.config.dashboard.secret_path | toYaml }} + backups: + path: {{ .Values.config.backups.path | quote }} + cron: {{ .Values.config.backups.cron | quote }} exports: path: {{ .Values.config.exports.path | quote }} database: diff --git a/helm/values.yaml b/helm/values.yaml index b9fd375..91615f2 100644 --- a/helm/values.yaml +++ 
b/helm/values.yaml @@ -84,6 +84,9 @@ config: token_tries: 10 dashboard: secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard" + backups: + path: "backups" + cron: "*/30 * * * *" exports: path: "exports" database: diff --git a/src/config.py b/src/config.py index b17df7e..a3968e3 100644 --- a/src/config.py +++ b/src/config.py @@ -39,6 +39,10 @@ class Config: # exporter settings exports_path: str = "exports" + + # backup job settings + backups_path: str = "backups" + backups_cron: str = "*/30 * * * *" # Database settings database_path: str = "data/krawl.db" database_retention_days: int = 30 @@ -153,6 +157,7 @@ class Config: dashboard = data.get("dashboard", {}) api = data.get("api", {}) exports = data.get("exports", {}) + backups = data.get("backups", {}) database = data.get("database", {}) behavior = data.get("behavior", {}) analyzer = data.get("analyzer") or {} @@ -189,6 +194,8 @@ class Config: dashboard_secret_path=dashboard_path, probability_error_codes=behavior.get("probability_error_codes", 0), exports_path=exports.get("path"), + backups_path=backups.get("path"), + backups_cron=backups.get("cron"), database_path=database.get("path", "data/krawl.db"), database_retention_days=database.get("retention_days", 30), http_risky_methods_threshold=analyzer.get( diff --git a/src/tasks/db_dump.py b/src/tasks/db_dump.py new file mode 100644 index 0000000..14dbe66 --- /dev/null +++ b/src/tasks/db_dump.py @@ -0,0 +1,93 @@ +# tasks/db_dump.py + +from logger import get_app_logger +from database import get_database +from config import get_config +from sqlalchemy import MetaData, inspect +from sqlalchemy.schema import CreateTable +import os + +config = get_config() +app_logger = get_app_logger() + +# ---------------------- +# TASK CONFIG +# ---------------------- +TASK_CONFIG = { + "name": "dump-krawl-data", + "cron": f"{config.backups_cron}", + "enabled": True, + "run_when_loaded": True, +} + +# ---------------------- +# TASK LOGIC +# 
# ----------------------
def _sql_literal(value):
    """Render a Python value fetched from the database as a SQL literal.

    ``repr()`` is not a SQL serializer: it emits ``None`` instead of ``NULL``,
    ``datetime.datetime(...)`` for timestamps, ``b'...'`` for blobs and may
    wrap strings in double quotes (which SQL treats as identifiers).

    Args:
        value: A single column value taken from a result row.

    Returns:
        str: Text that can be placed inside a ``VALUES (...)`` list.
    """
    if value is None:
        return "NULL"
    # bool must be tested before int because bool is a subclass of int.
    if isinstance(value, bool):
        return "1" if value else "0"
    if isinstance(value, (int, float)):
        return repr(value)
    if isinstance(value, (bytes, bytearray, memoryview)):
        # NOTE(review): X'..' is the SQLite/standard-SQL blob literal; confirm
        # if another backend is ever used.
        return f"X'{bytes(value).hex()}'"
    # Everything else (str, datetime, date, Decimal, ...) is written as a
    # quoted string with embedded single quotes doubled.
    escaped = str(value).replace("'", "''")
    return f"'{escaped}'"


def _write_dump(engine, handle, task_name):
    """Write schema and data of every reflected table to ``handle``.

    Args:
        engine: SQLAlchemy engine bound to the database to dump.
        handle: Writable text file object receiving the SQL script.
        task_name: Task name used as a prefix in log messages.
    """
    app_logger.info(f"[Background Task] {task_name} started database dump")

    # Reflect the database structure
    metadata = MetaData()
    metadata.reflect(bind=engine)

    # sorted_tables yields tables in foreign-key dependency order, so the
    # script can be replayed top to bottom.
    tables = metadata.sorted_tables
    # Quote identifiers only when the dialect requires it (reserved words,
    # special characters); ordinary names are emitted unchanged.
    quote = engine.dialect.identifier_preparer.quote

    # Dump schema (CREATE TABLE statements)
    handle.write("-- Schema\n")
    handle.write("-- " + "=" * 70 + "\n\n")
    for table in tables:
        app_logger.info(
            f"[Background Task] {task_name} dumping {table} table schema"
        )
        create_stmt = str(CreateTable(table).compile(engine)).strip()
        handle.write(f"{create_stmt};\n\n")

    handle.write("\n-- Data\n")
    handle.write("-- " + "=" * 70 + "\n\n")

    with engine.connect() as conn:
        for table in tables:
            handle.write(f"-- Table: {table.name}\n")

            # The INSERT prefix is identical for every row of the table.
            columns = ", ".join(quote(col.name) for col in table.columns)
            insert_prefix = (
                f"INSERT INTO {quote(table.name)} ({columns}) VALUES"
            )

            # Iterate the result instead of fetchall() so a large table is
            # not materialised in memory.
            row_count = 0
            for row in conn.execute(table.select()):
                if row_count == 0:
                    app_logger.info(
                        f"[Background Task] {task_name} dumping {table} content"
                    )
                values = ", ".join(_sql_literal(value) for value in row)
                handle.write(f"{insert_prefix} ({values});\n")
                row_count += 1

            if row_count:
                handle.write("\n")
            else:
                handle.write(f"-- No data in {table.name}\n\n")
                app_logger.info(
                    f"[Background Task] {task_name} no data in {table}"
                )


def main():
    """
    Dump krawl database to a sql file for backups

    The dump is written to ``db_dump.sql`` inside ``config.backups_path``.
    It is first written to a temporary sibling file and then moved into
    place, so a failed run never truncates the previous backup. Errors are
    logged rather than raised.
    """
    task_name = TASK_CONFIG.get("name")
    app_logger.info(f"[Background Task] {task_name} starting...")

    # Bound before the try so the finally clause is safe even when
    # get_database() itself fails.
    db = None
    try:
        db = get_database()
        # NOTE(review): reaches into a private attribute; switch to a public
        # accessor if the database wrapper exposes one.
        engine = db._engine

        # The config loader passes backups.get("path") without a default, so
        # the value is None when the "backups" section is missing.
        backups_dir = config.backups_path or "backups"
        os.makedirs(backups_dir, exist_ok=True)

        output_file = os.path.join(backups_dir, "db_dump.sql")
        # A stale .tmp left by a failed run is overwritten by the next run.
        tmp_file = f"{output_file}.tmp"

        with open(tmp_file, "w", encoding="utf-8") as f:
            _write_dump(engine, f, task_name)

        # Atomic swap: readers see either the old or the new complete dump.
        os.replace(tmp_file, output_file)

        app_logger.info(
            f"[Background Task] {task_name} Database dump completed: {output_file}"
        )

    except Exception as e:
        # Top-level boundary of a background task: log and keep running.
        app_logger.error(f"[Background Task] {task_name} failed: {e}")
    finally:
        if db is not None:
            db.close_session()