Added a task to dump krawl database data and adjusted configuration files

This commit is contained in:
carnivuth
2026-02-05 17:26:06 +01:00
parent 5c9918e29b
commit 7621932602
7 changed files with 113 additions and 1 deletions

3
.gitignore vendored
View File

@@ -68,6 +68,7 @@ data/
*.db
*.sqlite
*.sqlite3
backups/
# Temporary files
*.tmp
@@ -83,4 +84,4 @@ personal-values.yaml
/src/exports/*
# tmux config
.tmux.conf
.tmux.conf

View File

@@ -25,6 +25,10 @@ dashboard:
# secret_path: super-secret-dashboard-path
secret_path: test
backups:
path: "backups"
cron: "*/30 * * * *"
exports:
path: "exports"

View File

@@ -18,6 +18,7 @@ services:
- ./logs:/app/logs
- ./exports:/app/exports
- ./data:/app/data
- ./backups:/app/backups
restart: unless-stopped
develop:
watch:

View File

@@ -22,6 +22,9 @@ data:
token_tries: {{ .Values.config.canary.token_tries }}
dashboard:
secret_path: {{ .Values.config.dashboard.secret_path | toYaml }}
backups:
path: {{ .Values.config.backups.path | quote }}
cron: {{ .Values.config.backups.cron | quote }}
exports:
path: {{ .Values.config.exports.path | quote }}
database:

View File

@@ -84,6 +84,9 @@ config:
token_tries: 10
dashboard:
secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard"
backups:
path: "backups"
cron: "*/30 * * * *"
exports:
path: "exports"
database:

View File

@@ -39,6 +39,10 @@ class Config:
# exporter settings
exports_path: str = "exports"
# backup job settings
backups_path: str = "backups"
backups_cron: str = "*/30 * * * *"
# Database settings
database_path: str = "data/krawl.db"
database_retention_days: int = 30
@@ -153,6 +157,7 @@ class Config:
dashboard = data.get("dashboard", {})
api = data.get("api", {})
exports = data.get("exports", {})
backups = data.get("backups", {})
database = data.get("database", {})
behavior = data.get("behavior", {})
analyzer = data.get("analyzer") or {}
@@ -189,6 +194,8 @@ class Config:
dashboard_secret_path=dashboard_path,
probability_error_codes=behavior.get("probability_error_codes", 0),
exports_path=exports.get("path"),
backups_path=backups.get("path"),
backups_cron=backups.get("cron"),
database_path=database.get("path", "data/krawl.db"),
database_retention_days=database.get("retention_days", 30),
http_risky_methods_threshold=analyzer.get(

93
src/tasks/db_dump.py Normal file
View File

@@ -0,0 +1,93 @@
# tasks/db_dump.py
from logger import get_app_logger
from database import get_database
from config import get_config
from sqlalchemy import MetaData, inspect
from sqlalchemy.schema import CreateTable
import os
config = get_config()
app_logger = get_app_logger()

# ----------------------
# TASK CONFIG
# ----------------------
TASK_CONFIG = {
    "name": "dump-krawl-data",
    # Pass the configured cron expression through unchanged. Wrapping it in an
    # f-string (as before) would silently turn a missing value (None, since
    # Config.from_yaml uses backups.get("cron") with no default) into the
    # invalid cron string "None", hiding the misconfiguration from the
    # scheduler.
    "cron": config.backups_cron,
    "enabled": True,
    "run_when_loaded": True,
}
# ----------------------
# TASK LOGIC
# ----------------------
def main():
"""
Dump krawl database to a sql file for backups
"""
task_name = TASK_CONFIG.get("name")
app_logger.info(f"[Background Task] {task_name} starting...")
try:
db = get_database()
engine = db._engine
metadata = MetaData()
# Reflect the database structure
metadata.reflect(bind=engine)
output_file = os.path.join(config.backups_path,"db_dump.sql")
with open(output_file, 'w') as f:
# Write header
app_logger.info(f"[Background Task] {task_name} started database dump")
# Get inspector for additional metadata
inspector = inspect(engine)
# Dump schema (CREATE TABLE statements)
f.write("-- Schema\n")
f.write("-- " + "="*70 + "\n\n")
for table_name in metadata.tables:
table = metadata.tables[table_name]
app_logger.info(f"[Background Task] {task_name} dumping {table} table schema")
# Create table statement
create_stmt = str(CreateTable(table).compile(engine))
f.write(f"{create_stmt};\n\n")
f.write("\n-- Data\n")
f.write("-- " + "="*70 + "\n\n")
with engine.connect() as conn:
for table_name in metadata.tables:
table = metadata.tables[table_name]
f.write(f"-- Table: {table_name}\n")
# Select all data from table
result = conn.execute(table.select())
rows = result.fetchall()
if rows:
app_logger.info(f"[Background Task] {task_name} dumping {table} content")
for row in rows:
# Build INSERT statement
columns = ', '.join([col.name for col in table.columns])
values = ', '.join([repr(value) for value in row])
f.write(f"INSERT INTO {table_name} ({columns}) VALUES ({values});\n")
f.write("\n")
else:
f.write(f"-- No data in {table_name}\n\n")
app_logger.info(f"[Background Task] {task_name} no data in {table}")
app_logger.info(f"[Background Task] {task_name} Database dump completed: {output_file}")
except Exception as e:
app_logger.error(f"[Background Task] {task_name} failed: {e}")
finally:
db.close_session()