Added a task to dump krawl data and adjusted configuration files
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -68,6 +68,7 @@ data/
|
||||
*.db
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
backups/
|
||||
|
||||
# Temporary files
|
||||
*.tmp
|
||||
@@ -83,4 +84,4 @@ personal-values.yaml
|
||||
/src/exports/*
|
||||
|
||||
# tmux config
|
||||
.tmux.conf
|
||||
.tmux.conf
|
||||
|
||||
@@ -25,6 +25,10 @@ dashboard:
|
||||
# secret_path: super-secret-dashboard-path
|
||||
secret_path: test
|
||||
|
||||
backups:
|
||||
path: "backups"
|
||||
cron: "*/30 * * * *"
|
||||
|
||||
exports:
|
||||
path: "exports"
|
||||
|
||||
|
||||
@@ -18,6 +18,7 @@ services:
|
||||
- ./logs:/app/logs
|
||||
- ./exports:/app/exports
|
||||
- ./data:/app/data
|
||||
- ./backups:/app/backups
|
||||
restart: unless-stopped
|
||||
develop:
|
||||
watch:
|
||||
|
||||
@@ -22,6 +22,9 @@ data:
|
||||
token_tries: {{ .Values.config.canary.token_tries }}
|
||||
dashboard:
|
||||
secret_path: {{ .Values.config.dashboard.secret_path | toYaml }}
|
||||
backups:
|
||||
path: {{ .Values.config.backups.path | quote }}
|
||||
cron: {{ .Values.config.backups.cron | quote }}
|
||||
exports:
|
||||
path: {{ .Values.config.exports.path | quote }}
|
||||
database:
|
||||
|
||||
@@ -84,6 +84,9 @@ config:
|
||||
token_tries: 10
|
||||
dashboard:
|
||||
secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard"
|
||||
backups:
|
||||
path: "backups"
|
||||
cron: "*/30 * * * *"
|
||||
exports:
|
||||
path: "exports"
|
||||
database:
|
||||
|
||||
@@ -39,6 +39,10 @@ class Config:
|
||||
|
||||
# exporter settings
|
||||
exports_path: str = "exports"
|
||||
|
||||
# backup job settings
|
||||
backups_path: str = "backups"
|
||||
backups_cron: str = "*/30 * * * *"
|
||||
# Database settings
|
||||
database_path: str = "data/krawl.db"
|
||||
database_retention_days: int = 30
|
||||
@@ -153,6 +157,7 @@ class Config:
|
||||
dashboard = data.get("dashboard", {})
|
||||
api = data.get("api", {})
|
||||
exports = data.get("exports", {})
|
||||
backups = data.get("backups", {})
|
||||
database = data.get("database", {})
|
||||
behavior = data.get("behavior", {})
|
||||
analyzer = data.get("analyzer") or {}
|
||||
@@ -189,6 +194,8 @@ class Config:
|
||||
dashboard_secret_path=dashboard_path,
|
||||
probability_error_codes=behavior.get("probability_error_codes", 0),
|
||||
exports_path=exports.get("path"),
|
||||
backups_path=backups.get("path"),
|
||||
backups_cron=backups.get("cron"),
|
||||
database_path=database.get("path", "data/krawl.db"),
|
||||
database_retention_days=database.get("retention_days", 30),
|
||||
http_risky_methods_threshold=analyzer.get(
|
||||
|
||||
93
src/tasks/db_dump.py
Normal file
93
src/tasks/db_dump.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# tasks/db_dump.py
|
||||
|
||||
from logger import get_app_logger
|
||||
from database import get_database
|
||||
from config import get_config
|
||||
from sqlalchemy import MetaData, inspect
|
||||
from sqlalchemy.schema import CreateTable
|
||||
import os
|
||||
|
||||
config = get_config()
|
||||
app_logger = get_app_logger()
|
||||
|
||||
# ----------------------
|
||||
# TASK CONFIG
|
||||
# ----------------------
|
||||
TASK_CONFIG = {
|
||||
"name": "dump-krawl-data",
|
||||
"cron": f"{config.backups_cron}",
|
||||
"enabled": True,
|
||||
"run_when_loaded": True,
|
||||
}
|
||||
|
||||
# ----------------------
|
||||
# TASK LOGIC
|
||||
# ----------------------
|
||||
def main():
|
||||
"""
|
||||
Dump krawl database to a sql file for backups
|
||||
"""
|
||||
task_name = TASK_CONFIG.get("name")
|
||||
app_logger.info(f"[Background Task] {task_name} starting...")
|
||||
|
||||
try:
|
||||
db = get_database()
|
||||
engine = db._engine
|
||||
|
||||
metadata = MetaData()
|
||||
|
||||
# Reflect the database structure
|
||||
metadata.reflect(bind=engine)
|
||||
output_file = os.path.join(config.backups_path,"db_dump.sql")
|
||||
|
||||
with open(output_file, 'w') as f:
|
||||
# Write header
|
||||
app_logger.info(f"[Background Task] {task_name} started database dump")
|
||||
|
||||
# Get inspector for additional metadata
|
||||
inspector = inspect(engine)
|
||||
|
||||
# Dump schema (CREATE TABLE statements)
|
||||
f.write("-- Schema\n")
|
||||
f.write("-- " + "="*70 + "\n\n")
|
||||
|
||||
for table_name in metadata.tables:
|
||||
table = metadata.tables[table_name]
|
||||
app_logger.info(f"[Background Task] {task_name} dumping {table} table schema")
|
||||
|
||||
# Create table statement
|
||||
create_stmt = str(CreateTable(table).compile(engine))
|
||||
f.write(f"{create_stmt};\n\n")
|
||||
|
||||
f.write("\n-- Data\n")
|
||||
f.write("-- " + "="*70 + "\n\n")
|
||||
|
||||
with engine.connect() as conn:
|
||||
for table_name in metadata.tables:
|
||||
table = metadata.tables[table_name]
|
||||
|
||||
f.write(f"-- Table: {table_name}\n")
|
||||
|
||||
# Select all data from table
|
||||
result = conn.execute(table.select())
|
||||
rows = result.fetchall()
|
||||
|
||||
if rows:
|
||||
app_logger.info(f"[Background Task] {task_name} dumping {table} content")
|
||||
for row in rows:
|
||||
# Build INSERT statement
|
||||
columns = ', '.join([col.name for col in table.columns])
|
||||
values = ', '.join([repr(value) for value in row])
|
||||
f.write(f"INSERT INTO {table_name} ({columns}) VALUES ({values});\n")
|
||||
|
||||
f.write("\n")
|
||||
else:
|
||||
f.write(f"-- No data in {table_name}\n\n")
|
||||
app_logger.info(f"[Background Task] {task_name} no data in {table}")
|
||||
|
||||
app_logger.info(f"[Background Task] {task_name} Database dump completed: {output_file}")
|
||||
|
||||
except Exception as e:
|
||||
app_logger.error(f"[Background Task] {task_name} failed: {e}")
|
||||
finally:
|
||||
db.close_session()
|
||||
Reference in New Issue
Block a user