Added a task to dump krawl data and adjusted configuration files
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -68,6 +68,7 @@ data/
|
|||||||
*.db
|
*.db
|
||||||
*.sqlite
|
*.sqlite
|
||||||
*.sqlite3
|
*.sqlite3
|
||||||
|
backups/
|
||||||
|
|
||||||
# Temporary files
|
# Temporary files
|
||||||
*.tmp
|
*.tmp
|
||||||
|
|||||||
@@ -25,6 +25,10 @@ dashboard:
|
|||||||
# secret_path: super-secret-dashboard-path
|
# secret_path: super-secret-dashboard-path
|
||||||
secret_path: test
|
secret_path: test
|
||||||
|
|
||||||
|
backups:
|
||||||
|
path: "backups"
|
||||||
|
cron: "*/30 * * * *"
|
||||||
|
|
||||||
exports:
|
exports:
|
||||||
path: "exports"
|
path: "exports"
|
||||||
|
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ services:
|
|||||||
- ./logs:/app/logs
|
- ./logs:/app/logs
|
||||||
- ./exports:/app/exports
|
- ./exports:/app/exports
|
||||||
- ./data:/app/data
|
- ./data:/app/data
|
||||||
|
- ./backups:/app/backups
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
develop:
|
develop:
|
||||||
watch:
|
watch:
|
||||||
|
|||||||
@@ -22,6 +22,9 @@ data:
|
|||||||
token_tries: {{ .Values.config.canary.token_tries }}
|
token_tries: {{ .Values.config.canary.token_tries }}
|
||||||
dashboard:
|
dashboard:
|
||||||
secret_path: {{ .Values.config.dashboard.secret_path | toYaml }}
|
secret_path: {{ .Values.config.dashboard.secret_path | toYaml }}
|
||||||
|
backups:
|
||||||
|
path: {{ .Values.config.backups.path | quote }}
|
||||||
|
cron: {{ .Values.config.backups.cron | quote }}
|
||||||
exports:
|
exports:
|
||||||
path: {{ .Values.config.exports.path | quote }}
|
path: {{ .Values.config.exports.path | quote }}
|
||||||
database:
|
database:
|
||||||
|
|||||||
@@ -84,6 +84,9 @@ config:
|
|||||||
token_tries: 10
|
token_tries: 10
|
||||||
dashboard:
|
dashboard:
|
||||||
secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard"
|
secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard"
|
||||||
|
backups:
|
||||||
|
path: "backups"
|
||||||
|
cron: "*/30 * * * *"
|
||||||
exports:
|
exports:
|
||||||
path: "exports"
|
path: "exports"
|
||||||
database:
|
database:
|
||||||
|
|||||||
@@ -39,6 +39,10 @@ class Config:
|
|||||||
|
|
||||||
# exporter settings
|
# exporter settings
|
||||||
exports_path: str = "exports"
|
exports_path: str = "exports"
|
||||||
|
|
||||||
|
# backup job settings
|
||||||
|
backups_path: str = "backups"
|
||||||
|
backups_cron: str = "*/30 * * * *"
|
||||||
# Database settings
|
# Database settings
|
||||||
database_path: str = "data/krawl.db"
|
database_path: str = "data/krawl.db"
|
||||||
database_retention_days: int = 30
|
database_retention_days: int = 30
|
||||||
@@ -153,6 +157,7 @@ class Config:
|
|||||||
dashboard = data.get("dashboard", {})
|
dashboard = data.get("dashboard", {})
|
||||||
api = data.get("api", {})
|
api = data.get("api", {})
|
||||||
exports = data.get("exports", {})
|
exports = data.get("exports", {})
|
||||||
|
backups = data.get("backups", {})
|
||||||
database = data.get("database", {})
|
database = data.get("database", {})
|
||||||
behavior = data.get("behavior", {})
|
behavior = data.get("behavior", {})
|
||||||
analyzer = data.get("analyzer") or {}
|
analyzer = data.get("analyzer") or {}
|
||||||
@@ -189,6 +194,8 @@ class Config:
|
|||||||
dashboard_secret_path=dashboard_path,
|
dashboard_secret_path=dashboard_path,
|
||||||
probability_error_codes=behavior.get("probability_error_codes", 0),
|
probability_error_codes=behavior.get("probability_error_codes", 0),
|
||||||
exports_path=exports.get("path"),
|
exports_path=exports.get("path"),
|
||||||
|
backups_path=backups.get("path"),
|
||||||
|
backups_cron=backups.get("cron"),
|
||||||
database_path=database.get("path", "data/krawl.db"),
|
database_path=database.get("path", "data/krawl.db"),
|
||||||
database_retention_days=database.get("retention_days", 30),
|
database_retention_days=database.get("retention_days", 30),
|
||||||
http_risky_methods_threshold=analyzer.get(
|
http_risky_methods_threshold=analyzer.get(
|
||||||
|
|||||||
93
src/tasks/db_dump.py
Normal file
93
src/tasks/db_dump.py
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
# tasks/db_dump.py
|
||||||
|
|
||||||
|
from logger import get_app_logger
|
||||||
|
from database import get_database
|
||||||
|
from config import get_config
|
||||||
|
from sqlalchemy import MetaData, inspect
|
||||||
|
from sqlalchemy.schema import CreateTable
|
||||||
|
import os
|
||||||
|
|
||||||
|
config = get_config()
|
||||||
|
app_logger = get_app_logger()
|
||||||
|
|
||||||
|
# ----------------------
|
||||||
|
# TASK CONFIG
|
||||||
|
# ----------------------
|
||||||
|
TASK_CONFIG = {
|
||||||
|
"name": "dump-krawl-data",
|
||||||
|
"cron": f"{config.backups_cron}",
|
||||||
|
"enabled": True,
|
||||||
|
"run_when_loaded": True,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ----------------------
|
||||||
|
# TASK LOGIC
|
||||||
|
# ----------------------
|
||||||
|
def main():
|
||||||
|
"""
|
||||||
|
Dump krawl database to a sql file for backups
|
||||||
|
"""
|
||||||
|
task_name = TASK_CONFIG.get("name")
|
||||||
|
app_logger.info(f"[Background Task] {task_name} starting...")
|
||||||
|
|
||||||
|
try:
|
||||||
|
db = get_database()
|
||||||
|
engine = db._engine
|
||||||
|
|
||||||
|
metadata = MetaData()
|
||||||
|
|
||||||
|
# Reflect the database structure
|
||||||
|
metadata.reflect(bind=engine)
|
||||||
|
output_file = os.path.join(config.backups_path,"db_dump.sql")
|
||||||
|
|
||||||
|
with open(output_file, 'w') as f:
|
||||||
|
# Write header
|
||||||
|
app_logger.info(f"[Background Task] {task_name} started database dump")
|
||||||
|
|
||||||
|
# Get inspector for additional metadata
|
||||||
|
inspector = inspect(engine)
|
||||||
|
|
||||||
|
# Dump schema (CREATE TABLE statements)
|
||||||
|
f.write("-- Schema\n")
|
||||||
|
f.write("-- " + "="*70 + "\n\n")
|
||||||
|
|
||||||
|
for table_name in metadata.tables:
|
||||||
|
table = metadata.tables[table_name]
|
||||||
|
app_logger.info(f"[Background Task] {task_name} dumping {table} table schema")
|
||||||
|
|
||||||
|
# Create table statement
|
||||||
|
create_stmt = str(CreateTable(table).compile(engine))
|
||||||
|
f.write(f"{create_stmt};\n\n")
|
||||||
|
|
||||||
|
f.write("\n-- Data\n")
|
||||||
|
f.write("-- " + "="*70 + "\n\n")
|
||||||
|
|
||||||
|
with engine.connect() as conn:
|
||||||
|
for table_name in metadata.tables:
|
||||||
|
table = metadata.tables[table_name]
|
||||||
|
|
||||||
|
f.write(f"-- Table: {table_name}\n")
|
||||||
|
|
||||||
|
# Select all data from table
|
||||||
|
result = conn.execute(table.select())
|
||||||
|
rows = result.fetchall()
|
||||||
|
|
||||||
|
if rows:
|
||||||
|
app_logger.info(f"[Background Task] {task_name} dumping {table} content")
|
||||||
|
for row in rows:
|
||||||
|
# Build INSERT statement
|
||||||
|
columns = ', '.join([col.name for col in table.columns])
|
||||||
|
values = ', '.join([repr(value) for value in row])
|
||||||
|
f.write(f"INSERT INTO {table_name} ({columns}) VALUES ({values});\n")
|
||||||
|
|
||||||
|
f.write("\n")
|
||||||
|
else:
|
||||||
|
f.write(f"-- No data in {table_name}\n\n")
|
||||||
|
app_logger.info(f"[Background Task] {task_name} no data in {table}")
|
||||||
|
|
||||||
|
app_logger.info(f"[Background Task] {task_name} Database dump completed: {output_file}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
app_logger.error(f"[Background Task] {task_name} failed: {e}")
|
||||||
|
finally:
|
||||||
|
db.close_session()
|
||||||
Reference in New Issue
Block a user