feat: implement database migration runner and initialize migrations on startup

This commit is contained in:
Lorenzo Venerandi
2026-02-22 15:24:26 +01:00
parent df98eca066
commit 2f82d3a3bd
4 changed files with 102 additions and 2 deletions

View File

@@ -29,10 +29,11 @@ async def lifespan(app: FastAPI):
initialize_logging()
app_logger = get_app_logger()
# Initialize database
# Initialize database and run pending migrations before accepting traffic
try:
app_logger.info(f"Initializing database at: {config.database_path}")
initialize_database(config.database_path)
app_logger.info(f"Database initialized at: {config.database_path}")
app_logger.info("Database ready")
except Exception as e:
app_logger.warning(
f"Database initialization failed: {e}. Continuing with in-memory only."

View File

@@ -97,6 +97,11 @@ class DatabaseManager:
# Run automatic migrations for backward compatibility
self._run_migrations(database_path)
# Run schema migrations (columns & indexes on existing tables)
from migrations.runner import run_migrations
run_migrations(database_path)
# Set restrictive file permissions (owner read/write only)
if os.path.exists(database_path):
try:

View File

94
src/migrations/runner.py Normal file
View File

@@ -0,0 +1,94 @@
"""
Migration runner for Krawl.
Checks the database schema and applies any pending migrations at startup.
All checks are idempotent — safe to run on every boot.
Note: table creation (e.g. category_history) is already handled by
Base.metadata.create_all() in DatabaseManager.initialize() and is NOT
duplicated here. This runner only covers ALTER-level changes that
create_all() cannot apply to existing tables (new columns, new indexes).
"""
import sqlite3
import logging
from typing import List
# Module-level logger, obtained under the logger name "krawl".
logger = logging.getLogger("krawl")
def _column_exists(cursor, table_name: str, column_name: str) -> bool:
    """Return True if ``table_name`` has a column named ``column_name``.

    The table name is interpolated directly into the PRAGMA statement, so
    callers must only pass trusted, hard-coded table names.
    """
    cursor.execute(f"PRAGMA table_info({table_name})")
    # Field 1 of each PRAGMA table_info row is compared as the column name.
    return any(info[1] == column_name for info in cursor.fetchall())
def _index_exists(cursor, index_name: str) -> bool:
    """Return True if sqlite_master lists an index named ``index_name``."""
    query = "SELECT name FROM sqlite_master WHERE type='index' AND name=?"
    cursor.execute(query, (index_name,))
    match = cursor.fetchone()
    return match is not None
def _migrate_raw_request_column(cursor) -> bool:
    """Ensure access_logs has a raw_request TEXT column.

    Returns True when the column was added by this call and False when it
    was already present.
    """
    needs_column = not _column_exists(cursor, "access_logs", "raw_request")
    if needs_column:
        cursor.execute("ALTER TABLE access_logs ADD COLUMN raw_request TEXT")
    return needs_column
def _migrate_performance_indexes(cursor) -> List[str]:
    """Create any missing performance indexes on attack_detections.

    Returns the names of the indexes created by this call, in creation
    order; the list is empty when both indexes already exist.
    """
    # (index name, DDL statement) pairs, checked and created in this order.
    wanted = (
        (
            "ix_attack_detections_attack_type",
            "CREATE INDEX ix_attack_detections_attack_type "
            "ON attack_detections(attack_type)",
        ),
        (
            "ix_attack_detections_type_log",
            "CREATE INDEX ix_attack_detections_type_log "
            "ON attack_detections(attack_type, access_log_id)",
        ),
    )
    created = []
    for name, ddl in wanted:
        if _index_exists(cursor, name):
            continue
        cursor.execute(ddl)
        created.append(name)
    return created
def run_migrations(database_path: str) -> None:
    """
    Check the database schema and apply any pending migrations.

    Only handles ALTER-level changes (columns, indexes) that
    Base.metadata.create_all() cannot apply to existing tables.

    SQLite errors are logged and swallowed, so a failed migration never
    propagates to the caller. The connection is closed on every path, and
    the success summary is only logged when every step ran without error.

    Args:
        database_path: Path to the SQLite database file.
    """
    applied: List[str] = []
    failed = False
    conn = None
    try:
        conn = sqlite3.connect(database_path)
        cursor = conn.cursor()

        if _migrate_raw_request_column(cursor):
            applied.append("add raw_request column to access_logs")

        for idx in _migrate_performance_indexes(cursor):
            applied.append(f"add index {idx}")

        conn.commit()
    except sqlite3.Error as e:
        logger.error(f"Migration error: {e}")
        failed = True
    finally:
        # Close even when a step raised; conn stays None if connect() failed.
        if conn is not None:
            conn.close()

    # Steps recorded here had already executed before any error occurred.
    for m in applied:
        logger.info(f"Migration applied: {m}")

    if failed:
        # Do not report success or "up to date" after a swallowed error.
        logger.warning(
            f"Migrations did not complete ({len(applied)} applied before the error)"
        )
    elif applied:
        logger.info(f"All migrations complete ({len(applied)} applied)")
    else:
        logger.info("Database schema is up to date — no migrations needed")