From 2f82d3a3bd8efad35d4a09b90d57e66180e7e0ef Mon Sep 17 00:00:00 2001
From: Lorenzo Venerandi
Date: Sun, 22 Feb 2026 15:24:26 +0100
Subject: [PATCH] feat: implement database migration runner and initialize migrations on startup

---
 src/app.py                 |   5 +-
 src/database.py            |   5 ++
 src/migrations/__init__.py |   0
 src/migrations/runner.py   | 113 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 121 insertions(+), 2 deletions(-)
 create mode 100644 src/migrations/__init__.py
 create mode 100644 src/migrations/runner.py

diff --git a/src/app.py b/src/app.py
index ae4e0dc..788bcf2 100644
--- a/src/app.py
+++ b/src/app.py
@@ -29,10 +29,11 @@ async def lifespan(app: FastAPI):
     initialize_logging()
     app_logger = get_app_logger()
 
-    # Initialize database
+    # Initialize database and run pending migrations before accepting traffic
     try:
+        app_logger.info(f"Initializing database at: {config.database_path}")
         initialize_database(config.database_path)
-        app_logger.info(f"Database initialized at: {config.database_path}")
+        app_logger.info("Database ready")
     except Exception as e:
         app_logger.warning(
             f"Database initialization failed: {e}. Continuing with in-memory only."
diff --git a/src/database.py b/src/database.py
index 3e04de5..b3217c3 100644
--- a/src/database.py
+++ b/src/database.py
@@ -97,6 +97,11 @@ class DatabaseManager:
         # Run automatic migrations for backward compatibility
         self._run_migrations(database_path)
 
+        # Run schema migrations (columns & indexes on existing tables)
+        from migrations.runner import run_migrations
+
+        run_migrations(database_path)
+
         # Set restrictive file permissions (owner read/write only)
         if os.path.exists(database_path):
             try:
diff --git a/src/migrations/__init__.py b/src/migrations/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/migrations/runner.py b/src/migrations/runner.py
new file mode 100644
index 0000000..a46a07a
--- /dev/null
+++ b/src/migrations/runner.py
@@ -0,0 +1,113 @@
+"""
+Migration runner for Krawl.
+
+Checks the database schema and applies any pending migrations at startup.
+All checks are idempotent — safe to run on every boot.
+
+Note: table creation (e.g. category_history) is already handled by
+Base.metadata.create_all() in DatabaseManager.initialize() and is NOT
+duplicated here. This runner only covers ALTER-level changes that
+create_all() cannot apply to existing tables (new columns, new indexes).
+"""
+
+import sqlite3
+import logging
+from typing import List
+
+logger = logging.getLogger("krawl")
+
+
+def _column_exists(cursor, table_name: str, column_name: str) -> bool:
+    """Return True if table_name has a column called column_name."""
+    # PRAGMA arguments cannot be bound as SQL parameters, so the table name
+    # is interpolated; pass only trusted, hard-coded identifiers.
+    cursor.execute(f"PRAGMA table_info({table_name})")
+    # Each row is (cid, name, type, notnull, dflt_value, pk).
+    columns = [row[1] for row in cursor.fetchall()]
+    return column_name in columns
+
+
+def _index_exists(cursor, index_name: str) -> bool:
+    """Return True if sqlite_master lists an index called index_name."""
+    cursor.execute(
+        "SELECT name FROM sqlite_master WHERE type='index' AND name=?",
+        (index_name,),
+    )
+    return cursor.fetchone() is not None
+
+
+def _migrate_raw_request_column(cursor) -> bool:
+    """Add raw_request column to access_logs if missing."""
+    if _column_exists(cursor, "access_logs", "raw_request"):
+        return False
+    cursor.execute("ALTER TABLE access_logs ADD COLUMN raw_request TEXT")
+    return True
+
+
+def _migrate_performance_indexes(cursor) -> List[str]:
+    """Add performance indexes to attack_detections if missing."""
+    added = []
+    if not _index_exists(cursor, "ix_attack_detections_attack_type"):
+        cursor.execute(
+            "CREATE INDEX ix_attack_detections_attack_type "
+            "ON attack_detections(attack_type)"
+        )
+        added.append("ix_attack_detections_attack_type")
+
+    if not _index_exists(cursor, "ix_attack_detections_type_log"):
+        cursor.execute(
+            "CREATE INDEX ix_attack_detections_type_log "
+            "ON attack_detections(attack_type, access_log_id)"
+        )
+        added.append("ix_attack_detections_type_log")
+
+    return added
+
+
+def run_migrations(database_path: str) -> None:
+    """
+    Check the database schema and apply any pending migrations.
+
+    Only handles ALTER-level changes (columns, indexes) that
+    Base.metadata.create_all() cannot apply to existing tables.
+
+    SQLite errors are logged rather than raised, and the connection is
+    closed on every path.
+
+    Args:
+        database_path: Path to the SQLite database file.
+    """
+    applied: List[str] = []
+    failed = False
+
+    try:
+        conn = sqlite3.connect(database_path)
+        try:
+            cursor = conn.cursor()
+
+            if _migrate_raw_request_column(cursor):
+                applied.append("add raw_request column to access_logs")
+
+            for idx in _migrate_performance_indexes(cursor):
+                applied.append(f"add index {idx}")
+
+            conn.commit()
+        finally:
+            # Close even when a step raises so the handle is not leaked.
+            conn.close()
+    except sqlite3.Error as e:
+        logger.error(f"Migration error: {e}")
+        failed = True
+
+    for m in applied:
+        logger.info(f"Migration applied: {m}")
+
+    if failed:
+        # Never report success or an up-to-date schema after an error.
+        logger.warning(
+            f"Migrations incomplete ({len(applied)} applied before the error)"
+        )
+    elif applied:
+        logger.info(f"All migrations complete ({len(applied)} applied)")
+    else:
+        logger.info("Database schema is up to date — no migrations needed")