From db95876db2df3f62a03b08652953e656781b80dc Mon Sep 17 00:00:00 2001 From: Malin Date: Tue, 21 Apr 2026 07:10:45 +0200 Subject: [PATCH] fix: SQLite database locked errors + add error status for 4xx/5xx SQLite locking: - Enable WAL journal mode in init_db (readers don't block writers) - Set busy_timeout=30000ms in init_db - Add timeout=30 to every aiosqlite.connect() across db.py, validator.py, enricher.py, main.py so connections wait up to 30s instead of crashing Error status: - 4xx/5xx HTTP responses are now prescreen_status='error' (server alive but broken/blocking) instead of 'live' - Added 'error' counter to validator stats and orange Error stat box in UI - Added ps-error CSS class (orange) and filter option in Browse tab Co-Authored-By: Claude Sonnet 4.6 --- app/db.py | 23 +++++++++++++---------- app/enricher.py | 12 ++++++------ app/main.py | 6 +++--- app/static/index.html | 3 ++- app/validator.py | 12 +++++++++--- 5 files changed, 33 insertions(+), 23 deletions(-) diff --git a/app/db.py b/app/db.py index 44b013a..621acbd 100644 --- a/app/db.py +++ b/app/db.py @@ -101,7 +101,10 @@ _total_cache: int = 0 async def init_db(): - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: + # WAL mode: concurrent reads don't block on writes; write lock held briefly + await db.execute("PRAGMA journal_mode=WAL") + await db.execute("PRAGMA busy_timeout=30000") await db.executescript(SCHEMA) # Run migrations (safe to re-run — silently skips existing columns) for sql in _MIGRATIONS: @@ -237,7 +240,7 @@ async def get_domains(tld=None, page=1, limit=100, alpha_only=False, no_sld=Fals return total, [] placeholders = ",".join("?" * len(domain_list)) - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: db.row_factory = aiosqlite.Row async with db.execute( f"SELECT * FROM enriched_domains WHERE domain IN ({placeholders})", @@ -289,7 +292,7 @@ async def get_stats(): loop = asyncio.get_event_loop() _total_cache, _tld_cache = await loop.run_in_executor(None, _tld_stats_sync) - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: async with db.execute("SELECT COUNT(*) FROM enriched_domains") as cur: enriched = (await cur.fetchone())[0] threshold = int(os.getenv("SCORE_THRESHOLD", "60")) @@ -351,7 +354,7 @@ async def get_enriched(min_score=0, cms=None, country=None, kit_digital=None, conditions.append("site_type = ?") params.append(site_type) where = "WHERE " + " AND ".join(conditions) - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: db.row_factory = aiosqlite.Row async with db.execute( f"SELECT * FROM enriched_domains {where} ORDER BY score DESC LIMIT ? OFFSET ?", @@ -366,7 +369,7 @@ async def get_enriched(min_score=0, cms=None, country=None, kit_digital=None, async def queue_ai(domains: list[str], language: str = "ES"): - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: await db.executemany( """INSERT INTO ai_queue (domain, language) VALUES (?, ?) ON CONFLICT(domain) DO UPDATE SET language=excluded.language, status='pending'""", @@ -376,7 +379,7 @@ async def queue_ai(domains: list[str], language: str = "ES"): async def get_ai_queue_status(): - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: async with db.execute("SELECT status, COUNT(*) FROM ai_queue GROUP BY status") as cur: rows = {r[0]: r[1] async for r in cur} return { @@ -390,7 +393,7 @@ async def get_ai_queue_status(): async def save_ai_assessment(domain: str, assessment: dict, site_analysis: dict = None): import json as _json - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: # Upsert into enriched_domains (domain may not exist yet if assessed before full enrichment) await db.execute( """INSERT INTO enriched_domains (domain) VALUES (?) ON CONFLICT(domain) DO NOTHING""", @@ -445,7 +448,7 @@ async def save_ai_assessment(domain: str, assessment: dict, site_analysis: dict async def save_prescreen_results(results: list[dict]): """Upsert prescreen HTTP results and/or DeepSeek niche/type classifications.""" - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: for r in results: domain = r.get("domain") if not domain: @@ -477,7 +480,7 @@ async def save_prescreen_results(results: list[dict]): async def queue_domains(domains: list[str]): - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: await db.executemany( "INSERT OR IGNORE INTO job_queue (domain) VALUES (?)", [(d,) for d in domains], @@ -486,7 +489,7 @@ async def queue_domains(domains: list[str]): async def get_queue_status(): - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: async with db.execute("SELECT status, COUNT(*) FROM job_queue GROUP BY status") as cur: rows = {r[0]: r[1] async for r in cur} pending = rows.get("pending", 0) diff --git a/app/enricher.py b/app/enricher.py index 0671ca1..7a6bfb9 100644 --- a/app/enricher.py +++ b/app/enricher.py @@ -251,7 +251,7 @@ async def enrich_domain(domain: str) -> dict: async def save_enriched(data: dict): - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: await db.execute( """INSERT INTO enriched_domains (domain, is_live, status_code, ssl_valid, ssl_expiry_days, cms, @@ -284,7 +284,7 @@ async def save_enriched(data: dict): async def mark_job(domain: str, status: str, error: str = None): - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: if status == "running": await db.execute( "UPDATE job_queue SET status=?, started_at=datetime('now') WHERE domain=?", @@ -315,7 +315,7 @@ async def worker_loop(): if _paused: await asyncio.sleep(1) continue - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: async with db.execute( "SELECT domain FROM job_queue WHERE status='pending' LIMIT 100" ) as cur: @@ -348,7 +348,7 @@ async def _assess_one(domain: str, language: str = "ES") -> None: except Exception as e: logger.error("AI: failed %s — %s", domain, e, exc_info=True) try: - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: await db.execute( "UPDATE ai_queue SET status='failed', completed_at=datetime('now'), error=? WHERE domain=?", (str(e)[:400], domain), @@ -362,7 +362,7 @@ async def ai_worker_loop(): logger.info("AI worker loop starting") # Reset any jobs left in 'running' state from a previous crashed worker try: - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: result = await db.execute( "UPDATE ai_queue SET status='pending' WHERE status='running'" ) @@ -375,7 +375,7 @@ async def ai_worker_loop(): while True: rows = [] try: - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: async with db.execute( "SELECT domain, COALESCE(language,'ES') FROM ai_queue WHERE status='pending' LIMIT 5" ) as cur: diff --git a/app/main.py b/app/main.py index 6f7eeba..b572557 100644 --- a/app/main.py +++ b/app/main.py @@ -132,7 +132,7 @@ async def enrich_status(): @app.post("/api/enrich/retry") async def enrich_retry(): - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: await db.execute("UPDATE job_queue SET status='pending', error=NULL WHERE status='failed'") await db.commit() resume_worker() @@ -267,7 +267,7 @@ async def ai_worker_restart(): @app.post("/api/ai/reset") async def ai_reset(): """Reset all 'running' AI queue jobs back to 'pending' (unstick hung jobs).""" - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: r = await db.execute("UPDATE ai_queue SET status='pending' WHERE status='running'") count = r.rowcount await db.commit() @@ -287,7 +287,7 @@ async def ai_debug(): except Exception: pass - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: db.row_factory = aiosqlite.Row async with db.execute( "SELECT domain, status, created_at, completed_at, error FROM ai_queue ORDER BY created_at DESC LIMIT 10" diff --git a/app/static/index.html b/app/static/index.html index 6c6a59a..ba7c72e 100644 --- a/app/static/index.html +++ b/app/static/index.html @@ -75,7 +75,7 @@ input[type=range]{accent-color:var(--accent);width:100px;cursor:pointer} .pni{background:#0ea5e918;color:#38bdf8;border:1px solid #0ea5e933} .pty{background:#8b5cf618;color:#a78bfa;border:1px solid #8b5cf633} /* Prescreen status dot */ -.ps-live{color:#34d399} .ps-dead{color:#f87171} .ps-parked{color:#fbbf24} .ps-redirect{color:#94a3b8} +.ps-live{color:#34d399} .ps-dead{color:#f87171} .ps-error{color:#f97316} .ps-parked{color:#fbbf24} .ps-redirect{color:#94a3b8} /* Table */ .tw{overflow-x:auto;border-radius:var(--r);border:1px solid var(--border)} @@ -562,6 +562,7 @@ tr:hover td{background:rgba(255,255,255,.025)}
Checked
Live
Dead
+
Error
Parked
Redirect
dom/sec
diff --git a/app/validator.py b/app/validator.py index 46cecfa..5684de3 100644 --- a/app/validator.py +++ b/app/validator.py @@ -66,6 +66,7 @@ _val_stats: dict = { "processed": 0, "live": 0, "dead": 0, + "error": 0, "parked": 0, "redirect": 0, "skipped": 0, @@ -148,6 +149,11 @@ async def _check_domain(domain: str) -> dict: result["prescreen_status"] = "parked" return result + # 4xx / 5xx = server is alive but site is broken/blocking + if resp.status_code >= 400: + result["prescreen_status"] = "error" + return result + result["prescreen_status"] = "live" return result @@ -201,7 +207,7 @@ async def _filter_unvalidated(domains: list[str], rescan_dead: bool = False) -> condition = "prescreen_status IS NOT NULL AND prescreen_status != 'dead'" else: condition = "prescreen_status IS NOT NULL" - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: async with db.execute( f"SELECT domain FROM enriched_domains " f"WHERE domain IN ({placeholders}) AND {condition}", @@ -212,7 +218,7 @@ async def _filter_unvalidated(domains: list[str], rescan_dead: bool = False) -> async def _save_batch(results: list[dict]): - async with aiosqlite.connect(SQLITE_PATH) as db: + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: for r in results: await db.execute( """INSERT INTO enriched_domains @@ -319,7 +325,7 @@ def start_validator(tld_filter: Optional[str] = None, rescan_dead: bool = False) # domains, so restarting from 0 is safe and fast even for the same TLD. _val_stats.update( running=True, - processed=0, live=0, dead=0, parked=0, + processed=0, live=0, dead=0, error=0, parked=0, redirect=0, skipped=0, offset=0, rate=0.0, tld_filter=tld_filter, rescan_dead=rescan_dead,