fix: SQLite 'database is locked' errors + add 'error' prescreen status for 4xx/5xx responses

SQLite locking:
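- Enable WAL journal mode in init_db (readers don't block writers; the mode
  is stored in the database file, so it persists for later connections)
- Set busy_timeout=30000ms in init_db (a per-connection setting, so it only
  covers the init connection itself)
- Add timeout=30 to every aiosqlite.connect() across db.py, validator.py,
  enricher.py, main.py so each connection waits up to 30s for a lock (instead
  of the 5s default) before raising 'database is locked'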

Error status:
- 4xx/5xx HTTP responses are now recorded as prescreen_status='error' (server
  alive but broken/blocking) instead of 'live'
- Add 'error' counter to validator stats and an orange Error stat box in the UI
- Add ps-error CSS class (orange) and filter option in Browse tab

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-21 07:10:45 +02:00
parent 989717e479
commit db95876db2
5 changed files with 33 additions and 23 deletions

View File

@@ -101,7 +101,10 @@ _total_cache: int = 0
async def init_db():
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
# WAL mode: concurrent reads don't block on writes; write lock held briefly
await db.execute("PRAGMA journal_mode=WAL")
await db.execute("PRAGMA busy_timeout=30000")
await db.executescript(SCHEMA)
# Run migrations (safe to re-run — silently skips existing columns)
for sql in _MIGRATIONS:
@@ -237,7 +240,7 @@ async def get_domains(tld=None, page=1, limit=100, alpha_only=False, no_sld=Fals
return total, []
placeholders = ",".join("?" * len(domain_list))
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
db.row_factory = aiosqlite.Row
async with db.execute(
f"SELECT * FROM enriched_domains WHERE domain IN ({placeholders})",
@@ -289,7 +292,7 @@ async def get_stats():
loop = asyncio.get_event_loop()
_total_cache, _tld_cache = await loop.run_in_executor(None, _tld_stats_sync)
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
async with db.execute("SELECT COUNT(*) FROM enriched_domains") as cur:
enriched = (await cur.fetchone())[0]
threshold = int(os.getenv("SCORE_THRESHOLD", "60"))
@@ -351,7 +354,7 @@ async def get_enriched(min_score=0, cms=None, country=None, kit_digital=None,
conditions.append("site_type = ?")
params.append(site_type)
where = "WHERE " + " AND ".join(conditions)
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
db.row_factory = aiosqlite.Row
async with db.execute(
f"SELECT * FROM enriched_domains {where} ORDER BY score DESC LIMIT ? OFFSET ?",
@@ -366,7 +369,7 @@ async def get_enriched(min_score=0, cms=None, country=None, kit_digital=None,
async def queue_ai(domains: list[str], language: str = "ES"):
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
await db.executemany(
"""INSERT INTO ai_queue (domain, language) VALUES (?, ?)
ON CONFLICT(domain) DO UPDATE SET language=excluded.language, status='pending'""",
@@ -376,7 +379,7 @@ async def queue_ai(domains: list[str], language: str = "ES"):
async def get_ai_queue_status():
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
async with db.execute("SELECT status, COUNT(*) FROM ai_queue GROUP BY status") as cur:
rows = {r[0]: r[1] async for r in cur}
return {
@@ -390,7 +393,7 @@ async def get_ai_queue_status():
async def save_ai_assessment(domain: str, assessment: dict, site_analysis: dict = None):
import json as _json
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
# Upsert into enriched_domains (domain may not exist yet if assessed before full enrichment)
await db.execute(
"""INSERT INTO enriched_domains (domain) VALUES (?) ON CONFLICT(domain) DO NOTHING""",
@@ -445,7 +448,7 @@ async def save_ai_assessment(domain: str, assessment: dict, site_analysis: dict
async def save_prescreen_results(results: list[dict]):
"""Upsert prescreen HTTP results and/or DeepSeek niche/type classifications."""
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
for r in results:
domain = r.get("domain")
if not domain:
@@ -477,7 +480,7 @@ async def save_prescreen_results(results: list[dict]):
async def queue_domains(domains: list[str]):
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
await db.executemany(
"INSERT OR IGNORE INTO job_queue (domain) VALUES (?)",
[(d,) for d in domains],
@@ -486,7 +489,7 @@ async def queue_domains(domains: list[str]):
async def get_queue_status():
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
async with db.execute("SELECT status, COUNT(*) FROM job_queue GROUP BY status") as cur:
rows = {r[0]: r[1] async for r in cur}
pending = rows.get("pending", 0)

View File

@@ -251,7 +251,7 @@ async def enrich_domain(domain: str) -> dict:
async def save_enriched(data: dict):
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
await db.execute(
"""INSERT INTO enriched_domains
(domain, is_live, status_code, ssl_valid, ssl_expiry_days, cms,
@@ -284,7 +284,7 @@ async def save_enriched(data: dict):
async def mark_job(domain: str, status: str, error: str = None):
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
if status == "running":
await db.execute(
"UPDATE job_queue SET status=?, started_at=datetime('now') WHERE domain=?",
@@ -315,7 +315,7 @@ async def worker_loop():
if _paused:
await asyncio.sleep(1)
continue
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
async with db.execute(
"SELECT domain FROM job_queue WHERE status='pending' LIMIT 100"
) as cur:
@@ -348,7 +348,7 @@ async def _assess_one(domain: str, language: str = "ES") -> None:
except Exception as e:
logger.error("AI: failed %s%s", domain, e, exc_info=True)
try:
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
await db.execute(
"UPDATE ai_queue SET status='failed', completed_at=datetime('now'), error=? WHERE domain=?",
(str(e)[:400], domain),
@@ -362,7 +362,7 @@ async def ai_worker_loop():
logger.info("AI worker loop starting")
# Reset any jobs left in 'running' state from a previous crashed worker
try:
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
result = await db.execute(
"UPDATE ai_queue SET status='pending' WHERE status='running'"
)
@@ -375,7 +375,7 @@ async def ai_worker_loop():
while True:
rows = []
try:
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
async with db.execute(
"SELECT domain, COALESCE(language,'ES') FROM ai_queue WHERE status='pending' LIMIT 5"
) as cur:

View File

@@ -132,7 +132,7 @@ async def enrich_status():
@app.post("/api/enrich/retry")
async def enrich_retry():
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
await db.execute("UPDATE job_queue SET status='pending', error=NULL WHERE status='failed'")
await db.commit()
resume_worker()
@@ -267,7 +267,7 @@ async def ai_worker_restart():
@app.post("/api/ai/reset")
async def ai_reset():
"""Reset all 'running' AI queue jobs back to 'pending' (unstick hung jobs)."""
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
r = await db.execute("UPDATE ai_queue SET status='pending' WHERE status='running'")
count = r.rowcount
await db.commit()
@@ -287,7 +287,7 @@ async def ai_debug():
except Exception:
pass
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
db.row_factory = aiosqlite.Row
async with db.execute(
"SELECT domain, status, created_at, completed_at, error FROM ai_queue ORDER BY created_at DESC LIMIT 10"

View File

@@ -75,7 +75,7 @@ input[type=range]{accent-color:var(--accent);width:100px;cursor:pointer}
.pni{background:#0ea5e918;color:#38bdf8;border:1px solid #0ea5e933}
.pty{background:#8b5cf618;color:#a78bfa;border:1px solid #8b5cf633}
/* Prescreen status dot */
.ps-live{color:#34d399} .ps-dead{color:#f87171} .ps-parked{color:#fbbf24} .ps-redirect{color:#94a3b8}
.ps-live{color:#34d399} .ps-dead{color:#f87171} .ps-error{color:#f97316} .ps-parked{color:#fbbf24} .ps-redirect{color:#94a3b8}
/* Table */
.tw{overflow-x:auto;border-radius:var(--r);border:1px solid var(--border)}
@@ -562,6 +562,7 @@ tr:hover td{background:rgba(255,255,255,.025)}
<div class="esb"><div class="ev c1" x-text="(valSt.processed??0).toLocaleString()"></div><div class="el">Checked</div></div>
<div class="esb"><div class="ev ps-live" x-text="(valSt.live??0).toLocaleString()"></div><div class="el">Live</div></div>
<div class="esb"><div class="ev ps-dead" x-text="(valSt.dead??0).toLocaleString()"></div><div class="el">Dead</div></div>
<div class="esb"><div class="ev ps-error" x-text="(valSt.error??0).toLocaleString()"></div><div class="el">Error</div></div>
<div class="esb"><div class="ev ps-parked" x-text="(valSt.parked??0).toLocaleString()"></div><div class="el">Parked</div></div>
<div class="esb"><div class="ev ps-redirect" x-text="(valSt.redirect??0).toLocaleString()"></div><div class="el">Redirect</div></div>
<div class="esb"><div class="ev c3" x-text="(valSt.rate??0).toFixed(1)"></div><div class="el">dom/sec</div></div>

View File

@@ -66,6 +66,7 @@ _val_stats: dict = {
"processed": 0,
"live": 0,
"dead": 0,
"error": 0,
"parked": 0,
"redirect": 0,
"skipped": 0,
@@ -148,6 +149,11 @@ async def _check_domain(domain: str) -> dict:
result["prescreen_status"] = "parked"
return result
# 4xx / 5xx = server is alive but site is broken/blocking
if resp.status_code >= 400:
result["prescreen_status"] = "error"
return result
result["prescreen_status"] = "live"
return result
@@ -201,7 +207,7 @@ async def _filter_unvalidated(domains: list[str], rescan_dead: bool = False) ->
condition = "prescreen_status IS NOT NULL AND prescreen_status != 'dead'"
else:
condition = "prescreen_status IS NOT NULL"
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
async with db.execute(
f"SELECT domain FROM enriched_domains "
f"WHERE domain IN ({placeholders}) AND {condition}",
@@ -212,7 +218,7 @@ async def _filter_unvalidated(domains: list[str], rescan_dead: bool = False) ->
async def _save_batch(results: list[dict]):
async with aiosqlite.connect(SQLITE_PATH) as db:
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
for r in results:
await db.execute(
"""INSERT INTO enriched_domains
@@ -319,7 +325,7 @@ def start_validator(tld_filter: Optional[str] = None, rescan_dead: bool = False)
# domains, so restarting from 0 is safe and fast even for the same TLD.
_val_stats.update(
running=True,
processed=0, live=0, dead=0, parked=0,
processed=0, live=0, dead=0, error=0, parked=0,
redirect=0, skipped=0, offset=0, rate=0.0,
tld_filter=tld_filter,
rescan_dead=rescan_dead,