fix: SQLite database locked errors + add error status for 4xx/5xx
SQLite locking:
- Enable WAL journal mode in init_db (readers don't block writers)
- Set busy_timeout=30000ms in init_db
- Add timeout=30 to every aiosqlite.connect() across db.py, validator.py, enricher.py, main.py so connections wait up to 30s instead of crashing

Error status:
- 4xx/5xx HTTP responses are now prescreen_status='error' (server alive but broken/blocking) instead of 'live'
- Added 'error' counter to validator stats and orange Error stat box in UI
- Added ps-error CSS class (orange) and filter option in Browse tab

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
23
app/db.py
23
app/db.py
@@ -101,7 +101,10 @@ _total_cache: int = 0
|
|||||||
|
|
||||||
|
|
||||||
async def init_db():
|
async def init_db():
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
|
# WAL mode: concurrent reads don't block on writes; write lock held briefly
|
||||||
|
await db.execute("PRAGMA journal_mode=WAL")
|
||||||
|
await db.execute("PRAGMA busy_timeout=30000")
|
||||||
await db.executescript(SCHEMA)
|
await db.executescript(SCHEMA)
|
||||||
# Run migrations (safe to re-run — silently skips existing columns)
|
# Run migrations (safe to re-run — silently skips existing columns)
|
||||||
for sql in _MIGRATIONS:
|
for sql in _MIGRATIONS:
|
||||||
@@ -237,7 +240,7 @@ async def get_domains(tld=None, page=1, limit=100, alpha_only=False, no_sld=Fals
|
|||||||
return total, []
|
return total, []
|
||||||
|
|
||||||
placeholders = ",".join("?" * len(domain_list))
|
placeholders = ",".join("?" * len(domain_list))
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
db.row_factory = aiosqlite.Row
|
db.row_factory = aiosqlite.Row
|
||||||
async with db.execute(
|
async with db.execute(
|
||||||
f"SELECT * FROM enriched_domains WHERE domain IN ({placeholders})",
|
f"SELECT * FROM enriched_domains WHERE domain IN ({placeholders})",
|
||||||
@@ -289,7 +292,7 @@ async def get_stats():
|
|||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
_total_cache, _tld_cache = await loop.run_in_executor(None, _tld_stats_sync)
|
_total_cache, _tld_cache = await loop.run_in_executor(None, _tld_stats_sync)
|
||||||
|
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
async with db.execute("SELECT COUNT(*) FROM enriched_domains") as cur:
|
async with db.execute("SELECT COUNT(*) FROM enriched_domains") as cur:
|
||||||
enriched = (await cur.fetchone())[0]
|
enriched = (await cur.fetchone())[0]
|
||||||
threshold = int(os.getenv("SCORE_THRESHOLD", "60"))
|
threshold = int(os.getenv("SCORE_THRESHOLD", "60"))
|
||||||
@@ -351,7 +354,7 @@ async def get_enriched(min_score=0, cms=None, country=None, kit_digital=None,
|
|||||||
conditions.append("site_type = ?")
|
conditions.append("site_type = ?")
|
||||||
params.append(site_type)
|
params.append(site_type)
|
||||||
where = "WHERE " + " AND ".join(conditions)
|
where = "WHERE " + " AND ".join(conditions)
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
db.row_factory = aiosqlite.Row
|
db.row_factory = aiosqlite.Row
|
||||||
async with db.execute(
|
async with db.execute(
|
||||||
f"SELECT * FROM enriched_domains {where} ORDER BY score DESC LIMIT ? OFFSET ?",
|
f"SELECT * FROM enriched_domains {where} ORDER BY score DESC LIMIT ? OFFSET ?",
|
||||||
@@ -366,7 +369,7 @@ async def get_enriched(min_score=0, cms=None, country=None, kit_digital=None,
|
|||||||
|
|
||||||
|
|
||||||
async def queue_ai(domains: list[str], language: str = "ES"):
|
async def queue_ai(domains: list[str], language: str = "ES"):
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
await db.executemany(
|
await db.executemany(
|
||||||
"""INSERT INTO ai_queue (domain, language) VALUES (?, ?)
|
"""INSERT INTO ai_queue (domain, language) VALUES (?, ?)
|
||||||
ON CONFLICT(domain) DO UPDATE SET language=excluded.language, status='pending'""",
|
ON CONFLICT(domain) DO UPDATE SET language=excluded.language, status='pending'""",
|
||||||
@@ -376,7 +379,7 @@ async def queue_ai(domains: list[str], language: str = "ES"):
|
|||||||
|
|
||||||
|
|
||||||
async def get_ai_queue_status():
|
async def get_ai_queue_status():
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
async with db.execute("SELECT status, COUNT(*) FROM ai_queue GROUP BY status") as cur:
|
async with db.execute("SELECT status, COUNT(*) FROM ai_queue GROUP BY status") as cur:
|
||||||
rows = {r[0]: r[1] async for r in cur}
|
rows = {r[0]: r[1] async for r in cur}
|
||||||
return {
|
return {
|
||||||
@@ -390,7 +393,7 @@ async def get_ai_queue_status():
|
|||||||
|
|
||||||
async def save_ai_assessment(domain: str, assessment: dict, site_analysis: dict = None):
|
async def save_ai_assessment(domain: str, assessment: dict, site_analysis: dict = None):
|
||||||
import json as _json
|
import json as _json
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
# Upsert into enriched_domains (domain may not exist yet if assessed before full enrichment)
|
# Upsert into enriched_domains (domain may not exist yet if assessed before full enrichment)
|
||||||
await db.execute(
|
await db.execute(
|
||||||
"""INSERT INTO enriched_domains (domain) VALUES (?) ON CONFLICT(domain) DO NOTHING""",
|
"""INSERT INTO enriched_domains (domain) VALUES (?) ON CONFLICT(domain) DO NOTHING""",
|
||||||
@@ -445,7 +448,7 @@ async def save_ai_assessment(domain: str, assessment: dict, site_analysis: dict
|
|||||||
|
|
||||||
async def save_prescreen_results(results: list[dict]):
|
async def save_prescreen_results(results: list[dict]):
|
||||||
"""Upsert prescreen HTTP results and/or DeepSeek niche/type classifications."""
|
"""Upsert prescreen HTTP results and/or DeepSeek niche/type classifications."""
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
for r in results:
|
for r in results:
|
||||||
domain = r.get("domain")
|
domain = r.get("domain")
|
||||||
if not domain:
|
if not domain:
|
||||||
@@ -477,7 +480,7 @@ async def save_prescreen_results(results: list[dict]):
|
|||||||
|
|
||||||
|
|
||||||
async def queue_domains(domains: list[str]):
|
async def queue_domains(domains: list[str]):
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
await db.executemany(
|
await db.executemany(
|
||||||
"INSERT OR IGNORE INTO job_queue (domain) VALUES (?)",
|
"INSERT OR IGNORE INTO job_queue (domain) VALUES (?)",
|
||||||
[(d,) for d in domains],
|
[(d,) for d in domains],
|
||||||
@@ -486,7 +489,7 @@ async def queue_domains(domains: list[str]):
|
|||||||
|
|
||||||
|
|
||||||
async def get_queue_status():
|
async def get_queue_status():
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
async with db.execute("SELECT status, COUNT(*) FROM job_queue GROUP BY status") as cur:
|
async with db.execute("SELECT status, COUNT(*) FROM job_queue GROUP BY status") as cur:
|
||||||
rows = {r[0]: r[1] async for r in cur}
|
rows = {r[0]: r[1] async for r in cur}
|
||||||
pending = rows.get("pending", 0)
|
pending = rows.get("pending", 0)
|
||||||
|
|||||||
@@ -251,7 +251,7 @@ async def enrich_domain(domain: str) -> dict:
|
|||||||
|
|
||||||
|
|
||||||
async def save_enriched(data: dict):
|
async def save_enriched(data: dict):
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
await db.execute(
|
await db.execute(
|
||||||
"""INSERT INTO enriched_domains
|
"""INSERT INTO enriched_domains
|
||||||
(domain, is_live, status_code, ssl_valid, ssl_expiry_days, cms,
|
(domain, is_live, status_code, ssl_valid, ssl_expiry_days, cms,
|
||||||
@@ -284,7 +284,7 @@ async def save_enriched(data: dict):
|
|||||||
|
|
||||||
|
|
||||||
async def mark_job(domain: str, status: str, error: str = None):
|
async def mark_job(domain: str, status: str, error: str = None):
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
if status == "running":
|
if status == "running":
|
||||||
await db.execute(
|
await db.execute(
|
||||||
"UPDATE job_queue SET status=?, started_at=datetime('now') WHERE domain=?",
|
"UPDATE job_queue SET status=?, started_at=datetime('now') WHERE domain=?",
|
||||||
@@ -315,7 +315,7 @@ async def worker_loop():
|
|||||||
if _paused:
|
if _paused:
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
continue
|
continue
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
async with db.execute(
|
async with db.execute(
|
||||||
"SELECT domain FROM job_queue WHERE status='pending' LIMIT 100"
|
"SELECT domain FROM job_queue WHERE status='pending' LIMIT 100"
|
||||||
) as cur:
|
) as cur:
|
||||||
@@ -348,7 +348,7 @@ async def _assess_one(domain: str, language: str = "ES") -> None:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("AI: failed %s — %s", domain, e, exc_info=True)
|
logger.error("AI: failed %s — %s", domain, e, exc_info=True)
|
||||||
try:
|
try:
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
await db.execute(
|
await db.execute(
|
||||||
"UPDATE ai_queue SET status='failed', completed_at=datetime('now'), error=? WHERE domain=?",
|
"UPDATE ai_queue SET status='failed', completed_at=datetime('now'), error=? WHERE domain=?",
|
||||||
(str(e)[:400], domain),
|
(str(e)[:400], domain),
|
||||||
@@ -362,7 +362,7 @@ async def ai_worker_loop():
|
|||||||
logger.info("AI worker loop starting")
|
logger.info("AI worker loop starting")
|
||||||
# Reset any jobs left in 'running' state from a previous crashed worker
|
# Reset any jobs left in 'running' state from a previous crashed worker
|
||||||
try:
|
try:
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
result = await db.execute(
|
result = await db.execute(
|
||||||
"UPDATE ai_queue SET status='pending' WHERE status='running'"
|
"UPDATE ai_queue SET status='pending' WHERE status='running'"
|
||||||
)
|
)
|
||||||
@@ -375,7 +375,7 @@ async def ai_worker_loop():
|
|||||||
while True:
|
while True:
|
||||||
rows = []
|
rows = []
|
||||||
try:
|
try:
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
async with db.execute(
|
async with db.execute(
|
||||||
"SELECT domain, COALESCE(language,'ES') FROM ai_queue WHERE status='pending' LIMIT 5"
|
"SELECT domain, COALESCE(language,'ES') FROM ai_queue WHERE status='pending' LIMIT 5"
|
||||||
) as cur:
|
) as cur:
|
||||||
|
|||||||
@@ -132,7 +132,7 @@ async def enrich_status():
|
|||||||
|
|
||||||
@app.post("/api/enrich/retry")
|
@app.post("/api/enrich/retry")
|
||||||
async def enrich_retry():
|
async def enrich_retry():
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
await db.execute("UPDATE job_queue SET status='pending', error=NULL WHERE status='failed'")
|
await db.execute("UPDATE job_queue SET status='pending', error=NULL WHERE status='failed'")
|
||||||
await db.commit()
|
await db.commit()
|
||||||
resume_worker()
|
resume_worker()
|
||||||
@@ -267,7 +267,7 @@ async def ai_worker_restart():
|
|||||||
@app.post("/api/ai/reset")
|
@app.post("/api/ai/reset")
|
||||||
async def ai_reset():
|
async def ai_reset():
|
||||||
"""Reset all 'running' AI queue jobs back to 'pending' (unstick hung jobs)."""
|
"""Reset all 'running' AI queue jobs back to 'pending' (unstick hung jobs)."""
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
r = await db.execute("UPDATE ai_queue SET status='pending' WHERE status='running'")
|
r = await db.execute("UPDATE ai_queue SET status='pending' WHERE status='running'")
|
||||||
count = r.rowcount
|
count = r.rowcount
|
||||||
await db.commit()
|
await db.commit()
|
||||||
@@ -287,7 +287,7 @@ async def ai_debug():
|
|||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
db.row_factory = aiosqlite.Row
|
db.row_factory = aiosqlite.Row
|
||||||
async with db.execute(
|
async with db.execute(
|
||||||
"SELECT domain, status, created_at, completed_at, error FROM ai_queue ORDER BY created_at DESC LIMIT 10"
|
"SELECT domain, status, created_at, completed_at, error FROM ai_queue ORDER BY created_at DESC LIMIT 10"
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ input[type=range]{accent-color:var(--accent);width:100px;cursor:pointer}
|
|||||||
.pni{background:#0ea5e918;color:#38bdf8;border:1px solid #0ea5e933}
|
.pni{background:#0ea5e918;color:#38bdf8;border:1px solid #0ea5e933}
|
||||||
.pty{background:#8b5cf618;color:#a78bfa;border:1px solid #8b5cf633}
|
.pty{background:#8b5cf618;color:#a78bfa;border:1px solid #8b5cf633}
|
||||||
/* Prescreen status dot */
|
/* Prescreen status dot */
|
||||||
.ps-live{color:#34d399} .ps-dead{color:#f87171} .ps-parked{color:#fbbf24} .ps-redirect{color:#94a3b8}
|
.ps-live{color:#34d399} .ps-dead{color:#f87171} .ps-error{color:#f97316} .ps-parked{color:#fbbf24} .ps-redirect{color:#94a3b8}
|
||||||
|
|
||||||
/* Table */
|
/* Table */
|
||||||
.tw{overflow-x:auto;border-radius:var(--r);border:1px solid var(--border)}
|
.tw{overflow-x:auto;border-radius:var(--r);border:1px solid var(--border)}
|
||||||
@@ -562,6 +562,7 @@ tr:hover td{background:rgba(255,255,255,.025)}
|
|||||||
<div class="esb"><div class="ev c1" x-text="(valSt.processed??0).toLocaleString()"></div><div class="el">Checked</div></div>
|
<div class="esb"><div class="ev c1" x-text="(valSt.processed??0).toLocaleString()"></div><div class="el">Checked</div></div>
|
||||||
<div class="esb"><div class="ev ps-live" x-text="(valSt.live??0).toLocaleString()"></div><div class="el">Live</div></div>
|
<div class="esb"><div class="ev ps-live" x-text="(valSt.live??0).toLocaleString()"></div><div class="el">Live</div></div>
|
||||||
<div class="esb"><div class="ev ps-dead" x-text="(valSt.dead??0).toLocaleString()"></div><div class="el">Dead</div></div>
|
<div class="esb"><div class="ev ps-dead" x-text="(valSt.dead??0).toLocaleString()"></div><div class="el">Dead</div></div>
|
||||||
|
<div class="esb"><div class="ev ps-error" x-text="(valSt.error??0).toLocaleString()"></div><div class="el">Error</div></div>
|
||||||
<div class="esb"><div class="ev ps-parked" x-text="(valSt.parked??0).toLocaleString()"></div><div class="el">Parked</div></div>
|
<div class="esb"><div class="ev ps-parked" x-text="(valSt.parked??0).toLocaleString()"></div><div class="el">Parked</div></div>
|
||||||
<div class="esb"><div class="ev ps-redirect" x-text="(valSt.redirect??0).toLocaleString()"></div><div class="el">Redirect</div></div>
|
<div class="esb"><div class="ev ps-redirect" x-text="(valSt.redirect??0).toLocaleString()"></div><div class="el">Redirect</div></div>
|
||||||
<div class="esb"><div class="ev c3" x-text="(valSt.rate??0).toFixed(1)"></div><div class="el">dom/sec</div></div>
|
<div class="esb"><div class="ev c3" x-text="(valSt.rate??0).toFixed(1)"></div><div class="el">dom/sec</div></div>
|
||||||
|
|||||||
@@ -66,6 +66,7 @@ _val_stats: dict = {
|
|||||||
"processed": 0,
|
"processed": 0,
|
||||||
"live": 0,
|
"live": 0,
|
||||||
"dead": 0,
|
"dead": 0,
|
||||||
|
"error": 0,
|
||||||
"parked": 0,
|
"parked": 0,
|
||||||
"redirect": 0,
|
"redirect": 0,
|
||||||
"skipped": 0,
|
"skipped": 0,
|
||||||
@@ -148,6 +149,11 @@ async def _check_domain(domain: str) -> dict:
|
|||||||
result["prescreen_status"] = "parked"
|
result["prescreen_status"] = "parked"
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
# 4xx / 5xx = server is alive but site is broken/blocking
|
||||||
|
if resp.status_code >= 400:
|
||||||
|
result["prescreen_status"] = "error"
|
||||||
|
return result
|
||||||
|
|
||||||
result["prescreen_status"] = "live"
|
result["prescreen_status"] = "live"
|
||||||
return result
|
return result
|
||||||
|
|
||||||
@@ -201,7 +207,7 @@ async def _filter_unvalidated(domains: list[str], rescan_dead: bool = False) ->
|
|||||||
condition = "prescreen_status IS NOT NULL AND prescreen_status != 'dead'"
|
condition = "prescreen_status IS NOT NULL AND prescreen_status != 'dead'"
|
||||||
else:
|
else:
|
||||||
condition = "prescreen_status IS NOT NULL"
|
condition = "prescreen_status IS NOT NULL"
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
async with db.execute(
|
async with db.execute(
|
||||||
f"SELECT domain FROM enriched_domains "
|
f"SELECT domain FROM enriched_domains "
|
||||||
f"WHERE domain IN ({placeholders}) AND {condition}",
|
f"WHERE domain IN ({placeholders}) AND {condition}",
|
||||||
@@ -212,7 +218,7 @@ async def _filter_unvalidated(domains: list[str], rescan_dead: bool = False) ->
|
|||||||
|
|
||||||
|
|
||||||
async def _save_batch(results: list[dict]):
|
async def _save_batch(results: list[dict]):
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
for r in results:
|
for r in results:
|
||||||
await db.execute(
|
await db.execute(
|
||||||
"""INSERT INTO enriched_domains
|
"""INSERT INTO enriched_domains
|
||||||
@@ -319,7 +325,7 @@ def start_validator(tld_filter: Optional[str] = None, rescan_dead: bool = False)
|
|||||||
# domains, so restarting from 0 is safe and fast even for the same TLD.
|
# domains, so restarting from 0 is safe and fast even for the same TLD.
|
||||||
_val_stats.update(
|
_val_stats.update(
|
||||||
running=True,
|
running=True,
|
||||||
processed=0, live=0, dead=0, parked=0,
|
processed=0, live=0, dead=0, error=0, parked=0,
|
||||||
redirect=0, skipped=0, offset=0, rate=0.0,
|
redirect=0, skipped=0, offset=0, rate=0.0,
|
||||||
tld_filter=tld_filter,
|
tld_filter=tld_filter,
|
||||||
rescan_dead=rescan_dead,
|
rescan_dead=rescan_dead,
|
||||||
|
|||||||
Reference in New Issue
Block a user