fix: AI worker crash-proof + GDPR/hosting/accessibility analysis
AI worker fixes (root cause of "nothing reaches Replicate"):
- Worker task died silently — no exception handler around while loop
- Added try/except around entire loop body with exc_info logging
- Added watchdog task that restarts dead workers every 10 seconds
- ensure_workers_alive() called on every /api/ai/assess/batch POST
- _assess_one() is now a top-level function (not closure) — avoids subtle
  scoping bugs with async inner functions in while loops
- /api/ai/debug endpoint: shows worker alive status, task exception, last 10
  queue entries — browse to /api/ai/debug to diagnose
- /api/ai/worker/restart endpoint + UI button
- "Restart AI worker" button + "Debug AI queue" link in enrichment tab

site_analyzer.py — new signals:
- IP resolution + ip-api.com for ASN, org, ISP, host country
- EU hosting detection (27 EU + EEA + adequacy countries)
- GDPR: detects Cookiebot, OneTrust, CookiePro, Osano, Iubenda, Borlabs,
  CookieYes, Complianz, Usercentrics + text signals
- Privacy policy and GDPR text presence
- Accessibility: html lang missing, images without alt count, skip nav link,
  empty links, inputs without labels

Gemini prompt additions:
- Hosting section: IP, ASN, org/ISP, EU vs non-EU flag
- GDPR section: cookie tool, notice, privacy policy
- Accessibility section: all quick-scan results
- New output fields: hosting_notes, gdpr_compliance, accessibility_issues[]

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -338,53 +338,88 @@ async def worker_loop():
|
||||
|
||||
# ── AI assessment worker ──────────────────────────────────────────────────────
|
||||
|
||||
async def ai_worker_loop():
|
||||
async def _assess_one(domain: str) -> None:
    """Process a single AI assessment — safe to call concurrently.

    Deep-scrapes *domain* via analyze_site(), runs the Gemini assessment
    over the scrape result, and persists it with save_ai_assessment().
    On any failure the ai_queue row is flipped to 'failed' (best-effort)
    so the job does not stay stuck in 'running'.

    Never raises: all exceptions are caught and logged, which keeps a
    crashing job from taking down the worker loop that awaits us.
    """
    # Imported lazily inside the function (NOTE(review): presumably to
    # avoid an import cycle at module load — confirm against module top).
    from app.replicate_ai import assess_domain as gemini_assess
    from app.site_analyzer import analyze_site

    logger.info("AI: starting analysis for %s", domain)
    try:
        analysis = await analyze_site(domain)
        logger.info("AI: site analyzed %s (reachable=%s, words=%s)",
                    domain, analysis.get("reachable"), analysis.get("word_count"))
        assessment = await gemini_assess(analysis)
        logger.info("AI: Gemini done %s → quality=%s",
                    domain, assessment.get("lead_quality"))
        await save_ai_assessment(domain, assessment, site_analysis=analysis)
        logger.info("AI: saved %s", domain)
    except Exception as e:
        logger.error("AI: failed %s — %s", domain, e, exc_info=True)
        # Best-effort: record the failure so the row leaves 'running'.
        # A DB error here must not propagate out of the worker task.
        try:
            async with aiosqlite.connect(SQLITE_PATH) as db:
                await db.execute(
                    "UPDATE ai_queue SET status='failed', completed_at=datetime('now'), error=? WHERE domain=?",
                    (str(e)[:400], domain),
                )
                await db.commit()
        except Exception:
            # Was a silent `pass`: that hid DB failures and left rows
            # stuck in 'running' with no trace. Log, but stay best-effort.
            logger.warning("AI: could not mark %s as failed", domain, exc_info=True)
|
||||
|
||||
|
||||
async def ai_worker_loop():
    """Background task: drain the ai_queue table and run AI assessments.

    Forever: poll SQLite for up to 5 'pending' domains, claim them by
    flipping status to 'running', then fan the batch out to _assess_one()
    concurrently.  The DB section is wrapped in try/except so a transient
    SQLite error never kills the task (the original failure mode where
    nothing reached Replicate).
    """
    logger.info("AI worker loop starting")
    while True:
        rows = []
        try:
            async with aiosqlite.connect(SQLITE_PATH) as db:
                async with db.execute(
                    "SELECT domain FROM ai_queue WHERE status='pending' LIMIT 5"
                ) as cur:
                    rows = await cur.fetchall()
                if rows:
                    # Claim the batch: once committed as 'running', the
                    # next poll's WHERE status='pending' won't re-pick them.
                    await db.executemany(
                        "UPDATE ai_queue SET status='running' WHERE domain=?",
                        [(r[0],) for r in rows],
                    )
                    await db.commit()
                    logger.info("AI worker: picked up %d jobs: %s",
                                len(rows), [r[0] for r in rows])
        except Exception as e:
            # Log with traceback, back off, and keep the loop alive.
            logger.error("AI worker DB error: %s", e, exc_info=True)
            await asyncio.sleep(5)
            continue

        if not rows:
            # Idle poll interval when the queue is empty.
            await asyncio.sleep(3)
            continue

        # Run assessments concurrently (semaphore in replicate_ai enforces AI_CONCURRENCY)
        results = await asyncio.gather(
            *[_assess_one(r[0]) for r in rows],
            return_exceptions=True,
        )
        # _assess_one catches its own errors, so exceptions here indicate
        # something unexpected escaped (e.g. CancelledError subclasses).
        for r, exc in zip(rows, results):
            if isinstance(exc, Exception):
                logger.error("AI task exception for %s: %s", r[0], exc, exc_info=exc)
|
||||
|
||||
|
||||
def start_worker():
    """Spawn the enrichment and AI worker tasks if missing or finished.

    Idempotent: a task that is still running is left untouched, so this
    may be called repeatedly (e.g. from ensure_workers_alive()).  If the
    previous AI worker task finished with an exception, that exception is
    logged before a replacement task is created.
    """
    global _worker_task, _ai_worker_task

    enrich_dead = _worker_task is None or _worker_task.done()
    if enrich_dead:
        _worker_task = asyncio.create_task(worker_loop())
        logger.info("Enrichment worker started")

    ai_dead = _ai_worker_task is None or _ai_worker_task.done()
    if ai_dead:
        if _ai_worker_task is not None and _ai_worker_task.done():
            # Task.exception() raises if the task was cancelled, so guard.
            exc = None
            if not _ai_worker_task.cancelled():
                exc = _ai_worker_task.exception()
            if exc:
                logger.error("AI worker died with: %s", exc, exc_info=exc)
        _ai_worker_task = asyncio.create_task(ai_worker_loop())
        logger.info("AI worker started/restarted")
|
||||
|
||||
|
||||
def ensure_workers_alive():
    """Restart workers if they've died — call periodically.

    Thin alias for start_worker(), which only spawns tasks that are
    missing or done, so invoking this on every request is cheap.
    """
    start_worker()
|
||||
|
||||
|
||||
def pause_worker():
|
||||
|
||||
Reference in New Issue
Block a user