fix: add timeouts to SSL/DNS blocking calls, reset stuck AI jobs on startup

- SSL handshake: set socket timeout before wrap_socket (prevents indefinite hang)
- SSL executor: asyncio.wait_for(..., timeout=12)
- DNS gethostbyname: asyncio.wait_for(..., timeout=6)
- analyze_site: hard 90s timeout wrapper
- _assess_one: hard 180s ceiling via asyncio.timeout()
- ai_worker_loop: reset jobs stuck in 'running' back to 'pending' on startup (requeues jobs orphaned by a crashed session)
- Add POST /api/ai/reset endpoint + UI button to unstick jobs without restart

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-13 18:11:27 +02:00
parent 684fbd75b8
commit 5bef587ca0
4 changed files with 55 additions and 12 deletions

View File

@@ -345,14 +345,16 @@ async def _assess_one(domain: str) -> None:
logger.info("AI: starting analysis for %s", domain)
try:
analysis = await analyze_site(domain)
logger.info("AI: site analyzed %s (reachable=%s, words=%s)",
domain, analysis.get("reachable"), analysis.get("word_count"))
assessment = await gemini_assess(analysis)
logger.info("AI: Gemini done %s → quality=%s",
domain, assessment.get("lead_quality"))
await save_ai_assessment(domain, assessment, site_analysis=analysis)
logger.info("AI: saved %s", domain)
# Hard 3-minute ceiling so stuck jobs never block the worker forever
async with asyncio.timeout(180):
analysis = await analyze_site(domain)
logger.info("AI: site analyzed %s (reachable=%s, words=%s)",
domain, analysis.get("reachable"), analysis.get("word_count"))
assessment = await gemini_assess(analysis)
logger.info("AI: Gemini done %s → quality=%s",
domain, assessment.get("lead_quality"))
await save_ai_assessment(domain, assessment, site_analysis=analysis)
logger.info("AI: saved %s", domain)
except Exception as e:
logger.error("AI: failed %s%s", domain, e, exc_info=True)
try:
@@ -368,6 +370,18 @@ async def _assess_one(domain: str) -> None:
async def ai_worker_loop():
logger.info("AI worker loop starting")
# Reset any jobs left in 'running' state from a previous crashed worker
try:
async with aiosqlite.connect(SQLITE_PATH) as db:
result = await db.execute(
"UPDATE ai_queue SET status='pending' WHERE status='running'"
)
count = result.rowcount
await db.commit()
if count:
logger.info("AI worker: reset %d stale 'running' jobs to 'pending'", count)
except Exception as e:
logger.error("AI worker: failed to reset stale jobs: %s", e)
while True:
rows = []
try: