feat: deep site analysis engine + fix AI assess for any domain
site_analyzer.py (new):
- Fresh scrape with timing, page size, server, CMS detection
- Lorem ipsum detection (16 phrases, including the user's reported example)
- Placeholder content detection (hello world, sample page, etc.)
- Analytics: GA4, GTM, Facebook Pixel, Hotjar, Clarity
- Webmaster: Google Search Console, Bing, Yandex verification tags
- sitemap.xml and robots.txt check + Googlebot block detection
- Mobile viewport check, word count, image/script count
- Full contact extraction: emails, phones, WhatsApp, social links
- Kit Digital signal detection
AI worker fix:
- No longer requires pre-enrichment — works on ANY selected domain
- Does fresh site_analyzer scrape then calls Gemini with full context
- Stores site_analysis JSON alongside AI assessment
- Upserts into enriched_domains even if domain was never enriched
Gemini prompt now includes:
- Complete technical snapshot (load time, size, server, SSL)
- Full SEO signals (sitemap, robots, analytics, webmaster verified)
- Content quality (lorem ipsum matches, placeholder matches)
- Kit Digital signals
- All extracted contacts
- 500-word page text sample
- Outputs: summary, site_quality_score/10, content_issues[],
urgency_signals[], performance_notes, seo_status,
best_contact_channel+value, all_contacts, ES pitch,
services_needed, outreach_notes
UI: rich AI modal with summary banner, quality grid, content issues,
urgency signals, full contact list, technical snapshot
Fixes: use the correct Replicate API token; fix the ai_queue status='running' update bug
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -13,7 +13,7 @@ import dns.resolver
|
||||
import aiosqlite
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
from app.db import SQLITE_PATH, queue_ai, save_ai_assessment, get_ai_queue_status
|
||||
from app.db import SQLITE_PATH, queue_ai, save_ai_assessment
|
||||
from app.scorer import score
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -340,17 +340,17 @@ async def worker_loop():
|
||||
|
||||
async def ai_worker_loop():
|
||||
from app.replicate_ai import assess_domain as gemini_assess
|
||||
from app.site_analyzer import analyze_site
|
||||
|
||||
while True:
|
||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
||||
async with db.execute(
|
||||
"SELECT domain FROM ai_queue WHERE status='pending' LIMIT 20"
|
||||
"SELECT domain FROM ai_queue WHERE status='pending' LIMIT 10"
|
||||
) as cur:
|
||||
rows = await cur.fetchall()
|
||||
# Mark as running
|
||||
if rows:
|
||||
await db.executemany(
|
||||
"UPDATE ai_queue SET status='running', created_at=created_at WHERE domain=?",
|
||||
"UPDATE ai_queue SET status='running' WHERE domain=?",
|
||||
[(r[0],) for r in rows],
|
||||
)
|
||||
await db.commit()
|
||||
@@ -361,16 +361,11 @@ async def ai_worker_loop():
|
||||
|
||||
async def assess_one(domain: str):
|
||||
try:
|
||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
||||
db.row_factory = aiosqlite.Row
|
||||
async with db.execute(
|
||||
"SELECT * FROM enriched_domains WHERE domain=?", (domain,)
|
||||
) as cur:
|
||||
row = await cur.fetchone()
|
||||
if not row:
|
||||
return
|
||||
assessment = await gemini_assess(dict(row))
|
||||
await save_ai_assessment(domain, assessment)
|
||||
# Always do a fresh deep scrape — no pre-enrichment required
|
||||
analysis = await analyze_site(domain)
|
||||
assessment = await gemini_assess(analysis)
|
||||
await save_ai_assessment(domain, assessment, site_analysis=analysis)
|
||||
logger.info("AI done: %s → %s", domain, assessment.get("lead_quality"))
|
||||
except Exception as e:
|
||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
||||
await db.execute(
|
||||
@@ -380,6 +375,7 @@ async def ai_worker_loop():
|
||||
await db.commit()
|
||||
logger.error("AI worker error %s: %s", domain, e)
|
||||
|
||||
# AI_CONCURRENCY concurrent assessments (already enforced by replicate_ai semaphore)
|
||||
await asyncio.gather(*[asyncio.create_task(assess_one(r[0])) for r in rows], return_exceptions=True)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user