feat: deep site analysis engine + fix AI assessment for any domain
site_analyzer.py (new):
- Fresh scrape with timing, page size, server, CMS detection
- Lorem ipsum detection (16 phrases incl. user's example)
- Placeholder content detection (hello world, sample page, etc.)
- Analytics: GA4, GTM, Facebook Pixel, Hotjar, Clarity
- Webmaster: Google Search Console, Bing, Yandex verification tags
- sitemap.xml and robots.txt check + Googlebot block detection
- Mobile viewport check, word count, image/script count
- Full contact extraction: emails, phones, WhatsApp, social links
- Kit Digital signal detection
AI worker fix:
- No longer requires pre-enrichment — works on ANY selected domain
- Runs a fresh site_analyzer scrape, then calls Gemini with full context
- Stores site_analysis JSON alongside AI assessment
- Upserts into enriched_domains even if domain was never enriched
Gemini prompt now includes:
- Complete technical snapshot (load time, size, server, SSL)
- Full SEO signals (sitemap, robots, analytics, webmaster verified)
- Content quality (lorem ipsum matches, placeholder matches)
- Kit Digital signals
- All extracted contacts
- 500-word page text sample
- Outputs: summary, site_quality_score/10, content_issues[],
urgency_signals[], performance_notes, seo_status,
best_contact_channel+value, all_contacts, ES pitch,
services_needed, outreach_notes
UI: rich AI modal with summary banner, quality grid, content issues,
urgency signals, full contact list, technical snapshot
Fixes: use the correct Replicate token; fix the ai_queue status='running' bug
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
app/db.py (34 lines changed)
@@ -35,7 +35,8 @@ CREATE TABLE IF NOT EXISTS enriched_domains (
     ai_pitch TEXT,
     ai_contact_channel TEXT,
     ai_contact_value TEXT,
-    ai_assessed_at TEXT
+    ai_assessed_at TEXT,
+    site_analysis TEXT
 );
 CREATE TABLE IF NOT EXISTS job_queue (
     id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -71,6 +72,7 @@ _MIGRATIONS = [
     "ALTER TABLE enriched_domains ADD COLUMN ai_contact_channel TEXT",
     "ALTER TABLE enriched_domains ADD COLUMN ai_contact_value TEXT",
     "ALTER TABLE enriched_domains ADD COLUMN ai_assessed_at TEXT",
+    "ALTER TABLE enriched_domains ADD COLUMN site_analysis TEXT",
     "CREATE TABLE IF NOT EXISTS ai_queue (domain TEXT PRIMARY KEY, status TEXT DEFAULT 'pending', created_at TEXT DEFAULT (datetime('now')), completed_at TEXT, error TEXT)",
 ]
 
@@ -352,13 +354,19 @@ async def get_ai_queue_status():
     }
 
 
-async def save_ai_assessment(domain: str, assessment: dict):
+async def save_ai_assessment(domain: str, assessment: dict, site_analysis: dict = None):
     import json as _json
     async with aiosqlite.connect(SQLITE_PATH) as db:
+        # Upsert into enriched_domains (domain may not exist yet if assessed before full enrichment)
+        await db.execute(
+            """INSERT INTO enriched_domains (domain) VALUES (?) ON CONFLICT(domain) DO NOTHING""",
+            (domain,),
+        )
         await db.execute(
             """UPDATE enriched_domains SET
                ai_assessment=?, ai_lead_quality=?, ai_pitch=?,
-               ai_contact_channel=?, ai_contact_value=?, ai_assessed_at=datetime('now')
+               ai_contact_channel=?, ai_contact_value=?, ai_assessed_at=datetime('now'),
+               site_analysis=?
                WHERE domain=?""",
             (
                 _json.dumps(assessment),
@@ -366,9 +374,29 @@ async def save_ai_assessment(domain: str, assessment: dict):
                 assessment.get("pitch_angle"),
                 assessment.get("best_contact_channel"),
                 assessment.get("best_contact_value"),
+                _json.dumps(site_analysis) if site_analysis else None,
                 domain,
             ),
         )
+        # Also update contact_info + kit_digital from site_analysis if available
+        if site_analysis:
+            contacts = {
+                "emails": site_analysis.get("emails", []),
+                "phones": site_analysis.get("phones", []),
+                "whatsapp": site_analysis.get("whatsapp", []),
+                "social": site_analysis.get("social_links", []),
+            }
+            await db.execute(
+                """UPDATE enriched_domains SET
+                   kit_digital=?, kit_digital_signals=?, contact_info=?
+                   WHERE domain=?""",
+                (
+                    int(site_analysis.get("kit_digital", False)),
+                    _json.dumps(site_analysis.get("kit_digital_signals", [])),
+                    _json.dumps(contacts),
+                    domain,
+                ),
+            )
         await db.execute(
             "UPDATE ai_queue SET status='done', completed_at=datetime('now') WHERE domain=?",
             (domain,),
Reference in New Issue
Block a user