feat: Gemini AI assessment, Kit Digital detection, contact extraction

Kit Digital detection (enricher.py):
- Scans img src/alt/srcset attributes for digitalizadores, kit-digital, fondos-europeos, etc.
- Scans page text for Kit Digital, Agente Digitalizador, Next Generation EU, PRTR
- Scans links for acelerapyme.es, red.es, kit-digital refs
- +20 score bonus for Kit Digital confirmed sites (proven IT buyers)

Contact extraction (enricher.py):
- Pulls mailto/tel/wa.me links from HTML
- Extracts email addresses via regex, phone numbers (ES format)
- Detects social media links (FB, IG, LinkedIn, Twitter, TikTok)
- Stored as JSON in contact_info column

Gemini via Replicate (replicate_ai.py):
- Assesses lead quality (HOT/WARM/COLD), Kit Digital confirmation
- Identifies best contact channel + actual value (email/phone/WA)
- Writes Spanish cold-call/email pitch angle
- Lists services likely needed + outreach notes
- 3 concurrent requests, 90s timeout, JSON output parsing

DB: migration adds kit_digital, kit_digital_signals, contact_info,
    ai_assessment, ai_lead_quality, ai_pitch, ai_contact_channel/value,
    ai_queue table

UI: Kit Digital 🏅 badge, AI quality pill (clickable modal with full
    assessment), contact chips (email/phone/WA/social), AI Assess button,
    Kit Digital only filter, AI queue status in enrichment tab

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-13 17:25:06 +02:00
parent 7acff12242
commit faca4b6e1a
7 changed files with 875 additions and 382 deletions

View File

@@ -26,7 +26,16 @@ CREATE TABLE IF NOT EXISTS enriched_domains (
server TEXT,
enriched_at TEXT,
error TEXT,
score INTEGER DEFAULT 0
score INTEGER DEFAULT 0,
kit_digital INTEGER DEFAULT 0,
kit_digital_signals TEXT,
contact_info TEXT,
ai_assessment TEXT,
ai_lead_quality TEXT,
ai_pitch TEXT,
ai_contact_channel TEXT,
ai_contact_value TEXT,
ai_assessed_at TEXT
);
CREATE TABLE IF NOT EXISTS job_queue (
id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -37,6 +46,13 @@ CREATE TABLE IF NOT EXISTS job_queue (
completed_at TEXT,
error TEXT
);
CREATE TABLE IF NOT EXISTS ai_queue (
domain TEXT PRIMARY KEY,
status TEXT DEFAULT 'pending',
created_at TEXT DEFAULT (datetime('now')),
completed_at TEXT,
error TEXT
);
CREATE TABLE IF NOT EXISTS scores (
domain TEXT PRIMARY KEY,
score INTEGER NOT NULL,
@@ -44,6 +60,20 @@ CREATE TABLE IF NOT EXISTS scores (
);
"""
# Columns added to enriched_domains after the initial release. Each entry is a
# full column definition and becomes one ALTER TABLE statement below.
_ENRICHED_DOMAINS_ADDED_COLUMNS = (
    "kit_digital INTEGER DEFAULT 0",
    "kit_digital_signals TEXT",
    "contact_info TEXT",
    "ai_assessment TEXT",
    "ai_lead_quality TEXT",
    "ai_pitch TEXT",
    "ai_contact_channel TEXT",
    "ai_contact_value TEXT",
    "ai_assessed_at TEXT",
)

# Migration statements applied to existing DBs: one ADD COLUMN per entry
# above, followed by the creation of the ai_queue table.
_MIGRATIONS = [
    f"ALTER TABLE enriched_domains ADD COLUMN {column_def}"
    for column_def in _ENRICHED_DOMAINS_ADDED_COLUMNS
] + [
    "CREATE TABLE IF NOT EXISTS ai_queue (domain TEXT PRIMARY KEY, status TEXT DEFAULT 'pending', created_at TEXT DEFAULT (datetime('now')), completed_at TEXT, error TEXT)",
]
# Index build state
_index_ready = False
_index_building = False
@@ -57,6 +87,12 @@ _total_cache: int = 0
async def init_db():
    """Create the schema and apply column/table migrations.

    Runs ``SCHEMA`` through ``executescript`` and then executes every
    statement in ``_MIGRATIONS``. A migration that fails with SQLite's
    "duplicate column name" error is skipped, which keeps the function safe
    to re-run. Any other failure is re-raised rather than silently swallowed,
    so a genuinely broken migration is not hidden.

    Raises:
        aiosqlite.OperationalError: If a migration fails for a reason other
            than the column already existing.
    """
    async with aiosqlite.connect(SQLITE_PATH) as db:
        await db.executescript(SCHEMA)
        for sql in _MIGRATIONS:
            try:
                await db.execute(sql)
            except aiosqlite.OperationalError as exc:
                # Re-running ADD COLUMN on an up-to-date DB is the only
                # failure that is expected and safe to ignore.
                if "duplicate column name" not in str(exc).lower():
                    raise
        await db.commit()
@@ -243,6 +279,8 @@ async def get_stats():
threshold = int(os.getenv("SCORE_THRESHOLD", "60"))
async with db.execute("SELECT COUNT(*) FROM enriched_domains WHERE score >= ?", (threshold,)) as cur:
hot_leads = (await cur.fetchone())[0]
async with db.execute("SELECT COUNT(*) FROM enriched_domains WHERE kit_digital=1") as cur:
kit_digital_count = (await cur.fetchone())[0]
async with db.execute("SELECT status, COUNT(*) FROM job_queue GROUP BY status") as cur:
q = {r[0]: r[1] async for r in cur}
@@ -250,6 +288,7 @@ async def get_stats():
"total_domains": _total_cache,
"enriched": enriched,
"hot_leads": hot_leads,
"kit_digital_count": kit_digital_count,
"tld_breakdown": _tld_cache,
"index_status": index_status(),
"queue": {
@@ -263,7 +302,7 @@ async def get_stats():
# ── Enrichment helpers ───────────────────────────────────────────────────────
async def get_enriched(min_score=0, cms=None, country=None, page=1, limit=100):
async def get_enriched(min_score=0, cms=None, country=None, kit_digital=None, page=1, limit=100):
offset = (page - 1) * limit
conditions = ["score >= ?"]
params: list = [min_score]
@@ -273,6 +312,9 @@ async def get_enriched(min_score=0, cms=None, country=None, page=1, limit=100):
if country:
conditions.append("ip_country = ?")
params.append(country)
if kit_digital is not None:
conditions.append("kit_digital = ?")
params.append(1 if kit_digital else 0)
where = "WHERE " + " AND ".join(conditions)
async with aiosqlite.connect(SQLITE_PATH) as db:
db.row_factory = aiosqlite.Row
@@ -288,6 +330,52 @@ async def get_enriched(min_score=0, cms=None, country=None, page=1, limit=100):
return total, rows
async def queue_ai(domains: list[str]):
    """Insert each of ``domains`` into the ``ai_queue`` table.

    The insert uses ``INSERT OR IGNORE``, so a domain that already has a row
    in the table is left as it is.
    """
    insert_sql = "INSERT OR IGNORE INTO ai_queue (domain) VALUES (?)"
    async with aiosqlite.connect(SQLITE_PATH) as conn:
        # executemany expects one parameter tuple per row.
        param_rows = [(name,) for name in domains]
        await conn.executemany(insert_sql, param_rows)
        await conn.commit()
async def get_ai_queue_status():
    """Return ``ai_queue`` row counts grouped by status.

    The result always has the keys ``pending``, ``running``, ``done`` and
    ``failed`` (0 when no row has that status) plus ``total``, which sums the
    counts of every status found in the table.
    """
    per_status = {}
    async with aiosqlite.connect(SQLITE_PATH) as conn:
        async with conn.execute("SELECT status, COUNT(*) FROM ai_queue GROUP BY status") as cursor:
            async for record in cursor:
                per_status[record[0]] = record[1]

    summary = {
        name: per_status.get(name, 0)
        for name in ("pending", "running", "done", "failed")
    }
    summary["total"] = sum(per_status.values())
    return summary
async def save_ai_assessment(domain: str, assessment: dict):
    """Store an AI assessment for ``domain`` and mark its queue entry done.

    The whole ``assessment`` dict is saved as JSON in ``ai_assessment``. Its
    ``lead_quality``, ``pitch_angle``, ``best_contact_channel`` and
    ``best_contact_value`` entries (``None`` when absent) are copied into
    their own columns. The matching ``ai_queue`` row is then set to ``done``
    with a completion timestamp, and both updates are committed together.
    """
    import json as _json

    update_enriched_sql = """UPDATE enriched_domains SET
ai_assessment=?, ai_lead_quality=?, ai_pitch=?,
ai_contact_channel=?, ai_contact_value=?, ai_assessed_at=datetime('now')
WHERE domain=?"""
    mark_done_sql = "UPDATE ai_queue SET status='done', completed_at=datetime('now') WHERE domain=?"
    # Assessment keys, in the order of the dedicated columns in the UPDATE.
    column_keys = (
        "lead_quality",
        "pitch_angle",
        "best_contact_channel",
        "best_contact_value",
    )

    async with aiosqlite.connect(SQLITE_PATH) as conn:
        enriched_params = (
            _json.dumps(assessment),
            *(assessment.get(key) for key in column_keys),
            domain,
        )
        await conn.execute(update_enriched_sql, enriched_params)
        await conn.execute(mark_done_sql, (domain,))
        await conn.commit()
async def queue_domains(domains: list[str]):
async with aiosqlite.connect(SQLITE_PATH) as db:
await db.executemany(