feat: Gemini AI assessment, Kit Digital detection, contact extraction
Kit Digital detection (enricher.py):
- Scans img src/alt/srcset for digitalizadores, kit-digital, fondos-europeos, etc.
- Scans page text for Kit Digital, Agente Digitalizador, Next Generation EU, PRTR
- Scans links for acelerapyme.es, red.es, kit-digital refs
- +20 score bonus for Kit Digital confirmed sites (proven IT buyers)
Contact extraction (enricher.py):
- Pulls mailto/tel/wa.me links from HTML
- Extracts email addresses via regex, phone numbers (ES format)
- Detects social media links (FB, IG, LinkedIn, Twitter, TikTok)
- Stored as JSON in contact_info column
Gemini via Replicate (replicate_ai.py):
- Assesses lead quality (HOT/WARM/COLD), Kit Digital confirmation
- Identifies best contact channel + actual value (email/phone/WA)
- Writes Spanish cold-call/email pitch angle
- Lists services likely needed + outreach notes
- 3 concurrent requests, 90s timeout, JSON output parsing
DB: migration adds kit_digital, kit_digital_signals, contact_info,
ai_assessment, ai_lead_quality, ai_pitch, ai_contact_channel/value,
ai_queue table
UI: Kit Digital 🏅 badge, AI quality pill (clickable modal with full
assessment), contact chips (email/phone/WA/social), AI Assess button,
Kit Digital only filter, AI queue status in enrichment tab
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
92
app/db.py
92
app/db.py
@@ -26,7 +26,16 @@ CREATE TABLE IF NOT EXISTS enriched_domains (
|
||||
server TEXT,
|
||||
enriched_at TEXT,
|
||||
error TEXT,
|
||||
score INTEGER DEFAULT 0
|
||||
score INTEGER DEFAULT 0,
|
||||
kit_digital INTEGER DEFAULT 0,
|
||||
kit_digital_signals TEXT,
|
||||
contact_info TEXT,
|
||||
ai_assessment TEXT,
|
||||
ai_lead_quality TEXT,
|
||||
ai_pitch TEXT,
|
||||
ai_contact_channel TEXT,
|
||||
ai_contact_value TEXT,
|
||||
ai_assessed_at TEXT
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS job_queue (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
@@ -37,6 +46,13 @@ CREATE TABLE IF NOT EXISTS job_queue (
|
||||
completed_at TEXT,
|
||||
error TEXT
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS ai_queue (
|
||||
domain TEXT PRIMARY KEY,
|
||||
status TEXT DEFAULT 'pending',
|
||||
created_at TEXT DEFAULT (datetime('now')),
|
||||
completed_at TEXT,
|
||||
error TEXT
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS scores (
|
||||
domain TEXT PRIMARY KEY,
|
||||
score INTEGER NOT NULL,
|
||||
@@ -44,6 +60,20 @@ CREATE TABLE IF NOT EXISTS scores (
|
||||
);
|
||||
"""
|
||||
|
||||
# Columns added after initial release — applied as migrations on existing DBs
|
||||
_MIGRATIONS = [
|
||||
"ALTER TABLE enriched_domains ADD COLUMN kit_digital INTEGER DEFAULT 0",
|
||||
"ALTER TABLE enriched_domains ADD COLUMN kit_digital_signals TEXT",
|
||||
"ALTER TABLE enriched_domains ADD COLUMN contact_info TEXT",
|
||||
"ALTER TABLE enriched_domains ADD COLUMN ai_assessment TEXT",
|
||||
"ALTER TABLE enriched_domains ADD COLUMN ai_lead_quality TEXT",
|
||||
"ALTER TABLE enriched_domains ADD COLUMN ai_pitch TEXT",
|
||||
"ALTER TABLE enriched_domains ADD COLUMN ai_contact_channel TEXT",
|
||||
"ALTER TABLE enriched_domains ADD COLUMN ai_contact_value TEXT",
|
||||
"ALTER TABLE enriched_domains ADD COLUMN ai_assessed_at TEXT",
|
||||
"CREATE TABLE IF NOT EXISTS ai_queue (domain TEXT PRIMARY KEY, status TEXT DEFAULT 'pending', created_at TEXT DEFAULT (datetime('now')), completed_at TEXT, error TEXT)",
|
||||
]
|
||||
|
||||
# Index build state
|
||||
_index_ready = False
|
||||
_index_building = False
|
||||
@@ -57,6 +87,12 @@ _total_cache: int = 0
|
||||
async def init_db():
    """Create the schema and apply column migrations to an existing database.

    Executes ``SCHEMA`` first, then every statement in ``_MIGRATIONS``, and
    commits once at the end.  A migration that fails only because its column
    already exists is skipped, which keeps the function safe to call on every
    start-up.  Any other failure is re-raised instead of being hidden.

    Raises:
        sqlite3.OperationalError: a migration failed for a reason other than
            the column already being present.
    """
    # Local import: the errors raised through aiosqlite are sqlite3's own
    # exception types, and this keeps the block self-contained.
    import sqlite3

    async with aiosqlite.connect(SQLITE_PATH) as db:
        await db.executescript(SCHEMA)
        for sql in _MIGRATIONS:
            try:
                await db.execute(sql)
            except sqlite3.OperationalError as exc:
                # SQLite reports a re-applied ADD COLUMN as
                # "duplicate column name: <col>"; that is the only failure
                # that means "migration already applied".
                if "duplicate column name" not in str(exc).lower():
                    raise
        await db.commit()
|
||||
|
||||
|
||||
@@ -243,6 +279,8 @@ async def get_stats():
|
||||
threshold = int(os.getenv("SCORE_THRESHOLD", "60"))
|
||||
async with db.execute("SELECT COUNT(*) FROM enriched_domains WHERE score >= ?", (threshold,)) as cur:
|
||||
hot_leads = (await cur.fetchone())[0]
|
||||
async with db.execute("SELECT COUNT(*) FROM enriched_domains WHERE kit_digital=1") as cur:
|
||||
kit_digital_count = (await cur.fetchone())[0]
|
||||
async with db.execute("SELECT status, COUNT(*) FROM job_queue GROUP BY status") as cur:
|
||||
q = {r[0]: r[1] async for r in cur}
|
||||
|
||||
@@ -250,6 +288,7 @@ async def get_stats():
|
||||
"total_domains": _total_cache,
|
||||
"enriched": enriched,
|
||||
"hot_leads": hot_leads,
|
||||
"kit_digital_count": kit_digital_count,
|
||||
"tld_breakdown": _tld_cache,
|
||||
"index_status": index_status(),
|
||||
"queue": {
|
||||
@@ -263,7 +302,7 @@ async def get_stats():
|
||||
|
||||
# ── Enrichment helpers ───────────────────────────────────────────────────────
|
||||
|
||||
async def get_enriched(min_score=0, cms=None, country=None, page=1, limit=100):
|
||||
async def get_enriched(min_score=0, cms=None, country=None, kit_digital=None, page=1, limit=100):
|
||||
offset = (page - 1) * limit
|
||||
conditions = ["score >= ?"]
|
||||
params: list = [min_score]
|
||||
@@ -273,6 +312,9 @@ async def get_enriched(min_score=0, cms=None, country=None, page=1, limit=100):
|
||||
if country:
|
||||
conditions.append("ip_country = ?")
|
||||
params.append(country)
|
||||
if kit_digital is not None:
|
||||
conditions.append("kit_digital = ?")
|
||||
params.append(1 if kit_digital else 0)
|
||||
where = "WHERE " + " AND ".join(conditions)
|
||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
||||
db.row_factory = aiosqlite.Row
|
||||
@@ -288,6 +330,52 @@ async def get_enriched(min_score=0, cms=None, country=None, page=1, limit=100):
|
||||
return total, rows
|
||||
|
||||
|
||||
async def queue_ai(domains: list[str]):
    """Add *domains* to the ``ai_queue`` table.

    Rows are written with ``INSERT OR IGNORE``, so a domain that is already
    present in the queue is left as it is.
    """
    insert_sql = "INSERT OR IGNORE INTO ai_queue (domain) VALUES (?)"
    param_rows = [(name,) for name in domains]
    async with aiosqlite.connect(SQLITE_PATH) as db:
        await db.executemany(insert_sql, param_rows)
        await db.commit()
|
||||
|
||||
|
||||
async def get_ai_queue_status():
    """Return per-status row counts for the ``ai_queue`` table.

    The result always carries the keys ``pending``, ``running``, ``done`` and
    ``failed`` (0 when no row has that status) plus ``total``, which is the
    sum over every status found in the table.
    """
    counts_by_status = {}
    async with aiosqlite.connect(SQLITE_PATH) as db:
        async with db.execute("SELECT status, COUNT(*) FROM ai_queue GROUP BY status") as cur:
            async for record in cur:
                counts_by_status[record[0]] = record[1]

    summary = {
        state: counts_by_status.get(state, 0)
        for state in ("pending", "running", "done", "failed")
    }
    summary["total"] = sum(counts_by_status.values())
    return summary
|
||||
|
||||
|
||||
async def save_ai_assessment(domain: str, assessment: dict):
    """Store an AI assessment for *domain* and mark its queue entry as done.

    The whole *assessment* dict is saved as JSON in ``ai_assessment``; the
    ``lead_quality``, ``pitch_angle``, ``best_contact_channel`` and
    ``best_contact_value`` entries are also copied into their own columns
    (``None`` when a key is absent).  Both updates are committed together.
    """
    import json as _json

    update_domain_sql = """UPDATE enriched_domains SET
        ai_assessment=?, ai_lead_quality=?, ai_pitch=?,
        ai_contact_channel=?, ai_contact_value=?, ai_assessed_at=datetime('now')
        WHERE domain=?"""
    finish_queue_sql = (
        "UPDATE ai_queue SET status='done', completed_at=datetime('now') WHERE domain=?"
    )

    # Serialise first, then pull out the fields that get dedicated columns.
    serialized = _json.dumps(assessment)
    quality = assessment.get("lead_quality")
    pitch = assessment.get("pitch_angle")
    channel = assessment.get("best_contact_channel")
    channel_value = assessment.get("best_contact_value")
    domain_params = (serialized, quality, pitch, channel, channel_value, domain)

    async with aiosqlite.connect(SQLITE_PATH) as db:
        await db.execute(update_domain_sql, domain_params)
        await db.execute(finish_queue_sql, (domain,))
        await db.commit()
|
||||
|
||||
|
||||
async def queue_domains(domains: list[str]):
|
||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
||||
await db.executemany(
|
||||
|
||||
Reference in New Issue
Block a user