Kit Digital detection (enricher.py):
- Scans img src/alt/srcset for digitalizadores, kit-digital, fondos-europeos, etc.
- Scans page text for Kit Digital, Agente Digitalizador, Next Generation EU, PRTR
- Scans links for acelerapyme.es, red.es, kit-digital refs
- +20 score bonus for Kit Digital confirmed sites (proven IT buyers)
Contact extraction (enricher.py):
- Pulls mailto/tel/wa.me links from HTML
- Extracts email addresses via regex, phone numbers (ES format)
- Detects social media links (FB, IG, LinkedIn, Twitter, TikTok)
- Stored as JSON in contact_info column
Gemini via Replicate (replicate_ai.py):
- Assesses lead quality (HOT/WARM/COLD), Kit Digital confirmation
- Identifies best contact channel + actual value (email/phone/WA)
- Writes Spanish cold-call/email pitch angle
- Lists services likely needed + outreach notes
- 3 concurrent requests, 90s timeout, JSON output parsing
DB: migration adds kit_digital, kit_digital_signals, contact_info,
ai_assessment, ai_lead_quality, ai_pitch, ai_contact_channel/value,
and the ai_queue table
UI: Kit Digital 🏅 badge, AI quality pill (clickable modal with full
assessment), contact chips (email/phone/WA/social), AI Assess button,
Kit Digital only filter, AI queue status in enrichment tab
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
68 lines
2.2 KiB
Python
68 lines
2.2 KiB
Python
import os
|
|
import aiosqlite
|
|
from app.db import SQLITE_PATH
|
|
|
|
# CMS names (lower-case) that earn a scoring bonus; score() lower-cases the
# stored "cms" value before testing membership in this set.
KNOWN_CMS = {
    "wordpress",
    "joomla",
    "drupal",
    "wix",
    "squarespace",
    "shopify",
    "prestashop",
    "magento",
    "typo3",
    "opencart",
}

# Country codes read from the comma-separated TARGET_COUNTRIES environment
# variable. Entries are stripped and empty ones dropped so that values such
# as "ES, GB" or a trailing comma do not produce codes that can never match.
TARGET_COUNTRIES = {
    code.strip()
    for code in os.getenv("TARGET_COUNTRIES", "ES,GB,DE,FR").split(",")
    if code.strip()
}

# Lower-case English and Spanish keywords searched for in page titles.
LOCAL_BIZ_KEYWORDS = {
    # English ("hotel" is spelled the same in Spanish)
    "restaurant", "cafe", "shop", "store", "salon", "plumber", "electrician",
    "dentist", "clinic", "garage", "hotel", "bakery", "bar", "gym", "spa",
    # Spanish
    "fontanero", "electricista", "dentista", "clínica", "taller",
    "panadería", "peluquería", "tienda",
}
|
|
|
|
|
|
def local_biz_keywords(title: str | None) -> bool:
|
|
if not title:
|
|
return False
|
|
title_lower = title.lower()
|
|
return any(kw in title_lower for kw in LOCAL_BIZ_KEYWORDS)
|
|
|
|
|
|
def score(domain_row: dict) -> int:
    """Compute a lead score (capped at 100) for one enriched-domain row.

    Each signal below contributes a fixed number of points when it holds;
    the points are summed and the total is clamped to 100.

    Args:
        domain_row: Mapping of column name to value for a single domain.
            Missing keys are treated the same as ``None``.

    Returns:
        The integer score, never greater than 100.
    """
    field = domain_row.get
    days_left = field("ssl_expiry_days")

    # (signal holds?, points awarded) -- evaluated top to bottom.
    weighted_signals = (
        (field("is_live"), 20),
        (days_left is not None and days_left < 30, 15),
        (not field("ssl_valid"), 15),
        ((field("cms") or "").lower() in KNOWN_CMS, 15),
        (not field("has_mx"), 10),
        (field("ip_country") in TARGET_COUNTRIES, 10),
        ("shared" in (field("server") or "").lower(), 10),
        (local_biz_keywords(field("page_title")), 5),
        # Kit Digital: proven buyer of IT services
        (field("kit_digital"), 20),
    )

    total = sum(points for holds, points in weighted_signals if holds)
    return min(total, 100)
|
|
|
|
|
|
async def run_scoring():
    """Re-score every row of ``enriched_domains`` and persist the results.

    Reads all rows, computes ``score(row)`` for each, writes the value back
    to ``enriched_domains.score`` and upserts it into the ``scores`` table
    (refreshing ``scored_at`` on conflict), then commits.

    Returns:
        ``{"scored": <number of rows processed>}``.
    """
    async with aiosqlite.connect(SQLITE_PATH) as db:
        db.row_factory = aiosqlite.Row
        async with db.execute("SELECT * FROM enriched_domains") as cur:
            rows = [dict(r) async for r in cur]

        # Keep (domain, score) pairs and derive each statement's parameter
        # order from them: positional "?" placeholders bind strictly in
        # order, and the two statements need opposite orders.
        scored = [(r["domain"], score(r)) for r in rows]

        # UPDATE binds score first, then domain.
        await db.executemany(
            "UPDATE enriched_domains SET score = ? WHERE domain = ?",
            [(value, domain) for domain, value in scored],
        )
        # INSERT binds domain first, then score. Previously the UPDATE's
        # (score, domain) tuples were reused here, which stored the score in
        # the domain column and the domain in the score column.
        await db.executemany(
            """INSERT INTO scores (domain, score) VALUES (?, ?)
            ON CONFLICT(domain) DO UPDATE SET score=excluded.score, scored_at=datetime('now')""",
            scored,
        )
        await db.commit()

    return {"scored": len(scored)}
|