feat: Gemini AI assessment, Kit Digital detection, contact extraction
Kit Digital detection (enricher.py):
- Scans img src/alt/srcset for digitalizadores, kit-digital, fondos-europeos, etc.
- Scans page text for Kit Digital, Agente Digitalizador, Next Generation EU, PRTR
- Scans links for acelerapyme.es, red.es, kit-digital refs
- +20 score bonus for Kit Digital confirmed sites (proven IT buyers)
Contact extraction (enricher.py):
- Pulls mailto/tel/wa.me links from HTML
- Extracts email addresses via regex, phone numbers (ES format)
- Detects social media links (FB, IG, LinkedIn, Twitter, TikTok)
- Stored as JSON in contact_info column
Gemini via Replicate (replicate_ai.py):
- Assesses lead quality (HOT/WARM/COLD), Kit Digital confirmation
- Identifies best contact channel + actual value (email/phone/WA)
- Writes Spanish cold-call/email pitch angle
- Lists services likely needed + outreach notes
- 3 concurrent requests, 90s timeout, JSON output parsing
DB: migration adds the columns kit_digital, kit_digital_signals, contact_info,
ai_assessment, ai_lead_quality, ai_pitch, ai_contact_channel/value, and the
ai_queue table
UI: Kit Digital 🏅 badge, AI quality pill (clickable modal with full
assessment), contact chips (email/phone/WA/social), AI Assess button,
Kit Digital only filter, AI queue status in enrichment tab
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
44
app/main.py
44
app/main.py
@@ -6,6 +6,7 @@ from contextlib import asynccontextmanager
|
||||
|
||||
import httpx
|
||||
import aiosqlite
|
||||
from typing import Optional
|
||||
from fastapi import FastAPI, Query
|
||||
from fastapi.responses import StreamingResponse, JSONResponse
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
@@ -17,6 +18,7 @@ from app.db import (
|
||||
DATA_DIR, PARQUET_PATH, SQLITE_PATH,
|
||||
init_db, get_stats, get_domains, get_enriched,
|
||||
queue_domains, get_queue_status, build_duckdb_index, index_status,
|
||||
queue_ai, get_ai_queue_status, save_ai_assessment,
|
||||
)
|
||||
from app.enricher import start_worker, pause_worker, resume_worker, is_running
|
||||
from app.scorer import run_scoring
|
||||
@@ -146,13 +148,53 @@ async def enriched(
|
||||
min_score: int = Query(0, ge=0, le=100),
|
||||
cms: str = Query(None),
|
||||
country: str = Query(None),
|
||||
kit_digital: Optional[bool] = Query(None),
|
||||
page: int = Query(1, ge=1),
|
||||
limit: int = Query(100, ge=1, le=1000),
|
||||
):
|
||||
total, rows = await get_enriched(min_score=min_score, cms=cms, country=country, page=page, limit=limit)
|
||||
total, rows = await get_enriched(
|
||||
min_score=min_score, cms=cms, country=country,
|
||||
kit_digital=kit_digital, page=page, limit=limit,
|
||||
)
|
||||
return {"page": page, "limit": limit, "total": total, "results": rows}
|
||||
|
||||
|
||||
# ── AI assessment endpoints ───────────────────────────────────────────────────
|
||||
|
||||
@app.post("/api/ai/assess/batch")
async def ai_assess_batch(body: dict):
    """Queue a batch of domains for AI assessment.

    Reads the ``domains`` key from the JSON body. Responds with HTTP 400 when
    the value is missing, empty, or not a list of strings; otherwise hands the
    list to ``queue_ai`` and reports how many entries were submitted.
    """
    domains_list = body.get("domains", [])
    if not domains_list:
        return JSONResponse({"error": "no domains provided"}, status_code=400)
    # A bare JSON string is truthy and sized: without this check it would be
    # passed to queue_ai as a str and len() below would count its characters.
    if not isinstance(domains_list, list) or not all(
        isinstance(entry, str) for entry in domains_list
    ):
        return JSONResponse(
            {"error": "domains must be a list of strings"}, status_code=400
        )
    await queue_ai(domains_list)
    return {"queued": len(domains_list)}
|
||||
|
||||
|
||||
@app.get("/api/ai/status")
async def ai_status():
    """Return the result of ``get_ai_queue_status`` unchanged."""
    queue_state = await get_ai_queue_status()
    return queue_state
|
||||
|
||||
|
||||
@app.post("/api/ai/assess/single")
async def ai_assess_single(body: dict):
    """Assess one domain immediately and return the assessment.

    Responds with 400 when the body carries no ``domain`` and with 404 when
    the domain has no row in ``enriched_domains``. Otherwise the row is passed
    to the assessor, the result is stored via ``save_ai_assessment``, and the
    assessment is returned to the caller.
    """
    domain = body.get("domain")
    if not domain:
        return JSONResponse({"error": "no domain"}, status_code=400)

    # Imported at call time, after the request has been validated.
    from app.replicate_ai import assess_domain as gemini_assess

    lookup_sql = "SELECT * FROM enriched_domains WHERE domain=?"
    async with aiosqlite.connect(SQLITE_PATH) as conn:
        conn.row_factory = aiosqlite.Row
        async with conn.execute(lookup_sql, (domain,)) as cursor:
            record = await cursor.fetchone()

    if record is None:
        return JSONResponse({"error": "domain not yet enriched"}, status_code=404)

    result = await gemini_assess(dict(record))
    await save_ai_assessment(domain, result)
    return result
|
||||
|
||||
|
||||
@app.get("/api/export")
|
||||
async def export_csv(
|
||||
min_score: int = Query(0),
|
||||
|
||||
Reference in New Issue
Block a user