feat: initial Dockerized domain intelligence dashboard
- FastAPI backend with DuckDB pushdown queries on 72M parquet
- Async enrichment worker: HTTP, SSL, DNS MX, CMS fingerprint, ip-api.com
- Resumable parquet download with HTTP Range support
- Lead scoring engine (max 100 pts, target countries ES,GB,DE,FR,RO,PT,AD,IT)
- Single-file Alpine.js + Chart.js dashboard on port 6677
- SQLite enrichment DB with job queue and scores tables
- Dockerized with persistent /data volume

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
64
app/scorer.py
Normal file
64
app/scorer.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import os
|
||||
import aiosqlite
|
||||
from app.db import SQLITE_PATH
|
||||
|
||||
def parse_country_codes(raw: str) -> set[str]:
    """Split a comma-separated list of country codes into a set.

    Whitespace around each entry is stripped and empty entries are dropped,
    so values such as ``"ES, GB,"`` or ``""`` do not produce codes like
    ``" GB"`` or ``""`` that could never (or wrongly) match a country.
    """
    return {code for part in raw.split(",") if (code := part.strip())}


# CMS names, lowercase; score() lowercases the row's "cms" value before
# testing membership here.
KNOWN_CMS = {
    "wordpress", "joomla", "drupal", "wix", "squarespace",
    "shopify", "prestashop", "magento", "typo3", "opencart",
}

# Country codes that earn points in score(); overridable through the
# TARGET_COUNTRIES environment variable (comma-separated).
TARGET_COUNTRIES = parse_country_codes(os.getenv("TARGET_COUNTRIES", "ES,GB,DE,FR"))

# Lowercase words looked for (as substrings) in a page title by
# local_biz_keywords(): English terms first, then Spanish ones.
LOCAL_BIZ_KEYWORDS = {
    "restaurant", "cafe", "shop", "store", "salon", "plumber", "electrician",
    "dentist", "clinic", "garage", "hotel", "bakery", "bar", "gym", "spa",
    "fontanero", "electricista", "dentista", "clínica", "taller",
    "panadería", "peluquería", "tienda",
}
|
||||
|
||||
|
||||
def local_biz_keywords(title: str | None) -> bool:
|
||||
if not title:
|
||||
return False
|
||||
title_lower = title.lower()
|
||||
return any(kw in title_lower for kw in LOCAL_BIZ_KEYWORDS)
|
||||
|
||||
|
||||
def score(domain_row: dict) -> int:
    """Compute the lead score for one enriched-domain row.

    Points are added per signal and the total is capped at 100:

    * 20 - ``is_live`` is truthy
    * 15 - ``ssl_expiry_days`` is present and below 30
    * 15 - ``ssl_valid`` is falsy (including missing)
    * 15 - ``cms`` (case-insensitive) is one of ``KNOWN_CMS``
    * 10 - ``has_mx`` is falsy (including missing)
    * 10 - ``ip_country`` is one of ``TARGET_COUNTRIES``
    * 10 - ``server`` (case-insensitive) contains ``"shared"``
    *  5 - ``page_title`` contains a local-business keyword
    """
    total = 20 if domain_row.get("is_live") else 0

    days_to_expiry = domain_row.get("ssl_expiry_days")
    total += 15 if (days_to_expiry is not None and days_to_expiry < 30) else 0

    total += 0 if domain_row.get("ssl_valid") else 15

    cms_name = (domain_row.get("cms") or "").lower()
    total += 15 if cms_name in KNOWN_CMS else 0

    total += 0 if domain_row.get("has_mx") else 10

    total += 10 if domain_row.get("ip_country") in TARGET_COUNTRIES else 0

    server_header = (domain_row.get("server") or "").lower()
    total += 10 if "shared" in server_header else 0

    total += 5 if local_biz_keywords(domain_row.get("page_title")) else 0

    # Cap the result at 100.
    return total if total < 100 else 100
|
||||
|
||||
|
||||
async def run_scoring():
    """Score every row of ``enriched_domains`` and persist the results.

    Reads all rows from the SQLite database at ``SQLITE_PATH``, computes
    ``score(row)`` for each, writes the value back to
    ``enriched_domains.score`` and upserts it into the ``scores`` table,
    then commits.

    Returns:
        ``{"scored": <number of rows scored>}``.
    """
    async with aiosqlite.connect(SQLITE_PATH) as db:
        db.row_factory = aiosqlite.Row
        async with db.execute("SELECT * FROM enriched_domains") as cur:
            rows = [dict(r) async for r in cur]

        scored = [(r["domain"], score(r)) for r in rows]

        # The two statements bind their placeholders in opposite orders, so
        # each gets its own correctly ordered parameter tuples. Reusing the
        # UPDATE's (score, domain) tuples for the INSERT would store the
        # score in the domain column and vice versa.
        await db.executemany(
            "UPDATE enriched_domains SET score = ? WHERE domain = ?",
            [(points, domain) for domain, points in scored],
        )
        await db.executemany(
            """INSERT INTO scores (domain, score) VALUES (?, ?)
               ON CONFLICT(domain) DO UPDATE SET score=excluded.score, scored_at=datetime('now')""",
            scored,
        )
        await db.commit()

    return {"scored": len(scored)}
|
||||
Reference in New Issue
Block a user