feat: initial Dockerized domain intelligence dashboard
- FastAPI backend with DuckDB pushdown queries on 72M parquet - Async enrichment worker: HTTP, SSL, DNS MX, CMS fingerprint, ip-api.com - Resumable parquet download with HTTP Range support - Lead scoring engine (max 100 pts, target countries ES,GB,DE,FR,RO,PT,AD,IT) - Single-file Alpine.js + Chart.js dashboard on port 6677 - SQLite enrichment DB with job queue and scores tables - Dockerized with persistent /data volume Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
6
.env
Normal file
6
.env
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
DATA_DIR=/data
|
||||||
|
PARQUET_URL=https://github.com/digitalcortex/72m-domains-dataset/raw/refs/heads/master/domains.parquet
|
||||||
|
CONCURRENCY_LIMIT=50
|
||||||
|
SCORE_THRESHOLD=60
|
||||||
|
TARGET_TLDS=es,com,net
|
||||||
|
TARGET_COUNTRIES=ES,GB,DE,FR,RO,PT,AD,IT
|
||||||
16
Dockerfile
Normal file
16
Dockerfile
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
FROM python:3.12-slim

WORKDIR /app

# --no-install-recommends avoids pulling optional packages into the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl gcc libssl-dev && \
    rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY app/ ./app/

EXPOSE 6677

# Run as a module so the working directory (/app) is on sys.path and
# `from app.db import ...` in app/main.py resolves. `python app/main.py`
# would put /app/app on sys.path instead.
CMD ["python", "-m", "app.main"]
|
||||||
54
README.md
Normal file
54
README.md
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
# DomGod — Domain Intelligence Dashboard
|
||||||
|
|
||||||
|
Dockerized dashboard for filtering, enriching, scoring, and exporting leads from a 72M-domain dataset.
|
||||||
|
|
||||||
|
## Quick start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up --build
|
||||||
|
```
|
||||||
|
|
||||||
|
Open **http://localhost:6677**
|
||||||
|
|
||||||
|
On first boot, the container downloads `domains.parquet` (~GB) and caches it in `./data/`. Subsequent restarts skip the download.
|
||||||
|
|
||||||
|
## Environment variables (docker-compose.yml)
|
||||||
|
|
||||||
|
| Variable | Default | Description |
|
||||||
|
|---|---|---|
|
||||||
|
| `DATA_DIR` | `/data` | Where parquet + sqlite live |
|
||||||
|
| `PARQUET_URL` | GitHub raw URL | Source parquet |
|
||||||
|
| `CONCURRENCY_LIMIT` | `50` | Parallel enrichment workers |
|
||||||
|
| `SCORE_THRESHOLD` | `60` | Minimum score counted as a hot lead in `/api/stats` (tier exports use the fixed 80/50 cut-offs below) |
|
||||||
|
| `TARGET_TLDS` | `es,com,net` | TLDs to prioritise |
|
||||||
|
| `TARGET_COUNTRIES` | `ES,GB,DE,FR,RO,PT,AD,IT` | Countries for scoring bonus |
|
||||||
|
|
||||||
|
## Scoring
|
||||||
|
|
||||||
|
| Signal | Points |
|
||||||
|
|---|---|
|
||||||
|
| Domain is live | +20 |
|
||||||
|
| SSL expiry < 30 days | +15 |
|
||||||
|
| No valid SSL | +15 |
|
||||||
|
| Known CMS detected | +15 |
|
||||||
|
| No MX record | +10 |
|
||||||
|
| IP in target country | +10 |
|
||||||
|
| Shared hosting server | +10 |
|
||||||
|
| Local business keywords in title | +5 |
|
||||||
|
|
||||||
|
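Max score: 100. Hot ≥ 80, Warm 50–79, Cold < 50. Certificate expiry is only measured for certificates that validate, so "SSL expiry < 30 days" and "No valid SSL" never apply to the same domain; the highest score reachable in practice is 85.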
|
||||||
|
|
||||||
|
## API
|
||||||
|
|
||||||
|
```
|
||||||
|
GET /api/stats
|
||||||
|
GET /api/domains?tld=es&page=1&limit=100&live_only=false
|
||||||
|
POST /api/enrich/batch { "domains": ["example.com"] }
|
||||||
|
GET /api/enrich/status
|
||||||
|
POST /api/enrich/pause
|
||||||
|
POST /api/enrich/resume
|
||||||
|
POST /api/enrich/retry
|
||||||
|
GET /api/enriched?min_score=60&cms=wordpress&country=ES
|
||||||
|
GET /api/export?tier=hot (streams CSV)
|
||||||
|
POST /api/score/run
|
||||||
|
```
|
||||||
0
app/__init__.py
Normal file
0
app/__init__.py
Normal file
231
app/db.py
Normal file
231
app/db.py
Normal file
@@ -0,0 +1,231 @@
|
|||||||
|
import os
|
||||||
|
import aiosqlite
|
||||||
|
import duckdb
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Filesystem layout: everything persistent lives under DATA_DIR.
DATA_DIR = Path(os.getenv("DATA_DIR", "/data"))
PARQUET_PATH = DATA_DIR / "domains.parquet"
SQLITE_PATH = DATA_DIR / "enrichment.db"

# SQLite schema. Every statement is idempotent (IF NOT EXISTS) so the script
# can be executed on each start.
SCHEMA = """
CREATE TABLE IF NOT EXISTS enriched_domains (
    domain TEXT PRIMARY KEY,
    is_live INTEGER DEFAULT 0,
    status_code INTEGER,
    ssl_valid INTEGER DEFAULT 0,
    ssl_expiry_days INTEGER,
    cms TEXT,
    has_mx INTEGER DEFAULT 0,
    ip_country TEXT,
    page_title TEXT,
    server TEXT,
    enriched_at TEXT,
    error TEXT,
    score INTEGER DEFAULT 0
);

CREATE TABLE IF NOT EXISTS job_queue (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    domain TEXT UNIQUE NOT NULL,
    status TEXT DEFAULT 'pending',
    created_at TEXT DEFAULT (datetime('now')),
    started_at TEXT,
    completed_at TEXT,
    error TEXT
);

CREATE TABLE IF NOT EXISTS scores (
    domain TEXT PRIMARY KEY,
    score INTEGER NOT NULL,
    scored_at TEXT DEFAULT (datetime('now'))
);

-- The worker and the status endpoints filter job_queue by status.
CREATE INDEX IF NOT EXISTS idx_job_queue_status ON job_queue(status);

-- Lead listings filter and sort enriched_domains by score.
CREATE INDEX IF NOT EXISTS idx_enriched_domains_score ON enriched_domains(score);
"""
|
||||||
|
|
||||||
|
|
||||||
|
async def init_db():
    """Create the SQLite tables described by SCHEMA if they are missing."""
    connection = aiosqlite.connect(SQLITE_PATH)
    async with connection as db:
        # executescript runs every statement in SCHEMA in one call.
        await db.executescript(SCHEMA)
        await db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
async def get_db():
    """Open and return a new aiosqlite connection to the enrichment database.

    The caller owns the returned connection and is responsible for closing it.
    """
    connection = await aiosqlite.connect(SQLITE_PATH)
    return connection
|
||||||
|
|
||||||
|
|
||||||
|
def duckdb_query(sql: str, params=None):
    """Run *sql* on a throw-away in-memory DuckDB connection and return all rows.

    Args:
        sql: Statement to execute; may contain ``?`` placeholders.
        params: Optional sequence of values bound to the placeholders.

    Returns:
        The list of row tuples produced by ``fetchall()``.
    """
    conn = duckdb.connect(database=":memory:", read_only=False)
    try:
        conn.execute("SET threads=4")
        cursor = conn.execute(sql, params) if params else conn.execute(sql)
        return cursor.fetchall()
    finally:
        # Close even when the query raises, so a failed request does not leak
        # the connection.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
def duckdb_query_df(sql: str, params=None):
    """Run *sql* on a throw-away in-memory DuckDB connection and return a DataFrame.

    Args:
        sql: Statement to execute; may contain ``?`` placeholders.
        params: Optional sequence of values bound to the placeholders.

    Returns:
        The result of ``.df()`` on the executed statement.
    """
    conn = duckdb.connect(database=":memory:", read_only=False)
    try:
        conn.execute("SET threads=4")
        cursor = conn.execute(sql, params) if params else conn.execute(sql)
        return cursor.df()
    finally:
        # Close even when the query raises, so a failed request does not leak
        # the connection.
        conn.close()
|
||||||
|
|
||||||
|
|
||||||
|
async def get_stats():
    """Collect dashboard statistics from the parquet dataset and SQLite.

    Returns:
        A dict with ``total_domains``, ``enriched``, ``hot_leads`` (rows whose
        score is at least ``SCORE_THRESHOLD``), ``tld_breakdown`` (top 20 TLDs
        by count) and ``queue`` (pending/running/done/failed job counts).
    """
    # Local import: this module does not import asyncio at file level.
    import asyncio

    # Double any single quote so the path cannot terminate the SQL literal.
    parquet = str(PARQUET_PATH).replace("'", "''")

    total_sql = f"SELECT COUNT(*) FROM read_parquet('{parquet}')"
    tld_sql = f"""
        SELECT
            regexp_extract(domain, '\\.([a-zA-Z0-9]+)$', 1) AS tld,
            COUNT(*) AS cnt
        FROM read_parquet('{parquet}')
        GROUP BY tld
        ORDER BY cnt DESC
        LIMIT 20
    """

    # duckdb_query is synchronous; run the scans in a worker thread so the
    # event loop keeps serving other requests meanwhile.
    total = (await asyncio.to_thread(duckdb_query, total_sql))[0][0]
    tld_rows = await asyncio.to_thread(duckdb_query, tld_sql)

    threshold = int(os.getenv("SCORE_THRESHOLD", "60"))
    async with aiosqlite.connect(SQLITE_PATH) as db:
        async with db.execute("SELECT COUNT(*) FROM enriched_domains") as cur:
            enriched = (await cur.fetchone())[0]
        async with db.execute(
            "SELECT COUNT(*) FROM enriched_domains WHERE score >= ?", (threshold,)
        ) as cur:
            hot_leads = (await cur.fetchone())[0]
        # One grouped query instead of one COUNT per status.
        async with db.execute(
            "SELECT status, COUNT(*) FROM job_queue GROUP BY status"
        ) as cur:
            queue_counts = {status: count async for status, count in cur}

    return {
        "total_domains": total,
        "enriched": enriched,
        "hot_leads": hot_leads,
        "tld_breakdown": [{"tld": r[0], "count": r[1]} for r in tld_rows],
        "queue": {
            "pending": queue_counts.get("pending", 0),
            "running": queue_counts.get("running", 0),
            "done": queue_counts.get("done", 0),
            "failed": queue_counts.get("failed", 0),
        },
    }
|
||||||
|
|
||||||
|
|
||||||
|
async def get_domains(tld=None, page=1, limit=100, live_only=False):
    """Return one page of domains, merged with any stored enrichment data.

    Args:
        tld: Optional top-level domain (without the dot) to filter on.
        page: 1-based page number.
        limit: Page size.
        live_only: When true, return only domains already enriched and found
            live. Liveness is only known for enriched domains, so this page is
            served from SQLite instead of the parquet file.

    Returns:
        A list of dicts. Enriched domains carry every ``enriched_domains``
        column; others are ``{"domain": <name>}``.
    """
    # Local import: this module does not import asyncio at file level.
    import asyncio

    limit = max(int(limit), 1)
    offset = (max(int(page), 1) - 1) * limit

    if live_only:
        conditions = ["is_live = 1"]
        params = []
        if tld:
            # Exact, case-sensitive suffix match on ".<tld>".
            suffix = f".{tld}"
            conditions.append("substr(domain, -length(?)) = ?")
            params += [suffix, suffix]
        async with aiosqlite.connect(SQLITE_PATH) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                f"SELECT * FROM enriched_domains WHERE {' AND '.join(conditions)} "
                "ORDER BY domain LIMIT ? OFFSET ?",
                params + [limit, offset],
            ) as cur:
                return [dict(r) async for r in cur]

    # Double any single quote so the path cannot terminate the SQL literal.
    parquet = str(PARQUET_PATH).replace("'", "''")
    conditions = []
    params = []
    if tld:
        # tld arrives from the query string: bind it, never interpolate it.
        conditions.append("regexp_extract(domain, '\\.([a-zA-Z0-9]+)$', 1) = ?")
        params.append(tld)
    where = f"WHERE {' AND '.join(conditions)}" if conditions else ""

    # limit/offset were coerced to int above, so formatting them in is safe.
    sql = f"""
        SELECT domain
        FROM read_parquet('{parquet}')
        {where}
        LIMIT {limit} OFFSET {offset}
    """
    # duckdb_query is synchronous; keep the scan off the event loop.
    rows = await asyncio.to_thread(duckdb_query, sql, params)
    domains = [r[0] for r in rows]
    if not domains:
        return []

    # Merge enrichment data from SQLite.
    placeholders = ",".join("?" * len(domains))
    async with aiosqlite.connect(SQLITE_PATH) as db:
        db.row_factory = aiosqlite.Row
        async with db.execute(
            f"SELECT * FROM enriched_domains WHERE domain IN ({placeholders})",
            domains,
        ) as cur:
            enriched = {r["domain"]: dict(r) async for r in cur}

    return [enriched.get(d, {"domain": d}) for d in domains]
|
||||||
|
|
||||||
|
|
||||||
|
async def get_enriched(min_score=0, cms=None, country=None, page=1, limit=100):
    """Return one page of enriched domains, highest score first.

    Args:
        min_score: Inclusive lower bound on ``score``.
        cms: Optional exact match on the ``cms`` column.
        country: Optional exact match on the ``ip_country`` column.
        page: 1-based page number.
        limit: Page size.

    Returns:
        A list of row dicts from ``enriched_domains``.
    """
    offset = (page - 1) * limit
    conditions = ["score >= ?"]
    params = [min_score]
    if cms:
        conditions.append("cms = ?")
        params.append(cms)
    if country:
        conditions.append("ip_country = ?")
        params.append(country)

    where = "WHERE " + " AND ".join(conditions)
    async with aiosqlite.connect(SQLITE_PATH) as db:
        db.row_factory = aiosqlite.Row
        # `domain` breaks ties between equal scores; without it the order of
        # tied rows is unspecified and LIMIT/OFFSET paging can repeat or skip
        # rows between pages.
        async with db.execute(
            f"SELECT * FROM enriched_domains {where} "
            "ORDER BY score DESC, domain ASC LIMIT ? OFFSET ?",
            params + [limit, offset],
        ) as cur:
            rows = [dict(r) async for r in cur]
    return rows
|
||||||
|
|
||||||
|
|
||||||
|
async def queue_domains(domains: list[str]):
    """Add *domains* to the job queue, skipping any that are already queued.

    ``INSERT OR IGNORE`` relies on the UNIQUE constraint on ``job_queue.domain``.
    """
    insert_sql = "INSERT OR IGNORE INTO job_queue (domain) VALUES (?)"
    rows = [(name,) for name in domains]
    async with aiosqlite.connect(SQLITE_PATH) as db:
        await db.executemany(insert_sql, rows)
        await db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
async def get_queue_status():
    """Summarise the job queue by status and give a rough time-to-finish.

    Returns:
        A dict with ``total`` (all jobs, any status), ``pending``, ``running``,
        ``done``, ``failed`` and ``eta_seconds``. The estimate is ``None`` when
        nothing is pending or running; otherwise it divides the outstanding
        jobs by a tenth of ``CONCURRENCY_LIMIT`` (never less than 1).
    """
    counts = {}
    async with aiosqlite.connect(SQLITE_PATH) as db:
        async with db.execute(
            "SELECT status, COUNT(*) FROM job_queue GROUP BY status"
        ) as cur:
            async for row in cur:
                counts[row[0]] = row[1]

    pending = counts.get("pending", 0)
    running = counts.get("running", 0)

    if running > 0 or pending > 0:
        concurrency = int(os.getenv("CONCURRENCY_LIMIT", "50"))
        eta = (pending + running) / max(concurrency / 10, 1)
    else:
        eta = None

    return {
        "total": sum(counts.values()),
        "pending": pending,
        "running": running,
        "done": counts.get("done", 0),
        "failed": counts.get("failed", 0),
        "eta_seconds": eta,
    }
|
||||||
270
app/enricher.py
Normal file
270
app/enricher.py
Normal file
@@ -0,0 +1,270 @@
|
|||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import ssl
|
||||||
|
import socket
|
||||||
|
import datetime
|
||||||
|
import logging
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import dns.resolver
|
||||||
|
import aiosqlite
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from app.db import SQLITE_PATH
|
||||||
|
from app.scorer import score
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
CONCURRENCY_LIMIT = int(os.getenv("CONCURRENCY_LIMIT", "50"))
|
||||||
|
# ip-api.com free tier: 45 req/min → 0.75 req/s (one request every ~1.33 s). We use a separate slower semaphore.
|
||||||
|
IP_API_SEMAPHORE: Optional[asyncio.Semaphore] = None
|
||||||
|
IP_API_RATE = 45 # per minute
|
||||||
|
|
||||||
|
_worker_task: Optional[asyncio.Task] = None
|
||||||
|
_paused = False
|
||||||
|
|
||||||
|
|
||||||
|
def get_ip_semaphore():
    """Return the shared single-slot semaphore for ip-api.com calls.

    The semaphore is created on first use and then reused.
    """
    global IP_API_SEMAPHORE
    if IP_API_SEMAPHORE is not None:
        return IP_API_SEMAPHORE
    IP_API_SEMAPHORE = asyncio.Semaphore(1)
    return IP_API_SEMAPHORE
|
||||||
|
|
||||||
|
|
||||||
|
# CMS name -> substrings whose presence in the page HTML or the response
# headers identifies that CMS. Matching is case-insensitive and dict order
# decides which CMS wins when several match.
CMS_SIGNATURES = {
    "wordpress": ["/wp-content/", "/wp-includes/", 'name="generator" content="WordPress'],
    "joomla": ["/components/com_", "Joomla!", 'name="generator" content="Joomla'],
    "drupal": ["/sites/default/files/", "Drupal.settings", 'name="generator" content="Drupal'],
    "wix": ["wix.com", "X-Wix-"],
    "squarespace": ["squarespace.com", "X-Squarespace-"],
    "shopify": ["cdn.shopify.com", "Shopify.theme"],
    "prestashop": ["PrestaShop", "/modules/"],
    "magento": ["Mage.Cookies", "X-Magento-"],
    "typo3": ["typo3", "TYPO3 CMS"],
    "opencart": ["route=common/home", "OpenCart"],
}


def detect_cms(html: str, headers: dict) -> Optional[str]:
    """Return the first CMS whose signature appears in the page or headers.

    Args:
        html: Response body; only the first 50,000 characters are inspected.
        headers: Response headers, searched as ``name:value`` pairs.

    Returns:
        A key of ``CMS_SIGNATURES``, or ``None`` when nothing matches.
    """
    header_blob = " ".join(f"{k}:{v}" for k, v in headers.items())
    # Lower-case the haystack once instead of once per signature.
    haystack = (html[:50000] + header_blob).lower()
    for cms, sigs in CMS_SIGNATURES.items():
        if any(sig.lower() in haystack for sig in sigs):
            return cms
    return None
|
||||||
|
|
||||||
|
|
||||||
|
async def check_ssl(domain: str) -> tuple[bool, Optional[int]]:
    """Check the TLS certificate served by *domain* on port 443.

    The handshake uses ``ssl.create_default_context()`` and runs in the default
    executor because the socket calls block.

    Returns:
        ``(True, days_until_expiry)`` when the handshake succeeds and the
        certificate's ``notAfter`` can be parsed, otherwise ``(False, None)``.
        Any error (DNS, connection, verification, parsing) gives the latter.
    """
    # Local import: `time` is not imported at file level in this module.
    import time

    try:
        ctx = ssl.create_default_context()
        loop = asyncio.get_running_loop()

        def _check():
            with socket.create_connection((domain, 443), timeout=5) as sock:
                with ctx.wrap_socket(sock, server_hostname=domain) as ssock:
                    cert = ssock.getpeercert()
            # cert_time_to_seconds parses the certificate time format
            # independently of the process locale, unlike strptime("%b ...").
            expires_at = ssl.cert_time_to_seconds(cert["notAfter"])
            # Floor division matches timedelta.days (rounds toward -inf).
            return True, int((expires_at - time.time()) // 86400)

        return await loop.run_in_executor(None, _check)
    except Exception:
        # Best effort: every failure is reported as "no valid SSL".
        return False, None
|
||||||
|
|
||||||
|
|
||||||
|
async def check_mx(domain: str) -> bool:
    """Return True when *domain* has at least one MX record.

    The lookup blocks, so it runs in the default executor. Any resolver or
    executor error is reported as False.
    """

    def _has_mx_records():
        try:
            records = dns.resolver.resolve(domain, "MX", lifetime=5)
        except Exception:
            return False
        return len(records) > 0

    try:
        event_loop = asyncio.get_event_loop()
        return await event_loop.run_in_executor(None, _has_mx_records)
    except Exception:
        return False
|
||||||
|
|
||||||
|
|
||||||
|
# Event-loop timestamp of the most recent ip-api.com request.
_ip_last_call = 0.0
# Placeholder kept for compatibility. The previous initialiser,
# `asyncio.Lock() if False else None`, always evaluated to None.
_ip_lock = None
|
||||||
|
|
||||||
|
|
||||||
|
async def get_ip_country(ip: str) -> Optional[str]:
    """Look up the country code of *ip* via ip-api.com.

    Requests are spaced so that at most ``IP_API_RATE`` are started per minute:
    the shared semaphore is held while waiting out the remaining interval.

    Returns:
        The ``countryCode`` field of a 200 response, or ``None`` on any other
        status or on any error.
    """
    global _ip_last_call

    # Enforce 45 req/min = 1 req per 1.33s
    async with get_ip_semaphore():
        elapsed = asyncio.get_event_loop().time() - _ip_last_call
        remaining = (1 / (IP_API_RATE / 60)) - elapsed
        if remaining > 0:
            await asyncio.sleep(remaining)
        _ip_last_call = asyncio.get_event_loop().time()

    url = f"http://ip-api.com/json/{ip}?fields=countryCode"
    try:
        async with httpx.AsyncClient(timeout=5) as client:
            response = await client.get(url)
            if response.status_code == 200:
                payload = response.json()
                return payload.get("countryCode")
    except Exception:
        pass
    return None
|
||||||
|
|
||||||
|
|
||||||
|
async def enrich_domain(domain: str) -> dict:
    """Probe *domain* over HTTP, TLS, DNS and IP geolocation, then score it.

    Returns:
        A dict with the keys stored in ``enriched_domains``: ``domain``,
        ``is_live``, ``status_code``, ``ssl_valid``, ``ssl_expiry_days``,
        ``cms``, ``has_mx``, ``ip_country``, ``page_title``, ``server``,
        ``enriched_at``, ``error`` and ``score``. An HTTP failure is recorded
        in ``error`` rather than raised.
    """
    # Naive-UTC ISO timestamp: the same format utcnow().isoformat() produced,
    # without calling the deprecated utcnow().
    enriched_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    result = {
        "domain": domain,
        "is_live": False,
        "status_code": None,
        "ssl_valid": False,
        "ssl_expiry_days": None,
        "cms": None,
        "has_mx": False,
        "ip_country": None,
        "page_title": None,
        "server": None,
        "enriched_at": enriched_at.isoformat(),
        "error": None,
    }

    try:
        async with httpx.AsyncClient(
            timeout=10,
            follow_redirects=True,
            verify=False,  # certificate validity is assessed separately by check_ssl
            headers={"User-Agent": "Mozilla/5.0 (compatible; DomGod/1.0)"},
        ) as client:
            resp = await client.get(f"http://{domain}")
            result["is_live"] = resp.status_code in (200, 301, 302, 303, 307, 308)
            result["status_code"] = resp.status_code
            result["server"] = resp.headers.get("server")

            html = resp.text
            soup = BeautifulSoup(html, "html.parser")
            title_tag = soup.find("title")
            result["page_title"] = title_tag.get_text(strip=True)[:500] if title_tag else None
            result["cms"] = detect_cms(html, dict(resp.headers))

            # Resolve IP for country lookup (best effort).
            try:
                loop = asyncio.get_running_loop()
                ip = await loop.run_in_executor(None, socket.gethostbyname, domain)
                result["ip_country"] = await get_ip_country(ip)
            except Exception:
                logger.debug("IP/country lookup failed for %s", domain, exc_info=True)

    except Exception as e:
        result["error"] = str(e)[:500]

    # The SSL and MX probes are independent of the HTTP fetch and of each
    # other, so run them concurrently. Both swallow their own errors.
    (ssl_valid, ssl_days), has_mx = await asyncio.gather(
        check_ssl(domain), check_mx(domain)
    )
    result["ssl_valid"] = ssl_valid
    result["ssl_expiry_days"] = ssl_days
    result["has_mx"] = has_mx

    # Score
    result["score"] = score(result)

    return result
|
||||||
|
|
||||||
|
|
||||||
|
async def save_enriched(data: dict):
    """Upsert one enrichment result and mirror its score into ``scores``.

    Args:
        data: A result dict holding every ``enriched_domains`` column,
            including ``score``.
    """
    # Order must match the column list of the INSERT below.
    columns = (
        "domain", "is_live", "status_code", "ssl_valid", "ssl_expiry_days", "cms",
        "has_mx", "ip_country", "page_title", "server", "enriched_at", "error", "score",
    )
    enriched_values = tuple(data[name] for name in columns)
    score_values = (data["domain"], data["score"])

    async with aiosqlite.connect(SQLITE_PATH) as db:
        await db.execute(
            """INSERT INTO enriched_domains
            (domain, is_live, status_code, ssl_valid, ssl_expiry_days, cms,
            has_mx, ip_country, page_title, server, enriched_at, error, score)
            VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?)
            ON CONFLICT(domain) DO UPDATE SET
            is_live=excluded.is_live, status_code=excluded.status_code,
            ssl_valid=excluded.ssl_valid, ssl_expiry_days=excluded.ssl_expiry_days,
            cms=excluded.cms, has_mx=excluded.has_mx, ip_country=excluded.ip_country,
            page_title=excluded.page_title, server=excluded.server,
            enriched_at=excluded.enriched_at, error=excluded.error, score=excluded.score""",
            enriched_values,
        )
        await db.execute(
            """INSERT INTO scores (domain, score) VALUES (?,?)
            ON CONFLICT(domain) DO UPDATE SET score=excluded.score, scored_at=datetime('now')""",
            score_values,
        )
        await db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
async def mark_job(domain: str, status: str, error: Optional[str] = None):
    """Record a status transition for *domain* in ``job_queue``.

    Args:
        domain: Queue entry to update.
        status: ``"running"`` stamps ``started_at``; ``"done"`` or ``"failed"``
            stamp ``completed_at`` and store *error*.
        error: Optional error text, stored for ``done``/``failed`` only.

    Raises:
        ValueError: If *status* is not one of the three supported values.
            Previously such a call silently changed nothing.
    """
    if status == "running":
        sql = "UPDATE job_queue SET status=?, started_at=datetime('now') WHERE domain=?"
        params = (status, domain)
    elif status in ("done", "failed"):
        sql = "UPDATE job_queue SET status=?, completed_at=datetime('now'), error=? WHERE domain=?"
        params = (status, error, domain)
    else:
        raise ValueError(f"unsupported job status: {status!r}")

    async with aiosqlite.connect(SQLITE_PATH) as db:
        await db.execute(sql, params)
        await db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
async def worker_loop():
    """Poll the job queue forever and enrich pending domains.

    Each iteration takes up to 100 pending domains and processes them with at
    most ``CONCURRENCY_LIMIT`` running at once. The pause flag is checked
    between batches, so a pause takes effect once the current batch finishes.
    """
    sem = asyncio.Semaphore(CONCURRENCY_LIMIT)

    # Jobs still marked 'running' when this loop starts were interrupted: the
    # poll below only selects 'pending', so they would never be retried.
    # Nothing is in flight yet, so requeueing them here is safe.
    async with aiosqlite.connect(SQLITE_PATH) as db:
        await db.execute(
            "UPDATE job_queue SET status='pending', started_at=NULL WHERE status='running'"
        )
        await db.commit()

    async def process(domain: str):
        async with sem:
            await mark_job(domain, "running")
            try:
                data = await enrich_domain(domain)
                await save_enriched(data)
                await mark_job(domain, "done")
            except Exception as e:
                logger.exception("Enrichment failed for %s", domain)
                await mark_job(domain, "failed", str(e)[:500])

    while True:
        if _paused:
            await asyncio.sleep(1)
            continue

        async with aiosqlite.connect(SQLITE_PATH) as db:
            async with db.execute(
                "SELECT domain FROM job_queue WHERE status='pending' LIMIT 100"
            ) as cur:
                rows = await cur.fetchall()

        if not rows:
            await asyncio.sleep(2)
            continue

        tasks = [asyncio.create_task(process(r[0])) for r in rows]
        # return_exceptions=True: one failing job must not cancel the batch.
        await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
|
|
||||||
|
|
||||||
|
def start_worker():
    """Start the background worker if it is not already running, and unpause it.

    Must be called from within a running event loop because it schedules
    ``worker_loop()`` with ``asyncio.create_task``.
    """
    # `_paused` has to be declared global too: without it the assignment below
    # creates a function-local variable and the module flag never changes.
    global _worker_task, _paused
    if _worker_task is None or _worker_task.done():
        _worker_task = asyncio.create_task(worker_loop())
    _paused = False
|
||||||
|
|
||||||
|
|
||||||
|
def pause_worker():
    """Set the module-level pause flag read by the worker loop."""
    global _paused
    _paused = True
|
||||||
|
|
||||||
|
|
||||||
|
def resume_worker():
    """Clear the pause flag, then make sure a worker task exists."""
    global _paused
    _paused = False
    # start_worker only creates a task when none is alive.
    start_worker()
|
||||||
|
|
||||||
|
|
||||||
|
def is_running() -> bool:
    """Return True when a worker task is alive and the worker is not paused."""
    if _worker_task is None:
        return False
    if _worker_task.done():
        return False
    return not _paused
|
||||||
202
app/main.py
Normal file
202
app/main.py
Normal file
@@ -0,0 +1,202 @@
|
|||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from contextlib import asynccontextmanager
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
import duckdb
|
||||||
|
import aiosqlite
|
||||||
|
from fastapi import FastAPI, Query
|
||||||
|
from fastapi.responses import StreamingResponse, JSONResponse
|
||||||
|
from fastapi.staticfiles import StaticFiles
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
from app.db import (
|
||||||
|
DATA_DIR, PARQUET_PATH, SQLITE_PATH,
|
||||||
|
init_db, get_stats, get_domains, get_enriched,
|
||||||
|
queue_domains, get_queue_status,
|
||||||
|
)
|
||||||
|
from app.enricher import start_worker, pause_worker, resume_worker, is_running
|
||||||
|
from app.scorer import run_scoring
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
PARQUET_URL = os.getenv("PARQUET_URL", "")
|
||||||
|
|
||||||
|
|
||||||
|
async def download_parquet():
    """Download the parquet dataset to ``PARQUET_PATH`` unless it is cached.

    Data is streamed to a ``.tmp`` sibling and renamed when complete. If a
    partial ``.tmp`` file exists, the remainder is requested with an HTTP
    ``Range`` header.

    Raises:
        RuntimeError: If the file is not cached and ``PARQUET_URL`` is empty.
        httpx.HTTPStatusError: If the server answers with an error status.
    """
    if PARQUET_PATH.exists():
        logger.info("Using cached parquet at %s", PARQUET_PATH)
        return

    if not PARQUET_URL:
        raise RuntimeError(
            f"PARQUET_URL is not set and no cached parquet exists at {PARQUET_PATH}"
        )

    DATA_DIR.mkdir(parents=True, exist_ok=True)
    tmp_path = PARQUET_PATH.with_suffix(".tmp")

    # Resumable download via Range header
    downloaded = tmp_path.stat().st_size if tmp_path.exists() else 0
    headers = {"Range": f"bytes={downloaded}-"} if downloaded > 0 else {}

    logger.info("Downloading parquet from %s (offset=%d)...", PARQUET_URL, downloaded)

    async with httpx.AsyncClient(follow_redirects=True, timeout=None) as client:
        async with client.stream("GET", PARQUET_URL, headers=headers) as resp:
            if resp.status_code == 416:
                # Requested range starts at or past the end of the file:
                # treat the partial file as already complete.
                tmp_path.rename(PARQUET_PATH)
                return
            resp.raise_for_status()

            # Only 206 means the server honoured the Range header. A plain 200
            # carries the whole file from byte 0, so appending it to the
            # partial file would corrupt it; start over instead.
            resuming = downloaded > 0 and resp.status_code == 206
            if not resuming:
                downloaded = 0

            # Without a content-length the total is unknown; report no percentage.
            content_length = int(resp.headers.get("content-length", 0))
            total = content_length + downloaded if content_length else 0

            with open(tmp_path, "ab" if resuming else "wb") as f:
                received = downloaded
                async for chunk in resp.aiter_bytes(chunk_size=1024 * 1024):
                    f.write(chunk)
                    received += len(chunk)
                    if total:
                        pct = received / total * 100
                        logger.info("Download progress: %.1f%% (%d/%d bytes)", pct, received, total)

    tmp_path.rename(PARQUET_PATH)
    logger.info("Parquet download complete: %s", PARQUET_PATH)
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run startup work before the application begins serving.

    In order: fetch (or reuse) the parquet file, create the SQLite tables,
    start the enrichment worker. Nothing runs after the ``yield``.
    """
    # Startup steps, executed strictly in this order.
    await download_parquet()
    await init_db()
    start_worker()

    logger.info("DomGod dashboard ready on port 6677")
    yield
|
||||||
|
|
||||||
|
|
||||||
|
app = FastAPI(title="DomGod", lifespan=lifespan)
|
||||||
|
|
||||||
|
|
||||||
|
# ── API routes ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
@app.get("/api/stats")
async def stats():
    """Return the dashboard statistics produced by ``get_stats``."""
    summary = await get_stats()
    return summary
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/domains")
async def domains(
    tld: str = Query(None),
    page: int = Query(1, ge=1),
    limit: int = Query(100, ge=1, le=1000),
    live_only: bool = Query(False),
):
    """Return one page of domains together with the paging values used."""
    results = await get_domains(
        tld=tld,
        page=page,
        limit=limit,
        live_only=live_only,
    )
    payload = {"page": page, "limit": limit, "results": results}
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/enrich/batch")
async def enrich_batch(body: dict):
    """Queue the domains listed in ``body["domains"]`` and resume the worker.

    Entries are stripped, lower-cased and de-duplicated (order preserved);
    non-string and empty entries are dropped.

    Returns:
        ``{"queued": <count>}`` on success, or a 400 JSON error when
        ``domains`` is not a list or holds no usable entry.
    """
    raw = body.get("domains", [])
    # A bare string would otherwise be iterated character by character and
    # queued as one-letter "domains".
    if not isinstance(raw, list):
        return JSONResponse({"error": "domains must be a list of strings"}, status_code=400)

    cleaned = (d.strip().lower() for d in raw if isinstance(d, str))
    # dict.fromkeys de-duplicates while keeping first-seen order.
    domains_list = list(dict.fromkeys(d for d in cleaned if d))
    if not domains_list:
        return JSONResponse({"error": "no domains provided"}, status_code=400)

    await queue_domains(domains_list)
    resume_worker()
    return {"queued": len(domains_list)}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/enrich/status")
async def enrich_status():
    """Return the queue counters plus a ``worker_running`` flag."""
    queue_state = await get_queue_status()
    queue_state.update(worker_running=is_running())
    return queue_state
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/enrich/retry")
async def enrich_retry():
    """Move every failed job back to pending and resume the worker."""
    requeue_sql = "UPDATE job_queue SET status='pending', error=NULL WHERE status='failed'"
    async with aiosqlite.connect(SQLITE_PATH) as db:
        await db.execute(requeue_sql)
        await db.commit()

    resume_worker()
    return {"status": "retrying failed jobs"}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/enrich/pause")
async def enrich_pause():
    """Pause the enrichment worker and acknowledge."""
    pause_worker()
    acknowledgement = {"status": "paused"}
    return acknowledgement
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/enrich/resume")
async def enrich_resume():
    """Resume the enrichment worker and acknowledge."""
    resume_worker()
    acknowledgement = {"status": "resumed"}
    return acknowledgement
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/enriched")
async def enriched(
    min_score: int = Query(0, ge=0, le=100),
    cms: str = Query(None),
    country: str = Query(None),
    page: int = Query(1, ge=1),
    limit: int = Query(100, ge=1, le=1000),
):
    """Return one page of enriched domains matching the given filters."""
    results = await get_enriched(
        min_score=min_score,
        cms=cms,
        country=country,
        page=page,
        limit=limit,
    )
    payload = {"page": page, "limit": limit, "results": results}
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/export")
async def export_csv(
    min_score: int = Query(0),
    cms: str = Query(None),
    country: str = Query(None),
    tier: str = Query(None),
):
    """Stream enriched domains as a CSV attachment.

    Args:
        min_score: Inclusive lower score bound; overridden by a known *tier*.
        cms: Optional exact CMS filter.
        country: Optional exact country-code filter.
        tier: ``hot`` (score >= 80), ``warm`` (50-79) or ``cold`` (< 50).
            Any other value is ignored.

    Returns:
        A ``StreamingResponse`` of ``text/csv`` with a header row followed by
        one fully quoted row per domain.
    """
    # Local imports: csv/io are not imported at file level in this module.
    import csv
    import io

    columns = [
        "domain", "score", "cms", "ssl_expiry_days", "ip_country",
        "is_live", "status_code", "has_mx", "server", "page_title", "enriched_at",
    ]
    # tier -> (inclusive lower bound, exclusive upper bound or None)
    tier_bounds = {"hot": (80, None), "warm": (50, 80), "cold": (0, 50)}
    known_tier = tier in tier_bounds
    upper_bound = None
    if known_tier:
        min_score, upper_bound = tier_bounds[tier]

    def to_csv_line(values):
        # csv.writer escapes embedded quotes, commas and newlines correctly.
        buf = io.StringIO()
        csv.writer(buf, quoting=csv.QUOTE_ALL, lineterminator="\n").writerow(values)
        return buf.getvalue()

    async def generate():
        yield ",".join(columns) + "\n"
        page = 1
        while True:
            rows = await get_enriched(min_score=min_score, cms=cms, country=country, page=page, limit=500)
            if not rows:
                break
            for r in rows:
                # get_enriched only supports a lower bound; apply the tier's
                # upper bound here.
                if upper_bound is not None and (r.get("score") or 0) >= upper_bound:
                    continue
                # Only None becomes an empty cell. `value or ""` would also
                # blank legitimate zeros such as score=0 or is_live=0.
                yield to_csv_line(
                    ["" if r.get(col) is None else r.get(col) for col in columns]
                )
            page += 1

    # Only a recognised tier goes into the header; the raw query value is
    # user-controlled and must not be echoed into Content-Disposition.
    suffix = f"_{tier}" if known_tier else ""
    filename = f"domgod_leads_score{min_score}{suffix}.csv"
    return StreamingResponse(
        generate(),
        media_type="text/csv",
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/score/run")
async def score_run():
    """Re-score every enriched domain and return the summary from ``run_scoring``."""
    return await run_scoring()
|
||||||
|
|
||||||
|
|
||||||
|
# ── Static UI ───────────────────────────────────────────────────────────────
|
||||||
|
static_dir = Path(__file__).parent / "static"
|
||||||
|
app.mount("/", StaticFiles(directory=str(static_dir), html=True), name="static")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
import uvicorn
|
||||||
|
uvicorn.run(app, host="0.0.0.0", port=6677, log_level="info")
|
||||||
64
app/scorer.py
Normal file
64
app/scorer.py
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
import os
|
||||||
|
import aiosqlite
|
||||||
|
from app.db import SQLITE_PATH
|
||||||
|
|
||||||
|
KNOWN_CMS = {"wordpress", "joomla", "drupal", "wix", "squarespace", "shopify", "prestashop", "magento", "typo3", "opencart"}
|
||||||
|
TARGET_COUNTRIES = set(os.getenv("TARGET_COUNTRIES", "ES,GB,DE,FR").split(","))
|
||||||
|
|
||||||
|
LOCAL_BIZ_KEYWORDS = {
|
||||||
|
"restaurant", "cafe", "shop", "store", "salon", "plumber", "electrician",
|
||||||
|
"dentist", "clinic", "garage", "hotel", "bakery", "bar", "gym", "spa",
|
||||||
|
"fontanero", "electricista", "dentista", "clínica", "taller", "hotel",
|
||||||
|
"panadería", "peluquería", "tienda",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def local_biz_keywords(title: str | None) -> bool:
|
||||||
|
if not title:
|
||||||
|
return False
|
||||||
|
title_lower = title.lower()
|
||||||
|
return any(kw in title_lower for kw in LOCAL_BIZ_KEYWORDS)
|
||||||
|
|
||||||
|
|
||||||
|
def score(domain_row: dict) -> int:
    """Compute the lead score (0-100) for one enrichment record.

    Args:
        domain_row: Mapping with the enrichment fields; missing keys count as
            absent or false.

    Returns:
        The sum of the points of every signal that applies, capped at 100.
    """
    expiry_days = domain_row.get("ssl_expiry_days")
    cms_name = (domain_row.get("cms") or "").lower()
    server_banner = (domain_row.get("server") or "").lower()

    # (points, applies) for each signal.
    signals = (
        (20, bool(domain_row.get("is_live"))),
        (15, expiry_days is not None and expiry_days < 30),
        (15, not domain_row.get("ssl_valid")),
        (15, cms_name in KNOWN_CMS),
        (10, not domain_row.get("has_mx")),
        (10, domain_row.get("ip_country") in TARGET_COUNTRIES),
        (10, "shared" in server_banner),
        (5, local_biz_keywords(domain_row.get("page_title"))),
    )
    total = sum(points for points, applies in signals if applies)
    return min(total, 100)
|
||||||
|
|
||||||
|
|
||||||
|
async def run_scoring():
    """Recompute the score of every enriched domain and persist the results.

    Updates ``enriched_domains.score`` and upserts the matching ``scores`` row.

    Returns:
        ``{"scored": <number of domains re-scored>}``.
    """
    async with aiosqlite.connect(SQLITE_PATH) as db:
        db.row_factory = aiosqlite.Row
        async with db.execute("SELECT * FROM enriched_domains") as cur:
            rows = [dict(r) async for r in cur]

        scored = [(r["domain"], score(r)) for r in rows]

        # UPDATE binds (score, domain): SET score = ? WHERE domain = ?
        await db.executemany(
            "UPDATE enriched_domains SET score = ? WHERE domain = ?",
            [(value, domain) for domain, value in scored],
        )
        # INSERT binds (domain, score). Reusing the UPDATE tuples here wrote
        # the score into `domain` and the domain name into `score`.
        await db.executemany(
            """INSERT INTO scores (domain, score) VALUES (?, ?)
            ON CONFLICT(domain) DO UPDATE SET score=excluded.score, scored_at=datetime('now')""",
            scored,
        )
        await db.commit()

    return {"scored": len(scored)}
|
||||||
600
app/static/index.html
Normal file
600
app/static/index.html
Normal file
@@ -0,0 +1,600 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>DomGod — Domain Intelligence Dashboard</title>
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js" defer></script>
|
||||||
|
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
|
||||||
|
<style>
|
||||||
|
:root {
|
||||||
|
--bg: #0f1117;
|
||||||
|
--surface: #1a1d27;
|
||||||
|
--surface2: #222638;
|
||||||
|
--border: #2e3250;
|
||||||
|
--accent: #6c63ff;
|
||||||
|
--accent2: #00d4aa;
|
||||||
|
--danger: #ff4f6d;
|
||||||
|
--warn: #ffb347;
|
||||||
|
--text: #e8eaf0;
|
||||||
|
--muted: #8891b0;
|
||||||
|
--hot: #ff4f6d;
|
||||||
|
--warm: #ffb347;
|
||||||
|
--cold: #6c7aff;
|
||||||
|
--radius: 10px;
|
||||||
|
}
|
||||||
|
* { box-sizing: border-box; margin: 0; padding: 0; }
|
||||||
|
body { background: var(--bg); color: var(--text); font-family: 'Segoe UI', system-ui, sans-serif; font-size: 14px; }
|
||||||
|
a { color: var(--accent2); text-decoration: none; }
|
||||||
|
|
||||||
|
/* Layout */
|
||||||
|
.shell { display: flex; flex-direction: column; min-height: 100vh; }
|
||||||
|
header { background: var(--surface); border-bottom: 1px solid var(--border); padding: 14px 24px; display: flex; align-items: center; gap: 12px; position: sticky; top: 0; z-index: 100; }
|
||||||
|
header h1 { font-size: 20px; font-weight: 700; letter-spacing: -0.5px; }
|
||||||
|
header h1 span { color: var(--accent); }
|
||||||
|
.badge { background: var(--accent); color: #fff; font-size: 11px; padding: 2px 8px; border-radius: 99px; }
|
||||||
|
main { padding: 20px 24px; display: flex; flex-direction: column; gap: 20px; max-width: 1400px; margin: 0 auto; width: 100%; }
|
||||||
|
|
||||||
|
/* Cards */
|
||||||
|
.card { background: var(--surface); border: 1px solid var(--border); border-radius: var(--radius); padding: 18px; }
|
||||||
|
.card-title { font-size: 13px; font-weight: 600; color: var(--muted); text-transform: uppercase; letter-spacing: .5px; margin-bottom: 14px; }
|
||||||
|
|
||||||
|
/* Stats bar */
|
||||||
|
.stats-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); gap: 14px; }
|
||||||
|
.stat-box { background: var(--surface2); border-radius: var(--radius); padding: 14px 16px; border: 1px solid var(--border); }
|
||||||
|
.stat-box .label { font-size: 11px; color: var(--muted); text-transform: uppercase; letter-spacing: .5px; }
|
||||||
|
.stat-box .value { font-size: 26px; font-weight: 700; margin-top: 4px; }
|
||||||
|
.stat-box .sub { font-size: 11px; color: var(--muted); margin-top: 2px; }
|
||||||
|
.v-accent { color: var(--accent2); }
|
||||||
|
.v-hot { color: var(--hot); }
|
||||||
|
.v-warn { color: var(--warn); }
|
||||||
|
.v-muted { color: var(--muted); }
|
||||||
|
|
||||||
|
/* Tabs */
|
||||||
|
.tabs { display: flex; gap: 4px; border-bottom: 1px solid var(--border); margin-bottom: 16px; }
|
||||||
|
.tab { padding: 8px 16px; border-radius: 6px 6px 0 0; cursor: pointer; font-size: 13px; font-weight: 500; color: var(--muted); border: 1px solid transparent; border-bottom: none; }
|
||||||
|
.tab.active { background: var(--surface2); color: var(--text); border-color: var(--border); }
|
||||||
|
.tab:hover:not(.active) { color: var(--text); }
|
||||||
|
|
||||||
|
/* Filters */
|
||||||
|
.filter-row { display: flex; flex-wrap: wrap; gap: 10px; align-items: flex-end; margin-bottom: 14px; }
|
||||||
|
.field { display: flex; flex-direction: column; gap: 4px; }
|
||||||
|
.field label { font-size: 11px; color: var(--muted); text-transform: uppercase; }
|
||||||
|
input[type=text], input[type=number], select {
|
||||||
|
background: var(--surface2); border: 1px solid var(--border); color: var(--text);
|
||||||
|
padding: 7px 10px; border-radius: 6px; font-size: 13px; outline: none; min-width: 100px;
|
||||||
|
}
|
||||||
|
input[type=text]:focus, select:focus { border-color: var(--accent); }
|
||||||
|
input[type=range] { accent-color: var(--accent); width: 120px; }
|
||||||
|
.toggle { display: flex; align-items: center; gap: 6px; cursor: pointer; }
|
||||||
|
.toggle input { accent-color: var(--accent); width: 16px; height: 16px; cursor: pointer; }
|
||||||
|
|
||||||
|
/* Buttons */
|
||||||
|
.btn { padding: 7px 14px; border-radius: 6px; font-size: 13px; font-weight: 600; cursor: pointer; border: none; transition: opacity .15s; }
|
||||||
|
.btn:hover { opacity: .85; }
|
||||||
|
.btn-primary { background: var(--accent); color: #fff; }
|
||||||
|
.btn-success { background: var(--accent2); color: #111; }
|
||||||
|
.btn-danger { background: var(--danger); color: #fff; }
|
||||||
|
.btn-warn { background: var(--warn); color: #111; }
|
||||||
|
.btn-ghost { background: var(--surface2); color: var(--text); border: 1px solid var(--border); }
|
||||||
|
.btn:disabled { opacity: .4; cursor: not-allowed; }
|
||||||
|
|
||||||
|
/* Table */
|
||||||
|
.table-wrap { overflow-x: auto; }
|
||||||
|
table { width: 100%; border-collapse: collapse; font-size: 13px; }
|
||||||
|
th { text-align: left; padding: 8px 10px; font-size: 11px; color: var(--muted); text-transform: uppercase; letter-spacing: .4px; border-bottom: 1px solid var(--border); background: var(--surface2); position: sticky; top: 0; }
|
||||||
|
td { padding: 8px 10px; border-bottom: 1px solid var(--border); }
|
||||||
|
tr:hover td { background: var(--surface2); }
|
||||||
|
.pill { display: inline-block; padding: 2px 8px; border-radius: 99px; font-size: 11px; font-weight: 600; }
|
||||||
|
.pill-green { background: #00d4aa22; color: var(--accent2); }
|
||||||
|
.pill-red { background: #ff4f6d22; color: var(--danger); }
|
||||||
|
.pill-grey { background: #ffffff11; color: var(--muted); }
|
||||||
|
.pill-cms { background: #6c63ff22; color: var(--accent); }
|
||||||
|
|
||||||
|
/* Score badge */
|
||||||
|
.score-badge { display: inline-block; padding: 2px 7px; border-radius: 6px; font-weight: 700; font-size: 12px; }
|
||||||
|
|
||||||
|
/* Progress bar */
|
||||||
|
.progress-wrap { background: var(--surface2); border-radius: 99px; height: 10px; overflow: hidden; margin: 8px 0; }
|
||||||
|
.progress-bar { height: 100%; background: linear-gradient(90deg, var(--accent), var(--accent2)); border-radius: 99px; transition: width .4s; }
|
||||||
|
|
||||||
|
/* Pipeline columns */
|
||||||
|
.pipeline { display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 14px; }
|
||||||
|
.pipe-col { background: var(--surface2); border-radius: var(--radius); border: 1px solid var(--border); padding: 14px; }
|
||||||
|
.pipe-col h3 { font-size: 15px; font-weight: 700; margin-bottom: 4px; }
|
||||||
|
.pipe-col .count { font-size: 28px; font-weight: 800; }
|
||||||
|
.pipe-col .samples { margin-top: 10px; display: flex; flex-direction: column; gap: 4px; }
|
||||||
|
.pipe-col .sample { font-size: 12px; color: var(--muted); padding: 4px 8px; background: var(--surface); border-radius: 6px; display: flex; justify-content: space-between; align-items: center; }
|
||||||
|
|
||||||
|
/* Chart */
|
||||||
|
.chart-wrap { max-width: 100%; height: 280px; }
|
||||||
|
|
||||||
|
/* Pagination */
|
||||||
|
.pagination { display: flex; align-items: center; gap: 8px; margin-top: 12px; }
|
||||||
|
|
||||||
|
/* Enrichment status */
|
||||||
|
.enrich-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(120px, 1fr)); gap: 10px; margin-bottom: 14px; }
|
||||||
|
.enrich-stat { background: var(--surface2); border-radius: 8px; padding: 10px 14px; text-align: center; }
|
||||||
|
.enrich-stat .val { font-size: 22px; font-weight: 700; }
|
||||||
|
.enrich-stat .lbl { font-size: 11px; color: var(--muted); margin-top: 2px; }
|
||||||
|
|
||||||
|
@media (max-width: 700px) {
|
||||||
|
.pipeline { grid-template-columns: 1fr; }
|
||||||
|
.stats-grid { grid-template-columns: repeat(2, 1fr); }
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<div class="shell" x-data="dashboard()" x-init="init()">
|
||||||
|
|
||||||
|
<header>
|
||||||
|
<h1>Dom<span>God</span></h1>
|
||||||
|
<span class="badge" x-text="'v1.0'"></span>
|
||||||
|
<span style="flex:1"></span>
|
||||||
|
<span style="font-size:12px; color:var(--muted)" x-text="stats.total_domains ? stats.total_domains.toLocaleString() + ' domains' : 'Loading...'"></span>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<main>
|
||||||
|
|
||||||
|
<!-- ① Stats Bar -->
|
||||||
|
<div class="card">
|
||||||
|
<div class="card-title">Overview</div>
|
||||||
|
<div class="stats-grid">
|
||||||
|
<div class="stat-box">
|
||||||
|
<div class="label">Total Domains</div>
|
||||||
|
<div class="value v-accent" x-text="stats.total_domains ? stats.total_domains.toLocaleString() : '—'"></div>
|
||||||
|
<div class="sub">in parquet</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-box">
|
||||||
|
<div class="label">Enriched</div>
|
||||||
|
<div class="value v-accent" x-text="stats.enriched ? stats.enriched.toLocaleString() : '0'"></div>
|
||||||
|
<div class="sub" x-text="stats.total_domains ? ((stats.enriched/stats.total_domains*100).toFixed(3) + '%') : ''"></div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-box">
|
||||||
|
<div class="label">Hot Leads</div>
|
||||||
|
<div class="value v-hot" x-text="stats.hot_leads ? stats.hot_leads.toLocaleString() : '0'"></div>
|
||||||
|
<div class="sub">score ≥ 60</div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-box">
|
||||||
|
<div class="label">Queue Pending</div>
|
||||||
|
<div class="value v-warn" x-text="stats.queue ? stats.queue.pending.toLocaleString() : '0'"></div>
|
||||||
|
<div class="sub" x-text="stats.queue ? stats.queue.running + ' running' : ''"></div>
|
||||||
|
</div>
|
||||||
|
<div class="stat-box">
|
||||||
|
<div class="label">Done / Failed</div>
|
||||||
|
<div class="value v-muted" x-text="stats.queue ? stats.queue.done.toLocaleString() : '0'"></div>
|
||||||
|
<div class="sub" x-text="stats.queue ? stats.queue.failed + ' failed' : ''"></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Tabs -->
|
||||||
|
<div class="tabs">
|
||||||
|
<div class="tab" :class="{active: tab==='browse'}" @click="tab='browse'">Browse & Filter</div>
|
||||||
|
<div class="tab" :class="{active: tab==='enrichment'}" @click="tab='enrichment'">Enrichment Queue</div>
|
||||||
|
<div class="tab" :class="{active: tab==='pipeline'}" @click="tab='pipeline'">Lead Pipeline</div>
|
||||||
|
<div class="tab" :class="{active: tab==='chart'}" @click="tab='chart'; renderChart()">TLD Chart</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- ② Browse & Filter -->
|
||||||
|
<div class="card" x-show="tab==='browse'">
|
||||||
|
<div class="filter-row">
|
||||||
|
<div class="field">
|
||||||
|
<label>TLD</label>
|
||||||
|
<input type="text" x-model="filter.tld" placeholder="es, com…" @keydown.enter="loadDomains()">
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label>Country</label>
|
||||||
|
<input type="text" x-model="filter.country" placeholder="ES, GB…">
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label>Min Score: <strong x-text="filter.min_score"></strong></label>
|
||||||
|
<input type="range" x-model="filter.min_score" min="0" max="100" step="5">
|
||||||
|
</div>
|
||||||
|
<div class="field">
|
||||||
|
<label>CMS</label>
|
||||||
|
<select x-model="filter.cms">
|
||||||
|
<option value="">Any</option>
|
||||||
|
<option>wordpress</option><option>joomla</option><option>drupal</option>
|
||||||
|
<option>wix</option><option>squarespace</option><option>shopify</option>
|
||||||
|
<option>prestashop</option><option>magento</option><option>typo3</option><option>opencart</option>
|
||||||
|
</select>
|
||||||
|
</div>
|
||||||
|
<label class="toggle field">
|
||||||
|
<label>Live only</label>
|
||||||
|
<input type="checkbox" x-model="filter.live_only">
|
||||||
|
</label>
|
||||||
|
<button class="btn btn-primary" @click="loadDomains(1)">Search</button>
|
||||||
|
<button class="btn btn-success" @click="enqueueSelected()" :disabled="selected.length===0">
|
||||||
|
Enrich selected (<span x-text="selected.length"></span>)
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="table-wrap">
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th><input type="checkbox" @change="toggleAll($event)"></th>
|
||||||
|
<th>Domain</th>
|
||||||
|
<th>Score</th>
|
||||||
|
<th>CMS</th>
|
||||||
|
<th>SSL days</th>
|
||||||
|
<th>Country</th>
|
||||||
|
<th>Live</th>
|
||||||
|
<th>Status</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<template x-for="row in domains" :key="row.domain">
|
||||||
|
<tr>
|
||||||
|
<td><input type="checkbox" :value="row.domain" x-model="selected"></td>
|
||||||
|
<td>
|
||||||
|
<a :href="'http://'+row.domain" target="_blank" x-text="row.domain"></a>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<span class="score-badge"
|
||||||
|
:style="scoreBg(row.score)"
|
||||||
|
x-text="row.score ?? '—'"></span>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<span x-show="row.cms" class="pill pill-cms" x-text="row.cms"></span>
|
||||||
|
<span x-show="!row.cms" class="pill pill-grey">—</span>
|
||||||
|
</td>
|
||||||
|
<td x-text="row.ssl_expiry_days ?? '—'"></td>
|
||||||
|
<td x-text="row.ip_country ?? '—'"></td>
|
||||||
|
<td>
|
||||||
|
<span class="pill" :class="row.is_live ? 'pill-green' : 'pill-grey'" x-text="row.is_live ? 'Yes' : '—'"></span>
|
||||||
|
</td>
|
||||||
|
<td x-text="row.status_code ?? '—'"></td>
|
||||||
|
</tr>
|
||||||
|
</template>
|
||||||
|
<tr x-show="domains.length===0 && !loading">
|
||||||
|
<td colspan="8" style="text-align:center;color:var(--muted);padding:24px">No results — run a search above</td>
|
||||||
|
</tr>
|
||||||
|
<tr x-show="loading">
|
||||||
|
<td colspan="8" style="text-align:center;color:var(--muted);padding:24px">Loading…</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="pagination">
|
||||||
|
<button class="btn btn-ghost" @click="loadDomains(page-1)" :disabled="page<=1">← Prev</button>
|
||||||
|
<span style="color:var(--muted)">Page <strong x-text="page"></strong></span>
|
||||||
|
<button class="btn btn-ghost" @click="loadDomains(page+1)" :disabled="domains.length < filter.limit">Next →</button>
|
||||||
|
<span style="color:var(--muted);margin-left:8px">Limit:
|
||||||
|
<select x-model="filter.limit" @change="loadDomains(1)" style="width:80px">
|
||||||
|
<option value="50">50</option>
|
||||||
|
<option value="100">100</option>
|
||||||
|
<option value="250">250</option>
|
||||||
|
</select>
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- ③ Enrichment Queue -->
|
||||||
|
<div class="card" x-show="tab==='enrichment'">
|
||||||
|
<div class="enrich-grid">
|
||||||
|
<div class="enrich-stat">
|
||||||
|
<div class="val v-warn" x-text="queueStatus.pending ?? '—'"></div>
|
||||||
|
<div class="lbl">Pending</div>
|
||||||
|
</div>
|
||||||
|
<div class="enrich-stat">
|
||||||
|
<div class="val v-accent" x-text="queueStatus.running ?? '—'"></div>
|
||||||
|
<div class="lbl">Running</div>
|
||||||
|
</div>
|
||||||
|
<div class="enrich-stat">
|
||||||
|
<div class="val v-accent" x-text="queueStatus.done ?? '—'"></div>
|
||||||
|
<div class="lbl">Done</div>
|
||||||
|
</div>
|
||||||
|
<div class="enrich-stat">
|
||||||
|
<div class="val v-hot" x-text="queueStatus.failed ?? '—'"></div>
|
||||||
|
<div class="lbl">Failed</div>
|
||||||
|
</div>
|
||||||
|
<div class="enrich-stat">
|
||||||
|
<div class="val v-muted" x-text="queueStatus.eta_seconds ? Math.ceil(queueStatus.eta_seconds/60) + 'm' : '—'"></div>
|
||||||
|
<div class="lbl">ETA</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:6px">
|
||||||
|
<span style="font-size:12px;color:var(--muted)" x-text="progressLabel()"></span>
|
||||||
|
<div style="display:flex;gap:8px">
|
||||||
|
<button class="btn btn-success" @click="startEnrich()" x-show="!enrichRunning">▶ Start</button>
|
||||||
|
<button class="btn btn-warn" @click="pauseEnrich()" x-show="enrichRunning">⏸ Pause</button>
|
||||||
|
<button class="btn btn-ghost" @click="retryFailed()">↺ Retry Failed</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div class="progress-wrap">
|
||||||
|
<div class="progress-bar" :style="'width:' + progressPct() + '%'"></div>
|
||||||
|
</div>
|
||||||
|
<div style="font-size:11px;color:var(--muted);margin-top:4px" x-text="progressPct().toFixed(1) + '% complete'"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div style="margin-top:20px">
|
||||||
|
<div class="card-title">Enrich custom domains</div>
|
||||||
|
<div style="display:flex;gap:8px;align-items:flex-start">
|
||||||
|
<textarea
|
||||||
|
x-model="customDomains"
|
||||||
|
placeholder="example.com another.es third.net"
|
||||||
|
style="flex:1;background:var(--surface2);border:1px solid var(--border);color:var(--text);border-radius:6px;padding:8px;min-height:80px;font-size:12px;resize:vertical"
|
||||||
|
></textarea>
|
||||||
|
<button class="btn btn-primary" @click="enqueueCustom()" style="align-self:flex-end">Queue</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- ④ Lead Pipeline -->
|
||||||
|
<div class="card" x-show="tab==='pipeline'">
|
||||||
|
<div style="display:flex;justify-content:flex-end;gap:8px;margin-bottom:14px">
|
||||||
|
<button class="btn btn-ghost" @click="loadPipeline()">↻ Refresh</button>
|
||||||
|
</div>
|
||||||
|
<div class="pipeline">
|
||||||
|
<!-- Hot -->
|
||||||
|
<div class="pipe-col" style="border-top:3px solid var(--hot)">
|
||||||
|
<h3>🔥 Hot</h3>
|
||||||
|
<div style="color:var(--muted);font-size:12px">Score 80–100</div>
|
||||||
|
<div class="count" style="color:var(--hot)" x-text="pipeline.hot.count.toLocaleString()"></div>
|
||||||
|
<div class="samples">
|
||||||
|
<template x-for="d in pipeline.hot.samples" :key="d.domain">
|
||||||
|
<div class="sample">
|
||||||
|
<a :href="'http://'+d.domain" target="_blank" x-text="d.domain"></a>
|
||||||
|
<span class="score-badge" :style="scoreBg(d.score)" x-text="d.score"></span>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
</div>
|
||||||
|
<button class="btn btn-danger" style="margin-top:12px;width:100%" @click="exportTier('hot')">⬇ Export Hot CSV</button>
|
||||||
|
</div>
|
||||||
|
<!-- Warm -->
|
||||||
|
<div class="pipe-col" style="border-top:3px solid var(--warm)">
|
||||||
|
<h3>♨️ Warm</h3>
|
||||||
|
<div style="color:var(--muted);font-size:12px">Score 50–79</div>
|
||||||
|
<div class="count" style="color:var(--warm)" x-text="pipeline.warm.count.toLocaleString()"></div>
|
||||||
|
<div class="samples">
|
||||||
|
<template x-for="d in pipeline.warm.samples" :key="d.domain">
|
||||||
|
<div class="sample">
|
||||||
|
<a :href="'http://'+d.domain" target="_blank" x-text="d.domain"></a>
|
||||||
|
<span class="score-badge" :style="scoreBg(d.score)" x-text="d.score"></span>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
</div>
|
||||||
|
<button class="btn btn-warn" style="margin-top:12px;width:100%" @click="exportTier('warm')">⬇ Export Warm CSV</button>
|
||||||
|
</div>
|
||||||
|
<!-- Cold -->
|
||||||
|
<div class="pipe-col" style="border-top:3px solid var(--cold)">
|
||||||
|
<h3>🧊 Cold</h3>
|
||||||
|
<div style="color:var(--muted);font-size:12px">Score < 50</div>
|
||||||
|
<div class="count" style="color:var(--cold)" x-text="pipeline.cold.count.toLocaleString()"></div>
|
||||||
|
<div class="samples">
|
||||||
|
<template x-for="d in pipeline.cold.samples" :key="d.domain">
|
||||||
|
<div class="sample">
|
||||||
|
<a :href="'http://'+d.domain" target="_blank" x-text="d.domain"></a>
|
||||||
|
<span class="score-badge" :style="scoreBg(d.score)" x-text="d.score"></span>
|
||||||
|
</div>
|
||||||
|
</template>
|
||||||
|
</div>
|
||||||
|
<button class="btn btn-ghost" style="margin-top:12px;width:100%" @click="exportTier('cold')">⬇ Export Cold CSV</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- ⑤ TLD Chart -->
|
||||||
|
<div class="card" x-show="tab==='chart'">
|
||||||
|
<div class="card-title">Top 20 TLDs</div>
|
||||||
|
<div class="chart-wrap">
|
||||||
|
<canvas id="tldChart"></canvas>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</main>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
/**
 * Alpine.js component factory for the dashboard (bound via x-data="dashboard()").
 *
 * Holds the UI state (active tab, stats, browse filters, enrichment queue
 * status, lead pipeline) and the methods the markup calls.  All server
 * communication goes through the /api/* endpoints using fetch().
 */
function dashboard() {
  return {
    // ── State ────────────────────────────────────────────────────────────
    tab: 'browse',
    stats: {},
    domains: [],
    selected: [],
    loading: false,
    page: 1,
    filter: { tld: '', country: '', min_score: 0, cms: '', live_only: false, limit: '100' },
    queueStatus: {},
    enrichRunning: false,
    customDomains: '',
    pipeline: {
      hot: { count: 0, samples: [] },
      warm: { count: 0, samples: [] },
      cold: { count: 0, samples: [] },
    },
    _chart: null,
    _pollInterval: null,
    _initialized: false, // set once init() has run
    _pollBusy: false,    // true while a poll tick is still awaiting responses

    // ── Lifecycle ────────────────────────────────────────────────────────

    /** Load the initial stats and start the background poll (runs once). */
    async init() {
      // Alpine calls an x-data object's init() automatically, and the markup
      // also invokes it through x-init, so guard against the second call.
      if (this._initialized) return;
      this._initialized = true;
      await this.loadStats();
      this.startPolling();
    },

    /** (Re)start the 3-second poll that refreshes stats and the active tab. */
    startPolling() {
      // Never leave a previous timer running alongside the new one.
      if (this._pollInterval) clearInterval(this._pollInterval);
      this._pollInterval = setInterval(async () => {
        // Skip this tick if the previous one is still waiting on the server,
        // so slow responses cannot pile up overlapping requests.
        if (this._pollBusy) return;
        this._pollBusy = true;
        try {
          await this.loadStats();
          if (this.tab === 'enrichment') await this.loadQueueStatus();
          if (this.tab === 'pipeline') await this.loadPipeline();
        } finally {
          this._pollBusy = false;
        }
      }, 3000);
    },

    // ── HTTP helpers ─────────────────────────────────────────────────────

    /** GET `url` and return the parsed JSON body; throws on a non-2xx status. */
    async _getJson(url) {
      const r = await fetch(url);
      if (!r.ok) throw new Error(`GET ${url} failed with status ${r.status}`);
      return r.json();
    },

    /**
     * POST to `url` (JSON-encoding `payload` when given).
     * Resolves to true on a 2xx response and false on any failure.
     */
    async _post(url, payload) {
      const options = { method: 'POST' };
      if (payload !== undefined) {
        options.headers = {'Content-Type': 'application/json'};
        options.body = JSON.stringify(payload);
      }
      try {
        const r = await fetch(url, options);
        if (!r.ok) console.warn(`POST ${url} failed with status ${r.status}`);
        return r.ok;
      } catch (e) {
        console.warn(`POST ${url} failed`, e);
        return false;
      }
    },

    /** Split free-form text into domain names (whitespace- or comma-separated). */
    _parseDomains(text) {
      return String(text || '').split(/[\s,]+/).map(d => d.trim()).filter(Boolean);
    },

    // ── Data loading ─────────────────────────────────────────────────────

    /** Refresh the overview stats; on failure the previous values are kept. */
    async loadStats() {
      try {
        this.stats = await this._getJson('/api/stats');
      } catch (e) {
        // Best-effort: this runs on every poll tick, so only log.
        console.warn('loadStats failed', e);
      }
    },

    /** Refresh the enrichment queue counters and the worker-running flag. */
    async loadQueueStatus() {
      try {
        this.queueStatus = await this._getJson('/api/enrich/status');
        this.enrichRunning = this.queueStatus.worker_running;
      } catch (e) {
        console.warn('loadQueueStatus failed', e);
      }
    },

    /**
     * Fetch one page of domains and apply the client-side filters.
     * @param {number} [p] page to load; the current page is reused when omitted.
     */
    async loadDomains(p) {
      if (p !== undefined) this.page = p;
      this.loading = true;

      // TLD and live-only are filtered by the server.
      const params = new URLSearchParams({ page: this.page, limit: this.filter.limit });
      if (this.filter.tld) params.set('tld', this.filter.tld);
      if (this.filter.live_only) params.set('live_only', 'true');

      // Score, country and CMS are filtered here, on the returned page.
      // The range input yields a string, so coerce it once.
      // NOTE(review): filtering after the fetch can shrink a full page below
      // `limit`, which disables the "Next" button even though more pages
      // exist — moving these filters server-side would fix that.
      const minScore = Number(this.filter.min_score) || 0;
      const country = this.filter.country.trim().toUpperCase();
      const cms = this.filter.cms;

      try {
        const data = await this._getJson('/api/domains?' + params);
        const rows = Array.isArray(data.results) ? data.results : [];
        this.domains = rows.filter(row => {
          if (minScore > 0 && (row.score ?? 0) < minScore) return false;
          if (country && row.ip_country !== country) return false;
          if (cms && row.cms !== cms) return false;
          return true;
        });
      } catch (e) {
        console.warn('loadDomains failed', e);
        this.domains = [];
      } finally {
        this.loading = false;
      }
    },

    /** Select every row on the current page, or clear the selection. */
    toggleAll(e) {
      this.selected = e.target.checked ? this.domains.map(d => d.domain) : [];
    },

    // ── Enrichment actions ───────────────────────────────────────────────

    /** Queue the checked rows for enrichment. */
    async enqueueSelected() {
      if (!this.selected.length) return;
      const ok = await this._post('/api/enrich/batch', { domains: this.selected });
      if (!ok) {
        // Keep the selection so the user can simply retry.
        alert('Could not queue the selected domains. Please try again.');
        return;
      }
      this.selected = [];
      alert('Queued for enrichment!');
    },

    /** Queue the domains typed into the "custom domains" textarea. */
    async enqueueCustom() {
      const domains = this._parseDomains(this.customDomains);
      if (!domains.length) return;
      const ok = await this._post('/api/enrich/batch', { domains });
      if (!ok) {
        // Leave the text in place so nothing the user typed is lost.
        alert('Could not queue the domains. Please try again.');
        return;
      }
      this.customDomains = '';
      await this.loadQueueStatus();
    },

    /** Ask the server to resume the enrichment worker. */
    async startEnrich() {
      if (!(await this._post('/api/enrich/resume'))) return;
      this.enrichRunning = true;
      await this.loadQueueStatus();
    },

    /** Ask the server to pause the enrichment worker. */
    async pauseEnrich() {
      if (!(await this._post('/api/enrich/pause'))) return;
      this.enrichRunning = false;
    },

    /** Ask the server to re-queue failed jobs, then refresh the counters. */
    async retryFailed() {
      await this._post('/api/enrich/retry');
      await this.loadQueueStatus();
    },

    /** Percentage (0-100) of queue jobs that are done; 0 when the total is unknown. */
    progressPct() {
      const q = this.queueStatus;
      if (!q || !q.total) return 0;
      return (q.done / q.total) * 100;
    },

    /** One-line textual summary of the queue counters. */
    progressLabel() {
      const q = this.queueStatus;
      if (!q) return '';
      return `${q.done ?? 0} done · ${q.pending ?? 0} pending · ${q.running ?? 0} running · ${q.failed ?? 0} failed`;
    },

    // ── Lead pipeline ────────────────────────────────────────────────────

    /** Load up to five sample leads for each of the hot / warm / cold tiers. */
    async loadPipeline() {
      try {
        const [hot, warm, cold] = await Promise.all(
          [80, 50, 0].map(min => this._getJson(`/api/enriched?min_score=${min}&limit=5`)),
        );

        // The API only takes a lower bound, so trim each tier's upper bound here.
        const warmOnly = warm.results.filter(d => d.score < 80);
        const coldOnly = cold.results.filter(d => d.score < 50);

        // NOTE(review): these counts come from the 5-row samples, so they can
        // never exceed 5. The response shape of /api/enriched is not visible
        // from here; if it exposes a total, use that for `count` instead.
        this.pipeline.hot = { count: hot.results.length, samples: hot.results.slice(0, 5) };
        this.pipeline.warm = { count: warmOnly.length, samples: warmOnly.slice(0, 5) };
        this.pipeline.cold = { count: coldOnly.length, samples: coldOnly.slice(0, 5) };
      } catch (e) {
        // Keep the previous pipeline on failure; this runs on every poll tick.
        console.warn('loadPipeline failed', e);
      }
    },

    /** Download the CSV export for a tier ('hot' | 'warm' | 'cold'). */
    exportTier(tier) {
      window.location = `/api/export?tier=${encodeURIComponent(tier)}`;
    },

    /** Inline style (background + text colour) for a score badge. */
    scoreBg(score) {
      if (score == null) return 'background:#333;color:#aaa';
      if (score >= 80) return 'background:#ff4f6d33;color:#ff4f6d';
      if (score >= 50) return 'background:#ffb34733;color:#ffb347';
      return 'background:#6c7aff33;color:#6c7aff';
    },

    // ── TLD chart ────────────────────────────────────────────────────────

    /** Draw (or redraw) the TLD bar chart from `stats.tld_breakdown`. */
    async renderChart() {
      // Wait until Alpine has made the chart tab visible.
      await this.$nextTick();
      const canvas = document.getElementById('tldChart');
      if (!canvas) return;
      if (typeof Chart === 'undefined') {
        console.warn('Chart.js is not loaded; cannot render the TLD chart');
        return;
      }

      if (!(this.stats.tld_breakdown || []).length) {
        await this.loadStats();
      }
      const breakdown = this.stats.tld_breakdown || [];
      const labels = breakdown.map(t => '.' + (t.tld || '?'));
      const data = breakdown.map(t => t.count);

      // Destroy the old chart right before creating the new one, with no
      // await in between: Chart.js refuses to attach a second chart to a
      // canvas that is still in use, which two quick tab clicks could trigger.
      if (this._chart) { this._chart.destroy(); this._chart = null; }

      this._chart = new Chart(canvas, {
        type: 'bar',
        data: {
          labels,
          datasets: [{
            label: 'Domains',
            data,
            backgroundColor: 'rgba(108, 99, 255, 0.7)',
            borderColor: 'rgba(108, 99, 255, 1)',
            borderWidth: 1,
            borderRadius: 4,
          }]
        },
        options: {
          responsive: true,
          maintainAspectRatio: false,
          plugins: { legend: { display: false } },
          scales: {
            x: { ticks: { color: '#8891b0' }, grid: { color: '#2e3250' } },
            y: { ticks: { color: '#8891b0' }, grid: { color: '#2e3250' } },
          }
        }
      });
    },
  }
}
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
16
docker-compose.yml
Normal file
16
docker-compose.yml
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
version: "3.9"
|
||||||
|
services:
|
||||||
|
dashboard:
|
||||||
|
build: .
|
||||||
|
ports:
|
||||||
|
- "6677:6677"
|
||||||
|
volumes:
|
||||||
|
- ./data:/data
|
||||||
|
environment:
|
||||||
|
- DATA_DIR=/data
|
||||||
|
- PARQUET_URL=https://github.com/digitalcortex/72m-domains-dataset/raw/refs/heads/master/domains.parquet
|
||||||
|
- CONCURRENCY_LIMIT=50
|
||||||
|
- SCORE_THRESHOLD=60
|
||||||
|
- TARGET_TLDS=es,com,net
|
||||||
|
- TARGET_COUNTRIES=ES,GB,DE,FR,RO,PT,AD,IT
|
||||||
|
restart: unless-stopped
|
||||||
8
requirements.txt
Normal file
8
requirements.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
fastapi
|
||||||
|
uvicorn[standard]
|
||||||
|
duckdb
|
||||||
|
httpx
|
||||||
|
dnspython
|
||||||
|
beautifulsoup4
|
||||||
|
aiosqlite
|
||||||
|
python-dotenv
|
||||||
Reference in New Issue
Block a user