feat: two-phase pre-screening with HTTP check + DeepSeek batch classification
Phase 1 (no AI credits): httpx checks every selected domain concurrently (30 parallel) with real browser UA — detects live/dead/parked/redirect. Parked: keyword scan in body/title + known parking host redirect check. Results saved to DB immediately; dead/parked never reach DeepSeek. Phase 2 (single DeepSeek call): all live-site titles + snippets bundled into ONE Replicate/DeepSeek-R1 request → returns niche + type for every domain in batch (up to 80 per call, parallelised if more). - app/prescreener.py (new): _check_one(), prescreen_domains(), classify_with_deepseek(), parking signal lists, same-domain redirect logic - app/db.py: prescreen_status/niche/site_type/prescreen_at columns + migrations; save_prescreen_results() upsert helper - app/main.py: POST /api/prescreen/batch endpoint - app/static/index.html: - 🔍 Pre-screen button (disabled while running, shows spinner) - Niche + Type columns in Browse and Leads tables (.pni/.pty pills) - Prescreen status colour dot (●) when niche not yet set - prescreening state flag; result toast shows per-status counts Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
39
app/db.py
39
app/db.py
@@ -36,7 +36,11 @@ CREATE TABLE IF NOT EXISTS enriched_domains (
|
||||
ai_contact_channel TEXT,
|
||||
ai_contact_value TEXT,
|
||||
ai_assessed_at TEXT,
|
||||
site_analysis TEXT
|
||||
site_analysis TEXT,
|
||||
prescreen_status TEXT,
|
||||
niche TEXT,
|
||||
site_type TEXT,
|
||||
prescreen_at TEXT
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS job_queue (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
@@ -76,6 +80,10 @@ _MIGRATIONS = [
|
||||
"ALTER TABLE enriched_domains ADD COLUMN site_analysis TEXT",
|
||||
"CREATE TABLE IF NOT EXISTS ai_queue (domain TEXT PRIMARY KEY, status TEXT DEFAULT 'pending', created_at TEXT DEFAULT (datetime('now')), completed_at TEXT, error TEXT)",
|
||||
"ALTER TABLE ai_queue ADD COLUMN language TEXT DEFAULT 'ES'",
|
||||
"ALTER TABLE enriched_domains ADD COLUMN prescreen_status TEXT",
|
||||
"ALTER TABLE enriched_domains ADD COLUMN niche TEXT",
|
||||
"ALTER TABLE enriched_domains ADD COLUMN site_type TEXT",
|
||||
"ALTER TABLE enriched_domains ADD COLUMN prescreen_at TEXT",
|
||||
]
|
||||
|
||||
# Index build state
|
||||
@@ -418,6 +426,35 @@ async def save_ai_assessment(domain: str, assessment: dict, site_analysis: dict
|
||||
await db.commit()
|
||||
|
||||
|
||||
async def save_prescreen_results(results: list[dict]):
    """Persist prescreen outcomes for a batch of domains.

    Each entry in *results* describes one domain and takes one of two shapes:

    - DeepSeek classification rows carry ``niche`` and/or ``type``; these
      update an existing ``enriched_domains`` row in place (no row is
      created if the domain is unknown).
    - HTTP prescreen rows carry ``prescreen_status`` (and optionally
      ``title``); these are upserted, creating the domain row on first
      sight and stamping ``prescreen_at`` with the current time.

    Entries without a ``domain`` key are skipped. All writes happen in one
    connection and are committed together at the end.
    """
    async with aiosqlite.connect(SQLITE_PATH) as db:
        for entry in results:
            target = entry.get("domain")
            if not target:
                continue  # malformed entry — nothing to key the write on
            niche_label = entry.get("niche")
            kind_label = entry.get("type")  # DeepSeek returns "type" key
            if niche_label or kind_label:
                # Classification-only update (domain row must already exist)
                await db.execute(
                    "UPDATE enriched_domains SET niche=?, site_type=? WHERE domain=?",
                    (niche_label, kind_label, target),
                )
            else:
                # Prescreen status upsert — create row if it doesn't exist yet;
                # keep any page_title captured by an earlier enrichment pass.
                await db.execute(
                    """INSERT INTO enriched_domains (domain, prescreen_status, prescreen_at, page_title)
                       VALUES (?, ?, datetime('now'), ?)
                       ON CONFLICT(domain) DO UPDATE SET
                           prescreen_status = excluded.prescreen_status,
                           prescreen_at = excluded.prescreen_at,
                           page_title = COALESCE(page_title, excluded.page_title)""",
                    (target, entry.get("prescreen_status"), entry.get("title")),
                )
        await db.commit()
|
||||
|
||||
|
||||
async def queue_domains(domains: list[str]):
|
||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
||||
await db.executemany(
|
||||
|
||||
Reference in New Issue
Block a user