fix: prescreen returns immediately after HTTP check, DeepSeek runs in background

Previously /api/prescreen/batch blocked for 4-10 minutes waiting for Replicate/
DeepSeek, so the browser connection timed out and no results were saved.

- Phase 1 (HTTP check) runs synchronously and saves results immediately
- Phase 2 (DeepSeek classification) is scheduled with asyncio.create_task and runs in the background
- Response is returned to client as soon as phase 1 completes (~30-90s)
- Frontend toast shows "classifying N in background" so the user knows niche/type
  will appear shortly, without having to wait for it
- Each DeepSeek sub-batch saves independently so partial results are preserved

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-05 08:28:26 +02:00
parent 7ec0304dea
commit daccb99a0c
2 changed files with 26 additions and 15 deletions

View File

@@ -218,6 +218,7 @@ async def prescreen_batch(body: dict):
from app.prescreener import prescreen_domains, classify_with_deepseek, DEEPSEEK_BATCH_SIZE from app.prescreener import prescreen_domains, classify_with_deepseek, DEEPSEEK_BATCH_SIZE
# Phase 1: HTTP check — runs synchronously, finishes in ~30-90s, saves immediately.
results = await prescreen_domains(domains_list) results = await prescreen_domains(domains_list)
await save_prescreen_results(results) await save_prescreen_results(results)
@@ -227,18 +228,26 @@ async def prescreen_batch(body: dict):
counts[s] = counts.get(s, 0) + 1 counts[s] = counts.get(s, 0) + 1
live = [r for r in results if r.get("prescreen_status") == "live"] live = [r for r in results if r.get("prescreen_status") == "live"]
classified = 0
# Phase 2: DeepSeek classification — scheduled as a background task so the
# HTTP response is not blocked on it. Each sub-batch is saved as soon as it is
# classified; the Browse table will show niche/type as those saves land.
if live: if live:
batches = [live[i:i + DEEPSEEK_BATCH_SIZE] for i in range(0, len(live), DEEPSEEK_BATCH_SIZE)] async def _classify_bg(items: list) -> None:
all_cls: list = [] try:
for i, batch in enumerate(batches): batches = [items[i:i + DEEPSEEK_BATCH_SIZE]
if i > 0: for i in range(0, len(items), DEEPSEEK_BATCH_SIZE)]
await asyncio.sleep(3) for i, batch in enumerate(batches):
cls = await classify_with_deepseek(batch) if i > 0:
all_cls.extend(cls) await asyncio.sleep(3)
if all_cls: cls = await classify_with_deepseek(batch)
await save_prescreen_results(all_cls) if cls:
classified = len(all_cls) await save_prescreen_results(cls)
logger.info("Prescreen BG: classified %d domains", len(cls))
except Exception as e:
logger.error("Prescreen BG classification failed: %s", e)
asyncio.create_task(_classify_bg(live))
return { return {
"total": len(domains_list), "total": len(domains_list),
@@ -246,7 +255,8 @@ async def prescreen_batch(body: dict):
"parked": counts.get("parked", 0), "parked": counts.get("parked", 0),
"redirect": counts.get("redirect", 0), "redirect": counts.get("redirect", 0),
"dead": counts.get("dead", 0), "dead": counts.get("dead", 0),
"classified": classified, "error": counts.get("error", 0),
"classifying": len(live), # niche/type arrives shortly via background task
} }

View File

@@ -559,7 +559,7 @@ function app() {
try { try {
const chunks = []; const chunks = [];
for (let i=0; i<this.selected.length; i+=200) chunks.push(this.selected.slice(i,i+200)); for (let i=0; i<this.selected.length; i+=200) chunks.push(this.selected.slice(i,i+200));
let totals = {live:0,dead:0,parked:0,redirect:0,classified:0}; let totals = {live:0,dead:0,parked:0,redirect:0,error:0,classifying:0};
for (const chunk of chunks) { for (const chunk of chunks) {
const d = await fetch('/api/prescreen/batch', { const d = await fetch('/api/prescreen/batch', {
method:'POST', headers:{'Content-Type':'application/json'}, method:'POST', headers:{'Content-Type':'application/json'},
@@ -567,9 +567,10 @@ function app() {
}).then(r=>r.json()); }).then(r=>r.json());
totals.live += d.live||0; totals.dead += d.dead||0; totals.live += d.live||0; totals.dead += d.dead||0;
totals.parked += d.parked||0; totals.redirect += d.redirect||0; totals.parked += d.parked||0; totals.redirect += d.redirect||0;
totals.classified += d.classified||0; totals.error += d.error||0; totals.classifying += d.classifying||0;
} }
this.notify(`${totals.live} live · ☠ ${totals.dead} dead · 🅿 ${totals.parked} parked · 🏷 ${totals.classified} classified`, 'success'); const cls = totals.classifying > 0 ? ` · 🏷 classifying ${totals.classifying} in background` : '';
this.notify(`${totals.live} live · ☠ ${totals.dead} dead · 🅿 ${totals.parked} parked${cls}`, 'success');
this.selected = []; this.selected = [];
await this.loadDomains(); await this.loadDomains();
} catch(e) { this.notify('Pre-screen failed: '+e.message, 'error'); } } catch(e) { this.notify('Pre-screen failed: '+e.message, 'error'); }