fix: 429 retry, sequential batching, force UI refresh after prescreen
1. prescreener.py: classify_with_deepseek now retries on 429 with exponential
   back-off (5s → 10s → 20s → 40s, up to 4 attempts); the same back-off also
   covers other transient errors.
2. main.py: prescreen batches now run sequentially with a 3s gap instead of
   concurrently via asyncio.gather. Parallel batches caused the second batch
   to always hit the 429 rate limit, leaving most domains unclassified (only
   the smaller last batch succeeded). A sketch of the sequential pattern
   follows this message.
3. index.html: prescreenSelected() now clears this.domains before calling
   _fetch() so Alpine re-renders the full table with the updated niche/type
   values; the notify hint now mentions the expected 1-2 min wait.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
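The main.py change from item 2 is not part of the diff below, which touches
only prescreener.py. Here is a minimal sketch of the sequential pattern it
describes, under stated assumptions: _chunk, BATCH_SIZE, and prescreen_all are
illustrative names, not the actual code; classify_with_deepseek is the real
coroutine from prescreener.py.

import asyncio

from prescreener import classify_with_deepseek  # real coroutine from this repo

BATCH_SIZE = 25  # assumed; the commit does not state the batch size


def _chunk(items: list[dict], size: int) -> list[list[dict]]:
    # Split items into consecutive batches of at most `size` entries.
    return [items[i:i + size] for i in range(0, len(items), size)]


async def prescreen_all(live_items: list[dict]) -> list[dict]:
    # Run batches one at a time with a 3s gap. Previously the batches were
    # fired concurrently via asyncio.gather(), so the second batch always
    # hit Replicate's 429 rate limit and most domains stayed unclassified.
    results: list[dict] = []
    for i, batch in enumerate(_chunk(live_items, BATCH_SIZE)):
        if i > 0:
            await asyncio.sleep(3)  # 3s gap between batches
        results.extend(await classify_with_deepseek(batch))
    return results

Sequential submission trades latency for reliability: each batch waits for the
previous one to finish, so every request gets a fresh rate-limit window.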
@@ -223,8 +223,9 @@ def _parse_classify_output(raw: str) -> list[dict]:
 async def classify_with_deepseek(live_items: list[dict]) -> list[dict]:
     """Single DeepSeek call → list of {domain, niche, type}.
 
-    Replicate may return 202 (async) for slow models like DeepSeek-R1.
-    We poll the prediction URL until it succeeds or times out.
+    Handles:
+      - 429 Too Many Requests: exponential back-off, up to 4 retries
+      - 202 Accepted (async prediction): polls urls.get until succeeded
     """
     if not live_items:
         return []
@@ -235,55 +236,69 @@ async def classify_with_deepseek(live_items: list[dict]) -> list[dict]:
             "temperature": 0.1,
         }
     }
-    auth_headers = {
-        "Authorization": f"Bearer {REPLICATE_TOKEN}",
-        "Content-Type": "application/json",
-    }
-    try:
-        async with httpx.AsyncClient(timeout=300) as client:
-            resp = await client.post(
-                DEEPSEEK_MODEL,
-                headers={**auth_headers, "Prefer": "wait=60"},
-                json=payload,
-            )
-            resp.raise_for_status()
-            data = resp.json()
-
-            # ── Poll if Replicate accepted async (202 or status starting/processing) ──
-            if resp.status_code == 202 or data.get("status") in ("starting", "processing"):
-                poll_url = (data.get("urls") or {}).get("get")
-                if not poll_url:
-                    logger.error("DeepSeek: 202 but no poll URL in response")
-                    return []
-                logger.info("DeepSeek: async prediction, polling %s", poll_url)
-                for attempt in range(90):  # up to ~3 minutes
-                    await asyncio.sleep(2)
-                    pr = await client.get(
-                        poll_url,
-                        headers={"Authorization": f"Bearer {REPLICATE_TOKEN}"},
-                    )
-                    pdata = pr.json()
-                    status = pdata.get("status")
-                    logger.debug("DeepSeek poll #%d status=%s", attempt + 1, status)
-                    if status == "succeeded":
-                        data = pdata
-                        break
-                    if status in ("failed", "canceled"):
-                        logger.error("DeepSeek prediction %s: %s", status, pdata.get("error"))
-                        return []
-                else:
-                    logger.error("DeepSeek: prediction timed out after polling 90×2s")
-                    return []
-
-            output = data.get("output") or ""
-            if isinstance(output, list):
-                output = "".join(str(t) for t in output if t is not None)
-
-            logger.info("DeepSeek raw output (first 500 chars): %.500s", output)
-            result = _parse_classify_output(output)
-            logger.info("DeepSeek classified %d / %d domains", len(result), len(live_items))
-            return result
-
-    except Exception as e:
-        logger.error("DeepSeek classification error: %s", e)
-        return []
+    auth_header = {"Authorization": f"Bearer {REPLICATE_TOKEN}"}
+
+    MAX_RETRIES = 4
+    for attempt in range(MAX_RETRIES):
+        try:
+            async with httpx.AsyncClient(timeout=300) as client:
+                resp = await client.post(
+                    DEEPSEEK_MODEL,
+                    headers={**auth_header, "Content-Type": "application/json", "Prefer": "wait=60"},
+                    json=payload,
+                )
+
+                # ── Retry on rate-limit ──────────────────────────────────────
+                if resp.status_code == 429:
+                    wait = min(60, 5 * (2 ** attempt))  # 5 → 10 → 20 → 40s
+                    logger.warning("DeepSeek 429 (attempt %d/%d), retrying in %ds",
+                                   attempt + 1, MAX_RETRIES, wait)
+                    await asyncio.sleep(wait)
+                    continue
+
+                resp.raise_for_status()
+                data = resp.json()
+
+                # ── Poll if Replicate queued async (202) ─────────────────────
+                if resp.status_code == 202 or data.get("status") in ("starting", "processing"):
+                    poll_url = (data.get("urls") or {}).get("get")
+                    if not poll_url:
+                        logger.error("DeepSeek: 202 but no poll URL in response")
+                        return []
+                    logger.info("DeepSeek: async prediction, polling %s", poll_url)
+                    for tick in range(90):  # up to ~3 minutes
+                        await asyncio.sleep(2)
+                        pr = await client.get(poll_url, headers=auth_header)
+                        pdata = pr.json()
+                        status = pdata.get("status")
+                        logger.debug("DeepSeek poll #%d status=%s", tick + 1, status)
+                        if status == "succeeded":
+                            data = pdata
+                            break
+                        if status in ("failed", "canceled"):
+                            logger.error("DeepSeek prediction %s: %s", status, pdata.get("error"))
+                            return []
+                    else:
+                        logger.error("DeepSeek: prediction timed out after 90 polls")
+                        return []
+
+                # ── Parse output ─────────────────────────────────────────────
+                output = data.get("output") or ""
+                if isinstance(output, list):
+                    output = "".join(str(t) for t in output if t is not None)
+
+                logger.info("DeepSeek raw output (first 500 chars): %.500s", output)
+                result = _parse_classify_output(output)
+                logger.info("DeepSeek classified %d / %d domains", len(result), len(live_items))
+                return result
+
+        except Exception as e:
+            if attempt < MAX_RETRIES - 1:
+                wait = 5 * (2 ** attempt)
+                logger.warning("DeepSeek error (attempt %d/%d), retry in %ds: %s",
+                               attempt + 1, MAX_RETRIES, wait, e)
+                await asyncio.sleep(wait)
+            else:
+                logger.error("DeepSeek classification failed after %d attempts: %s",
+                             MAX_RETRIES, e)
+    return []
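For reference, the wait schedule produced by min(60, 5 * (2 ** attempt)) can
be verified standalone (a quick check, not part of the commit):

for attempt in range(4):
    print(f"attempt {attempt + 1}: wait {min(60, 5 * (2 ** attempt))}s")
# attempt 1: wait 5s
# attempt 2: wait 10s
# attempt 3: wait 20s
# attempt 4: wait 40s

The min(60, ...) cap only matters if MAX_RETRIES is ever raised past 4; at the
current setting the schedule never reaches it.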