From d9ece58e1270bf56e7fbb32e86068fc1106eb761 Mon Sep 17 00:00:00 2001 From: Malin Date: Thu, 7 May 2026 11:06:58 +0200 Subject: [PATCH] fix: search race condition + brand detection + contacts + reassess MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - loadDomains(): add generation counter so stale auto-advance fetches cannot overwrite a newer user-triggered search result; snapshot filter state before the first await so URL reflects what was requested; add HTTP status check so backend errors surface as toasts rather than silent empty results; auto-advance now calls loadDomains() without await so the counter increments correctly per page advance - beauty_ai: word-boundary regex for short brands (≤5 chars) to stop 'ref' matching 'reference'/'refresh'/'prefer' etc.; merge phones, whatsapp and social_links from site_analyzer directly into result (more reliable than AI extraction); add contact_whatsapp and contact_social fields to AI JSON schema - db: add requeue_beauty() for re-assessing already-assessed domains - beauty_main: /api/beauty/reassess/batch endpoint using requeue_beauty - index.html: Re-assess Selected bulk button, per-row ↺ button in Browse and Pipeline, WhatsApp + social links in Pipeline contact panel Co-Authored-By: Claude Sonnet 4.6 --- app/beauty_ai.py | 37 +++++++++++- app/beauty_main.py | 13 ++++- app/db.py | 13 +++++ app/static/beauty/index.html | 105 +++++++++++++++++++++++++++-------- 4 files changed, 141 insertions(+), 27 deletions(-) diff --git a/app/beauty_ai.py b/app/beauty_ai.py index 2daec93..01b2eb0 100644 --- a/app/beauty_ai.py +++ b/app/beauty_ai.py @@ -128,9 +128,22 @@ BEAUTY_CATEGORIES = [ # ── Brand detection (fast pre-scan, no AI) ───────────────────────────────────── def detect_brands_in_text(text: str) -> list[str]: - """Find which brands from the universe appear in the scraped page text.""" + """Find which brands from the universe appear in the scraped page text. + + Short brands (≤5 chars) use word-boundary matching to avoid false positives + like 'ref' matching 'reference', 'prefer', 'refresh', etc. + """ tl = text.lower() - return [b for b in BEAUTY_BRANDS if b.lower() in tl][:60] + result = [] + for b in BEAUTY_BRANDS: + bl = b.lower() + if len(bl) <= 5: + if re.search(r'(? list[str]: @@ -247,6 +260,8 @@ Respond ONLY with valid JSON, no markdown, no text outside JSON: "dist_matches": ["OurBrand1","OurBrand2"], "contact_email": "email or empty string", "contact_phone": "phone or empty string", + "contact_whatsapp": "whatsapp link or empty string", + "contact_social": "primary social profile URL or empty string", "b2b_proposal": "1-2 sentence value proposition in Spanish referencing their categories and our matching brands", "outreach_subject": "short Spanish subject line referencing their business name", "outreach_email": "3-4 sentence ready-to-send email in Spanish. Mention their business, 1-2 specific brands from our portfolio that match their range, and a clear call to action (catálogo, muestra, llamada).", @@ -336,12 +351,27 @@ async def assess_beauty_domain(analysis: dict) -> dict: if not result.get("detected_brands") and detected: result["detected_brands"] = detected + # Always merge contact data directly from site_analyzer — more reliable + # than AI extraction since it uses regex against raw HTML + phones = analysis.get("phones", []) + whatsapp = analysis.get("whatsapp", []) + social_links = analysis.get("social_links", []) + if phones and not result.get("contact_phone"): + result["contact_phone"] = phones[0] + if whatsapp: + result["contact_whatsapp"] = "; ".join(whatsapp[:2]) + if social_links: + result["contact_social"] = "; ".join(social_links[:3]) + logger.info("Beauty AI %s → quality=%s, dist_matches=%s", domain, result.get("lead_quality"), result.get("dist_matches")) return result except Exception as e: logger.error("Beauty AI error %s: %s", domain, e) + phones = analysis.get("phones", []) + whatsapp = analysis.get("whatsapp", []) + social = analysis.get("social_links", []) return { "error": str(e)[:300], "is_relevant": False, @@ -349,4 +379,7 @@ async def assess_beauty_domain(analysis: dict) -> dict: "dist_matches": dist_match, "detected_brands": detected, "contact_email": "", + "contact_phone": phones[0] if phones else "", + "contact_whatsapp": "; ".join(whatsapp[:2]) if whatsapp else "", + "contact_social": "; ".join(social[:3]) if social else "", } diff --git a/app/beauty_main.py b/app/beauty_main.py index 31e12d8..7f57474 100644 --- a/app/beauty_main.py +++ b/app/beauty_main.py @@ -22,7 +22,7 @@ load_dotenv() from app.db import ( SQLITE_PATH, init_db, get_stats, get_domains, get_enriched, build_duckdb_index, index_status, - queue_beauty, get_beauty_queue_status, save_beauty_assessment, get_beauty_leads, + queue_beauty, requeue_beauty, get_beauty_queue_status, save_beauty_assessment, get_beauty_leads, save_prescreen_results, ) from app.validator import start_validator, stop_validator, get_validator_status @@ -277,6 +277,17 @@ async def beauty_assess_batch(body: dict): return {"queued": len(domains_list)} +@app.post("/api/beauty/reassess/batch") +async def beauty_reassess_batch(body: dict): + """Re-queue domains for fresh assessment, resetting any existing result.""" + domains_list = body.get("domains", []) + if not domains_list: + return JSONResponse({"error": "no domains provided"}, status_code=400) + await requeue_beauty(domains_list) + _start_beauty_worker() + return {"requeued": len(domains_list)} + + @app.post("/api/beauty/worker/restart") async def beauty_worker_restart(): _start_beauty_worker() diff --git a/app/db.py b/app/db.py index 21c63ea..7854ef3 100644 --- a/app/db.py +++ b/app/db.py @@ -540,6 +540,19 @@ async def queue_beauty(domains: list[str]): await db.commit() +async def requeue_beauty(domains: list[str]): + """Re-queue domains for fresh assessment even if already assessed.""" + async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: + await db.executemany( + """INSERT INTO beauty_queue (domain, status) + VALUES (?, 'pending') + ON CONFLICT(domain) DO UPDATE SET + status='pending', completed_at=NULL, error=NULL""", + [(d,) for d in domains], + ) + await db.commit() + + async def get_beauty_queue_status(): async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db: async with db.execute("SELECT status, COUNT(*) FROM beauty_queue GROUP BY status") as cur: diff --git a/app/static/beauty/index.html b/app/static/beauty/index.html index 1cf42e5..57fb0d5 100644 --- a/app/static/beauty/index.html +++ b/app/static/beauty/index.html @@ -180,6 +180,10 @@ textarea{width:100%;resize:vertical;font-family:monospace;font-size:12px} Screening… + @@ -227,6 +231,7 @@ textarea{width:100%;resize:vertical;font-family:monospace;font-size:12px} + @@ -338,6 +343,7 @@ textarea{width:100%;resize:vertical;font-family:monospace;font-size:12px} x-text="(row._beauty||{}).contact_email||row.emails||'—'"> + @@ -371,14 +377,20 @@ textarea{width:100%;resize:vertical;font-family:monospace;font-size:12px}

Contact Details

-

+

-