fix: search race condition + brand detection + contacts + reassess
- loadDomains(): add a generation counter so stale auto-advance fetches cannot overwrite a newer user-triggered search result; snapshot filter state before the first await so the URL reflects what was requested; add an HTTP status check so backend errors surface as toasts rather than silent empty results; auto-advance now calls loadDomains() without await so the counter increments correctly per page advance
- beauty_ai: word-boundary regex for short brands (≤5 chars) to stop 'ref' matching 'reference'/'refresh'/'prefer' etc.; merge phones, whatsapp and social_links from site_analyzer directly into the result (more reliable than AI extraction); add contact_whatsapp and contact_social fields to the AI JSON schema
- db: add requeue_beauty() for re-assessing already-assessed domains
- beauty_main: add /api/beauty/reassess/batch endpoint using requeue_beauty
- index.html: Re-assess Selected bulk button, per-row ↺ button in Browse and Pipeline, WhatsApp + social links in the Pipeline contact panel

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -128,9 +128,22 @@ BEAUTY_CATEGORIES = [
|
||||
# ── Brand detection (fast pre-scan, no AI) ─────────────────────────────────────
|
||||
|
||||
def detect_brands_in_text(text: str) -> list[str]:
    """Find which brands from the universe appear in the scraped page text.

    Matching is case-insensitive. Short brands (≤5 chars) use word-boundary
    matching to avoid false positives like 'ref' matching 'reference',
    'prefer', 'refresh', etc. Longer brands use plain substring matching.

    Args:
        text: Scraped page text to scan.

    Returns:
        At most 60 matching brands, in ``BEAUTY_BRANDS`` order and spelled
        exactly as they appear in ``BEAUTY_BRANDS``.
    """
    tl = text.lower()
    result = []
    for b in BEAUTY_BRANDS:
        bl = b.lower()
        if len(bl) <= 5:
            # An ASCII letter or digit directly before/after the hit means the
            # brand is only a fragment of a longer word, so it is rejected.
            found = re.search(
                r'(?<![a-zA-Z0-9])' + re.escape(bl) + r'(?![a-zA-Z0-9])', tl
            ) is not None
        else:
            found = bl in tl
        if found:
            result.append(b)
    return result[:60]
|
||||
|
||||
|
||||
def get_dist_matches(detected: list[str]) -> list[str]:
|
||||
@@ -247,6 +260,8 @@ Respond ONLY with valid JSON, no markdown, no text outside JSON:
|
||||
"dist_matches": ["OurBrand1","OurBrand2"],
|
||||
"contact_email": "email or empty string",
|
||||
"contact_phone": "phone or empty string",
|
||||
"contact_whatsapp": "whatsapp link or empty string",
|
||||
"contact_social": "primary social profile URL or empty string",
|
||||
"b2b_proposal": "1-2 sentence value proposition in Spanish referencing their categories and our matching brands",
|
||||
"outreach_subject": "short Spanish subject line referencing their business name",
|
||||
"outreach_email": "3-4 sentence ready-to-send email in Spanish. Mention their business, 1-2 specific brands from our portfolio that match their range, and a clear call to action (catálogo, muestra, llamada).",
|
||||
@@ -336,12 +351,27 @@ async def assess_beauty_domain(analysis: dict) -> dict:
|
||||
if not result.get("detected_brands") and detected:
|
||||
result["detected_brands"] = detected
|
||||
|
||||
# Always merge contact data directly from site_analyzer — more reliable
|
||||
# than AI extraction since it uses regex against raw HTML
|
||||
phones = analysis.get("phones", [])
|
||||
whatsapp = analysis.get("whatsapp", [])
|
||||
social_links = analysis.get("social_links", [])
|
||||
if phones and not result.get("contact_phone"):
|
||||
result["contact_phone"] = phones[0]
|
||||
if whatsapp:
|
||||
result["contact_whatsapp"] = "; ".join(whatsapp[:2])
|
||||
if social_links:
|
||||
result["contact_social"] = "; ".join(social_links[:3])
|
||||
|
||||
logger.info("Beauty AI %s → quality=%s, dist_matches=%s",
|
||||
domain, result.get("lead_quality"), result.get("dist_matches"))
|
||||
return result
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Beauty AI error %s: %s", domain, e)
|
||||
phones = analysis.get("phones", [])
|
||||
whatsapp = analysis.get("whatsapp", [])
|
||||
social = analysis.get("social_links", [])
|
||||
return {
|
||||
"error": str(e)[:300],
|
||||
"is_relevant": False,
|
||||
@@ -349,4 +379,7 @@ async def assess_beauty_domain(analysis: dict) -> dict:
|
||||
"dist_matches": dist_match,
|
||||
"detected_brands": detected,
|
||||
"contact_email": "",
|
||||
"contact_phone": phones[0] if phones else "",
|
||||
"contact_whatsapp": "; ".join(whatsapp[:2]) if whatsapp else "",
|
||||
"contact_social": "; ".join(social[:3]) if social else "",
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user