feat: richer B2B assessment — legal page scraping, full contacts, summary
beauty_ai.py: - Add _scrape_legal_pages(): fetches /aviso-legal, /politica-de-privacidad, /privacidad, /quienes-somos, /legal in parallel — Spanish aviso legal pages are required by law to contain razón social, CIF/NIF, address and a contact email; legal snippet passed to AI so it can identify the registered company name - Rewrite _build_beauty_prompt(): full technical profile (SSL, analytics, CMS, load time, word count, GDPR, mobile), all contact channels merged from both site_analyzer and legal pages, updated assessment rules with clearer HOT/WARM criteria, 700-char search results, richer portfolio context - New JSON schema fields: summary (executive description), pitch_angle (one Spanish hook sentence), all_contacts dict (emails/phones/whatsapp/social full lists), best_contact_channel, best_contact_value, partnership_signals, revenue_estimate; outreach_email is now a complete ready-to-send email - max_output_tokens raised from 2000 → 4000 - Contact merge: all_contacts populated from both site_analyzer and legal pages; top-level contact_* fields filled from merged data as fallback - Run DDG search and legal page scraping in parallel (no extra wall-clock cost) index.html (Pipeline): - Business Summary panel with pitch_angle as accent subtitle - Full all_contacts display: all emails (mailto links), all phones, all WhatsApp (green links), all social profiles (shortened display) - partnership_signals chips alongside brand detection - outreach_notes shown in amber at bottom of contact panel - best_contact_channel chip in contact header - Table contact column now shows best_contact_value if available Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
346
app/beauty_ai.py
346
app/beauty_ai.py
@@ -19,6 +19,18 @@ REPLICATE_TOKEN = os.getenv("REPLICATE_API_TOKEN", "r8_7I7Feai78f9PzMOs20y5GVFKi
|
||||
REPLICATE_MODEL = "https://api.replicate.com/v1/models/google/gemini-3-pro/predictions"
|
||||
AI_CONCURRENCY = int(os.getenv("AI_CONCURRENCY", "3"))
|
||||
|
||||
# Contact extraction regexes (same patterns as site_analyzer)
|
||||
_EMAIL_RE = re.compile(r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}")
|
||||
_PHONE_RE = re.compile(r"(?:\+\d{1,3}[\s\-]?)?(?:6|7|8|9)\d{2}[\s\-]?\d{3}[\s\-]?\d{3}")
|
||||
|
||||
# Pages that often contain company registration info (CIF/NIF, registered address,
|
||||
# legal email) — not fetched by site_analyzer, but rich sources for B2B contact data
|
||||
_LEGAL_PATHS = [
|
||||
"/aviso-legal", "/aviso_legal", "/legal",
|
||||
"/politica-de-privacidad", "/politica_privacidad", "/privacidad",
|
||||
"/quienes-somos", "/quienes_somos", "/nosotros",
|
||||
]
|
||||
|
||||
_ai_sem: Optional[asyncio.Semaphore] = None
|
||||
|
||||
def _sem() -> asyncio.Semaphore:
|
||||
@@ -182,91 +194,214 @@ async def _ddg_search(query: str) -> str:
|
||||
return ""
|
||||
|
||||
|
||||
# ── Legal / about page scraper ────────────────────────────────────────────────
|
||||
|
||||
async def _scrape_legal_pages(domain: str) -> dict:
    """Fetch legal and about pages not covered by site_analyzer.

    Every path in ``_LEGAL_PATHS`` is requested concurrently over HTTPS.
    Pages that answer with HTTP 200 are scanned for contact data; any
    other status, or any request error, is treated as "page not
    available". This is a best-effort scraper: failures are logged at
    debug level and never raised to the caller.

    Args:
        domain: Bare host name (no scheme, no trailing slash); it is
            interpolated as ``https://{domain}{path}``.

    Returns:
        A dict with three keys:
            emails: up to 8 unique lower-cased emails, in discovery order,
                taken from ``mailto:`` links and a regex scan of the first
                60000 characters of each page's HTML.
            phones: up to 6 unique phone strings, in discovery order, taken
                from ``tel:`` links (digits and ``+`` only) and a regex
                scan of the visible text (spaces and dashes removed).
            legal_snippet: the first 150 whitespace-separated words of the
                first fetched page whose path contains "aviso", "legal"
                or "privacidad"; empty string if none resolved.
    """
    # Dicts are used as insertion-ordered sets so de-duplication keeps the
    # order in which contacts were first seen.
    emails: dict[str, None] = {}
    phones: dict[str, None] = {}
    legal_snippet = ""

    # Asset file names such as "logo@2x.png" match the email regex.
    asset_suffixes = (".png", ".jpg", ".css", ".js", ".svg")

    async def _fetch(client: httpx.AsyncClient, path: str) -> tuple[str, str | None]:
        """Return (path, html) on HTTP 200, otherwise (path, None)."""
        try:
            r = await client.get(f"https://{domain}{path}")
            if r.status_code == 200:
                return path, r.text
        except Exception as exc:  # best-effort: any failure means "no page"
            logger.debug("Legal page fetch failed %s%s: %s", domain, path, exc)
        return path, None

    # One shared client for all paths so connections to the host are reused.
    # NOTE(review): verify=False disables TLS certificate validation —
    # presumably so sites with broken certificates can still be scraped;
    # confirm this is intended.
    async with httpx.AsyncClient(
        timeout=8, follow_redirects=True, verify=False,
        headers={"User-Agent": "Mozilla/5.0"},
    ) as client:
        # gather() returns results in _LEGAL_PATHS order, so the legal
        # snippet below comes from the earliest listed path that resolved.
        pages = await asyncio.gather(*(_fetch(client, p) for p in _LEGAL_PATHS))

    for path, html in pages:
        if not html:
            continue
        try:
            soup = BeautifulSoup(html, "html.parser")

            # Explicit mailto:/tel: links.
            for a in soup.find_all("a", href=True):
                href = a["href"]
                if href.startswith("mailto:"):
                    # Drop any "?subject=..." query part.
                    em = href[7:].split("?")[0].strip().lower()
                    if em:
                        emails.setdefault(em)
                elif href.startswith("tel:"):
                    ph = re.sub(r"[^\d+]", "", href[4:])
                    if ph:
                        phones.setdefault(ph)

            # Regex scan of the raw HTML (capped to bound the work).
            for em in _EMAIL_RE.findall(html[:60000]):
                em = em.lower()
                if not em.endswith(asset_suffixes):
                    emails.setdefault(em)

            # Regex scan of the visible text for phone numbers.
            visible = soup.get_text(separator=" ", strip=True)
            for ph in _PHONE_RE.findall(visible):
                ph_c = re.sub(r"[\s\-]", "", ph)
                if ph_c:
                    phones.setdefault(ph_c)

            # Keep the snippet from the first legal/privacy page only;
            # about pages ("quienes-somos", "nosotros") never supply it.
            if not legal_snippet and any(
                k in path for k in ("aviso", "legal", "privacidad")
            ):
                legal_snippet = " ".join(visible.split()[:150])
        except Exception as exc:
            # A malformed page must not discard what other pages yielded.
            logger.debug("Legal page parse failed %s%s: %s", domain, path, exc)

    return {
        "emails": list(emails)[:8],
        "phones": list(phones)[:6],
        "legal_snippet": legal_snippet,
    }
|
||||
|
||||
|
||||
# ── Prompt builder ─────────────────────────────────────────────────────────────
|
||||
|
||||
def _build_beauty_prompt(a: dict, detected_brands: list, dist_matches: list,
|
||||
search_results: str = "") -> str:
|
||||
contacts_block = []
|
||||
if a.get("emails"): contacts_block.append(f" Emails: {', '.join(a['emails'][:3])}")
|
||||
if a.get("phones"): contacts_block.append(f" Phones: {', '.join(a['phones'][:3])}")
|
||||
if a.get("social_links"): contacts_block.append(f" Social: {', '.join(a['social_links'][:4])}")
|
||||
contacts_str = "\n".join(contacts_block) or " Not found"
|
||||
search_results: str = "",
|
||||
extra_contacts: dict | None = None) -> str:
|
||||
"""Build the Gemini assessment prompt.
|
||||
|
||||
snippet = (a.get("visible_text_snippet") or "")[:1200]
|
||||
title = a.get("page_title") or ""
|
||||
meta = a.get("meta_description") or ""
|
||||
country = a.get("ip_country") or "unknown"
|
||||
cms = a.get("cms") or "unknown"
|
||||
detected_str = ", ".join(detected_brands) if detected_brands else "none detected"
|
||||
dist_str = ", ".join(dist_matches) if dist_matches else "none"
|
||||
extra_contacts comes from _scrape_legal_pages() and adds emails/phones/company
|
||||
info found in the aviso legal, privacy policy, and about pages.
|
||||
"""
|
||||
ec = extra_contacts or {}
|
||||
|
||||
return f"""You are a senior B2B sales analyst for a cosmetics distribution company operating in Europe.
|
||||
Your task: evaluate whether this website is a viable B2B customer (retailer, multi-brand store,
|
||||
e-commerce, distributor or chain that buys beauty products wholesale) and generate an outreach plan.
|
||||
# Merge contact sources: site_analyzer (main page + contact pages) + legal pages
|
||||
all_emails = list(dict.fromkeys((a.get("emails") or []) + (ec.get("emails") or [])))[:8]
|
||||
all_phones = list(dict.fromkeys((a.get("phones") or []) + (ec.get("phones") or [])))[:6]
|
||||
all_whatsapp = list(dict.fromkeys(a.get("whatsapp") or []))[:4]
|
||||
all_social = list(dict.fromkeys(a.get("social_links") or []))[:6]
|
||||
|
||||
=== SITE DATA ===
|
||||
Domain: {a.get("domain")}
|
||||
Country (IP): {country}
|
||||
Title: {title}
|
||||
Meta desc: {meta}
|
||||
CMS: {cms}
|
||||
Contact info:
|
||||
{contacts_str}
|
||||
def _fmt(lst: list) -> str:
|
||||
return ", ".join(lst) if lst else "—"
|
||||
|
||||
# Site technical signals
|
||||
ssl_info = ("✓ valid" if a.get("ssl_valid") else "✗ invalid/missing")
|
||||
analytics = ", ".join(a.get("analytics_present") or []) or "none detected"
|
||||
word_count = a.get("word_count", 0)
|
||||
load_ms = a.get("load_time_ms", 0)
|
||||
copyright = a.get("copyright_year") or a.get("last_modified") or "unknown"
|
||||
|
||||
snippet = (a.get("visible_text_snippet") or "")[:1600]
|
||||
legal_snippet = (ec.get("legal_snippet") or "")[:800]
|
||||
detected_str = ", ".join(detected_brands) if detected_brands else "none detected"
|
||||
dist_str = ", ".join(dist_matches) if dist_matches else "none"
|
||||
|
||||
return f"""You are a senior B2B sales analyst for a cosmetics distribution company
|
||||
operating across Europe. Your task: thoroughly evaluate this website as a potential
|
||||
wholesale B2B customer and produce a complete outreach dossier.
|
||||
|
||||
=== BUSINESS PROFILE ===
|
||||
Domain: {a.get("domain")}
|
||||
Country (IP): {a.get("ip_country") or "unknown"}
|
||||
Region: {a.get("ip_region") or "unknown"}
|
||||
Hosting (EU?): {a.get("eu_hosted")} | ISP/Org: {a.get("org") or a.get("isp") or "unknown"}
|
||||
Page title: {a.get("page_title") or "—"}
|
||||
H1: {a.get("h1_text") or "—"}
|
||||
Meta desc: {(a.get("meta_description") or "—")[:200]}
|
||||
CMS: {a.get("cms") or "unknown"}
|
||||
Last updated: {copyright}
|
||||
|
||||
=== TECHNICAL SIGNALS ===
|
||||
SSL: {ssl_info}
|
||||
Load time: {load_ms}ms
|
||||
Word count: {word_count}
|
||||
Analytics: {analytics}
|
||||
Mobile: {"yes" if a.get("has_mobile_viewport") else "no"}
|
||||
Sitemap/Robots: sitemap={"yes" if a.get("has_sitemap") else "no"}, robots={"yes" if a.get("has_robots") else "no"}
|
||||
GDPR/Privacy: cookie_tool={a.get("cookie_tool") or "none"}, privacy_policy={"yes" if a.get("has_privacy_policy") else "no"}
|
||||
|
||||
=== ALL CONTACT CHANNELS ===
|
||||
Emails: {_fmt(all_emails)}
|
||||
Phones: {_fmt(all_phones)}
|
||||
WhatsApp: {_fmt(all_whatsapp)}
|
||||
Social media: {_fmt(all_social)}
|
||||
|
||||
=== LEGAL / COMPANY REGISTRATION INFO ===
|
||||
(extracted from aviso legal / política de privacidad — may contain razón social, CIF, address)
|
||||
{legal_snippet or "Not found or page not accessible"}
|
||||
|
||||
=== PAGE CONTENT SAMPLE ===
|
||||
{snippet}
|
||||
|
||||
=== BRANDS ALREADY DETECTED ON SITE ===
|
||||
=== BRANDS DETECTED ON SITE ===
|
||||
{detected_str}
|
||||
|
||||
=== OUR PORTFOLIO BRANDS FOUND ON THEIR SITE ===
|
||||
(These brands we distribute — finding them means we're already in their market)
|
||||
(brands we distribute that appear on their site — confirms shared market)
|
||||
{dist_str}
|
||||
|
||||
=== WEB SEARCH RESULTS ===
|
||||
{(search_results or "No results.")[:500]}
|
||||
{(search_results or "No results available.")[:700]}
|
||||
|
||||
=== OUR DISTRIBUTION PORTFOLIO ===
|
||||
=== OUR FULL DISTRIBUTION PORTFOLIO ===
|
||||
{', '.join(OUR_BRANDS)}
|
||||
|
||||
=== BEAUTY CATEGORIES WE COVER ===
|
||||
{', '.join(BEAUTY_CATEGORIES)}
|
||||
|
||||
=== ASSESSMENT RULES ===
|
||||
1. Determine if this is a B2B prospect: retailer, pharmacy, parafarmacia,
|
||||
perfumería, multi-brand beauty ecommerce, salon chain, supermarket beauty section,
|
||||
or beauty products distributor based in Europe.
|
||||
2. Identify which categories from our list they cover.
|
||||
3. From the page content (even if brands list is empty), identify any beauty brands mentioned.
|
||||
4. Match detected brands against our portfolio — this drives lead quality:
|
||||
- HOT: 3+ of our portfolio brands detected, OR a large EU retailer clearly in our niche
|
||||
- WARM: 1-2 portfolio brand matches, OR clear beauty retailer with good potential
|
||||
- COLD: beauty-adjacent but weak match, OR can't confirm they buy wholesale
|
||||
- NOT_RELEVANT: not a beauty business or not in Europe
|
||||
5. Write all human text (proposal, email) in Spanish.
|
||||
6. Keep JSON values concise (≤ 25 words each).
|
||||
1. TARGET PROFILE: retailer, pharmacy, parafarmacia, perfumería, multi-brand beauty
|
||||
ecommerce, salon chain, beauty distributor, or supermarket beauty section in Europe.
|
||||
2. Identify ALL beauty brands mentioned anywhere on the page — go beyond the pre-detected
|
||||
list above. Use product names, brand references in body text, alt text, etc.
|
||||
3. Match brands against our portfolio. Lead quality is driven by portfolio overlap:
|
||||
- HOT: 3+ portfolio brands detected, OR major EU beauty retailer clearly in our niche
|
||||
- WARM: 1-2 portfolio brand matches, OR clear beauty multi-brand retailer with good reach
|
||||
- COLD: beauty-adjacent but weak portfolio overlap, OR single-brand, OR unclear wholesale
|
||||
- NOT_RELEVANT: not a beauty business, not in Europe, or clearly a consumer-only brand
|
||||
4. Extract the BEST contact for outreach:
|
||||
- Prefer business/commercial emails (info@, ventas@, compras@, admin@) over personal
|
||||
- If WhatsApp exists, flag it — it's often the fastest channel in Spain/LatAm
|
||||
- Check social media for direct messaging channels
|
||||
5. Use the legal/company info to identify the official business name (razón social),
|
||||
and if a CIF/NIF is visible, mention it in outreach_notes as it confirms legitimacy.
|
||||
6. Write summary, pitch_angle, b2b_proposal, outreach_subject, and outreach_email in Spanish.
|
||||
7. The outreach_email must be a complete ready-to-send email: greeting, 2-3 body sentences
|
||||
(reference their specific range, 1-2 matching portfolio brands, add value), clear CTA.
|
||||
|
||||
Respond ONLY with valid JSON, no markdown, no text outside JSON:
|
||||
Respond ONLY with valid JSON, no markdown fences, no text outside the JSON object:
|
||||
{{
|
||||
"is_relevant": true/false,
|
||||
"is_relevant": true,
|
||||
"lead_quality": "HOT|WARM|COLD|NOT_RELEVANT",
|
||||
"lead_reasoning": "1-2 sentences why",
|
||||
"business_type": "retailer|ecommerce|distributor|pharmacy|salon_chain|other",
|
||||
"business_name": "name from title or domain",
|
||||
"country_fiscal": "2-letter ISO or full name",
|
||||
"countries_active": ["ES","FR"],
|
||||
"summary": "2-3 sentence executive summary: what this business does, their product range, who their customers are, and their apparent scale",
|
||||
"lead_reasoning": "2-3 sentences explaining the lead quality rating — reference specific brands found, categories covered, and portfolio overlap",
|
||||
"business_type": "retailer|ecommerce|distributor|pharmacy|parafarmacia|salon_chain|perfumeria|other",
|
||||
"business_name": "official business name from title, H1, or aviso legal",
|
||||
"country_fiscal": "2-letter ISO",
|
||||
"countries_active": ["ES"],
|
||||
"categories": ["Hair Care","Makeup"],
|
||||
"detected_brands": ["brand1","brand2"],
|
||||
"dist_matches": ["OurBrand1","OurBrand2"],
|
||||
"contact_email": "email or empty string",
|
||||
"contact_phone": "phone or empty string",
|
||||
"contact_whatsapp": "whatsapp link or empty string",
|
||||
"contact_social": "primary social profile URL or empty string",
|
||||
"b2b_proposal": "1-2 sentence value proposition in Spanish referencing their categories and our matching brands",
|
||||
"outreach_subject": "short Spanish subject line referencing their business name",
|
||||
"outreach_email": "3-4 sentence ready-to-send email in Spanish. Mention their business, 1-2 specific brands from our portfolio that match their range, and a clear call to action (catálogo, muestra, llamada).",
|
||||
"revenue_estimate": "unknown",
|
||||
"outreach_notes": "brief context for sales rep"
|
||||
"detected_brands": ["all beauty brands found on site — be thorough"],
|
||||
"dist_matches": ["our portfolio brands found on their site"],
|
||||
"partnership_signals": ["carries multi-brand","has wholesale section","stockist page","B2B portal"],
|
||||
"pitch_angle": "1 punchy sentence in Spanish: the specific angle for this business (reference their range, a gap you fill, or the portfolio brands that match)",
|
||||
"b2b_proposal": "2-3 sentence value proposition in Spanish: what we offer, why it fits their range, what differentiates our brands",
|
||||
"outreach_subject": "specific Spanish subject line mentioning their business name and 1 relevant brand",
|
||||
"outreach_email": "complete ready-to-send Spanish email: greeting + 3-4 body sentences referencing their specific product range and 1-2 portfolio brands that match + clear CTA (catálogo, muestra, llamada, pedido mínimo) + valediction. Do not use placeholders.",
|
||||
"best_contact_channel": "email|phone|whatsapp|social|web_form|unknown",
|
||||
"best_contact_value": "the actual email/phone/URL to use — prefer commercial emails, then phone, then social",
|
||||
"all_contacts": {{
|
||||
"emails": {json.dumps(all_emails)},
|
||||
"phones": {json.dumps(all_phones)},
|
||||
"whatsapp": {json.dumps(all_whatsapp)},
|
||||
"social": {json.dumps(all_social)}
|
||||
}},
|
||||
"revenue_estimate": "unknown|<100k€|100k-500k€|500k-2M€|>2M€",
|
||||
"outreach_notes": "2-3 sentences for the sales rep: timing, approach, red flags, CIF if found, any urgency signals"
|
||||
}}"""
|
||||
|
||||
|
||||
@@ -309,21 +444,31 @@ async def assess_beauty_domain(analysis: dict) -> dict:
|
||||
detected = detect_brands_in_text(text)
|
||||
dist_match = get_dist_matches(detected)
|
||||
|
||||
# Also search for company context
|
||||
# Run DDG search and legal page scraping in parallel
|
||||
title = analysis.get("page_title") or ""
|
||||
biz_name = title.split("|")[0].split("-")[0].strip() or domain
|
||||
search_results = await _ddg_search(f'"{biz_name}" {domain} beauty cosmetics wholesale contact')
|
||||
logger.info("Beauty assess %s: %d brands detected, %d portfolio matches",
|
||||
domain, len(detected), len(dist_match))
|
||||
search_results, extra_contacts = await asyncio.gather(
|
||||
_ddg_search(f'"{biz_name}" {domain} cosmetics beauty wholesale B2B contacto'),
|
||||
_scrape_legal_pages(domain),
|
||||
)
|
||||
|
||||
logger.info(
|
||||
"Beauty assess %s: %d brands, %d portfolio matches, "
|
||||
"%d extra emails from legal pages",
|
||||
domain, len(detected), len(dist_match),
|
||||
len(extra_contacts.get("emails", [])),
|
||||
)
|
||||
|
||||
payload = {
|
||||
"input": {
|
||||
"prompt": _build_beauty_prompt(analysis, detected, dist_match, search_results),
|
||||
"prompt": _build_beauty_prompt(
|
||||
analysis, detected, dist_match, search_results, extra_contacts
|
||||
),
|
||||
"images": [], "videos": [],
|
||||
"top_p": 0.9,
|
||||
"temperature": 0.15,
|
||||
"temperature": 0.2,
|
||||
"thinking_level": "low",
|
||||
"max_output_tokens": 2000,
|
||||
"max_output_tokens": 4000,
|
||||
}
|
||||
}
|
||||
try:
|
||||
@@ -351,17 +496,45 @@ async def assess_beauty_domain(analysis: dict) -> dict:
|
||||
if not result.get("detected_brands") and detected:
|
||||
result["detected_brands"] = detected
|
||||
|
||||
# Always merge contact data directly from site_analyzer — more reliable
|
||||
# than AI extraction since it uses regex against raw HTML
|
||||
phones = analysis.get("phones", [])
|
||||
whatsapp = analysis.get("whatsapp", [])
|
||||
social_links = analysis.get("social_links", [])
|
||||
if phones and not result.get("contact_phone"):
|
||||
result["contact_phone"] = phones[0]
|
||||
if whatsapp:
|
||||
result["contact_whatsapp"] = "; ".join(whatsapp[:2])
|
||||
if social_links:
|
||||
result["contact_social"] = "; ".join(social_links[:3])
|
||||
# Merge contact data directly from site_analyzer + legal pages —
|
||||
# more reliable than AI extraction since it's regex against raw HTML.
|
||||
# The AI's all_contacts field may already have the right data if it
|
||||
# followed the schema; fill gaps from our own extraction.
|
||||
all_emails = list(dict.fromkeys(
|
||||
(analysis.get("emails") or []) + (extra_contacts.get("emails") or [])
|
||||
))[:8]
|
||||
all_phones = list(dict.fromkeys(
|
||||
(analysis.get("phones") or []) + (extra_contacts.get("phones") or [])
|
||||
))[:6]
|
||||
all_whatsapp = list(dict.fromkeys(analysis.get("whatsapp") or []))[:4]
|
||||
all_social = list(dict.fromkeys(analysis.get("social_links") or []))[:6]
|
||||
|
||||
# Ensure all_contacts in result is always populated from our own data
|
||||
if not result.get("all_contacts") or not isinstance(result.get("all_contacts"), dict):
|
||||
result["all_contacts"] = {}
|
||||
result["all_contacts"].setdefault("emails", [])
|
||||
result["all_contacts"].setdefault("phones", [])
|
||||
result["all_contacts"].setdefault("whatsapp", [])
|
||||
result["all_contacts"].setdefault("social", [])
|
||||
# Merge our extracted data into the AI's all_contacts
|
||||
result["all_contacts"]["emails"] = list(dict.fromkeys(
|
||||
result["all_contacts"]["emails"] + all_emails))[:8]
|
||||
result["all_contacts"]["phones"] = list(dict.fromkeys(
|
||||
result["all_contacts"]["phones"] + all_phones))[:6]
|
||||
result["all_contacts"]["whatsapp"] = list(dict.fromkeys(
|
||||
result["all_contacts"]["whatsapp"] + all_whatsapp))[:4]
|
||||
result["all_contacts"]["social"] = list(dict.fromkeys(
|
||||
result["all_contacts"]["social"] + all_social))[:6]
|
||||
|
||||
# Fill top-level contact fields from merged data if AI left them blank
|
||||
if not result.get("contact_email") and all_emails:
|
||||
result["contact_email"] = all_emails[0]
|
||||
if not result.get("contact_phone") and all_phones:
|
||||
result["contact_phone"] = all_phones[0]
|
||||
if not result.get("contact_whatsapp") and all_whatsapp:
|
||||
result["contact_whatsapp"] = all_whatsapp[0]
|
||||
if not result.get("contact_social") and all_social:
|
||||
result["contact_social"] = all_social[0]
|
||||
|
||||
logger.info("Beauty AI %s → quality=%s, dist_matches=%s",
|
||||
domain, result.get("lead_quality"), result.get("dist_matches"))
|
||||
@@ -369,17 +542,24 @@ async def assess_beauty_domain(analysis: dict) -> dict:
|
||||
|
||||
except Exception as e:
|
||||
logger.error("Beauty AI error %s: %s", domain, e)
|
||||
phones = analysis.get("phones", [])
|
||||
whatsapp = analysis.get("whatsapp", [])
|
||||
social = analysis.get("social_links", [])
|
||||
all_emails = list(dict.fromkeys(
|
||||
(analysis.get("emails") or []) + (extra_contacts.get("emails") or [])))[:8]
|
||||
all_phones = list(dict.fromkeys(
|
||||
(analysis.get("phones") or []) + (extra_contacts.get("phones") or [])))[:6]
|
||||
all_whatsapp = list(dict.fromkeys(analysis.get("whatsapp") or []))[:4]
|
||||
all_social = list(dict.fromkeys(analysis.get("social_links") or []))[:6]
|
||||
return {
|
||||
"error": str(e)[:300],
|
||||
"is_relevant": False,
|
||||
"lead_quality": "COLD",
|
||||
"dist_matches": dist_match,
|
||||
"detected_brands": detected,
|
||||
"contact_email": "",
|
||||
"contact_phone": phones[0] if phones else "",
|
||||
"contact_whatsapp": "; ".join(whatsapp[:2]) if whatsapp else "",
|
||||
"contact_social": "; ".join(social[:3]) if social else "",
|
||||
"contact_email": all_emails[0] if all_emails else "",
|
||||
"contact_phone": all_phones[0] if all_phones else "",
|
||||
"contact_whatsapp": all_whatsapp[0] if all_whatsapp else "",
|
||||
"contact_social": all_social[0] if all_social else "",
|
||||
"all_contacts": {
|
||||
"emails": all_emails, "phones": all_phones,
|
||||
"whatsapp": all_whatsapp, "social": all_social,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -340,7 +340,7 @@ textarea{width:100%;resize:vertical;font-family:monospace;font-size:12px}
|
||||
<span x-show="!((row._beauty||{}).dist_matches||[]).length" style="color:var(--muted)">—</span>
|
||||
</td>
|
||||
<td style="font-size:11px;max-width:160px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap"
|
||||
x-text="(row._beauty||{}).contact_email||row.emails||'—'"></td>
|
||||
x-text="(row._beauty||{}).best_contact_value||(row._beauty||{}).contact_email||row.emails||'—'"></td>
|
||||
<td @click.stop style="white-space:nowrap;display:flex;gap:4px">
|
||||
<button class="btn-secondary btn-sm" @click="copyOutreach(row)">Copy email</button>
|
||||
<button class="btn-secondary btn-sm" @click="reassessOne(row.domain)" title="Re-run B2B assessment">↺</button>
|
||||
@@ -350,14 +350,18 @@ textarea{width:100%;resize:vertical;font-family:monospace;font-size:12px}
|
||||
<tr class="detail-row" x-show="expandedLead===row.domain" @click="expandedLead=null" style="cursor:pointer">
|
||||
<td colspan="8">
|
||||
<div class="detail-grid" @click.stop>
|
||||
<div class="detail-box">
|
||||
<h4>B2B Proposal</h4>
|
||||
<p x-text="(row._beauty||{}).b2b_proposal||'—'"></p>
|
||||
</div>
|
||||
<div class="detail-box">
|
||||
<h4>Lead Reasoning</h4>
|
||||
<p x-text="(row._beauty||{}).lead_reasoning||'—'"></p>
|
||||
|
||||
<!-- Summary + pitch -->
|
||||
<div class="detail-box" style="grid-column:1/-1;background:rgba(232,121,160,.06);border-color:rgba(232,121,160,.2)">
|
||||
<h4 style="display:flex;align-items:center;gap:8px">
|
||||
Business Summary
|
||||
<span x-show="(row._beauty||{}).pitch_angle" style="color:var(--accent);font-size:11px;font-weight:400;font-style:italic" x-text="'→ '+((row._beauty||{}).pitch_angle||'')"></span>
|
||||
</h4>
|
||||
<p x-text="(row._beauty||{}).summary||(row._beauty||{}).b2b_proposal||'—'"></p>
|
||||
<p x-show="(row._beauty||{}).lead_reasoning" style="margin-top:6px;font-size:11px;color:var(--muted)" x-text="(row._beauty||{}).lead_reasoning"></p>
|
||||
</div>
|
||||
|
||||
<!-- Outreach email -->
|
||||
<div class="detail-box" style="grid-column:1/-1">
|
||||
<h4 style="display:flex;align-items:center;gap:8px">
|
||||
Outreach Email
|
||||
@@ -366,35 +370,81 @@ textarea{width:100%;resize:vertical;font-family:monospace;font-size:12px}
|
||||
</h4>
|
||||
<p style="white-space:pre-wrap;font-size:11px;color:var(--text);margin-top:6px;line-height:1.6" x-text="(row._beauty||{}).outreach_email||'—'"></p>
|
||||
</div>
|
||||
|
||||
<!-- Brands detected -->
|
||||
<div class="detail-box">
|
||||
<h4>Brands Detected on Site</h4>
|
||||
<p style="font-size:11px">
|
||||
<template x-for="b in ((row._beauty||{}).detected_brands||[]).slice(0,30)" :key="b">
|
||||
<template x-for="b in ((row._beauty||{}).detected_brands||[]).slice(0,40)" :key="b">
|
||||
<span class="chip" x-text="b"></span>
|
||||
</template>
|
||||
<span x-show="!((row._beauty||{}).detected_brands||[]).length" style="color:var(--muted)">None detected in scraped text</span>
|
||||
<span x-show="!((row._beauty||{}).detected_brands||[]).length" style="color:var(--muted)">None detected</span>
|
||||
</p>
|
||||
<template x-if="((row._beauty||{}).partnership_signals||[]).length>0">
|
||||
<p style="margin-top:8px;font-size:11px">
|
||||
<span style="color:var(--muted)">Signals: </span>
|
||||
<template x-for="s in ((row._beauty||{}).partnership_signals||[])" :key="s">
|
||||
<span class="chip chip-match" x-text="s"></span>
|
||||
</template>
|
||||
</p>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
<!-- Full contact details -->
|
||||
<div class="detail-box">
|
||||
<h4>Contact Details</h4>
|
||||
<p style="font-size:12px;line-height:1.8">
|
||||
<template x-if="(row._beauty||{}).contact_email">
|
||||
<span>Email: <a :href="'mailto:'+(row._beauty||{}).contact_email" x-text="(row._beauty||{}).contact_email"></a><br></span>
|
||||
<h4 style="display:flex;align-items:center;gap:8px">
|
||||
Contact Details
|
||||
<template x-if="(row._beauty||{}).best_contact_channel">
|
||||
<span class="chip chip-match" x-text="'best: '+((row._beauty||{}).best_contact_channel||'')"></span>
|
||||
</template>
|
||||
<template x-if="(row._beauty||{}).contact_phone">
|
||||
<span>Phone: <span x-text="(row._beauty||{}).contact_phone"></span><br></span>
|
||||
</h4>
|
||||
<div style="font-size:12px;line-height:1.9">
|
||||
<!-- All emails -->
|
||||
<template x-if="((row._beauty||{}).all_contacts||{}).emails?.length">
|
||||
<div>
|
||||
<span style="color:var(--muted);font-size:10px;text-transform:uppercase;letter-spacing:.04em">Emails</span><br>
|
||||
<template x-for="em in ((row._beauty||{}).all_contacts||{}).emails||[]" :key="em">
|
||||
<span><a :href="'mailto:'+em" x-text="em" style="display:inline-block;margin-right:8px"></a></span>
|
||||
</template>
|
||||
</div>
|
||||
</template>
|
||||
<template x-if="(row._beauty||{}).contact_whatsapp">
|
||||
<span>WhatsApp: <a :href="(row._beauty||{}).contact_whatsapp" target="_blank" x-text="(row._beauty||{}).contact_whatsapp"></a><br></span>
|
||||
<!-- All phones -->
|
||||
<template x-if="((row._beauty||{}).all_contacts||{}).phones?.length">
|
||||
<div style="margin-top:4px">
|
||||
<span style="color:var(--muted);font-size:10px;text-transform:uppercase;letter-spacing:.04em">Phones</span><br>
|
||||
<template x-for="ph in ((row._beauty||{}).all_contacts||{}).phones||[]" :key="ph">
|
||||
<span x-text="ph" style="display:inline-block;margin-right:8px"></span>
|
||||
</template>
|
||||
</div>
|
||||
</template>
|
||||
<template x-if="(row._beauty||{}).contact_social">
|
||||
<span style="color:var(--muted)">Social: <span x-text="(row._beauty||{}).contact_social"></span><br></span>
|
||||
<!-- WhatsApp -->
|
||||
<template x-if="((row._beauty||{}).all_contacts||{}).whatsapp?.length">
|
||||
<div style="margin-top:4px">
|
||||
<span style="color:var(--muted);font-size:10px;text-transform:uppercase;letter-spacing:.04em">WhatsApp</span><br>
|
||||
<template x-for="wa in ((row._beauty||{}).all_contacts||{}).whatsapp||[]" :key="wa">
|
||||
<a :href="wa" target="_blank" x-text="wa" style="display:inline-block;margin-right:8px;color:var(--success)"></a>
|
||||
</template>
|
||||
</div>
|
||||
</template>
|
||||
<template x-if="row.emails && !(row._beauty||{}).contact_email">
|
||||
<span style="color:var(--muted);font-size:11px">On-site: <span x-text="row.emails"></span></span>
|
||||
<!-- Social -->
|
||||
<template x-if="((row._beauty||{}).all_contacts||{}).social?.length">
|
||||
<div style="margin-top:4px">
|
||||
<span style="color:var(--muted);font-size:10px;text-transform:uppercase;letter-spacing:.04em">Social</span><br>
|
||||
<template x-for="soc in ((row._beauty||{}).all_contacts||{}).social||[]" :key="soc">
|
||||
<a :href="soc" target="_blank" x-text="soc.replace('https://','').replace('www.','').split('/').slice(0,2).join('/')" style="display:inline-block;margin-right:8px;color:var(--info)"></a>
|
||||
</template>
|
||||
</div>
|
||||
</template>
|
||||
</p>
|
||||
<!-- Fallback if all_contacts not set (older assessments) -->
|
||||
<template x-if="!(row._beauty||{}).all_contacts && (row._beauty||{}).contact_email">
|
||||
<div>Email: <a :href="'mailto:'+(row._beauty||{}).contact_email" x-text="(row._beauty||{}).contact_email"></a></div>
|
||||
</template>
|
||||
</div>
|
||||
<template x-if="(row._beauty||{}).outreach_notes">
|
||||
<p style="margin-top:8px;font-size:11px;color:var(--warn);border-top:1px solid var(--border);padding-top:6px" x-text="(row._beauty||{}).outreach_notes"></p>
|
||||
</template>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</td>
|
||||
</tr>
|
||||
|
||||
Reference in New Issue
Block a user