fix: broader WhatsApp/social detection, generous assessment rules, overlay popup

- site_analyzer: scan onclick/data-href/data-url/data-link/data-action attrs
  on ALL tags for WhatsApp (wa.me, api.whatsapp, web.whatsapp, wa.link),
  tel: links, and social media URLs; raise dedup cap 5→8
- beauty_ai: rewrite lead quality rules — WARM for any genuine multi-brand
  retailer even with zero portfolio matches; portfolio absence alone NEVER
  justifies COLD; add country_fiscal fallback to ip_country
- index.html: assessPopup overlay modal on quality badge click in Browse tab;
  showAssessPopup() parses beauty_assessment JSON with all_contacts fallback;
  [x-cloak] CSS to prevent flash

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-13 10:37:36 +02:00
parent e426922544
commit dfd47743e3
3 changed files with 227 additions and 25 deletions

View File

@@ -316,8 +316,22 @@ async def _analyze_site_inner(domain: str) -> dict:
result["has_gmb"] = any(sig.lower() in hl for sig in GMB_SCHEMA_SIGNALS)
# ── Contacts ──────────────────────────────────────────────────────────
for a in soup.find_all("a", href=True):
href = a["href"]
# Pattern for WhatsApp links that appear inside onclick/data-* attrs
_WA_ATTR_RE = re.compile(
r'(https?://(?:wa\.me|api\.whatsapp\.com/send|web\.whatsapp\.com/send'
r'|wa\.link)[^\s\'"\\>]{0,80})',
re.I,
)
def _add_whatsapp(raw: str):
m = _WA_ATTR_RE.search(raw)
url = m.group(1) if m else raw[:80]
url = url.rstrip("'\"\\)")
if url and url not in result["whatsapp"]:
result["whatsapp"].append(url)
for tag in soup.find_all("a", href=True):
href = tag["href"]
if href.startswith("mailto:"):
em = href[7:].split("?")[0].strip().lower()
if em and em not in result["emails"]:
@@ -326,9 +340,8 @@ async def _analyze_site_inner(domain: str) -> dict:
ph = re.sub(r"[^\d+]", "", href[4:])
if ph and ph not in result["phones"]:
result["phones"].append(ph)
elif "wa.me" in href or "api.whatsapp.com" in href:
if href not in result["whatsapp"]:
result["whatsapp"].append(href[:80])
elif any(x in href for x in ("wa.me", "api.whatsapp", "wa.link", "web.whatsapp")):
_add_whatsapp(href)
else:
for sd in SOCIAL_DOM:
if sd in href.lower():
@@ -336,6 +349,32 @@ async def _analyze_site_inner(domain: str) -> dict:
if clean not in result["social_links"]:
result["social_links"].append(clean)
break
# Broader scan: WhatsApp / tel links hidden in onclick, data-href, data-url, etc.
for tag in soup.find_all(True):
for attr in ("onclick", "data-href", "data-url", "data-link", "data-action"):
val = tag.get(attr) or ""
if not val:
continue
# WhatsApp in attribute value
if any(x in val for x in ("wa.me", "api.whatsapp", "wa.link", "web.whatsapp")):
_add_whatsapp(val)
# tel: in attribute value
m_tel = re.search(r"tel:([\d\s\+\-\(\)]{6,20})", val)
if m_tel:
ph = re.sub(r"[^\d+]", "", m_tel.group(1))
if ph and ph not in result["phones"]:
result["phones"].append(ph)
# Social media links in attribute value
for sd in SOCIAL_DOM:
if sd in val.lower():
url_m = re.search(r"https?://[^\s'\"\\)]{10,120}", val)
if url_m:
clean = url_m.group(0).split("?")[0].rstrip("/")
if clean not in result["social_links"]:
result["social_links"].append(clean)
break
for em in EMAIL_RE.findall(html[:80000]):
em = em.lower()
if em not in result["emails"] and not any(em.endswith(x) for x in [".png",".jpg",".css",".js",".svg"]):
@@ -345,7 +384,7 @@ async def _analyze_site_inner(domain: str) -> dict:
if ph_c not in result["phones"]:
result["phones"].append(ph_c)
for k in ["emails", "phones", "whatsapp", "social_links"]:
result[k] = list(dict.fromkeys(result[k]))[:5]
result[k] = list(dict.fromkeys(result[k]))[:8]
# ── CMS ───────────────────────────────────────────────────────────────
CMS_SIGS = {