fix: broader WhatsApp/social detection, generous assessment rules, overlay popup
- site_analyzer: scan onclick/data-href/data-url/data-link/data-action attrs on ALL tags for WhatsApp (wa.me, api.whatsapp, web.whatsapp, wa.link), tel: links, and social media URLs; raise dedup cap 5→8
- beauty_ai: rewrite lead quality rules — WARM for any genuine multi-brand retailer even with zero portfolio matches; portfolio absence NEVER justifies COLD alone; add country_fiscal fallback to ip_country
- index.html: assessPopup overlay modal on quality badge click in Browse tab; showAssessPopup() parses beauty_assessment JSON with all_contacts fallback; [x-cloak] CSS to prevent flash

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -316,8 +316,22 @@ async def _analyze_site_inner(domain: str) -> dict:
|
||||
result["has_gmb"] = any(sig.lower() in hl for sig in GMB_SCHEMA_SIGNALS)
|
||||
|
||||
# ── Contacts ──────────────────────────────────────────────────────────
|
||||
for a in soup.find_all("a", href=True):
|
||||
href = a["href"]
|
||||
# Pattern for WhatsApp links that appear inside onclick/data-* attrs
|
||||
_WA_ATTR_RE = re.compile(
|
||||
r'(https?://(?:wa\.me|api\.whatsapp\.com/send|web\.whatsapp\.com/send'
|
||||
r'|wa\.link)[^\s\'"\\>]{0,80})',
|
||||
re.I,
|
||||
)
|
||||
|
||||
def _add_whatsapp(raw: str):
    """Record a WhatsApp link found in *raw* into ``result["whatsapp"]``.

    *raw* is an attribute value (an ``href``, or an ``onclick``/``data-*``
    string) that the caller has already judged to mention WhatsApp.  If the
    enclosing scope's ``_WA_ATTR_RE`` finds an http(s) WhatsApp URL inside it,
    that URL is stored; otherwise the first 80 characters of *raw* are stored
    as a fallback.  Trailing quote, backslash and ``)`` characters are
    stripped, and empty or already-recorded values are ignored.
    """
    found = _WA_ATTR_RE.search(raw)
    candidate = raw[:80] if found is None else found.group(1)
    # Attribute values lifted from inline JS often end with quote/paren debris.
    candidate = candidate.rstrip("'\"\\)")
    if not candidate:
        return
    known = result["whatsapp"]
    if candidate in known:
        return
    known.append(candidate)
|
||||
|
||||
for tag in soup.find_all("a", href=True):
|
||||
href = tag["href"]
|
||||
if href.startswith("mailto:"):
|
||||
em = href[7:].split("?")[0].strip().lower()
|
||||
if em and em not in result["emails"]:
|
||||
@@ -326,9 +340,8 @@ async def _analyze_site_inner(domain: str) -> dict:
|
||||
ph = re.sub(r"[^\d+]", "", href[4:])
|
||||
if ph and ph not in result["phones"]:
|
||||
result["phones"].append(ph)
|
||||
elif "wa.me" in href or "api.whatsapp.com" in href:
|
||||
if href not in result["whatsapp"]:
|
||||
result["whatsapp"].append(href[:80])
|
||||
elif any(x in href for x in ("wa.me", "api.whatsapp", "wa.link", "web.whatsapp")):
|
||||
_add_whatsapp(href)
|
||||
else:
|
||||
for sd in SOCIAL_DOM:
|
||||
if sd in href.lower():
|
||||
@@ -336,6 +349,32 @@ async def _analyze_site_inner(domain: str) -> dict:
|
||||
if clean not in result["social_links"]:
|
||||
result["social_links"].append(clean)
|
||||
break
|
||||
|
||||
# Broader scan: WhatsApp / tel links hidden in onclick, data-href, data-url, etc.
|
||||
for tag in soup.find_all(True):
|
||||
for attr in ("onclick", "data-href", "data-url", "data-link", "data-action"):
|
||||
val = tag.get(attr) or ""
|
||||
if not val:
|
||||
continue
|
||||
# WhatsApp in attribute value
|
||||
if any(x in val for x in ("wa.me", "api.whatsapp", "wa.link", "web.whatsapp")):
|
||||
_add_whatsapp(val)
|
||||
# tel: in attribute value
|
||||
m_tel = re.search(r"tel:([\d\s\+\-\(\)]{6,20})", val)
|
||||
if m_tel:
|
||||
ph = re.sub(r"[^\d+]", "", m_tel.group(1))
|
||||
if ph and ph not in result["phones"]:
|
||||
result["phones"].append(ph)
|
||||
# Social media links in attribute value
|
||||
for sd in SOCIAL_DOM:
|
||||
if sd in val.lower():
|
||||
url_m = re.search(r"https?://[^\s'\"\\)]{10,120}", val)
|
||||
if url_m:
|
||||
clean = url_m.group(0).split("?")[0].rstrip("/")
|
||||
if clean not in result["social_links"]:
|
||||
result["social_links"].append(clean)
|
||||
break
|
||||
|
||||
for em in EMAIL_RE.findall(html[:80000]):
|
||||
em = em.lower()
|
||||
if em not in result["emails"] and not any(em.endswith(x) for x in [".png",".jpg",".css",".js",".svg"]):
|
||||
@@ -345,7 +384,7 @@ async def _analyze_site_inner(domain: str) -> dict:
|
||||
if ph_c not in result["phones"]:
|
||||
result["phones"].append(ph_c)
|
||||
for k in ["emails", "phones", "whatsapp", "social_links"]:
|
||||
result[k] = list(dict.fromkeys(result[k]))[:5]
|
||||
result[k] = list(dict.fromkeys(result[k]))[:8]
|
||||
|
||||
# ── CMS ───────────────────────────────────────────────────────────────
|
||||
CMS_SIGS = {
|
||||
|
||||
Reference in New Issue
Block a user