From dfd47743e35d357dcfe51683b774b06da914d810 Mon Sep 17 00:00:00 2001
From: Malin <malin@cloudhost.es>
Date: Wed, 13 May 2026 10:37:36 +0200
Subject: [PATCH] fix: broader WhatsApp/social detection, generous assessment
 rules, overlay popup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- site_analyzer: scan onclick/data-href/data-url/data-link/data-action attrs
  on ALL tags for WhatsApp (wa.me, api.whatsapp, web.whatsapp, wa.link),
  tel: links, and social media URLs; raise dedup cap 5→8
- beauty_ai: rewrite lead quality rules — WARM for any genuine multi-brand
  retailer even with zero portfolio matches; portfolio absence NEVER justifies
  COLD alone; added country_fiscal fallback to ip_country
- index.html: assessPopup overlay modal on quality badge click in Browse tab;
  showAssessPopup() parses beauty_assessment JSON with all_contacts fallback;
  [x-cloak] CSS to prevent flash

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 app/beauty_ai.py             |  61 ++++++++++-----
 app/site_analyzer.py         |  51 +++++++++++--
 app/static/beauty/index.html | 140 ++++++++++++++++++++++++++++++++++-
 3 files changed, 227 insertions(+), 25 deletions(-)

diff --git a/app/beauty_ai.py b/app/beauty_ai.py
index 9c1839e..74f0f96 100644
--- a/app/beauty_ai.py
+++ b/app/beauty_ai.py
@@ -355,24 +355,44 @@ Social media:    {_fmt(all_social)}
 {', '.join(BEAUTY_CATEGORIES)}
 
 === ASSESSMENT RULES ===
-1. TARGET PROFILE: retailer, pharmacy, parafarmacia, perfumería, multi-brand beauty
-   ecommerce, salon chain, beauty distributor, or supermarket beauty section in Europe.
-2. Identify ALL beauty brands mentioned anywhere on the page — go beyond the pre-detected
-   list above. Use product names, brand references in body text, alt text, etc.
-3. Match brands against our portfolio. Lead quality is driven by portfolio overlap:
-   - HOT:  3+ portfolio brands detected, OR major EU beauty retailer clearly in our niche
-   - WARM: 1-2 portfolio brand matches, OR clear beauty multi-brand retailer with good reach
-   - COLD: beauty-adjacent but weak portfolio overlap, OR single-brand, OR unclear wholesale
-   - NOT_RELEVANT: not a beauty business, not in Europe, or clearly a consumer-only brand
-4. Extract the BEST contact for outreach:
-   - Prefer business/commercial emails (info@, ventas@, compras@, admin@) over personal
-   - If WhatsApp exists, flag it — it's often the fastest channel in Spain/LatAm
-   - Check social media for direct messaging channels
-5. Use the legal/company info to identify the official business name (razón social),
-   and if a CIF/NIF is visible, mention it in outreach_notes as it confirms legitimacy.
-6. Write summary, pitch_angle, b2b_proposal, outreach_subject, and outreach_email in Spanish.
-7. The outreach_email must be a complete ready-to-send email: greeting, 2-3 body sentences
-   (reference their specific range, 1-2 matching portfolio brands, add value), clear CTA.
+1. TARGET PROFILE: We are looking for businesses that BUY BEAUTY PRODUCTS WHOLESALE to
+   resell: retailers, pharmacies, parafarmacias, perfumerías, multi-brand beauty ecommerce,
+   salon chains, supermarkets with beauty sections, beauty distributors — anywhere in Europe.
+
+2. Identify ALL beauty brands anywhere on the page (body text, alt text, category names,
+   product listings, brand pages). Go beyond the pre-detected list already provided above.
+
+3. LEAD QUALITY — rate on BUSINESS TYPE first, portfolio overlap second:
+   - HOT:  Business type is clearly a multi-brand beauty reseller with professional/wholesale
+           activity AND at least one of: ≥2 portfolio brands detected, evident professional
+           lines, large catalogue (pharmacies, parafarmacia chains, pro salon distributors).
+           Also HOT: any large-scale EU beauty retailer even without portfolio brand matches.
+   - WARM: ANY genuine multi-brand beauty retailer or ecommerce that could buy wholesale —
+           even if ZERO portfolio brands are currently detected. They are our TARGET MARKET:
+           we want to introduce our brands to them. Pharmacies, perfumerías, beauty shops,
+           multi-brand online stores → default WARM unless there is a clear disqualifier.
+           When uncertain between WARM and COLD: choose WARM.
+   - COLD: ONLY if clearly disqualified: single-brand D2C (sells only their own brand),
+           beauty salon that doesn't sell products to end-consumers, personal influencer /
+           blog, OR no evidence this is a purchasing business at all.
+   - NOT_RELEVANT: No beauty/cosmetics connection, or clearly non-European.
+
+   ⚠ CRITICAL: Portfolio brand absence NEVER alone justifies COLD. Our job is to introduce
+   our brands to retailers who don't carry them yet. Rate on whether they COULD buy wholesale.
+
+4. country_fiscal: use aviso legal if found; otherwise use the IP country shown above.
+   NEVER leave country_fiscal empty — always provide a 2-letter ISO code.
+
+5. Extract the BEST contact for outreach — check all data above:
+   - Prefer commercial emails (info@, ventas@, compras@, pedidos@) over generic/personal
+   - WhatsApp is often the fastest channel in Spain; flag it if present
+   - Set best_contact_channel and best_contact_value explicitly
+
+6. Write summary, pitch_angle, b2b_proposal, outreach_subject, and outreach_email in SPANISH.
+
+7. outreach_email must be a complete ready-to-send Spanish email: greeting + 3-4 sentences
+   referencing their specific range + 1-2 of our portfolio brands that match + clear CTA
+   (catálogo, muestra gratuita, llamada, primer pedido mínimo). No placeholders.
 
 Respond ONLY with valid JSON, no markdown fences, no text outside the JSON object:
 {{
@@ -536,6 +556,11 @@ async def assess_beauty_domain(analysis: dict) -> dict:
             if not result.get("contact_social") and all_social:
                 result["contact_social"] = all_social[0]
 
+            # country_fiscal fallback — use ip_country (may be empty) when the assessment has no usable value
+            fc = (result.get("country_fiscal") or "").strip()
+            if not fc or fc.lower() in ("unknown", "n/a", "-"):
+                result["country_fiscal"] = analysis.get("ip_country") or ""
+
             logger.info("Beauty AI %s → quality=%s, dist_matches=%s",
                         domain, result.get("lead_quality"), result.get("dist_matches"))
             return result
diff --git a/app/site_analyzer.py b/app/site_analyzer.py
index 74f9c52..60e53d2 100644
--- a/app/site_analyzer.py
+++ b/app/site_analyzer.py
@@ -316,8 +316,22 @@ async def _analyze_site_inner(domain: str) -> dict:
             result["has_gmb"] = any(sig.lower() in hl for sig in GMB_SCHEMA_SIGNALS)
 
         # ── Contacts ──────────────────────────────────────────────────────────
-        for a in soup.find_all("a", href=True):
-            href = a["href"]
+        # WhatsApp URL (http(s) or protocol-relative) embedded in an href/onclick/data-* value
+        _WA_ATTR_RE = re.compile(
+            r'((?:https?:)?//(?:wa\.me|api\.whatsapp\.com|web\.whatsapp\.com'
+            r'|wa\.link)[^\s\'"\\>]{0,80})',
+            re.I,
+        )
+
+        def _add_whatsapp(raw: str):
+            # Record only the URL found in raw — raw itself may be onclick JavaScript.
+            m = _WA_ATTR_RE.search(raw)
+            url = m.group(1).rstrip("'\"\\)") if m else ""
+            if url and url not in result["whatsapp"]:
+                result["whatsapp"].append(url)
+
+        for tag in soup.find_all("a", href=True):
+            href = tag["href"]
             if href.startswith("mailto:"):
                 em = href[7:].split("?")[0].strip().lower()
                 if em and em not in result["emails"]:
@@ -326,9 +340,8 @@ async def _analyze_site_inner(domain: str) -> dict:
                 ph = re.sub(r"[^\d+]", "", href[4:])
                 if ph and ph not in result["phones"]:
                     result["phones"].append(ph)
-            elif "wa.me" in href or "api.whatsapp.com" in href:
-                if href not in result["whatsapp"]:
-                    result["whatsapp"].append(href[:80])
+            elif any(x in href for x in ("wa.me", "api.whatsapp", "wa.link", "web.whatsapp")):
+                _add_whatsapp(href)
             else:
                 for sd in SOCIAL_DOM:
                     if sd in href.lower():
@@ -336,6 +349,32 @@ async def _analyze_site_inner(domain: str) -> dict:
                         if clean not in result["social_links"]:
                             result["social_links"].append(clean)
                         break
+
+        # Broader scan over every tag: WhatsApp, tel: and social links that live in
+        # onclick / data-* attribute values instead of an <a href>.
+        for tag in soup.find_all(True):
+            for attr in ("onclick", "data-href", "data-url", "data-link", "data-action"):
+                val = tag.get(attr) or ""
+                if not val:
+                    continue
+                # WhatsApp in attribute value
+                if any(x in val for x in ("wa.me", "api.whatsapp", "wa.link", "web.whatsapp")):
+                    _add_whatsapp(val)
+                # tel: in attribute value
+                m_tel = re.search(r"tel:([\d\s\+\-\(\)]{6,20})", val)
+                if m_tel:
+                    ph = re.sub(r"[^\d+]", "", m_tel.group(1))
+                    if ph and ph not in result["phones"]:
+                        result["phones"].append(ph)
+                # Social media: keep the first URL that itself contains a social domain;
+                # the value may hold unrelated URLs before it.
+                for url in re.findall(r"https?://[^\s'\"\\)]{10,120}", val):
+                    clean = url.split("?")[0].rstrip("/")
+                    if any(sd in clean.lower() for sd in SOCIAL_DOM):
+                        if clean not in result["social_links"]:
+                            result["social_links"].append(clean)
+                        break
+
         for em in EMAIL_RE.findall(html[:80000]):
             em = em.lower()
             if em not in result["emails"] and not any(em.endswith(x) for x in [".png",".jpg",".css",".js",".svg"]):
@@ -345,7 +384,7 @@ async def _analyze_site_inner(domain: str) -> dict:
             if ph_c not in result["phones"]:
                 result["phones"].append(ph_c)
         for k in ["emails", "phones", "whatsapp", "social_links"]:
-            result[k] = list(dict.fromkeys(result[k]))[:5]
+            result[k] = list(dict.fromkeys(result[k]))[:8]
 
         # ── CMS ───────────────────────────────────────────────────────────────
         CMS_SIGS = {
diff --git a/app/static/beauty/index.html b/app/static/beauty/index.html
index ba0c541..a29935b 100644
--- a/app/static/beauty/index.html
+++ b/app/static/beauty/index.html
@@ -88,6 +88,7 @@ tr:hover td{background:rgba(232,121,160,.04)}
 input[type=checkbox]{width:14px;height:14px;accent-color:var(--accent);cursor:pointer}
 textarea{width:100%;resize:vertical;font-family:monospace;font-size:12px}
 .section-pad{padding:0 24px}
+[x-cloak]{display:none!important}
 </style>
 </head>
 <body x-data="app()" x-init="init()">
@@ -225,7 +226,10 @@ textarea{width:100%;resize:vertical;font-family:monospace;font-size:12px}
             <td x-text="(row.niche||'—').replace('_',' ')"></td>
             <td x-text="(row.site_type||'—').replace('_',' ')"></td>
             <td>
-              <span x-show="row.beauty_lead_quality" class="badge" :class="qualityBadge(row.beauty_lead_quality)" x-text="row.beauty_lead_quality"></span>
+              <span x-show="row.beauty_lead_quality" class="badge" :class="qualityBadge(row.beauty_lead_quality)"
+                    x-text="row.beauty_lead_quality" style="cursor:pointer"
+                    title="Click to view assessment"
+                    @click.stop="showAssessPopup(row)"></span>
               <span x-show="!row.beauty_lead_quality" style="color:var(--muted)">—</span>
             </td>
             <td style="white-space:nowrap;display:flex;gap:4px">
@@ -489,6 +493,120 @@ textarea{width:100%;resize:vertical;font-family:monospace;font-size:12px}
   </div>
 </div>
 
+<!-- Assessment popup overlay (Browse tab quality badge click). The dialog centres itself: x-show rewrites the overlay's inline display, so display:flex there would be lost. -->
+<div x-show="assessPopup" x-cloak @click.self="assessPopup=null"
+     style="position:fixed;inset:0;background:rgba(0,0,0,.55);z-index:800">
+  <div @click.stop style="background:var(--card);border:1px solid var(--border);border-radius:12px;
+       padding:20px 24px;max-width:580px;width:94%;max-height:85vh;overflow-y:auto;position:absolute;top:50%;left:50%;transform:translate(-50%,-50%)">
+    <button @click="assessPopup=null"
+            style="position:absolute;top:12px;right:14px;background:none;border:none;color:var(--muted);font-size:18px;cursor:pointer">✕</button>
+
+    <template x-if="assessPopup">
+      <div>
+        <!-- Header -->
+        <div style="display:flex;align-items:center;gap:10px;margin-bottom:14px">
+          <span class="badge" :class="qualityBadge(assessPopup.lead_quality)" x-text="assessPopup.lead_quality||'—'" style="font-size:12px;padding:4px 12px"></span>
+          <span style="font-weight:700;font-size:15px" x-text="assessPopup.business_name||assessPopup._domain||'—'"></span>
+          <span x-show="assessPopup.country_fiscal" class="chip" x-text="assessPopup.country_fiscal"></span>
+          <span x-show="assessPopup.business_type" class="chip" x-text="assessPopup.business_type"></span>
+        </div>
+
+        <!-- Pitch angle -->
+        <template x-if="assessPopup.pitch_angle">
+          <p style="color:var(--accent);font-size:13px;font-style:italic;margin-bottom:12px;padding:8px 12px;background:rgba(232,121,160,.08);border-radius:6px" x-text="assessPopup.pitch_angle"></p>
+        </template>
+
+        <!-- Summary -->
+        <template x-if="assessPopup.summary">
+          <div style="margin-bottom:12px">
+            <div style="color:var(--muted);font-size:10px;text-transform:uppercase;letter-spacing:.05em;margin-bottom:4px">Summary</div>
+            <p style="font-size:12px;line-height:1.6" x-text="assessPopup.summary"></p>
+          </div>
+        </template>
+
+        <!-- Lead reasoning -->
+        <template x-if="assessPopup.lead_reasoning">
+          <div style="margin-bottom:12px">
+            <div style="color:var(--muted);font-size:10px;text-transform:uppercase;letter-spacing:.05em;margin-bottom:4px">Lead Reasoning</div>
+            <p style="font-size:12px;line-height:1.6" x-text="assessPopup.lead_reasoning"></p>
+          </div>
+        </template>
+
+        <!-- Categories + portfolio matches -->
+        <div style="display:flex;gap:16px;margin-bottom:12px;flex-wrap:wrap">
+          <template x-if="(assessPopup.categories||[]).length">
+            <div style="flex:1;min-width:140px">
+              <div style="color:var(--muted);font-size:10px;text-transform:uppercase;letter-spacing:.05em;margin-bottom:4px">Categories</div>
+              <template x-for="c in (assessPopup.categories||[])" :key="c">
+                <span class="chip" x-text="c"></span>
+              </template>
+            </div>
+          </template>
+          <template x-if="(assessPopup.dist_matches||[]).length">
+            <div style="flex:1;min-width:140px">
+              <div style="color:var(--muted);font-size:10px;text-transform:uppercase;letter-spacing:.05em;margin-bottom:4px">Portfolio Match</div>
+              <template x-for="b in (assessPopup.dist_matches||[])" :key="b">
+                <span class="chip chip-match" x-text="b"></span>
+              </template>
+            </div>
+          </template>
+        </div>
+
+        <!-- Contacts -->
+        <div style="margin-bottom:12px">
+          <div style="color:var(--muted);font-size:10px;text-transform:uppercase;letter-spacing:.05em;margin-bottom:6px">
+            Best Contact
+            <span x-show="assessPopup.best_contact_channel" class="chip chip-match" style="margin-left:4px" x-text="assessPopup.best_contact_channel"></span>
+          </div>
+          <div style="font-size:12px;line-height:1.9">
+            <template x-if="(assessPopup.all_contacts||{}).emails?.length">
+              <div>
+                <template x-for="em in (assessPopup.all_contacts.emails||[])" :key="em">
+                  <a :href="'mailto:'+em" x-text="em" style="display:inline-block;margin-right:10px;color:var(--accent)"></a>
+                </template>
+              </div>
+            </template>
+            <template x-if="(assessPopup.all_contacts||{}).phones?.length">
+              <div style="color:var(--text)">
+                <template x-for="ph in (assessPopup.all_contacts.phones||[])" :key="ph">
+                  <span x-text="ph" style="display:inline-block;margin-right:10px"></span>
+                </template>
+              </div>
+            </template>
+            <template x-if="(assessPopup.all_contacts||{}).whatsapp?.length">
+              <div>
+                <template x-for="wa in (assessPopup.all_contacts.whatsapp||[])" :key="wa">
+                  <a :href="wa" target="_blank" style="color:var(--success);display:inline-block;margin-right:10px">WhatsApp ↗</a>
+                </template>
+              </div>
+            </template>
+            <template x-if="(assessPopup.all_contacts||{}).social?.length">
+              <div>
+                <template x-for="s in (assessPopup.all_contacts.social||[])" :key="s">
+                  <a :href="s" target="_blank" style="color:var(--info);display:inline-block;margin-right:10px" x-text="s.replace('https://','').replace('www.','').split('/').slice(0,2).join('/')"></a>
+                </template>
+              </div>
+            </template>
+            <template x-if="assessPopup.contact_email && !(assessPopup.all_contacts||{}).emails?.length">
+              <div><a :href="'mailto:'+assessPopup.contact_email" x-text="assessPopup.contact_email" style="color:var(--accent)"></a></div>
+            </template>
+          </div>
+        </div>
+
+        <!-- Sales notes -->
+        <template x-if="assessPopup.outreach_notes">
+          <p style="font-size:11px;color:var(--warn);border-top:1px solid var(--border);padding-top:8px;line-height:1.5" x-text="assessPopup.outreach_notes"></p>
+        </template>
+
+        <div style="display:flex;gap:8px;margin-top:14px">
+          <button class="btn-primary btn-sm" @click="copyText(assessPopup.outreach_email||'');assessPopup=null">Copy outreach email</button>
+          <button class="btn-secondary btn-sm" @click="assessPopup=null">Close</button>
+        </div>
+      </div>
+    </template>
+  </div>
+</div>
+
 <!-- Toasts -->
 <div class="toast-wrap">
   <template x-for="t in toasts" :key="t.id">
@@ -510,6 +628,7 @@ function app() {
     toasts: [],
     prescreening: false, validating: false, reassessing: false,
     _loadGen: 0,  // incremented on every loadDomains() call; stale responses are discarded
+    assessPopup: null,  // parsed _beauty object shown in overlay; null = hidden
     exportQuality: '', exportCountry: '',
     f: {keyword:'', tld:'', prescreen_status:'live', niche:'beauty_cosmetics',
         site_type:'ecommerce', country:'', assessed:'', alpha_only:false, no_sld:false, limit:'100', page:1},
@@ -793,6 +912,25 @@ function app() {
       window.open('/api/beauty/export?' + p, '_blank');
     },
 
+    showAssessPopup(row) {
+      // Parse row.beauty_assessment and hand it to the overlay; any failure → info toast.
+      try {
+        const raw = row.beauty_assessment;
+        const info = raw ? JSON.parse(raw) : {};
+        info._domain = row.domain;  // header falls back to this when business_name is empty
+        const contacts = info.all_contacts;
+        if (!contacts || typeof contacts !== 'object') {
+          // The templates read all_contacts.<kind>?.length, so always supply an object,
+          // seeded from row.emails / row.phones when the assessment carries none.
+          const wrap = v => (v ? [v] : []);
+          info.all_contacts = { emails: wrap(row.emails), phones: wrap(row.phones), whatsapp: [], social: [] };
+        }
+        this.assessPopup = info;
+      } catch (err) {
+        this.notify('No assessment data yet', 'info');
+      }
+    },
+
     qualityBadge(q) {
       return {HOT:'badge-hot', WARM:'badge-warm', COLD:'badge-cold', NOT_RELEVANT:'badge-nr'}[q]||'badge-nr';
     },