feat: 5 fixes — dead site scoring, Kit Digital precision, social icons, GMB detection, social/GMB weighting

1. scorer: dead sites capped at 5 (was scoring HOT from SSL/CMS signals)
2. Kit Digital: require explicit kit-digital/agente-digitalizador signals;
   generic EU logo patterns (fondos-europeos, logo-ue, cofinanciado) removed.
   Gemini kit_digital_confirmed now overwrites heuristic in DB.
3. Browse table: social links replaced with compact coloured icon badges
   (fb/ig/in/x/tt/yt) linked to the profile URLs
4. site_analyzer: added has_gmb / gmb_url detection (Maps embed, Place links,
   LocalBusiness schema); fed to Gemini prompt
5. scorer: +5 no-social, +3 reachable contact; Gemini prompt includes GMB and
   social media management as sellable services; modal shows GMB/social status

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-14 07:21:02 +02:00
parent 793aea8a5f
commit dad910b6b0
6 changed files with 159 additions and 63 deletions

View File

@@ -378,7 +378,9 @@ async def save_ai_assessment(domain: str, assessment: dict, site_analysis: dict
domain,
),
)
# Also update contact_info + kit_digital from site_analysis if available
# Update contact_info + kit_digital from site_analysis if available.
# Gemini's kit_digital_confirmed is the authoritative verdict — it can
# override a false-positive from the heuristic scanner.
if site_analysis:
contacts = {
"emails": site_analysis.get("emails", []),
@@ -386,12 +388,15 @@ async def save_ai_assessment(domain: str, assessment: dict, site_analysis: dict
"whatsapp": site_analysis.get("whatsapp", []),
"social": site_analysis.get("social_links", []),
}
# Prefer Gemini's explicit verdict; fall back to heuristic if null
ai_kit = assessment.get("kit_digital_confirmed")
kit_val = int(ai_kit) if ai_kit is not None else int(site_analysis.get("kit_digital", False))
await db.execute(
"""UPDATE enriched_domains SET
kit_digital=?, kit_digital_signals=?, contact_info=?
WHERE domain=?""",
(
int(site_analysis.get("kit_digital", False)),
kit_val,
_json.dumps(site_analysis.get("kit_digital_signals", [])),
_json.dumps(contacts),
domain,

View File

@@ -62,44 +62,34 @@ def detect_cms(html: str, headers: dict) -> Optional[str]:
# ── Kit Digital detection ────────────────────────────────────────────────────
KIT_IMG_PATS = [
"digitalizadores", "kit-digital", "kitdigital", "kit_digital",
"fondos-europeos", "fondos_europeos", "nextgeneration", "next-generation",
"prtr", "plan-recuperacion", "planderecuperacion",
"acelerapyme", "logo-ue", "recovery-eu", "cofinanciado",
]
KIT_TEXT_PATS = [
"kit digital", "agente digitalizador", "agentes digitalizadores",
"fondos europeos", "next generation eu", "nextgenerationeu",
"plan de recuperación", "plan de recuperacion",
"plan de digitalización", "digitalización pymes",
"prtr", "financiado por la unión europea",
"red.es/kit-digital", "acelerapyme.es",
]
KIT_LINK_PATS = ["acelerapyme", "red.es", "kit-digital", "kitdigital"]
KIT_STRONG_IMG = ["kit-digital", "kitdigital", "kit_digital", "agente-digitalizador", "agente_digitalizador"]
KIT_STRONG_TEXT = ["kit digital", "agente digitalizador", "agentes digitalizadores"]
KIT_STRONG_LINK = ["acelerapyme.es", "red.es/kit-digital", "kit-digital.red.es"]
def detect_kit_digital(soup, html: str) -> tuple[bool, list]:
signals = []
hl = html.lower()
vl = soup.get_text().lower()
for img in soup.find_all("img"):
combined = ((img.get("src") or "") + (img.get("alt") or "") + (img.get("srcset") or "")).lower()
for p in KIT_IMG_PATS:
for p in KIT_STRONG_IMG:
if p in combined:
signals.append(f"img:{p}")
break
for p in KIT_TEXT_PATS:
if p in hl:
for p in KIT_STRONG_TEXT:
if p in vl:
signals.append(f"text:{p}")
for a in soup.find_all("a", href=True):
href = a["href"].lower()
if any(p in href for p in KIT_LINK_PATS):
for p in KIT_STRONG_LINK:
if p in href:
signals.append(f"link:{href[:60]}")
break
signals = list(dict.fromkeys(signals))[:15]
signals = list(dict.fromkeys(signals))[:10]
return len(signals) > 0, signals

View File

@@ -38,6 +38,8 @@ def _build_prompt(a: dict) -> str:
lorem_str = ", ".join(a.get("lorem_matches") or []) or "none"
ph_str = ", ".join(a.get("placeholder_matches") or []) or "none"
snippet = (a.get("visible_text_snippet") or "")[:2000]
social_str = ", ".join(a.get("social_links") or []) or "none detected"
gmb_str = f"✅ Found — {a.get('gmb_url','')}" if a.get("has_gmb") else "❌ Not detected"
eu_hosted = a.get("eu_hosted")
hosting_flag = "✅ EU" if eu_hosted else ("❌ Non-EU" if eu_hosted is False else "unknown")
@@ -86,10 +88,16 @@ Inputs without labels: {a.get("inputs_without_labels")}
Lorem ipsum: {a.get("has_lorem_ipsum")}{lorem_str}
Placeholder: {a.get("has_placeholder")}{ph_str}
=== KIT DIGITAL (Spanish gov €12k SME grants — sites must show EU logos) ===
Detected: {a.get("kit_digital")}
=== KIT DIGITAL (Spanish gov €12k SME grants — ONLY confirm if you see explicit Kit Digital / agente digitalizador branding) ===
Heuristic detected: {a.get("kit_digital")}
{kd_str}
=== GOOGLE MY BUSINESS ===
GMB/Business Profile: {gmb_str}
=== SOCIAL MEDIA ===
Profiles found on site: {social_str}
=== CONTACT CHANNELS ===
{contacts_str}
@@ -99,7 +107,8 @@ Detected: {a.get("kit_digital")}
=== INSTRUCTIONS ===
The client sells: web redesign, SEO, hosting migration, SSL renewal,
security audits, GDPR compliance, accessibility fixes, Google Ads,
maintenance contracts, AI tools for SMEs.
maintenance contracts, AI tools for SMEs, Google My Business setup/optimisation,
social media management (Instagram, Facebook, LinkedIn, TikTok).
Respond ONLY with valid JSON, no markdown fences, no text outside the JSON:
{{
@@ -112,6 +121,8 @@ Respond ONLY with valid JSON, no markdown fences, no text outside the JSON:
"gdpr_compliance": "cookie banner status, privacy policy, GDPR gaps",
"accessibility_issues": ["specific a11y problems found"],
"kit_digital_confirmed": true/false,
"has_gmb": true/false,
"has_social_media": true/false,
"kit_digital_reasoning": "1 sentence",
"is_local_sme": true/false,
"lead_quality": "HOT|WARM|COLD",

View File

@@ -1,15 +1,16 @@
import os
import json
import aiosqlite
from app.db import SQLITE_PATH
KNOWN_CMS = {"wordpress", "joomla", "drupal", "wix", "squarespace", "shopify", "prestashop", "magento", "typo3", "opencart"}
TARGET_COUNTRIES = set(os.getenv("TARGET_COUNTRIES", "ES,GB,DE,FR").split(","))
TARGET_COUNTRIES = set(os.getenv("TARGET_COUNTRIES", "ES,GB,DE,FR,RO,PT,AD,IT").split(","))
LOCAL_BIZ_KEYWORDS = {
"restaurant", "cafe", "shop", "store", "salon", "plumber", "electrician",
"dentist", "clinic", "garage", "hotel", "bakery", "bar", "gym", "spa",
"fontanero", "electricista", "dentista", "clínica", "taller", "hotel",
"panadería", "peluquería", "tienda",
"fontanero", "electricista", "dentista", "clínica", "taller",
"panadería", "peluquería", "tienda", "abogado", "gestor", "inmobili",
}
@@ -21,29 +22,53 @@ def local_biz_keywords(title: str | None) -> bool:
def score(domain_row: dict) -> int:
s = 0
if domain_row.get("is_live"):
s += 20
# Dead sites are unreachable — cap at 5 regardless of other signals
if not domain_row.get("is_live") and not domain_row.get("reachable"):
return 5
s = 20 # Live site base
ssl_days = domain_row.get("ssl_expiry_days")
if ssl_days is not None and ssl_days < 30:
s += 15
s += 15 # SSL expiring / expired — urgent upsell
if not domain_row.get("ssl_valid"):
s += 15
s += 15 # No valid SSL
cms = (domain_row.get("cms") or "").lower()
if cms in KNOWN_CMS:
s += 15
s += 15 # Known CMS — maintenance / migration opportunity
if not domain_row.get("has_mx"):
s += 10
s += 10 # No email = needs professional email setup
if domain_row.get("ip_country") in TARGET_COUNTRIES:
s += 10
server = (domain_row.get("server") or "").lower()
if "shared" in server:
s += 10
s += 5
if local_biz_keywords(domain_row.get("page_title")):
s += 5
# Kit Digital: proven buyer of IT services
# Kit Digital: proven buyer of IT services (Gemini-confirmed takes precedence)
if domain_row.get("kit_digital"):
s += 20
# Social media / GMB presence signals
try:
ci = json.loads(domain_row.get("contact_info") or "{}")
has_social = bool(ci.get("social"))
has_contact = bool(ci.get("emails") or ci.get("phones") or ci.get("whatsapp"))
except Exception:
has_social = False
has_contact = False
if not has_social:
s += 5 # No social = opportunity to build presence
if has_contact:
s += 3 # Reachable lead — we can actually pitch them
return min(s, 100)

View File

@@ -76,16 +76,26 @@ WEBMASTER = {
"yandex": ["yandex-verification"],
}
# ── Kit Digital ───────────────────────────────────────────────────────────────
KIT_IMG_PATS = [
"digitalizadores", "kit-digital", "kitdigital", "kit_digital",
"fondos-europeos", "fondos_europeos", "nextgeneration", "next-generation",
"prtr", "plan-recuperacion", "acelerapyme", "cofinanciado",
# ── Kit Digital — require SPECIFIC signals, not generic EU logos ───────────────
# These patterns are unambiguously Kit Digital programme markers
KIT_STRONG_IMG = ["kit-digital", "kitdigital", "kit_digital", "agente-digitalizador", "agente_digitalizador"]
KIT_STRONG_TEXT = ["kit digital", "agente digitalizador", "agentes digitalizadores"]
KIT_STRONG_LINK = ["acelerapyme.es", "red.es/kit-digital", "kit-digital.red.es"]
# ── Google My Business / Business Profile ────────────────────────────────────
GMB_URL_SIGNALS = [
"maps.googleapis.com/maps/api", # embedded Google Map widget
"google.com/maps/place", # link to GMB Place page
"maps.google.com",
"g.page/",
"maps.app.goo.gl",
"goo.gl/maps",
"business.google.com",
]
KIT_TEXT_PATS = [
"kit digital", "agente digitalizador", "fondos europeos",
"next generation eu", "nextgenerationeu", "plan de recuperación",
"prtr", "financiado por la unión europea", "red.es/kit-digital", "acelerapyme",
GMB_SCHEMA_SIGNALS = [
'"@type":"LocalBusiness"',
'"@type": "LocalBusiness"',
"schema.org/LocalBusiness",
]
EMAIL_RE = re.compile(r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}")
@@ -155,6 +165,8 @@ async def _analyze_site_inner(domain: str) -> dict:
"inputs_without_labels": 0,
# Kit Digital
"kit_digital": False, "kit_digital_signals": [],
# Google My Business
"has_gmb": False, "gmb_url": None,
# Contacts
"emails": [], "phones": [], "whatsapp": [], "social_links": [],
"error": None,
@@ -267,25 +279,40 @@ async def _analyze_site_inner(domain: str) -> dict:
if inp.get("id") not in labeled_ids and not inp.get("aria-label") and not inp.get("aria-labelledby")
)
# ── Kit Digital ───────────────────────────────────────────────────────
# ── Kit Digital (specific signals only — generic EU logos excluded) ──────
kd_signals = []
for img in soup.find_all("img"):
comb = ((img.get("src") or "") + (img.get("alt") or "") + (img.get("srcset") or "")).lower()
for p in KIT_IMG_PATS:
for p in KIT_STRONG_IMG:
if p in comb:
kd_signals.append(f"img:{p}")
break
for p in KIT_TEXT_PATS:
if p in hl:
for p in KIT_STRONG_TEXT:
if p in vl:
kd_signals.append(f"text:{p}")
for a in soup.find_all("a", href=True):
href = a["href"].lower()
if "acelerapyme" in href or "red.es" in href or "kit-digital" in href:
kd_signals.append(f"link:{href[:50]}")
for p in KIT_STRONG_LINK:
if p in href:
kd_signals.append(f"link:{href[:60]}")
break
kd_signals = list(dict.fromkeys(kd_signals))[:10]
result["kit_digital"] = len(kd_signals) > 0
result["kit_digital_signals"] = kd_signals
# ── Google My Business ────────────────────────────────────────────────
for a in soup.find_all("a", href=True):
href_g = a["href"]
for sig in GMB_URL_SIGNALS:
if sig in href_g:
result["has_gmb"] = True
result["gmb_url"] = href_g[:120]
break
if result["has_gmb"]:
break
if not result["has_gmb"]:
result["has_gmb"] = any(sig.lower() in hl for sig in GMB_SCHEMA_SIGNALS)
# ── Contacts ──────────────────────────────────────────────────────────
for a in soup.find_all("a", href=True):
href = a["href"]

View File

@@ -91,12 +91,21 @@ tr:hover td{background:rgba(255,255,255,.025)}
.score{display:inline-block;padding:1px 6px;border-radius:5px;font-weight:800;font-size:11px;min-width:28px;text-align:center}
/* Contact chips */
.contact-chips{display:flex;flex-wrap:wrap;gap:3px}
.contact-chips{display:flex;flex-wrap:wrap;gap:3px;align-items:center}
.chip{display:inline-flex;align-items:center;gap:3px;padding:1px 6px;border-radius:4px;font-size:10px;background:var(--surface2);border:1px solid var(--border);color:var(--muted);max-width:160px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
.chip.email{border-color:#00d4aa33;color:var(--accent2)}
.chip.phone{border-color:#6c63ff33;color:var(--accent)}
.chip.wa{border-color:#22c55e33;color:#4ade80}
.chip.social{border-color:#f59e0b33;color:var(--kd)}
/* Social platform icon badges */
.sicon{display:inline-flex;align-items:center;justify-content:center;width:18px;height:18px;border-radius:4px;font-size:9px;font-weight:900;text-decoration:none;flex-shrink:0;line-height:1}
.sicon.fb{background:#1877f2;color:#fff}
.sicon.ig{background:linear-gradient(135deg,#f09433 0%,#e6683c 25%,#dc2743 50%,#cc2366 75%,#bc1888 100%);color:#fff}
.sicon.li{background:#0a66c2;color:#fff}
.sicon.tw{background:#000;color:#fff}
.sicon.tt{background:#010101;color:#fff}
.sicon.yt{background:#ff0000;color:#fff}
.sicon.gmb{background:#4285f4;color:#fff}
.sicon.other{background:var(--surface2);border:1px solid var(--border);color:var(--muted)}
/* Tooltip */
[title]{cursor:help}
@@ -182,6 +191,8 @@ tr:hover td{background:rgba(255,255,255,.025)}
<div class="mrow"><span class="mlabel">SEO</span><span x-text="modal.ai.seo_status||'—'"></span></div>
<div class="mrow"><span class="mlabel">Hosting</span><span x-text="(modal.sa?.org||'?') + ' / ' + (modal.sa?.ip_country||'?') + (modal.sa?.eu_hosted===false?' ❌ Non-EU':modal.sa?.eu_hosted?' ✅ EU':'')"></span></div>
<div class="mrow"><span class="mlabel">GDPR</span><span :style="(!modal.sa?.has_cookie_notice)?'color:var(--danger)':''" x-text="modal.ai.gdpr_compliance||'—'"></span></div>
<div class="mrow"><span class="mlabel">GMB</span><span :style="!modal.ai.has_gmb?'color:var(--warn)':'color:var(--accent2)'" x-text="modal.ai.has_gmb ? '✅ Found' : '❌ Not detected — opportunity'"></span></div>
<div class="mrow"><span class="mlabel">Social</span><span :style="!modal.ai.has_social_media?'color:var(--warn)':''" x-text="modal.ai.has_social_media ? '✅ Present' : '❌ No social media found — opportunity'"></span></div>
<!-- Content issues -->
<div x-show="(modal.ai.content_issues||[]).length>0" style="margin:8px 0">
@@ -215,8 +226,11 @@ tr:hover td{background:rgba(255,255,255,.025)}
<template x-for="wa in (modal.sa?.whatsapp||[])">
<a :href="wa" target="_blank" class="chip wa">💬 WhatsApp</a>
</template>
<template x-for="s in (modal.sa?.social_links||[]).slice(0,3)">
<a :href="s" target="_blank" class="chip social" x-text="s.replace('https://','').split('/')[0]"></a>
<template x-for="s in (modal.sa?.social_links||[])">
<a :href="s" target="_blank" :class="'sicon '+socialIconClass(s)" :title="s" x-text="socialIconLabel(s)"></a>
</template>
<template x-if="modal.ai?.has_gmb">
<a :href="modal.sa?.gmb_url||'#'" target="_blank" class="sicon gmb" title="Google My Business">G</a>
</template>
</div>
</div>
@@ -371,16 +385,16 @@ tr:hover td{background:rgba(255,255,255,.025)}
<td>
<div class="contact-chips" x-data="{c: parseContacts(row.contact_info)}">
<template x-for="em in (c.emails||[]).slice(0,1)" :key="em">
<span class="chip email" :title="em"><span x-text="em"></span></span>
<a :href="'mailto:'+em" class="chip email" :title="em"><span x-text="em"></span></a>
</template>
<template x-for="ph in (c.phones||[]).slice(0,1)" :key="ph">
<span class="chip phone" :title="ph">📞 <span x-text="ph"></span></span>
<a :href="'tel:'+ph" class="chip phone" :title="ph">📞 <span x-text="ph"></span></a>
</template>
<template x-for="wa in (c.whatsapp||[]).slice(0,1)" :key="wa">
<span class="chip wa" title="WhatsApp">💬 WA</span>
<a :href="wa" target="_blank" class="chip wa" title="WhatsApp">💬</a>
</template>
<template x-if="(c.social||[]).length>0">
<span class="chip social" :title="(c.social||[]).join(', ')">📲 <span x-text="(c.social||[]).length"></span></span>
<template x-for="url in (c.social||[]).slice(0,4)" :key="url">
<a :href="url" target="_blank" :class="'sicon '+socialIconClass(url)" :title="url" x-text="socialIconLabel(url)"></a>
</template>
</div>
</td>
@@ -698,6 +712,30 @@ function app() {
try { return JSON.parse(raw); } catch(e) { return {}; }
},
socialIconClass(url) {
if(!url) return 'other';
const u = url.toLowerCase();
if(u.includes('facebook.com') || u.includes('fb.com')) return 'fb';
if(u.includes('instagram.com')) return 'ig';
if(u.includes('linkedin.com')) return 'li';
if(u.includes('twitter.com') || u.includes('x.com')) return 'tw';
if(u.includes('tiktok.com')) return 'tt';
if(u.includes('youtube.com') || u.includes('youtu.be')) return 'yt';
return 'other';
},
socialIconLabel(url) {
if(!url) return '?';
const u = url.toLowerCase();
if(u.includes('facebook.com') || u.includes('fb.com')) return 'f';
if(u.includes('instagram.com')) return 'ig';
if(u.includes('linkedin.com')) return 'in';
if(u.includes('twitter.com') || u.includes('x.com')) return '𝕏';
if(u.includes('tiktok.com')) return 'tt';
if(u.includes('youtube.com') || u.includes('youtu.be')) return '▶';
return '↗';
},
parseSignals(raw) {
if(!raw) return 'No signals';
try { return JSON.parse(raw).join('\n'); } catch(e) { return raw; }