feat: 5 fixes — dead site scoring, Kit Digital precision, social icons, GMB detection, social/GMB weighting
1. scorer: dead sites are capped at 5 (they were scoring HOT from SSL/CMS signals).
2. Kit Digital: require explicit kit-digital/agente-digitalizador signals; generic EU logo patterns (fondos-europeos, logo-ue, cofinanciado) removed. Gemini kit_digital_confirmed now overwrites the heuristic in the DB.
3. Browse table: social links replaced with compact coloured icon badges (fb/ig/in/x/tt/yt) linked to the profile URLs.
4. site_analyzer: added has_gmb / gmb_url detection (Maps embed, Place links, LocalBusiness schema); fed to the Gemini prompt.
5. scorer: +5 no-social, +3 reachable contact; the Gemini prompt includes GMB and social media management as sellable services; the modal shows GMB/social status.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -62,44 +62,34 @@ def detect_cms(html: str, headers: dict) -> Optional[str]:
|
||||
|
||||
# ── Kit Digital detection ────────────────────────────────────────────────────
|
||||
|
||||
# Only explicit Kit Digital markers count as evidence.  Generic EU-funding
# patterns (e.g. "fondos-europeos", "logo-ue", "cofinanciado", "prtr") are
# deliberately not listed: they also appear on sites with no Kit Digital
# involvement and produced false positives.
KIT_STRONG_IMG = [
    "kit-digital",
    "kitdigital",
    "kit_digital",
    "agente-digitalizador",
    "agente_digitalizador",
]
KIT_STRONG_TEXT = ["kit digital", "agente digitalizador", "agentes digitalizadores"]
KIT_STRONG_LINK = ["acelerapyme.es", "red.es/kit-digital", "kit-digital.red.es"]


def detect_kit_digital(soup, html: str) -> tuple[bool, list]:
    """Look for explicit Kit Digital markers in a parsed page.

    Three sources are inspected: ``<img>`` attributes (``src``, ``alt``,
    ``srcset``), the visible page text, and ``<a href>`` targets.

    Args:
        soup: Parsed document exposing ``get_text()`` and ``find_all()``
            (presumably a BeautifulSoup object -- confirm against the caller).
        html: Raw page markup.  Kept for interface compatibility; it is not
            read, because matching is done on visible text only.

    Returns:
        ``(found, signals)`` where ``signals`` is a de-duplicated list of at
        most 10 strings of the form ``"img:<pattern>"``, ``"text:<pattern>"``
        or ``"link:<href truncated to 60 chars>"``, and ``found`` is True
        when that list is non-empty.
    """
    signals = []

    # Collapse runs of whitespace (newlines, tabs, NBSP) to single spaces so
    # that "Kit\n  Digital" in the markup still matches "kit digital".
    visible_text = " ".join(soup.get_text().split()).lower()

    for img in soup.find_all("img"):
        # Join with a space so a pattern cannot match across the boundary of
        # two attributes (src ending in "kit" + alt starting with "digital").
        attrs = " ".join(
            img.get(name) or "" for name in ("src", "alt", "srcset")
        ).lower()
        # At most one signal per image: the first pattern that matches.
        matched = next((p for p in KIT_STRONG_IMG if p in attrs), None)
        if matched is not None:
            signals.append(f"img:{matched}")

    signals.extend(f"text:{p}" for p in KIT_STRONG_TEXT if p in visible_text)

    for a in soup.find_all("a", href=True):
        href = a["href"].lower()
        # At most one signal per link, however many patterns it contains.
        if any(p in href for p in KIT_STRONG_LINK):
            signals.append(f"link:{href[:60]}")

    # dict.fromkeys de-duplicates while preserving first-seen order.
    signals = list(dict.fromkeys(signals))[:10]
    return bool(signals), signals
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user