feat: 5 fixes — dead site scoring, Kit Digital precision, social icons, GMB detection, social/GMB weighting
1. scorer: dead sites capped at 5 (was scoring HOT from SSL/CMS signals).
2. Kit Digital: require explicit kit-digital/agente-digitalizador signals; generic EU logo patterns (fondos-europeos, logo-ue, cofinanciado) removed. Gemini kit_digital_confirmed now overwrites heuristic in DB.
3. Browse table: social links replaced with compact coloured icon badges (fb/ig/in/x/tt/yt) linked to the profile URLs.
4. site_analyzer: added has_gmb / gmb_url detection (Maps embed, Place links, LocalBusiness schema); fed to Gemini prompt.
5. scorer: +5 no-social, +3 reachable contact; Gemini prompt includes GMB and social media management as sellable services; modal shows GMB/social status.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -76,16 +76,26 @@ WEBMASTER = {
|
||||
"yandex": ["yandex-verification"],
|
||||
}
|
||||
|
||||
# ── Kit Digital ───────────────────────────────────────────────────────────────
|
||||
KIT_IMG_PATS = [
|
||||
"digitalizadores", "kit-digital", "kitdigital", "kit_digital",
|
||||
"fondos-europeos", "fondos_europeos", "nextgeneration", "next-generation",
|
||||
"prtr", "plan-recuperacion", "acelerapyme", "cofinanciado",
|
||||
# ── Kit Digital — require SPECIFIC signals, not generic EU logos ───────────────
|
||||
# These patterns are unambiguously Kit Digital programme markers
|
||||
KIT_STRONG_IMG = ["kit-digital", "kitdigital", "kit_digital", "agente-digitalizador", "agente_digitalizador"]
|
||||
KIT_STRONG_TEXT = ["kit digital", "agente digitalizador", "agentes digitalizadores"]
|
||||
KIT_STRONG_LINK = ["acelerapyme.es", "red.es/kit-digital", "kit-digital.red.es"]
|
||||
|
||||
# ── Google My Business / Business Profile ────────────────────────────────────
|
||||
GMB_URL_SIGNALS = [
|
||||
"maps.googleapis.com/maps/api", # embedded Google Map widget
|
||||
"google.com/maps/place", # link to GMB Place page
|
||||
"maps.google.com",
|
||||
"g.page/",
|
||||
"maps.app.goo.gl",
|
||||
"goo.gl/maps",
|
||||
"business.google.com",
|
||||
]
|
||||
KIT_TEXT_PATS = [
|
||||
"kit digital", "agente digitalizador", "fondos europeos",
|
||||
"next generation eu", "nextgenerationeu", "plan de recuperación",
|
||||
"prtr", "financiado por la unión europea", "red.es/kit-digital", "acelerapyme",
|
||||
GMB_SCHEMA_SIGNALS = [
|
||||
'"@type":"LocalBusiness"',
|
||||
'"@type": "LocalBusiness"',
|
||||
"schema.org/LocalBusiness",
|
||||
]
|
||||
|
||||
EMAIL_RE = re.compile(r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}")
|
||||
@@ -155,6 +165,8 @@ async def _analyze_site_inner(domain: str) -> dict:
|
||||
"inputs_without_labels": 0,
|
||||
# Kit Digital
|
||||
"kit_digital": False, "kit_digital_signals": [],
|
||||
# Google My Business
|
||||
"has_gmb": False, "gmb_url": None,
|
||||
# Contacts
|
||||
"emails": [], "phones": [], "whatsapp": [], "social_links": [],
|
||||
"error": None,
|
||||
@@ -267,25 +279,40 @@ async def _analyze_site_inner(domain: str) -> dict:
|
||||
if inp.get("id") not in labeled_ids and not inp.get("aria-label") and not inp.get("aria-labelledby")
|
||||
)
|
||||
|
||||
# ── Kit Digital ───────────────────────────────────────────────────────
|
||||
# ── Kit Digital (specific signals only — generic EU logos excluded) ──────
|
||||
kd_signals = []
|
||||
for img in soup.find_all("img"):
|
||||
comb = ((img.get("src") or "") + (img.get("alt") or "") + (img.get("srcset") or "")).lower()
|
||||
for p in KIT_IMG_PATS:
|
||||
for p in KIT_STRONG_IMG:
|
||||
if p in comb:
|
||||
kd_signals.append(f"img:{p}")
|
||||
break
|
||||
for p in KIT_TEXT_PATS:
|
||||
if p in hl:
|
||||
for p in KIT_STRONG_TEXT:
|
||||
if p in vl:
|
||||
kd_signals.append(f"text:{p}")
|
||||
for a in soup.find_all("a", href=True):
|
||||
href = a["href"].lower()
|
||||
if "acelerapyme" in href or "red.es" in href or "kit-digital" in href:
|
||||
kd_signals.append(f"link:{href[:50]}")
|
||||
for p in KIT_STRONG_LINK:
|
||||
if p in href:
|
||||
kd_signals.append(f"link:{href[:60]}")
|
||||
break
|
||||
kd_signals = list(dict.fromkeys(kd_signals))[:10]
|
||||
result["kit_digital"] = len(kd_signals) > 0
|
||||
result["kit_digital_signals"] = kd_signals
|
||||
|
||||
# ── Google My Business ────────────────────────────────────────────────
|
||||
for a in soup.find_all("a", href=True):
|
||||
href_g = a["href"]
|
||||
for sig in GMB_URL_SIGNALS:
|
||||
if sig in href_g:
|
||||
result["has_gmb"] = True
|
||||
result["gmb_url"] = href_g[:120]
|
||||
break
|
||||
if result["has_gmb"]:
|
||||
break
|
||||
if not result["has_gmb"]:
|
||||
result["has_gmb"] = any(sig.lower() in hl for sig in GMB_SCHEMA_SIGNALS)
|
||||
|
||||
# ── Contacts ──────────────────────────────────────────────────────────
|
||||
for a in soup.find_all("a", href=True):
|
||||
href = a["href"]
|
||||
|
||||
Reference in New Issue
Block a user