feat: deep site analysis engine + fix AI assess for any domain
site_analyzer.py (new):
- Fresh scrape with timing, page size, server, CMS detection
- Lorem ipsum detection (17 phrases incl. user's example)
- Placeholder content detection (hello world, sample page, etc.)
- Analytics: GA4, GTM, Facebook Pixel, Hotjar, Clarity
- Webmaster: Google Search Console, Bing, Yandex verification tags
- sitemap.xml and robots.txt check + Googlebot block detection
- Mobile viewport check, word count, image/script count
- Full contact extraction: emails, phones, WhatsApp, social links
- Kit Digital signal detection
AI worker fix:
- No longer requires pre-enrichment — works on ANY selected domain
- Does fresh site_analyzer scrape then calls Gemini with full context
- Stores site_analysis JSON alongside AI assessment
- Upserts into enriched_domains even if domain was never enriched
Gemini prompt now includes:
- Complete technical snapshot (load time, size, server, SSL)
- Full SEO signals (sitemap, robots, analytics, webmaster verified)
- Content quality (lorem ipsum matches, placeholder matches)
- Kit Digital signals
- All extracted contacts
- Page text sample (first 500 words, capped at 2000 characters in the prompt)
- Outputs: summary, site_quality_score/10, content_issues[],
urgency_signals[], performance_notes, seo_status,
best_contact_channel+value, all_contacts, ES pitch,
services_needed, outreach_notes
UI: rich AI modal with summary banner, quality grid, content issues,
urgency signals, full contact list, technical snapshot
Fixes: correct Replicate token, ai_queue status='running' bug
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
34
app/db.py
34
app/db.py
@@ -35,7 +35,8 @@ CREATE TABLE IF NOT EXISTS enriched_domains (
|
|||||||
ai_pitch TEXT,
|
ai_pitch TEXT,
|
||||||
ai_contact_channel TEXT,
|
ai_contact_channel TEXT,
|
||||||
ai_contact_value TEXT,
|
ai_contact_value TEXT,
|
||||||
ai_assessed_at TEXT
|
ai_assessed_at TEXT,
|
||||||
|
site_analysis TEXT
|
||||||
);
|
);
|
||||||
CREATE TABLE IF NOT EXISTS job_queue (
|
CREATE TABLE IF NOT EXISTS job_queue (
|
||||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
@@ -71,6 +72,7 @@ _MIGRATIONS = [
|
|||||||
"ALTER TABLE enriched_domains ADD COLUMN ai_contact_channel TEXT",
|
"ALTER TABLE enriched_domains ADD COLUMN ai_contact_channel TEXT",
|
||||||
"ALTER TABLE enriched_domains ADD COLUMN ai_contact_value TEXT",
|
"ALTER TABLE enriched_domains ADD COLUMN ai_contact_value TEXT",
|
||||||
"ALTER TABLE enriched_domains ADD COLUMN ai_assessed_at TEXT",
|
"ALTER TABLE enriched_domains ADD COLUMN ai_assessed_at TEXT",
|
||||||
|
"ALTER TABLE enriched_domains ADD COLUMN site_analysis TEXT",
|
||||||
"CREATE TABLE IF NOT EXISTS ai_queue (domain TEXT PRIMARY KEY, status TEXT DEFAULT 'pending', created_at TEXT DEFAULT (datetime('now')), completed_at TEXT, error TEXT)",
|
"CREATE TABLE IF NOT EXISTS ai_queue (domain TEXT PRIMARY KEY, status TEXT DEFAULT 'pending', created_at TEXT DEFAULT (datetime('now')), completed_at TEXT, error TEXT)",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -352,13 +354,19 @@ async def get_ai_queue_status():
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
async def save_ai_assessment(domain: str, assessment: dict):
|
async def save_ai_assessment(domain: str, assessment: dict, site_analysis: dict = None):
|
||||||
import json as _json
|
import json as _json
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH) as db:
|
||||||
|
# Upsert into enriched_domains (domain may not exist yet if assessed before full enrichment)
|
||||||
|
await db.execute(
|
||||||
|
"""INSERT INTO enriched_domains (domain) VALUES (?) ON CONFLICT(domain) DO NOTHING""",
|
||||||
|
(domain,),
|
||||||
|
)
|
||||||
await db.execute(
|
await db.execute(
|
||||||
"""UPDATE enriched_domains SET
|
"""UPDATE enriched_domains SET
|
||||||
ai_assessment=?, ai_lead_quality=?, ai_pitch=?,
|
ai_assessment=?, ai_lead_quality=?, ai_pitch=?,
|
||||||
ai_contact_channel=?, ai_contact_value=?, ai_assessed_at=datetime('now')
|
ai_contact_channel=?, ai_contact_value=?, ai_assessed_at=datetime('now'),
|
||||||
|
site_analysis=?
|
||||||
WHERE domain=?""",
|
WHERE domain=?""",
|
||||||
(
|
(
|
||||||
_json.dumps(assessment),
|
_json.dumps(assessment),
|
||||||
@@ -366,6 +374,26 @@ async def save_ai_assessment(domain: str, assessment: dict):
|
|||||||
assessment.get("pitch_angle"),
|
assessment.get("pitch_angle"),
|
||||||
assessment.get("best_contact_channel"),
|
assessment.get("best_contact_channel"),
|
||||||
assessment.get("best_contact_value"),
|
assessment.get("best_contact_value"),
|
||||||
|
_json.dumps(site_analysis) if site_analysis else None,
|
||||||
|
domain,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
# Also update contact_info + kit_digital from site_analysis if available
|
||||||
|
if site_analysis:
|
||||||
|
contacts = {
|
||||||
|
"emails": site_analysis.get("emails", []),
|
||||||
|
"phones": site_analysis.get("phones", []),
|
||||||
|
"whatsapp": site_analysis.get("whatsapp", []),
|
||||||
|
"social": site_analysis.get("social_links", []),
|
||||||
|
}
|
||||||
|
await db.execute(
|
||||||
|
"""UPDATE enriched_domains SET
|
||||||
|
kit_digital=?, kit_digital_signals=?, contact_info=?
|
||||||
|
WHERE domain=?""",
|
||||||
|
(
|
||||||
|
int(site_analysis.get("kit_digital", False)),
|
||||||
|
_json.dumps(site_analysis.get("kit_digital_signals", [])),
|
||||||
|
_json.dumps(contacts),
|
||||||
domain,
|
domain,
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ import dns.resolver
|
|||||||
import aiosqlite
|
import aiosqlite
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from app.db import SQLITE_PATH, queue_ai, save_ai_assessment, get_ai_queue_status
|
from app.db import SQLITE_PATH, queue_ai, save_ai_assessment
|
||||||
from app.scorer import score
|
from app.scorer import score
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@@ -340,17 +340,17 @@ async def worker_loop():
|
|||||||
|
|
||||||
async def ai_worker_loop():
|
async def ai_worker_loop():
|
||||||
from app.replicate_ai import assess_domain as gemini_assess
|
from app.replicate_ai import assess_domain as gemini_assess
|
||||||
|
from app.site_analyzer import analyze_site
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH) as db:
|
||||||
async with db.execute(
|
async with db.execute(
|
||||||
"SELECT domain FROM ai_queue WHERE status='pending' LIMIT 20"
|
"SELECT domain FROM ai_queue WHERE status='pending' LIMIT 10"
|
||||||
) as cur:
|
) as cur:
|
||||||
rows = await cur.fetchall()
|
rows = await cur.fetchall()
|
||||||
# Mark as running
|
|
||||||
if rows:
|
if rows:
|
||||||
await db.executemany(
|
await db.executemany(
|
||||||
"UPDATE ai_queue SET status='running', created_at=created_at WHERE domain=?",
|
"UPDATE ai_queue SET status='running' WHERE domain=?",
|
||||||
[(r[0],) for r in rows],
|
[(r[0],) for r in rows],
|
||||||
)
|
)
|
||||||
await db.commit()
|
await db.commit()
|
||||||
@@ -361,16 +361,11 @@ async def ai_worker_loop():
|
|||||||
|
|
||||||
async def assess_one(domain: str):
|
async def assess_one(domain: str):
|
||||||
try:
|
try:
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
# Always do a fresh deep scrape — no pre-enrichment required
|
||||||
db.row_factory = aiosqlite.Row
|
analysis = await analyze_site(domain)
|
||||||
async with db.execute(
|
assessment = await gemini_assess(analysis)
|
||||||
"SELECT * FROM enriched_domains WHERE domain=?", (domain,)
|
await save_ai_assessment(domain, assessment, site_analysis=analysis)
|
||||||
) as cur:
|
logger.info("AI done: %s → %s", domain, assessment.get("lead_quality"))
|
||||||
row = await cur.fetchone()
|
|
||||||
if not row:
|
|
||||||
return
|
|
||||||
assessment = await gemini_assess(dict(row))
|
|
||||||
await save_ai_assessment(domain, assessment)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
async with aiosqlite.connect(SQLITE_PATH) as db:
|
||||||
await db.execute(
|
await db.execute(
|
||||||
@@ -380,6 +375,7 @@ async def ai_worker_loop():
|
|||||||
await db.commit()
|
await db.commit()
|
||||||
logger.error("AI worker error %s: %s", domain, e)
|
logger.error("AI worker error %s: %s", domain, e)
|
||||||
|
|
||||||
|
# AI_CONCURRENCY concurrent assessments (already enforced by replicate_ai semaphore)
|
||||||
await asyncio.gather(*[asyncio.create_task(assess_one(r[0])) for r in rows], return_exceptions=True)
|
await asyncio.gather(*[asyncio.create_task(assess_one(r[0])) for r in rows], return_exceptions=True)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
18
app/main.py
18
app/main.py
@@ -177,22 +177,16 @@ async def ai_status():
|
|||||||
|
|
||||||
@app.post("/api/ai/assess/single")
|
@app.post("/api/ai/assess/single")
|
||||||
async def ai_assess_single(body: dict):
|
async def ai_assess_single(body: dict):
|
||||||
"""Immediate (blocking) AI assessment of a single domain."""
|
"""Immediate (blocking) AI assessment — does fresh scrape, no pre-enrichment needed."""
|
||||||
domain = body.get("domain")
|
domain = body.get("domain")
|
||||||
if not domain:
|
if not domain:
|
||||||
return JSONResponse({"error": "no domain"}, status_code=400)
|
return JSONResponse({"error": "no domain"}, status_code=400)
|
||||||
|
from app.site_analyzer import analyze_site
|
||||||
from app.replicate_ai import assess_domain as gemini_assess
|
from app.replicate_ai import assess_domain as gemini_assess
|
||||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
analysis = await analyze_site(domain)
|
||||||
db.row_factory = aiosqlite.Row
|
assessment = await gemini_assess(analysis)
|
||||||
async with db.execute(
|
await save_ai_assessment(domain, assessment, site_analysis=analysis)
|
||||||
"SELECT * FROM enriched_domains WHERE domain=?", (domain,)
|
return {**assessment, "site_analysis": analysis}
|
||||||
) as cur:
|
|
||||||
row = await cur.fetchone()
|
|
||||||
if not row:
|
|
||||||
return JSONResponse({"error": "domain not yet enriched"}, status_code=404)
|
|
||||||
assessment = await gemini_assess(dict(row))
|
|
||||||
await save_ai_assessment(domain, assessment)
|
|
||||||
return assessment
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/api/export")
|
@app.get("/api/export")
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
"""Replicate / Gemini integration for domain lead assessment."""
|
"""Replicate / Gemini integration — deep site assessment."""
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
@@ -10,7 +10,7 @@ import httpx
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
REPLICATE_TOKEN = os.getenv("REPLICATE_API_TOKEN", "r8_6kV2NWMQyPVB9JILHJprrXJJh4vWazA22Osyj")
|
REPLICATE_TOKEN = os.getenv("REPLICATE_API_TOKEN", "r8_7I7Feai78f9PzMOs20y5GVFKiLkgUWP463vZO") # override via env
|
||||||
REPLICATE_MODEL = "https://api.replicate.com/v1/models/google/gemini-3-pro/predictions"
|
REPLICATE_MODEL = "https://api.replicate.com/v1/models/google/gemini-3-pro/predictions"
|
||||||
AI_CONCURRENCY = int(os.getenv("AI_CONCURRENCY", "3"))
|
AI_CONCURRENCY = int(os.getenv("AI_CONCURRENCY", "3"))
|
||||||
|
|
||||||
@@ -24,66 +24,92 @@ def _sem() -> asyncio.Semaphore:
|
|||||||
return _ai_sem
|
return _ai_sem
|
||||||
|
|
||||||
|
|
||||||
def _build_prompt(row: dict) -> str:
|
def _build_prompt(a: dict) -> str:
|
||||||
kit_signals = row.get("kit_digital_signals") or "[]"
|
"""Build the Gemini prompt from a full site analysis dict."""
|
||||||
try:
|
contacts_block = []
|
||||||
sigs = json.loads(kit_signals)
|
if a.get("emails"): contacts_block.append(f" Emails: {', '.join(a['emails'][:3])}")
|
||||||
kit_block = "\n".join(f" - {s}" for s in sigs) if sigs else " None detected"
|
if a.get("phones"): contacts_block.append(f" Phones: {', '.join(a['phones'][:3])}")
|
||||||
except Exception:
|
if a.get("whatsapp"): contacts_block.append(f" WhatsApp: {', '.join(a['whatsapp'][:2])}")
|
||||||
kit_block = f" {kit_signals}"
|
if a.get("social_links"): contacts_block.append(f" Social: {', '.join(a['social_links'][:4])}")
|
||||||
|
contacts_str = "\n".join(contacts_block) or " None found"
|
||||||
|
|
||||||
contact_raw = row.get("contact_info") or "{}"
|
kd_str = "\n".join(f" - {s}" for s in (a.get("kit_digital_signals") or [])) or " None detected"
|
||||||
try:
|
analytics_str = ", ".join(a.get("analytics_present") or []) or "none"
|
||||||
contacts = json.loads(contact_raw)
|
webmaster_str = ", ".join(a.get("webmaster_verified") or []) or "none"
|
||||||
except Exception:
|
lorem_str = ", ".join(a.get("lorem_matches") or []) or "none"
|
||||||
contacts = {}
|
placeholder_str = ", ".join(a.get("placeholder_matches") or []) or "none"
|
||||||
|
|
||||||
contact_block = []
|
text_snippet = (a.get("visible_text_snippet") or "")[:2000]
|
||||||
if contacts.get("emails"):
|
|
||||||
contact_block.append(f" Emails: {', '.join(contacts['emails'][:3])}")
|
|
||||||
if contacts.get("phones"):
|
|
||||||
contact_block.append(f" Phones: {', '.join(contacts['phones'][:3])}")
|
|
||||||
if contacts.get("whatsapp"):
|
|
||||||
contact_block.append(f" WhatsApp: {', '.join(contacts['whatsapp'][:2])}")
|
|
||||||
if contacts.get("social"):
|
|
||||||
contact_block.append(f" Social: {', '.join(contacts['social'][:4])}")
|
|
||||||
contact_str = "\n".join(contact_block) if contact_block else " None found"
|
|
||||||
|
|
||||||
return f"""You are a sales intelligence analyst evaluating Spanish SME websites for IT services upsell.
|
return f"""You are a senior web consultant and IT sales analyst reviewing a Spanish SME website.
|
||||||
|
|
||||||
DOMAIN DATA:
|
=== TECHNICAL SNAPSHOT ===
|
||||||
- Domain: {row.get("domain")}
|
Domain: {a.get("domain")}
|
||||||
- Page title: {row.get("page_title") or "N/A"}
|
Reachable: {a.get("reachable")} | Status: {a.get("status_code")} | Load time: {a.get("load_time_ms")} ms
|
||||||
- CMS: {row.get("cms") or "unknown"}
|
Final URL: {a.get("final_url")}
|
||||||
- Server: {row.get("server") or "unknown"}
|
Page size: {a.get("page_size_kb")} KB | Server: {a.get("server")} | CMS: {a.get("cms") or "unknown"}
|
||||||
- Country: {row.get("ip_country") or "unknown"}
|
SSL valid: {a.get("ssl_valid")} | SSL expires in: {a.get("ssl_expiry_days")} days
|
||||||
- SSL valid: {row.get("ssl_valid")}, expires in {row.get("ssl_expiry_days") or "?"} days
|
Mobile viewport: {a.get("has_mobile_viewport")}
|
||||||
- Has email (MX): {bool(row.get("has_mx"))}
|
Word count: {a.get("word_count")} | Images: {a.get("image_count")} | Scripts: {a.get("script_count")}
|
||||||
- Is live: {bool(row.get("is_live"))}
|
|
||||||
- Kit Digital signals found on page:
|
|
||||||
{kit_block}
|
|
||||||
- Contact channels found on page:
|
|
||||||
{contact_str}
|
|
||||||
|
|
||||||
Kit Digital is a Spanish government program (up to €12k grants for SME digitalization). Sites that received it MUST display EU/digitalizadores logos. These businesses have proven they invest in IT services and may need follow-up: new website, SEO, hosting migration, security, maintenance contracts.
|
=== SEO & INDEXING SIGNALS ===
|
||||||
|
Page title: {a.get("page_title") or "missing"}
|
||||||
|
H1: {a.get("h1_text") or "missing"}
|
||||||
|
Meta description: {a.get("meta_description") or "missing"}
|
||||||
|
Canonical URL: {a.get("canonical_url") or "not set"}
|
||||||
|
Sitemap.xml: {a.get("has_sitemap")}
|
||||||
|
Robots.txt: {a.get("has_robots")} | Blocks Googlebot: {a.get("robots_disallows_google")}
|
||||||
|
Analytics: {analytics_str}
|
||||||
|
Webmaster verified:{webmaster_str}
|
||||||
|
|
||||||
Assess this lead and respond ONLY with valid JSON (no markdown, no explanation outside the JSON):
|
=== CONTENT QUALITY ===
|
||||||
|
Lorem ipsum found: {a.get("has_lorem_ipsum")} → matches: {lorem_str}
|
||||||
|
Placeholder text: {a.get("has_placeholder")} → matches: {placeholder_str}
|
||||||
|
|
||||||
|
=== KIT DIGITAL (Spanish gov digitalization grant — sites must display EU logos) ===
|
||||||
|
Detected: {a.get("kit_digital")}
|
||||||
|
Signals:
|
||||||
|
{kd_str}
|
||||||
|
|
||||||
|
=== CONTACT CHANNELS ===
|
||||||
|
{contacts_str}
|
||||||
|
|
||||||
|
=== PAGE TEXT SAMPLE (first 2000 chars) ===
|
||||||
|
{text_snippet}
|
||||||
|
|
||||||
|
=== TASK ===
|
||||||
|
Analyse this site for IT services upsell potential. The client sells:
|
||||||
|
web design/redesign, SEO, hosting migration, SSL renewal, security audits,
|
||||||
|
maintenance contracts, Google Ads, and AI-assisted tools for SMEs.
|
||||||
|
|
||||||
|
Respond ONLY with valid JSON — no markdown, no text outside the JSON object:
|
||||||
{{
|
{{
|
||||||
"is_local_sme": true/false,
|
"summary": "2-3 sentence executive summary of the site's current state",
|
||||||
|
"site_quality_score": <0-10 integer>,
|
||||||
|
"content_issues": ["list of specific content problems found — lorem ipsum, broken sections, placeholder text, etc."],
|
||||||
|
"performance_notes": "comment on load time, page size, mobile readiness",
|
||||||
|
"seo_status": "brief SEO assessment — indexing signals, missing elements",
|
||||||
"kit_digital_confirmed": true/false,
|
"kit_digital_confirmed": true/false,
|
||||||
"kit_digital_reasoning": "1 sentence explaining why or why not",
|
"kit_digital_reasoning": "1 sentence — why confirmed or not",
|
||||||
|
"is_local_sme": true/false,
|
||||||
"lead_quality": "HOT|WARM|COLD",
|
"lead_quality": "HOT|WARM|COLD",
|
||||||
"lead_reasoning": "1-2 sentences on why this is a good/bad lead for IT services sales",
|
"lead_reasoning": "1-2 sentences on why",
|
||||||
"best_contact_channel": "email|phone|whatsapp|social|web_form|unknown",
|
"best_contact_channel": "email|phone|whatsapp|social|web_form|unknown",
|
||||||
"best_contact_value": "the actual email/phone/URL to use, or empty string",
|
"best_contact_value": "the actual value to use (email address, phone number, URL) or empty string",
|
||||||
"pitch_angle": "One concrete opening sentence for a cold email or call in Spanish",
|
"all_contacts": {{
|
||||||
"services_likely_needed": ["service1", "service2"],
|
"emails": [],
|
||||||
"outreach_notes": "Any useful context for the sales rep (language, business type, urgency)"
|
"phones": [],
|
||||||
|
"whatsapp": [],
|
||||||
|
"social": []
|
||||||
|
}},
|
||||||
|
"pitch_angle": "One concrete opening sentence in Spanish for cold outreach",
|
||||||
|
"services_needed": ["service1", "service2"],
|
||||||
|
"urgency_signals": ["list of specific urgent issues — expiring SSL, lorem ipsum live, no GA, blocked robots etc"],
|
||||||
|
"outreach_notes": "Key context for the sales rep"
|
||||||
}}"""
|
}}"""
|
||||||
|
|
||||||
|
|
||||||
def _parse_output(raw: str) -> dict:
|
def _parse_output(raw: str) -> dict:
|
||||||
"""Extract JSON from Gemini text output."""
|
|
||||||
text = re.sub(r"```(?:json)?", "", raw).strip().rstrip("`").strip()
|
text = re.sub(r"```(?:json)?", "", raw).strip().rstrip("`").strip()
|
||||||
m = re.search(r"\{[\s\S]+\}", text)
|
m = re.search(r"\{[\s\S]+\}", text)
|
||||||
if m:
|
if m:
|
||||||
@@ -91,8 +117,9 @@ def _parse_output(raw: str) -> dict:
|
|||||||
return json.loads(m.group(0))
|
return json.loads(m.group(0))
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
pass
|
pass
|
||||||
|
logger.warning("Could not parse Gemini JSON output, raw: %s", raw[:300])
|
||||||
return {
|
return {
|
||||||
"raw": raw[:500],
|
"summary": raw[:400],
|
||||||
"lead_quality": "COLD",
|
"lead_quality": "COLD",
|
||||||
"best_contact_channel": "unknown",
|
"best_contact_channel": "unknown",
|
||||||
"best_contact_value": "",
|
"best_contact_value": "",
|
||||||
@@ -100,22 +127,22 @@ def _parse_output(raw: str) -> dict:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
async def assess_domain(row: dict) -> dict:
|
async def assess_domain(analysis: dict) -> dict:
|
||||||
"""Call Gemini via Replicate to assess a domain. Returns parsed assessment dict."""
|
"""Call Gemini with the full site analysis. Returns parsed assessment."""
|
||||||
async with _sem():
|
async with _sem():
|
||||||
payload = {
|
payload = {
|
||||||
"input": {
|
"input": {
|
||||||
"prompt": _build_prompt(row),
|
"prompt": _build_prompt(analysis),
|
||||||
"images": [],
|
"images": [],
|
||||||
"videos": [],
|
"videos": [],
|
||||||
"top_p": 0.9,
|
"top_p": 0.9,
|
||||||
"temperature": 0.2,
|
"temperature": 0.2,
|
||||||
"thinking_level": "low",
|
"thinking_level": "low",
|
||||||
"max_output_tokens": 1024,
|
"max_output_tokens": 2048,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(timeout=90) as client:
|
async with httpx.AsyncClient(timeout=120) as client:
|
||||||
resp = await client.post(
|
resp = await client.post(
|
||||||
REPLICATE_MODEL,
|
REPLICATE_MODEL,
|
||||||
headers={
|
headers={
|
||||||
@@ -133,10 +160,15 @@ async def assess_domain(row: dict) -> dict:
|
|||||||
output = "".join(output)
|
output = "".join(output)
|
||||||
|
|
||||||
result = _parse_output(output)
|
result = _parse_output(output)
|
||||||
logger.info("AI %s → %s / contact: %s",
|
logger.info("AI %s → %s (quality %s)",
|
||||||
row.get("domain"), result.get("lead_quality"), result.get("best_contact_channel"))
|
analysis.get("domain"), result.get("lead_quality"), result.get("site_quality_score"))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Replicate error %s: %s", row.get("domain"), e)
|
logger.error("Replicate error %s: %s", analysis.get("domain"), e)
|
||||||
return {"error": str(e)[:300], "lead_quality": "COLD", "best_contact_channel": "unknown", "best_contact_value": ""}
|
return {
|
||||||
|
"error": str(e)[:300],
|
||||||
|
"lead_quality": "COLD",
|
||||||
|
"best_contact_channel": "unknown",
|
||||||
|
"best_contact_value": "",
|
||||||
|
}
|
||||||
|
|||||||
277
app/site_analyzer.py
Normal file
277
app/site_analyzer.py
Normal file
@@ -0,0 +1,277 @@
|
|||||||
|
"""Deep site analysis: content quality, SEO signals, performance, indexing hints."""
|
||||||
|
import asyncio
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ── Content quality ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
# Filler-text fragments, matched as lower-case substrings of the visible page text.
# NOTE: "ntulla nec ante" is intentional - it reproduces a typo present in a widely
# copied block of filler HTML, so it must not be "corrected".
LOREM_PHRASES = [
    "lorem ipsum", "sed ut perspiciatis", "nunc sem sapien",
    "nulla id nibh", "aenean dignissim", "aliquam tincidunt",
    "vestibulum commodo", "fusce nunc lacus", "consectetuer",
    "cras ornare tristique", "ntulla nec ante", "risus id metus",
    "praesent placerat", "fusce pellentesque", "suscipit nibh",
    "integer vitae libero", "felis quis tortor",
]

# Phrases left behind by unfinished or default CMS content (same substring matching).
PLACEHOLDER_PHRASES = [
    "under construction", "coming soon", "sample page",
    "this is a demo", "default post", "hello world",
    "test post", "uncategorized",
]

# ── Analytics & webmaster tags ────────────────────────────────────────────────

# Each token is lower-cased and searched for as a substring of the lower-cased raw
# HTML, so every token has to be specific enough not to occur in ordinary markup.
# Bare ID prefixes such as "G-" / "GTM-" are deliberately NOT listed: lower-cased
# they match class names like "img-fluid", "bg-dark" or "og-image" on almost any page.
ANALYTICS = {
    "google_analytics": [
        "gtag('config'",
        'gtag("config"',
        "gtag/js?id=G-",
        "gtag/js?id=UA-",
        "google-analytics.com/analytics.js",
        "google-analytics.com/ga.js",
    ],
    "google_tag_manager": [
        "googletagmanager.com/gtm.js",
        "googletagmanager.com/ns.html?id=GTM-",
    ],
    "facebook_pixel": ["fbq('init'", "connect.facebook.net/en_US/fbevents"],
    "hotjar": ["static.hotjar.com"],
    "clarity": ["clarity.ms/tag"],
}

# Site-verification meta tag names, matched the same way as ANALYTICS tokens.
WEBMASTER = {
    "google_search_console": ["google-site-verification"],
    "bing_webmaster": ["msvalidate.01"],
    "yandex": ["yandex-verification"],
}

# Kit Digital hints looked for in <img> src/alt/srcset values.
KIT_IMG_PATS = [
    "digitalizadores", "kit-digital", "kitdigital", "kit_digital",
    "fondos-europeos", "fondos_europeos", "nextgeneration", "next-generation",
    "prtr", "plan-recuperacion", "acelerapyme", "cofinanciado",
]
# Kit Digital hints looked for anywhere in the lower-cased HTML.
KIT_TEXT_PATS = [
    "kit digital", "agente digitalizador", "fondos europeos",
    "next generation eu", "nextgenerationeu", "plan de recuperación",
    "prtr", "financiado por la unión europea", "red.es/kit-digital", "acelerapyme",
]

EMAIL_RE = re.compile(r"[a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,}")
# Nine-digit number starting with 6-9, optionally prefixed with +34 / 0034 and grouped
# with spaces or hyphens. The digit look-arounds stop it matching inside longer digit
# runs (order numbers, tax IDs, timestamps).
PHONE_RE = re.compile(
    r"(?<!\d)(?:(?:\+|00)34[\s\-]?)?[6-9]\d{2}[\s\-]?\d{3}[\s\-]?\d{3}(?!\d)"
)
SOCIAL_DOM = ["facebook.com", "instagram.com", "linkedin.com", "twitter.com", "x.com", "tiktok.com"]
|
||||||
|
|
||||||
|
|
||||||
|
async def analyze_site(domain: str) -> dict:
|
||||||
|
"""Fetch and deeply analyse a site. Returns a rich dict for the AI prompt."""
|
||||||
|
result = {
|
||||||
|
"domain": domain,
|
||||||
|
"reachable": False,
|
||||||
|
"load_time_ms": None,
|
||||||
|
"status_code": None,
|
||||||
|
"final_url": None,
|
||||||
|
"page_size_kb": None,
|
||||||
|
"server": None,
|
||||||
|
"cms": None,
|
||||||
|
"ssl_valid": False,
|
||||||
|
"ssl_expiry_days": None,
|
||||||
|
# Content quality
|
||||||
|
"has_lorem_ipsum": False,
|
||||||
|
"lorem_matches": [],
|
||||||
|
"has_placeholder": False,
|
||||||
|
"placeholder_matches": [],
|
||||||
|
"word_count": 0,
|
||||||
|
"image_count": 0,
|
||||||
|
"broken_images": 0,
|
||||||
|
"script_count": 0,
|
||||||
|
"has_mobile_viewport": False,
|
||||||
|
"page_title": None,
|
||||||
|
"meta_description": None,
|
||||||
|
"h1_text": None,
|
||||||
|
"visible_text_snippet": "",
|
||||||
|
# SEO / webmaster
|
||||||
|
"has_sitemap": False,
|
||||||
|
"has_robots": False,
|
||||||
|
"robots_disallows_google": False,
|
||||||
|
"analytics_present": [],
|
||||||
|
"webmaster_verified": [],
|
||||||
|
"canonical_url": None,
|
||||||
|
"og_title": None,
|
||||||
|
# Kit Digital
|
||||||
|
"kit_digital": False,
|
||||||
|
"kit_digital_signals": [],
|
||||||
|
# Contacts
|
||||||
|
"emails": [],
|
||||||
|
"phones": [],
|
||||||
|
"whatsapp": [],
|
||||||
|
"social_links": [],
|
||||||
|
# Errors
|
||||||
|
"error": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
# ── Fetch main page ───────────────────────────────────────────────────────
|
||||||
|
try:
|
||||||
|
t0 = time.monotonic()
|
||||||
|
async with httpx.AsyncClient(
|
||||||
|
timeout=15, follow_redirects=True, verify=False,
|
||||||
|
headers={"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"},
|
||||||
|
) as client:
|
||||||
|
resp = await client.get(f"https://{domain}")
|
||||||
|
if resp.status_code >= 400:
|
||||||
|
resp = await client.get(f"http://{domain}")
|
||||||
|
|
||||||
|
load_ms = int((time.monotonic() - t0) * 1000)
|
||||||
|
html = resp.text
|
||||||
|
result.update({
|
||||||
|
"reachable": resp.status_code < 400,
|
||||||
|
"load_time_ms": load_ms,
|
||||||
|
"status_code": resp.status_code,
|
||||||
|
"final_url": str(resp.url),
|
||||||
|
"page_size_kb": round(len(resp.content) / 1024, 1),
|
||||||
|
"server": resp.headers.get("server"),
|
||||||
|
})
|
||||||
|
|
||||||
|
soup = BeautifulSoup(html, "html.parser")
|
||||||
|
hl = html.lower()
|
||||||
|
|
||||||
|
# Title, meta
|
||||||
|
title_tag = soup.find("title")
|
||||||
|
result["page_title"] = title_tag.get_text(strip=True)[:200] if title_tag else None
|
||||||
|
meta_desc = soup.find("meta", attrs={"name": "description"})
|
||||||
|
result["meta_description"] = (meta_desc.get("content") or "")[:300] if meta_desc else None
|
||||||
|
h1 = soup.find("h1")
|
||||||
|
result["h1_text"] = h1.get_text(strip=True)[:200] if h1 else None
|
||||||
|
|
||||||
|
# Mobile viewport
|
||||||
|
result["has_mobile_viewport"] = bool(soup.find("meta", attrs={"name": "viewport"}))
|
||||||
|
|
||||||
|
# Canonical + OG
|
||||||
|
canon = soup.find("link", rel="canonical")
|
||||||
|
result["canonical_url"] = canon.get("href") if canon else None
|
||||||
|
og = soup.find("meta", property="og:title")
|
||||||
|
result["og_title"] = og.get("content") if og else None
|
||||||
|
|
||||||
|
# Visible text
|
||||||
|
for tag in soup(["script", "style", "noscript"]):
|
||||||
|
tag.decompose()
|
||||||
|
visible_text = soup.get_text(separator=" ", strip=True)
|
||||||
|
words = visible_text.split()
|
||||||
|
result["word_count"] = len(words)
|
||||||
|
result["visible_text_snippet"] = " ".join(words[:500])
|
||||||
|
|
||||||
|
# Lorem ipsum / placeholder detection
|
||||||
|
vl = visible_text.lower()
|
||||||
|
lorem_hits = [p for p in LOREM_PHRASES if p in vl]
|
||||||
|
result["has_lorem_ipsum"] = len(lorem_hits) > 0
|
||||||
|
result["lorem_matches"] = lorem_hits[:5]
|
||||||
|
ph_hits = [p for p in PLACEHOLDER_PHRASES if p in vl]
|
||||||
|
result["has_placeholder"] = len(ph_hits) > 0
|
||||||
|
result["placeholder_matches"] = ph_hits[:3]
|
||||||
|
|
||||||
|
# Images & scripts
|
||||||
|
imgs = soup.find_all("img")
|
||||||
|
result["image_count"] = len(imgs)
|
||||||
|
result["script_count"] = len(soup.find_all("script", src=True))
|
||||||
|
|
||||||
|
# Analytics / webmaster tags
|
||||||
|
for name, sigs in ANALYTICS.items():
|
||||||
|
if any(s.lower() in hl for s in sigs):
|
||||||
|
result["analytics_present"].append(name)
|
||||||
|
for name, sigs in WEBMASTER.items():
|
||||||
|
if any(s.lower() in hl for s in sigs):
|
||||||
|
result["webmaster_verified"].append(name)
|
||||||
|
|
||||||
|
# Kit Digital
|
||||||
|
kd_signals = []
|
||||||
|
for img in imgs:
|
||||||
|
combined = ((img.get("src") or "") + (img.get("alt") or "") + (img.get("srcset") or "")).lower()
|
||||||
|
for p in KIT_IMG_PATS:
|
||||||
|
if p in combined:
|
||||||
|
kd_signals.append(f"img:{p}")
|
||||||
|
break
|
||||||
|
for p in KIT_TEXT_PATS:
|
||||||
|
if p in hl:
|
||||||
|
kd_signals.append(f"text:{p}")
|
||||||
|
for a in soup.find_all("a", href=True):
|
||||||
|
href = a["href"].lower()
|
||||||
|
if "acelerapyme" in href or "red.es" in href or "kit-digital" in href:
|
||||||
|
kd_signals.append(f"link:{href[:50]}")
|
||||||
|
kd_signals = list(dict.fromkeys(kd_signals))[:10]
|
||||||
|
result["kit_digital"] = len(kd_signals) > 0
|
||||||
|
result["kit_digital_signals"] = kd_signals
|
||||||
|
|
||||||
|
# Contacts
|
||||||
|
for a in soup.find_all("a", href=True):
|
||||||
|
href = a["href"]
|
||||||
|
if href.startswith("mailto:"):
|
||||||
|
em = href[7:].split("?")[0].strip().lower()
|
||||||
|
if em and em not in result["emails"]:
|
||||||
|
result["emails"].append(em)
|
||||||
|
elif href.startswith("tel:"):
|
||||||
|
ph = re.sub(r"[^\d+]", "", href[4:])
|
||||||
|
if ph and ph not in result["phones"]:
|
||||||
|
result["phones"].append(ph)
|
||||||
|
elif "wa.me" in href or "api.whatsapp.com" in href:
|
||||||
|
if href not in result["whatsapp"]:
|
||||||
|
result["whatsapp"].append(href[:80])
|
||||||
|
else:
|
||||||
|
for sd in SOCIAL_DOM:
|
||||||
|
if sd in href.lower():
|
||||||
|
clean = href.split("?")[0].rstrip("/")
|
||||||
|
if clean not in result["social_links"]:
|
||||||
|
result["social_links"].append(clean)
|
||||||
|
break
|
||||||
|
for em in EMAIL_RE.findall(html[:80000]):
|
||||||
|
em = em.lower()
|
||||||
|
if em not in result["emails"] and not any(em.endswith(x) for x in [".png", ".jpg", ".css", ".js", ".svg"]):
|
||||||
|
result["emails"].append(em)
|
||||||
|
for ph in PHONE_RE.findall(visible_text):
|
||||||
|
ph_c = re.sub(r"[\s\-]", "", ph)
|
||||||
|
if ph_c not in result["phones"]:
|
||||||
|
result["phones"].append(ph_c)
|
||||||
|
# Cap
|
||||||
|
for k in ["emails", "phones", "whatsapp", "social_links"]:
|
||||||
|
result[k] = list(dict.fromkeys(result[k]))[:5]
|
||||||
|
|
||||||
|
# CMS
|
||||||
|
from app.enricher import detect_cms
|
||||||
|
result["cms"] = detect_cms(html, dict(resp.headers))
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
result["error"] = str(e)[:300]
|
||||||
|
|
||||||
|
# ── Sitemap & robots (parallel) ───────────────────────────────────────────
|
||||||
|
async def _check_url(url: str) -> Optional[str]:
    """Fetch *url* and return its body text, or ``None`` when unavailable.

    The body is returned only for an HTTP 200 response. Any other status
    code, and any exception raised while fetching, yields ``None`` so the
    caller can treat the resource as simply absent.
    """
    # Short timeout, redirects followed, TLS verification disabled.
    client_options = {"timeout": 6, "follow_redirects": True, "verify": False}
    try:
        async with httpx.AsyncClient(**client_options) as client:
            response = await client.get(url)
            if response.status_code != 200:
                return None
            return response.text
    except Exception:
        return None
|
||||||
|
|
||||||
|
sitemap_txt, robots_txt = await asyncio.gather(
|
||||||
|
_check_url(f"https://{domain}/sitemap.xml"),
|
||||||
|
_check_url(f"https://{domain}/robots.txt"),
|
||||||
|
)
|
||||||
|
result["has_sitemap"] = sitemap_txt is not None
|
||||||
|
result["has_robots"] = robots_txt is not None
|
||||||
|
if robots_txt:
|
||||||
|
robots_lower = robots_txt.lower()
|
||||||
|
result["robots_disallows_google"] = (
|
||||||
|
"disallow: /" in robots_lower and "googlebot" in robots_lower
|
||||||
|
)
|
||||||
|
|
||||||
|
# ── SSL ───────────────────────────────────────────────────────────────────
|
||||||
|
import ssl as _ssl, socket as _socket
|
||||||
|
try:
|
||||||
|
def _ssl_check():
    """Connect to ``domain`` on port 443 and measure the certificate lifetime.

    Blocking helper: it opens a TCP connection (5 second timeout), performs
    a TLS handshake with the default verifying context, and reads the peer
    certificate's ``notAfter`` field.

    Returns:
        A ``(True, days)`` tuple where ``days`` is the number of days until
        the certificate expires (zero or negative once it has expired).

    Exceptions from the connection, the handshake or the date parsing are
    not handled here and propagate to the caller.
    """
    import datetime as _dt

    ctx = _ssl.create_default_context()
    with _socket.create_connection((domain, 443), timeout=5) as s:
        with ctx.wrap_socket(s, server_hostname=domain) as ss:
            cert = ss.getpeercert()

    # cert_time_to_seconds parses the certificate date independently of the
    # process locale, unlike strptime("%b ..."), which depends on it.
    expires_at = _dt.datetime.fromtimestamp(
        _ssl.cert_time_to_seconds(cert["notAfter"]), tz=_dt.timezone.utc
    )
    # Timezone-aware "now" replaces the deprecated datetime.utcnow().
    now = _dt.datetime.now(_dt.timezone.utc)
    return True, (now - expires_at).days * -1
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
result["ssl_valid"], result["ssl_expiry_days"] = await loop.run_in_executor(None, _ssl_check)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return result
|
||||||
@@ -136,11 +136,10 @@ tr:hover td{background:rgba(255,255,255,.025)}
|
|||||||
|
|
||||||
/* AI detail modal */
|
/* AI detail modal */
|
||||||
.modal-bg{position:fixed;inset:0;background:#000a;z-index:300;display:flex;align-items:center;justify-content:center}
|
.modal-bg{position:fixed;inset:0;background:#000a;z-index:300;display:flex;align-items:center;justify-content:center}
|
||||||
.modal{background:var(--surface);border:1px solid var(--border);border-radius:var(--r);padding:20px;max-width:500px;width:90%;max-height:80vh;overflow-y:auto}
|
.modal{background:var(--surface);border:1px solid var(--border);border-radius:var(--r);padding:18px;max-width:560px;width:95%;max-height:88vh;overflow-y:auto}
|
||||||
.modal h2{font-size:16px;font-weight:800;margin-bottom:12px}
|
.modal h2{font-size:15px;font-weight:800}
|
||||||
.modal .row{display:flex;gap:8px;margin-bottom:8px;font-size:13px}
|
.mrow{display:flex;gap:8px;margin-bottom:6px;font-size:12px;line-height:1.4}
|
||||||
.modal .label{color:var(--muted);min-width:110px;font-size:12px}
|
.mlabel{color:var(--muted);min-width:90px;font-size:11px;padding-top:1px;flex-shrink:0}
|
||||||
.modal .val{color:var(--text)}
|
|
||||||
|
|
||||||
@media(max-width:700px){.pipeline{grid-template-columns:1fr}.sg{grid-template-columns:1fr 1fr}}
|
@media(max-width:700px){.pipeline{grid-template-columns:1fr}.sg{grid-template-columns:1fr 1fr}}
|
||||||
</style>
|
</style>
|
||||||
@@ -153,15 +152,99 @@ tr:hover td{background:rgba(255,255,255,.025)}
|
|||||||
<!-- AI Detail Modal -->
|
<!-- AI Detail Modal -->
|
||||||
<div class="modal-bg" x-show="modal.open" @click.self="modal.open=false" x-cloak>
|
<div class="modal-bg" x-show="modal.open" @click.self="modal.open=false" x-cloak>
|
||||||
<div class="modal" @click.stop>
|
<div class="modal" @click.stop>
|
||||||
<h2>AI Assessment — <span style="color:var(--accent2)" x-text="modal.domain"></span></h2>
|
<div style="display:flex;justify-content:space-between;align-items:flex-start;margin-bottom:12px">
|
||||||
<div class="row"><span class="label">Lead quality</span><span class="val"><span class="pill" :class="aiPillClass(modal.data.lead_quality)" x-text="modal.data.lead_quality || '—'"></span></span></div>
|
<h2>AI Report — <span style="color:var(--accent2)" x-text="modal.domain"></span></h2>
|
||||||
<div class="row"><span class="label">Kit Digital</span><span class="val" x-text="modal.data.kit_digital_confirmed ? '✅ Confirmed' : '❌ Not confirmed'"></span></div>
|
<button class="btn bg sm" @click="modal.open=false">✕</button>
|
||||||
<div class="row"><span class="label">KD reasoning</span><span class="val" x-text="modal.data.kit_digital_reasoning || '—'"></span></div>
|
</div>
|
||||||
<div class="row"><span class="label">Lead reasoning</span><span class="val" x-text="modal.data.lead_reasoning || '—'"></span></div>
|
|
||||||
<div class="row"><span class="label">Best channel</span><span class="val" x-text="(modal.data.best_contact_channel || '—') + (modal.data.best_contact_value ? ': ' + modal.data.best_contact_value : '')"></span></div>
|
<!-- Summary banner -->
|
||||||
<div class="row"><span class="label">Pitch</span><span class="val" style="font-style:italic;color:var(--accent2)" x-text="modal.data.pitch_angle || '—'"></span></div>
|
<div x-show="modal.ai.summary" style="background:var(--surface2);border-radius:6px;padding:10px 12px;margin-bottom:12px;font-size:12px;line-height:1.5;color:var(--text)" x-text="modal.ai.summary"></div>
|
||||||
<div class="row"><span class="label">Services needed</span><span class="val" x-text="(modal.data.services_likely_needed || []).join(', ') || '—'"></span></div>
|
|
||||||
<div class="row"><span class="label">Outreach notes</span><span class="val" x-text="modal.data.outreach_notes || '—'"></span></div>
|
<!-- Lead + quality -->
|
||||||
|
<div style="display:grid;grid-template-columns:1fr 1fr 1fr;gap:8px;margin-bottom:12px">
|
||||||
|
<div style="background:var(--surface2);border-radius:6px;padding:8px;text-align:center">
|
||||||
|
<div style="font-size:10px;color:var(--muted);margin-bottom:3px">LEAD</div>
|
||||||
|
<span class="pill" :class="aiPillClass(modal.ai.lead_quality)" x-text="modal.ai.lead_quality||'—'"></span>
|
||||||
|
</div>
|
||||||
|
<div style="background:var(--surface2);border-radius:6px;padding:8px;text-align:center">
|
||||||
|
<div style="font-size:10px;color:var(--muted);margin-bottom:3px">SITE QUALITY</div>
|
||||||
|
<span class="score" :style="qualityBg(modal.ai.site_quality_score)" x-text="(modal.ai.site_quality_score??'—')+'/10'"></span>
|
||||||
|
</div>
|
||||||
|
<div style="background:var(--surface2);border-radius:6px;padding:8px;text-align:center">
|
||||||
|
<div style="font-size:10px;color:var(--muted);margin-bottom:3px">KIT DIGITAL</div>
|
||||||
|
<span x-text="modal.ai.kit_digital_confirmed ? '✅ Yes' : '❌ No'" style="font-size:13px;font-weight:700"></span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="mrow"><span class="mlabel">Reasoning</span><span x-text="modal.ai.lead_reasoning||'—'"></span></div>
|
||||||
|
<div class="mrow"><span class="mlabel">KD notes</span><span x-text="modal.ai.kit_digital_reasoning||'—'"></span></div>
|
||||||
|
<div class="mrow"><span class="mlabel">Performance</span><span x-text="modal.ai.performance_notes||'—'"></span></div>
|
||||||
|
<div class="mrow"><span class="mlabel">SEO status</span><span x-text="modal.ai.seo_status||'—'"></span></div>
|
||||||
|
|
||||||
|
<!-- Content issues -->
|
||||||
|
<div x-show="(modal.ai.content_issues||[]).length>0" style="margin:8px 0">
|
||||||
|
<div style="font-size:10px;color:var(--muted);text-transform:uppercase;margin-bottom:4px">Content Issues</div>
|
||||||
|
<template x-for="issue in (modal.ai.content_issues||[])">
|
||||||
|
<div style="font-size:12px;color:var(--danger);padding:2px 0">⚠ <span x-text="issue"></span></div>
|
||||||
|
</template>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Urgency signals -->
|
||||||
|
<div x-show="(modal.ai.urgency_signals||[]).length>0" style="margin:8px 0">
|
||||||
|
<div style="font-size:10px;color:var(--muted);text-transform:uppercase;margin-bottom:4px">Urgency Signals</div>
|
||||||
|
<template x-for="sig in (modal.ai.urgency_signals||[])">
|
||||||
|
<div style="font-size:12px;color:var(--warn);padding:2px 0">🔴 <span x-text="sig"></span></div>
|
||||||
|
</template>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Contact -->
|
||||||
|
<div style="background:var(--surface2);border-radius:6px;padding:10px;margin:8px 0">
|
||||||
|
<div style="font-size:10px;color:var(--muted);text-transform:uppercase;margin-bottom:6px">Best Contact</div>
|
||||||
|
<div style="font-size:13px;font-weight:700;color:var(--accent2)" x-text="(modal.ai.best_contact_channel||'unknown').toUpperCase()"></div>
|
||||||
|
<div style="font-size:12px;color:var(--text);margin-top:2px;word-break:break-all" x-text="modal.ai.best_contact_value||'—'"></div>
|
||||||
|
<!-- All contacts from site_analysis -->
|
||||||
|
<div x-show="modal.sa" style="margin-top:8px;display:flex;flex-wrap:wrap;gap:4px">
|
||||||
|
<template x-for="em in (modal.sa?.emails||[])">
|
||||||
|
<a :href="'mailto:'+em" class="chip email" x-text="em"></a>
|
||||||
|
</template>
|
||||||
|
<template x-for="ph in (modal.sa?.phones||[])">
|
||||||
|
<a :href="'tel:'+ph" class="chip phone" x-text="ph"></a>
|
||||||
|
</template>
|
||||||
|
<template x-for="wa in (modal.sa?.whatsapp||[])">
|
||||||
|
<a :href="wa" target="_blank" class="chip wa">💬 WhatsApp</a>
|
||||||
|
</template>
|
||||||
|
<template x-for="s in (modal.sa?.social_links||[]).slice(0,3)">
|
||||||
|
<a :href="s" target="_blank" class="chip social" x-text="s.replace('https://','').split('/')[0]"></a>
|
||||||
|
</template>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<!-- Pitch -->
|
||||||
|
<div style="background:#6c63ff15;border:1px solid #6c63ff33;border-radius:6px;padding:10px;margin:8px 0">
|
||||||
|
<div style="font-size:10px;color:var(--muted);text-transform:uppercase;margin-bottom:4px">Cold Pitch (ES)</div>
|
||||||
|
<div style="font-size:13px;font-style:italic;color:var(--accent2)" x-text="modal.ai.pitch_angle||'—'"></div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="mrow"><span class="mlabel">Services</span><span x-text="(modal.ai.services_needed||[]).join(', ')||'—'"></span></div>
|
||||||
|
<div class="mrow"><span class="mlabel">Notes</span><span x-text="modal.ai.outreach_notes||'—'"></span></div>
|
||||||
|
|
||||||
|
<!-- Site analysis tech snapshot -->
|
||||||
|
<div x-show="modal.sa" style="margin-top:10px;padding-top:10px;border-top:1px solid var(--border)">
|
||||||
|
<div style="font-size:10px;color:var(--muted);text-transform:uppercase;margin-bottom:6px">Technical Snapshot</div>
|
||||||
|
<div style="display:grid;grid-template-columns:1fr 1fr;gap:4px;font-size:11px">
|
||||||
|
<div>Load time: <b x-text="(modal.sa?.load_time_ms||'—')+'ms'"></b></div>
|
||||||
|
<div>Page size: <b x-text="(modal.sa?.page_size_kb||'—')+'KB'"></b></div>
|
||||||
|
<div>CMS: <b x-text="modal.sa?.cms||'unknown'"></b></div>
|
||||||
|
<div>Server: <b x-text="modal.sa?.server||'—'"></b></div>
|
||||||
|
<div>Sitemap: <b x-text="modal.sa?.has_sitemap?'✅':'❌'"></b></div>
|
||||||
|
<div>Robots: <b x-text="modal.sa?.has_robots?'✅':'❌'"></b></div>
|
||||||
|
<div>Analytics: <b x-text="(modal.sa?.analytics_present||[]).join(', ')||'none'"></b></div>
|
||||||
|
<div>Mobile: <b x-text="modal.sa?.has_mobile_viewport?'✅':'❌'"></b></div>
|
||||||
|
<div>Lorem ipsum: <b :style="modal.sa?.has_lorem_ipsum?'color:var(--danger)':''" x-text="modal.sa?.has_lorem_ipsum?'⚠ YES':'No'"></b></div>
|
||||||
|
<div>Words: <b x-text="modal.sa?.word_count||'—'"></b></div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
<button class="btn bg" style="margin-top:14px;width:100%" @click="modal.open=false">Close</button>
|
<button class="btn bg" style="margin-top:14px;width:100%" @click="modal.open=false">Close</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -436,7 +519,7 @@ function app() {
|
|||||||
qst: {}, customDomains: '',
|
qst: {}, customDomains: '',
|
||||||
pipeline: {hot:{count:0,samples:[]},warm:{count:0,samples:[]},cold:{count:0,samples:[]}},
|
pipeline: {hot:{count:0,samples:[]},warm:{count:0,samples:[]},cold:{count:0,samples:[]}},
|
||||||
toast: {show:false,msg:'',type:'success'},
|
toast: {show:false,msg:'',type:'success'},
|
||||||
modal: {open:false,domain:'',data:{}},
|
modal: {open:false, domain:'', ai:{}, sa:null},
|
||||||
_chart: null, _poll: null, _toastTimer: null,
|
_chart: null, _poll: null, _toastTimer: null,
|
||||||
|
|
||||||
async init() {
|
async init() {
|
||||||
@@ -556,11 +639,20 @@ function app() {
|
|||||||
|
|
||||||
openModal(row) {
|
openModal(row) {
|
||||||
this.modal.domain = row.domain;
|
this.modal.domain = row.domain;
|
||||||
try { this.modal.data = row.ai_assessment ? JSON.parse(row.ai_assessment) : {}; }
|
try { this.modal.ai = row.ai_assessment ? JSON.parse(row.ai_assessment) : {}; }
|
||||||
catch(e) { this.modal.data = {}; }
|
catch(e) { this.modal.ai = {}; }
|
||||||
|
try { this.modal.sa = row.site_analysis ? JSON.parse(row.site_analysis) : null; }
|
||||||
|
catch(e) { this.modal.sa = null; }
|
||||||
this.modal.open = true;
|
this.modal.open = true;
|
||||||
},
|
},
|
||||||
|
|
||||||
|
qualityBg(s) {
|
||||||
|
if(s==null) return 'background:#333;color:#888';
|
||||||
|
if(s>=8) return 'background:#00d4aa22;color:var(--accent2)';
|
||||||
|
if(s>=5) return 'background:#ffb34722;color:var(--warn)';
|
||||||
|
return 'background:#ff4f6d22;color:var(--danger)';
|
||||||
|
},
|
||||||
|
|
||||||
scoreBg(s) {
|
scoreBg(s) {
|
||||||
if(s==null) return 'background:#333;color:#888';
|
if(s==null) return 'background:#333;color:#888';
|
||||||
if(s>=80) return 'background:#ff4f6d22;color:#ff4f6d';
|
if(s>=80) return 'background:#ff4f6d22;color:#ff4f6d';
|
||||||
|
|||||||
@@ -13,6 +13,6 @@ services:
|
|||||||
- SCORE_THRESHOLD=60
|
- SCORE_THRESHOLD=60
|
||||||
- TARGET_TLDS=es,com,net
|
- TARGET_TLDS=es,com,net
|
||||||
- TARGET_COUNTRIES=ES,GB,DE,FR,RO,PT,AD,IT
|
- TARGET_COUNTRIES=ES,GB,DE,FR,RO,PT,AD,IT
|
||||||
- REPLICATE_API_TOKEN=r8_6kV2NWMQyPVB9JILHJprrXJJh4vWazA22Osyj
|
- REPLICATE_API_TOKEN=r8_7I7Feai78f9PzMOs20y5GVFKiLkgUWP463vZO
|
||||||
- AI_CONCURRENCY=3
|
- AI_CONCURRENCY=3
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|||||||
Reference in New Issue
Block a user