fix: extend keyword search to page_snippet and beauty_assessment

- add page_snippet TEXT column migration
- save prescreener body snippet (600 chars) to page_snippet on upsert; an existing
  non-NULL page_snippet is kept (COALESCE), same as page_title
- keyword filter now does a case-insensitive substring match against domain,
  page_title, page_snippet, and the raw beauty_assessment JSON text, so "belleza"
  matches sites whose content or assessment mentions the word even if the
  domain name or title doesn't

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-05 07:29:20 +02:00
parent db93401a81
commit 90f128e04e

View File

@@ -91,6 +91,7 @@ _MIGRATIONS = [
"ALTER TABLE enriched_domains ADD COLUMN beauty_lead_quality TEXT",
"ALTER TABLE enriched_domains ADD COLUMN beauty_assessment TEXT",
"ALTER TABLE enriched_domains ADD COLUMN beauty_assessed_at TEXT",
"ALTER TABLE enriched_domains ADD COLUMN page_snippet TEXT",
"""CREATE TABLE IF NOT EXISTS beauty_queue (
domain TEXT PRIMARY KEY,
status TEXT DEFAULT 'pending',
@@ -366,8 +367,13 @@ async def get_enriched(min_score=0, cms=None, country=None, kit_digital=None,
params.append(site_type)
if keyword:
kw = f"%{keyword.lower()}%"
conditions.append("(LOWER(domain) LIKE ? OR LOWER(COALESCE(page_title,'')) LIKE ?)")
params.extend([kw, kw])
conditions.append(
"(LOWER(domain) LIKE ?"
" OR LOWER(COALESCE(page_title,'')) LIKE ?"
" OR LOWER(COALESCE(page_snippet,'')) LIKE ?"
" OR LOWER(COALESCE(beauty_assessment,'')) LIKE ?)"
)
params.extend([kw, kw, kw, kw])
if tld:
tld_clean = tld.lower().lstrip(".")
conditions.append("LOWER(domain) LIKE ?")
@@ -487,13 +493,14 @@ async def save_prescreen_results(results: list[dict]):
else:
# Prescreen status upsert — create row if it doesn't exist yet
await db.execute(
"""INSERT INTO enriched_domains (domain, prescreen_status, prescreen_at, page_title)
VALUES (?, ?, datetime('now'), ?)
"""INSERT INTO enriched_domains (domain, prescreen_status, prescreen_at, page_title, page_snippet)
VALUES (?, ?, datetime('now'), ?, ?)
ON CONFLICT(domain) DO UPDATE SET
prescreen_status = excluded.prescreen_status,
prescreen_at = excluded.prescreen_at,
page_title = COALESCE(page_title, excluded.page_title)""",
(domain, r.get("prescreen_status"), r.get("title")),
page_title = COALESCE(page_title, excluded.page_title),
page_snippet = COALESCE(page_snippet, excluded.page_snippet)""",
(domain, r.get("prescreen_status"), r.get("title"), r.get("snippet")),
)
await db.commit()