From 90f128e04ebb60152b5f1d2e818e9a2485488236 Mon Sep 17 00:00:00 2001 From: Malin Date: Tue, 5 May 2026 07:29:20 +0200 Subject: [PATCH] fix: extend keyword search to page_snippet and beauty_assessment - add page_snippet TEXT column migration - save prescreener body snippet (600 chars) to page_snippet on upsert - keyword filter now searches: domain, page_title, page_snippet, beauty_assessment JSON so "belleza" matches sites whose content/assessment mentions the word even if the domain name or title doesn't Co-Authored-By: Claude Sonnet 4.6 --- app/db.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/app/db.py b/app/db.py index 89cc928..c246750 100644 --- a/app/db.py +++ b/app/db.py @@ -91,6 +91,7 @@ _MIGRATIONS = [ "ALTER TABLE enriched_domains ADD COLUMN beauty_lead_quality TEXT", "ALTER TABLE enriched_domains ADD COLUMN beauty_assessment TEXT", "ALTER TABLE enriched_domains ADD COLUMN beauty_assessed_at TEXT", + "ALTER TABLE enriched_domains ADD COLUMN page_snippet TEXT", """CREATE TABLE IF NOT EXISTS beauty_queue ( domain TEXT PRIMARY KEY, status TEXT DEFAULT 'pending', @@ -366,8 +367,13 @@ async def get_enriched(min_score=0, cms=None, country=None, kit_digital=None, params.append(site_type) if keyword: kw = f"%{keyword.lower()}%" - conditions.append("(LOWER(domain) LIKE ? OR LOWER(COALESCE(page_title,'')) LIKE ?)") - params.extend([kw, kw]) + conditions.append( + "(LOWER(domain) LIKE ?" + " OR LOWER(COALESCE(page_title,'')) LIKE ?" + " OR LOWER(COALESCE(page_snippet,'')) LIKE ?" + " OR LOWER(COALESCE(beauty_assessment,'')) LIKE ?)" + ) + params.extend([kw, kw, kw, kw]) if tld: tld_clean = tld.lower().lstrip(".") conditions.append("LOWER(domain) LIKE ?") @@ -487,13 +493,14 @@ async def save_prescreen_results(results: list[dict]): else: # Prescreen status upsert — create row if it doesn't exist yet await db.execute( - """INSERT INTO enriched_domains (domain, prescreen_status, prescreen_at, page_title) - VALUES (?, ?, datetime('now'), ?) + """INSERT INTO enriched_domains (domain, prescreen_status, prescreen_at, page_title, page_snippet) + VALUES (?, ?, datetime('now'), ?, ?) ON CONFLICT(domain) DO UPDATE SET prescreen_status = excluded.prescreen_status, prescreen_at = excluded.prescreen_at, - page_title = COALESCE(page_title, excluded.page_title)""", - (domain, r.get("prescreen_status"), r.get("title")), + page_title = COALESCE(page_title, excluded.page_title), + page_snippet = COALESCE(page_snippet, excluded.page_snippet)""", + (domain, r.get("prescreen_status"), r.get("title"), r.get("snippet")), ) await db.commit()