From 22eae3f9b729285856b2f2a3ba3f2646c57ff84d Mon Sep 17 00:00:00 2001 From: Malin Date: Tue, 14 Apr 2026 08:39:27 +0200 Subject: [PATCH] feat: add EN/ES/RO language selector for AI pitch generation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - db.py: add `language` column to ai_queue; migration; queue_ai() accepts language param and re-queues with ON CONFLICT UPDATE so changing language works - main.py: batch and single assess endpoints accept `language` from request body - enricher.py: ai_worker_loop reads language column, passes to _assess_one() - replicate_ai.py: assess_domain() and _build_prompt() accept language param; OUTPUT LANGUAGE section injected into prompt so Gemini writes pitch/email in the requested language (EN/ES/RO) - index.html: flag dropdown (🇪🇸/🇬🇧/🇷🇴) next to AI Assess button; aiLang state default ES; language sent in all batch assessment requests Co-Authored-By: Claude Sonnet 4.6 --- app/db.py | 11 +++++++---- app/enricher.py | 10 +++++----- app/main.py | 10 ++++++++-- app/replicate_ai.py | 10 +++++++--- app/static/index.html | 15 ++++++++++----- 5 files changed, 37 insertions(+), 19 deletions(-) diff --git a/app/db.py b/app/db.py index 5d53a9d..9534028 100644 --- a/app/db.py +++ b/app/db.py @@ -52,7 +52,8 @@ CREATE TABLE IF NOT EXISTS ai_queue ( status TEXT DEFAULT 'pending', created_at TEXT DEFAULT (datetime('now')), completed_at TEXT, - error TEXT + error TEXT, + language TEXT DEFAULT 'ES' ); CREATE TABLE IF NOT EXISTS scores ( domain TEXT PRIMARY KEY, @@ -74,6 +75,7 @@ _MIGRATIONS = [ "ALTER TABLE enriched_domains ADD COLUMN ai_assessed_at TEXT", "ALTER TABLE enriched_domains ADD COLUMN site_analysis TEXT", "CREATE TABLE IF NOT EXISTS ai_queue (domain TEXT PRIMARY KEY, status TEXT DEFAULT 'pending', created_at TEXT DEFAULT (datetime('now')), completed_at TEXT, error TEXT)", + "ALTER TABLE ai_queue ADD COLUMN language TEXT DEFAULT 'ES'", ] # Index build state @@ -332,11 +334,12 @@ async def get_enriched(min_score=0, cms=None, country=None, kit_digital=None, pa return total, rows -async def queue_ai(domains: list[str]): +async def queue_ai(domains: list[str], language: str = "ES"): async with aiosqlite.connect(SQLITE_PATH) as db: await db.executemany( - "INSERT OR IGNORE INTO ai_queue (domain) VALUES (?)", - [(d,) for d in domains], + """INSERT INTO ai_queue (domain, language) VALUES (?, ?) + ON CONFLICT(domain) DO UPDATE SET language=excluded.language, status='pending'""", + [(d, language) for d in domains], ) await db.commit() diff --git a/app/enricher.py b/app/enricher.py index 4d9c293..0671ca1 100644 --- a/app/enricher.py +++ b/app/enricher.py @@ -328,19 +328,19 @@ async def worker_loop(): # ── AI assessment worker ────────────────────────────────────────────────────── -async def _assess_one(domain: str) -> None: +async def _assess_one(domain: str, language: str = "ES") -> None: """Process a single AI assessment — safe to call concurrently.""" from app.replicate_ai import assess_domain as gemini_assess from app.site_analyzer import analyze_site - logger.info("AI: starting analysis for %s", domain) + logger.info("AI: starting analysis for %s (lang=%s)", domain, language) try: # Hard 3-minute ceiling so stuck jobs never block the worker forever async with asyncio.timeout(180): analysis = await analyze_site(domain) logger.info("AI: site analyzed %s (reachable=%s, words=%s)", domain, analysis.get("reachable"), analysis.get("word_count")) - assessment = await gemini_assess(analysis) + assessment = await gemini_assess(analysis, language=language) logger.info("AI: Gemini done %s → quality=%s", domain, assessment.get("lead_quality")) await save_ai_assessment(domain, assessment, site_analysis=analysis) @@ -377,7 +377,7 @@ async def ai_worker_loop(): try: async with aiosqlite.connect(SQLITE_PATH) as db: async with db.execute( - "SELECT domain FROM ai_queue WHERE status='pending' LIMIT 5" + "SELECT domain, COALESCE(language,'ES') FROM ai_queue WHERE status='pending' LIMIT 5" ) as cur: rows = await cur.fetchall() if rows: @@ -399,7 +399,7 @@ async def ai_worker_loop(): # Run assessments concurrently (semaphore in replicate_ai enforces AI_CONCURRENCY) results = await asyncio.gather( - *[_assess_one(r[0]) for r in rows], + *[_assess_one(r[0], r[1]) for r in rows], return_exceptions=True, ) for r, exc in zip(rows, results): diff --git a/app/main.py b/app/main.py index 07ce4d6..f2dc67f 100644 --- a/app/main.py +++ b/app/main.py @@ -173,7 +173,10 @@ async def ai_assess_batch(body: dict): domains_list = body.get("domains", []) if not domains_list: return JSONResponse({"error": "no domains provided"}, status_code=400) - await queue_ai(domains_list) + language = body.get("language", "ES").upper() + if language not in ("EN", "ES", "RO"): + language = "ES" + await queue_ai(domains_list, language=language) ensure_workers_alive() # ensure AI worker is alive when jobs are queued return {"queued": len(domains_list)} @@ -233,10 +236,13 @@ async def ai_assess_single(body: dict): domain = body.get("domain") if not domain: return JSONResponse({"error": "no domain"}, status_code=400) + language = body.get("language", "ES").upper() + if language not in ("EN", "ES", "RO"): + language = "ES" from app.site_analyzer import analyze_site from app.replicate_ai import assess_domain as gemini_assess analysis = await analyze_site(domain) - assessment = await gemini_assess(analysis) + assessment = await gemini_assess(analysis, language=language) await save_ai_assessment(domain, assessment, site_analysis=analysis) return {**assessment, "site_analysis": analysis} diff --git a/app/replicate_ai.py b/app/replicate_ai.py index e6d3c7d..fb0486f 100644 --- a/app/replicate_ai.py +++ b/app/replicate_ai.py @@ -54,7 +54,7 @@ async def _ddg_search(query: str) -> str: return "" -def _build_prompt(a: dict, search_results: str = "") -> str: +def _build_prompt(a: dict, search_results: str = "", language: str = "ES") -> str: contacts_block = [] if a.get("emails"): contacts_block.append(f" Emails: {', '.join(a['emails'][:3])}") if a.get("phones"): contacts_block.append(f" Phones: {', '.join(a['phones'][:3])}") @@ -141,6 +141,10 @@ Profiles found on site: {social_str} === WEB SEARCH RESULTS (use to find contacts, verify business identity) === {(search_results or "No results.")[:600]} +=== OUTPUT LANGUAGE === +Write pitch_angle, outreach_email, email_subject, and all human-readable text fields in: {language} +(EN = English | ES = Spanish | RO = Romanian) + === WHO WE ARE === We are a full-service digital agency. We handle EVERYTHING web-related for SMEs: new website builds, redesigns, landing pages, e-commerce, CMS migrations, speed @@ -246,7 +250,7 @@ def _parse_output(raw: str) -> dict: } -async def assess_domain(analysis: dict) -> dict: +async def assess_domain(analysis: dict, language: str = "ES") -> dict: """Call Gemini with the full site analysis. Returns parsed assessment.""" async with _sem(): # Build search query from domain / page title for contact lookup @@ -259,7 +263,7 @@ async def assess_domain(analysis: dict) -> dict: payload = { "input": { - "prompt": _build_prompt(analysis, search_results), + "prompt": _build_prompt(analysis, search_results, language), "images": [], "videos": [], "top_p": 0.9, diff --git a/app/static/index.html b/app/static/index.html index cab9a91..8d6ec09 100644 --- a/app/static/index.html +++ b/app/static/index.html @@ -343,6 +343,11 @@ tr:hover td{background:rgba(255,255,255,.025)} + @@ -550,7 +555,7 @@ function app() { tab: 'browse', stats: {}, indexSt: {ready:false,building:false,total:0}, aiSt: {pending:0,running:0,done:0,failed:0,total:0}, - domains: [], selected: [], + domains: [], selected: [], aiLang: 'ES', loading: false, page: 1, searchTotal: 0, f: {tld:'',keyword:'',min_score:0,cms:'',live_only:false,alpha_only:false,no_sld:false,kit_digital_only:false,limit:'100'}, qst: {}, customDomains: '', @@ -633,9 +638,9 @@ function app() { async aiAssessSelected() { if(!this.selected.length) return; - const r = await fetch('/api/ai/assess/batch',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({domains:this.selected})}); + const r = await fetch('/api/ai/assess/batch',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({domains:this.selected,language:this.aiLang})}); const d = await r.json(); - r.ok ? this.notify(`Queued ${d.queued} for AI assessment`,'info') : this.notify('Error: '+d.error,'error'); + r.ok ? this.notify(`Queued ${d.queued} for AI assessment [${this.aiLang}]`,'info') : this.notify('Error: '+d.error,'error'); this.selected = []; }, @@ -644,9 +649,9 @@ function app() { const r = await fetch('/api/enriched?kit_digital=true&limit=500').then(r=>r.json()); const domains = r.results.map(d=>d.domain); if(!domains.length) { this.notify('No Kit Digital domains enriched yet','info'); return; } - const r2 = await fetch('/api/ai/assess/batch',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({domains})}); + const r2 = await fetch('/api/ai/assess/batch',{method:'POST',headers:{'Content-Type':'application/json'},body:JSON.stringify({domains,language:this.aiLang})}); const d2 = await r2.json(); - this.notify(`Queued ${d2.queued} Kit Digital domains for AI assessment`,'info'); + this.notify(`Queued ${d2.queued} Kit Digital domains for AI assessment [${this.aiLang}]`,'info'); }, async enqueueCustom() {