fix: search race condition + brand detection + contacts + reassess

- loadDomains(): add a generation counter so stale auto-advance fetches cannot overwrite a newer user-triggered search result; snapshot filter state before the first await so the URL reflects what was requested; add an HTTP status check so backend errors surface as toasts rather than silent empty results; auto-advance now calls loadDomains() without await so the counter increments correctly per page advance
- beauty_ai: word-boundary regex for short brands (≤5 chars) to stop 'ref' matching 'reference'/'refresh'/'prefer' etc.; merge phones, whatsapp and social_links from site_analyzer directly into the result (more reliable than AI extraction); add contact_whatsapp and contact_social fields to the AI JSON schema
- db: add requeue_beauty() for re-assessing already-assessed domains
- beauty_main: /api/beauty/reassess/batch endpoint using requeue_beauty
- index.html: Re-assess Selected bulk button, per-row ↺ button in Browse and Pipeline, WhatsApp + social links in Pipeline contact panel

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -128,9 +128,22 @@ BEAUTY_CATEGORIES = [
|
|||||||
# ── Brand detection (fast pre-scan, no AI) ─────────────────────────────────────
|
# ── Brand detection (fast pre-scan, no AI) ─────────────────────────────────────
|
||||||
|
|
||||||
def detect_brands_in_text(text: str) -> list[str]:
    """Find which brands from the universe appear in the scraped page text.

    Matching is case-insensitive. Brands longer than five characters use a
    plain substring test. Short brands (<=5 chars) must not be directly
    preceded or followed by a letter or digit, which avoids false positives
    like 'ref' matching 'reference', 'prefer', 'refresh', etc.

    The boundary test is Unicode-aware, so accented letters count as part of
    a word: a brand 'ana' does not match inside 'mañana'. Underscores and
    punctuation still count as boundaries.

    Args:
        text: Scraped page text; empty or None yields an empty list.

    Returns:
        Up to 60 matching brands, spelled and ordered as in BEAUTY_BRANDS.
    """
    if not text:
        return []
    lowered = text.lower()
    found: list[str] = []
    for brand in BEAUTY_BRANDS:
        needle = brand.lower()
        if len(needle) <= 5:
            # [^\W_] is "any Unicode letter or digit"; the lookarounds reject
            # a match that is glued to one on either side.
            pattern = r"(?<![^\W_])" + re.escape(needle) + r"(?![^\W_])"
            hit = re.search(pattern, lowered) is not None
        else:
            hit = needle in lowered
        if hit:
            found.append(brand)
            # The result is capped at 60, so there is no point scanning on.
            if len(found) == 60:
                break
    return found
|
||||||
|
|
||||||
|
|
||||||
def get_dist_matches(detected: list[str]) -> list[str]:
|
def get_dist_matches(detected: list[str]) -> list[str]:
|
||||||
@@ -247,6 +260,8 @@ Respond ONLY with valid JSON, no markdown, no text outside JSON:
|
|||||||
"dist_matches": ["OurBrand1","OurBrand2"],
|
"dist_matches": ["OurBrand1","OurBrand2"],
|
||||||
"contact_email": "email or empty string",
|
"contact_email": "email or empty string",
|
||||||
"contact_phone": "phone or empty string",
|
"contact_phone": "phone or empty string",
|
||||||
|
"contact_whatsapp": "whatsapp link or empty string",
|
||||||
|
"contact_social": "primary social profile URL or empty string",
|
||||||
"b2b_proposal": "1-2 sentence value proposition in Spanish referencing their categories and our matching brands",
|
"b2b_proposal": "1-2 sentence value proposition in Spanish referencing their categories and our matching brands",
|
||||||
"outreach_subject": "short Spanish subject line referencing their business name",
|
"outreach_subject": "short Spanish subject line referencing their business name",
|
||||||
"outreach_email": "3-4 sentence ready-to-send email in Spanish. Mention their business, 1-2 specific brands from our portfolio that match their range, and a clear call to action (catálogo, muestra, llamada).",
|
"outreach_email": "3-4 sentence ready-to-send email in Spanish. Mention their business, 1-2 specific brands from our portfolio that match their range, and a clear call to action (catálogo, muestra, llamada).",
|
||||||
@@ -336,12 +351,27 @@ async def assess_beauty_domain(analysis: dict) -> dict:
|
|||||||
if not result.get("detected_brands") and detected:
|
if not result.get("detected_brands") and detected:
|
||||||
result["detected_brands"] = detected
|
result["detected_brands"] = detected
|
||||||
|
|
||||||
|
# Always merge contact data directly from site_analyzer — more reliable
|
||||||
|
# than AI extraction since it uses regex against raw HTML
|
||||||
|
phones = analysis.get("phones", [])
|
||||||
|
whatsapp = analysis.get("whatsapp", [])
|
||||||
|
social_links = analysis.get("social_links", [])
|
||||||
|
if phones and not result.get("contact_phone"):
|
||||||
|
result["contact_phone"] = phones[0]
|
||||||
|
if whatsapp:
|
||||||
|
result["contact_whatsapp"] = "; ".join(whatsapp[:2])
|
||||||
|
if social_links:
|
||||||
|
result["contact_social"] = "; ".join(social_links[:3])
|
||||||
|
|
||||||
logger.info("Beauty AI %s → quality=%s, dist_matches=%s",
|
logger.info("Beauty AI %s → quality=%s, dist_matches=%s",
|
||||||
domain, result.get("lead_quality"), result.get("dist_matches"))
|
domain, result.get("lead_quality"), result.get("dist_matches"))
|
||||||
return result
|
return result
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error("Beauty AI error %s: %s", domain, e)
|
logger.error("Beauty AI error %s: %s", domain, e)
|
||||||
|
phones = analysis.get("phones", [])
|
||||||
|
whatsapp = analysis.get("whatsapp", [])
|
||||||
|
social = analysis.get("social_links", [])
|
||||||
return {
|
return {
|
||||||
"error": str(e)[:300],
|
"error": str(e)[:300],
|
||||||
"is_relevant": False,
|
"is_relevant": False,
|
||||||
@@ -349,4 +379,7 @@ async def assess_beauty_domain(analysis: dict) -> dict:
|
|||||||
"dist_matches": dist_match,
|
"dist_matches": dist_match,
|
||||||
"detected_brands": detected,
|
"detected_brands": detected,
|
||||||
"contact_email": "",
|
"contact_email": "",
|
||||||
|
"contact_phone": phones[0] if phones else "",
|
||||||
|
"contact_whatsapp": "; ".join(whatsapp[:2]) if whatsapp else "",
|
||||||
|
"contact_social": "; ".join(social[:3]) if social else "",
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ load_dotenv()
|
|||||||
from app.db import (
|
from app.db import (
|
||||||
SQLITE_PATH, init_db, get_stats, get_domains, get_enriched,
|
SQLITE_PATH, init_db, get_stats, get_domains, get_enriched,
|
||||||
build_duckdb_index, index_status,
|
build_duckdb_index, index_status,
|
||||||
queue_beauty, get_beauty_queue_status, save_beauty_assessment, get_beauty_leads,
|
queue_beauty, requeue_beauty, get_beauty_queue_status, save_beauty_assessment, get_beauty_leads,
|
||||||
save_prescreen_results,
|
save_prescreen_results,
|
||||||
)
|
)
|
||||||
from app.validator import start_validator, stop_validator, get_validator_status
|
from app.validator import start_validator, stop_validator, get_validator_status
|
||||||
@@ -277,6 +277,17 @@ async def beauty_assess_batch(body: dict):
|
|||||||
return {"queued": len(domains_list)}
|
return {"queued": len(domains_list)}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/beauty/reassess/batch")
async def beauty_reassess_batch(body: dict):
    """Re-queue domains for a fresh assessment.

    Expects a JSON body of the form ``{"domains": ["example.com", ...]}``.
    The cleaned list is handed to ``requeue_beauty`` and then
    ``_start_beauty_worker()`` is called (presumably to make sure the
    background worker is running — confirm against its definition).

    Returns:
        ``{"requeued": <count>}`` on success, where the count is the number
        of distinct domains queued; a 400 JSON error when ``domains`` is not
        a list or contains no usable entries.
    """
    raw_domains = body.get("domains", [])
    # A bare string would otherwise be iterated character by character and
    # each character queued as a "domain".
    if not isinstance(raw_domains, list):
        return JSONResponse({"error": "domains must be a list"}, status_code=400)
    # Keep only non-empty strings; dict.fromkeys drops duplicates while
    # preserving the order the client sent.
    domains_list = list(dict.fromkeys(
        d for d in raw_domains if isinstance(d, str) and d
    ))
    if not domains_list:
        return JSONResponse({"error": "no domains provided"}, status_code=400)
    await requeue_beauty(domains_list)
    _start_beauty_worker()
    return {"requeued": len(domains_list)}
|
||||||
|
|
||||||
|
|
||||||
@app.post("/api/beauty/worker/restart")
|
@app.post("/api/beauty/worker/restart")
|
||||||
async def beauty_worker_restart():
|
async def beauty_worker_restart():
|
||||||
_start_beauty_worker()
|
_start_beauty_worker()
|
||||||
|
|||||||
13
app/db.py
13
app/db.py
@@ -540,6 +540,19 @@ async def queue_beauty(domains: list[str]):
|
|||||||
await db.commit()
|
await db.commit()
|
||||||
|
|
||||||
|
|
||||||
|
async def requeue_beauty(domains: list[str]):
    """Put domains back on the beauty queue as 'pending', assessed or not.

    Each domain is upserted into ``beauty_queue``: a new row is inserted with
    status 'pending'; an existing row has its status set back to 'pending'
    and its ``completed_at`` and ``error`` columns cleared.

    Args:
        domains: Domain names to (re-)queue.
    """
    upsert_sql = """INSERT INTO beauty_queue (domain, status)
               VALUES (?, 'pending')
               ON CONFLICT(domain) DO UPDATE SET
                   status='pending', completed_at=NULL, error=NULL"""
    # executemany wants one parameter tuple per row.
    params = [(name,) for name in domains]
    async with aiosqlite.connect(SQLITE_PATH, timeout=30) as conn:
        await conn.executemany(upsert_sql, params)
        await conn.commit()
|
||||||
|
|
||||||
|
|
||||||
async def get_beauty_queue_status():
|
async def get_beauty_queue_status():
|
||||||
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
|
||||||
async with db.execute("SELECT status, COUNT(*) FROM beauty_queue GROUP BY status") as cur:
|
async with db.execute("SELECT status, COUNT(*) FROM beauty_queue GROUP BY status") as cur:
|
||||||
|
|||||||
@@ -180,6 +180,10 @@ textarea{width:100%;resize:vertical;font-family:monospace;font-size:12px}
|
|||||||
<span x-show="prescreening">Screening…</span>
|
<span x-show="prescreening">Screening…</span>
|
||||||
</button>
|
</button>
|
||||||
<button class="btn-primary btn-sm" @click="assessSelected()">B2B Assess Selected</button>
|
<button class="btn-primary btn-sm" @click="assessSelected()">B2B Assess Selected</button>
|
||||||
|
<button class="btn-secondary btn-sm" @click="reassessSelected()" :disabled="reassessing">
|
||||||
|
<span x-show="!reassessing">Re-assess Selected</span>
|
||||||
|
<span x-show="reassessing">Re-queuing…</span>
|
||||||
|
</button>
|
||||||
<button class="btn-secondary btn-sm" @click="selected=[]">Clear</button>
|
<button class="btn-secondary btn-sm" @click="selected=[]">Clear</button>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -227,6 +231,7 @@ textarea{width:100%;resize:vertical;font-family:monospace;font-size:12px}
|
|||||||
<td style="white-space:nowrap;display:flex;gap:4px">
|
<td style="white-space:nowrap;display:flex;gap:4px">
|
||||||
<button class="btn-secondary btn-sm" @click="prescreenOne(row.domain)" title="HTTP check + niche classify">Screen</button>
|
<button class="btn-secondary btn-sm" @click="prescreenOne(row.domain)" title="HTTP check + niche classify">Screen</button>
|
||||||
<button class="btn-primary btn-sm" @click="assessOne(row.domain)" title="Beauty B2B AI assessment">Assess</button>
|
<button class="btn-primary btn-sm" @click="assessOne(row.domain)" title="Beauty B2B AI assessment">Assess</button>
|
||||||
|
<button x-show="row.beauty_lead_quality" class="btn-secondary btn-sm" @click="reassessOne(row.domain)" title="Re-run B2B assessment">↺</button>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
</template>
|
</template>
|
||||||
@@ -338,6 +343,7 @@ textarea{width:100%;resize:vertical;font-family:monospace;font-size:12px}
|
|||||||
x-text="(row._beauty||{}).contact_email||row.emails||'—'"></td>
|
x-text="(row._beauty||{}).contact_email||row.emails||'—'"></td>
|
||||||
<td @click.stop style="white-space:nowrap;display:flex;gap:4px">
|
<td @click.stop style="white-space:nowrap;display:flex;gap:4px">
|
||||||
<button class="btn-secondary btn-sm" @click="copyOutreach(row)">Copy email</button>
|
<button class="btn-secondary btn-sm" @click="copyOutreach(row)">Copy email</button>
|
||||||
|
<button class="btn-secondary btn-sm" @click="reassessOne(row.domain)" title="Re-run B2B assessment">↺</button>
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
<!-- Expanded detail -->
|
<!-- Expanded detail -->
|
||||||
@@ -371,14 +377,20 @@ textarea{width:100%;resize:vertical;font-family:monospace;font-size:12px}
|
|||||||
</div>
|
</div>
|
||||||
<div class="detail-box">
|
<div class="detail-box">
|
||||||
<h4>Contact Details</h4>
|
<h4>Contact Details</h4>
|
||||||
<p style="font-size:12px;line-height:1.7">
|
<p style="font-size:12px;line-height:1.8">
|
||||||
<template x-if="(row._beauty||{}).contact_email">
|
<template x-if="(row._beauty||{}).contact_email">
|
||||||
<span>Email: <a :href="'mailto:'+(row._beauty||{}).contact_email" x-text="(row._beauty||{}).contact_email"></a><br></span>
|
<span>Email: <a :href="'mailto:'+(row._beauty||{}).contact_email" x-text="(row._beauty||{}).contact_email"></a><br></span>
|
||||||
</template>
|
</template>
|
||||||
<template x-if="(row._beauty||{}).contact_phone">
|
<template x-if="(row._beauty||{}).contact_phone">
|
||||||
<span>Phone: <span x-text="(row._beauty||{}).contact_phone"></span><br></span>
|
<span>Phone: <span x-text="(row._beauty||{}).contact_phone"></span><br></span>
|
||||||
</template>
|
</template>
|
||||||
<template x-if="row.emails">
|
<template x-if="(row._beauty||{}).contact_whatsapp">
|
||||||
|
<span>WhatsApp: <a :href="(row._beauty||{}).contact_whatsapp" target="_blank" x-text="(row._beauty||{}).contact_whatsapp"></a><br></span>
|
||||||
|
</template>
|
||||||
|
<template x-if="(row._beauty||{}).contact_social">
|
||||||
|
<span style="color:var(--muted)">Social: <span x-text="(row._beauty||{}).contact_social"></span><br></span>
|
||||||
|
</template>
|
||||||
|
<template x-if="row.emails && !(row._beauty||{}).contact_email">
|
||||||
<span style="color:var(--muted);font-size:11px">On-site: <span x-text="row.emails"></span></span>
|
<span style="color:var(--muted);font-size:11px">On-site: <span x-text="row.emails"></span></span>
|
||||||
</template>
|
</template>
|
||||||
</p>
|
</p>
|
||||||
@@ -446,7 +458,8 @@ function app() {
|
|||||||
valSt: {running:false,processed:0,live:0,dead:0,error:0,parked:0,redirect:0,skipped:0,offset:0,rate:0},
|
valSt: {running:false,processed:0,live:0,dead:0,error:0,parked:0,redirect:0,skipped:0,offset:0,rate:0},
|
||||||
valTld: '', valRescan: false,
|
valTld: '', valRescan: false,
|
||||||
toasts: [],
|
toasts: [],
|
||||||
prescreening: false, validating: false,
|
prescreening: false, validating: false, reassessing: false,
|
||||||
|
_loadGen: 0, // incremented on every loadDomains() call; stale responses are discarded
|
||||||
exportQuality: '', exportCountry: '',
|
exportQuality: '', exportCountry: '',
|
||||||
f: {keyword:'', tld:'', prescreen_status:'live', niche:'beauty_cosmetics',
|
f: {keyword:'', tld:'', prescreen_status:'live', niche:'beauty_cosmetics',
|
||||||
site_type:'ecommerce', country:'', assessed:'', alpha_only:false, no_sld:false, limit:'100', page:1},
|
site_type:'ecommerce', country:'', assessed:'', alpha_only:false, no_sld:false, limit:'100', page:1},
|
||||||
@@ -491,52 +504,72 @@ function app() {
|
|||||||
goSearch() { this.f.page=1; this.loadDomains(); },
|
goSearch() { this.f.page=1; this.loadDomains(); },
|
||||||
|
|
||||||
async loadDomains() {
|
async loadDomains() {
|
||||||
|
// Generation counter: if a newer call starts while this one is awaiting the
|
||||||
|
// network, this call's result is stale and must be discarded. This prevents
|
||||||
|
// auto-advance's background fetches from overwriting a fresh user-triggered
|
||||||
|
// search that completes after the stale fetch returns.
|
||||||
|
const gen = ++this._loadGen;
|
||||||
this.loading = true;
|
this.loading = true;
|
||||||
try {
|
try {
|
||||||
const p = new URLSearchParams({page: this.f.page, limit: this.f.limit});
|
// Snapshot filter state NOW (before any await), so the URL we build
|
||||||
if (this.f.keyword) p.set('keyword', this.f.keyword.trim());
|
// matches the filters the user actually requested.
|
||||||
if (this.f.tld) p.set('tld', this.f.tld.trim());
|
const snap = {...this.f};
|
||||||
if (this.f.alpha_only) p.set('alpha_only', 'true');
|
const p = new URLSearchParams({page: snap.page, limit: snap.limit});
|
||||||
if (this.f.no_sld) p.set('no_sld', 'true');
|
if (snap.keyword) p.set('keyword', snap.keyword.trim());
|
||||||
|
if (snap.tld) p.set('tld', snap.tld.trim());
|
||||||
|
if (snap.alpha_only) p.set('alpha_only', 'true');
|
||||||
|
if (snap.no_sld) p.set('no_sld', 'true');
|
||||||
|
|
||||||
// 'none' (Not checked) = domains never in the pipeline → DuckDB search.
|
// 'none' (Not checked) = domains never in the pipeline → DuckDB search.
|
||||||
// Any real status (live/dead/…), niche, site_type, country, or assessed
|
// Any real status (live/dead/…), niche, site_type, country, or assessed
|
||||||
// requires the SQLite enriched_domains table (all server-side).
|
// requires the SQLite enriched_domains table (all server-side).
|
||||||
const hasEnrichFilter = (this.f.prescreen_status && this.f.prescreen_status !== 'none')
|
const hasEnrichFilter = (snap.prescreen_status && snap.prescreen_status !== 'none')
|
||||||
|| this.f.niche || this.f.site_type || this.f.country || this.f.assessed;
|
|| snap.niche || snap.site_type || snap.country || snap.assessed;
|
||||||
let endpoint;
|
let endpoint;
|
||||||
if (hasEnrichFilter) {
|
if (hasEnrichFilter) {
|
||||||
if (this.f.prescreen_status) p.set('prescreen_status', this.f.prescreen_status);
|
if (snap.prescreen_status) p.set('prescreen_status', snap.prescreen_status);
|
||||||
if (this.f.niche) p.set('niche', this.f.niche);
|
if (snap.niche) p.set('niche', snap.niche);
|
||||||
if (this.f.site_type) p.set('site_type', this.f.site_type);
|
if (snap.site_type) p.set('site_type', snap.site_type);
|
||||||
if (this.f.country) p.set('country', this.f.country.trim().toUpperCase());
|
if (snap.country) p.set('country', snap.country.trim().toUpperCase());
|
||||||
if (this.f.assessed) p.set('assessed', this.f.assessed);
|
if (snap.assessed) p.set('assessed', snap.assessed);
|
||||||
endpoint = '/api/enriched';
|
endpoint = '/api/enriched';
|
||||||
} else {
|
} else {
|
||||||
endpoint = '/api/domains';
|
endpoint = '/api/domains';
|
||||||
}
|
}
|
||||||
|
|
||||||
const d = await fetch(endpoint + '?' + p).then(r=>r.json());
|
const d = await fetch(endpoint + '?' + p).then(r => {
|
||||||
|
if (!r.ok) throw new Error(`Server error ${r.status}`);
|
||||||
|
return r.json();
|
||||||
|
});
|
||||||
|
|
||||||
|
// Discard if a newer search was started while this one was in-flight
|
||||||
|
if (gen !== this._loadGen) return;
|
||||||
|
|
||||||
this.domainsTotal = d.total || 0;
|
this.domainsTotal = d.total || 0;
|
||||||
let rows = d.results || [];
|
let rows = d.results || [];
|
||||||
|
|
||||||
// 'Not checked': DuckDB returns all domains joined with enriched data;
|
// 'Not checked': DuckDB returns all domains joined with enriched data;
|
||||||
// keep only those with no prescreen_status yet (truly unprocessed).
|
// keep only those with no prescreen_status yet (truly unprocessed).
|
||||||
if (this.f.prescreen_status === 'none') rows = rows.filter(r => !r.prescreen_status);
|
if (snap.prescreen_status === 'none') rows = rows.filter(r => !r.prescreen_status);
|
||||||
|
|
||||||
// Auto-advance: current DuckDB page was fully processed → try next page
|
// Auto-advance: current DuckDB page was fully processed → try next page
|
||||||
// (prevents "0 results" after bulk-validating a page of Not checked domains)
|
// (prevents "0 results" after bulk-validating a page of Not checked domains)
|
||||||
if (rows.length === 0 && this.f.prescreen_status === 'none'
|
if (rows.length === 0 && snap.prescreen_status === 'none'
|
||||||
&& (d.results||[]).length > 0 && this.f.page < 500) {
|
&& (d.results||[]).length > 0 && snap.page < 500) {
|
||||||
this.f.page++;
|
this.f.page = snap.page + 1;
|
||||||
this.loading = false;
|
// Do NOT await — start the next page search as a fresh call so the
|
||||||
await this.loadDomains();
|
// generation counter works correctly; this call ends immediately.
|
||||||
|
this.loadDomains();
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.domains = rows;
|
this.domains = rows;
|
||||||
} catch(e) { this.notify('Failed to load: '+e.message, 'error'); }
|
} catch(e) {
|
||||||
finally { this.loading = false; }
|
if (gen !== this._loadGen) return;
|
||||||
|
this.notify('Failed to load: '+e.message, 'error');
|
||||||
|
} finally {
|
||||||
|
if (gen === this._loadGen) this.loading = false;
|
||||||
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
async loadLeads() {
|
async loadLeads() {
|
||||||
@@ -644,6 +677,30 @@ function app() {
|
|||||||
} catch(e) { this.notify('Failed: '+e.message, 'error'); }
|
} catch(e) { this.notify('Failed: '+e.message, 'error'); }
|
||||||
},
|
},
|
||||||
|
|
||||||
|
async reassessSelected() {
|
||||||
|
if (!this.selected.length || this.reassessing) return;
|
||||||
|
this.reassessing = true;
|
||||||
|
try {
|
||||||
|
const d = await fetch('/api/beauty/reassess/batch', {
|
||||||
|
method:'POST', headers:{'Content-Type':'application/json'},
|
||||||
|
body: JSON.stringify({domains: this.selected}),
|
||||||
|
}).then(r=>r.json());
|
||||||
|
this.notify(`Re-queued ${d.requeued} domains for fresh B2B assessment`, 'success');
|
||||||
|
this.selected = [];
|
||||||
|
} catch(e) { this.notify('Re-assess failed: '+e.message, 'error'); }
|
||||||
|
finally { this.reassessing = false; }
|
||||||
|
},
|
||||||
|
|
||||||
|
async reassessOne(domain) {
|
||||||
|
try {
|
||||||
|
await fetch('/api/beauty/reassess/batch', {
|
||||||
|
method:'POST', headers:{'Content-Type':'application/json'},
|
||||||
|
body: JSON.stringify({domains:[domain]}),
|
||||||
|
});
|
||||||
|
this.notify(`${domain} re-queued for fresh assessment`, 'success');
|
||||||
|
} catch(e) { this.notify('Failed: '+e.message, 'error'); }
|
||||||
|
},
|
||||||
|
|
||||||
toggleLead(domain) {
|
toggleLead(domain) {
|
||||||
this.expandedLead = this.expandedLead===domain ? null : domain;
|
this.expandedLead = this.expandedLead===domain ? null : domain;
|
||||||
},
|
},
|
||||||
|
|||||||
Reference in New Issue
Block a user