feat: BeautyLeads B2B cosmetics frontend on port 7788

New service (app/beauty_main.py) sharing the same /data volume:
- Separate FastAPI app running on port 7788
- beauty_ai.py: brand universe scan (~650 brands), portfolio match
  detection against OUR_BRANDS, Gemini B2B assessment prompt in Spanish
  returning quality/categories/dist_matches/outreach_email
- beauty_queue table + beauty_lead_quality/beauty_assessment columns
  in enriched_domains (with migrations)
- Endpoints: /api/beauty/assess/batch, /api/beauty/leads,
  /api/beauty/status, /api/beauty/export, /api/beauty/reset
- Static frontend: Browse (beauty/ecommerce pre-filtered, no CMS/SSL/KD
  columns), Validator, B2B Pipeline (brand chips, expandable outreach),
  Pre-screen, Export CSV
- docker-compose: second 'beauty' service with shared data volume
- Dockerfile: expose 7788 alongside 6677

Also: add 'error' prescreen_status handling + UI (orange stat box,
filter option) for 4xx/5xx HTTP responses

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-04 19:31:10 +02:00
parent db95876db2
commit a7dd7927b9
6 changed files with 1459 additions and 9 deletions

View File

@@ -11,6 +11,6 @@ RUN pip install --no-cache-dir -r requirements.txt
COPY app/ ./app/
EXPOSE 6677
EXPOSE 6677 7788
CMD ["python", "-m", "app.main"]

352
app/beauty_ai.py Normal file
View File

@@ -0,0 +1,352 @@
"""Beauty B2B AI assessment — cosmetics distribution lead qualification.
Pre-scans scraped text for known brands, then sends a focused prompt to Gemini
to evaluate fit as a B2B customer for a cosmetics distribution business.
"""
import asyncio
import json
import logging
import os
import re
from typing import Optional
import httpx
from bs4 import BeautifulSoup
logger = logging.getLogger(__name__)
# SECURITY: the API token is read from the environment only. A real token used
# to be committed here as the fallback default; it is exposed in version-control
# history and must be revoked and rotated.
REPLICATE_TOKEN = os.getenv("REPLICATE_API_TOKEN", "")
REPLICATE_MODEL = "https://api.replicate.com/v1/models/google/gemini-3-pro/predictions"
# Maximum number of concurrent assessments. Clamped to >= 1 because a semaphore
# created with 0 permits would make every caller wait forever.
AI_CONCURRENCY = max(1, int(os.getenv("AI_CONCURRENCY", "3")))
# Shared semaphore instance; starts as None and is created on first use by _sem().
_ai_sem: Optional[asyncio.Semaphore] = None
def _sem() -> asyncio.Semaphore:
    """Return the module-wide semaphore, creating it on the first call.

    The semaphore is built with AI_CONCURRENCY permits and stored in the
    module-level ``_ai_sem`` so later calls reuse the same instance.
    """
    global _ai_sem
    if _ai_sem is not None:
        return _ai_sem
    _ai_sem = asyncio.Semaphore(AI_CONCURRENCY)
    return _ai_sem
# ── Brand universe (market brands we can detect on client sites) ──────────────
BEAUTY_BRANDS = [
"4711","7days","7th Heaven","A-derma","Abercrombie & Fitch","Abril Et Nature",
"Acqua Di Parma","Actinica","Adidas","Adolfo Dominguez","Aesop","Agatha Ruiz De La Prada",
"Agave","Agua Lavanda","Ahava","Air-wick","Aire Sevilla","Al Haramain","Albal","Alcantara",
"Alejandro Sanz","Alfaparf Milano","Algasiv","Alma Secret","Alpecin","Alqvimia","Alterna",
"Alvarez Gomez","Alyssa Ashley","Ambi Pur","American Crew","Amichi","Ana María Lajusticia",
"Angel Schlesser","Anian","Annayake","Anne Möller","Anso","Antonio Banderas","Apisérum",
"Apivita","Aqc Fragrances","Aquilea","Aramis","Ardell","Arganour","Ariel","Armaf",
"Armand Basi","Artdeco","Artero","As I Am","Aseptine","Atashi","Atrix","Ausonia","Aussie",
"Australian Gold","Autan","Aveda","Avena Kinesia","Avène","Axe","Axovital","Azalea",
"Azzaro","Babaria","Babyliss","Barbie","Bare Minerals","Barulab","Batiste","Beaver",
"Beconfident","Belcils","Bella Aurora","Benefit","Benton","Benzacare","Beter","Biafin",
"Bio Ionic","Bio-oil","Bioderma","Biolage","Biotherm","Biovène","Biretix","Bobbi Brown",
"Bouclème","Bourjois","Bperfect Cosmetics","Britney Spears","Bumble & Bumble","Burberry",
"Bvlgari","Byly","Byphasse","Cacharel","Calvin Klein","Camomila Intea","Cantu","Carefree",
"Carmex","Carolina Herrera","Carrera","Carthusia","Catrice","Caudalie","Cerave","Cerruti",
"Cetaphil","Chanel","Chanson D'Eau","Chloé","Chopard","Christina Aguilera","Christophe Robin",
"Clarins","Clean & Clear","Clinique","Coach","Cocosolis","Colab","Colgate","Collistar",
"Color Wow","Comfort Zone","Comodynes","Compeed","Cosrx","Creed","Creme Of Nature",
"Cristalinas","Crossmen","Crusellas","Cryopharma","Cumlaude Lab","Cutex","Cygnetic",
"Daffoil","Darphin","Davidoff","Declaré","Delfy","Delisea","Denenes","Dentiblanc",
"Dermalogica","Desensin","Dexeryl","Diadermine","Diesel","Diet Esthetic","Dior","Diptyque",
"Dodot","Dolce & Gabbana","Donna Karan","Dove","Dr. Hauschka","Dr.jart+","Dr. Organic",
"Dr. Rimpler","Dr. Tree","Drasanvi","Drunk Elephant","Dsquared2","Ducray","Durex",
"Elancyl","Elegant Touch","Elemis","Elie Saab","Elizabeth Arden","Elizabeth Taylor",
"Emilio Pucci","Endocare","Eric Favre","Escada","Essence","Essie","Estée Lauder",
"Etat Libre D'Orange","Eucerin","Eudermin","Evax","Eve Lom","Eylure","Fa","Fairy","Fanola",
"Farmatint","Farmavita","Farouk","Figuière","Fisiocrem","Flor De Mayo","Fluocaril","Foreo",
"Forté Pharma","Foxy","Francis Kurkdjian","Frederic Malle","Frosch","Garnier","Ghd",
"Gillette","Giorgi Line","Givenchy","Glam Of Sweden","Goldwell","Gosh","Goutal","Gritti",
"Gucci","Guerlain","Guess By Marciano","Gummy","Hair Rituel By Sisley","Hairgum","Halita",
"Halloween","Hansaplast","Hask","Hawaiian Tropic","Head & Shoulders","Heliocare",
"Heno De Pravia","Herbal Essences","Hermès","Hidracel","Hollister","Hugo Boss",
"I.c.o.n.","Ibizaloe","Iceberg","Idc Institute","Iroha","Isabelle Lancray","Isdin",
"Issey Miyake","It Cosmetics","Ivybears","Jacadi","Jean Paul Gaultier","Jil Sander",
"Jimmy Choo","Jo Malone","John Frieda","Johnson's Baby","Joico","Joop","Jordan","Jowaé",
"Juicy Couture","Juliette Has A Gun","Just For Men","Juvena","Kaloo","Karl Lagerfeld",
"Karseell","Katai","Kate Spade","Kativa","Kenzo","Kerasilk","Kerastase","Kevin Murphy",
"Kevyn Aucoin","Kilian","Klorane","L'Anza","L'Occitane","L'Oréal Paris",
"L'Oréal Professionnel","La Cabine","La Mer","La Prairie","La Roche Posay","La Toja",
"Laboratoires Filorga","Lacer","Lacoste","Lactacyd","Lactovit","Lalique","Lancaster",
"Lanvin","Lattafa","Laura Biagiotti","Le Petit Marseillais","Legrain","Lierac","Listerine",
"Living Proof","Loewe","Lola Cosmetics","Lolita Lempicka","Lussoni","Lutsine E45",
"M2 Beauté","Mac","Macadamia","Mad Beauty","Maria Nila","Marlies Möller","Martiderm",
"Martinelia","Marvis","Matrix","Maui","Mavala","Max Factor","Maybelline","Melvita",
"Mermade","Michael Kors","Milk Shake","Mix & Shout","Mixa","Moroccanoil","Moschino",
"Mustela","Nabeel","Nanobrow","Nanoil","Nanolash","Narciso Rodriguez","Nars","Natur Vital",
"Natura Bissé","Natural Honey","Naturalium","Naturtint","Nenuco","Neogen","Neoretin",
"Neostrata","Neutrogena","Nivea","Nûby","Nuggela & Sulé","Nyx Professional Make Up",
"Ogx","Olaplex","Olay","Old Spice","Olivia Garden","Opi","Oral-b","Oraldine","Orofluido",
"Orlane","Oscar De La Renta","Pacha","Paese","Palette","Paloma Picasso","Paltons",
"Pantene","Paranix","Parfums Saphir","Parlux","Payot","Phyto","Picu Baby","Pilexil",
"Piz Buin","Plantur 39","Platanomelón","Polaar","Police","Polident","Ponds","Poseidon",
"Postquam","Proraso","Puig","Purito","Rabanne","Raid","Ralph Lauren","Rated Green",
"Real Techniques","Redenhair","Redist","Redken","Reebok","Ref","Refectocil","Relec",
"Remescar","Rene Furterer","Revlon","Revolution Hair Care","Revolution Make Up",
"Revolution Pro","Rexaline","Rexona","Rilastil","Rimmel London","Roberto Cavalli","Roc",
"Rochas","Roger & Gallet","Roja Parfums","Rosacure","S3","Sabon","Salerm","Sally Hansen",
"Salvatore Ferragamo","Sanex","Sarah Jessica Parker","Saryna Key","Satisfyer","Scalpers",
"Scholl","Schwarzkopf","Scottex","Sebamed","Sebastian Professionals","Seche Vite",
"Sensai","Sensilis","Sensodyne","Serge Lutens","Serumkind","Sesderma","Seven Cosmetics",
"Sexy Hair","Shiseido","Shu Uemura","Sisley","Skeyndor","Skin Generics","Sleek",
"Snp","Soap & Glory","Sol De Janeiro","Solgar","Somatoline Cosmetic","Sophie La Girafe",
"Soria Natural","Steinhart","Stendhal Paris","Sterimar","Strivectin","Suavinex",
"Suavipiel","Svr Laboratoire Dermatologique","Syoss","System Professional","Tabac",
"Taky","Talika","Tampax","Tangle Teezer","Tanit","Teaology","Tena Lady","The Body Shop",
"The Ordinary","The Wet Brush","Thermacare","Tiffany & Co","Tigi","Timotei",
"Tiziana Terenzi","Tod's","Tom Ford","Tommy Hilfiger","Topicrem","Torriden","Tot Herba",
"Tous","Trendy Hair","Tresemme","Trussardi","Tulipán Negro","Urban Decay","Uriage",
"Usu Cosmetics","Vagisil","Valmont","Valquer","Vanderbilt","Vaseline","Veet","Vichy",
"Victor","Victoria's Secret","Victorio & Lucchino","Vital Proteins","Vivra",
"Voltage Cosmetics","Volumax","Waterpik","Waterwipes","Wella","Weleda",
"Williams","Woodwick","Xerjoff","Xls Medical","Yankee Candle","Yari","Yotuel",
"Youth Lab","Zadig & Voltaire","Ziaja",
]
# Our distribution portfolio — the brands we sell to B2B clients
OUR_BRANDS = [
"AIMX","Al Haramain","Apivita","Armaf","Aveda","Bouclème","Clarena",
"Curly Girl Movement","Cutrin","Davines","Dr. Hauschka","FanPalm","Farmavita",
"Flora Curl","GAMMA+","GHD","GOSH","ICON","Image Skincare","Instituto Español",
"Janeke","Kay Pro","Kerasilk","Kyo","Label M","Lierac","Living Proof","Londa",
"M2 Beauté","Malibu C","Maria Nila","Medik8","Misslyn","Mustela","Nesti Dante",
"Nuxe","Obagi","Osmo","Payot","Philip B","Philip Martins","Phyto","Piz Buin",
"Ramon Monegal","Redken","REF","Saryna Key","Sesderma","Skala Brasil","Skin1004",
"Strivectin","Swissdent","Topicrem","Uriage","Vita Liberata","Waterclouds",
"Wella","Youngblood Cosmetics",
]
BEAUTY_CATEGORIES = [
"Perfumes","Facial Cosmetics","Makeup","Hair Care","Health","Body Cosmetics",
"Hygiene","Kids & Babies","Sun Care","Eyewear","Home","Nutrition","Erotic","Fashion",
]
# ── Brand detection (fast pre-scan, no AI) ─────────────────────────────────────
# Compiled whole-token patterns keyed by lowercased brand name; filled on demand.
_BRAND_PATTERNS: dict = {}


def _brand_pattern(brand: str):
    """Return a cached regex matching the lowercased *brand* as a whole token."""
    key = brand.lower()
    pattern = _BRAND_PATTERNS.get(key)
    if pattern is None:
        # Lookarounds rather than \b so names that begin or end with
        # punctuation ("Dr.jart+", "I.c.o.n.") can still match.
        pattern = re.compile(r"(?<!\w)" + re.escape(key) + r"(?!\w)")
        _BRAND_PATTERNS[key] = pattern
    return pattern


def detect_brands_in_text(text: str, brands: Optional[list[str]] = None,
                          max_results: int = 60) -> list[str]:
    """Find which brands appear in the scraped page text.

    Matching is case-insensitive and requires the brand to stand as a whole
    token. Plain substring matching reported short names such as "Fa" or
    "Mac" for any text containing "facial" or "farmacia".

    Args:
        text: Page text to scan; ``None`` or empty yields an empty list.
        brands: Brand names to look for. Defaults to ``BEAUTY_BRANDS``.
        max_results: Maximum number of brands returned (default 60).

    Returns:
        Matching brand names in the order they appear in *brands*.
    """
    haystack = (text or "").lower()
    if not haystack:
        return []
    universe = BEAUTY_BRANDS if brands is None else brands
    found = [b for b in universe if _brand_pattern(b).search(haystack)]
    return found[:max_results]
def get_dist_matches(detected: list[str]) -> list[str]:
    """Return the OUR_BRANDS entries whose name occurs in *detected*.

    The comparison is case-insensitive; results use the spelling and order
    found in OUR_BRANDS.
    """
    seen = set()
    for name in detected:
        seen.add(name.lower())
    matches = []
    for brand in OUR_BRANDS:
        if brand.lower() in seen:
            matches.append(brand)
    return matches
# ── DuckDuckGo search (contact/company lookup) ────────────────────────────────
async def _ddg_search(query: str) -> str:
    """Fetch DuckDuckGo HTML results for *query* and flatten them to text.

    Looks at the first four ``.result`` elements and keeps those that have a
    snippet, one line per result, formatted as ``[url] title: snippet``.

    Returns:
        The joined lines, or an empty string on a non-200 response or any
        error (the lookup is best-effort and never raises).
    """
    try:
        async with httpx.AsyncClient(
            timeout=10, follow_redirects=True,
            headers={"User-Agent": "Mozilla/5.0 (compatible; BeautyLeads/1.0)"},
        ) as client:
            r = await client.get(
                "https://html.duckduckgo.com/html/",
                params={"q": query, "kl": "es-es"},
            )
        if r.status_code != 200:
            return ""
        soup = BeautifulSoup(r.text, "html.parser")
        parts = []
        for res in soup.select(".result")[:4]:
            snip = res.select_one(".result__snippet")
            if not snip:
                continue
            title = res.select_one(".result__a")
            url = res.select_one(".result__url")
            t = title.get_text(strip=True) if title else ""
            u = url.get_text(strip=True) if url else ""
            snippet = snip.get_text(strip=True)
            # Separate title and snippet; they were previously fused together.
            if t:
                parts.append(f"[{u}] {t}: {snippet}")
            else:
                parts.append(f"[{u}] {snippet}")
        return "\n".join(parts)
    except Exception as e:
        # Deliberately broad: search context is optional for the assessment.
        logger.debug("DDG search failed: %s", e)
        return ""
# ── Prompt builder ─────────────────────────────────────────────────────────────
def _build_beauty_prompt(a: dict, detected_brands: list, dist_matches: list,
                         search_results: str = "") -> str:
    """Render the assessment prompt sent to the model.

    Args:
        a: Site analysis dict. Reads ``domain``, ``emails``, ``phones``,
            ``social_links``, ``visible_text_snippet``, ``page_title``,
            ``meta_description``, ``ip_country`` and ``cms``.
        detected_brands: Brand names found by the pre-scan.
        dist_matches: Detected brands that are in OUR_BRANDS.
        search_results: Optional web-search text; only the first 500
            characters are included.

    Returns:
        The full prompt, ending with the JSON schema the model must fill in.
    """
    contact_lines = []
    emails = a.get("emails")
    if emails:
        contact_lines.append(f" Emails: {', '.join(emails[:3])}")
    phones = a.get("phones")
    if phones:
        contact_lines.append(f" Phones: {', '.join(phones[:3])}")
    socials = a.get("social_links")
    if socials:
        contact_lines.append(f" Social: {', '.join(socials[:4])}")
    contacts_str = "\n".join(contact_lines) or " Not found"

    # Cap the page sample so the prompt stays small.
    snippet = (a.get("visible_text_snippet") or "")[:1200]
    title = a.get("page_title") or ""
    meta = a.get("meta_description") or ""
    country = a.get("ip_country") or "unknown"
    cms = a.get("cms") or "unknown"

    if detected_brands:
        detected_str = ", ".join(detected_brands)
    else:
        detected_str = "none detected"
    if dist_matches:
        dist_str = ", ".join(dist_matches)
    else:
        dist_str = "none"

    return f"""You are a senior B2B sales analyst for a cosmetics distribution company operating in Europe.
Your task: evaluate whether this website is a viable B2B customer (retailer, multi-brand store,
e-commerce, distributor or chain that buys beauty products wholesale) and generate an outreach plan.
=== SITE DATA ===
Domain: {a.get("domain")}
Country (IP): {country}
Title: {title}
Meta desc: {meta}
CMS: {cms}
Contact info:
{contacts_str}
=== PAGE CONTENT SAMPLE ===
{snippet}
=== BRANDS ALREADY DETECTED ON SITE ===
{detected_str}
=== OUR PORTFOLIO BRANDS FOUND ON THEIR SITE ===
(These brands we distribute — finding them means we're already in their market)
{dist_str}
=== WEB SEARCH RESULTS ===
{(search_results or "No results.")[:500]}
=== OUR DISTRIBUTION PORTFOLIO ===
{', '.join(OUR_BRANDS)}
=== BEAUTY CATEGORIES WE COVER ===
{', '.join(BEAUTY_CATEGORIES)}
=== ASSESSMENT RULES ===
1. Determine if this is a B2B prospect: retailer, pharmacy, parafarmacia,
perfumería, multi-brand beauty ecommerce, salon chain, supermarket beauty section,
or beauty products distributor based in Europe.
2. Identify which categories from our list they cover.
3. From the page content (even if brands list is empty), identify any beauty brands mentioned.
4. Match detected brands against our portfolio — this drives lead quality:
- HOT: 3+ of our portfolio brands detected, OR a large EU retailer clearly in our niche
- WARM: 1-2 portfolio brand matches, OR clear beauty retailer with good potential
- COLD: beauty-adjacent but weak match, OR can't confirm they buy wholesale
- NOT_RELEVANT: not a beauty business or not in Europe
5. Write all human text (proposal, email) in Spanish.
6. Keep JSON values concise (≤ 25 words each).
Respond ONLY with valid JSON, no markdown, no text outside JSON:
{{
"is_relevant": true/false,
"lead_quality": "HOT|WARM|COLD|NOT_RELEVANT",
"lead_reasoning": "1-2 sentences why",
"business_type": "retailer|ecommerce|distributor|pharmacy|salon_chain|other",
"business_name": "name from title or domain",
"country_fiscal": "2-letter ISO or full name",
"countries_active": ["ES","FR"],
"categories": ["Hair Care","Makeup"],
"detected_brands": ["brand1","brand2"],
"dist_matches": ["OurBrand1","OurBrand2"],
"contact_email": "email or empty string",
"contact_phone": "phone or empty string",
"b2b_proposal": "1-2 sentence value proposition in Spanish referencing their categories and our matching brands",
"outreach_subject": "short Spanish subject line referencing their business name",
"outreach_email": "3-4 sentence ready-to-send email in Spanish. Mention their business, 1-2 specific brands from our portfolio that match their range, and a clear call to action (catálogo, muestra, llamada).",
"revenue_estimate": "unknown",
"outreach_notes": "brief context for sales rep"
}}"""
def _parse_beauty_output(raw: str) -> dict:
text = re.sub(r"```(?:json)?", "", raw).strip().rstrip("`").strip()
m = re.search(r"\{[\s\S]+\}", text)
if m:
candidate = m.group(0)
try:
return json.loads(candidate)
except json.JSONDecodeError:
depth_obj = candidate.count("{") - candidate.count("}")
depth_arr = candidate.count("[") - candidate.count("]")
fixed = re.sub(r',\s*"[^"]*"?\s*:\s*[^,\}\]]*$', '', candidate)
fixed += "]" * max(0, depth_arr) + "}" * max(0, depth_obj)
try:
return json.loads(fixed)
except json.JSONDecodeError:
pass
logger.warning("Beauty AI parse failed, raw: %.300s", raw)
return {
"is_relevant": False,
"lead_quality": "COLD",
"business_name": "",
"contact_email": "",
"dist_matches": [],
"parse_error": True,
}
# ── Main entry point ───────────────────────────────────────────────────────────
async def assess_beauty_domain(analysis: dict) -> dict:
    """Full beauty B2B assessment: brand pre-scan, web search, AI evaluation.

    Runs under the shared concurrency semaphore. The pre-scan uses the
    ``visible_text_snippet`` of *analysis*; its results are merged into the
    AI result whenever the model leaves ``dist_matches`` or
    ``detected_brands`` empty.

    Args:
        analysis: Site analysis dict (reads ``domain``, ``page_title`` and
            ``visible_text_snippet`` here; the prompt builder reads more).

    Returns:
        The parsed model result. On any request/parse exception a dict with
        ``error`` set, ``lead_quality`` "COLD" and the pre-scan brand lists
        is returned instead of raising.
    """
    async with _sem():
        domain = analysis.get("domain", "")
        text = analysis.get("visible_text_snippet", "") or ""
        detected = detect_brands_in_text(text)
        dist_match = get_dist_matches(detected)
        # Also search for company context, using the leading part of the
        # page title as the business name.
        title = analysis.get("page_title") or ""
        biz_name = title.split("|")[0].split("-")[0].strip() or domain
        search_results = await _ddg_search(f'"{biz_name}" {domain} beauty cosmetics wholesale contact')
        logger.info("Beauty assess %s: %d brands detected, %d portfolio matches",
                    domain, len(detected), len(dist_match))
        payload = {
            "input": {
                "prompt": _build_beauty_prompt(analysis, detected, dist_match, search_results),
                "images": [], "videos": [],
                "top_p": 0.9,
                "temperature": 0.15,
                "thinking_level": "low",
                "max_output_tokens": 2000,
            }
        }
        try:
            async with httpx.AsyncClient(timeout=120) as client:
                resp = await client.post(
                    REPLICATE_MODEL,
                    headers={
                        "Authorization": f"Bearer {REPLICATE_TOKEN}",
                        "Content-Type": "application/json",
                        "Prefer": "wait",
                    },
                    json=payload,
                )
            resp.raise_for_status()
            data = resp.json()
            output = data.get("output")
            if isinstance(output, list):
                output = "".join(output)
            if not output:
                # A null/empty output used to surface as an opaque TypeError
                # from the parser. NOTE(review): presumably the response
                # carries "status"/"error" fields in this case — confirm
                # against the Replicate API.
                raise RuntimeError(
                    f"empty model output (status={data.get('status')!r}, "
                    f"error={data.get('error')!r})"
                )
            result = _parse_beauty_output(output)
            # Merge pre-scan data that AI might miss
            if not result.get("dist_matches") and dist_match:
                result["dist_matches"] = dist_match
            if not result.get("detected_brands") and detected:
                result["detected_brands"] = detected
            logger.info("Beauty AI %s → quality=%s, dist_matches=%s",
                        domain, result.get("lead_quality"), result.get("dist_matches"))
            return result
        except Exception as e:
            logger.error("Beauty AI error %s: %s", domain, e)
            return {
                "error": str(e)[:300],
                "is_relevant": False,
                "lead_quality": "COLD",
                "dist_matches": dist_match,
                "detected_brands": detected,
                "contact_email": "",
            }

337
app/beauty_main.py Normal file
View File

@@ -0,0 +1,337 @@
"""BeautyLeads — Cosmetics B2B intelligence dashboard (port 7788).
Shares the same /data volume as the main DomGod service.
Does NOT re-download parquet or rebuild DuckDB index (those run in main service).
Runs its own beauty AI assessment worker against the shared enriched_domains table.
"""
import asyncio
import logging
import os
from pathlib import Path
from contextlib import asynccontextmanager
import aiosqlite
from typing import Optional
from fastapi import FastAPI, Query
from fastapi.responses import StreamingResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from dotenv import load_dotenv
load_dotenv()
from app.db import (
SQLITE_PATH, init_db, get_stats, get_domains, get_enriched,
build_duckdb_index, index_status,
queue_beauty, get_beauty_queue_status, save_beauty_assessment, get_beauty_leads,
save_prescreen_results,
)
from app.validator import start_validator, stop_validator, get_validator_status
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
logger = logging.getLogger(__name__)
# ── Beauty AI worker ──────────────────────────────────────────────────────────
_beauty_worker_task: Optional[asyncio.Task] = None
async def _assess_one_beauty(domain: str) -> None:
    """Analyze and assess one queued domain, recording the outcome.

    On success the assessment is saved via ``save_beauty_assessment``. On any
    failure — an exception, the 180 s timeout, or an assessment that carries
    an ``error`` key — the queue row is marked ``failed`` with the reason.
    Error assessments were previously saved as COLD leads and marked done,
    which made transient API failures look like real results.
    """
    from app.beauty_ai import assess_beauty_domain
    from app.site_analyzer import analyze_site
    logger.info("Beauty AI: starting %s", domain)
    try:
        async with asyncio.timeout(180):
            analysis = await analyze_site(domain)
            assessment = await assess_beauty_domain(analysis)
            if assessment.get("error"):
                raise RuntimeError(str(assessment["error"]))
            await save_beauty_assessment(domain, assessment)
        logger.info("Beauty AI: saved %s -> %s", domain, assessment.get("lead_quality"))
    except Exception as e:
        # TimeoutError has an empty message; fall back to the class name.
        reason = str(e) or type(e).__name__
        logger.error("Beauty AI: failed %s: %s", domain, reason)
        try:
            async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
                await db.execute(
                    "UPDATE beauty_queue SET status='failed', completed_at=datetime('now'), error=? WHERE domain=?",
                    (reason[:400], domain),
                )
                await db.commit()
        except Exception:
            # Still best-effort, but no longer silent.
            logger.exception("Beauty AI: could not mark %s as failed", domain)
async def _beauty_worker_loop():
    """Poll ``beauty_queue`` forever, assessing pending domains in batches.

    On start, rows left in ``running`` are put back to ``pending``. Each
    iteration claims up to five pending rows by marking them ``running`` and
    assesses them concurrently. The loop sleeps 3 s when the queue is empty
    and 5 s after a database error.
    """
    logger.info("Beauty AI worker starting")
    # Reset stale running jobs
    try:
        async with aiosqlite.connect(SQLITE_PATH, timeout=30) as conn:
            await conn.execute("UPDATE beauty_queue SET status='pending' WHERE status='running'")
            await conn.commit()
    except Exception as exc:
        logger.error("Beauty worker: stale reset failed: %s", exc)

    while True:
        claimed = []
        try:
            async with aiosqlite.connect(SQLITE_PATH, timeout=30) as conn:
                async with conn.execute(
                    "SELECT domain FROM beauty_queue WHERE status='pending' LIMIT 5"
                ) as cursor:
                    claimed = await cursor.fetchall()
                if claimed:
                    await conn.executemany(
                        "UPDATE beauty_queue SET status='running' WHERE domain=?",
                        [(row[0],) for row in claimed],
                    )
                    await conn.commit()
        except Exception as exc:
            logger.error("Beauty worker DB error: %s", exc)
            await asyncio.sleep(5)
            continue

        if claimed:
            jobs = [_assess_one_beauty(row[0]) for row in claimed]
            await asyncio.gather(*jobs, return_exceptions=True)
        else:
            await asyncio.sleep(3)
def _start_beauty_worker():
    """Start the worker loop as a task unless one is already running."""
    global _beauty_worker_task
    current = _beauty_worker_task
    if current is not None and not current.done():
        return
    _beauty_worker_task = asyncio.create_task(_beauty_worker_loop())
    logger.info("Beauty AI worker started")
# ── App lifespan ──────────────────────────────────────────────────────────────
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Start background work on app startup and cancel it on shutdown."""
    await init_db()
    # NOTE(review): the module docstring says this service must not rebuild
    # the DuckDB index, yet build_duckdb_index() is called here; presumably it
    # only detects an existing index — confirm in app.db.
    # Keep a reference: the event loop holds tasks weakly, so an unreferenced
    # task can be garbage-collected before it finishes.
    index_task = asyncio.create_task(build_duckdb_index())
    _start_beauty_worker()
    logger.info("BeautyLeads ready on port 7788")
    try:
        yield
    finally:
        # Stop background tasks so shutdown does not leave them pending.
        tasks = [t for t in (index_task, _beauty_worker_task)
                 if t is not None and not t.done()]
        for task in tasks:
            task.cancel()
        if tasks:
            await asyncio.gather(*tasks, return_exceptions=True)


app = FastAPI(title="BeautyLeads", lifespan=lifespan)
# ── Shared read endpoints (same DB) ──────────────────────────────────────────
@app.get("/api/stats")
async def stats():
    """Return whatever ``get_stats`` reports for the shared database."""
    summary = await get_stats()
    return summary
@app.get("/api/index/status")
async def get_index_status():
    """Return the current value of ``index_status()``."""
    current = index_status()
    return current
@app.get("/api/domains")
async def domains(
    tld: str = Query(None),
    page: int = Query(1, ge=1),
    limit: int = Query(100, ge=1, le=500),
    live_only: bool = Query(False),
    alpha_only: bool = Query(False),
    no_sld: bool = Query(False),
    keyword: str = Query(None),
):
    """Return one page of domains; all filters are passed to ``get_domains``."""
    filters = dict(
        tld=tld, page=page, limit=limit,
        alpha_only=alpha_only, no_sld=no_sld,
        keyword=keyword, live_only=live_only,
    )
    total, rows = await get_domains(**filters)
    return dict(page=page, limit=limit, total=total, results=rows)
@app.get("/api/enriched")
async def enriched(
    min_score: int = Query(0),
    country: str = Query(None),
    prescreen_status: str = Query(None),
    niche: str = Query(None),
    site_type: str = Query(None),
    page: int = Query(1, ge=1),
    limit: int = Query(100, ge=1, le=1000),
):
    """Return one page of enriched domains; filters go to ``get_enriched``."""
    filters = dict(
        min_score=min_score, country=country,
        prescreen_status=prescreen_status, niche=niche, site_type=site_type,
        page=page, limit=limit,
    )
    total, rows = await get_enriched(**filters)
    return dict(page=page, limit=limit, total=total, results=rows)
# ── Validator (shared) ────────────────────────────────────────────────────────
@app.post("/api/validator/start")
async def validator_start(tld: str = Query(None), rescan_dead: bool = Query(False)):
    """Start the validator and return its status; an empty ``tld`` means no filter."""
    tld_filter = tld or None
    start_validator(tld_filter=tld_filter, rescan_dead=rescan_dead)
    return get_validator_status()
@app.post("/api/validator/stop")
async def validator_stop():
    """Ask the validator to stop and acknowledge the request."""
    stop_validator()
    return dict(status="stopped")
@app.get("/api/validator/status")
async def validator_status():
    """Return the current value of ``get_validator_status()``."""
    current = get_validator_status()
    return current
# ── Pre-screen (shared) ───────────────────────────────────────────────────────
@app.post("/api/prescreen/batch")
async def prescreen_batch(body: dict):
    """Pre-screen up to 200 domains and classify the live ones.

    Reads ``body["domains"]``. All pre-screen results are saved; domains with
    status ``live`` are then classified in batches of DEEPSEEK_BATCH_SIZE,
    with a 3 s pause between batches, and those results are saved as well.

    Returns:
        Counts per pre-screen status plus the number of classified domains,
        or a 400 response when the list is empty or longer than 200.
    """
    domains_list = body.get("domains", [])
    if not domains_list:
        return JSONResponse({"error": "no domains provided"}, status_code=400)
    if len(domains_list) > 200:
        return JSONResponse({"error": "max 200 domains per batch"}, status_code=400)

    from app.prescreener import prescreen_domains, classify_with_deepseek, DEEPSEEK_BATCH_SIZE

    results = await prescreen_domains(domains_list)
    await save_prescreen_results(results)

    tally: dict = {}
    live = []
    for item in results:
        status = item.get("prescreen_status", "dead")
        tally[status] = tally.get(status, 0) + 1
        if status == "live":
            live.append(item)

    classifications: list = []
    for offset in range(0, len(live), DEEPSEEK_BATCH_SIZE):
        if offset > 0:
            await asyncio.sleep(3)
        chunk = live[offset:offset + DEEPSEEK_BATCH_SIZE]
        classifications.extend(await classify_with_deepseek(chunk))

    classified = 0
    if classifications:
        await save_prescreen_results(classifications)
        classified = len(classifications)

    return {
        "total": len(domains_list),
        "live": tally.get("live", 0),
        "parked": tally.get("parked", 0),
        "redirect": tally.get("redirect", 0),
        "dead": tally.get("dead", 0),
        "classified": classified,
    }
# ── Beauty AI endpoints ───────────────────────────────────────────────────────
@app.post("/api/beauty/assess/batch")
async def beauty_assess_batch(body: dict):
    """Queue ``body["domains"]`` for assessment and make sure the worker runs.

    Returns:
        ``{"queued": n}`` with the number of submitted domains, or a 400
        response when the list is missing, empty, or not a list of non-empty
        strings. Without the type check a plain string would be queued one
        character at a time.
    """
    domains_list = body.get("domains", [])
    if not domains_list:
        return JSONResponse({"error": "no domains provided"}, status_code=400)
    if not isinstance(domains_list, list) or not all(
        isinstance(d, str) and d.strip() for d in domains_list
    ):
        return JSONResponse(
            {"error": "domains must be a list of non-empty strings"}, status_code=400
        )
    await queue_beauty(domains_list)
    _start_beauty_worker()
    return {"queued": len(domains_list)}
@app.post("/api/beauty/worker/restart")
async def beauty_worker_restart():
    """Cancel the running worker task, if any, and start a fresh one.

    Previously this only called ``_start_beauty_worker()``, which does nothing
    while the task is alive, so a hung worker could not be restarted. Jobs
    interrupted by the cancellation stay ``running`` until the new worker
    loop resets them to ``pending`` at start-up.
    """
    task = _beauty_worker_task
    if task is not None and not task.done():
        task.cancel()
        # Wait for the cancellation to finish so the new worker is really created.
        await asyncio.gather(task, return_exceptions=True)
    _start_beauty_worker()
    return {"status": "restarted"}
@app.post("/api/beauty/reset")
async def beauty_reset():
    """Move every ``running`` queue row back to ``pending``.

    Also makes sure the worker is running. Returns the number of rows reset.
    """
    async with aiosqlite.connect(SQLITE_PATH, timeout=30) as conn:
        cursor = await conn.execute(
            "UPDATE beauty_queue SET status='pending' WHERE status='running'"
        )
        reset_count = cursor.rowcount
        await conn.commit()
    _start_beauty_worker()
    return dict(reset=reset_count)
@app.get("/api/beauty/status")
async def beauty_status():
    """Return the per-status counts of the beauty queue."""
    summary = await get_beauty_queue_status()
    return summary
@app.get("/api/beauty/leads")
async def beauty_leads(
    quality: str = Query(None),
    country: str = Query(None),
    page: int = Query(1, ge=1),
    limit: int = Query(100, ge=1, le=500),
):
    """Return one page of assessed leads, optionally filtered by quality and country."""
    filters = dict(quality=quality, country=country, page=page, limit=limit)
    total, rows = await get_beauty_leads(**filters)
    return dict(page=page, limit=limit, total=total, results=rows)
@app.post("/api/beauty/assess/single")
async def beauty_assess_single(body: dict):
    """Assess ``body["domain"]`` immediately, bypassing the queue.

    The assessment is saved and returned together with the site analysis
    under the ``site_analysis`` key. A missing domain yields a 400 response.
    """
    domain = body.get("domain")
    if not domain:
        return JSONResponse({"error": "no domain"}, status_code=400)

    from app.beauty_ai import assess_beauty_domain
    from app.site_analyzer import analyze_site

    site = await analyze_site(domain)
    verdict = await assess_beauty_domain(site)
    await save_beauty_assessment(domain, verdict)

    response = dict(verdict)
    response["site_analysis"] = site
    return response
# ── Export ────────────────────────────────────────────────────────────────────
@app.get("/api/beauty/export")
async def export_beauty_csv(quality: str = Query(None), country: str = Query(None)):
    """Stream the matching beauty leads as a CSV attachment.

    Leads are fetched from ``get_beauty_leads`` in pages of 500 until an empty
    page is returned. Every data field is quoted by the ``csv`` module, so
    embedded quotes are doubled rather than rewritten to apostrophes.

    Args:
        quality: Optional lead-quality filter; also used (sanitized) as a
            suffix of the download file name.
        country: Optional country filter.
    """
    import csv
    import io

    def as_text(value) -> str:
        # List fields come from model output and may be a list, a plain
        # string, or missing; joining a string would split it per character.
        if isinstance(value, (list, tuple)):
            return ", ".join(str(item) for item in value)
        return str(value or "")

    def to_line(values) -> str:
        buf = io.StringIO()
        csv.writer(buf, quoting=csv.QUOTE_ALL, lineterminator="\n").writerow(values)
        return buf.getvalue()

    async def generate():
        yield "domain,quality,business_name,country_fiscal,countries_active,categories,detected_brands,portfolio_matches,contact_email,contact_phone,proposal,outreach_subject,outreach_email\n"
        p = 1
        while True:
            _, rows = await get_beauty_leads(quality=quality, country=country, page=p, limit=500)
            if not rows:
                break
            for r in rows:
                b = r.get("_beauty")
                if not isinstance(b, dict):
                    b = {}
                yield to_line([as_text(v) for v in (
                    r.get("domain"),
                    r.get("beauty_lead_quality"),
                    b.get("business_name"),
                    b.get("country_fiscal"),
                    b.get("countries_active"),
                    b.get("categories"),
                    b.get("detected_brands"),
                    b.get("dist_matches"),
                    b.get("contact_email"),
                    b.get("contact_phone"),
                    b.get("b2b_proposal"),
                    b.get("outreach_subject"),
                    b.get("outreach_email"),
                )])
            p += 1

    # Only safe characters may reach the Content-Disposition header.
    suffix = ""
    if quality:
        suffix = "".join(ch for ch in quality.lower() if ch.isalnum() or ch in "_-")
    qual = f"_{suffix}" if suffix else ""
    return StreamingResponse(
        generate(), media_type="text/csv",
        headers={"Content-Disposition": f'attachment; filename="beautyleads{qual}.csv"'},
    )
# ── Static UI ─────────────────────────────────────────────────────────────────
# Serve the bundled frontend from app/static/beauty at the site root. This
# mount is registered after the API routes defined above.
static_dir = Path(__file__).parent.joinpath("static", "beauty")
app.mount("/", StaticFiles(directory=str(static_dir), html=True), name="static")

if __name__ == "__main__":
    from uvicorn import run as _serve

    _serve("app.beauty_main:app", host="0.0.0.0", port=7788, log_level="info")

View File

@@ -88,6 +88,16 @@ _MIGRATIONS = [
"ALTER TABLE enriched_domains ADD COLUMN prescreen_at TEXT",
"ALTER TABLE enriched_domains ADD COLUMN ip TEXT",
"ALTER TABLE enriched_domains ADD COLUMN load_time_ms INTEGER",
"ALTER TABLE enriched_domains ADD COLUMN beauty_lead_quality TEXT",
"ALTER TABLE enriched_domains ADD COLUMN beauty_assessment TEXT",
"ALTER TABLE enriched_domains ADD COLUMN beauty_assessed_at TEXT",
"""CREATE TABLE IF NOT EXISTS beauty_queue (
domain TEXT PRIMARY KEY,
status TEXT DEFAULT 'pending',
created_at TEXT DEFAULT (datetime('now')),
completed_at TEXT,
error TEXT
)""",
]
# Index build state
@@ -488,6 +498,81 @@ async def queue_domains(domains: list[str]):
await db.commit()
async def queue_beauty(domains: list[str]):
    """Add *domains* to ``beauty_queue`` as pending jobs.

    New domains are inserted with the table defaults. A domain whose previous
    attempt ended as ``failed`` is reset to ``pending`` so it is retried; with
    the former ``INSERT OR IGNORE`` a failed domain could never be queued
    again. Rows in any other state are left untouched.
    """
    async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
        await db.executemany(
            "INSERT INTO beauty_queue (domain) VALUES (?) "
            "ON CONFLICT(domain) DO UPDATE SET "
            "status='pending', completed_at=NULL, error=NULL "
            "WHERE beauty_queue.status='failed'",
            [(d,) for d in domains],
        )
        await db.commit()
async def get_beauty_queue_status():
    """Count ``beauty_queue`` rows per status.

    Returns:
        A dict with ``pending``, ``running``, ``done`` and ``failed`` counts
        (0 when absent) plus ``total``, the sum over every status found.
    """
    per_status = {}
    async with aiosqlite.connect(SQLITE_PATH, timeout=30) as conn:
        async with conn.execute("SELECT status, COUNT(*) FROM beauty_queue GROUP BY status") as cursor:
            async for status, n in cursor:
                per_status[status] = n
    summary = {key: per_status.get(key, 0) for key in ("pending", "running", "done", "failed")}
    summary["total"] = sum(per_status.values())
    return summary
async def save_beauty_assessment(domain: str, assessment: dict):
    """Persist *assessment* for *domain* and mark its queue row as done.

    Ensures an ``enriched_domains`` row exists, stores the lead quality, the
    JSON-encoded assessment and the timestamp, then sets the ``beauty_queue``
    row to ``done`` — all committed together.

    The quality column is normalized to one of HOT / WARM / COLD /
    NOT_RELEVANT (upper-cased; anything else becomes COLD). The value comes
    from model output, and ``get_beauty_leads`` filters on upper-cased values
    and skips NULLs, so a lowercase or missing value used to hide the lead.
    The stored JSON keeps the assessment exactly as received.
    """
    import json as _json
    quality = str(assessment.get("lead_quality") or "").strip().upper()
    if quality not in ("HOT", "WARM", "COLD", "NOT_RELEVANT"):
        quality = "COLD"
    async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
        await db.execute(
            "INSERT INTO enriched_domains (domain) VALUES (?) ON CONFLICT(domain) DO NOTHING",
            (domain,),
        )
        await db.execute(
            """UPDATE enriched_domains SET
            beauty_lead_quality=?, beauty_assessment=?, beauty_assessed_at=datetime('now')
            WHERE domain=?""",
            (quality, _json.dumps(assessment), domain),
        )
        await db.execute(
            "UPDATE beauty_queue SET status='done', completed_at=datetime('now') WHERE domain=?",
            (domain,),
        )
        await db.commit()
async def get_beauty_leads(quality: str = None, country: str = None,
                           page: int = 1, limit: int = 100):
    """Return one page of assessed leads, HOT first, then WARM, then the rest.

    Args:
        quality: Optional exact lead quality (upper-cased before comparison).
        country: Optional ``ip_country`` value (upper-cased before comparison).
        page: 1-based page number.
        limit: Page size.

    Returns:
        ``(total, rows)`` where *total* is the count of all matching rows and
        each row dict has an extra ``_beauty`` key holding the decoded
        ``beauty_assessment`` JSON (``{}`` if missing, invalid, or not an
        object).
    """
    import json as _json
    offset = (page - 1) * limit
    conditions = ["beauty_lead_quality IS NOT NULL"]
    params: list = []
    if quality:
        conditions.append("beauty_lead_quality = ?")
        params.append(quality.upper())
    if country:
        conditions.append("ip_country = ?")
        params.append(country.upper())
    where = "WHERE " + " AND ".join(conditions)
    async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
        db.row_factory = aiosqlite.Row
        async with db.execute(
            f"SELECT * FROM enriched_domains {where} "
            # The trailing "domain" tiebreaker makes the order total; without
            # it rows of equal rank can move between LIMIT/OFFSET pages, so
            # paged reads (e.g. the CSV export) may repeat or skip leads.
            f"ORDER BY CASE beauty_lead_quality WHEN 'HOT' THEN 1 WHEN 'WARM' THEN 2 ELSE 3 END, domain "
            f"LIMIT ? OFFSET ?",
            params + [limit, offset],
        ) as cur:
            rows = [dict(r) async for r in cur]
        async with db.execute(f"SELECT COUNT(*) FROM enriched_domains {where}", params) as cur:
            total = (await cur.fetchone())[0]
    # Parse beauty_assessment JSON inline
    for r in rows:
        try:
            parsed = _json.loads(r.get("beauty_assessment") or "{}")
        except Exception:
            parsed = {}
        # Callers use dict access on _beauty; never hand them a list or scalar.
        r["_beauty"] = parsed if isinstance(parsed, dict) else {}
    return total, rows
async def get_queue_status():
async with aiosqlite.connect(SQLITE_PATH, timeout=30) as db:
async with db.execute("SELECT status, COUNT(*) FROM job_queue GROUP BY status") as cur:

View File

@@ -0,0 +1,659 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>BeautyLeads — Cosmetics B2B Intelligence</title>
<script src="https://cdn.jsdelivr.net/npm/alpinejs@3.x.x/dist/cdn.min.js" defer></script>
<style>
:root {
--bg: #0f0f13;
--surface: #18181f;
--card: #1e1e28;
--border: #2a2a38;
--text: #e2e0f0;
--muted: #7c7a96;
--accent: #e879a0;
--accent2: #c026d3;
--success: #34d399;
--warn: #f97316;
--danger: #f43f5e;
--info: #818cf8;
}
*{box-sizing:border-box;margin:0;padding:0}
body{background:var(--bg);color:var(--text);font-family:'Segoe UI',system-ui,sans-serif;font-size:13px;min-height:100vh}
a{color:var(--accent);text-decoration:none}a:hover{text-decoration:underline}
input,select,textarea{background:var(--surface);color:var(--text);border:1px solid var(--border);border-radius:6px;padding:5px 8px;font-size:12px;outline:none}
input:focus,select:focus,textarea:focus{border-color:var(--accent)}
button{cursor:pointer;border:none;border-radius:6px;padding:6px 14px;font-size:12px;font-weight:600;transition:opacity .15s}
button:hover{opacity:.85} button:disabled{opacity:.4;cursor:default}
.btn-primary{background:var(--accent);color:#fff}
.btn-secondary{background:var(--surface);color:var(--text);border:1px solid var(--border)}
.btn-danger{background:var(--danger);color:#fff}
.btn-sm{padding:4px 10px;font-size:11px}
.card{background:var(--card);border:1px solid var(--border);border-radius:10px;padding:16px}
/* Header */
.header{background:var(--surface);border-bottom:1px solid var(--border);padding:10px 24px;display:flex;align-items:center;gap:16px}
.logo{font-size:18px;font-weight:700;color:var(--accent)}.logo span{color:var(--muted);font-weight:400;font-size:13px;margin-left:8px}
.tabs{display:flex;gap:2px;margin-left:auto}
.tab-btn{background:none;border:none;color:var(--muted);padding:8px 16px;border-radius:8px;font-size:13px;font-weight:500;cursor:pointer;transition:all .15s}
.tab-btn:hover{color:var(--text);background:var(--card)}
.tab-btn.active{color:var(--accent);background:var(--card)}
/* Stats row */
.stat-row{display:flex;gap:12px;flex-wrap:wrap;padding:16px 24px}
.stat-box{background:var(--card);border:1px solid var(--border);border-radius:8px;padding:12px 18px;min-width:110px;text-align:center}
.stat-val{font-size:22px;font-weight:700;line-height:1.1}
.stat-lbl{font-size:11px;color:var(--muted);margin-top:2px}
.hot-color{color:var(--danger)} .warm-color{color:var(--warn)} .cold-color{color:var(--info)}
.live-color{color:var(--success)} .dead-color{color:var(--danger)} .error-color{color:var(--warn)}
/* Filter bar */
.filter-bar{padding:0 24px 12px;display:flex;gap:8px;flex-wrap:wrap;align-items:center}
.filter-bar input,.filter-bar select{padding:5px 10px}
.filter-bar label{color:var(--muted);font-size:11px}
/* Table */
.tbl-wrap{padding:0 24px 24px;overflow-x:auto}
table{width:100%;border-collapse:collapse}
th{background:var(--surface);color:var(--muted);font-size:11px;text-transform:uppercase;letter-spacing:.05em;padding:8px 10px;text-align:left;border-bottom:1px solid var(--border);white-space:nowrap}
td{padding:7px 10px;border-bottom:1px solid var(--border);vertical-align:top;max-width:260px}
tr:hover td{background:rgba(232,121,160,.04)}
/* Badges */
.badge{display:inline-block;padding:2px 8px;border-radius:20px;font-size:10px;font-weight:700;letter-spacing:.04em}
.badge-hot{background:rgba(244,63,94,.18);color:var(--danger)}
.badge-warm{background:rgba(249,115,22,.18);color:var(--warn)}
.badge-cold{background:rgba(129,140,248,.18);color:var(--info)}
.badge-nr{background:rgba(100,116,139,.15);color:var(--muted)}
.badge-live{background:rgba(52,211,153,.15);color:var(--success)}
.badge-dead{background:rgba(244,63,94,.15);color:var(--danger)}
.badge-error{background:rgba(249,115,22,.15);color:var(--warn)}
.badge-parked{background:rgba(251,191,36,.15);color:#fbbf24}
.badge-redirect{background:rgba(148,163,184,.15);color:#94a3b8}
/* Brand chips */
.chip{display:inline-block;background:rgba(232,121,160,.12);color:var(--accent);border:1px solid rgba(232,121,160,.25);border-radius:12px;padding:1px 7px;font-size:10px;margin:1px}
.chip-match{background:rgba(52,211,153,.12);color:var(--success);border-color:rgba(52,211,153,.3)}
/* Pipeline detail expand */
.detail-row td{background:rgba(30,30,40,.8);padding:12px 16px}
.detail-grid{display:grid;grid-template-columns:1fr 1fr;gap:12px}
.detail-section{background:var(--surface);border-radius:8px;padding:10px 14px}
.detail-section h4{color:var(--muted);font-size:10px;text-transform:uppercase;letter-spacing:.05em;margin-bottom:6px}
.detail-section p{font-size:12px;line-height:1.5;color:var(--text)}
/* Toast */
.toast-container{position:fixed;bottom:20px;right:20px;z-index:999;display:flex;flex-direction:column;gap:8px}
.toast{background:var(--card);border:1px solid var(--border);border-radius:8px;padding:10px 16px;font-size:12px;min-width:240px;max-width:380px;animation:slideIn .2s ease}
.toast.success{border-color:rgba(52,211,153,.4);color:var(--success)}
.toast.error{border-color:rgba(244,63,94,.4);color:var(--danger)}
.toast.info{border-color:rgba(129,140,248,.4);color:var(--info)}
@keyframes slideIn{from{transform:translateX(30px);opacity:0}to{transform:translateX(0);opacity:1}}
/* Referenced by the validator "Running" indicator (animation:pulse 1.5s infinite) */
@keyframes pulse{0%,100%{opacity:1}50%{opacity:.4}}
/* Validator (reuse same style) */
.val-grid{display:grid;grid-template-columns:repeat(6,1fr);gap:10px;padding:0 24px 16px}
.esb{background:var(--card);border:1px solid var(--border);border-radius:8px;padding:12px;text-align:center}
.ev{font-size:20px;font-weight:700}
.el{font-size:10px;color:var(--muted);margin-top:2px}
/* Prescreen */
.prescreen-wrap{padding:0 24px}
textarea{width:100%;font-family:monospace;resize:vertical}
/* Progress bar */
.progress-wrap{padding:0 24px 12px}
.progress-bar{background:var(--surface);border-radius:4px;height:6px;overflow:hidden}
.progress-fill{background:var(--accent);height:100%;border-radius:4px;transition:width .5s}
/* Copy btn */
.copy-btn{background:var(--surface);border:1px solid var(--border);color:var(--muted);padding:2px 8px;font-size:10px;border-radius:4px;cursor:pointer}
.copy-btn:hover{color:var(--text)}
/* Section header */
.section-header{padding:0 24px 12px;display:flex;align-items:center;gap:12px;flex-wrap:wrap}
.section-header h2{font-size:15px;font-weight:600}
.section-header .muted{color:var(--muted);font-size:12px}
/* Empty state */
.empty{padding:40px;text-align:center;color:var(--muted);font-size:13px}
/* Checkbox */
input[type=checkbox]{width:14px;height:14px;accent-color:var(--accent);cursor:pointer}
</style>
</head>
<body x-data="app()" x-init="init()">
<!-- Header -->
<div class="header">
<div class="logo">
BeautyLeads
<span>Cosmetics B2B Intelligence</span>
</div>
<div class="tabs">
<button class="tab-btn" :class="{active:tab==='browse'}" @click="tab='browse';loadDomains()">Browse</button>
<button class="tab-btn" :class="{active:tab==='validator'}" @click="tab='validator';loadValStatus()">Validator</button>
<button class="tab-btn" :class="{active:tab==='pipeline'}" @click="tab='pipeline';loadLeads()">B2B Pipeline</button>
<button class="tab-btn" :class="{active:tab==='prescreen'}" @click="tab='prescreen'">Pre-screen</button>
<button class="tab-btn" :class="{active:tab==='export'}" @click="tab='export'">Export</button>
</div>
</div>
<!-- Stats row -->
<div class="stat-row">
<div class="stat-box"><div class="stat-val" x-text="(stats.total_domains||0).toLocaleString()"></div><div class="stat-lbl">Total Domains</div></div>
<div class="stat-box"><div class="stat-val live-color" x-text="(stats.beauty_live||0).toLocaleString()"></div><div class="stat-lbl">Beauty Live</div></div>
<div class="stat-box"><div class="stat-val hot-color" x-text="(aiSt.hot||0).toLocaleString()"></div><div class="stat-lbl">HOT Leads</div></div>
<div class="stat-box"><div class="stat-val warm-color" x-text="(aiSt.warm||0).toLocaleString()"></div><div class="stat-lbl">WARM Leads</div></div>
<div class="stat-box"><div class="stat-val" x-text="(aiSt.total||0).toLocaleString()"></div><div class="stat-lbl">Assessed</div></div>
<div class="stat-box"><div class="stat-val" :style="aiSt.pending>0?'color:var(--warn)':''" x-text="(aiSt.pending||0).toLocaleString()"></div><div class="stat-lbl">In Queue</div></div>
</div>
<!-- ══════════════════ BROWSE TAB ══════════════════ -->
<div x-show="tab==='browse'">
<div class="filter-bar">
<input x-model="f.keyword" @keyup.enter="loadDomains()" placeholder="Keyword…" style="width:140px">
<input x-model="f.tld" @keyup.enter="loadDomains()" placeholder="TLD (es, ro…)" style="width:100px">
<select x-model="f.prescreen_status" @change="loadDomains()">
<option value="">All Statuses</option>
<option value="live">Live</option>
<option value="error">Error (4xx/5xx)</option>
<option value="redirect">Redirect</option>
<option value="parked">Parked</option>
<option value="dead">Dead</option>
<option value="none">Not checked</option>
</select>
<select x-model="f.niche" @change="loadDomains()">
<option value="beauty_cosmetics">Beauty &amp; Cosmetics</option>
<option value="">All Niches</option>
<option value="fashion_retail">Fashion Retail</option>
<option value="medical_health">Medical / Health</option>
</select>
<select x-model="f.site_type" @change="loadDomains()">
<option value="ecommerce">E-commerce</option>
<option value="">All Types</option>
<option value="corporate">Corporate</option>
</select>
<input x-model="f.country" @keyup.enter="loadDomains()" placeholder="Country (ES, FR…)" style="width:100px">
<select x-model="f.limit" @change="loadDomains()">
<option value="50">50</option>
<option value="100" selected>100</option>
<option value="200">200</option>
</select>
<button class="btn-primary" @click="loadDomains()">Search</button>
<button class="btn-secondary" @click="resetFilters()">Reset</button>
<span style="margin-left:auto;color:var(--muted);font-size:11px" x-text="''+domains.length+' shown'"></span>
</div>
<!-- Bulk action bar -->
<div class="filter-bar" style="padding-top:0" x-show="selected.length>0">
<span style="color:var(--accent);font-weight:600" x-text="selected.length+' selected'"></span>
<button class="btn-primary btn-sm" @click="queueSelected()">Assess B2B Selected</button>
<button class="btn-secondary btn-sm" @click="selected=[]">Clear</button>
</div>
<div class="tbl-wrap">
<div class="empty" x-show="!loading && domains.length===0">No domains found. Adjust filters or run the validator first.</div>
<div style="padding:20px;text-align:center;color:var(--muted)" x-show="loading">Loading…</div>
<table x-show="!loading && domains.length>0">
<thead>
<tr>
<th style="width:28px"><input type="checkbox" @change="toggleAll($event)" :checked="selected.length===domains.length && domains.length>0"></th>
<th>Domain</th>
<th>Status</th>
<th>Country</th>
<th>Title</th>
<th>Niche</th>
<th>Type</th>
<th>B2B</th>
<th></th>
</tr>
</thead>
<tbody>
<template x-for="row in domains" :key="row.domain">
<tr>
<td><input type="checkbox" :value="row.domain" x-model="selected"></td>
<td><a :href="'https://'+row.domain" target="_blank" x-text="row.domain"></a></td>
<td>
<span x-show="row.prescreen_status" :class="statusBadge(row.prescreen_status)" class="badge" x-text="row.prescreen_status||''"></span>
<span x-show="!row.prescreen_status" style="color:var(--muted)"></span>
</td>
<td x-text="row.ip_country||'—'"></td>
<td style="max-width:200px;overflow:hidden;text-overflow:ellipsis;white-space:nowrap" :title="row.page_title||''" x-text="row.page_title||'—'"></td>
<td x-text="row.niche||'—'"></td>
<td x-text="row.site_type||'—'"></td>
<td>
<span x-show="row.beauty_lead_quality" :class="qualityBadge(row.beauty_lead_quality)" class="badge" x-text="row.beauty_lead_quality||''"></span>
<span x-show="!row.beauty_lead_quality" style="color:var(--muted)"></span>
</td>
<td>
<button class="btn-secondary btn-sm" @click="assessSingle(row.domain)" x-show="!row.beauty_lead_quality">Assess</button>
<button class="btn-secondary btn-sm" @click="assessSingle(row.domain)" x-show="row.beauty_lead_quality">Re-assess</button>
</td>
</tr>
</template>
</tbody>
</table>
</div>
<div class="filter-bar" x-show="!loading && domains.length>0">
<button class="btn-secondary btn-sm" @click="f.page=Math.max(1,f.page-1);loadDomains()" :disabled="f.page<=1">← Prev</button>
<span style="color:var(--muted);font-size:11px" x-text="'Page '+f.page"></span>
<button class="btn-secondary btn-sm" @click="f.page++;loadDomains()" :disabled="domains.length<parseInt(f.limit)">Next →</button>
</div>
</div>
<!-- ══════════════════ VALIDATOR TAB ══════════════════ -->
<div x-show="tab==='validator'" style="padding:0 24px">
<div style="padding:16px 0 8px;color:var(--muted);font-size:12px">
Bulk HTTP validator — checks all domains in the dataset and marks them live/dead/error/parked/redirect.
Run this first, then Pre-screen to classify niches, then use Browse to find beauty leads.
</div>
<div class="val-grid" style="padding:0 0 16px">
<div class="esb"><div class="ev" x-text="(valSt.processed||0).toLocaleString()"></div><div class="el">Checked</div></div>
<div class="esb"><div class="ev live-color" x-text="(valSt.live||0).toLocaleString()"></div><div class="el">Live</div></div>
<div class="esb"><div class="ev dead-color" x-text="(valSt.dead||0).toLocaleString()"></div><div class="el">Dead</div></div>
<div class="esb"><div class="ev error-color" x-text="(valSt.error||0).toLocaleString()"></div><div class="el">Error</div></div>
<div class="esb"><div class="ev" style="color:#fbbf24" x-text="(valSt.parked||0).toLocaleString()"></div><div class="el">Parked</div></div>
<div class="esb"><div class="ev" style="color:var(--info)" x-text="(valSt.rate||0)+'/s'"></div><div class="el">Rate</div></div>
</div>
<div x-show="valSt.running||valSt.processed>0" style="margin-bottom:12px;font-size:12px;color:var(--muted)">
<span x-text="'Offset: '+valSt.offset+' · Skipped: '+valSt.skipped"></span>
<span x-show="valSt.tld_filter" x-text="' · TLD: '+valSt.tld_filter"></span>
</div>
<div style="display:flex;gap:10px;flex-wrap:wrap;align-items:center;margin-bottom:12px">
<input x-model="valTld" placeholder="TLD filter (es, ro…)" style="width:140px">
<label style="color:var(--muted);font-size:11px;display:flex;align-items:center;gap:5px">
<input type="checkbox" x-model="valRescan"> Rescan dead
</label>
<button class="btn-primary" @click="startValidator()" :disabled="valSt.running">Start</button>
<button class="btn-danger" @click="stopValidator()" :disabled="!valSt.running">Stop</button>
<span x-show="valSt.running" style="color:var(--success);font-size:11px;animation:pulse 1.5s infinite">● Running</span>
<span x-show="!valSt.running && valSt.processed>0" style="color:var(--muted);font-size:11px">● Stopped</span>
</div>
</div>
<!-- ══════════════════ B2B PIPELINE TAB ══════════════════ -->
<div x-show="tab==='pipeline'">
<div class="filter-bar">
<select x-model="pf.quality" @change="loadLeads()">
<option value="">All Qualities</option>
<option value="HOT">HOT 🔥</option>
<option value="WARM">WARM</option>
<option value="COLD">COLD</option>
</select>
<input x-model="pf.country" @keyup.enter="loadLeads()" placeholder="Country (ES, FR…)" style="width:100px">
<select x-model="pf.limit" @change="loadLeads()">
<option value="50">50</option>
<option value="100" selected>100</option>
<option value="200">200</option>
</select>
<button class="btn-primary" @click="loadLeads()">Filter</button>
<button class="btn-secondary" @click="pf={quality:'',country:'',page:1,limit:'100'};loadLeads()">Reset</button>
<span style="margin-left:auto;color:var(--muted);font-size:11px" x-text="leadsTotal.toLocaleString()+' total leads'"></span>
</div>
<div class="tbl-wrap">
<div class="empty" x-show="!loadingLeads && leads.length===0">No B2B assessments yet. Go to Browse → select domains → Assess B2B.</div>
<div style="padding:20px;text-align:center;color:var(--muted)" x-show="loadingLeads">Loading…</div>
<table x-show="!loadingLeads && leads.length>0">
<thead>
<tr>
<th>Domain</th>
<th>Quality</th>
<th>Business</th>
<th>Country</th>
<th>Categories</th>
<th>Portfolio Match</th>
<th>Contact</th>
<th></th>
</tr>
</thead>
<tbody>
<template x-for="row in leads" :key="row.domain">
<tr @click="toggleLead(row.domain)" style="cursor:pointer">
<td><a :href="'https://'+row.domain" target="_blank" @click.stop x-text="row.domain"></a></td>
<td><span :class="qualityBadge(row.beauty_lead_quality)" class="badge" x-text="row.beauty_lead_quality||'—'"></span></td>
<td x-text="(row._beauty||{}).business_name||row.page_title||'—'"></td>
<td x-text="(row._beauty||{}).country_fiscal||(row.ip_country||'—')"></td>
<td>
<template x-for="cat in ((row._beauty||{}).categories||[]).slice(0,3)" :key="cat">
<span class="chip" x-text="cat"></span>
</template>
</td>
<td>
<template x-if="((row._beauty||{}).dist_matches||[]).length>0">
<span>
<template x-for="b in ((row._beauty||{}).dist_matches||[]).slice(0,4)" :key="b">
<span class="chip chip-match" x-text="b"></span>
</template>
<span x-show="((row._beauty||{}).dist_matches||[]).length>4" class="chip" x-text="'+'+(((row._beauty||{}).dist_matches||[]).length-4)+' more'"></span>
</span>
</template>
<span x-show="!((row._beauty||{}).dist_matches||[]).length" style="color:var(--muted)"></span>
</td>
<td>
<span x-text="(row._beauty||{}).contact_email||'—'" style="font-size:11px"></span>
</td>
<td @click.stop>
<button class="copy-btn" @click="copyEmail(row)" title="Copy outreach email">Copy Email</button>
</td>
</tr>
<!-- Expanded detail row -->
<tr class="detail-row" x-show="expandedLead===row.domain">
<td colspan="8">
<div class="detail-grid">
<div class="detail-section">
<h4>B2B Proposal</h4>
<p x-text="(row._beauty||{}).b2b_proposal||'—'"></p>
</div>
<div class="detail-section">
<h4>Lead Reasoning</h4>
<p x-text="(row._beauty||{}).lead_reasoning||'—'"></p>
</div>
<div class="detail-section" style="grid-column:1/-1">
<h4 style="margin-bottom:4px">
Outreach Email
<button class="copy-btn" style="margin-left:8px" @click="copyText((row._beauty||{}).outreach_email||'')">Copy</button>
</h4>
<p style="white-space:pre-wrap;font-size:11px;color:var(--muted)" x-text="(row._beauty||{}).outreach_email||'—'"></p>
</div>
<div class="detail-section">
<h4>Detected Brands on Site</h4>
<p style="font-size:11px">
<template x-for="b in ((row._beauty||{}).detected_brands||[]).slice(0,20)" :key="b">
<span class="chip" x-text="b"></span>
</template>
<span x-show="!((row._beauty||{}).detected_brands||[]).length" style="color:var(--muted)">None detected</span>
</p>
</div>
<div class="detail-section">
<h4>Contact Details</h4>
<p style="font-size:11px">
<span x-show="(row._beauty||{}).contact_email">Email: <span x-text="(row._beauty||{}).contact_email"></span><br></span>
<span x-show="(row._beauty||{}).contact_phone">Phone: <span x-text="(row._beauty||{}).contact_phone"></span><br></span>
<span x-show="row.emails" x-text="'On-site emails: '+(row.emails||'')"></span>
</p>
</div>
</div>
</td>
</tr>
</template>
</tbody>
</table>
</div>
<div class="filter-bar" x-show="!loadingLeads && leads.length>0">
<button class="btn-secondary btn-sm" @click="pf.page=Math.max(1,pf.page-1);loadLeads()" :disabled="pf.page<=1">← Prev</button>
<span style="color:var(--muted);font-size:11px" x-text="'Page '+pf.page"></span>
<button class="btn-secondary btn-sm" @click="pf.page++;loadLeads()" :disabled="leads.length<parseInt(pf.limit)">Next →</button>
</div>
</div>
<!-- ══════════════════ PRE-SCREEN TAB ══════════════════ -->
<div x-show="tab==='prescreen'" class="prescreen-wrap">
<div style="padding:16px 0 12px;color:var(--muted);font-size:12px">
Phase 1 — HTTP check each domain (live/dead/parked/redirect).<br>
Phase 2 — DeepSeek classifies niche &amp; type (beauty_cosmetics, ecommerce, etc.).<br>
Paste up to 200 domains, one per line.
</div>
<textarea x-model="prescreenInput" rows="12" placeholder="domain1.com&#10;domain2.es&#10;…"></textarea>
<div style="display:flex;gap:10px;margin-top:10px;align-items:center">
<button class="btn-primary" @click="runPrescreen()" :disabled="prescreenRunning">
<span x-show="!prescreenRunning">Run Pre-screen</span>
<span x-show="prescreenRunning">Running…</span>
</button>
<span x-show="prescreenResult" style="font-size:12px;color:var(--muted)" x-text="prescreenResult"></span>
</div>
</div>
<!-- ══════════════════ EXPORT TAB ══════════════════ -->
<div x-show="tab==='export'" style="padding:24px">
<div class="card" style="max-width:480px">
<h3 style="margin-bottom:16px;font-size:14px;color:var(--accent)">Export Beauty Leads</h3>
<div style="display:flex;flex-direction:column;gap:12px">
<div style="display:flex;gap:8px;align-items:center">
<label style="color:var(--muted);width:70px">Quality</label>
<select x-model="exportQuality" style="flex:1">
<option value="">All</option>
<option value="HOT">HOT only</option>
<option value="WARM">WARM only</option>
</select>
</div>
<div style="display:flex;gap:8px;align-items:center">
<label style="color:var(--muted);width:70px">Country</label>
<input x-model="exportCountry" placeholder="ES, FR, DE …" style="flex:1">
</div>
<button class="btn-primary" @click="exportLeads()">Download CSV</button>
</div>
<p style="margin-top:14px;font-size:11px;color:var(--muted)">
Exports: domain, quality, business name, country, categories, detected brands,
portfolio matches, contact email, B2B proposal, outreach email.
</p>
</div>
</div>
<!-- Toasts -->
<div class="toast-container">
<template x-for="t in toasts" :key="t.id">
<div class="toast" :class="t.type" x-text="t.msg"></div>
</template>
</div>
<script>
/**
 * Alpine.js component factory for the BeautyLeads page (bound via x-data="app()").
 *
 * Holds the state for all five tabs and the methods the markup calls.
 * Every JSON request goes through `api()`, which rejects on non-2xx
 * responses so that error bodies are never mistaken for data.
 */
function app() {
  // Default Browse filters; shared by the initial state and resetFilters().
  const defaultFilters = () => ({
    keyword: '', tld: '', prescreen_status: 'live', niche: 'beauty_cosmetics',
    site_type: 'ecommerce', country: '', limit: '100', page: 1,
  });

  return {
    tab: 'browse',
    loading: false,
    loadingLeads: false,
    domains: [],
    leads: [],
    leadsTotal: 0,
    selected: [],
    expandedLead: null,
    stats: {},
    aiSt: {hot:0, warm:0, cold:0, total:0, pending:0},
    valSt: {running:false, processed:0, live:0, dead:0, error:0, parked:0, redirect:0, skipped:0, offset:0, rate:0},
    valTld: '',
    valRescan: false,
    toasts: [],
    prescreenInput: '',
    prescreenRunning: false,
    prescreenResult: '',
    exportQuality: '',
    exportCountry: '',
    f: defaultFilters(),
    pf: {quality:'', country:'', limit:'100', page:1},

    /** Fetch `url` and return the parsed JSON body; throws on any non-2xx status. */
    async api(url, opts) {
      const r = await fetch(url, opts);
      if (!r.ok) throw new Error('HTTP ' + r.status + (r.statusText ? ' ' + r.statusText : ''));
      return r.json();
    },

    /** Initial load, then refresh the header stats every 4 seconds. */
    async init() {
      await this.loadStats();
      await this.loadAiStatus();
      await this.loadValStatus();
      await this.loadDomains();
      // Skip a tick while the previous one is still in flight so slow
      // responses cannot pile up overlapping polls.
      let polling = false;
      setInterval(async () => {
        if (polling) return;
        polling = true;
        try {
          await this.loadStats();
          await this.loadAiStatus();
          if (this.tab === 'validator') await this.loadValStatus();
        } finally {
          polling = false;
        }
      }, 4000);
    },

    /** Refresh the global stats shown in the header. Failures are ignored (best-effort poll). */
    async loadStats() {
      try {
        const d = await this.api('/api/stats');
        // beauty_live is filled in by loadAiStatus(); carry it over so the
        // stat box does not drop to 0 between the two polls.
        this.stats = {beauty_live: this.stats.beauty_live, ...d};
      } catch(e) {}
    },

    /** Refresh queue status plus HOT/WARM/beauty-live counts. Failures are ignored (best-effort poll). */
    async loadAiStatus() {
      try {
        const zero = () => ({total: 0});
        // The four requests are independent, so run them in parallel.
        // Only a failing status request aborts the whole update.
        const [d, hot, warm, bl] = await Promise.all([
          this.api('/api/beauty/status'),
          this.api('/api/beauty/leads?quality=HOT&limit=1').catch(zero),
          this.api('/api/beauty/leads?quality=WARM&limit=1').catch(zero),
          this.api('/api/enriched?prescreen_status=live&niche=beauty_cosmetics&limit=1').catch(() => null),
        ]);
        this.aiSt = { ...d, hot: hot.total||0, warm: warm.total||0 };
        // Leave the previous value in place when the count request failed.
        if (bl) this.stats.beauty_live = bl.total || 0;
      } catch(e) {}
    },

    /** Refresh validator progress. An error response leaves the previous state untouched. */
    async loadValStatus() {
      try { this.valSt = await this.api('/api/validator/status'); }
      catch(e) {}
    },

    /** Load one page of domains for the Browse tab using the filters in `f`. */
    async loadDomains() {
      this.loading = true;
      try {
        const p = new URLSearchParams({
          page: this.f.page,
          limit: this.f.limit,
        });
        if (this.f.keyword) p.set('keyword', this.f.keyword);
        if (this.f.tld) p.set('tld', this.f.tld);
        // Forward the filters that loadAiStatus() already sends to this
        // endpoint, so pagination can apply to the filtered set. 'none' is a
        // UI-only value and is handled client-side below.
        if (this.f.prescreen_status && this.f.prescreen_status !== 'none') {
          p.set('prescreen_status', this.f.prescreen_status);
        }
        if (this.f.niche) p.set('niche', this.f.niche);
        const d = await this.api('/api/enriched?' + p);
        let rows = d.results || [];
        // Client-side pass kept as a fallback in case the server ignores a
        // parameter; it is a no-op on rows the server already filtered.
        if (this.f.prescreen_status === 'none') rows = rows.filter(r => !r.prescreen_status);
        else if (this.f.prescreen_status) rows = rows.filter(r => r.prescreen_status === this.f.prescreen_status);
        if (this.f.niche) rows = rows.filter(r => r.niche === this.f.niche);
        if (this.f.site_type) rows = rows.filter(r => r.site_type === this.f.site_type);
        if (this.f.country) rows = rows.filter(r => (r.ip_country||'').toUpperCase() === this.f.country.toUpperCase());
        this.domains = rows;
      } catch(e) { this.notify('Failed to load domains: '+e.message, 'error'); }
      finally { this.loading = false; }
    },

    /** Load one page of assessed leads for the B2B Pipeline tab using the filters in `pf`. */
    async loadLeads() {
      this.loadingLeads = true;
      try {
        const p = new URLSearchParams({ page: this.pf.page, limit: this.pf.limit });
        if (this.pf.quality) p.set('quality', this.pf.quality);
        if (this.pf.country) p.set('country', this.pf.country);
        const d = await this.api('/api/beauty/leads?' + p);
        this.leads = d.results || [];
        this.leadsTotal = d.total || 0;
      } catch(e) { this.notify('Failed to load leads: '+e.message, 'error'); }
      finally { this.loadingLeads = false; }
    },

    /** Restore the default Browse filters and reload. */
    resetFilters() {
      this.f = defaultFilters();
      this.loadDomains();
    },

    /** Header checkbox handler: select every visible domain, or none. */
    toggleAll(e) {
      this.selected = e.target.checked ? this.domains.map(r=>r.domain) : [];
    },

    /** Queue every selected domain for B2B assessment. */
    async queueSelected() {
      if (!this.selected.length) return;
      try {
        const d = await this.api('/api/beauty/assess/batch', {
          method:'POST', headers:{'Content-Type':'application/json'},
          body: JSON.stringify({domains: this.selected}),
        });
        this.notify(`Queued ${d.queued} domains for B2B assessment`, 'success');
        this.selected = [];
      } catch(e) { this.notify('Queue failed: '+e.message, 'error'); }
    },

    /** Queue a single domain for (re-)assessment. */
    async assessSingle(domain) {
      this.notify(`Queuing ${domain}`, 'info');
      try {
        await this.api('/api/beauty/assess/batch', {
          method:'POST', headers:{'Content-Type':'application/json'},
          body: JSON.stringify({domains:[domain]}),
        });
        this.notify(`${domain} queued for assessment`, 'success');
      } catch(e) { this.notify('Failed: '+e.message, 'error'); }
    },

    /** Expand the detail row for `domain`, or collapse it if already open. */
    toggleLead(domain) {
      this.expandedLead = this.expandedLead === domain ? null : domain;
    },

    /** Copy the outreach email (with its subject line when present) for a lead row. */
    copyEmail(row) {
      const b = row._beauty || {};
      const text = [
        b.outreach_subject ? 'Subject: ' + b.outreach_subject : '',
        '',
        b.outreach_email || '',
      ].join('\n').trim();
      this.copyText(text);
    },

    /** Copy `text` to the clipboard and report the outcome as a toast. */
    copyText(text) {
      const ok = () => this.notify('Copied to clipboard', 'success');
      const fail = () => this.notify('Copy failed', 'error');
      // navigator.clipboard only exists in secure contexts (HTTPS or
      // localhost); on a plain-HTTP host it is undefined and must be guarded.
      if (navigator.clipboard && navigator.clipboard.writeText) {
        navigator.clipboard.writeText(text).then(ok, fail);
        return;
      }
      // Fallback: copy via a temporary off-screen textarea.
      const ta = document.createElement('textarea');
      ta.value = text;
      ta.setAttribute('readonly', '');
      ta.style.position = 'fixed';
      ta.style.opacity = '0';
      document.body.appendChild(ta);
      let copied = false;
      try {
        ta.select();
        copied = document.execCommand('copy');
      } catch(e) {
        copied = false;
      } finally {
        document.body.removeChild(ta);
      }
      if (copied) ok(); else fail();
    },

    /** Send the pasted domain list (max 200, one per line) to the pre-screen endpoint. */
    async runPrescreen() {
      const lines = this.prescreenInput.split('\n').map(l=>l.trim()).filter(Boolean);
      if (!lines.length) return;
      if (lines.length > 200) { this.notify('Max 200 domains per batch', 'error'); return; }
      this.prescreenRunning = true;
      this.prescreenResult = '';
      try {
        const d = await this.api('/api/prescreen/batch', {
          method:'POST', headers:{'Content-Type':'application/json'},
          body: JSON.stringify({domains: lines}),
        });
        this.prescreenResult = `${d.live} live · ☠ ${d.dead} dead · 🅿 ${d.parked} parked · ↗ ${d.redirect} redirect · 🏷 ${d.classified} classified`;
        this.notify(this.prescreenResult, 'success');
      } catch(e) { this.notify('Pre-screen failed: '+e.message, 'error'); }
      finally { this.prescreenRunning = false; }
    },

    /** Start the bulk validator with the optional TLD filter and rescan flag. */
    async startValidator() {
      const p = new URLSearchParams();
      if (this.valTld) p.set('tld', this.valTld);
      if (this.valRescan) p.set('rescan_dead', 'true');
      try {
        // api() throws on an error status, so valSt is only replaced by a
        // genuine status payload.
        this.valSt = await this.api('/api/validator/start?' + p, {method:'POST'});
        this.notify('Validator started', 'success');
      } catch(e) { this.notify('Failed: '+e.message, 'error'); }
    },

    /** Ask the validator to stop. */
    async stopValidator() {
      try {
        // The response body is not used, so only the status is checked.
        const r = await fetch('/api/validator/stop', {method:'POST'});
        if (!r.ok) throw new Error('HTTP ' + r.status);
        this.notify('Validator stop requested', 'info');
      } catch(e) { this.notify('Failed: '+e.message, 'error'); }
    },

    /** Open the CSV export in a new tab using the Export tab filters. */
    exportLeads() {
      const p = new URLSearchParams();
      if (this.exportQuality) p.set('quality', this.exportQuality);
      if (this.exportCountry) p.set('country', this.exportCountry);
      window.open('/api/beauty/export?' + p, '_blank');
    },

    /** CSS class for a lead-quality badge; unknown non-empty values render as cold. */
    qualityBadge(q) {
      if (!q) return 'badge-nr';
      const m = {HOT:'badge-hot', WARM:'badge-warm', COLD:'badge-cold', NOT_RELEVANT:'badge-nr'};
      return m[q] || 'badge-cold';
    },

    /** CSS class for a prescreen-status badge. */
    statusBadge(s) {
      const m = {live:'badge-live', dead:'badge-dead', error:'badge-error', parked:'badge-parked', redirect:'badge-redirect'};
      return m[s] || 'badge-nr';
    },

    /** Show a toast of the given type ('info' | 'success' | 'error') for 4.5 seconds. */
    notify(msg, type='info') {
      const id = Date.now() + Math.random();
      this.toasts.push({id, msg, type});
      setTimeout(() => { this.toasts = this.toasts.filter(t=>t.id!==id); }, 4500);
    },
  };
}
</script>
</body>
</html>

View File

@@ -1,4 +1,15 @@
version: "3.9"
# Environment shared by every service via the &common-env anchor.
x-common-env: &common-env
  DATA_DIR: /data
  PARQUET_URL: https://github.com/digitalcortex/72m-domains-dataset/raw/refs/heads/master/domains.parquet
  CONCURRENCY_LIMIT: 50
  SCORE_THRESHOLD: 60
  TARGET_TLDS: es,com,net
  TARGET_COUNTRIES: ES,GB,DE,FR,RO,PT,AD,IT
  # Secret: taken from the host environment or an untracked .env file, never
  # committed. Compose aborts with this message when the variable is unset.
  REPLICATE_API_TOKEN: "${REPLICATE_API_TOKEN:?REPLICATE_API_TOKEN must be set in the environment or .env}"
  AI_CONCURRENCY: 3
services:
dashboard:
build: .
@@ -7,12 +18,18 @@ services:
volumes:
- ./data:/data
environment:
- DATA_DIR=/data
- PARQUET_URL=https://github.com/digitalcortex/72m-domains-dataset/raw/refs/heads/master/domains.parquet
- CONCURRENCY_LIMIT=50
- SCORE_THRESHOLD=60
- TARGET_TLDS=es,com,net
- TARGET_COUNTRIES=ES,GB,DE,FR,RO,PT,AD,IT
- REPLICATE_API_TOKEN=r8_7I7Feai78f9PzMOs20y5GVFKiLkgUWP463vZO
- AI_CONCURRENCY=3
<<: *common-env
restart: unless-stopped
beauty:
build: .
command: python -m app.beauty_main
ports:
- "7788:7788"
volumes:
- ./data:/data # shares the same DB / parquet / duckdb
environment:
<<: *common-env
depends_on:
- dashboard # dashboard handles parquet download & index build first
restart: unless-stopped