feat: BeautyLeads B2B cosmetics frontend on port 7788
New service (app/beauty_main.py) sharing the same /data volume: - Separate FastAPI app running on port 7788 - beauty_ai.py: brand universe scan (~650 brands), portfolio match detection against OUR_BRANDS, Gemini B2B assessment prompt in Spanish returning quality/categories/dist_matches/outreach_email - beauty_queue table + beauty_lead_quality/beauty_assessment columns in enriched_domains (with migrations) - Endpoints: /api/beauty/assess/batch, /api/beauty/leads, /api/beauty/status, /api/beauty/export, /api/beauty/reset - Static frontend: Browse (beauty/ecommerce pre-filtered, no CMS/SSL/KD columns), Validator, B2B Pipeline (brand chips, expandable outreach), Pre-screen, Export CSV - docker-compose: second 'beauty' service with shared data volume - Dockerfile: expose 7788 alongside 6677 Also: add 'error' prescreen_status handling + UI (orange stat box, filter option) for 4xx/5xx HTTP responses Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
352
app/beauty_ai.py
Normal file
352
app/beauty_ai.py
Normal file
@@ -0,0 +1,352 @@
|
||||
"""Beauty B2B AI assessment — cosmetics distribution lead qualification.
|
||||
|
||||
Pre-scans scraped text for known brands, then sends a focused prompt to Gemini
|
||||
to evaluate fit as a B2B customer for a cosmetics distribution business.
|
||||
"""
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
logger = logging.getLogger(__name__)

# SECURITY: the API token must come from the environment only. A real-looking
# token used to be hard-coded here as the fallback default; anything committed
# to version control must be treated as leaked, so that token should be
# revoked and rotated. With no token configured the Replicate request is
# rejected and assess_beauty_domain reports it through its "error" field.
REPLICATE_TOKEN = os.getenv("REPLICATE_API_TOKEN", "")

# Replicate predictions endpoint used for the assessment prompt.
REPLICATE_MODEL = "https://api.replicate.com/v1/models/google/gemini-3-pro/predictions"

# Upper bound on assessments running concurrently (see _sem()).
AI_CONCURRENCY = int(os.getenv("AI_CONCURRENCY", "3"))

# Shared semaphore, created lazily by _sem() on first use.
_ai_sem: Optional[asyncio.Semaphore] = None
|
||||
|
||||
def _sem() -> asyncio.Semaphore:
    """Return the module-wide AI semaphore, building it on first call.

    The semaphore is sized by ``AI_CONCURRENCY`` and cached in ``_ai_sem`` so
    every caller shares the same instance.
    """
    global _ai_sem
    if _ai_sem is not None:
        return _ai_sem
    _ai_sem = asyncio.Semaphore(AI_CONCURRENCY)
    return _ai_sem
|
||||
|
||||
|
||||
# ── Brand universe (market brands we can detect on client sites) ──────────────
#
# Names that detect_brands_in_text() looks for in scraped page text. The scan
# is case-insensitive, so the capitalisation used here only affects how a
# brand is reported. Order matters: matches are returned in list order and the
# result is capped at 60 entries, so brands near the top win when a page
# mentions very many of them.

BEAUTY_BRANDS = [
    "4711","7days","7th Heaven","A-derma","Abercrombie & Fitch","Abril Et Nature",
    "Acqua Di Parma","Actinica","Adidas","Adolfo Dominguez","Aesop","Agatha Ruiz De La Prada",
    "Agave","Agua Lavanda","Ahava","Air-wick","Aire Sevilla","Al Haramain","Albal","Alcantara",
    "Alejandro Sanz","Alfaparf Milano","Algasiv","Alma Secret","Alpecin","Alqvimia","Alterna",
    "Alvarez Gomez","Alyssa Ashley","Ambi Pur","American Crew","Amichi","Ana María Lajusticia",
    "Angel Schlesser","Anian","Annayake","Anne Möller","Anso","Antonio Banderas","Apisérum",
    "Apivita","Aqc Fragrances","Aquilea","Aramis","Ardell","Arganour","Ariel","Armaf",
    "Armand Basi","Artdeco","Artero","As I Am","Aseptine","Atashi","Atrix","Ausonia","Aussie",
    "Australian Gold","Autan","Aveda","Avena Kinesia","Avène","Axe","Axovital","Azalea",
    "Azzaro","Babaria","Babyliss","Barbie","Bare Minerals","Barulab","Batiste","Beaver",
    "Beconfident","Belcils","Bella Aurora","Benefit","Benton","Benzacare","Beter","Biafin",
    "Bio Ionic","Bio-oil","Bioderma","Biolage","Biotherm","Biovène","Biretix","Bobbi Brown",
    "Bouclème","Bourjois","Bperfect Cosmetics","Britney Spears","Bumble & Bumble","Burberry",
    "Bvlgari","Byly","Byphasse","Cacharel","Calvin Klein","Camomila Intea","Cantu","Carefree",
    "Carmex","Carolina Herrera","Carrera","Carthusia","Catrice","Caudalie","Cerave","Cerruti",
    "Cetaphil","Chanel","Chanson D'Eau","Chloé","Chopard","Christina Aguilera","Christophe Robin",
    "Clarins","Clean & Clear","Clinique","Coach","Cocosolis","Colab","Colgate","Collistar",
    "Color Wow","Comfort Zone","Comodynes","Compeed","Cosrx","Creed","Creme Of Nature",
    "Cristalinas","Crossmen","Crusellas","Cryopharma","Cumlaude Lab","Cutex","Cygnetic",
    "Daffoil","Darphin","Davidoff","Declaré","Delfy","Delisea","Denenes","Dentiblanc",
    "Dermalogica","Desensin","Dexeryl","Diadermine","Diesel","Diet Esthetic","Dior","Diptyque",
    "Dodot","Dolce & Gabbana","Donna Karan","Dove","Dr. Hauschka","Dr.jart+","Dr. Organic",
    "Dr. Rimpler","Dr. Tree","Drasanvi","Drunk Elephant","Dsquared2","Ducray","Durex",
    "Elancyl","Elegant Touch","Elemis","Elie Saab","Elizabeth Arden","Elizabeth Taylor",
    "Emilio Pucci","Endocare","Eric Favre","Escada","Essence","Essie","Estée Lauder",
    "Etat Libre D'Orange","Eucerin","Eudermin","Evax","Eve Lom","Eylure","Fa","Fairy","Fanola",
    "Farmatint","Farmavita","Farouk","Figuière","Fisiocrem","Flor De Mayo","Fluocaril","Foreo",
    "Forté Pharma","Foxy","Francis Kurkdjian","Frederic Malle","Frosch","Garnier","Ghd",
    "Gillette","Giorgi Line","Givenchy","Glam Of Sweden","Goldwell","Gosh","Goutal","Gritti",
    "Gucci","Guerlain","Guess By Marciano","Gummy","Hair Rituel By Sisley","Hairgum","Halita",
    "Halloween","Hansaplast","Hask","Hawaiian Tropic","Head & Shoulders","Heliocare",
    "Heno De Pravia","Herbal Essences","Hermès","Hidracel","Hollister","Hugo Boss",
    "I.c.o.n.","Ibizaloe","Iceberg","Idc Institute","Iroha","Isabelle Lancray","Isdin",
    "Issey Miyake","It Cosmetics","Ivybears","Jacadi","Jean Paul Gaultier","Jil Sander",
    "Jimmy Choo","Jo Malone","John Frieda","Johnson's Baby","Joico","Joop","Jordan","Jowaé",
    "Juicy Couture","Juliette Has A Gun","Just For Men","Juvena","Kaloo","Karl Lagerfeld",
    "Karseell","Katai","Kate Spade","Kativa","Kenzo","Kerasilk","Kerastase","Kevin Murphy",
    "Kevyn Aucoin","Kilian","Klorane","L'Anza","L'Occitane","L'Oréal Paris",
    "L'Oréal Professionnel","La Cabine","La Mer","La Prairie","La Roche Posay","La Toja",
    "Laboratoires Filorga","Lacer","Lacoste","Lactacyd","Lactovit","Lalique","Lancaster",
    "Lanvin","Lattafa","Laura Biagiotti","Le Petit Marseillais","Legrain","Lierac","Listerine",
    "Living Proof","Loewe","Lola Cosmetics","Lolita Lempicka","Lussoni","Lutsine E45",
    "M2 Beauté","Mac","Macadamia","Mad Beauty","Maria Nila","Marlies Möller","Martiderm",
    "Martinelia","Marvis","Matrix","Maui","Mavala","Max Factor","Maybelline","Melvita",
    "Mermade","Michael Kors","Milk Shake","Mix & Shout","Mixa","Moroccanoil","Moschino",
    "Mustela","Nabeel","Nanobrow","Nanoil","Nanolash","Narciso Rodriguez","Nars","Natur Vital",
    "Natura Bissé","Natural Honey","Naturalium","Naturtint","Nenuco","Neogen","Neoretin",
    "Neostrata","Neutrogena","Nivea","Nûby","Nuggela & Sulé","Nyx Professional Make Up",
    "Ogx","Olaplex","Olay","Old Spice","Olivia Garden","Opi","Oral-b","Oraldine","Orofluido",
    "Orlane","Oscar De La Renta","Pacha","Paese","Palette","Paloma Picasso","Paltons",
    "Pantene","Paranix","Parfums Saphir","Parlux","Payot","Phyto","Picu Baby","Pilexil",
    "Piz Buin","Plantur 39","Platanomelón","Polaar","Police","Polident","Ponds","Poseidon",
    "Postquam","Proraso","Puig","Purito","Rabanne","Raid","Ralph Lauren","Rated Green",
    "Real Techniques","Redenhair","Redist","Redken","Reebok","Ref","Refectocil","Relec",
    "Remescar","Rene Furterer","Revlon","Revolution Hair Care","Revolution Make Up",
    "Revolution Pro","Rexaline","Rexona","Rilastil","Rimmel London","Roberto Cavalli","Roc",
    "Rochas","Roger & Gallet","Roja Parfums","Rosacure","S3","Sabon","Salerm","Sally Hansen",
    "Salvatore Ferragamo","Sanex","Sarah Jessica Parker","Saryna Key","Satisfyer","Scalpers",
    "Scholl","Schwarzkopf","Scottex","Sebamed","Sebastian Professionals","Seche Vite",
    "Sensai","Sensilis","Sensodyne","Serge Lutens","Serumkind","Sesderma","Seven Cosmetics",
    "Sexy Hair","Shiseido","Shu Uemura","Sisley","Skeyndor","Skin Generics","Sleek",
    "Snp","Soap & Glory","Sol De Janeiro","Solgar","Somatoline Cosmetic","Sophie La Girafe",
    "Soria Natural","Steinhart","Stendhal Paris","Sterimar","Strivectin","Suavinex",
    "Suavipiel","Svr Laboratoire Dermatologique","Syoss","System Professional","Tabac",
    "Taky","Talika","Tampax","Tangle Teezer","Tanit","Teaology","Tena Lady","The Body Shop",
    "The Ordinary","The Wet Brush","Thermacare","Tiffany & Co","Tigi","Timotei",
    "Tiziana Terenzi","Tod's","Tom Ford","Tommy Hilfiger","Topicrem","Torriden","Tot Herba",
    "Tous","Trendy Hair","Tresemme","Trussardi","Tulipán Negro","Urban Decay","Uriage",
    "Usu Cosmetics","Vagisil","Valmont","Valquer","Vanderbilt","Vaseline","Veet","Vichy",
    "Victor","Victoria's Secret","Victorio & Lucchino","Vital Proteins","Vivra",
    "Voltage Cosmetics","Volumax","Waterpik","Waterwipes","Wella","Weleda",
    "Williams","Woodwick","Xerjoff","Xls Medical","Yankee Candle","Yari","Yotuel",
    "Youth Lab","Zadig & Voltaire","Ziaja",
]
|
||||
|
||||
# Our distribution portfolio — the brands we sell to B2B clients.
#
# get_dist_matches() reports the entries of this list that were detected on a
# prospect's site, and the full list is also embedded in the AI prompt under
# "OUR DISTRIBUTION PORTFOLIO".
#
# NOTE(review): several entries (e.g. "AIMX", "Davines", "Nuxe", "Medik8") have
# no counterpart in BEAUTY_BRANDS, and "ICON" is listed there as "I.c.o.n.".
# A pre-scan limited to BEAUTY_BRANDS cannot surface those names — consider
# keeping the two lists in sync.
OUR_BRANDS = [
    "AIMX","Al Haramain","Apivita","Armaf","Aveda","Bouclème","Clarena",
    "Curly Girl Movement","Cutrin","Davines","Dr. Hauschka","FanPalm","Farmavita",
    "Flora Curl","GAMMA+","GHD","GOSH","ICON","Image Skincare","Instituto Español",
    "Janeke","Kay Pro","Kerasilk","Kyo","Label M","Lierac","Living Proof","Londa",
    "M2 Beauté","Malibu C","Maria Nila","Medik8","Misslyn","Mustela","Nesti Dante",
    "Nuxe","Obagi","Osmo","Payot","Philip B","Philip Martins","Phyto","Piz Buin",
    "Ramon Monegal","Redken","REF","Saryna Key","Sesderma","Skala Brasil","Skin1004",
    "Strivectin","Swissdent","Topicrem","Uriage","Vita Liberata","Waterclouds",
    "Wella","Youngblood Cosmetics",
]
|
||||
|
||||
# Product categories offered to the model under "BEAUTY CATEGORIES WE COVER";
# the prompt asks it to pick the ones a prospect's site covers.
BEAUTY_CATEGORIES = [
    "Perfumes","Facial Cosmetics","Makeup","Hair Care","Health","Body Cosmetics",
    "Hygiene","Kids & Babies","Sun Care","Eyewear","Home","Nutrition","Erotic","Fashion",
]
|
||||
|
||||
|
||||
# ── Brand detection (fast pre-scan, no AI) ─────────────────────────────────────
|
||||
|
||||
def detect_brands_in_text(text: str, brands: Optional[list[str]] = None) -> list[str]:
    """Find which known brands appear in the scraped page text.

    Matching is case-insensitive and anchored on word boundaries, so short
    names no longer fire inside unrelated words ("Fa" in "fabric", "Mac" in
    "farmacia", "Ref" in "reference").

    Args:
        text: Visible page text to scan. ``None`` or an empty string yields
            an empty result.
        brands: Brand names to look for. Defaults to ``BEAUTY_BRANDS``.

    Returns:
        At most 60 matching brand names, spelled and ordered as in *brands*.
    """
    if not text:
        return []
    if brands is None:
        brands = BEAUTY_BRANDS

    haystack = text.lower()
    found: list[str] = []
    for brand in brands:
        needle = brand.lower()
        # Cheap substring test first; only candidates pay for a regex search.
        if needle not in haystack:
            continue
        # Lookarounds rather than \b: some names start or end with punctuation
        # ("Dr.jart+", "I.c.o.n."), where \b could never match.
        if re.search(rf"(?<!\w){re.escape(needle)}(?!\w)", haystack):
            found.append(brand)
            if len(found) == 60:
                break
    return found
|
||||
|
||||
|
||||
def get_dist_matches(detected: list[str], portfolio: Optional[list[str]] = None) -> list[str]:
    """Return which detected brands are in our distribution portfolio.

    Names are compared case-insensitively with punctuation and whitespace
    ignored, so spelling variants of the same brand ("I.c.o.n." vs "ICON")
    are treated as equal.

    Args:
        detected: Brand names found on the site; ``None`` or empty yields
            an empty result.
        portfolio: Brands we distribute. Defaults to ``OUR_BRANDS``.

    Returns:
        Matching portfolio brands, spelled and ordered as in *portfolio*.
    """

    def _key(name: str) -> str:
        lowered = name.lower()
        # Fall back to the lowered name if it is punctuation-only.
        return re.sub(r"[\W_]+", "", lowered) or lowered

    if portfolio is None:
        portfolio = OUR_BRANDS
    seen = {_key(name) for name in (detected or [])}
    return [brand for brand in portfolio if _key(brand) in seen]
|
||||
|
||||
|
||||
# ── DuckDuckGo search (contact/company lookup) ────────────────────────────────
|
||||
|
||||
async def _ddg_search(query: str) -> str:
    """Query DuckDuckGo's HTML endpoint and summarise the top hits.

    Only the first four result nodes are inspected and nodes without a snippet
    are skipped. Each kept result becomes one line of the form
    ``[url] title — snippet``.

    Returns:
        The result lines joined with newlines, or an empty string on a
        non-200 response or any exception (this lookup is best-effort).
    """
    request_headers = {"User-Agent": "Mozilla/5.0 (compatible; BeautyLeads/1.0)"}
    try:
        async with httpx.AsyncClient(
            timeout=10,
            follow_redirects=True,
            headers=request_headers,
        ) as client:
            response = await client.get(
                "https://html.duckduckgo.com/html/",
                params={"q": query, "kl": "es-es"},
            )
            if response.status_code != 200:
                return ""

            document = BeautifulSoup(response.text, "html.parser")
            lines = []
            for node in document.select(".result")[:4]:
                title_el = node.select_one(".result__a")
                snippet_el = node.select_one(".result__snippet")
                url_el = node.select_one(".result__url")
                if not snippet_el:
                    continue
                heading = title_el.get_text(strip=True) if title_el else ""
                link = url_el.get_text(strip=True) if url_el else ""
                lines.append(f"[{link}] {heading} — {snippet_el.get_text(strip=True)}")
            return "\n".join(lines)
    except Exception as e:
        # Search context is optional; never let it fail the assessment.
        logger.debug("DDG search failed: %s", e)
        return ""
|
||||
|
||||
|
||||
# ── Prompt builder ─────────────────────────────────────────────────────────────
|
||||
|
||||
def _build_beauty_prompt(a: dict, detected_brands: list, dist_matches: list,
                         search_results: str = "") -> str:
    """Render the B2B assessment prompt sent to the model.

    Args:
        a: Site analysis dict. Reads ``domain``, ``page_title``,
            ``meta_description``, ``ip_country``, ``cms``,
            ``visible_text_snippet``, ``emails``, ``phones`` and
            ``social_links``.
        detected_brands: Brands found by the pre-scan.
        dist_matches: Detected brands that are in our portfolio.
        search_results: Optional web-search context, cut to 500 characters.

    Returns:
        The full prompt text, ending with the JSON schema the model must fill.
    """
    # Contact lines: (analysis key, label, max items shown).
    contact_lines = []
    for key, label, limit in (
        ("emails", "Emails", 3),
        ("phones", "Phones", 3),
        ("social_links", "Social", 4),
    ):
        if a.get(key):
            contact_lines.append(f" {label}: {', '.join(a[key][:limit])}")
    contacts_text = "\n".join(contact_lines) or " Not found"

    domain = a.get("domain")
    page_title = a.get("page_title") or ""
    meta_desc = a.get("meta_description") or ""
    ip_country = a.get("ip_country") or "unknown"
    cms_name = a.get("cms") or "unknown"
    page_sample = (a.get("visible_text_snippet") or "")[:1200]

    detected_text = "none detected"
    if detected_brands:
        detected_text = ", ".join(detected_brands)
    portfolio_hits = "none"
    if dist_matches:
        portfolio_hits = ", ".join(dist_matches)

    web_context = (search_results or "No results.")[:500]
    portfolio_text = ", ".join(OUR_BRANDS)
    categories_text = ", ".join(BEAUTY_CATEGORIES)

    return f"""You are a senior B2B sales analyst for a cosmetics distribution company operating in Europe.
Your task: evaluate whether this website is a viable B2B customer (retailer, multi-brand store,
e-commerce, distributor or chain that buys beauty products wholesale) and generate an outreach plan.

=== SITE DATA ===
Domain: {domain}
Country (IP): {ip_country}
Title: {page_title}
Meta desc: {meta_desc}
CMS: {cms_name}
Contact info:
{contacts_text}

=== PAGE CONTENT SAMPLE ===
{page_sample}

=== BRANDS ALREADY DETECTED ON SITE ===
{detected_text}

=== OUR PORTFOLIO BRANDS FOUND ON THEIR SITE ===
(These brands we distribute — finding them means we're already in their market)
{portfolio_hits}

=== WEB SEARCH RESULTS ===
{web_context}

=== OUR DISTRIBUTION PORTFOLIO ===
{portfolio_text}

=== BEAUTY CATEGORIES WE COVER ===
{categories_text}

=== ASSESSMENT RULES ===
1. Determine if this is a B2B prospect: retailer, pharmacy, parafarmacia,
perfumería, multi-brand beauty ecommerce, salon chain, supermarket beauty section,
or beauty products distributor based in Europe.
2. Identify which categories from our list they cover.
3. From the page content (even if brands list is empty), identify any beauty brands mentioned.
4. Match detected brands against our portfolio — this drives lead quality:
- HOT: 3+ of our portfolio brands detected, OR a large EU retailer clearly in our niche
- WARM: 1-2 portfolio brand matches, OR clear beauty retailer with good potential
- COLD: beauty-adjacent but weak match, OR can't confirm they buy wholesale
- NOT_RELEVANT: not a beauty business or not in Europe
5. Write all human text (proposal, email) in Spanish.
6. Keep JSON values concise (≤ 25 words each).

Respond ONLY with valid JSON, no markdown, no text outside JSON:
{{
"is_relevant": true/false,
"lead_quality": "HOT|WARM|COLD|NOT_RELEVANT",
"lead_reasoning": "1-2 sentences why",
"business_type": "retailer|ecommerce|distributor|pharmacy|salon_chain|other",
"business_name": "name from title or domain",
"country_fiscal": "2-letter ISO or full name",
"countries_active": ["ES","FR"],
"categories": ["Hair Care","Makeup"],
"detected_brands": ["brand1","brand2"],
"dist_matches": ["OurBrand1","OurBrand2"],
"contact_email": "email or empty string",
"contact_phone": "phone or empty string",
"b2b_proposal": "1-2 sentence value proposition in Spanish referencing their categories and our matching brands",
"outreach_subject": "short Spanish subject line referencing their business name",
"outreach_email": "3-4 sentence ready-to-send email in Spanish. Mention their business, 1-2 specific brands from our portfolio that match their range, and a clear call to action (catálogo, muestra, llamada).",
"revenue_estimate": "unknown",
"outreach_notes": "brief context for sales rep"
}}"""
|
||||
|
||||
|
||||
def _repair_truncated_json(fragment: str) -> Optional[dict]:
    """Best-effort recovery of a JSON object whose tail was cut off.

    Walks *fragment* once, tracking string state and the stack of open
    containers, and records every safe cut point: each comma outside a string,
    plus the very end when the text does not stop inside a string. It then
    retries ``json.loads`` on each prefix, latest first, with the closers for
    the containers open at that point appended in correct nesting order.

    Returns:
        The first prefix that parses to a dict, or ``None`` if none does.
    """
    closers: list[str] = []
    cut_points: list[tuple[int, str]] = []
    in_string = False
    escaped = False
    for index, char in enumerate(fragment):
        if in_string:
            if escaped:
                escaped = False
            elif char == "\\":
                escaped = True
            elif char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        elif char == "{":
            closers.append("}")
        elif char == "[":
            closers.append("]")
        elif char in "}]":
            if not closers or closers[-1] != char:
                break  # broken beyond simple truncation; use what we have
            closers.pop()
        elif char == ",":
            cut_points.append((index, "".join(reversed(closers))))
    else:
        if not in_string:
            cut_points.append((len(fragment), "".join(reversed(closers))))

    for index, tail in reversed(cut_points):
        try:
            parsed = json.loads(fragment[:index] + tail)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return None


def _parse_beauty_output(raw: str) -> dict:
    """Extract the assessment dict from the model's raw text output.

    Markdown code fences are stripped, then the first JSON object in the text
    is decoded (anything after it is ignored). If decoding fails — typically
    because the output hit the token limit mid-object — a truncation repair is
    attempted so the fields that did arrive are kept.

    Args:
        raw: Model output. Non-string values are treated as empty output.

    Returns:
        The parsed dict, or a COLD fallback dict carrying
        ``"parse_error": True`` when nothing usable could be recovered.
    """
    text = raw if isinstance(raw, str) else ""
    text = re.sub(r"```(?:json)?", "", text).strip().rstrip("`").strip()

    start = text.find("{")
    if start != -1:
        fragment = text[start:]
        try:
            # raw_decode stops at the end of the first complete value, so
            # trailing commentary after the object does not break parsing.
            parsed, _ = json.JSONDecoder().raw_decode(fragment)
        except json.JSONDecodeError:
            parsed = _repair_truncated_json(fragment)
        if isinstance(parsed, dict):
            return parsed

    logger.warning("Beauty AI parse failed, raw: %.300s", raw)
    return {
        "is_relevant": False,
        "lead_quality": "COLD",
        "business_name": "",
        "contact_email": "",
        "dist_matches": [],
        "parse_error": True,
    }
|
||||
|
||||
|
||||
# ── Main entry point ───────────────────────────────────────────────────────────
|
||||
|
||||
async def assess_beauty_domain(analysis: dict) -> dict:
    """Full beauty B2B assessment: brand scan + AI evaluation.

    Runs under the shared AI semaphore. Pre-scans the page snippet for known
    brands, looks up company context via DuckDuckGo, then posts the assembled
    prompt to the Replicate model and parses its JSON answer.

    Args:
        analysis: Site analysis dict. Reads ``domain``, ``page_title`` and
            ``visible_text_snippet`` here and is passed through to the prompt
            builder.

    Returns:
        The model's assessment dict, with ``dist_matches`` and
        ``detected_brands`` back-filled from the pre-scan when the model left
        them empty. On any failure (HTTP error, missing output, ...) a COLD
        fallback dict with an ``"error"`` message is returned instead; this
        function does not raise for those failures.
    """
    async with _sem():
        domain = analysis.get("domain", "")
        # Only the stored snippet is scanned here; per the original note,
        # brand extraction from the full page happens in site_analyzer.
        text = analysis.get("visible_text_snippet", "") or ""

        detected = detect_brands_in_text(text)
        dist_match = get_dist_matches(detected)

        # Also search for company context
        title = analysis.get("page_title") or ""
        biz_name = title.split("|")[0].split("-")[0].strip() or domain
        search_results = await _ddg_search(f'"{biz_name}" {domain} beauty cosmetics wholesale contact')
        logger.info("Beauty assess %s: %d brands detected, %d portfolio matches",
                    domain, len(detected), len(dist_match))

        payload = {
            "input": {
                "prompt": _build_beauty_prompt(analysis, detected, dist_match, search_results),
                "images": [], "videos": [],
                "top_p": 0.9,
                "temperature": 0.15,
                "thinking_level": "low",
                "max_output_tokens": 2000,
            }
        }
        try:
            async with httpx.AsyncClient(timeout=120) as client:
                resp = await client.post(
                    REPLICATE_MODEL,
                    headers={
                        "Authorization": f"Bearer {REPLICATE_TOKEN}",
                        "Content-Type": "application/json",
                        "Prefer": "wait",
                    },
                    json=payload,
                )
                resp.raise_for_status()
                data = resp.json()

            output = data.get("output", "")
            if output is None:
                # A null output means the prediction did not produce a result
                # (e.g. still running or failed). Report that explicitly
                # instead of letting the parser raise an opaque TypeError.
                raise RuntimeError(
                    f"model returned no output (status={data.get('status')!r}, "
                    f"error={data.get('error')!r})"
                )
            if isinstance(output, list):
                output = "".join(output)

            result = _parse_beauty_output(output)
            # Merge pre-scan data that AI might miss
            if not result.get("dist_matches") and dist_match:
                result["dist_matches"] = dist_match
            if not result.get("detected_brands") and detected:
                result["detected_brands"] = detected

            logger.info("Beauty AI %s → quality=%s, dist_matches=%s",
                        domain, result.get("lead_quality"), result.get("dist_matches"))
            return result

        except Exception as e:
            # Top-level boundary: one bad domain must not abort a batch, so
            # the failure is logged and returned as data.
            logger.error("Beauty AI error %s: %s", domain, e)
            return {
                "error": str(e)[:300],
                "is_relevant": False,
                "lead_quality": "COLD",
                "dist_matches": dist_match,
                "detected_brands": detected,
                "contact_email": "",
            }
|
||||
Reference in New Issue
Block a user