Files
DomGod/app/main.py
2026-04-13 16:27:29 +02:00

203 lines
6.4 KiB
Python

import os
import sys
import asyncio
import logging
from pathlib import Path
from contextlib import asynccontextmanager
import httpx
import duckdb
import aiosqlite
from fastapi import FastAPI, Query
from fastapi.responses import StreamingResponse, JSONResponse
from fastapi.staticfiles import StaticFiles
from dotenv import load_dotenv
load_dotenv()
from app.db import (
DATA_DIR, PARQUET_PATH, SQLITE_PATH,
init_db, get_stats, get_domains, get_enriched,
queue_domains, get_queue_status,
)
from app.enricher import start_worker, pause_worker, resume_worker, is_running
from app.scorer import run_scoring
# Configure logging once at import time: INFO level, each record prefixed with
# its timestamp and level name.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Remote location of the parquet dataset, read from the PARQUET_URL environment
# variable; falls back to an empty string when the variable is unset.
PARQUET_URL = os.getenv("PARQUET_URL", "")
async def download_parquet():
    """Download the parquet dataset to ``PARQUET_PATH`` unless it is cached.

    The body is streamed into a sibling ``.tmp`` file which is renamed to
    ``PARQUET_PATH`` only after the transfer finishes. If a partial ``.tmp``
    file is left over from an earlier attempt, the download is resumed with an
    HTTP ``Range`` request.

    Raises:
        RuntimeError: if nothing is cached and ``PARQUET_URL`` is empty.
        httpx.HTTPStatusError: if the server answers with an error status.
    """
    if PARQUET_PATH.exists():
        logger.info("Using cached parquet at %s", PARQUET_PATH)
        return

    if not PARQUET_URL:
        # Fail with an actionable message instead of an opaque httpx URL error.
        raise RuntimeError(
            f"PARQUET_URL is not set and no cached parquet exists at {PARQUET_PATH}"
        )

    DATA_DIR.mkdir(parents=True, exist_ok=True)
    tmp_path = PARQUET_PATH.with_suffix(".tmp")

    # Resumable download via Range header
    downloaded = tmp_path.stat().st_size if tmp_path.exists() else 0
    headers = {"Range": f"bytes={downloaded}-"} if downloaded > 0 else {}

    logger.info("Downloading parquet from %s (offset=%d)...", PARQUET_URL, downloaded)
    async with httpx.AsyncClient(follow_redirects=True, timeout=None) as client:
        async with client.stream("GET", PARQUET_URL, headers=headers) as resp:
            if resp.status_code == 416 and downloaded > 0:
                # Requested offset is at/after the end of the resource:
                # the partial file already holds everything.
                tmp_path.rename(PARQUET_PATH)
                return
            resp.raise_for_status()

            if downloaded > 0 and resp.status_code != 206:
                # The server ignored the Range header and is sending the whole
                # file again. Appending it to the partial file would corrupt
                # the result, so start over from byte zero.
                logger.warning(
                    "Server did not honour Range request (status %d); restarting download",
                    resp.status_code,
                )
                downloaded = 0

            # Content-Length covers only the bytes of this response, so add the
            # resume offset. Without the header the total is unknown (0) and
            # progress logging is skipped.
            remaining = int(resp.headers.get("content-length", 0))
            total = remaining + downloaded if remaining else 0

            mode = "ab" if downloaded > 0 else "wb"
            with open(tmp_path, mode) as f:
                received = downloaded
                async for chunk in resp.aiter_bytes(chunk_size=1024 * 1024):
                    f.write(chunk)
                    received += len(chunk)
                    if total:
                        pct = received / total * 100
                        logger.info(
                            "Download progress: %.1f%% (%d/%d bytes)", pct, received, total
                        )

    # Publish the file under its final name only once it is complete.
    tmp_path.rename(PARQUET_PATH)
    logger.info("Parquet download complete: %s", PARQUET_PATH)
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run the startup sequence, then hand control to the application.

    Startup awaits the parquet download, then database initialisation, and
    finally starts the enrichment worker. Nothing is executed after the
    ``yield``, i.e. there is no shutdown step here.
    """
    # The awaited steps must run in this order, one after the other.
    for startup_step in (download_parquet, init_db):
        await startup_step()
    start_worker()
    logger.info("DomGod dashboard ready on port 6677")
    yield
# The ASGI application object; `lifespan` is registered as its lifespan
# context manager, and the route decorators below attach handlers to it.
app = FastAPI(title="DomGod", lifespan=lifespan)
# ── API routes ──────────────────────────────────────────────────────────────
@app.get("/api/stats")
async def stats():
    """Return whatever ``get_stats`` reports, unchanged."""
    summary = await get_stats()
    return summary
@app.get("/api/domains")
async def domains(
    tld: str = Query(None),
    page: int = Query(1, ge=1),
    limit: int = Query(100, ge=1, le=1000),
    live_only: bool = Query(False),
):
    """Return one page of domains from ``get_domains``.

    Query parameters:
        tld: optional filter value forwarded to ``get_domains``.
        page: 1-based page number (minimum 1).
        limit: page size, between 1 and 1000.
        live_only: flag forwarded to ``get_domains``.

    The response echoes ``page`` and ``limit`` alongside the fetched rows.
    """
    lookup_args = {
        "tld": tld,
        "page": page,
        "limit": limit,
        "live_only": live_only,
    }
    matches = await get_domains(**lookup_args)
    return {"page": page, "limit": limit, "results": matches}
@app.post("/api/enrich/batch")
async def enrich_batch(body: dict):
    """Queue a batch of domains for enrichment and resume the worker.

    Expects a JSON object with a ``"domains"`` key holding a non-empty list.

    Returns:
        ``{"queued": <count>}`` on success, or a 400 ``JSONResponse`` with an
        ``"error"`` message when the list is missing, empty, or not a list.
    """
    domains_list = body.get("domains", [])
    if not domains_list:
        return JSONResponse({"error": "no domains provided"}, status_code=400)
    if not isinstance(domains_list, list):
        # A bare string would otherwise be queued character by character, and
        # a number would crash in len(); reject anything that is not a list.
        return JSONResponse({"error": "domains must be a list"}, status_code=400)
    await queue_domains(domains_list)
    resume_worker()
    return {"queued": len(domains_list)}
@app.get("/api/enrich/status")
async def enrich_status():
    """Return the queue status with a ``worker_running`` flag added."""
    queue_state = await get_queue_status()
    worker_flag = is_running()
    queue_state["worker_running"] = worker_flag
    return queue_state
@app.post("/api/enrich/retry")
async def enrich_retry():
    """Reset every failed job to pending and resume the worker.

    Rows in ``job_queue`` whose status is ``'failed'`` get status
    ``'pending'`` and their ``error`` column cleared.
    """
    reset_failed_sql = "UPDATE job_queue SET status='pending', error=NULL WHERE status='failed'"
    async with aiosqlite.connect(SQLITE_PATH) as conn:
        await conn.execute(reset_failed_sql)
        await conn.commit()
    resume_worker()
    return {"status": "retrying failed jobs"}
@app.post("/api/enrich/pause")
async def enrich_pause():
    """Pause the enrichment worker and acknowledge with a status payload."""
    pause_worker()
    acknowledgement = {"status": "paused"}
    return acknowledgement
@app.post("/api/enrich/resume")
async def enrich_resume():
    """Resume the enrichment worker and acknowledge with a status payload."""
    resume_worker()
    acknowledgement = {"status": "resumed"}
    return acknowledgement
@app.get("/api/enriched")
async def enriched(
    min_score: int = Query(0, ge=0, le=100),
    cms: str = Query(None),
    country: str = Query(None),
    page: int = Query(1, ge=1),
    limit: int = Query(100, ge=1, le=1000),
):
    """Return one page of enriched rows from ``get_enriched``.

    Query parameters:
        min_score: score threshold between 0 and 100, forwarded as-is.
        cms: optional filter value forwarded to ``get_enriched``.
        country: optional filter value forwarded to ``get_enriched``.
        page: 1-based page number (minimum 1).
        limit: page size, between 1 and 1000.

    The response echoes ``page`` and ``limit`` alongside the fetched rows.
    """
    lookup_args = {
        "min_score": min_score,
        "cms": cms,
        "country": country,
        "page": page,
        "limit": limit,
    }
    matches = await get_enriched(**lookup_args)
    return {"page": page, "limit": limit, "results": matches}
# Column order of the CSV export; also used to build the header line.
_EXPORT_COLUMNS = (
    "domain", "score", "cms", "ssl_expiry_days", "ip_country",
    "is_live", "status_code", "has_mx", "server", "page_title", "enriched_at",
)


def _csv_line(row, columns=_EXPORT_COLUMNS):
    """Serialize one row mapping as a fully quoted, newline-terminated CSV line.

    ``None`` (or a missing key) becomes an empty field; every other value,
    including falsy ones such as ``0`` and ``False``, is written out. Embedded
    double quotes are escaped by doubling, as the ``csv`` module does.
    """
    # Imported locally so this block does not depend on the module's import list.
    import csv
    import io

    buffer = io.StringIO()
    writer = csv.writer(buffer, quoting=csv.QUOTE_ALL, lineterminator="\n")
    writer.writerow([row.get(column) for column in columns])
    return buffer.getvalue()


@app.get("/api/export")
async def export_csv(
    min_score: int = Query(0),
    cms: str = Query(None),
    country: str = Query(None),
    tier: str = Query(None),
):
    """Stream the enriched rows as a CSV attachment.

    Query parameters:
        min_score: score threshold forwarded to ``get_enriched``; overridden
            by ``tier`` when that is ``"hot"`` (80) or ``"warm"`` (50).
        cms: optional filter value forwarded to ``get_enriched``.
        country: optional filter value forwarded to ``get_enriched``.
        tier: ``"hot"`` or ``"warm"`` selects a preset score band; ``"warm"``
            additionally drops rows scoring 80 or more. Any other value only
            affects the download filename.

    Returns:
        A ``StreamingResponse`` of ``text/csv`` whose rows are fetched from
        ``get_enriched`` 500 at a time until an empty page comes back.
    """
    if tier == "hot":
        min_score = 80
    elif tier == "warm":
        min_score = 50

    async def generate():
        yield ",".join(_EXPORT_COLUMNS) + "\n"
        page = 1
        while True:
            rows = await get_enriched(
                min_score=min_score, cms=cms, country=country, page=page, limit=500
            )
            if not rows:
                break
            for r in rows:
                # Apply warm tier upper bound; a missing/None score counts as 0
                # instead of raising on the comparison.
                if tier == "warm" and (r.get("score") or 0) >= 80:
                    continue
                yield _csv_line(r)
            page += 1

    # `tier` comes straight from the query string; keep only characters that
    # are safe inside a quoted Content-Disposition filename.
    safe_tier = "".join(
        ch for ch in (tier or "") if ch.isascii() and (ch.isalnum() or ch in "-_")
    )
    filename = f"domgod_leads_score{min_score}{'_' + safe_tier if safe_tier else ''}.csv"
    return StreamingResponse(
        generate(),
        media_type="text/csv",
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )
@app.post("/api/score/run")
async def score_run():
    """Run the scoring pass and return its result unchanged."""
    return await run_scoring()
# ── Static UI ───────────────────────────────────────────────────────────────
# The UI assets live in the "static" directory next to this module.
static_dir = Path(__file__).parent / "static"
# Serve that directory at the site root with html=True. The mount is registered
# after the /api routes above; presumably so those routes are matched first —
# confirm against the framework's route-ordering rules before moving it.
app.mount("/", StaticFiles(directory=str(static_dir), html=True), name="static")
if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on all interfaces, port 6677.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 6677, "log_level": "info"}
    uvicorn.run("app.main:app", **server_options)