From f8ab910eca1ac828f110326d15a69f1784098eef Mon Sep 17 00:00:00 2001 From: Malin Date: Sun, 19 Apr 2026 20:12:59 +0200 Subject: [PATCH] feat: add rescan dead domains checkbox to validator Adds rescan_dead flag that causes _filter_unvalidated to treat previously-dead domains as needing a fresh check. Useful after fixing the http/https detection bug. Co-Authored-By: Claude Sonnet 4.6 --- app/main.py | 4 ++-- app/static/index.html | 7 ++++++- app/validator.py | 27 +++++++++++++++++++-------- 3 files changed, 27 insertions(+), 11 deletions(-) diff --git a/app/main.py b/app/main.py index c63f7c3..6f7eeba 100644 --- a/app/main.py +++ b/app/main.py @@ -177,8 +177,8 @@ async def enriched( # ── Bulk Validator endpoints ────────────────────────────────────────────────── @app.post("/api/validator/start") -async def validator_start(tld: str = Query(None)): - start_validator(tld_filter=tld or None) +async def validator_start(tld: str = Query(None), rescan_dead: bool = Query(False)): + start_validator(tld_filter=tld or None, rescan_dead=rescan_dead) return get_validator_status() diff --git a/app/static/index.html b/app/static/index.html index fa6ecfb..6c6a59a 100644 --- a/app/static/index.html +++ b/app/static/index.html @@ -578,6 +578,10 @@ tr:hover td{background:rgba(255,255,255,.025)} + ⚡ Running… @@ -751,7 +755,7 @@ function app() { f: {tld:'',keyword:'',min_score:0,cms:'',live_only:false,alpha_only:false,no_sld:false,kit_digital_only:false,exclude_assessed:false,limit:'100',prescreen_status:'',niche:'',site_type:''}, qst: {}, customDomains: '', valSt: {running:false,processed:0,live:0,dead:0,parked:0,redirect:0,skipped:0,offset:0,rate:0}, - valTld: '', + valTld: '', valRescan: false, leadsQ: {quality:'', country:'', limit:'50'}, leadsData: [], leadsTotal: 0, leadsPage: 1, leadsLoading: false, prescreening: false, @@ -945,6 +949,7 @@ function app() { async startValidator() { const p = new URLSearchParams(); if(this.valTld.trim()) p.set('tld', this.valTld.trim()); + if(this.valRescan) p.set('rescan_dead', 'true'); await fetch('/api/validator/start'+(p.toString()? '?'+p : ''), {method:'POST'}); this.notify('Validator started', 'success'); await this.loadValStatus(); diff --git a/app/validator.py b/app/validator.py index 190dfde..edcf76a 100644 --- a/app/validator.py +++ b/app/validator.py @@ -161,15 +161,25 @@ def _get_domains_batch(offset: int, limit: int, tld: Optional[str]) -> list[str] return [] -async def _filter_unvalidated(domains: list[str]) -> list[str]: - """Return only domains that don't have a prescreen_status set yet.""" +async def _filter_unvalidated(domains: list[str], rescan_dead: bool = False) -> list[str]: + """Return domains that still need checking. + + With rescan_dead=True, domains previously marked 'dead' are included + so they get a fresh check (useful after fixing the http/https bug). + """ if not domains: return [] placeholders = ",".join("?" * len(domains)) + # A domain is "done" if it has a non-null prescreen_status that isn't dead + # (when rescan_dead=True) or any non-null status (normal mode). + if rescan_dead: + condition = "prescreen_status IS NOT NULL AND prescreen_status != 'dead'" + else: + condition = "prescreen_status IS NOT NULL" async with aiosqlite.connect(SQLITE_PATH) as db: async with db.execute( f"SELECT domain FROM enriched_domains " - f"WHERE domain IN ({placeholders}) AND prescreen_status IS NOT NULL", + f"WHERE domain IN ({placeholders}) AND {condition}", domains, ) as cur: already = {r[0] async for r in cur} @@ -198,7 +208,7 @@ async def _save_batch(results: list[dict]): await db.commit() -async def _validator_loop(tld_filter: Optional[str]): +async def _validator_loop(tld_filter: Optional[str], rescan_dead: bool = False): global _val_stats _val_stats["running"] = True offset = _val_stats["offset"] @@ -226,7 +236,7 @@ async def _validator_loop(tld_filter: Optional[str]): logger.info("Validator: dataset complete at offset=%d", offset) break - to_check = await _filter_unvalidated(batch) + to_check = await _filter_unvalidated(batch, rescan_dead=rescan_dead) _val_stats["skipped"] += len(batch) - len(to_check) offset += len(batch) _val_stats["offset"] = offset @@ -276,7 +286,7 @@ def get_validator_status() -> dict: return dict(_val_stats) -def start_validator(tld_filter: Optional[str] = None): +def start_validator(tld_filter: Optional[str] = None, rescan_dead: bool = False): global _val_task, _val_stats if _val_task and not _val_task.done(): return # already running @@ -287,9 +297,10 @@ def start_validator(tld_filter: Optional[str] = None): processed=0, live=0, dead=0, parked=0, redirect=0, skipped=0, offset=0, rate=0.0, tld_filter=tld_filter, + rescan_dead=rescan_dead, ) - _val_task = asyncio.create_task(_validator_loop(tld_filter)) - logger.info("Validator started (tld=%s)", tld_filter) + _val_task = asyncio.create_task(_validator_loop(tld_filter, rescan_dead=rescan_dead)) + logger.info("Validator started (tld=%s, rescan_dead=%s)", tld_filter, rescan_dead) def stop_validator():