feat: add rescan dead domains checkbox to validator
Adds a rescan_dead flag that makes _filter_unvalidated treat domains previously marked 'dead' as still needing validation, so they get a fresh check. Useful after fixing the http/https detection bug.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -177,8 +177,8 @@ async def enriched(
|
||||
# ── Bulk Validator endpoints ──────────────────────────────────────────────────
|
||||
|
||||
@app.post("/api/validator/start")
|
||||
async def validator_start(tld: str = Query(None)):
|
||||
start_validator(tld_filter=tld or None)
|
||||
async def validator_start(tld: str = Query(None), rescan_dead: bool = Query(False)):
|
||||
start_validator(tld_filter=tld or None, rescan_dead=rescan_dead)
|
||||
return get_validator_status()
|
||||
|
||||
|
||||
|
||||
@@ -578,6 +578,10 @@ tr:hover td{background:rgba(255,255,255,.025)}
|
||||
<label>TLD filter <span style="font-weight:400;color:var(--muted)">(leave empty for all domains)</span></label>
|
||||
<input type="text" x-model="valTld" placeholder="es or com or ro" style="width:180px" :disabled="valSt.running">
|
||||
</div>
|
||||
<label class="tog" style="padding-bottom:6px" :style="valSt.running?'opacity:.4':''">
|
||||
<input type="checkbox" x-model="valRescan" :disabled="valSt.running">
|
||||
<strong>Rescan dead</strong> <span>(recheck previously dead domains)</span>
|
||||
</label>
|
||||
<button class="btn bs" :disabled="valSt.running" @click="startValidator()">▶ Start Validator</button>
|
||||
<button class="btn bd" :disabled="!valSt.running" @click="stopValidator()">⏹ Stop</button>
|
||||
<span x-show="valSt.running" style="font-size:11px;color:var(--accent2);padding-bottom:6px">⚡ Running…</span>
|
||||
@@ -751,7 +755,7 @@ function app() {
|
||||
f: {tld:'',keyword:'',min_score:0,cms:'',live_only:false,alpha_only:false,no_sld:false,kit_digital_only:false,exclude_assessed:false,limit:'100',prescreen_status:'',niche:'',site_type:''},
|
||||
qst: {}, customDomains: '',
|
||||
valSt: {running:false,processed:0,live:0,dead:0,parked:0,redirect:0,skipped:0,offset:0,rate:0},
|
||||
valTld: '',
|
||||
valTld: '', valRescan: false,
|
||||
leadsQ: {quality:'', country:'', limit:'50'},
|
||||
leadsData: [], leadsTotal: 0, leadsPage: 1, leadsLoading: false,
|
||||
prescreening: false,
|
||||
@@ -945,6 +949,7 @@ function app() {
|
||||
async startValidator() {
|
||||
const p = new URLSearchParams();
|
||||
if(this.valTld.trim()) p.set('tld', this.valTld.trim());
|
||||
if(this.valRescan) p.set('rescan_dead', 'true');
|
||||
await fetch('/api/validator/start'+(p.toString()? '?'+p : ''), {method:'POST'});
|
||||
this.notify('Validator started', 'success');
|
||||
await this.loadValStatus();
|
||||
|
||||
@@ -161,15 +161,25 @@ def _get_domains_batch(offset: int, limit: int, tld: Optional[str]) -> list[str]
|
||||
return []
|
||||
|
||||
|
||||
async def _filter_unvalidated(domains: list[str]) -> list[str]:
|
||||
"""Return only domains that don't have a prescreen_status set yet."""
|
||||
async def _filter_unvalidated(domains: list[str], rescan_dead: bool = False) -> list[str]:
|
||||
"""Return domains that still need checking.
|
||||
|
||||
With rescan_dead=True, domains previously marked 'dead' are included
|
||||
so they get a fresh check (useful after fixing the http/https bug).
|
||||
"""
|
||||
if not domains:
|
||||
return []
|
||||
placeholders = ",".join("?" * len(domains))
|
||||
# A domain is "done" if it has a non-null prescreen_status that isn't dead
|
||||
# (when rescan_dead=True) or any non-null status (normal mode).
|
||||
if rescan_dead:
|
||||
condition = "prescreen_status IS NOT NULL AND prescreen_status != 'dead'"
|
||||
else:
|
||||
condition = "prescreen_status IS NOT NULL"
|
||||
async with aiosqlite.connect(SQLITE_PATH) as db:
|
||||
async with db.execute(
|
||||
f"SELECT domain FROM enriched_domains "
|
||||
f"WHERE domain IN ({placeholders}) AND prescreen_status IS NOT NULL",
|
||||
f"WHERE domain IN ({placeholders}) AND {condition}",
|
||||
domains,
|
||||
) as cur:
|
||||
already = {r[0] async for r in cur}
|
||||
@@ -198,7 +208,7 @@ async def _save_batch(results: list[dict]):
|
||||
await db.commit()
|
||||
|
||||
|
||||
async def _validator_loop(tld_filter: Optional[str]):
|
||||
async def _validator_loop(tld_filter: Optional[str], rescan_dead: bool = False):
|
||||
global _val_stats
|
||||
_val_stats["running"] = True
|
||||
offset = _val_stats["offset"]
|
||||
@@ -226,7 +236,7 @@ async def _validator_loop(tld_filter: Optional[str]):
|
||||
logger.info("Validator: dataset complete at offset=%d", offset)
|
||||
break
|
||||
|
||||
to_check = await _filter_unvalidated(batch)
|
||||
to_check = await _filter_unvalidated(batch, rescan_dead=rescan_dead)
|
||||
_val_stats["skipped"] += len(batch) - len(to_check)
|
||||
offset += len(batch)
|
||||
_val_stats["offset"] = offset
|
||||
@@ -276,7 +286,7 @@ def get_validator_status() -> dict:
|
||||
return dict(_val_stats)
|
||||
|
||||
|
||||
def start_validator(tld_filter: Optional[str] = None):
|
||||
def start_validator(tld_filter: Optional[str] = None, rescan_dead: bool = False):
|
||||
global _val_task, _val_stats
|
||||
if _val_task and not _val_task.done():
|
||||
return # already running
|
||||
@@ -287,9 +297,10 @@ def start_validator(tld_filter: Optional[str] = None):
|
||||
processed=0, live=0, dead=0, parked=0,
|
||||
redirect=0, skipped=0, offset=0, rate=0.0,
|
||||
tld_filter=tld_filter,
|
||||
rescan_dead=rescan_dead,
|
||||
)
|
||||
_val_task = asyncio.create_task(_validator_loop(tld_filter))
|
||||
logger.info("Validator started (tld=%s)", tld_filter)
|
||||
_val_task = asyncio.create_task(_validator_loop(tld_filter, rescan_dead=rescan_dead))
|
||||
logger.info("Validator started (tld=%s, rescan_dead=%s)", tld_filter, rescan_dead)
|
||||
|
||||
|
||||
def stop_validator():
|
||||
|
||||
Reference in New Issue
Block a user