feat: add rescan dead domains checkbox to validator

Adds a rescan_dead flag, passed from start_validator through
_validator_loop, that makes _filter_unvalidated treat domains previously
marked 'dead' as still needing a check, so they are validated again.
Useful after fixing the http/https detection bug.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-19 20:12:59 +02:00
parent ae2fad0152
commit f8ab910eca
3 changed files with 27 additions and 11 deletions

View File

@@ -161,15 +161,25 @@ def _get_domains_batch(offset: int, limit: int, tld: Optional[str]) -> list[str]
return []
async def _filter_unvalidated(domains: list[str]) -> list[str]:
"""Return only domains that don't have a prescreen_status set yet."""
async def _filter_unvalidated(domains: list[str], rescan_dead: bool = False) -> list[str]:
"""Return domains that still need checking.
With rescan_dead=True, domains previously marked 'dead' are included
so they get a fresh check (useful after fixing the http/https bug).
"""
if not domains:
return []
placeholders = ",".join("?" * len(domains))
# Normal mode: a domain is "done" once it has any non-null prescreen_status.
# With rescan_dead=True, a status of 'dead' no longer counts as done.
if rescan_dead:
condition = "prescreen_status IS NOT NULL AND prescreen_status != 'dead'"
else:
condition = "prescreen_status IS NOT NULL"
async with aiosqlite.connect(SQLITE_PATH) as db:
async with db.execute(
f"SELECT domain FROM enriched_domains "
f"WHERE domain IN ({placeholders}) AND prescreen_status IS NOT NULL",
f"WHERE domain IN ({placeholders}) AND {condition}",
domains,
) as cur:
already = {r[0] async for r in cur}
@@ -198,7 +208,7 @@ async def _save_batch(results: list[dict]):
await db.commit()
async def _validator_loop(tld_filter: Optional[str]):
async def _validator_loop(tld_filter: Optional[str], rescan_dead: bool = False):
global _val_stats
_val_stats["running"] = True
offset = _val_stats["offset"]
@@ -226,7 +236,7 @@ async def _validator_loop(tld_filter: Optional[str]):
logger.info("Validator: dataset complete at offset=%d", offset)
break
to_check = await _filter_unvalidated(batch)
to_check = await _filter_unvalidated(batch, rescan_dead=rescan_dead)
_val_stats["skipped"] += len(batch) - len(to_check)
offset += len(batch)
_val_stats["offset"] = offset
@@ -276,7 +286,7 @@ def get_validator_status() -> dict:
return dict(_val_stats)
def start_validator(tld_filter: Optional[str] = None):
def start_validator(tld_filter: Optional[str] = None, rescan_dead: bool = False):
global _val_task, _val_stats
if _val_task and not _val_task.done():
return # already running
@@ -287,9 +297,10 @@ def start_validator(tld_filter: Optional[str] = None):
processed=0, live=0, dead=0, parked=0,
redirect=0, skipped=0, offset=0, rate=0.0,
tld_filter=tld_filter,
rescan_dead=rescan_dead,
)
_val_task = asyncio.create_task(_validator_loop(tld_filter))
logger.info("Validator started (tld=%s)", tld_filter)
_val_task = asyncio.create_task(_validator_loop(tld_filter, rescan_dead=rescan_dead))
logger.info("Validator started (tld=%s, rescan_dead=%s)", tld_filter, rescan_dead)
def stop_validator():