diff --git a/app/validator.py b/app/validator.py index edcf76a..38660ab 100644 --- a/app/validator.py +++ b/app/validator.py @@ -86,13 +86,17 @@ async def _check_domain(domain: str) -> dict: } t0 = time.monotonic() - # Try http first (follows http→https redirects automatically). - # Fall back to https directly if port 80 is closed/refused — many modern - # servers only listen on 443 and would be wrongly marked dead otherwise. + # Try http first with a short timeout so we don't waste time on servers + # that accept TCP but never respond on port 80. If http fails for ANY + # reason (refused, timeout, protocol error, redirect loop…) we fall back + # to https directly, which is what most modern sites actually serve. + timeouts = {"http": httpx.Timeout(connect=4, read=8, write=5, pool=10), + "https": httpx.Timeout(connect=7, read=12, write=5, pool=15)} + for scheme in ("http", "https"): try: async with httpx.AsyncClient( - timeout=httpx.Timeout(connect=7, read=12, write=5, pool=15), + timeout=timeouts[scheme], follow_redirects=True, headers=_HEADERS, verify=False, @@ -128,13 +132,13 @@ async def _check_domain(domain: str) -> dict: result["prescreen_status"] = "live" return result - except httpx.ConnectError: - # Port closed / connection refused — try the other scheme - logger.debug("Validator %s: ConnectError on %s, trying next scheme", domain, scheme) - continue except Exception as e: - logger.debug("Validator %s (%s): %s", domain, scheme, e) - break # timeout or other error — don't retry + # Any failure on http → always try https next + # Any failure on https → give up, leave as dead + logger.debug("Validator %s (%s): %s — %s", domain, scheme, type(e).__name__, e) + if scheme == "https": + break + # fall through to https result["load_time_ms"] = int((time.monotonic() - t0) * 1000) return result