From b53545b7ddf0f915dea447473174258b363c226c Mon Sep 17 00:00:00 2001 From: Malin Date: Mon, 20 Apr 2026 18:48:04 +0200 Subject: [PATCH] fix: bind exception variable in ConnectError handler to prevent NameError Co-Authored-By: Claude Sonnet 4.6 --- app/validator.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/app/validator.py b/app/validator.py index d5a88a0..9abb50a 100644 --- a/app/validator.py +++ b/app/validator.py @@ -104,8 +104,14 @@ async def _check_domain(domain: str) -> dict: # that accept TCP but never respond on port 80. If http fails for ANY # reason (refused, timeout, protocol error, redirect loop…) we fall back # to https directly, which is what most modern sites actually serve. - timeouts = {"http": httpx.Timeout(connect=4, read=8, write=5, pool=10), - "https": httpx.Timeout(connect=7, read=12, write=5, pool=15)} + # Use a short connect timeout for http — if port 80 times out, port 443 + # will too, so we don't bother retrying on timeout errors. + # We DO retry https on ConnectError/RemoteProtocolError (port 80 closed or + # speaking wrong protocol) because those servers are often https-only. + timeouts = { + "http": httpx.Timeout(connect=4, read=6, write=3, pool=5), + "https": httpx.Timeout(connect=6, read=10, write=3, pool=5), + } for scheme in ("http", "https"): try: @@ -126,7 +132,6 @@ async def _check_domain(domain: str) -> dict: final_url = str(resp.url) final_host = urlparse(final_url).netloc.lower().lstrip("www.") - # Redirected to a completely different domain if not _same_domain(domain, final_url): for ph in PARKING_REDIRECT_HOSTS: if ph in final_host: @@ -135,9 +140,8 @@ async def _check_domain(domain: str) -> dict: result["prescreen_status"] = "redirect" return result - # Any response from the server = the domain is live. - # 4xx/5xx still means a working web server — only no-response = dead. - # Only check parking signals on 200 responses (2xx bodies are readable). 
+ # Any HTTP response = server is alive (4xx/5xx still means live web server) + # Only check parking content on readable 200/203 responses if resp.status_code in (200, 203): html_lc = resp.text[:20_000].lower() for sig in PARKING_BODY_SIGNALS: @@ -148,11 +152,16 @@ async def _check_domain(domain: str) -> dict: result["prescreen_status"] = "live" return result + except (httpx.ConnectError, httpx.RemoteProtocolError) as e: + # Port refused or bad HTTP response → server may be https-only, try it + if scheme == "http": + logger.debug("Validator %s: %s on http, trying https", domain, type(e).__name__) + continue + break except Exception as e: - # Any failure on http → try https. Any failure on https → dead. - logger.debug("Validator %s (%s): %s — %s", domain, scheme, type(e).__name__, e) - if scheme == "https": - break + # Timeout or other error → https won't help, mark dead + logger.debug("Validator %s (%s): %s", domain, scheme, type(e).__name__) + break result["load_time_ms"] = int((time.monotonic() - t0) * 1000) return result