fix: bind exception variable in ConnectError handler to prevent NameError
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -104,8 +104,14 @@ async def _check_domain(domain: str) -> dict:
|
|||||||
# that accept TCP but never respond on port 80. If http fails for ANY
|
# that accept TCP but never respond on port 80. If http fails for ANY
|
||||||
# reason (refused, timeout, protocol error, redirect loop…) we fall back
|
# reason (refused, timeout, protocol error, redirect loop…) we fall back
|
||||||
# to https directly, which is what most modern sites actually serve.
|
# to https directly, which is what most modern sites actually serve.
|
||||||
timeouts = {"http": httpx.Timeout(connect=4, read=8, write=5, pool=10),
|
# Use a short connect timeout for http — if port 80 times out, port 443
|
||||||
"https": httpx.Timeout(connect=7, read=12, write=5, pool=15)}
|
# will too, so we don't bother retrying on timeout errors.
|
||||||
|
# We DO retry https on ConnectError/RemoteProtocolError (port 80 closed or
|
||||||
|
# speaking wrong protocol) because those servers are often https-only.
|
||||||
|
timeouts = {
|
||||||
|
"http": httpx.Timeout(connect=4, read=6, write=3, pool=5),
|
||||||
|
"https": httpx.Timeout(connect=6, read=10, write=3, pool=5),
|
||||||
|
}
|
||||||
|
|
||||||
for scheme in ("http", "https"):
|
for scheme in ("http", "https"):
|
||||||
try:
|
try:
|
||||||
@@ -126,7 +132,6 @@ async def _check_domain(domain: str) -> dict:
|
|||||||
final_url = str(resp.url)
|
final_url = str(resp.url)
|
||||||
final_host = urlparse(final_url).netloc.lower().lstrip("www.")
|
final_host = urlparse(final_url).netloc.lower().lstrip("www.")
|
||||||
|
|
||||||
# Redirected to a completely different domain
|
|
||||||
if not _same_domain(domain, final_url):
|
if not _same_domain(domain, final_url):
|
||||||
for ph in PARKING_REDIRECT_HOSTS:
|
for ph in PARKING_REDIRECT_HOSTS:
|
||||||
if ph in final_host:
|
if ph in final_host:
|
||||||
@@ -135,9 +140,8 @@ async def _check_domain(domain: str) -> dict:
|
|||||||
result["prescreen_status"] = "redirect"
|
result["prescreen_status"] = "redirect"
|
||||||
return result
|
return result
|
||||||
|
|
||||||
# Any response from the server = the domain is live.
|
# Any HTTP response = server is alive (4xx/5xx still means live web server)
|
||||||
# 4xx/5xx still means a working web server — only no-response = dead.
|
# Only check parking content on readable 200/203 responses
|
||||||
# Only check parking signals on 200 responses (2xx bodies are readable).
|
|
||||||
if resp.status_code in (200, 203):
|
if resp.status_code in (200, 203):
|
||||||
html_lc = resp.text[:20_000].lower()
|
html_lc = resp.text[:20_000].lower()
|
||||||
for sig in PARKING_BODY_SIGNALS:
|
for sig in PARKING_BODY_SIGNALS:
|
||||||
@@ -148,11 +152,16 @@ async def _check_domain(domain: str) -> dict:
|
|||||||
result["prescreen_status"] = "live"
|
result["prescreen_status"] = "live"
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
except (httpx.ConnectError, httpx.RemoteProtocolError) as e:
|
||||||
|
# Port refused or bad HTTP response → server may be https-only, try it
|
||||||
|
if scheme == "http":
|
||||||
|
logger.debug("Validator %s: %s on http, trying https", domain, type(e).__name__)
|
||||||
|
continue
|
||||||
|
break
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# Any failure on http → try https. Any failure on https → dead.
|
# Timeout or other error → https won't help, mark dead
|
||||||
logger.debug("Validator %s (%s): %s — %s", domain, scheme, type(e).__name__, e)
|
logger.debug("Validator %s (%s): %s", domain, scheme, type(e).__name__)
|
||||||
if scheme == "https":
|
break
|
||||||
break
|
|
||||||
|
|
||||||
result["load_time_ms"] = int((time.monotonic() - t0) * 1000)
|
result["load_time_ms"] = int((time.monotonic() - t0) * 1000)
|
||||||
return result
|
return result
|
||||||
|
|||||||
Reference in New Issue
Block a user