fix: always fall back to https on any http failure (fixes HTTPS-only sites marked dead)

The previous fix only retried on ConnectError. Servers that accept TCP on port 80
but hang, return protocol errors, or time out also need the https fallback.
Now any exception on http triggers an https retry. Shorter http timeouts
(connect 4s, read 8s) avoid wasting time on a non-responsive port 80.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-20 17:43:44 +02:00
parent f8ab910eca
commit 8a4ec88d73

View File

@@ -86,13 +86,17 @@ async def _check_domain(domain: str) -> dict:
} }
t0 = time.monotonic() t0 = time.monotonic()
# Try http first (follows http→https redirects automatically). # Try http first with a short timeout so we don't waste time on servers
# Fall back to https directly if port 80 is closed/refused — many modern # that accept TCP but never respond on port 80. If http fails for ANY
# servers only listen on 443 and would be wrongly marked dead otherwise. # reason (refused, timeout, protocol error, redirect loop…) we fall back
# to https directly, which is what most modern sites actually serve.
timeouts = {"http": httpx.Timeout(connect=4, read=8, write=5, pool=10),
"https": httpx.Timeout(connect=7, read=12, write=5, pool=15)}
for scheme in ("http", "https"): for scheme in ("http", "https"):
try: try:
async with httpx.AsyncClient( async with httpx.AsyncClient(
timeout=httpx.Timeout(connect=7, read=12, write=5, pool=15), timeout=timeouts[scheme],
follow_redirects=True, follow_redirects=True,
headers=_HEADERS, headers=_HEADERS,
verify=False, verify=False,
@@ -128,13 +132,13 @@ async def _check_domain(domain: str) -> dict:
result["prescreen_status"] = "live" result["prescreen_status"] = "live"
return result return result
except httpx.ConnectError:
# Port closed / connection refused — try the other scheme
logger.debug("Validator %s: ConnectError on %s, trying next scheme", domain, scheme)
continue
except Exception as e: except Exception as e:
logger.debug("Validator %s (%s): %s", domain, scheme, e) # Any failure on http → always try https next
break # timeout or other error — don't retry # Any failure on https → give up, leave as dead
logger.debug("Validator %s (%s): %s%s", domain, scheme, type(e).__name__, e)
if scheme == "https":
break
# fall through to https
result["load_time_ms"] = int((time.monotonic() - t0) * 1000) result["load_time_ms"] = int((time.monotonic() - t0) * 1000)
return result return result