fix: always fall back to https on any http failure (fixes HTTPS-only sites marked dead)
The previous fix only retried on ConnectError. Servers that accept TCP on port 80 but then hang, return protocol errors, or time out also need the https fallback. Now any exception on http triggers an https retry. A shorter http connect timeout (4s) avoids wasting time on a non-responsive port 80. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -86,13 +86,17 @@ async def _check_domain(domain: str) -> dict:
|
||||
}
|
||||
t0 = time.monotonic()
|
||||
|
||||
# Try http first (follows http→https redirects automatically).
|
||||
# Fall back to https directly if port 80 is closed/refused — many modern
|
||||
# servers only listen on 443 and would be wrongly marked dead otherwise.
|
||||
# Try http first with a short timeout so we don't waste time on servers
|
||||
# that accept TCP but never respond on port 80. If http fails for ANY
|
||||
# reason (refused, timeout, protocol error, redirect loop…) we fall back
|
||||
# to https directly, which is what most modern sites actually serve.
|
||||
timeouts = {"http": httpx.Timeout(connect=4, read=8, write=5, pool=10),
|
||||
"https": httpx.Timeout(connect=7, read=12, write=5, pool=15)}
|
||||
|
||||
for scheme in ("http", "https"):
|
||||
try:
|
||||
async with httpx.AsyncClient(
|
||||
timeout=httpx.Timeout(connect=7, read=12, write=5, pool=15),
|
||||
timeout=timeouts[scheme],
|
||||
follow_redirects=True,
|
||||
headers=_HEADERS,
|
||||
verify=False,
|
||||
@@ -128,13 +132,13 @@ async def _check_domain(domain: str) -> dict:
|
||||
result["prescreen_status"] = "live"
|
||||
return result
|
||||
|
||||
except httpx.ConnectError:
|
||||
# Port closed / connection refused — try the other scheme
|
||||
logger.debug("Validator %s: ConnectError on %s, trying next scheme", domain, scheme)
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.debug("Validator %s (%s): %s", domain, scheme, e)
|
||||
break # timeout or other error — don't retry
|
||||
# Any failure on http → always try https next
|
||||
# Any failure on https → give up, leave as dead
|
||||
logger.debug("Validator %s (%s): %s — %s", domain, scheme, type(e).__name__, e)
|
||||
if scheme == "https":
|
||||
break
|
||||
# fall through to https
|
||||
|
||||
result["load_time_ms"] = int((time.monotonic() - t0) * 1000)
|
||||
return result
|
||||
|
||||
Reference in New Issue
Block a user