fix: always fall back to https on any http failure (fixes HTTPS-only sites marked dead)
The previous fix only retried on ConnectError. Servers that accept TCP on port 80 but then hang, return protocol errors, or time out also need the https fallback. Now any exception on http triggers an https retry. Shorter http timeouts (4s connect, 8s read) avoid wasting time on a non-responsive port 80. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -86,13 +86,17 @@ async def _check_domain(domain: str) -> dict:
|
|||||||
}
|
}
|
||||||
t0 = time.monotonic()
|
t0 = time.monotonic()
|
||||||
|
|
||||||
# Try http first (follows http→https redirects automatically).
|
# Try http first with a short timeout so we don't waste time on servers
|
||||||
# Fall back to https directly if port 80 is closed/refused — many modern
|
# that accept TCP but never respond on port 80. If http fails for ANY
|
||||||
# servers only listen on 443 and would be wrongly marked dead otherwise.
|
# reason (refused, timeout, protocol error, redirect loop…) we fall back
|
||||||
|
# to https directly, which is what most modern sites actually serve.
|
||||||
|
timeouts = {"http": httpx.Timeout(connect=4, read=8, write=5, pool=10),
|
||||||
|
"https": httpx.Timeout(connect=7, read=12, write=5, pool=15)}
|
||||||
|
|
||||||
for scheme in ("http", "https"):
|
for scheme in ("http", "https"):
|
||||||
try:
|
try:
|
||||||
async with httpx.AsyncClient(
|
async with httpx.AsyncClient(
|
||||||
timeout=httpx.Timeout(connect=7, read=12, write=5, pool=15),
|
timeout=timeouts[scheme],
|
||||||
follow_redirects=True,
|
follow_redirects=True,
|
||||||
headers=_HEADERS,
|
headers=_HEADERS,
|
||||||
verify=False,
|
verify=False,
|
||||||
@@ -128,13 +132,13 @@ async def _check_domain(domain: str) -> dict:
|
|||||||
result["prescreen_status"] = "live"
|
result["prescreen_status"] = "live"
|
||||||
return result
|
return result
|
||||||
|
|
||||||
except httpx.ConnectError:
|
|
||||||
# Port closed / connection refused — try the other scheme
|
|
||||||
logger.debug("Validator %s: ConnectError on %s, trying next scheme", domain, scheme)
|
|
||||||
continue
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug("Validator %s (%s): %s", domain, scheme, e)
|
# Any failure on http → always try https next
|
||||||
break # timeout or other error — don't retry
|
# Any failure on https → give up, leave as dead
|
||||||
|
logger.debug("Validator %s (%s): %s — %s", domain, scheme, type(e).__name__, e)
|
||||||
|
if scheme == "https":
|
||||||
|
break
|
||||||
|
# fall through to https
|
||||||
|
|
||||||
result["load_time_ms"] = int((time.monotonic() - t0) * 1000)
|
result["load_time_ms"] = int((time.monotonic() - t0) * 1000)
|
||||||
return result
|
return result
|
||||||
|
|||||||
Reference in New Issue
Block a user