fix: try https fallback when http port 80 is closed (fixes HTTPS-only domains marked as dead)

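Many modern servers refuse HTTP connections entirely. The validator was
only trying http://, causing HTTPS-only sites to be wrongly marked dead.
It now falls back to https:// when the http:// attempt raises
httpx.ConnectError; timeouts and other errors are still not retried.
Also increased timeouts: connect 5s -> 7s, read 8s -> 12s, pool 10s -> 15s.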

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-19 20:11:00 +02:00
parent 3f042196d3
commit ae2fad0152

View File

@@ -85,27 +85,29 @@ async def _check_domain(domain: str) -> dict:
"load_time_ms": None,
}
t0 = time.monotonic()
# Try http first (follows http→https redirects automatically).
# Fall back to https directly if port 80 is closed/refused — many modern
# servers only listen on 443 and would be wrongly marked dead otherwise.
for scheme in ("http", "https"):
try:
async with httpx.AsyncClient(
timeout=httpx.Timeout(connect=5, read=8, write=5, pool=10),
timeout=httpx.Timeout(connect=7, read=12, write=5, pool=15),
follow_redirects=True,
headers=_HEADERS,
verify=False,
max_redirects=5,
) as client:
resp = await client.get(f"http://{domain}")
resp = await client.get(f"{scheme}://{domain}")
result["load_time_ms"] = int((time.monotonic() - t0) * 1000)
result["status_code"] = resp.status_code
result["server"] = (resp.headers.get("server") or "")[:100]
# Resolve IP for live-looking domains
result["ip"] = await _resolve_ip(domain)
final_url = str(resp.url)
final_host = urlparse(final_url).netloc.lower().lstrip("www.")
# Redirected to a different root domain?
if not _same_domain(domain, final_url):
for ph in PARKING_REDIRECT_HOSTS:
if ph in final_host:
@@ -126,8 +128,14 @@ async def _check_domain(domain: str) -> dict:
result["prescreen_status"] = "live"
return result
except httpx.ConnectError:
# Could not connect (e.g. port closed / connection refused) — move on to the next scheme, if any
logger.debug("Validator %s: ConnectError on %s, trying next scheme", domain, scheme)
continue
except Exception as e:
logger.debug("Validator %s: %s", domain, e)
logger.debug("Validator %s (%s): %s", domain, scheme, e)
break # timeout or other error — don't retry
result["load_time_ms"] = int((time.monotonic() - t0) * 1000)
return result