Add rate limit handling and retry logic for WHOIS lookups

Implements detection of rate limiting in WHOIS and RDAP responses in WhoisService, tracks last error type, and exposes methods to check and clear rate limit errors. Updates check_domains.php to queue domains that fail due to rate limits for retry with exponential backoff, grouped by TLD to avoid repeated rate limiting. Adds statistics and logging for retries and successful retry attempts, and introduces delays between checks and retries to reduce the likelihood of hitting rate limits.
This commit is contained in:
Hosteroid
2025-11-21 19:39:51 +02:00
parent a7321888c0
commit bcd956a495
2 changed files with 405 additions and 12 deletions

View File

@@ -14,6 +14,9 @@ class WhoisService
private const CACHE_TTL = 86400;
private TldRegistry $tldModel;
// Track last error type for rate limit detection
private static ?string $lastErrorType = null;
/**
* Clear TLD cache (useful for testing or forcing fresh lookups)
*/
@@ -21,6 +24,22 @@ class WhoisService
{
self::$tldCache = [];
}
/**
 * Report whether the most recently recorded lookup error was a rate limit.
 *
 * getDomainInfo() resets the recorded error type to null when it starts, so
 * the answer always refers to the latest lookup (or to nothing, after
 * clearLastError()).
 *
 * @return bool True only when the recorded error type is exactly 'rate_limit'.
 */
public static function wasLastErrorRateLimit(): bool
{
    $recordedError = self::$lastErrorType;

    return $recordedError === 'rate_limit';
}
/**
 * Reset the recorded error type to null.
 *
 * After this call wasLastErrorRateLimit() returns false until another
 * lookup records a new error type.
 */
public static function clearLastError(): void
{
self::$lastErrorType = null;
}
public function __construct()
{
@@ -32,6 +51,9 @@ class WhoisService
*/
public function getDomainInfo(string $domain): ?array
{
// Clear last error at start of each lookup
self::$lastErrorType = null;
try {
// Get TLD
$parts = explode('.', $domain);
@@ -89,13 +111,22 @@ class WhoisService
// Check if we got a referral to another WHOIS server
$referralServer = $this->extractReferralServer($whoisData);
if ($referralServer && $referralServer !== $whoisServer) {
$whoisData = $this->queryWhois($domain, $referralServer);
$referralWhoisData = $this->queryWhois($domain, $referralServer);
if ($referralWhoisData) {
$whoisData = $referralWhoisData;
}
}
if ($whoisData) {
// Parse WHOIS data to get expiration date and cleaner registrar
$whoisInfo = $this->parseWhoisData($domain, $whoisData, $referralServer ?? $whoisServer);
// If rate limited, skip WHOIS merge but keep RDAP data
if ($whoisInfo === null) {
// Rate limited - return RDAP data as-is
return $rdapData;
}
// Merge expiration date from WHOIS into RDAP data
if (!empty($whoisInfo['expiration_date'])) {
$rdapData['expiration_date'] = $whoisInfo['expiration_date'];
@@ -124,6 +155,7 @@ class WhoisService
$whoisData = $this->queryWhois($domain, $whoisServer);
if (!$whoisData) {
self::$lastErrorType = 'no_data';
$logger = new \App\Services\Logger();
$logger->warning('No WHOIS data received', [
'domain' => $domain,
@@ -143,8 +175,14 @@ class WhoisService
// Check if we got a referral to another WHOIS server
$referralServer = $this->extractReferralServer($whoisData);
if ($referralServer && $referralServer !== $whoisServer) {
// Check if the original response already has complete data
// Check if the original response already has complete data or is rate limited
$originalInfo = $this->parseWhoisData($domain, $whoisData, $whoisServer);
// If rate limited, return null immediately
if ($originalInfo === null) {
return null;
}
$hasCompleteData = !empty($originalInfo['registrar']) &&
$originalInfo['registrar'] !== 'Unknown' &&
!empty($originalInfo['expiration_date']);
@@ -179,6 +217,11 @@ class WhoisService
$actualServer = $referralServer ?? $whoisServer;
$info = $this->parseWhoisData($domain, $whoisData, $actualServer);
// If rate limited, return null
if ($info === null) {
return null;
}
// Override whois_server to reflect the actual server that provided the data
$info['whois_server'] = $actualServer;
@@ -196,6 +239,7 @@ class WhoisService
return $info;
} catch (Exception $e) {
self::$lastErrorType = 'exception';
$logger = new \App\Services\Logger();
$logger->error('WHOIS lookup failed', [
'domain' => $domain,
@@ -435,6 +479,18 @@ class WhoisService
'response_length' => strlen($response)
]);
// Handle rate limiting (HTTP 429)
if ($httpCode === 429) {
self::$lastErrorType = 'rate_limit';
$logger->warning("RDAP rate limit exceeded", [
'domain' => $domain,
'url' => $rdapUrl,
'http_code' => $httpCode
]);
// Return null to indicate rate limit - caller should handle gracefully
return null;
}
if ($httpCode === 200 && $response) {
$data = json_decode($response, true);
if ($data) {
@@ -675,11 +731,105 @@ class WhoisService
return $response;
}
/**
 * Decide whether a raw WHOIS response is a rate-limit notice instead of domain data.
 *
 * The heuristic runs in this order:
 *  1. A response longer than 500 bytes is treated as real data.
 *  2. A response containing any known WHOIS field label is treated as real data.
 *  3. Otherwise the response is matched against known rate-limit phrasings.
 *  4. A response shorter than 100 bytes also counts when "rate" and "limit"
 *     occur within 20 characters of each other, in either order.
 *
 * @param string $whoisData Raw text returned by the WHOIS server.
 * @return bool True when the response looks like a rate-limit error.
 */
private function isRateLimitError(string $whoisData): bool
{
    // Both length thresholds use the raw (untrimmed) byte length.
    $rawLength = strlen($whoisData);
    if ($rawLength > 500) {
        return false;
    }

    $normalized = strtolower(trim($whoisData));

    // Any of these labels means the server answered with actual record data.
    $fieldLabels = [
        'domain name:',
        'registrar:',
        'creation date:',
        'expiration date:',
        'updated date:',
        'nameserver:',
        'registry domain id:',
        'registrar whois server:',
        'registrar url:'
    ];
    foreach ($fieldLabels as $label) {
        // The haystack is already lower-cased and every label is lower-case,
        // so a case-sensitive search is sufficient here.
        if (strpos($normalized, $label) !== false) {
            return false;
        }
    }

    // Known phrasings of rate-limit errors. Patterns starting with ^ only
    // match at the very beginning of the trimmed response.
    $rateLimitPatterns = [
        // Exact error messages (most common formats)
        '/^error:\s*ratelimit/i',
        '/^error:\s*rate[\s\-_]?limit/i',
        '/^ratelimit\s+exceeded/i',
        '/^rate[\s\-_]?limit\s+exceeded/i',
        '/^rate[\s\-_]?limit\s+error/i',
        '/error:\s*ratelimit\s+exceeded/i',
        '/error:\s*rate[\s\-_]?limit\s+exceeded/i',
        // Other rate limit error formats
        '/too many requests/i',
        '/quota exceeded/i',
        '/^limit exceeded/i',
        // Rate limit in error context (at start of response)
        '/^error.*rate[\s\-_]?limit/i',
        '/^rate[\s\-_]?limit.*error/i',
    ];
    foreach ($rateLimitPatterns as $regex) {
        if (preg_match($regex, $normalized)) {
            return true;
        }
    }

    // Loose proximity match, restricted to very short responses to limit
    // false positives.
    return $rawLength < 100
        && preg_match('/rate.{0,20}limit|limit.{0,20}rate/i', $normalized) === 1;
}
/**
* Parse WHOIS data
*/
private function parseWhoisData(string $domain, string $whoisData, string $whoisServer = 'Unknown'): array
private function parseWhoisData(string $domain, string $whoisData, string $whoisServer = 'Unknown'): ?array
{
// Check for rate limit errors first
if ($this->isRateLimitError($whoisData)) {
self::$lastErrorType = 'rate_limit';
$logger = new \App\Services\Logger();
$logger->warning("WHOIS rate limit exceeded", [
'domain' => $domain,
'server' => $whoisServer,
'response_preview' => substr($whoisData, 0, 200)
]);
// Return null to indicate rate limit - caller should handle gracefully
return null;
}
$lines = explode("\n", $whoisData);
$data = [
'domain' => $domain,

View File

@@ -91,9 +91,14 @@ $stats = [
'checked' => 0,
'updated' => 0,
'notifications_sent' => 0,
'errors' => 0
'errors' => 0,
'retried' => 0,
'retry_succeeded' => 0
];
// Retry queue: domains that failed due to rate limiting
$retryQueue = [];
foreach ($domains as $domain) {
$domainName = $domain['domain_name'];
logMessage("Checking domain: $domainName");
@@ -103,14 +108,45 @@ foreach ($domains as $domain) {
$whoisData = $whoisService->getDomainInfo($domainName);
if (!$whoisData) {
logMessage(" ✗ Failed to get WHOIS data for $domainName");
$stats['errors']++;
// Check if this was a rate limit error
$wasRateLimited = WhoisService::wasLastErrorRateLimit();
$wasActive = in_array($domain['status'], ['active', 'expiring_soon']);
// Update domain status to error
$domainModel->update($domain['id'], [
'status' => 'error',
'last_checked' => date('Y-m-d H:i:s')
]);
if ($wasRateLimited && $wasActive) {
// Rate limited - add to retry queue instead of marking as error
logMessage(" ⚠ Rate limit for $domainName - queued for retry");
// Extract TLD for grouping retries
$parts = explode('.', $domainName);
$tld = $parts[count($parts) - 1];
$retryQueue[] = [
'domain' => $domain,
'tld' => $tld,
'attempt' => 0,
'last_error' => 'rate_limit'
];
$stats['retried']++;
} elseif ($wasActive) {
// Other temporary error - preserve status
logMessage(" ⚠ Temporary error for $domainName - preserving status");
$domainModel->update($domain['id'], [
'last_checked' => date('Y-m-d H:i:s')
]);
$stats['checked']++;
} else {
// Non-active domain or permanent error
logMessage(" ✗ Failed to get WHOIS data for $domainName");
$stats['errors']++;
$domainModel->update($domain['id'], [
'status' => 'error',
'last_checked' => date('Y-m-d H:i:s')
]);
}
// Add a small delay after errors to avoid overwhelming rate-limited servers
usleep(500000); // 0.5 seconds delay
continue;
}
@@ -136,7 +172,11 @@ foreach ($domains as $domain) {
$stats['updated']++;
logMessage(" ✓ Updated WHOIS data for $domainName");
logMessage(" Expiration: {$whoisData['expiration_date']}, Status: $status");
logMessage(" Expiration: " . ($whoisData['expiration_date'] ?? 'N/A') . ", Status: $status");
// Add a small delay between domain checks to avoid rate limiting
// This helps especially with .nl and other TLDs that have strict rate limits
usleep(1000000); // 1 second delay between checks
// Check if notifications should be sent
$daysLeft = $whoisService->daysUntilExpiration($whoisData['expiration_date']);
@@ -217,6 +257,207 @@ foreach ($domains as $domain) {
}
}
// Process retry queue with exponential backoff.
//
// $retryQueue holds the domains whose lookup was rate limited in the main
// loop. Each attempt walks the queue grouped by TLD; domains that are still
// rate limited are carried into the next attempt, everything else is either
// updated (success) or given up on (failure).
$maxRetries = 3;
// Seconds to sleep before the attempt with the same index. Attempt 0 runs
// immediately, so index 0 is never read and the waits applied are 60s, 120s.
$retryDelays = [30, 60, 120];

if (!empty($retryQueue)) {
    logMessage("\n=== Processing retry queue (" . count($retryQueue) . " domain(s)) ===");

    // Bucket queue items by TLD so lookups for one TLD run back to back and a
    // pause can be inserted between TLDs.
    $groupByTld = static function (array $queue): array {
        $groups = [];
        foreach ($queue as $item) {
            $groups[$item['tld']][] = $item;
        }

        return $groups;
    };

    $tldGroups = $groupByTld($retryQueue);
    logMessage("Grouped into " . count($tldGroups) . " TLD group(s) for staggered retries");

    // Domains given up on while retrying: retries exhausted, a failure that
    // was not a rate limit, or an exception on the final attempt.
    $failedCount = 0;

    // Process each retry attempt
    for ($attempt = 0; $attempt < $maxRetries; $attempt++) {
        $remainingQueue = [];
        $delay = $retryDelays[$attempt] ?? 120;

        if ($attempt > 0) {
            logMessage("\n--- Retry attempt " . ($attempt + 1) . " after {$delay}s delay ---");
            sleep($delay);

            // Re-group: the queue now only contains what the previous attempt left over.
            $tldGroups = $groupByTld($retryQueue);
        } else {
            logMessage("\n--- Retry attempt " . ($attempt + 1) . " (immediate) ---");
        }

        // Process each TLD group with delays between groups
        $tldIndex = 0;
        foreach ($tldGroups as $tld => $tldDomains) {
            $tldIndex++;
            logMessage("Processing TLD group: .$tld (" . count($tldDomains) . " domain(s))");

            foreach ($tldDomains as $queueItem) {
                $domain = $queueItem['domain'];
                $domainName = $domain['domain_name'];
                $currentAttempt = $attempt + 1;
                $queueItem['attempt'] = $currentAttempt;

                logMessage(" Retrying domain: $domainName (attempt {$queueItem['attempt']})");

                try {
                    // Clear last error before retry
                    WhoisService::clearLastError();

                    // Retry WHOIS lookup
                    $whoisData = $whoisService->getDomainInfo($domainName);

                    if (!$whoisData) {
                        $wasRateLimited = WhoisService::wasLastErrorRateLimit();

                        if ($wasRateLimited && $currentAttempt < $maxRetries) {
                            // Still rate limited, queue for next retry
                            logMessage(" ⚠ Still rate limited - will retry again");
                            $remainingQueue[] = $queueItem;
                        } else {
                            // Failed after max retries or non-rate-limit error
                            logMessage(" ✗ Failed after {$currentAttempt} attempt(s)");
                            $failedCount++;

                            $wasActive = in_array($domain['status'], ['active', 'expiring_soon']);
                            if ($wasActive) {
                                // Preserve status if it was active
                                $domainModel->update($domain['id'], [
                                    'last_checked' => date('Y-m-d H:i:s')
                                ]);
                            } else {
                                $domainModel->update($domain['id'], [
                                    'status' => 'error',
                                    'last_checked' => date('Y-m-d H:i:s')
                                ]);
                            }
                        }

                        // Delay between retry attempts
                        usleep(1000000); // 1 second delay
                        continue;
                    }

                    // Success! Update domain
                    logMessage(" ✓ Retry successful for $domainName");
                    $stats['retry_succeeded']++;

                    // Fall back to the stored expiration date when the lookup returned none.
                    $expirationDate = $whoisData['expiration_date'] ?? $domain['expiration_date'];
                    $status = $whoisService->getDomainStatus($expirationDate, $whoisData['status'] ?? [], $whoisData);

                    $domainModel->update($domain['id'], [
                        'registrar' => $whoisData['registrar'],
                        'registrar_url' => $whoisData['registrar_url'] ?? null,
                        'expiration_date' => $expirationDate,
                        'updated_date' => $whoisData['updated_date'] ?? null,
                        'abuse_email' => $whoisData['abuse_email'] ?? null,
                        'last_checked' => date('Y-m-d H:i:s'),
                        'status' => $status,
                        'whois_data' => json_encode($whoisData)
                    ]);

                    $stats['checked']++;
                    $stats['updated']++;

                    // Check notifications for successfully retried domains.
                    // Uses the same date that was just persisted, which also avoids
                    // reading a missing 'expiration_date' key from the lookup result.
                    // NOTE(review): assumes daysUntilExpiration() accepts null - confirm.
                    $daysLeft = $whoisService->daysUntilExpiration($expirationDate);

                    if ($daysLeft !== null) {
                        $shouldNotify = false;
                        $notificationType = '';

                        if ($daysLeft <= 0) {
                            $shouldNotify = true;
                            $notificationType = 'expired';
                        } elseif (in_array($daysLeft, $notificationDays)) {
                            $shouldNotify = true;
                            $notificationType = "expiring_in_{$daysLeft}_days";
                        }

                        if ($shouldNotify && !$logModel->wasSentRecently($domain['id'], $notificationType, 23)) {
                            if ($domain['notification_group_id']) {
                                $channels = $channelModel->getActiveByGroupId($domain['notification_group_id']);

                                if (!empty($channels)) {
                                    // Re-read the row so the alert carries the values written above.
                                    $domainData = $domainModel->find($domain['id']);
                                    $results = $notificationService->sendDomainExpirationAlert($domainData, $channels);

                                    foreach ($results as $result) {
                                        if ($result['success']) {
                                            $stats['notifications_sent']++;
                                        }

                                        $logModel->log(
                                            $domain['id'],
                                            $notificationType,
                                            $result['channel'],
                                            "Domain $domainName expires in $daysLeft days",
                                            $result['success'],
                                            $result['success'] ? null : "Failed to send notification"
                                        );
                                    }
                                }
                            }
                        }
                    }

                    // Delay between successful retries
                    usleep(1000000); // 1 second delay
                } catch (Exception $e) {
                    logMessage(" ✗ Exception during retry: " . $e->getMessage());

                    if ($currentAttempt < $maxRetries) {
                        $remainingQueue[] = $queueItem;
                    } else {
                        // No attempts left: count it so the summary does not report success.
                        $failedCount++;
                    }
                }
            }

            // Delay between TLD groups to avoid hitting rate limits
            if ($tldIndex < count($tldGroups)) {
                sleep(5); // 5 seconds between TLD groups
            }
        }

        // Carry the leftovers forward BEFORE testing for completion. Breaking
        // first would leave $retryQueue holding this attempt's input, and the
        // code after the loop would then treat finished domains as failed.
        $retryQueue = $remainingQueue;

        if (empty($retryQueue)) {
            if ($failedCount === 0) {
                logMessage("All retries completed successfully");
            }
            break;
        }

        if ($attempt < $maxRetries - 1) {
            logMessage(count($retryQueue) . " domain(s) remaining for next retry");
        }
    }

    // Anything still queued here never received a final verdict (only
    // reachable when no attempt ran, e.g. $maxRetries set to 0). Preserve the
    // stored status and just record that a check was attempted.
    foreach ($retryQueue as $queueItem) {
        $domain = $queueItem['domain'];
        $failedCount++;

        $wasActive = in_array($domain['status'], ['active', 'expiring_soon']);
        if ($wasActive) {
            $domainModel->update($domain['id'], [
                'last_checked' => date('Y-m-d H:i:s')
            ]);
        }
    }

    if ($failedCount > 0) {
        logMessage("\n" . $failedCount . " domain(s) still failed after up to {$maxRetries} retry attempts");
    }

    logMessage("=== Retry queue processing completed ===\n");
}
// Update last check run timestamp
$settingModel->updateLastCheckRun();
@@ -231,6 +472,8 @@ logMessage("Domains checked: {$stats['checked']}");
logMessage("Domains updated: {$stats['updated']}");
logMessage("Notifications sent: {$stats['notifications_sent']}");
logMessage("Errors: {$stats['errors']}");
logMessage("Domains queued for retry: {$stats['retried']}");
logMessage("Retries succeeded: {$stats['retry_succeeded']}");
logMessage("Execution time: $formattedTime");
logMessage("==========================\n");