badbots_file = ITK_PATH . 'config/badbots.conf';
        $this->referrers_file = ITK_PATH . 'config/referrers.conf';
        $this->networks_file  = ITK_PATH . 'config/networks.conf';
        $this->goodbots_file  = ITK_PATH . 'config/goodbots.conf';

        // Always hook; each method guards itself with its own option check.
        add_action('init', [$this, 'check_request'], 1);
        add_filter('robots_txt', [$this, 'modify_robots_txt'], 10, 2);
    }

    /* ── Main entry point ─────────────────────────────────────── */

    /**
     * Inspects the current request and blocks or rate-limits it according to
     * the `itk_security` option.
     *
     * Checks run in a fixed order: good-bot rate limiting, OpenAI bots,
     * malicious bots, bad referrers, bad networks. block() terminates the
     * request, so at most one check ever fires.
     */
    public function check_request(): void
    {
        // Never block logged-in admins.
        // NOTE(review): is_admin() reports whether an admin-area script is being
        // requested, not whether the visitor is logged in, so unauthenticated
        // requests to admin-area endpoints are also skipped here – confirm this
        // is intended.
        if (is_admin() || (function_exists('current_user_can') && current_user_can('manage_options'))) {
            return;
        }

        $options  = get_option('itk_security', []);
        $ua       = $_SERVER['HTTP_USER_AGENT'] ?? '';
        $referrer = $_SERVER['HTTP_REFERER'] ?? '';
        $ip       = $this->get_client_ip();
        $uri      = $_SERVER['REQUEST_URI'] ?? '';

        // ── 1. Rate-limit good/legitimate bots ─────────────────
        if (!empty($options['rate_limit_good_bots'])) {
            $good_bot = $this->identify_good_bot($ua);
            if ($good_bot !== null) {
                $this->handle_good_bot($good_bot, $ua, $ip, $uri);
                return; // Handled – don't fall through to block checks.
            }
        }

        // ── 2. Block OpenAI bots ───────────────────────────────
        if (!empty($options['block_openai_bots']) && $this->is_openai_bot($ua)) {
            $this->block('OpenAI bot detected', 'openai', $ua, $referrer, $ip, $uri, $options);
        }

        // ── 3. Block malicious bots ────────────────────────────
        if (!empty($options['block_malicious_bots']) && $this->is_malicious_bot($ua)) {
            $this->block('Malicious bot detected', 'malicious_bot', $ua, $referrer, $ip, $uri, $options);
        }

        // ── 4. Block bad referrers ─────────────────────────────
        if (!empty($options['block_bad_referrers']) && $this->is_bad_referrer($referrer)) {
            $this->block('Bad referrer detected', 'bad_referrer', $ua, $referrer, $ip, $uri, $options);
        }

        // ── 5. Block bad networks ──────────────────────────────
        if (!empty($options['block_bad_networks']) && $this->is_bad_network($ip)) {
            $this->block('IP in blocked network', 'bad_network', $ua, $referrer, $ip, $uri, $options);
        }
    }

    /* ── Good-bot rate limiting ───────────────────────────────── */

    /**
     * Returns ['name' => string, 'limit' => int] or null if not a known good bot.
     * A limit of 0 means "never allow" (treat as blocked).
     *
     * Entries are read from goodbots.conf, one `name|limit` pair per line
     * (limit defaults to 30 when the `|limit` part is absent), and cached in
     * a transient for 300 seconds. Matching is a case-insensitive substring
     * test against the user agent; the first matching entry wins.
     */
    private function identify_good_bot(string $ua): ?array
    {
        if (empty($ua)) {
            return null;
        }

        $cache_key = 'itk_goodbots_list';
        $list      = get_transient($cache_key);

        // Rebuild on a cache miss, and also when the cached value is not a list.
        if (!is_array($list)) {
            $list = [];
            if (file_exists($this->goodbots_file)) {
                // file() returns false when the file cannot be read.
                $lines = file($this->goodbots_file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) ?: [];
                foreach ($lines as $line) {
                    $line = trim($line);
                    if ($line === '' || $line[0] === '#') {
                        continue;
                    }
                    $parts = explode('|', $line, 2);
                    $name  = trim($parts[0]);
                    if ($name === '') {
                        // A nameless entry (e.g. "|30") would match every user agent.
                        continue;
                    }
                    $list[] = ['name' => $name, 'limit' => isset($parts[1]) ? (int)$parts[1] : 30];
                }
            }
            set_transient($cache_key, $list, 300);
        }

        foreach ($list as $entry) {
            // The empty-name guard also covers lists cached before it existed:
            // stripos() with an empty needle matches any string on PHP 8.
            if ($entry['name'] !== '' && stripos($ua, $entry['name']) !== false) {
                return $entry;
            }
        }

        return null;
    }

    /**
     * Applies the per-minute request limit for a recognised good bot.
     *
     * Sends a 429 response and exits when the bot is over its limit, blocks
     * outright when the limit is 0, otherwise counts the hit and returns so the
     * request proceeds.
     *
     * @param array  $bot Entry as returned by identify_good_bot().
     * @param string $ua  Request user agent (logged).
     * @param string $ip  Client IP (logged).
     * @param string $uri Request URI (logged).
     */
    private function handle_good_bot(array $bot, string $ua, string $ip, string $uri): void
    {
        $options = get_option('itk_security', []);
        $name    = $bot['name'];
        $limit   = (int)$bot['limit'];

        // Limit of 0 = always block this "good" bot (e.g. GPTBot still in goodbots.conf)
        if ($limit === 0) {
            $this->block("Good bot with limit 0: {$name}", $name, $ua, '', $ip, $uri, $options);
            return;
        }

        // Fixed one-minute window: hits are counted per bot name in a transient
        // keyed by the current minute. The counter is read and then written, so
        // concurrent requests can undercount.
        $window = (int)(time() / 60);
        $tk_key = 'itk_rl_' . md5($name) . '_' . $window;
        $count  = (int)get_transient($tk_key);

        if ($count >= $limit) {
            // Over the limit – log and send 429.
            if (!empty($options['log_blocked_attempts'])) {
                ITK_Database::log_bot([
                    'ip'       => $ip,
                    'ua'       => $ua,
                    'referrer' => '',
                    'uri'      => $uri,
                    'bot_type' => $name,
                    'reason'   => "Rate limited: {$count}/{$limit} req/min",
                    'action'   => 'rate_limited',
                ]);
            }
            status_header(429);
            header('Retry-After: 60');
            header('X-ITK-Rate-Limit: ' . $limit);
            echo 'Too Many Requests. Crawl-delay: 60';
            exit;
        }

        // Under the limit – increment counter and allow through.
        set_transient($tk_key, $count + 1, 120);
    }

    /* ── Blocking ─────────────────────────────────────────────── */

    /**
     * Optionally logs the attempt, sends the configured response and ends the
     * request. This method never returns.
     *
     * Response selection:
     *  - `response_code` of '301_custom' with a usable `redirect_url` sends a
     *    301 redirect to that URL;
     *  - '301_custom' without a usable URL falls back to 403;
     *  - anything else is cast to an integer status (403 when that yields 0)
     *    and sent with the escaped `custom_message` as the body.
     */
    private function block(
        string $reason,
        string $bot_type,
        string $ua,
        string $referrer,
        string $ip,
        string $uri,
        array $options
    ): void {
        if (!empty($options['log_blocked_attempts'])) {
            ITK_Database::log_bot([
                'ip'       => $ip,
                'ua'       => $ua,
                'referrer' => $referrer,
                'uri'      => $uri,
                'bot_type' => $bot_type,
                'reason'   => $reason,
                'action'   => 'blocked',
            ]);
        }

        $code    = $options['response_code'] ?? '403';
        $message = $options['custom_message'] ?? 'Access denied.';
        $redir   = $options['redirect_url'] ?? '';

        $wants_redirect = ($code === '301_custom');
        $target         = ($wants_redirect && !empty($redir)) ? esc_url_raw($redir) : '';

        if ($target !== '') {
            header('Location: ' . $target, true, 301);
            exit;
        }

        // (int)'301_custom' is 301; sending that without a Location header would
        // produce a broken redirect, so fall back to a plain 403.
        $status = $wants_redirect ? 403 : ((int)$code ?: 403);
        status_header($status);
        echo esc_html($message);
        exit;
    }

    /* ── Detection helpers ────────────────────────────────────── */

    /**
     * True when the user agent contains one of the hard-coded OpenAI markers
     * (case-insensitive substring match).
     */
    private function is_openai_bot(string $ua): bool
    {
        if (empty($ua)) {
            return false;
        }
        foreach (['GPTBot', 'ChatGPT-User', 'OAI-SearchBot', 'whisper'] as $b) {
            if (stripos($ua, $b) !== false) {
                return true;
            }
        }
        return false;
    }

    /**
     * True when the user agent contains any entry from badbots.conf
     * (case-insensitive substring match).
     */
    private function is_malicious_bot(string $ua): bool
    {
        if (empty($ua)) {
            return false;
        }
        foreach ($this->load_conf_list($this->badbots_file, 'itk_bots_list') as $bot) {
            if (stripos($ua, $bot) !== false) {
                return true;
            }
        }
        return false;
    }

    /**
     * True when the referrer contains any entry from referrers.conf
     * (case-insensitive substring match).
     */
    private function is_bad_referrer(string $referrer): bool
    {
        if (empty($referrer)) {
            return false;
        }
        foreach ($this->load_conf_list($this->referrers_file, 'itk_referrers_list') as $ref) {
            if (stripos($referrer, $ref) !== false) {
                return true;
            }
        }
        return false;
    }

    /**
     * True when the IP equals a single-address entry, or falls inside a CIDR
     * entry, of networks.conf. Entries that are neither are ignored.
     */
    private function is_bad_network(string $ip): bool
    {
        if (empty($ip) || $ip === 'UNKNOWN') {
            return false;
        }

        // Packed form lets differently written but equal IPv6 addresses match.
        $ip_bin = filter_var($ip, FILTER_VALIDATE_IP) ? inet_pton($ip) : false;

        foreach ($this->load_conf_list($this->networks_file, 'itk_networks_list') as $network) {
            if (filter_var($network, FILTER_VALIDATE_IP)) {
                if ($ip === $network || ($ip_bin !== false && $ip_bin === inet_pton($network))) {
                    return true;
                }
            } elseif (strpos($network, '/') !== false) {
                if ($this->ip_in_cidr($ip, $network)) {
                    return true;
                }
            }
        }
        return false;
    }

    /* ── Robots.txt ───────────────────────────────────────────── */

    /**
     * `robots_txt` filter callback: appends Disallow rules for the OpenAI
     * crawlers when `block_openai_bots` is enabled. Output is returned
     * untouched when `$public` is '0' or the option is off.
     */
    public function modify_robots_txt(string $output, string $public): string
    {
        if ($public === '0') {
            return $output;
        }

        $options = get_option('itk_security', []);
        if (empty($options['block_openai_bots'])) {
            return $output;
        }

        $output .= "\n# InformatiQ Toolkit – AI bot disallow\n";
        foreach (['GPTBot', 'ChatGPT-User', 'OAI-SearchBot'] as $bot) {
            $output .= "User-agent: {$bot}\nDisallow: /\n\n";
        }
        return $output;
    }

    /* ── Config file readers ──────────────────────────────────── */

    /**
     * Loads a one-entry-per-line config file, cached in a transient for 300
     * seconds.
     *
     * Blank lines and lines starting with '#' are skipped, as are lines longer
     * than 200 bytes or containing any of < > " '. A missing file, or one
     * larger than 1 MiB, yields an empty list that is not cached.
     *
     * @param string $file      Path of the config file.
     * @param string $cache_key Transient name used for the parsed list.
     * @return array List of trimmed entries.
     */
    private function load_conf_list(string $file, string $cache_key): array
    {
        $cached = get_transient($cache_key);
        // Only trust the cache when it really holds a list; anything else is rebuilt.
        if (is_array($cached)) {
            return $cached;
        }

        if (!file_exists($file) || filesize($file) > 1048576) {
            return [];
        }

        // file() returns false when the file cannot be read.
        $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES) ?: [];
        $list  = [];

        foreach ($lines as $line) {
            $line = trim($line);
            if ($line === '' || $line[0] === '#') {
                continue;
            }
            if (strlen($line) <= 200 && !preg_match('/[<>"\']/', $line)) {
                $list[] = $line;
            }
        }

        set_transient($cache_key, $list, 300);
        return $list;
    }

    /**
     * Drops all cached config lists so the next request re-reads the files.
     */
    public function invalidate_cache(): void
    {
        delete_transient('itk_bots_list');
        delete_transient('itk_referrers_list');
        delete_transient('itk_networks_list');
        delete_transient('itk_goodbots_list');
    }

    /* ── IP utilities ─────────────────────────────────────────── */

    /**
     * Returns the client IP, or 'UNKNOWN' when none can be determined.
     *
     * Forwarding headers are consulted first and accepted only when they hold
     * a public (non-private, non-reserved) address; REMOTE_ADDR is the
     * fallback and accepts any valid address. For comma-separated values only
     * the first element is considered.
     *
     * NOTE(review): every key except REMOTE_ADDR comes from request headers
     * the client controls, so a direct visitor can present an arbitrary public
     * address and sidestep network blocking. Restricting header use to
     * requests arriving from known proxies would close that – left unchanged
     * here because it needs configuration this class does not have.
     */
    public function get_client_ip(): string
    {
        $keys = [
            'HTTP_CLIENT_IP',
            'HTTP_X_FORWARDED_FOR',
            'HTTP_X_FORWARDED',
            'HTTP_X_CLUSTER_CLIENT_IP',
            'HTTP_FORWARDED_FOR',
            'HTTP_FORWARDED',
            'REMOTE_ADDR',
        ];

        foreach ($keys as $key) {
            if (empty($_SERVER[$key])) {
                continue;
            }
            $ip = trim(explode(',', $_SERVER[$key])[0]);

            if ($key !== 'REMOTE_ADDR'
                && filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
                return $ip;
            }
            if ($key === 'REMOTE_ADDR' && filter_var($ip, FILTER_VALIDATE_IP)) {
                return $ip;
            }
        }

        return 'UNKNOWN';
    }

    /**
     * Tests whether an address lies inside a CIDR range.
     *
     * Works for IPv4 and IPv6; address and subnet must be of the same family.
     * The prefix length must be a plain decimal integer no larger than the
     * family's bit width (32 or 128). Anything malformed yields false.
     *
     * @param string $ip   Address to test.
     * @param string $cidr Range in `subnet/prefix` notation.
     */
    private function ip_in_cidr(string $ip, string $cidr): bool
    {
        if (strpos($cidr, '/') === false) {
            return false;
        }

        [$subnet, $mask] = explode('/', $cidr, 2);
        $subnet = trim($subnet);
        $mask   = trim($mask);

        // Digits only: is_numeric() would also let "24.5" or "1e1" through.
        if (!preg_match('/^\d{1,3}$/', $mask)) {
            return false;
        }
        if (!filter_var($subnet, FILTER_VALIDATE_IP) || !filter_var($ip, FILTER_VALIDATE_IP)) {
            return false;
        }

        $ip_bin  = inet_pton($ip);
        $sub_bin = inet_pton($subnet);
        if ($ip_bin === false || $sub_bin === false || strlen($ip_bin) !== strlen($sub_bin)) {
            return false; // Unparseable, or IPv4 vs IPv6 mismatch.
        }

        $bits = (int)$mask;
        if ($bits > strlen($ip_bin) * 8) {
            return false;
        }

        // Compare the whole bytes covered by the prefix, then the leftover bits.
        // Byte-wise comparison avoids depending on the platform integer width.
        $whole = intdiv($bits, 8);
        $rest  = $bits % 8;

        if ($whole > 0 && strncmp($ip_bin, $sub_bin, $whole) !== 0) {
            return false;
        }
        if ($rest === 0) {
            return true;
        }

        $byte_mask = (0xFF << (8 - $rest)) & 0xFF;
        return (ord($ip_bin[$whole]) & $byte_mask) === (ord($sub_bin[$whole]) & $byte_mask);
    }

    /* ── Accessors for admin ──────────────────────────────────── */

    /** Path of the bad-bots config file. */
    public function get_badbots_file(): string
    {
        return $this->badbots_file;
    }

    /** Path of the bad-referrers config file. */
    public function get_referrers_file(): string
    {
        return $this->referrers_file;
    }

    /** Path of the blocked-networks config file. */
    public function get_networks_file(): string
    {
        return $this->networks_file;
    }

    /** Path of the good-bots config file. */
    public function get_goodbots_file(): string
    {
        return $this->goodbots_file;
    }
}