Files
InformatiQ-Toolkit/includes/class-itk-bot-blocker.php
Malin 52af2d9931 feat: global IP/CIDR/UA whitelist bypassing all restrictions
- class-itk-whitelist.php: static class with 5min transient cache,
  supports exact IP, CIDR notation, and ua: prefix for UA substrings
- config/whitelist.conf: editable config file (template with examples)
- whitelist check added to bot-blocker, WAF, protection (4 methods),
  and honeypot validator — matched requests skip all ITK enforcement
- admin: whitelist.conf added to Config Files editor tab

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-13 10:00:16 +02:00

313 lines
13 KiB
PHP
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
// Stop immediately unless ABSPATH is defined, i.e. unless this file was
// loaded through the WordPress bootstrap rather than requested directly.
if (!defined('ABSPATH')) exit;
/**
 * ITK Bot Blocker
 *
 * Detects and blocks malicious bots, bad referrers, and bad networks.
 * Known good/legitimate bots are rate-limited instead of blocked.
 *
 * Every check reads its options at call time, so toggling a setting (e.g.
 * via AJAX) takes effect on the next request without re-registering hooks.
 */
class ITK_Bot_Blocker {
    /** Seconds a parsed config list stays cached in a transient. */
    private const CACHE_TTL = 300;

    /** Config files larger than this many bytes are ignored. */
    private const MAX_CONF_BYTES = 1048576;

    /** Longest accepted entry (in bytes) in a block-list config file. */
    private const MAX_ENTRY_LENGTH = 200;

    /** Requests per minute for a good bot whose config line has no usable limit. */
    private const DEFAULT_GOOD_BOT_LIMIT = 30;

    /** OpenAI crawler tokens, shared by UA detection and the robots.txt rules. */
    private const OPENAI_CRAWLERS = ['GPTBot', 'ChatGPT-User', 'OAI-SearchBot'];

    private string $badbots_file;
    private string $referrers_file;
    private string $networks_file;
    private string $goodbots_file;

    /**
     * Resolves the config file paths and registers the WordPress hooks.
     */
    public function __construct() {
        $this->badbots_file   = ITK_PATH . 'config/badbots.conf';
        $this->referrers_file = ITK_PATH . 'config/referrers.conf';
        $this->networks_file  = ITK_PATH . 'config/networks.conf';
        $this->goodbots_file  = ITK_PATH . 'config/goodbots.conf';

        // Always hook; each method guards itself with its own option check.
        add_action('init', [$this, 'check_request'], 1);
        add_filter('robots_txt', [$this, 'modify_robots_txt'], 10, 2);
    }

    /* ── Main entry point ─────────────────────────────────────── */

    /**
     * Runs all enabled checks against the current request.
     *
     * Order: good-bot rate limiting, OpenAI bots, malicious bots, bad
     * referrers, bad networks. The first matching block check ends the
     * request (block() exits).
     */
    public function check_request(): void {
        // Skip admin-area requests and users who can manage options.
        // NOTE(review): is_admin() only says the request targets an admin
        // screen (including admin-ajax.php); it does not check that anyone is
        // logged in. Confirm that bypassing unauthenticated admin-ajax
        // requests is intended.
        if (is_admin() || (function_exists('current_user_can') && current_user_can('manage_options'))) {
            return;
        }

        // Skip all checks for whitelisted IPs/UAs.
        if (ITK_Whitelist::allowed()) {
            return;
        }

        $options  = $this->get_options();
        $ua       = $_SERVER['HTTP_USER_AGENT'] ?? '';
        $referrer = $_SERVER['HTTP_REFERER'] ?? '';
        $ip       = $this->get_client_ip();
        $uri      = $_SERVER['REQUEST_URI'] ?? '';

        // ── 1. Rate-limit good/legitimate bots ─────────────────
        if (!empty($options['rate_limit_good_bots'])) {
            $good_bot = $this->identify_good_bot($ua);
            if ($good_bot !== null) {
                $this->handle_good_bot($good_bot, $ua, $ip, $uri);
                return; // Handled; don't fall through to the block checks.
            }
        }

        // ── 2. Block OpenAI bots ───────────────────────────────
        if (!empty($options['block_openai_bots']) && $this->is_openai_bot($ua)) {
            $this->block('OpenAI bot detected', 'openai', $ua, $referrer, $ip, $uri, $options);
        }

        // ── 3. Block malicious bots ────────────────────────────
        if (!empty($options['block_malicious_bots']) && $this->is_malicious_bot($ua)) {
            $this->block('Malicious bot detected', 'malicious_bot', $ua, $referrer, $ip, $uri, $options);
        }

        // ── 4. Block bad referrers ─────────────────────────────
        if (!empty($options['block_bad_referrers']) && $this->is_bad_referrer($referrer)) {
            $this->block('Bad referrer detected', 'bad_referrer', $ua, $referrer, $ip, $uri, $options);
        }

        // ── 5. Block bad networks ──────────────────────────────
        if (!empty($options['block_bad_networks']) && $this->is_bad_network($ip)) {
            $this->block('IP in blocked network', 'bad_network', $ua, $referrer, $ip, $uri, $options);
        }
    }

    /**
     * Reads the 'itk_security' option, falling back to an empty array when
     * the stored value is not an array.
     */
    private function get_options(): array {
        $options = get_option('itk_security', []);
        return is_array($options) ? $options : [];
    }

    /* ── Good-bot rate limiting ───────────────────────────────── */

    /**
     * Returns ['name' => string, 'limit' => int] for the first good-bot entry
     * whose name occurs (case-insensitively) in the user agent, or null if
     * none matches. A limit of 0 means "never allow" (treat as blocked).
     *
     * Entries with an empty name are ignored: on PHP 8 an empty needle
     * matches every string, which would classify all traffic as a good bot.
     */
    private function identify_good_bot(string $ua): ?array {
        if ($ua === '') {
            return null;
        }

        $cache_key = 'itk_goodbots_list';
        $list      = get_transient($cache_key);
        if (!is_array($list)) {
            $list = [];
            foreach ($this->read_conf_lines($this->goodbots_file) ?? [] as $line) {
                $entry = $this->parse_good_bot_line($line);
                if ($entry !== null) {
                    $list[] = $entry;
                }
            }
            set_transient($cache_key, $list, self::CACHE_TTL);
        }

        foreach ($list as $entry) {
            // Re-validate: the cached list may predate the empty-name check.
            if (!is_array($entry) || !isset($entry['name']) || !is_string($entry['name']) || $entry['name'] === '') {
                continue;
            }
            if (stripos($ua, $entry['name']) !== false) {
                return [
                    'name'  => $entry['name'],
                    'limit' => (int)($entry['limit'] ?? self::DEFAULT_GOOD_BOT_LIMIT),
                ];
            }
        }
        return null;
    }

    /**
     * Parses one "name|limit" line from goodbots.conf.
     *
     * The limit is the leading run of digits after the pipe; if there is no
     * pipe or no leading digits, the default limit is used, so only an
     * explicit 0 turns a good bot into a blocked one.
     *
     * @return array{name:string,limit:int}|null Null when the name is empty.
     */
    private function parse_good_bot_line(string $line): ?array {
        $parts = explode('|', $line, 2);
        $name  = trim($parts[0]);
        if ($name === '') {
            return null;
        }

        $limit = self::DEFAULT_GOOD_BOT_LIMIT;
        if (isset($parts[1]) && preg_match('/^\s*(\d+)/', $parts[1], $m)) {
            $limit = (int)$m[1];
        }
        return ['name' => $name, 'limit' => $limit];
    }

    /**
     * Applies the per-bot request budget to a recognised good bot.
     *
     * A limit of 0 blocks the bot outright. Otherwise hits are counted per
     * bot name in a transient keyed by the current minute; once the count
     * reaches the limit the request is answered with HTTP 429 and the script
     * exits. Requests under the limit increment the counter and continue.
     *
     * @param array $bot Entry as returned by identify_good_bot().
     */
    private function handle_good_bot(array $bot, string $ua, string $ip, string $uri): void {
        $options = $this->get_options();
        $name    = $bot['name'];
        $limit   = (int)$bot['limit'];

        // Limit of 0 = always block this "good" bot (e.g. GPTBot still in goodbots.conf).
        if ($limit === 0) {
            $this->block("Good bot with limit 0: {$name}", $name, $ua, '', $ip, $uri, $options);
            return;
        }

        // Fixed one-minute window: the bucket index is part of the key, so
        // each minute starts from zero. The read-then-write below is not
        // atomic, so concurrent requests may be slightly under-counted.
        $window = (int)(time() / 60);
        $tk_key = 'itk_rl_' . md5($name) . '_' . $window;
        $count  = (int)get_transient($tk_key);

        if ($count >= $limit) {
            // Over the limit: log and send 429.
            $this->log_event([
                'ip'       => $ip,
                'ua'       => $ua,
                'referrer' => '',
                'uri'      => $uri,
                'bot_type' => $name,
                'reason'   => "Rate limited: {$count}/{$limit} req/min",
                'action'   => 'rate_limited',
            ], $options);

            status_header(429);
            header('Retry-After: 60');
            header('X-ITK-Rate-Limit: ' . $limit);
            echo 'Too Many Requests. Crawl-delay: 60';
            exit;
        }

        // Under the limit: increment the counter and allow the request
        // through. The 120 s expiry outlives the one-minute bucket.
        set_transient($tk_key, $count + 1, 120);
    }

    /* ── Blocking ─────────────────────────────────────────────── */

    /**
     * Logs the event (when logging is enabled) and terminates the request
     * with the configured response. Never returns.
     *
     * With response code '301_custom' and a usable redirect URL a 301
     * redirect is sent. Otherwise the configured numeric status is sent with
     * the escaped custom message; a '301_custom' setting without a usable
     * URL, or a status outside 100-599, falls back to 403.
     */
    private function block(
        string $reason,
        string $bot_type,
        string $ua,
        string $referrer,
        string $ip,
        string $uri,
        array $options
    ): void {
        $this->log_event([
            'ip'       => $ip,
            'ua'       => $ua,
            'referrer' => $referrer,
            'uri'      => $uri,
            'bot_type' => $bot_type,
            'reason'   => $reason,
            'action'   => 'blocked',
        ], $options);

        $code    = $options['response_code'] ?? '403';
        $message = $options['custom_message'] ?? 'Access denied.';
        $redir   = $options['redirect_url'] ?? '';

        if ($code === '301_custom') {
            $target = !empty($redir) ? esc_url_raw($redir) : '';
            if ($target !== '') {
                header('Location: ' . $target, true, 301);
                exit;
            }
            // A 301 without a Location header is useless; deny instead.
            $status = 403;
        } else {
            $status = (int)$code ?: 403;
        }

        if ($status < 100 || $status > 599) {
            $status = 403;
        }

        status_header($status);
        echo esc_html($message);
        exit;
    }

    /**
     * Writes the event to the bot log and queues it for the bot API, but
     * only when the 'log_blocked_attempts' option is enabled.
     */
    private function log_event(array $event, array $options): void {
        if (empty($options['log_blocked_attempts'])) {
            return;
        }
        ITK_Database::log_bot($event);
        ITK_Bot_API::queue($event);
    }

    /* ── Detection helpers ────────────────────────────────────── */

    /**
     * True when $haystack contains any of the needles, case-insensitively.
     * Non-string and empty needles are skipped.
     */
    private function contains_any(string $haystack, array $needles): bool {
        foreach ($needles as $needle) {
            if (is_string($needle) && $needle !== '' && stripos($haystack, $needle) !== false) {
                return true;
            }
        }
        return false;
    }

    /** True when the user agent contains an OpenAI crawler token or 'whisper'. */
    private function is_openai_bot(string $ua): bool {
        if ($ua === '') {
            return false;
        }
        return $this->contains_any($ua, array_merge(self::OPENAI_CRAWLERS, ['whisper']));
    }

    /** True when the user agent contains any entry from badbots.conf. */
    private function is_malicious_bot(string $ua): bool {
        if ($ua === '') {
            return false;
        }
        return $this->contains_any($ua, $this->load_conf_list($this->badbots_file, 'itk_bots_list'));
    }

    /** True when the referrer contains any entry from referrers.conf. */
    private function is_bad_referrer(string $referrer): bool {
        if ($referrer === '') {
            return false;
        }
        return $this->contains_any($referrer, $this->load_conf_list($this->referrers_file, 'itk_referrers_list'));
    }

    /**
     * True when the IP equals a single address in networks.conf or falls
     * inside one of its CIDR ranges. Addresses are compared in packed binary
     * form, so differently written but equal IPv6 addresses still match.
     */
    private function is_bad_network(string $ip): bool {
        if ($ip === '' || $ip === 'UNKNOWN' || filter_var($ip, FILTER_VALIDATE_IP) === false) {
            return false;
        }
        $ip_bin = inet_pton($ip);

        foreach ($this->load_conf_list($this->networks_file, 'itk_networks_list') as $network) {
            if (!is_string($network)) {
                continue;
            }
            if (strpos($network, '/') !== false) {
                if ($this->ip_in_cidr($ip, $network)) {
                    return true;
                }
            } elseif (filter_var($network, FILTER_VALIDATE_IP) !== false) {
                if (inet_pton($network) === $ip_bin) {
                    return true;
                }
            }
        }
        return false;
    }

    /* ── Robots.txt ───────────────────────────────────────────── */

    /**
     * 'robots_txt' filter: appends Disallow rules for the OpenAI crawlers
     * when the site is public and 'block_openai_bots' is enabled.
     *
     * @param string          $output Current robots.txt content.
     * @param string|bool|int $public Site visibility flag; any falsy value
     *                                ('0', 0, false, '') means non-public.
     */
    public function modify_robots_txt(string $output, $public): string {
        if (empty($public)) {
            return $output;
        }

        $options = $this->get_options();
        if (empty($options['block_openai_bots'])) {
            return $output;
        }

        $output .= "\n# InformatiQ Toolkit AI bot disallow\n";
        foreach (self::OPENAI_CRAWLERS as $bot) {
            $output .= "User-agent: {$bot}\nDisallow: /\n\n";
        }
        return $output;
    }

    /* ── Config file readers ──────────────────────────────────── */

    /**
     * Reads a config file and returns its trimmed, non-empty, non-comment
     * ('#') lines.
     *
     * @return string[]|null Null when the file is missing, unreadable, or
     *                       larger than MAX_CONF_BYTES.
     */
    private function read_conf_lines(string $file): ?array {
        if (!is_file($file) || !is_readable($file)) {
            return null;
        }
        $size = filesize($file);
        if ($size === false || $size > self::MAX_CONF_BYTES) {
            return null;
        }
        $raw = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($raw === false) {
            return null;
        }

        $lines = [];
        foreach ($raw as $line) {
            $line = trim($line);
            if ($line === '' || $line[0] === '#') {
                continue;
            }
            $lines[] = $line;
        }
        return $lines;
    }

    /**
     * Returns the entries of a block-list config file, cached in a transient.
     *
     * Entries longer than MAX_ENTRY_LENGTH or containing <, >, or a quote
     * character are dropped. An unavailable file yields an empty list that
     * is not cached, so the file is picked up as soon as it appears.
     */
    private function load_conf_list(string $file, string $cache_key): array {
        $cached = get_transient($cache_key);
        if (is_array($cached)) {
            return $cached;
        }

        $lines = $this->read_conf_lines($file);
        if ($lines === null) {
            return [];
        }

        $list = [];
        foreach ($lines as $line) {
            if (strlen($line) <= self::MAX_ENTRY_LENGTH && !preg_match('/[<>"\']/', $line)) {
                $list[] = $line;
            }
        }
        set_transient($cache_key, $list, self::CACHE_TTL);
        return $list;
    }

    /** Drops all cached config lists so the next request re-reads the files. */
    public function invalidate_cache(): void {
        delete_transient('itk_bots_list');
        delete_transient('itk_referrers_list');
        delete_transient('itk_networks_list');
        delete_transient('itk_goodbots_list');
    }

    /* ── IP utilities ─────────────────────────────────────────── */

    /**
     * Determines the client IP address.
     *
     * Proxy headers are consulted first, in the order listed, and only a
     * public (non-private, non-reserved) address is accepted from them. If
     * none qualifies, REMOTE_ADDR is used when it is any valid IP.
     *
     * NOTE(review): the proxy headers are client-controlled unless a trusted
     * reverse proxy overwrites them, so a client can present an arbitrary
     * public address here. Confirm the deployment always sits behind such a
     * proxy, or restrict the header list.
     *
     * @return string A valid IP address, or 'UNKNOWN'.
     */
    public function get_client_ip(): string {
        $proxy_headers = [
            'HTTP_CLIENT_IP', 'HTTP_X_FORWARDED_FOR', 'HTTP_X_FORWARDED',
            'HTTP_X_CLUSTER_CLIENT_IP', 'HTTP_FORWARDED_FOR', 'HTTP_FORWARDED',
        ];

        foreach ($proxy_headers as $key) {
            if (empty($_SERVER[$key])) {
                continue;
            }
            // Headers may carry a comma-separated chain; take the first hop.
            $ip = trim(explode(',', $_SERVER[$key])[0]);
            if (filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
                return $ip;
            }
        }

        if (!empty($_SERVER['REMOTE_ADDR'])) {
            $ip = trim(explode(',', $_SERVER['REMOTE_ADDR'])[0]);
            if (filter_var($ip, FILTER_VALIDATE_IP)) {
                return $ip;
            }
        }
        return 'UNKNOWN';
    }

    /**
     * True when $ip lies inside the network given in CIDR notation.
     *
     * Works for IPv4 and IPv6. Returns false for malformed input, a prefix
     * length beyond the address width, or mixed address families.
     */
    private function ip_in_cidr(string $ip, string $cidr): bool {
        if (strpos($cidr, '/') === false) {
            return false;
        }
        [$subnet, $prefix] = explode('/', $cidr, 2);
        $subnet = trim($subnet);
        $prefix = trim($prefix);

        if (!preg_match('/^\d{1,3}\z/', $prefix)) {
            return false;
        }
        if (filter_var($ip, FILTER_VALIDATE_IP) === false || filter_var($subnet, FILTER_VALIDATE_IP) === false) {
            return false;
        }

        $ip_bin     = inet_pton($ip);
        $subnet_bin = inet_pton($subnet);
        if ($ip_bin === false || $subnet_bin === false || strlen($ip_bin) !== strlen($subnet_bin)) {
            return false; // Unparseable, or IPv4 vs IPv6 mismatch.
        }

        $bits = (int)$prefix;
        if ($bits > strlen($subnet_bin) * 8) {
            return false;
        }

        // Compare the whole bytes covered by the prefix...
        $whole = intdiv($bits, 8);
        if (substr($ip_bin, 0, $whole) !== substr($subnet_bin, 0, $whole)) {
            return false;
        }

        // ...then the leading bits of the next byte, if the prefix ends mid-byte.
        $rest = $bits % 8;
        if ($rest === 0) {
            return true;
        }
        $mask = (0xFF << (8 - $rest)) & 0xFF;
        return (ord($ip_bin[$whole]) & $mask) === (ord($subnet_bin[$whole]) & $mask);
    }

    /* ── Accessors for admin ──────────────────────────────────── */

    public function get_badbots_file(): string { return $this->badbots_file; }
    public function get_referrers_file(): string { return $this->referrers_file; }
    public function get_networks_file(): string { return $this->networks_file; }
    public function get_goodbots_file(): string { return $this->goodbots_file; }
}