Files
InformatiQ-Toolkit/includes/class-itk-bot-blocker.php
Malin a8d7972ad7 feat: add Central API clients, bot rate limiting, and admin API UI
- Add ITK_HP_API and ITK_Bot_API static classes with queue/flush/cron
- Add WP-Cron (5 min) + shutdown flush for both API queues
- Bot Blocker and Honeypot now queue events to their respective APIs
- Admin: Bot Blocker tab gains Central Bot API settings panel
  (enable, URL, token, test connection, flush queue, historical sync)
- Admin: Honeypot tab gains Central Honeypot API settings panel
- Admin JS: AJAX handlers for Test Connection and Flush Now buttons
- Admin CSS: API card styles (status badge, notices, footer controls)
- Add .gitignore (excludes bot-api/ which lives in CloudHost/bot-api)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-09 18:32:27 +02:00

310 lines
12 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
// Stop immediately when ABSPATH is not defined, i.e. the file was requested
// directly instead of being loaded by WordPress.
if (!defined('ABSPATH')) {
    exit;
}
/**
* ITK Bot Blocker
*
* Handles detection and blocking of malicious bots, bad referrers, and bad
* networks. Good/legitimate bots are rate-limited instead of blocked.
*
* Deactivation bug fix: every check method reads options at call time so
* toggling a setting via AJAX takes effect immediately without any hook
* re-registration.
*/
class ITK_Bot_Blocker {
    /** Path of the list matched against the User-Agent by is_malicious_bot(). */
    private string $badbots_file;

    /** Path of the list matched against the Referer by is_bad_referrer(). */
    private string $referrers_file;

    /** Path of the IP / CIDR list used by is_bad_network(). */
    private string $networks_file;

    /** Path of the "name|limit" list used by identify_good_bot(). */
    private string $goodbots_file;

    /**
     * Resolves the config file paths and registers the WordPress hooks.
     */
    public function __construct() {
        $this->badbots_file   = ITK_PATH . 'config/badbots.conf';
        $this->referrers_file = ITK_PATH . 'config/referrers.conf';
        $this->networks_file  = ITK_PATH . 'config/networks.conf';
        $this->goodbots_file  = ITK_PATH . 'config/goodbots.conf';

        // Always hook; each method guards itself with its own option check.
        add_action('init', [$this, 'check_request'], 1);
        add_filter('robots_txt', [$this, 'modify_robots_txt'], 10, 2);
    }

    /* ── Main entry point ─────────────────────────────────────── */

    /**
     * Inspects the current request and rate-limits or blocks it according
     * to the `itk_security` option. Returns normally when the request is
     * allowed; a blocked or rate-limited request ends the script.
     */
    public function check_request(): void {
        // Never block logged-in admins.
        // NOTE(review): is_admin() is documented by WordPress as a URL check,
        // not an authentication check, so this also exempts unauthenticated
        // requests to admin URLs. Confirm that is intended.
        if (is_admin() || (function_exists('current_user_can') && current_user_can('manage_options'))) {
            return;
        }

        $options  = get_option('itk_security', []);
        $ua       = $_SERVER['HTTP_USER_AGENT'] ?? '';
        $referrer = $_SERVER['HTTP_REFERER'] ?? '';
        $ip       = $this->get_client_ip();
        $uri      = $_SERVER['REQUEST_URI'] ?? '';

        // ── 1. Rate-limit good/legitimate bots ─────────────────
        if (!empty($options['rate_limit_good_bots'])) {
            $good_bot = $this->identify_good_bot($ua);
            if ($good_bot !== null) {
                $this->handle_good_bot($good_bot, $ua, $ip, $uri);
                return; // Handled; don't fall through to the block checks.
            }
        }

        // ── 2. Block OpenAI bots ───────────────────────────────
        if (!empty($options['block_openai_bots']) && $this->is_openai_bot($ua)) {
            $this->block('OpenAI bot detected', 'openai', $ua, $referrer, $ip, $uri, $options);
        }

        // ── 3. Block malicious bots ────────────────────────────
        if (!empty($options['block_malicious_bots']) && $this->is_malicious_bot($ua)) {
            $this->block('Malicious bot detected', 'malicious_bot', $ua, $referrer, $ip, $uri, $options);
        }

        // ── 4. Block bad referrers ─────────────────────────────
        if (!empty($options['block_bad_referrers']) && $this->is_bad_referrer($referrer)) {
            $this->block('Bad referrer detected', 'bad_referrer', $ua, $referrer, $ip, $uri, $options);
        }

        // ── 5. Block bad networks ──────────────────────────────
        if (!empty($options['block_bad_networks']) && $this->is_bad_network($ip)) {
            $this->block('IP in blocked network', 'bad_network', $ua, $referrer, $ip, $uri, $options);
        }
    }

    /* ── Good-bot rate limiting ───────────────────────────────── */

    /**
     * Returns ['name' => string, 'limit' => int] or null if not a known good bot.
     * A limit of 0 means "never allow" (treat as blocked).
     *
     * The list is parsed from goodbots.conf ("name|limit" per line, "#" starts
     * a comment line, limit defaults to 30) and cached in a transient for
     * 300 seconds.
     */
    private function identify_good_bot(string $ua): ?array {
        if (empty($ua)) return null;

        $cache_key = 'itk_goodbots_list';
        $list      = get_transient($cache_key);

        // Anything that is not an array (false on a miss, or an unexpected
        // value) is treated as a cache miss.
        if (!is_array($list)) {
            $list  = [];
            $lines = file_exists($this->goodbots_file)
                ? file($this->goodbots_file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
                : false;

            // file() returns false when the file cannot be read.
            foreach ($lines ?: [] as $line) {
                $line = trim($line);
                if ($line === '' || $line[0] === '#') continue;

                $parts = explode('|', $line, 2);
                $name  = trim($parts[0]);

                // A blank name would be an empty needle, which matches every
                // user agent on PHP 8; skip such lines.
                if ($name === '') continue;

                $list[] = ['name' => $name, 'limit' => isset($parts[1]) ? (int)$parts[1] : 30];
            }
            set_transient($cache_key, $list, 300);
        }

        foreach ($list as $entry) {
            // Also ignore blank names that may still sit in an older cache.
            $name = $entry['name'] ?? '';
            if ($name === '') continue;
            if (stripos($ua, $name) !== false) {
                return $entry;
            }
        }
        return null;
    }

    /**
     * Applies the per-bot request budget to a recognised good bot.
     *
     * Returns normally when the request is within budget. A limit of 0 is
     * delegated to block(); an exhausted budget answers 429 and exits.
     *
     * @param array $bot Entry returned by identify_good_bot().
     */
    private function handle_good_bot(array $bot, string $ua, string $ip, string $uri): void {
        $options = get_option('itk_security', []);
        $name    = $bot['name'];
        $limit   = (int)$bot['limit'];

        // Limit of 0 = always block this "good" bot (e.g. GPTBot still in goodbots.conf)
        if ($limit === 0) {
            $this->block("Good bot with limit 0: {$name}", $name, $ua, '', $ip, $uri, $options);
            return;
        }

        // Fixed window: hits are counted per bot name per clock minute, in a
        // transient whose key contains the minute number.
        // NOTE(review): the read-then-write below is not atomic, so concurrent
        // requests may undercount.
        $window = (int)(time() / 60); // 1-minute window
        $tk_key = 'itk_rl_' . md5($name) . '_' . $window;
        $count  = (int)get_transient($tk_key);

        if ($count >= $limit) {
            // Over the limit: log and send 429.
            if (!empty($options['log_blocked_attempts'])) {
                $event = [
                    'ip'       => $ip,
                    'ua'       => $ua,
                    'referrer' => '',
                    'uri'      => $uri,
                    'bot_type' => $name,
                    'reason'   => "Rate limited: {$count}/{$limit} req/min",
                    'action'   => 'rate_limited',
                ];
                ITK_Database::log_bot($event);
                ITK_Bot_API::queue($event);
            }
            status_header(429);
            header('Retry-After: 60');
            header('X-ITK-Rate-Limit: ' . $limit);
            echo 'Too Many Requests. Crawl-delay: 60';
            exit;
        }

        // Under the limit: increment the counter and allow the request through.
        // The 120 s lifetime outlives the one-minute window it belongs to.
        set_transient($tk_key, $count + 1, 120);
    }

    /* ── Blocking ─────────────────────────────────────────────── */

    /**
     * Optionally logs the event, sends the configured response and exits.
     *
     * Response selection from $options:
     *  - response_code '301_custom' with a usable redirect_url: 301 redirect;
     *  - response_code '301_custom' without a usable redirect_url: 403;
     *  - any other response_code: its integer value, or 403 when that is 0.
     *
     * @param array $options The `itk_security` option array.
     */
    private function block(
        string $reason,
        string $bot_type,
        string $ua,
        string $referrer,
        string $ip,
        string $uri,
        array $options
    ): void {
        if (!empty($options['log_blocked_attempts'])) {
            $event = [
                'ip'       => $ip,
                'ua'       => $ua,
                'referrer' => $referrer,
                'uri'      => $uri,
                'bot_type' => $bot_type,
                'reason'   => $reason,
                'action'   => 'blocked',
            ];
            ITK_Database::log_bot($event);
            ITK_Bot_API::queue($event);
        }

        $code    = $options['response_code'] ?? '403';
        $message = $options['custom_message'] ?? 'Access denied.';
        $redir   = $options['redirect_url'] ?? '';

        // Sanitise once so that a URL rejected by esc_url_raw() (which then
        // yields an empty string) is handled like a missing one.
        $target = ($code === '301_custom' && !empty($redir)) ? esc_url_raw($redir) : '';

        if ($target !== '') {
            header('Location: ' . $target, true, 301);
        } else {
            // (int)'301_custom' is 301; without a Location header that would be
            // a broken redirect, so fall back to 403 in that case.
            $status = ($code === '301_custom') ? 403 : ((int)$code ?: 403);
            status_header($status);
            echo esc_html($message);
        }
        exit;
    }

    /* ── Detection helpers ────────────────────────────────────── */

    /** True when the user agent contains one of the hard-coded OpenAI markers (case-insensitive). */
    private function is_openai_bot(string $ua): bool {
        if (empty($ua)) return false;
        foreach (['GPTBot', 'ChatGPT-User', 'OAI-SearchBot', 'whisper'] as $b) {
            if (stripos($ua, $b) !== false) return true;
        }
        return false;
    }

    /** True when the user agent contains any entry of badbots.conf (case-insensitive). */
    private function is_malicious_bot(string $ua): bool {
        if (empty($ua)) return false;
        foreach ($this->load_conf_list($this->badbots_file, 'itk_bots_list') as $bot) {
            if (stripos($ua, $bot) !== false) return true;
        }
        return false;
    }

    /** True when the referrer contains any entry of referrers.conf (case-insensitive). */
    private function is_bad_referrer(string $referrer): bool {
        if (empty($referrer)) return false;
        foreach ($this->load_conf_list($this->referrers_file, 'itk_referrers_list') as $ref) {
            if (stripos($referrer, $ref) !== false) return true;
        }
        return false;
    }

    /**
     * True when the IP equals a single address in networks.conf or falls
     * inside one of its CIDR ranges.
     */
    private function is_bad_network(string $ip): bool {
        if (empty($ip) || $ip === 'UNKNOWN') return false;

        // Compare single addresses in packed form so that different textual
        // spellings of the same IPv6 address still match.
        $ip_bin = filter_var($ip, FILTER_VALIDATE_IP) ? inet_pton($ip) : false;

        foreach ($this->load_conf_list($this->networks_file, 'itk_networks_list') as $network) {
            if (filter_var($network, FILTER_VALIDATE_IP)) {
                if ($ip === $network) return true;
                if ($ip_bin !== false && $ip_bin === inet_pton($network)) return true;
            } elseif (strpos($network, '/') !== false) {
                if ($this->ip_in_cidr($ip, $network)) return true;
            }
        }
        return false;
    }

    /* ── Robots.txt ───────────────────────────────────────────── */

    /**
     * `robots_txt` filter: appends Disallow rules for the OpenAI crawlers
     * when `block_openai_bots` is enabled and the site is public.
     *
     * @param string $output Current robots.txt content.
     * @param string $public Site visibility flag passed by the filter.
     * @return string Possibly extended robots.txt content.
     */
    public function modify_robots_txt(string $output, string $public): string {
        // A boolean false is coerced to '' by the string type, so test for any
        // falsy value ('0' or '') rather than for '0' only.
        if (empty($public)) return $output;

        $options = get_option('itk_security', []);
        if (empty($options['block_openai_bots'])) return $output;

        $output .= "\n# InformatiQ Toolkit AI bot disallow\n";
        foreach (['GPTBot', 'ChatGPT-User', 'OAI-SearchBot'] as $bot) {
            $output .= "User-agent: {$bot}\nDisallow: /\n\n";
        }
        return $output;
    }

    /* ── Config file readers ──────────────────────────────────── */

    /**
     * Returns the usable entries of a one-entry-per-line config file,
     * cached in the given transient for 300 seconds.
     *
     * Lines that are blank, start with "#", are longer than 200 bytes or
     * contain any of < > " ' are dropped. A missing, unreadable or larger
     * than 1 MiB file yields an empty list that is not cached.
     *
     * @return string[]
     */
    private function load_conf_list(string $file, string $cache_key): array {
        $cached = get_transient($cache_key);
        // Only trust an array; any other stored value would violate the
        // declared return type.
        if (is_array($cached)) return $cached;

        if (!file_exists($file) || filesize($file) > 1048576) return [];

        $lines = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($lines === false) return []; // unreadable file

        $list = [];
        foreach ($lines as $line) {
            $line = trim($line);
            if ($line === '' || $line[0] === '#') continue;
            if (strlen($line) <= 200 && !preg_match('/[<>"\']/', $line)) {
                $list[] = $line;
            }
        }
        set_transient($cache_key, $list, 300);
        return $list;
    }

    /** Deletes the four list transients so the config files are re-read on next use. */
    public function invalidate_cache(): void {
        delete_transient('itk_bots_list');
        delete_transient('itk_referrers_list');
        delete_transient('itk_networks_list');
        delete_transient('itk_goodbots_list');
    }

    /* ── IP utilities ─────────────────────────────────────────── */

    /**
     * Returns the client IP, or 'UNKNOWN' when none can be determined.
     *
     * Forwarding headers are consulted first; only the first comma-separated
     * value is considered and it must be a public (non-private, non-reserved)
     * address. REMOTE_ADDR is the final fallback and only needs to be a
     * valid IP.
     *
     * NOTE(review): the forwarding headers are client-controlled, so a client
     * can present an arbitrary public address here. Confirm the deployment
     * always sits behind a proxy that overwrites them.
     */
    public function get_client_ip(): string {
        $keys = [
            'HTTP_CLIENT_IP', 'HTTP_X_FORWARDED_FOR', 'HTTP_X_FORWARDED',
            'HTTP_X_CLUSTER_CLIENT_IP', 'HTTP_FORWARDED_FOR', 'HTTP_FORWARDED',
            'REMOTE_ADDR',
        ];
        foreach ($keys as $key) {
            if (empty($_SERVER[$key])) continue;
            $ip = trim(explode(',', $_SERVER[$key])[0]);
            if ($key !== 'REMOTE_ADDR' && filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
                return $ip;
            }
            if ($key === 'REMOTE_ADDR' && filter_var($ip, FILTER_VALIDATE_IP)) {
                return $ip;
            }
        }
        return 'UNKNOWN';
    }

    /**
     * Tests whether an IP lies inside a CIDR range (IPv4 or IPv6).
     *
     * Returns false for malformed input, for a prefix length that is not a
     * plain non-negative integer within the address size, and when the IP
     * and the subnet belong to different address families.
     */
    private function ip_in_cidr(string $ip, string $cidr): bool {
        if (strpos($cidr, '/') === false) return false;

        [$subnet, $prefix] = explode('/', $cidr, 2);
        $subnet = trim($subnet);
        $prefix = trim($prefix);

        // Digits only: rejects '', signs, decimals and exponent notation.
        if (!ctype_digit($prefix)) return false;
        if (!filter_var($ip, FILTER_VALIDATE_IP) || !filter_var($subnet, FILTER_VALIDATE_IP)) return false;

        $ip_bin  = inet_pton($ip);
        $sub_bin = inet_pton($subnet);
        // Packed length is 4 bytes for IPv4 and 16 for IPv6; a mismatch means
        // the two addresses are from different families.
        if ($ip_bin === false || $sub_bin === false || strlen($ip_bin) !== strlen($sub_bin)) return false;

        $bits = (int)$prefix;
        if ($bits > strlen($ip_bin) * 8) return false;

        // Compare the whole bytes covered by the prefix first ...
        $whole = intdiv($bits, 8);
        if ($whole > 0 && substr($ip_bin, 0, $whole) !== substr($sub_bin, 0, $whole)) return false;

        // ... then the leading bits of the following byte, if any.
        $rest = $bits % 8;
        if ($rest === 0) return true;

        $mask = (0xFF << (8 - $rest)) & 0xFF;
        return (ord($ip_bin[$whole]) & $mask) === (ord($sub_bin[$whole]) & $mask);
    }

    /* ── Accessors for admin ──────────────────────────────────── */

    /** @return string Path of badbots.conf. */
    public function get_badbots_file(): string { return $this->badbots_file; }

    /** @return string Path of referrers.conf. */
    public function get_referrers_file(): string { return $this->referrers_file; }

    /** @return string Path of networks.conf. */
    public function get_networks_file(): string { return $this->networks_file; }

    /** @return string Path of goodbots.conf. */
    public function get_goodbots_file(): string { return $this->goodbots_file; }
}