Files
InformatiQ-Toolkit/includes/class-itk-bot-blocker.php
Malin 6d4349ff7b feat: initial InformatiQ Toolkit plugin
Merges informatiq-wp-secure + informatiq-utils + HoneypotFields into
a single unified plugin with the following improvements:

- Fixed deactivation bug: all protection methods now guard themselves
  with their own option check so toggling off via AJAX takes effect
  immediately without any hook re-registration.
- Added rate-limiting for good/legitimate bots (Googlebot, Bingbot,
  DuckDuckBot, Yandex, etc.) via transient sliding-window counters;
  configurable per-bot limits in goodbots.conf (BotName|req/min);
  returns HTTP 429 with Retry-After: 60 when over limit.
- Unified MySQL-backed logging (itk_bot_log + itk_honeypot_log tables)
  replaces the old wp_options-based 100-entry cap.
- New Dashboard tab with terminal-style bot activity monitor: total
  blocked, today's count, rate-limited hits, top threat sources
  (bar chart), top IPs, top honeypot form types, active-module
  status panel.
- All optimizations from utils.php merged into Optimization tab as
  toggleable settings (was always-on before).
- Single admin page (Settings → InformatiQ Toolkit) with 8 tabs:
  Dashboard | Bot Blocker | Protection | Optimization | Honeypot |
  Bot Logs | Honeypot Logs | Config Files.
- Config file editor for badbots.conf, goodbots.conf, referrers.conf,
  networks.conf, allowed-ips.conf with AJAX save and transient flush.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-09 11:45:26 +02:00

306 lines
12 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<?php
// Stop immediately when ABSPATH is not defined, i.e. the file was requested
// directly rather than being loaded through WordPress.
if (!defined('ABSPATH')) {
    exit;
}
/**
 * ITK Bot Blocker
 *
 * Detects and blocks malicious bots, bad referrers and bad networks.
 * Known good/legitimate bots are rate-limited instead of blocked.
 *
 * The hooks are always registered; every check reads the `itk_security`
 * option at call time, so toggling a setting takes effect on the next
 * request without any hook re-registration.
 */
class ITK_Bot_Blocker {
    /** Requests per minute granted to a good bot whose config line has no valid limit. */
    private const DEFAULT_GOOD_BOT_LIMIT = 30;

    /** Lifetime, in seconds, of the cached (parsed) config lists. */
    private const CONF_CACHE_TTL = 300;

    /** Config files larger than this many bytes are ignored. */
    private const MAX_CONF_BYTES = 1048576;

    /** OpenAI crawler user-agent tokens, shared by detection and robots.txt output. */
    private const OPENAI_CRAWLERS = ['GPTBot', 'ChatGPT-User', 'OAI-SearchBot'];

    private string $badbots_file;
    private string $referrers_file;
    private string $networks_file;
    private string $goodbots_file;

    /**
     * Resolves the config file paths and registers the request and robots.txt hooks.
     */
    public function __construct() {
        $this->badbots_file   = ITK_PATH . 'config/badbots.conf';
        $this->referrers_file = ITK_PATH . 'config/referrers.conf';
        $this->networks_file  = ITK_PATH . 'config/networks.conf';
        $this->goodbots_file  = ITK_PATH . 'config/goodbots.conf';

        // Always hook; each method guards itself with its own option check.
        add_action('init', [$this, 'check_request'], 1);
        add_filter('robots_txt', [$this, 'modify_robots_txt'], 10, 2);
    }

    /* ── Main entry point ─────────────────────────────────────── */

    /**
     * Runs every enabled check against the current request.
     *
     * Order: good-bot rate limiting, OpenAI bots, malicious bots, bad
     * referrers, bad networks. block() terminates the request, so at most
     * one check ever fires.
     */
    public function check_request(): void {
        if ($this->is_trusted_request()) {
            return;
        }

        $options  = get_option('itk_security', []);
        $ua       = $_SERVER['HTTP_USER_AGENT'] ?? '';
        $referrer = $_SERVER['HTTP_REFERER'] ?? '';
        $ip       = $this->get_client_ip();
        $uri      = $_SERVER['REQUEST_URI'] ?? '';

        // ── 1. Rate-limit good/legitimate bots ─────────────────
        // NOTE(review): identification relies on the User-Agent string alone,
        // which the client controls; a matching UA skips the block checks below.
        if (!empty($options['rate_limit_good_bots'])) {
            $good_bot = $this->identify_good_bot($ua);
            if ($good_bot !== null) {
                $this->handle_good_bot($good_bot, $ua, $ip, $uri);
                return; // Handled — don't fall through to block checks.
            }
        }

        // ── 2. Block OpenAI bots ───────────────────────────────
        if (!empty($options['block_openai_bots']) && $this->is_openai_bot($ua)) {
            $this->block('OpenAI bot detected', 'openai', $ua, $referrer, $ip, $uri, $options);
        }

        // ── 3. Block malicious bots ────────────────────────────
        if (!empty($options['block_malicious_bots']) && $this->is_malicious_bot($ua)) {
            $this->block('Malicious bot detected', 'malicious_bot', $ua, $referrer, $ip, $uri, $options);
        }

        // ── 4. Block bad referrers ─────────────────────────────
        if (!empty($options['block_bad_referrers']) && $this->is_bad_referrer($referrer)) {
            $this->block('Bad referrer detected', 'bad_referrer', $ua, $referrer, $ip, $uri, $options);
        }

        // ── 5. Block bad networks ──────────────────────────────
        if (!empty($options['block_bad_networks']) && $this->is_bad_network($ip)) {
            $this->block('IP in blocked network', 'bad_network', $ua, $referrer, $ip, $uri, $options);
        }
    }

    /**
     * Tells whether the current request must never be blocked.
     *
     * is_admin() only reports that the request targets the admin area (which
     * includes admin-ajax.php and admin-post.php); it says nothing about the
     * user. The admin-area bypass is therefore limited to authenticated
     * users, so anonymous clients cannot dodge every check by requesting an
     * admin URL. Users with `manage_options` are exempt everywhere.
     */
    private function is_trusted_request(): bool {
        if (function_exists('current_user_can') && current_user_can('manage_options')) {
            return true;
        }

        return is_admin() && function_exists('is_user_logged_in') && is_user_logged_in();
    }

    /* ── Good-bot rate limiting ───────────────────────────────── */

    /**
     * Returns ['name' => string, 'limit' => int] for the first configured good
     * bot whose name occurs (case-insensitively) in the user agent, or null
     * when none matches. A limit of 0 means "never allow" (treat as blocked).
     */
    private function identify_good_bot(string $ua): ?array {
        if ($ua === '') {
            return null;
        }

        foreach ($this->load_good_bots() as $entry) {
            $name = (string)($entry['name'] ?? '');
            // An empty needle matches every string on PHP 8, so never search for it.
            if ($name !== '' && stripos($ua, $name) !== false) {
                return $entry;
            }
        }

        return null;
    }

    /**
     * Loads goodbots.conf (`BotName|req/min` per line) through the transient cache.
     *
     * Entries without a name are dropped. A missing, empty or non-numeric
     * limit falls back to DEFAULT_GOOD_BOT_LIMIT instead of silently becoming
     * 0, which would block the bot outright; only an explicit `0` blocks.
     *
     * @return array<int, array{name: string, limit: int}>
     */
    private function load_good_bots(): array {
        $cache_key = 'itk_goodbots_list';
        $cached    = get_transient($cache_key);
        if (is_array($cached)) {
            return $cached;
        }

        $list = [];
        foreach ($this->read_conf_lines($this->goodbots_file) ?? [] as $line) {
            $parts = explode('|', $line, 2);
            $name  = trim($parts[0]);
            if ($name === '') {
                continue;
            }
            $raw_limit = isset($parts[1]) ? trim($parts[1]) : '';
            $list[]    = [
                'name'  => $name,
                'limit' => ctype_digit($raw_limit) ? (int)$raw_limit : self::DEFAULT_GOOD_BOT_LIMIT,
            ];
        }

        // Cached even when empty so a missing file is not re-read on every request.
        set_transient($cache_key, $list, self::CONF_CACHE_TTL);

        return $list;
    }

    /**
     * Applies the per-minute budget of a recognised good bot.
     *
     * Hits are counted per bot name in a fixed one-minute window (one
     * transient per bot per minute). Once the budget is used up the request
     * is answered with HTTP 429 and the script exits; otherwise the counter
     * is incremented and the request continues.
     *
     * The read/increment pair is not atomic, so concurrent requests may
     * under-count slightly.
     *
     * @param array  $bot Entry as returned by identify_good_bot().
     * @param string $ua  Request user agent (logged).
     * @param string $ip  Client IP (logged).
     * @param string $uri Request URI (logged).
     */
    private function handle_good_bot(array $bot, string $ua, string $ip, string $uri): void {
        $options = get_option('itk_security', []);
        $name    = $bot['name'];
        $limit   = (int)$bot['limit'];

        // Limit of 0 = always block this "good" bot (e.g. GPTBot still in goodbots.conf).
        if ($limit === 0) {
            $this->block("Good bot with limit 0: {$name}", $name, $ua, '', $ip, $uri, $options);
            return;
        }

        // Fixed window: the key changes every minute, the transient outlives it by one minute.
        $window = (int)(time() / 60);
        $tk_key = 'itk_rl_' . md5($name) . '_' . $window;
        $count  = (int)get_transient($tk_key);

        if ($count < $limit) {
            // Under the limit — increment counter and allow through.
            set_transient($tk_key, $count + 1, 120);
            return;
        }

        // Over the limit — log and send 429.
        if (!empty($options['log_blocked_attempts'])) {
            ITK_Database::log_bot([
                'ip'       => $ip,
                'ua'       => $ua,
                'referrer' => '',
                'uri'      => $uri,
                'bot_type' => $name,
                'reason'   => "Rate limited: {$count}/{$limit} req/min",
                'action'   => 'rate_limited',
            ]);
        }

        status_header(429);
        header('Retry-After: 60');
        header('X-ITK-Rate-Limit: ' . $limit);
        echo 'Too Many Requests. Crawl-delay: 60';
        exit;
    }

    /* ── Blocking ─────────────────────────────────────────────── */

    /**
     * Optionally logs the hit, sends the configured block response and exits.
     *
     * With `response_code` set to `301_custom` and a usable `redirect_url`
     * the client is redirected. Otherwise the numeric response code is sent
     * with the (escaped) custom message. Codes that are not a valid final
     * status — including a 3xx without a redirect target — fall back to 403,
     * so a redirect status is never emitted without a Location header.
     *
     * @param string $reason   Human-readable reason stored in the log.
     * @param string $bot_type Category stored in the log.
     * @param string $ua       Request user agent.
     * @param string $referrer Request referrer.
     * @param string $ip       Client IP.
     * @param string $uri      Request URI.
     * @param array  $options  The `itk_security` option array.
     */
    private function block(
        string $reason,
        string $bot_type,
        string $ua,
        string $referrer,
        string $ip,
        string $uri,
        array $options
    ): void {
        if (!empty($options['log_blocked_attempts'])) {
            ITK_Database::log_bot([
                'ip'       => $ip,
                'ua'       => $ua,
                'referrer' => $referrer,
                'uri'      => $uri,
                'bot_type' => $bot_type,
                'reason'   => $reason,
                'action'   => 'blocked',
            ]);
        }

        $code    = $options['response_code'] ?? '403';
        $message = $options['custom_message'] ?? 'Access denied.';
        $redir   = $options['redirect_url'] ?? '';

        // esc_url_raw() may reject the URL and return '', hence the second check.
        $target = ($code === '301_custom' && !empty($redir)) ? esc_url_raw($redir) : '';
        if ($target !== '') {
            header('Location: ' . $target, true, 301);
            exit;
        }

        $status = (int)$code;
        if ($status < 100 || $status > 599 || ($status >= 300 && $status < 400)) {
            $status = 403;
        }
        status_header($status);
        echo esc_html($message);
        exit;
    }

    /* ── Detection helpers ────────────────────────────────────── */

    /**
     * Case-insensitive "does the haystack contain any of these needles" test.
     * Empty or non-string needles are ignored.
     */
    private function contains_any(string $haystack, array $needles): bool {
        foreach ($needles as $needle) {
            if (is_string($needle) && $needle !== '' && stripos($haystack, $needle) !== false) {
                return true;
            }
        }

        return false;
    }

    /** True when the user agent contains an OpenAI crawler token (or `whisper`). */
    private function is_openai_bot(string $ua): bool {
        if ($ua === '') {
            return false;
        }

        return $this->contains_any($ua, array_merge(self::OPENAI_CRAWLERS, ['whisper']));
    }

    /** True when the user agent contains any entry of badbots.conf. */
    private function is_malicious_bot(string $ua): bool {
        if ($ua === '') {
            return false;
        }

        return $this->contains_any($ua, $this->load_conf_list($this->badbots_file, 'itk_bots_list'));
    }

    /** True when the referrer contains any entry of referrers.conf. */
    private function is_bad_referrer(string $referrer): bool {
        if ($referrer === '') {
            return false;
        }

        return $this->contains_any($referrer, $this->load_conf_list($this->referrers_file, 'itk_referrers_list'));
    }

    /**
     * True when the IP equals a single address, or falls inside a CIDR range,
     * listed in networks.conf. IPv4 and IPv6 are both supported; single
     * addresses are compared in packed form so that different textual
     * spellings of the same IPv6 address still match.
     */
    private function is_bad_network(string $ip): bool {
        if ($ip === '' || $ip === 'UNKNOWN' || filter_var($ip, FILTER_VALIDATE_IP) === false) {
            return false;
        }

        $packed_ip = inet_pton($ip);

        foreach ($this->load_conf_list($this->networks_file, 'itk_networks_list') as $network) {
            if (!is_string($network)) {
                continue;
            }
            if (strpos($network, '/') !== false) {
                if ($this->ip_in_cidr($ip, $network)) {
                    return true;
                }
                continue;
            }
            if (filter_var($network, FILTER_VALIDATE_IP) === false) {
                continue;
            }
            // Fall back to a textual comparison if the address could not be packed.
            $same = ($packed_ip !== false) ? inet_pton($network) === $packed_ip : $network === $ip;
            if ($same) {
                return true;
            }
        }

        return false;
    }

    /* ── Robots.txt ───────────────────────────────────────────── */

    /**
     * `robots_txt` filter: appends Disallow rules for the OpenAI crawlers when
     * `block_openai_bots` is enabled and the site is public.
     *
     * @param string $output Current robots.txt content.
     * @param string $public Site visibility flag; '0' or '' (a coerced boolean
     *                       false) means the site is not public.
     * @return string Possibly extended robots.txt content.
     */
    public function modify_robots_txt(string $output, string $public): string {
        if ($public === '0' || $public === '') {
            return $output;
        }

        $options = get_option('itk_security', []);
        if (empty($options['block_openai_bots'])) {
            return $output;
        }

        $output .= "\n# InformatiQ Toolkit AI bot disallow\n";
        foreach (self::OPENAI_CRAWLERS as $bot) {
            $output .= "User-agent: {$bot}\nDisallow: /\n\n";
        }

        return $output;
    }

    /* ── Config file readers ──────────────────────────────────── */

    /**
     * Reads a config file and returns its trimmed, non-empty, non-comment lines.
     *
     * @return string[]|null Null when the file is missing, larger than
     *                       MAX_CONF_BYTES, or cannot be read.
     */
    private function read_conf_lines(string $file): ?array {
        if (!file_exists($file) || filesize($file) > self::MAX_CONF_BYTES) {
            return null;
        }

        $raw = file($file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
        if ($raw === false) {
            return null;
        }

        $lines = [];
        foreach ($raw as $line) {
            $line = trim($line);
            if ($line !== '' && $line[0] !== '#') {
                $lines[] = $line;
            }
        }

        return $lines;
    }

    /**
     * Returns the entries of a plain list config file, cached in a transient.
     *
     * Entries longer than 200 bytes or containing `<`, `>` or quote characters
     * are discarded. An unreadable file yields an empty list that is not
     * cached, so the file is picked up as soon as it becomes available.
     *
     * @return string[]
     */
    private function load_conf_list(string $file, string $cache_key): array {
        $cached = get_transient($cache_key);
        if (is_array($cached)) {
            return $cached;
        }

        $lines = $this->read_conf_lines($file);
        if ($lines === null) {
            return [];
        }

        $list = [];
        foreach ($lines as $line) {
            if (strlen($line) <= 200 && !preg_match('/[<>"\']/', $line)) {
                $list[] = $line;
            }
        }

        set_transient($cache_key, $list, self::CONF_CACHE_TTL);

        return $list;
    }

    /** Drops all cached config lists so edited files are re-read on the next request. */
    public function invalidate_cache(): void {
        delete_transient('itk_bots_list');
        delete_transient('itk_referrers_list');
        delete_transient('itk_networks_list');
        delete_transient('itk_goodbots_list');
    }

    /* ── IP utilities ─────────────────────────────────────────── */

    /**
     * Determines the client IP.
     *
     * The proxy headers are consulted first, in the order listed; only the
     * first comma-separated entry of each is considered and it must be a
     * public (non-private, non-reserved) address. REMOTE_ADDR is the fallback
     * and is accepted as any valid IP.
     *
     * NOTE(review): these headers are supplied by the client unless a trusted
     * proxy overwrites them, so a direct client can present an arbitrary
     * public address here — confirm the deployment sits behind such a proxy.
     *
     * @return string The IP address, or 'UNKNOWN' when none validates.
     */
    public function get_client_ip(): string {
        $proxy_headers = [
            'HTTP_CLIENT_IP', 'HTTP_X_FORWARDED_FOR', 'HTTP_X_FORWARDED',
            'HTTP_X_CLUSTER_CLIENT_IP', 'HTTP_FORWARDED_FOR', 'HTTP_FORWARDED',
        ];

        foreach ($proxy_headers as $header) {
            if (empty($_SERVER[$header])) {
                continue;
            }
            $candidate = trim(explode(',', $_SERVER[$header])[0]);
            if (filter_var($candidate, FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE)) {
                return $candidate;
            }
        }

        if (!empty($_SERVER['REMOTE_ADDR'])) {
            $remote = trim(explode(',', $_SERVER['REMOTE_ADDR'])[0]);
            if (filter_var($remote, FILTER_VALIDATE_IP)) {
                return $remote;
            }
        }

        return 'UNKNOWN';
    }

    /**
     * Tests whether an IP lies inside a CIDR range (IPv4 or IPv6).
     *
     * Both addresses are packed with inet_pton() and compared bit by bit up
     * to the prefix length. Returns false for malformed input, a prefix that
     * is not a plain non-negative integer within the family's bit width, or
     * when the IP and the subnet belong to different address families.
     */
    private function ip_in_cidr(string $ip, string $cidr): bool {
        if (strpos($cidr, '/') === false) {
            return false;
        }

        [$subnet, $prefix] = explode('/', $cidr, 2);
        $subnet = trim($subnet);
        $prefix = trim($prefix);

        if (!ctype_digit($prefix)) {
            return false;
        }
        if (filter_var($ip, FILTER_VALIDATE_IP) === false || filter_var($subnet, FILTER_VALIDATE_IP) === false) {
            return false;
        }

        $ip_bin  = inet_pton($ip);
        $net_bin = inet_pton($subnet);
        if ($ip_bin === false || $net_bin === false || strlen($ip_bin) !== strlen($net_bin)) {
            return false;
        }

        $bits = (int)$prefix;
        if ($bits > strlen($ip_bin) * 8) {
            return false;
        }

        // Whole bytes covered by the prefix must be identical.
        $full_bytes = intdiv($bits, 8);
        if (substr($ip_bin, 0, $full_bytes) !== substr($net_bin, 0, $full_bytes)) {
            return false;
        }

        // Then compare the leading bits of the first partially covered byte.
        $rem_bits = $bits % 8;
        if ($rem_bits === 0) {
            return true;
        }
        $mask = (0xFF << (8 - $rem_bits)) & 0xFF;

        return (ord($ip_bin[$full_bytes]) & $mask) === (ord($net_bin[$full_bytes]) & $mask);
    }

    /* ── Accessors for admin ──────────────────────────────────── */

    /** Path of badbots.conf. */
    public function get_badbots_file(): string { return $this->badbots_file; }

    /** Path of referrers.conf. */
    public function get_referrers_file(): string { return $this->referrers_file; }

    /** Path of networks.conf. */
    public function get_networks_file(): string { return $this->networks_file; }

    /** Path of goodbots.conf. */
    public function get_goodbots_file(): string { return $this->goodbots_file; }
}