From 932020d03204456ed71b6e314e4ba03fbf296538 Mon Sep 17 00:00:00 2001 From: Malin Date: Thu, 9 Apr 2026 18:32:02 +0200 Subject: [PATCH] feat: initial Bot Intelligence API Docker stack MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Self-hosted Node.js/Express + SQLite (WAL) API server and dashboard for tracking blocked bots and user agents. Features: - POST /api/v1/submit — batch ingest from WordPress plugin - GET /api/v1/stats — aggregated stats with 30s cache - GET /api/v1/stream — SSE live event feed - GET /api/v1/health — health check endpoint - Cyan/blue terminal-style dashboard with live feed, bar charts, 24h activity - Docker Compose setup on port 3001 with persistent SQLite volume - Bearer token auth with constant-time comparison Co-Authored-By: Claude Sonnet 4.6 --- .env.example | 6 + Dockerfile | 17 ++ README.md | 51 ++++ docker-compose.yml | 23 ++ package.json | 17 ++ public/index.html | 738 +++++++++++++++++++++++++++++++++++++++++++++ server.js | 307 +++++++++++++++++++ 7 files changed, 1159 insertions(+) create mode 100644 .env.example create mode 100644 Dockerfile create mode 100644 README.md create mode 100644 docker-compose.yml create mode 100644 package.json create mode 100644 public/index.html create mode 100644 server.js diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..4ea7271 --- /dev/null +++ b/.env.example @@ -0,0 +1,6 @@ +PORT=3001 +DB_PATH=/data/bots.db +NODE_ENV=production +# Set a strong random token — all WP sites must send this as: Authorization: Bearer +# Leave empty to run in open mode (dev only) +API_TOKEN=change-me-to-a-long-random-string diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..0432ad3 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,17 @@ +FROM node:20-alpine + +# Native deps for better-sqlite3 +RUN apk add --no-cache python3 make g++ + +WORKDIR /app +COPY package*.json ./ +RUN npm ci --omit=dev + +COPY . . + +RUN mkdir -p /data + +EXPOSE 3000 +VOLUME ["/data"] + +CMD ["node", "server.js"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..23592e0 --- /dev/null +++ b/README.md @@ -0,0 +1,51 @@ +# Bot Intelligence API + +Centralized bot-blocking telemetry server for the InformatiQ Toolkit WordPress plugin. + +## Quick start + +```bash +cp .env.example .env +# Edit .env and set a strong API_TOKEN +docker compose up -d +``` + +Dashboard available at `http://your-server:3001/` + +## API Endpoints + +| Method | Path | Auth | Description | +|--------|------|------|-------------| +| POST | `/api/v1/submit` | Bearer token | Submit a batch of bot events (max 50) | +| GET | `/api/v1/stats` | — | Aggregated statistics (cached 30s) | +| GET | `/api/v1/stream` | — | SSE live event stream | +| GET | `/api/v1/health` | — | Health check | + +## Submit payload + +```json +{ + "site_hash": "sha256-of-home-url", + "bots": [ + { + "ip": "1.2.3.4", + "bot_type": "malicious_bot", + "action": "blocked", + "reason": "Malicious bot detected", + "user_agent": "BadBot/1.0", + "request_uri": "/wp-login.php", + "logged_at": "2026-01-01 12:00:00" + } + ] +} +``` + +`action` is one of: `blocked`, `rate_limited` + +## Environment variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `PORT` | `3001` | Listen port | +| `DB_PATH` | `/data/bots.db` | SQLite database path | +| `API_TOKEN` | _(empty)_ | Bearer token (leave empty for open/dev mode) | diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..833aa33 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,23 @@ +services: + bot-api: + build: . + container_name: bot-api + restart: unless-stopped + ports: + - "3001:3001" + volumes: + - bot-data:/data + environment: + - PORT=3001 + - DB_PATH=/data/bots.db + - NODE_ENV=production + - API_TOKEN=${API_TOKEN:-change-me-to-a-long-random-string} + healthcheck: + test: ["CMD", "wget", "-qO-", "http://localhost:3001/api/v1/health"] + interval: 30s + timeout: 5s + retries: 3 + +volumes: + bot-data: + driver: local diff --git a/package.json b/package.json new file mode 100644 index 0000000..8630a74 --- /dev/null +++ b/package.json @@ -0,0 +1,17 @@ +{ + "name": "bot-api", + "version": "1.0.0", + "description": "Centralized bot intelligence API + dashboard", + "main": "server.js", + "scripts": { + "start": "node server.js", + "dev": "node --watch server.js" + }, + "dependencies": { + "better-sqlite3": "^9.4.3", + "express": "^4.18.2" + }, + "engines": { + "node": ">=18" + } +} diff --git a/public/index.html b/public/index.html new file mode 100644 index 0000000..e0985aa --- /dev/null +++ b/public/index.html @@ -0,0 +1,738 @@ + + + + + +BOT // NETWORK INTELLIGENCE + + + + +
+ +
+ --:--:-- + LIVE FEED +
+ + + +
+
+
+ +
+ +
+
+
+
TOTAL BLOCKED
+
+
+
+
TODAY
+
+
+
+
LAST 7 DAYS
+
+
+
+
LAST 30 DAYS
+
+
+
+
RATE LIMITED
+
+
+
+
SITES REPORTING
+
+
+ +
+ ▶ MOST ACTIVE BOT TYPE (30D): + + + +
+ +
+ +
+ +
+
+ ▶ 24H ACTIVITY TREND + +
+
+ +
+
+ +
+
▶ BOT BREAKDOWN // LAST 30 DAYS
+
+
+
+
BOT TYPES
+
    +
    +
    +
    UA FAMILIES
    +
      +
      +
      +
      +
      + +
      +
      ▶ ACTIONS + REASONS // LAST 30 DAYS
      +
      +
      +
      +
      ACTIONS
      +
        +
        +
        +
        REASONS
        +
          +
          +
          +
          +
          + +
          + +
          + +
          +
          + ▶ LIVE BOT FEED + 0 events +
          +
          + +
          + +
          +
          ▶ TOP OFFENDERS // LAST 30 DAYS
          +
          + + + + + + + + + + + + +
          #IP ADDRESSHITSAS
          Loading…
          +
          +
          + +
          + +
          + +
          + +
          + BOT NETWORK MONITOR // CENTRALIZED THREAT INTELLIGENCE + +
          + + + + diff --git a/server.js b/server.js new file mode 100644 index 0000000..f30c649 --- /dev/null +++ b/server.js @@ -0,0 +1,307 @@ +'use strict'; + +const express = require('express'); +const Database = require('better-sqlite3'); +const path = require('path'); +const http = require('http'); +const { timingSafeEqual } = require('crypto'); + +const app = express(); +const PORT = Number(process.env.PORT) || 3001; +const DB = new Database(process.env.DB_PATH || '/data/bots.db'); + +// ── Database ────────────────────────────────────────────────────────────────── + +DB.pragma('journal_mode = WAL'); +DB.pragma('synchronous = NORMAL'); +DB.pragma('cache_size = -8000'); + +DB.exec(` + CREATE TABLE IF NOT EXISTS bots ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + received_at INTEGER NOT NULL DEFAULT (unixepoch()), + site_id TEXT NOT NULL DEFAULT '', + ip_masked TEXT NOT NULL DEFAULT '', + bot_type TEXT NOT NULL DEFAULT '', + action TEXT NOT NULL DEFAULT 'blocked', + reason TEXT NOT NULL DEFAULT '', + ua_family TEXT NOT NULL DEFAULT '', + request_uri TEXT NOT NULL DEFAULT '', + country TEXT NOT NULL DEFAULT '', + asn TEXT NOT NULL DEFAULT '' + ); + CREATE TABLE IF NOT EXISTS sites ( + site_id TEXT PRIMARY KEY, + first_seen INTEGER NOT NULL DEFAULT (unixepoch()), + last_seen INTEGER NOT NULL DEFAULT (unixepoch()), + block_count INTEGER NOT NULL DEFAULT 0 + ); + CREATE INDEX IF NOT EXISTS idx_recv ON bots(received_at DESC); + CREATE INDEX IF NOT EXISTS idx_ip ON bots(ip_masked); + CREATE INDEX IF NOT EXISTS idx_site ON bots(site_id); + CREATE INDEX IF NOT EXISTS idx_bot_type ON bots(bot_type); + CREATE INDEX IF NOT EXISTS idx_action ON bots(action); +`); + +// Migrations – silently ignored if columns already exist +['country', 'asn', 'request_uri'].forEach(col => { + try { DB.exec(`ALTER TABLE bots ADD COLUMN ${col} TEXT NOT NULL DEFAULT ''`); } catch {} +}); + +// ── Auth ────────────────────────────────────────────────────────────────────── + +const API_TOKEN = (process.env.API_TOKEN || '').trim(); + +function requireToken(req, res, next) { + if (!API_TOKEN) return next(); + const token = (req.headers['authorization'] || '').replace(/^Bearer\s+/, ''); + const a = Buffer.alloc(128); Buffer.from(token, 'utf8').copy(a, 0, 0, 128); + const b = Buffer.alloc(128); Buffer.from(API_TOKEN, 'utf8').copy(b, 0, 0, 128); + if (!timingSafeEqual(a, b) || token !== API_TOKEN) { + return res.status(403).json({ error: 'Forbidden' }); + } + next(); +} + +// ── UA families ─────────────────────────────────────────────────────────────── + +const UA_MAP = [ + [/curl\//i, 'curl'], + [/python-requests|python\//i, 'Python'], + [/go-http-client/i, 'Go'], + [/wget\//i, 'Wget'], + [/java\//i, 'Java'], + [/scrapy/i, 'Scrapy'], + [/axios/i, 'Axios'], + [/headlesschrome|phantomjs/i, 'Headless Browser'], + [/(bot|crawler|spider|slurp)/i, 'Bot/Crawler'], + [/GPTBot|ChatGPT/i, 'OpenAI Bot'], + [/Googlebot/i, 'Googlebot'], + [/bingbot/i, 'Bingbot'], + [/YandexBot/i, 'YandexBot'], + [/Baiduspider/i, 'Baiduspider'], + [/DuckDuckBot/i, 'DuckDuckBot'], + [/AhrefsBot/i, 'AhrefsBot'], + [/SemrushBot/i, 'SemrushBot'], + [/chrome/i, 'Chrome'], + [/firefox/i, 'Firefox'], + [/safari/i, 'Safari'], +]; + +function parseUA(ua = '') { + for (const [re, label] of UA_MAP) if (re.test(ua)) return label; + return ua.length ? 'Other' : 'No UA'; +} + +// ── IP geo-enrichment ───────────────────────────────────────────────────────── + +const stmtEnrich = DB.prepare('UPDATE bots SET country=?, asn=? WHERE id=?'); +const enrichCache = new Map(); + +function isPrivateIP(ip) { + return /^(10\.|192\.168\.|172\.(1[6-9]|2\d|3[01])\.|127\.|::1$|fc|fd)/.test(ip); +} + +function enrichIP(rowId, ip) { + if (!ip || ip === '?' || isPrivateIP(ip)) return; + const now = Date.now(); + if ((enrichCache.get(ip) || 0) > now) return; + enrichCache.set(ip, now + 3_600_000); + + http.get( + `http://ip-api.com/json/${encodeURIComponent(ip)}?fields=status,countryCode,as`, + { timeout: 5000 }, + res => { + let data = ''; + res.on('data', d => data += d); + res.on('end', () => { + try { + const j = JSON.parse(data); + if (j.status === 'success') { + stmtEnrich.run( + (j.countryCode || '').slice(0, 2), + (j.as || '').slice(0, 50), + rowId + ); + } + } catch {} + }); + } + ).on('error', () => enrichCache.delete(ip)); +} + +// Background enrichment of unenriched rows +const stmtUnenriched = DB.prepare( + "SELECT id, ip_masked FROM bots WHERE country='' AND ip_masked != '' AND ip_masked != '?' LIMIT 5" +); +setInterval(() => { + for (const row of stmtUnenriched.all()) enrichIP(row.id, row.ip_masked); +}, 20_000); + +// ── Rate limiter ────────────────────────────────────────────────────────────── + +const rl = new Map(); +setInterval(() => { const n = Date.now(); for (const [k, v] of rl) if (n > v.r) rl.delete(k); }, 30_000); + +function allowed(ip, max = 30, win = 60_000) { + const n = Date.now(); + let e = rl.get(ip); + if (!e || n > e.r) { e = { c: 0, r: n + win }; rl.set(ip, e); } + return ++e.c <= max; +} + +// ── Stats cache (30s TTL) ───────────────────────────────────────────────────── + +let _cache = null, _cacheTs = 0; + +function getStats() { + if (_cache && Date.now() - _cacheTs < 30_000) return _cache; + const now = Math.floor(Date.now() / 1000); + + _cache = { + total: DB.prepare('SELECT COUNT(*) n FROM bots').get().n, + today: DB.prepare('SELECT COUNT(*) n FROM bots WHERE received_at > ?').get(now - 86400).n, + last_7d: DB.prepare('SELECT COUNT(*) n FROM bots WHERE received_at > ?').get(now - 604800).n, + last_30d: DB.prepare('SELECT COUNT(*) n FROM bots WHERE received_at > ?').get(now - 2592000).n, + rate_limited: DB.prepare("SELECT COUNT(*) n FROM bots WHERE action='rate_limited' AND received_at > ?").get(now - 2592000).n, + total_sites: DB.prepare('SELECT COUNT(*) n FROM sites').get().n, + top_ips: DB.prepare(` + SELECT ip_masked ip, country, asn, COUNT(*) hits + FROM bots WHERE received_at > ? + GROUP BY ip_masked ORDER BY hits DESC LIMIT 10 + `).all(now - 2592000), + top_bot_types: DB.prepare(` + SELECT bot_type, COUNT(*) hits + FROM bots WHERE received_at > ? + GROUP BY bot_type ORDER BY hits DESC LIMIT 8 + `).all(now - 2592000), + top_actions: DB.prepare(` + SELECT action, COUNT(*) hits + FROM bots WHERE received_at > ? + GROUP BY action ORDER BY hits DESC LIMIT 8 + `).all(now - 2592000), + top_reasons: DB.prepare(` + SELECT reason, COUNT(*) hits + FROM bots WHERE received_at > ? + GROUP BY reason ORDER BY hits DESC LIMIT 8 + `).all(now - 2592000), + top_ua: DB.prepare(` + SELECT ua_family, COUNT(*) hits + FROM bots WHERE received_at > ? + GROUP BY ua_family ORDER BY hits DESC LIMIT 8 + `).all(now - 2592000), + recent: DB.prepare(` + SELECT received_at, ip_masked ip, country, bot_type, action, reason, ua_family + FROM bots ORDER BY id DESC LIMIT 40 + `).all(), + hourly: DB.prepare(` + SELECT (received_at / 3600) * 3600 h, COUNT(*) n + FROM bots WHERE received_at > ? + GROUP BY h ORDER BY h ASC + `).all(now - 86400), + }; + _cacheTs = Date.now(); + return _cache; +} + +// ── SSE live stream ─────────────────────────────────────────────────────────── + +const sseClients = new Set(); +let lastId = DB.prepare('SELECT MAX(id) id FROM bots').get().id || 0; + +setInterval(() => { + if (!sseClients.size) return; + const rows = DB.prepare('SELECT * FROM bots WHERE id > ? ORDER BY id ASC LIMIT 20').all(lastId); + if (!rows.length) return; + lastId = rows.at(-1).id; + const msg = `data: ${JSON.stringify(rows)}\n\n`; + for (const r of sseClients) { try { r.write(msg); } catch { sseClients.delete(r); } } +}, 2000); + +// ── Prepared statements ─────────────────────────────────────────────────────── + +const stmtIns = DB.prepare(` + INSERT INTO bots (received_at, site_id, ip_masked, bot_type, action, reason, ua_family, request_uri, country, asn) + VALUES (?,?,?,?,?,?,?,?,?,?) +`); +const stmtSite = DB.prepare(` + INSERT INTO sites (site_id, first_seen, last_seen, block_count) VALUES (?,?,?,?) + ON CONFLICT(site_id) DO UPDATE SET + last_seen = excluded.last_seen, + block_count = block_count + excluded.block_count +`); + +const insertBatch = DB.transaction((siteId, bots) => { + const now = Math.floor(Date.now() / 1000); + const ids = []; + for (const b of bots) { + const ts = b.logged_at ? Math.floor(new Date(b.logged_at) / 1000) : now; + const ip = String(b.ip || '').trim().slice(0, 45) || '?'; + const r = stmtIns.run( + ts, siteId, ip, + String(b.bot_type || '').slice(0, 100), + String(b.action || 'blocked').slice(0, 20), + String(b.reason || '').slice(0, 255), + parseUA(b.user_agent || ''), + String(b.request_uri || '').slice(0, 500), + '', '' // country/asn filled async + ); + ids.push({ id: Number(r.lastInsertRowid), ip }); + } + stmtSite.run(siteId, now, now, bots.length); + return ids; +}); + +// ── Routes ──────────────────────────────────────────────────────────────────── + +app.use(express.json({ limit: '128kb' })); +app.use(express.static(path.join(__dirname, 'public'))); + +app.post('/api/v1/submit', requireToken, (req, res) => { + const clientIP = (req.headers['x-forwarded-for'] || '').split(',')[0].trim() + || req.socket.remoteAddress || ''; + + if (!allowed(clientIP)) return res.status(429).json({ error: 'Rate limit exceeded' }); + + const { site_hash, bots } = req.body || {}; + + if (!site_hash || typeof site_hash !== 'string' || site_hash.length < 8) { + return res.status(400).json({ error: 'Invalid site_hash' }); + } + if (!Array.isArray(bots) || !bots.length || bots.length > 50) { + return res.status(400).json({ error: 'bots must be array of 1–50 items' }); + } + + try { + const ids = insertBatch(site_hash.slice(0, 20), bots); + _cache = null; + setImmediate(() => ids.forEach(({ id, ip }) => enrichIP(id, ip))); + res.json({ ok: true, received: bots.length }); + } catch (e) { + console.error('[submit]', e.message); + res.status(500).json({ error: 'Internal error' }); + } +}); + +app.get('/api/v1/stats', (_, res) => res.json(getStats())); + +app.get('/api/v1/stream', (req, res) => { + res.writeHead(200, { + 'Content-Type': 'text/event-stream', + 'Cache-Control': 'no-cache', + 'Connection': 'keep-alive', + 'X-Accel-Buffering': 'no', + }); + res.write(':\n\n'); + sseClients.add(res); + req.on('close', () => sseClients.delete(res)); +}); + +app.get('/api/v1/health', (_, res) => + res.json({ ok: true, uptime: process.uptime(), sse_clients: sseClients.size }) +); + +app.listen(PORT, '0.0.0.0', () => { + console.log(`[bot-api] listening on :${PORT}`); + console.log(`[bot-api] db: ${process.env.DB_PATH || '/data/bots.db'}`); +});