Files
bot-api/server.js
Malin 932020d032 feat: initial Bot Intelligence API Docker stack
Self-hosted Node.js/Express + SQLite (WAL) API server and dashboard
for tracking blocked bots and user agents. Features:
- POST /api/v1/submit — batch ingest from WordPress plugin
- GET /api/v1/stats — aggregated stats with 30s cache
- GET /api/v1/stream — SSE live event feed
- GET /api/v1/health — health check endpoint
- Cyan/blue terminal-style dashboard with live feed, bar charts, 24h activity
- Docker Compose setup on port 3001 with persistent SQLite volume
- Bearer token auth with constant-time comparison

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-09 18:32:02 +02:00

308 lines
12 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
'use strict';
const express = require('express');
const Database = require('better-sqlite3');
const path = require('path');
const http = require('http');
const { timingSafeEqual } = require('crypto');
const app = express();
const PORT = Number(process.env.PORT) || 3001;
const DB = new Database(process.env.DB_PATH || '/data/bots.db');
// ── Database ──────────────────────────────────────────────────────────────────
DB.pragma('journal_mode = WAL');
DB.pragma('synchronous = NORMAL');
DB.pragma('cache_size = -8000');
DB.exec(`
CREATE TABLE IF NOT EXISTS bots (
id INTEGER PRIMARY KEY AUTOINCREMENT,
received_at INTEGER NOT NULL DEFAULT (unixepoch()),
site_id TEXT NOT NULL DEFAULT '',
ip_masked TEXT NOT NULL DEFAULT '',
bot_type TEXT NOT NULL DEFAULT '',
action TEXT NOT NULL DEFAULT 'blocked',
reason TEXT NOT NULL DEFAULT '',
ua_family TEXT NOT NULL DEFAULT '',
request_uri TEXT NOT NULL DEFAULT '',
country TEXT NOT NULL DEFAULT '',
asn TEXT NOT NULL DEFAULT ''
);
CREATE TABLE IF NOT EXISTS sites (
site_id TEXT PRIMARY KEY,
first_seen INTEGER NOT NULL DEFAULT (unixepoch()),
last_seen INTEGER NOT NULL DEFAULT (unixepoch()),
block_count INTEGER NOT NULL DEFAULT 0
);
CREATE INDEX IF NOT EXISTS idx_recv ON bots(received_at DESC);
CREATE INDEX IF NOT EXISTS idx_ip ON bots(ip_masked);
CREATE INDEX IF NOT EXISTS idx_site ON bots(site_id);
CREATE INDEX IF NOT EXISTS idx_bot_type ON bots(bot_type);
CREATE INDEX IF NOT EXISTS idx_action ON bots(action);
`);
// Migrations silently ignored if columns already exist
['country', 'asn', 'request_uri'].forEach(col => {
try { DB.exec(`ALTER TABLE bots ADD COLUMN ${col} TEXT NOT NULL DEFAULT ''`); } catch {}
});
// ── Auth ──────────────────────────────────────────────────────────────────────
const API_TOKEN = (process.env.API_TOKEN || '').trim();
function requireToken(req, res, next) {
if (!API_TOKEN) return next();
const token = (req.headers['authorization'] || '').replace(/^Bearer\s+/, '');
const a = Buffer.alloc(128); Buffer.from(token, 'utf8').copy(a, 0, 0, 128);
const b = Buffer.alloc(128); Buffer.from(API_TOKEN, 'utf8').copy(b, 0, 0, 128);
if (!timingSafeEqual(a, b) || token !== API_TOKEN) {
return res.status(403).json({ error: 'Forbidden' });
}
next();
}
// ── UA families ───────────────────────────────────────────────────────────────
const UA_MAP = [
[/curl\//i, 'curl'],
[/python-requests|python\//i, 'Python'],
[/go-http-client/i, 'Go'],
[/wget\//i, 'Wget'],
[/java\//i, 'Java'],
[/scrapy/i, 'Scrapy'],
[/axios/i, 'Axios'],
[/headlesschrome|phantomjs/i, 'Headless Browser'],
[/(bot|crawler|spider|slurp)/i, 'Bot/Crawler'],
[/GPTBot|ChatGPT/i, 'OpenAI Bot'],
[/Googlebot/i, 'Googlebot'],
[/bingbot/i, 'Bingbot'],
[/YandexBot/i, 'YandexBot'],
[/Baiduspider/i, 'Baiduspider'],
[/DuckDuckBot/i, 'DuckDuckBot'],
[/AhrefsBot/i, 'AhrefsBot'],
[/SemrushBot/i, 'SemrushBot'],
[/chrome/i, 'Chrome'],
[/firefox/i, 'Firefox'],
[/safari/i, 'Safari'],
];
function parseUA(ua = '') {
for (const [re, label] of UA_MAP) if (re.test(ua)) return label;
return ua.length ? 'Other' : 'No UA';
}
// ── IP geo-enrichment ─────────────────────────────────────────────────────────
const stmtEnrich = DB.prepare('UPDATE bots SET country=?, asn=? WHERE id=?');
const enrichCache = new Map();
function isPrivateIP(ip) {
return /^(10\.|192\.168\.|172\.(1[6-9]|2\d|3[01])\.|127\.|::1$|fc|fd)/.test(ip);
}
function enrichIP(rowId, ip) {
if (!ip || ip === '?' || isPrivateIP(ip)) return;
const now = Date.now();
if ((enrichCache.get(ip) || 0) > now) return;
enrichCache.set(ip, now + 3_600_000);
http.get(
`http://ip-api.com/json/${encodeURIComponent(ip)}?fields=status,countryCode,as`,
{ timeout: 5000 },
res => {
let data = '';
res.on('data', d => data += d);
res.on('end', () => {
try {
const j = JSON.parse(data);
if (j.status === 'success') {
stmtEnrich.run(
(j.countryCode || '').slice(0, 2),
(j.as || '').slice(0, 50),
rowId
);
}
} catch {}
});
}
).on('error', () => enrichCache.delete(ip));
}
// Background enrichment of unenriched rows
const stmtUnenriched = DB.prepare(
"SELECT id, ip_masked FROM bots WHERE country='' AND ip_masked != '' AND ip_masked != '?' LIMIT 5"
);
setInterval(() => {
for (const row of stmtUnenriched.all()) enrichIP(row.id, row.ip_masked);
}, 20_000);
// ── Rate limiter ──────────────────────────────────────────────────────────────
const rl = new Map();
setInterval(() => { const n = Date.now(); for (const [k, v] of rl) if (n > v.r) rl.delete(k); }, 30_000);
function allowed(ip, max = 30, win = 60_000) {
const n = Date.now();
let e = rl.get(ip);
if (!e || n > e.r) { e = { c: 0, r: n + win }; rl.set(ip, e); }
return ++e.c <= max;
}
// ── Stats cache (30s TTL) ─────────────────────────────────────────────────────
let _cache = null, _cacheTs = 0;
function getStats() {
if (_cache && Date.now() - _cacheTs < 30_000) return _cache;
const now = Math.floor(Date.now() / 1000);
_cache = {
total: DB.prepare('SELECT COUNT(*) n FROM bots').get().n,
today: DB.prepare('SELECT COUNT(*) n FROM bots WHERE received_at > ?').get(now - 86400).n,
last_7d: DB.prepare('SELECT COUNT(*) n FROM bots WHERE received_at > ?').get(now - 604800).n,
last_30d: DB.prepare('SELECT COUNT(*) n FROM bots WHERE received_at > ?').get(now - 2592000).n,
rate_limited: DB.prepare("SELECT COUNT(*) n FROM bots WHERE action='rate_limited' AND received_at > ?").get(now - 2592000).n,
total_sites: DB.prepare('SELECT COUNT(*) n FROM sites').get().n,
top_ips: DB.prepare(`
SELECT ip_masked ip, country, asn, COUNT(*) hits
FROM bots WHERE received_at > ?
GROUP BY ip_masked ORDER BY hits DESC LIMIT 10
`).all(now - 2592000),
top_bot_types: DB.prepare(`
SELECT bot_type, COUNT(*) hits
FROM bots WHERE received_at > ?
GROUP BY bot_type ORDER BY hits DESC LIMIT 8
`).all(now - 2592000),
top_actions: DB.prepare(`
SELECT action, COUNT(*) hits
FROM bots WHERE received_at > ?
GROUP BY action ORDER BY hits DESC LIMIT 8
`).all(now - 2592000),
top_reasons: DB.prepare(`
SELECT reason, COUNT(*) hits
FROM bots WHERE received_at > ?
GROUP BY reason ORDER BY hits DESC LIMIT 8
`).all(now - 2592000),
top_ua: DB.prepare(`
SELECT ua_family, COUNT(*) hits
FROM bots WHERE received_at > ?
GROUP BY ua_family ORDER BY hits DESC LIMIT 8
`).all(now - 2592000),
recent: DB.prepare(`
SELECT received_at, ip_masked ip, country, bot_type, action, reason, ua_family
FROM bots ORDER BY id DESC LIMIT 40
`).all(),
hourly: DB.prepare(`
SELECT (received_at / 3600) * 3600 h, COUNT(*) n
FROM bots WHERE received_at > ?
GROUP BY h ORDER BY h ASC
`).all(now - 86400),
};
_cacheTs = Date.now();
return _cache;
}
// ── SSE live stream ───────────────────────────────────────────────────────────
const sseClients = new Set();
let lastId = DB.prepare('SELECT MAX(id) id FROM bots').get().id || 0;
setInterval(() => {
if (!sseClients.size) return;
const rows = DB.prepare('SELECT * FROM bots WHERE id > ? ORDER BY id ASC LIMIT 20').all(lastId);
if (!rows.length) return;
lastId = rows.at(-1).id;
const msg = `data: ${JSON.stringify(rows)}\n\n`;
for (const r of sseClients) { try { r.write(msg); } catch { sseClients.delete(r); } }
}, 2000);
// ── Prepared statements ───────────────────────────────────────────────────────
const stmtIns = DB.prepare(`
INSERT INTO bots (received_at, site_id, ip_masked, bot_type, action, reason, ua_family, request_uri, country, asn)
VALUES (?,?,?,?,?,?,?,?,?,?)
`);
const stmtSite = DB.prepare(`
INSERT INTO sites (site_id, first_seen, last_seen, block_count) VALUES (?,?,?,?)
ON CONFLICT(site_id) DO UPDATE SET
last_seen = excluded.last_seen,
block_count = block_count + excluded.block_count
`);
const insertBatch = DB.transaction((siteId, bots) => {
const now = Math.floor(Date.now() / 1000);
const ids = [];
for (const b of bots) {
const ts = b.logged_at ? Math.floor(new Date(b.logged_at) / 1000) : now;
const ip = String(b.ip || '').trim().slice(0, 45) || '?';
const r = stmtIns.run(
ts, siteId, ip,
String(b.bot_type || '').slice(0, 100),
String(b.action || 'blocked').slice(0, 20),
String(b.reason || '').slice(0, 255),
parseUA(b.user_agent || ''),
String(b.request_uri || '').slice(0, 500),
'', '' // country/asn filled async
);
ids.push({ id: Number(r.lastInsertRowid), ip });
}
stmtSite.run(siteId, now, now, bots.length);
return ids;
});
// ── Routes ────────────────────────────────────────────────────────────────────
app.use(express.json({ limit: '128kb' }));
app.use(express.static(path.join(__dirname, 'public')));
app.post('/api/v1/submit', requireToken, (req, res) => {
const clientIP = (req.headers['x-forwarded-for'] || '').split(',')[0].trim()
|| req.socket.remoteAddress || '';
if (!allowed(clientIP)) return res.status(429).json({ error: 'Rate limit exceeded' });
const { site_hash, bots } = req.body || {};
if (!site_hash || typeof site_hash !== 'string' || site_hash.length < 8) {
return res.status(400).json({ error: 'Invalid site_hash' });
}
if (!Array.isArray(bots) || !bots.length || bots.length > 50) {
return res.status(400).json({ error: 'bots must be array of 150 items' });
}
try {
const ids = insertBatch(site_hash.slice(0, 20), bots);
_cache = null;
setImmediate(() => ids.forEach(({ id, ip }) => enrichIP(id, ip)));
res.json({ ok: true, received: bots.length });
} catch (e) {
console.error('[submit]', e.message);
res.status(500).json({ error: 'Internal error' });
}
});
app.get('/api/v1/stats', (_, res) => res.json(getStats()));
app.get('/api/v1/stream', (req, res) => {
res.writeHead(200, {
'Content-Type': 'text/event-stream',
'Cache-Control': 'no-cache',
'Connection': 'keep-alive',
'X-Accel-Buffering': 'no',
});
res.write(':\n\n');
sseClients.add(res);
req.on('close', () => sseClients.delete(res));
});
app.get('/api/v1/health', (_, res) =>
res.json({ ok: true, uptime: process.uptime(), sse_clients: sseClients.size })
);
app.listen(PORT, '0.0.0.0', () => {
console.log(`[bot-api] listening on :${PORT}`);
console.log(`[bot-api] db: ${process.env.DB_PATH || '/data/bots.db'}`);
});