feat: self-observe — record bots that visit the API directly
Add a selfObserve middleware that detects bot/scanner User-Agents (or requests with no User-Agent) hitting any endpoint except /health and /submit, and logs them to the bots table with site_id='self' and action='observed'. The dashboard shows these entries with a cyan [LOCAL] badge and colours the 'observed' action cyan to distinguish them from WordPress-reported blocks. Geo-enrichment also runs asynchronously on self-observed entries.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
36
server.js
36
server.js
@@ -191,7 +191,7 @@ function getStats() {
|
||||
GROUP BY ua_family ORDER BY hits DESC LIMIT 8
|
||||
`).all(now - 2592000),
|
||||
recent: DB.prepare(`
|
||||
SELECT received_at, ip_masked ip, country, bot_type, action, reason, ua_family
|
||||
SELECT received_at, ip_masked ip, country, bot_type, action, reason, ua_family, site_id
|
||||
FROM bots ORDER BY id DESC LIMIT 40
|
||||
`).all(),
|
||||
hourly: DB.prepare(`
|
||||
@@ -211,7 +211,7 @@ let lastId = DB.prepare('SELECT MAX(id) id FROM bots').get().id || 0;
|
||||
|
||||
setInterval(() => {
|
||||
if (!sseClients.size) return;
|
||||
const rows = DB.prepare('SELECT * FROM bots WHERE id > ? ORDER BY id ASC LIMIT 20').all(lastId);
|
||||
const rows = DB.prepare('SELECT id, received_at, ip_masked, country, bot_type, action, reason, ua_family, site_id FROM bots WHERE id > ? ORDER BY id ASC LIMIT 20').all(lastId);
|
||||
if (!rows.length) return;
|
||||
lastId = rows.at(-1).id;
|
||||
const msg = `data: ${JSON.stringify(rows)}\n\n`;
|
||||
@@ -252,9 +252,41 @@ const insertBatch = DB.transaction((siteId, bots) => {
|
||||
return ids;
|
||||
});
|
||||
|
||||
// ── Self-observation (log bots that visit the API directly) ───────────────────
|
||||
//
|
||||
// Matches any request whose UA looks like a bot/scanner/tool, or has no UA.
|
||||
// Skips /health (Docker probe) and /submit (WP plugin).
|
||||
// Logged as site_id='self', action='observed' so they're visually distinct.
|
||||
|
||||
// Case-insensitive User-Agent fragments that mark a request as coming from a
// bot, crawler, scanner or scripted HTTP client rather than a regular browser.
const BOT_UA_RE = /(bot|crawl|spider|scrap|scan|slurp|fetch|wget|curl|python|go-http|java\/|scrapy|axios|headless|phantom|gptbot|chatgpt|openai|yandex|baidu|semrush|ahrefs|mj12|dotbot|petalbot)/i;

// Exact request paths that selfObserve never records.
const SKIP_SELF = new Set(['/api/v1/health', '/api/v1/submit']);

/**
 * Express middleware that records direct visits by bots to this server.
 *
 * A request is recorded when its path is not in SKIP_SELF and its User-Agent
 * is either missing/empty or matches BOT_UA_RE. Recording is best-effort: any
 * failure while classifying or inserting is logged and the request continues
 * down the middleware chain unaffected.
 *
 * @param {object} req - Express request; reads `path`, `headers` and `socket`.
 * @param {object} res - Express response (unused).
 * @param {Function} next - Continues the chain; always called exactly once.
 * @returns {*} Whatever `next()` returns on the early-exit paths, otherwise undefined.
 */
function selfObserve(req, res, next) {
  if (SKIP_SELF.has(req.path)) return next();

  const ua = req.headers['user-agent'] || '';
  // A non-empty UA that matches none of the bot fragments is treated as a
  // normal browser and ignored; an empty UA is always recorded.
  if (ua && !BOT_UA_RE.test(ua)) return next();

  // First X-Forwarded-For hop, else the socket peer address, else '?'.
  // NOTE(review): X-Forwarded-For is client-controlled unless a trusted proxy
  // overwrites it — confirm the deployment before relying on this value.
  const ip = (req.headers['x-forwarded-for'] || '').split(',')[0].trim()
    || req.socket.remoteAddress || '?';
  const now = Math.floor(Date.now() / 1000); // Unix time in whole seconds

  try {
    // parseUA lives inside the try so a failure while classifying the UA
    // cannot turn an otherwise valid request into an error response.
    const fam = parseUA(ua);
    // Column order is defined where stmtIns is prepared (not visible here);
    // the argument order below is unchanged from the original call.
    const r = stmtIns.run(
      now, 'self', ip, fam, 'observed', 'Direct API visitor', fam, req.path, '', ''
    );
    _cache = null; // presumably invalidates cached stats — confirm against _cache's users
    // Enrichment is deferred so it never delays the response.
    setImmediate(() => enrichIP(Number(r.lastInsertRowid), ip));
  } catch (err) {
    // Best-effort: report the failure instead of swallowing it silently.
    console.error('selfObserve: failed to record direct visitor:', err);
  }

  next();
}
|
||||
|
||||
// ── Routes ────────────────────────────────────────────────────────────────────
|
||||
|
||||
// selfObserve is mounted first: it only reads the path, headers and socket,
// and placing it ahead of the JSON body parser means a request whose body
// fails to parse (and is therefore routed to error handling) is still observed.
app.use(selfObserve);
app.use(express.json({ limit: '128kb' }));
app.use(express.static(path.join(__dirname, 'public')));
|
||||
|
||||
app.post('/api/v1/submit', requireToken, (req, res) => {
|
||||
|
||||
Reference in New Issue
Block a user