From a4464214af197e0ed11fc08297a593540f23bbc1 Mon Sep 17 00:00:00 2001 From: Malin Date: Fri, 10 Apr 2026 08:37:52 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20self-observe=20=E2=80=94=20record=20bot?= =?UTF-8?q?s=20that=20visit=20the=20API=20directly?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add selfObserve middleware that detects bot/scanner User-Agents (or requests with no UA) hitting any endpoint except /health and /submit, and logs them to the bots table as site_id='self', action='observed'. Dashboard shows these with a cyan [LOCAL] badge and colours 'observed' action in cyan to distinguish them from WordPress-reported blocks. Geo-enrichment runs async on self-observed entries too. Co-Authored-By: Claude Sonnet 4.6 --- public/index.html | 6 ++++++ server.js | 36 ++++++++++++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 2 deletions(-) diff --git a/public/index.html b/public/index.html index e0985aa..bacc69d 100644 --- a/public/index.html +++ b/public/index.html @@ -251,6 +251,10 @@ main { .feed-action { font-weight: bold; } .feed-action.blocked { color: var(--red); } .feed-action.rate_limited { color: var(--amber); } +.feed-action.observed { color: var(--cyan2); } +.feed-local { font-size: 9px; font-weight: 700; letter-spacing: .5px; + color: var(--bg); background: var(--cyan2); border-radius: 3px; + padding: 1px 5px; margin-left: 4px; vertical-align: middle; } .feed-reason { color: var(--dim); font-size: 10px; } .feed-geo { color: var(--dim); font-size: 10px; } @@ -661,6 +665,7 @@ function addRow(row) { el.className = 'feed-row'; const f = flag(row.country||''); const action = row.action||'blocked'; + const isLocal = row.site_id === 'self'; el.innerHTML = ` ${fmtTime(row.received_at)} ${esc(row.ip_masked||row.ip||'?')} @@ -668,6 +673,7 @@ function addRow(row) { ${f?`${f} ${esc(row.country||'')}
`:''} ${esc(row.bot_type||'?')} [${esc(action)}] + ${isLocal?'LOCAL':''}
${esc(row.reason||row.ua_family||'')} `; feedEl.prepend(el); diff --git a/server.js b/server.js index f30c649..6c7f984 100644 --- a/server.js +++ b/server.js @@ -191,7 +191,7 @@ function getStats() { GROUP BY ua_family ORDER BY hits DESC LIMIT 8 `).all(now - 2592000), recent: DB.prepare(` - SELECT received_at, ip_masked ip, country, bot_type, action, reason, ua_family + SELECT received_at, ip_masked ip, country, bot_type, action, reason, ua_family, site_id FROM bots ORDER BY id DESC LIMIT 40 `).all(), hourly: DB.prepare(` @@ -211,7 +211,7 @@ let lastId = DB.prepare('SELECT MAX(id) id FROM bots').get().id || 0; setInterval(() => { if (!sseClients.size) return; - const rows = DB.prepare('SELECT * FROM bots WHERE id > ? ORDER BY id ASC LIMIT 20').all(lastId); + const rows = DB.prepare('SELECT id, received_at, ip_masked, country, bot_type, action, reason, ua_family, site_id FROM bots WHERE id > ? ORDER BY id ASC LIMIT 20').all(lastId); if (!rows.length) return; lastId = rows.at(-1).id; const msg = `data: ${JSON.stringify(rows)}\n\n`; @@ -252,9 +252,41 @@ const insertBatch = DB.transaction((siteId, bots) => { return ids; }); +// ── Self-observation (log bots that visit the API directly) ─────────────────── +// +// Matches any request whose UA looks like a bot/scanner/tool, or has no UA. +// Skips /health (Docker probe) and /submit (WP plugin). +// Logged as site_id='self', action='observed' so they're visually distinct. 
+
+const BOT_UA_RE = /(bot|crawl|spider|scrap|scan|slurp|fetch|wget|curl|python|go-http|java\/|scrapy|axios|headless|phantom|gptbot|chatgpt|openai|yandex|baidu|semrush|ahrefs|mj12|dotbot|petalbot)/i;
+const SKIP_SELF = new Set(['/api/v1/health', '/api/v1/submit']);
+
+/** Express middleware: stores bot-like or UA-less requests as site_id='self', action='observed'; a logging failure never fails the request. */
+function selfObserve(req, res, next) {
+  if (SKIP_SELF.has(req.path)) return next();
+  const ua = req.headers['user-agent'] || '';
+  if (ua && !BOT_UA_RE.test(ua)) return next();   // normal browser — skip
+  // Client IP: first X-Forwarded-For entry if the header is set, else the socket peer address.
+  const ip = (req.headers['x-forwarded-for'] || '').split(',')[0].trim()
+    || req.socket.remoteAddress || '?';
+  const now = Math.floor(Date.now() / 1000);      // Unix time in seconds
+  try {
+    const fam = parseUA(ua);                       // inside the try so a parser error cannot break the request
+    const r = stmtIns.run(
+      now, 'self', ip, fam, 'observed', 'Direct API visitor', fam, req.path, '', ''
+    );
+    _cache = null;                                 // a row was inserted — invalidate whatever _cache holds
+    setImmediate(() => enrichIP(Number(r.lastInsertRowid), ip)); // enrichment deferred off the request path
+  } catch (err) {
+    console.error('[selfObserve] failed to record visitor:', err); // best-effort: report, keep serving
+  }
+  next();
+}
+
 // ── Routes ────────────────────────────────────────────────────────────────────
 
 app.use(express.json({ limit: '128kb' }));
+app.use(selfObserve);
 app.use(express.static(path.join(__dirname, 'public')));
 
 app.post('/api/v1/submit', requireToken, (req, res) => {