feat: self-observe — record bots that visit the API directly
Add selfObserve middleware that detects bot/scanner User-Agents (or requests with no UA) hitting any endpoint except /health and /submit, and logs them to the bots table as site_id='self', action='observed'. Dashboard shows these with a cyan [LOCAL] badge and colours 'observed' action in cyan to distinguish them from WordPress-reported blocks. Geo-enrichment runs async on self-observed entries too. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -251,6 +251,10 @@ main {
|
||||
.feed-action { font-weight: bold; }
|
||||
.feed-action.blocked { color: var(--red); }
|
||||
.feed-action.rate_limited { color: var(--amber); }
|
||||
.feed-action.observed { color: var(--cyan2); }
|
||||
.feed-local { font-size: 9px; font-weight: 700; letter-spacing: .5px;
|
||||
color: var(--bg); background: var(--cyan2); border-radius: 3px;
|
||||
padding: 1px 5px; margin-left: 4px; vertical-align: middle; }
|
||||
.feed-reason { color: var(--dim); font-size: 10px; }
|
||||
.feed-geo { color: var(--dim); font-size: 10px; }
|
||||
|
||||
@@ -661,6 +665,7 @@ function addRow(row) {
|
||||
el.className = 'feed-row';
|
||||
const f = flag(row.country||'');
|
||||
const action = row.action||'blocked';
|
||||
const isLocal = row.site_id === 'self';
|
||||
el.innerHTML = `
|
||||
<span class="feed-ts">${fmtTime(row.received_at)}</span>
|
||||
<span class="feed-ip">${esc(row.ip_masked||row.ip||'?')}</span>
|
||||
@@ -668,6 +673,7 @@ function addRow(row) {
|
||||
${f?`<span class="feed-geo">${f} ${esc(row.country||'')}</span><br>`:''}
|
||||
<span class="feed-bot">${esc(row.bot_type||'?')}</span>
|
||||
<span class="feed-action ${action}"> [${esc(action)}]</span>
|
||||
${isLocal?'<span class="feed-local">LOCAL</span>':''}
|
||||
<br><span class="feed-reason">${esc(row.reason||row.ua_family||'')}</span>
|
||||
</span>`;
|
||||
feedEl.prepend(el);
|
||||
|
||||
36
server.js
36
server.js
@@ -191,7 +191,7 @@ function getStats() {
|
||||
GROUP BY ua_family ORDER BY hits DESC LIMIT 8
|
||||
`).all(now - 2592000),
|
||||
recent: DB.prepare(`
|
||||
SELECT received_at, ip_masked ip, country, bot_type, action, reason, ua_family
|
||||
SELECT received_at, ip_masked ip, country, bot_type, action, reason, ua_family, site_id
|
||||
FROM bots ORDER BY id DESC LIMIT 40
|
||||
`).all(),
|
||||
hourly: DB.prepare(`
|
||||
@@ -211,7 +211,7 @@ let lastId = DB.prepare('SELECT MAX(id) id FROM bots').get().id || 0;
|
||||
|
||||
setInterval(() => {
|
||||
if (!sseClients.size) return;
|
||||
const rows = DB.prepare('SELECT * FROM bots WHERE id > ? ORDER BY id ASC LIMIT 20').all(lastId);
|
||||
const rows = DB.prepare('SELECT id, received_at, ip_masked, country, bot_type, action, reason, ua_family, site_id FROM bots WHERE id > ? ORDER BY id ASC LIMIT 20').all(lastId);
|
||||
if (!rows.length) return;
|
||||
lastId = rows.at(-1).id;
|
||||
const msg = `data: ${JSON.stringify(rows)}\n\n`;
|
||||
@@ -252,9 +252,41 @@ const insertBatch = DB.transaction((siteId, bots) => {
|
||||
return ids;
|
||||
});
|
||||
|
||||
// ── Self-observation (log bots that visit the API directly) ───────────────────
|
||||
//
|
||||
// Matches any request whose UA looks like a bot/scanner/tool, or has no UA.
|
||||
// Skips /api/v1/health (Docker probe) and /api/v1/submit (WP plugin).
|
||||
// Logged as site_id='self', action='observed' so they're visually distinct.
|
||||
|
||||
// User-Agent substrings treated as automated clients (case-insensitive).
const BOT_UA_RE = /(bot|crawl|spider|scrap|scan|slurp|fetch|wget|curl|python|go-http|java\/|scrapy|axios|headless|phantom|gptbot|chatgpt|openai|yandex|baidu|semrush|ahrefs|mj12|dotbot|petalbot)/i;

// Exact request paths that are never self-observed.
const SKIP_SELF = new Set(['/api/v1/health', '/api/v1/submit']);

/**
 * Express middleware that records automated-looking direct visitors.
 *
 * A request is recorded when its path is not in SKIP_SELF and its User-Agent
 * is either missing/empty or matches BOT_UA_RE. Recorded rows are inserted via
 * `stmtIns` with site_id 'self', action 'observed' and the request path; the
 * stats cache is invalidated and geo-enrichment is scheduled off the request
 * path with `setImmediate`.
 *
 * Observation is strictly best-effort: any failure while parsing the UA,
 * inserting the row or enriching the IP is logged and never affects the
 * response — `next()` is always called exactly once.
 *
 * @param {object} req - Express request (reads `path`, `headers`, `socket.remoteAddress`).
 * @param {object} res - Express response (unused).
 * @param {Function} next - Express continuation callback.
 * @returns {*} Whatever `next()` returns on the early-exit paths, otherwise undefined.
 */
function selfObserve(req, res, next) {
  if (SKIP_SELF.has(req.path)) return next();

  const ua = req.headers['user-agent'] || '';
  if (ua && !BOT_UA_RE.test(ua)) return next(); // normal browser — skip

  // First X-Forwarded-For hop wins, then the socket peer address.
  // NOTE(review): X-Forwarded-For is client-supplied unless a trusted reverse
  // proxy overwrites it — confirm the deployment always sits behind one.
  const ip = (req.headers['x-forwarded-for'] || '').split(',')[0].trim()
    || req.socket.remoteAddress || '?';
  const now = Math.floor(Date.now() / 1000);

  try {
    // Parsed inside the try so a UA-parser failure cannot break the request.
    const fam = parseUA(ua);
    const r = stmtIns.run(
      now, 'self', ip, fam, 'observed', 'Direct API visitor', fam, req.path, '', ''
    );
    _cache = null; // new row recorded — drop the cached stats

    const rowId = Number(r.lastInsertRowid);
    setImmediate(() => {
      // Covers both a synchronous throw and a rejected promise from enrichIP,
      // neither of which the surrounding try/catch can see.
      Promise.resolve()
        .then(() => enrichIP(rowId, ip))
        .catch((err) => console.error('selfObserve: geo enrichment failed:', err));
    });
  } catch (err) {
    // Best-effort logging only: report the failure, keep serving the request.
    console.error('selfObserve: failed to record visitor:', err);
  }

  next();
}
|
||||
|
||||
// ── Routes ────────────────────────────────────────────────────────────────────
|
||||
|
||||
app.use(express.json({ limit: '128kb' }));
|
||||
app.use(selfObserve);
|
||||
app.use(express.static(path.join(__dirname, 'public')));
|
||||
|
||||
app.post('/api/v1/submit', requireToken, (req, res) => {
|
||||
|
||||
Reference in New Issue
Block a user