feat: add SQLite persistent storage for request logging

- Add SQLAlchemy-based database layer for persistent storage
  - Create models for access_logs, credential_attempts, attack_detections, ip_stats
  - Include fields for future GeoIP and reputation enrichment
  - Implement sanitization utilities to protect against malicious payloads
  - Fix XSS vulnerability in dashboard template (HTML escape all user data)
  - Add DATABASE_PATH and DATABASE_RETENTION_DAYS config options
  - Dual storage: in-memory for dashboard performance + SQLite for persistence

  New files:
  - src/models.py - SQLAlchemy ORM models
  - src/database.py - DatabaseManager singleton
  - src/sanitizer.py - Input sanitization and HTML escaping
  - requirements.txt - SQLAlchemy dependency

  Security protections:
  - Parameterized queries via SQLAlchemy ORM
  - Field length limits to prevent storage exhaustion
  - Null byte and control character stripping
  - HTML escaping on dashboard output
This commit is contained in:
Phillip Tarrant
2025-12-28 10:43:32 -06:00
parent 499760c939
commit f1c142c53d
11 changed files with 860 additions and 32 deletions

View File

@@ -5,49 +5,58 @@ Dashboard template for viewing honeypot statistics.
Customize this template to change the dashboard appearance.
"""
import html
def _escape(value) -> str:
"""Escape HTML special characters to prevent XSS attacks."""
if value is None:
return ""
return html.escape(str(value))
def generate_dashboard(stats: dict) -> str:
"""Generate dashboard HTML with access statistics"""
# Generate IP rows
# Generate IP rows (IPs are generally safe but escape for consistency)
top_ips_rows = '\n'.join([
f'<tr><td class="rank">{i+1}</td><td>{ip}</td><td>{count}</td></tr>'
f'<tr><td class="rank">{i+1}</td><td>{_escape(ip)}</td><td>{count}</td></tr>'
for i, (ip, count) in enumerate(stats['top_ips'])
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
# Generate paths rows
# Generate paths rows (CRITICAL: paths can contain XSS payloads)
top_paths_rows = '\n'.join([
f'<tr><td class="rank">{i+1}</td><td>{path}</td><td>{count}</td></tr>'
f'<tr><td class="rank">{i+1}</td><td>{_escape(path)}</td><td>{count}</td></tr>'
for i, (path, count) in enumerate(stats['top_paths'])
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
# Generate User-Agent rows
# Generate User-Agent rows (CRITICAL: user agents can contain XSS payloads)
top_ua_rows = '\n'.join([
f'<tr><td class="rank">{i+1}</td><td style="word-break: break-all;">{ua[:80]}</td><td>{count}</td></tr>'
f'<tr><td class="rank">{i+1}</td><td style="word-break: break-all;">{_escape(ua[:80])}</td><td>{count}</td></tr>'
for i, (ua, count) in enumerate(stats['top_user_agents'])
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
# Generate suspicious accesses rows
# Generate suspicious accesses rows (CRITICAL: multiple user-controlled fields)
suspicious_rows = '\n'.join([
f'<tr><td>{log["ip"]}</td><td>{log["path"]}</td><td style="word-break: break-all;">{log["user_agent"][:60]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["path"])}</td><td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
for log in stats['recent_suspicious'][-10:]
]) or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
# Generate honeypot triggered IPs rows
honeypot_rows = '\n'.join([
f'<tr><td>{ip}</td><td style="word-break: break-all;">{", ".join(paths)}</td><td>{len(paths)}</td></tr>'
f'<tr><td>{_escape(ip)}</td><td style="word-break: break-all;">{_escape(", ".join(paths))}</td><td>{len(paths)}</td></tr>'
for ip, paths in stats.get('honeypot_triggered_ips', [])
]) or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>'
# Generate attack types rows
# Generate attack types rows (CRITICAL: paths and user agents are user-controlled)
attack_type_rows = '\n'.join([
f'<tr><td>{log["ip"]}</td><td>{log["path"]}</td><td>{", ".join(log["attack_types"])}</td><td style="word-break: break-all;">{log["user_agent"][:60]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["path"])}</td><td>{_escape(", ".join(log["attack_types"]))}</td><td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
for log in stats.get('attack_types', [])[-10:]
]) or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
# Generate credential attempts rows
# Generate credential attempts rows (CRITICAL: usernames and passwords are user-controlled)
credential_rows = '\n'.join([
f'<tr><td>{log["ip"]}</td><td>{log["username"]}</td><td>{log["password"]}</td><td>{log["path"]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["username"])}</td><td>{_escape(log["password"])}</td><td>{_escape(log["path"])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
for log in stats.get('credential_attempts', [])[-20:]
]) or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'