feat: add SQLite persistent storage for request logging
- Add SQLAlchemy-based database layer for persistent storage - Create models for access_logs, credential_attempts, attack_detections, ip_stats - Include fields for future GeoIP and reputation enrichment - Implement sanitization utilities to protect against malicious payloads - Fix XSS vulnerability in dashboard template (HTML escape all user data) - Add DATABASE_PATH and DATABASE_RETENTION_DAYS config options - Dual storage: in-memory for dashboard performance + SQLite for persistence New files: - src/models.py - SQLAlchemy ORM models - src/database.py - DatabaseManager singleton - src/sanitizer.py - Input sanitization and HTML escaping - requirements.txt - SQLAlchemy dependency Security protections: - Parameterized queries via SQLAlchemy ORM - Field length limits to prevent storage exhaustion - Null byte and control character stripping - HTML escaping on dashboard output
This commit is contained in:
@@ -5,49 +5,58 @@ Dashboard template for viewing honeypot statistics.
|
||||
Customize this template to change the dashboard appearance.
|
||||
"""
|
||||
|
||||
import html
|
||||
|
||||
|
||||
def _escape(value) -> str:
|
||||
"""Escape HTML special characters to prevent XSS attacks."""
|
||||
if value is None:
|
||||
return ""
|
||||
return html.escape(str(value))
|
||||
|
||||
|
||||
def generate_dashboard(stats: dict) -> str:
|
||||
"""Generate dashboard HTML with access statistics"""
|
||||
|
||||
# Generate IP rows
|
||||
# Generate IP rows (IPs are generally safe but escape for consistency)
|
||||
top_ips_rows = '\n'.join([
|
||||
f'<tr><td class="rank">{i+1}</td><td>{ip}</td><td>{count}</td></tr>'
|
||||
f'<tr><td class="rank">{i+1}</td><td>{_escape(ip)}</td><td>{count}</td></tr>'
|
||||
for i, (ip, count) in enumerate(stats['top_ips'])
|
||||
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
|
||||
|
||||
# Generate paths rows
|
||||
# Generate paths rows (CRITICAL: paths can contain XSS payloads)
|
||||
top_paths_rows = '\n'.join([
|
||||
f'<tr><td class="rank">{i+1}</td><td>{path}</td><td>{count}</td></tr>'
|
||||
f'<tr><td class="rank">{i+1}</td><td>{_escape(path)}</td><td>{count}</td></tr>'
|
||||
for i, (path, count) in enumerate(stats['top_paths'])
|
||||
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
|
||||
|
||||
# Generate User-Agent rows
|
||||
# Generate User-Agent rows (CRITICAL: user agents can contain XSS payloads)
|
||||
top_ua_rows = '\n'.join([
|
||||
f'<tr><td class="rank">{i+1}</td><td style="word-break: break-all;">{ua[:80]}</td><td>{count}</td></tr>'
|
||||
f'<tr><td class="rank">{i+1}</td><td style="word-break: break-all;">{_escape(ua[:80])}</td><td>{count}</td></tr>'
|
||||
for i, (ua, count) in enumerate(stats['top_user_agents'])
|
||||
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
|
||||
|
||||
# Generate suspicious accesses rows
|
||||
# Generate suspicious accesses rows (CRITICAL: multiple user-controlled fields)
|
||||
suspicious_rows = '\n'.join([
|
||||
f'<tr><td>{log["ip"]}</td><td>{log["path"]}</td><td style="word-break: break-all;">{log["user_agent"][:60]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
|
||||
f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["path"])}</td><td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
|
||||
for log in stats['recent_suspicious'][-10:]
|
||||
]) or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
|
||||
|
||||
# Generate honeypot triggered IPs rows
|
||||
honeypot_rows = '\n'.join([
|
||||
f'<tr><td>{ip}</td><td style="word-break: break-all;">{", ".join(paths)}</td><td>{len(paths)}</td></tr>'
|
||||
f'<tr><td>{_escape(ip)}</td><td style="word-break: break-all;">{_escape(", ".join(paths))}</td><td>{len(paths)}</td></tr>'
|
||||
for ip, paths in stats.get('honeypot_triggered_ips', [])
|
||||
]) or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>'
|
||||
|
||||
# Generate attack types rows
|
||||
# Generate attack types rows (CRITICAL: paths and user agents are user-controlled)
|
||||
attack_type_rows = '\n'.join([
|
||||
f'<tr><td>{log["ip"]}</td><td>{log["path"]}</td><td>{", ".join(log["attack_types"])}</td><td style="word-break: break-all;">{log["user_agent"][:60]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
|
||||
f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["path"])}</td><td>{_escape(", ".join(log["attack_types"]))}</td><td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
|
||||
for log in stats.get('attack_types', [])[-10:]
|
||||
]) or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
|
||||
|
||||
# Generate credential attempts rows
|
||||
# Generate credential attempts rows (CRITICAL: usernames and passwords are user-controlled)
|
||||
credential_rows = '\n'.join([
|
||||
f'<tr><td>{log["ip"]}</td><td>{log["username"]}</td><td>{log["password"]}</td><td>{log["path"]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
|
||||
f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["username"])}</td><td>{_escape(log["password"])}</td><td>{_escape(log["path"])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
|
||||
for log in stats.get('credential_attempts', [])[-20:]
|
||||
]) or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'
|
||||
|
||||
|
||||
Reference in New Issue
Block a user