feat: add SQLite persistent storage for request logging
- Add SQLAlchemy-based database layer for persistent storage - Create models for access_logs, credential_attempts, attack_detections, ip_stats - Include fields for future GeoIP and reputation enrichment - Implement sanitization utilities to protect against malicious payloads - Fix XSS vulnerability in dashboard template (HTML escape all user data) - Add DATABASE_PATH and DATABASE_RETENTION_DAYS config options - Dual storage: in-memory for dashboard performance + SQLite for persistence New files: - src/models.py - SQLAlchemy ORM models - src/database.py - DatabaseManager singleton - src/sanitizer.py - Input sanitization and HTML escaping - requirements.txt - SQLAlchemy dependency Security protections: - Parameterized queries via SQLAlchemy ORM - Field length limits to prevent storage exhaustion - Null byte and control character stripping - HTML escaping on dashboard output
This commit is contained in:
141
src/models.py
Normal file
141
src/models.py
Normal file
@@ -0,0 +1,141 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
SQLAlchemy ORM models for the Krawl honeypot database.
|
||||
Stores access logs, credential attempts, attack detections, and IP statistics.
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
|
||||
from sqlalchemy import String, Integer, Boolean, DateTime, ForeignKey, Index
|
||||
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
|
||||
|
||||
from sanitizer import (
|
||||
MAX_IP_LENGTH,
|
||||
MAX_PATH_LENGTH,
|
||||
MAX_USER_AGENT_LENGTH,
|
||||
MAX_CREDENTIAL_LENGTH,
|
||||
MAX_ATTACK_PATTERN_LENGTH,
|
||||
MAX_CITY_LENGTH,
|
||||
MAX_ASN_ORG_LENGTH,
|
||||
MAX_REPUTATION_SOURCE_LENGTH,
|
||||
)
|
||||
|
||||
|
||||
class Base(DeclarativeBase):
|
||||
"""Base class for all ORM models."""
|
||||
pass
|
||||
|
||||
|
||||
class AccessLog(Base):
|
||||
"""
|
||||
Records all HTTP requests to the honeypot.
|
||||
|
||||
Stores request metadata, suspicious activity flags, and timestamps
|
||||
for analysis and dashboard display.
|
||||
"""
|
||||
__tablename__ = 'access_logs'
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
|
||||
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
|
||||
path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False)
|
||||
user_agent: Mapped[Optional[str]] = mapped_column(String(MAX_USER_AGENT_LENGTH), nullable=True)
|
||||
method: Mapped[str] = mapped_column(String(10), nullable=False, default='GET')
|
||||
is_suspicious: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
is_honeypot_trigger: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
|
||||
timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
|
||||
|
||||
# Relationship to attack detections
|
||||
attack_detections: Mapped[List["AttackDetection"]] = relationship(
|
||||
"AttackDetection",
|
||||
back_populates="access_log",
|
||||
cascade="all, delete-orphan"
|
||||
)
|
||||
|
||||
# Composite index for common queries
|
||||
__table_args__ = (
|
||||
Index('ix_access_logs_ip_timestamp', 'ip', 'timestamp'),
|
||||
)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<AccessLog(id={self.id}, ip='{self.ip}', path='{self.path[:50]}')>"
|
||||
|
||||
|
||||
class CredentialAttempt(Base):
|
||||
"""
|
||||
Records captured login attempts from honeypot login forms.
|
||||
|
||||
Stores the submitted username and password along with request metadata.
|
||||
"""
|
||||
__tablename__ = 'credential_attempts'
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
|
||||
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
|
||||
path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False)
|
||||
username: Mapped[Optional[str]] = mapped_column(String(MAX_CREDENTIAL_LENGTH), nullable=True)
|
||||
password: Mapped[Optional[str]] = mapped_column(String(MAX_CREDENTIAL_LENGTH), nullable=True)
|
||||
timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
|
||||
|
||||
# Composite index for common queries
|
||||
__table_args__ = (
|
||||
Index('ix_credential_attempts_ip_timestamp', 'ip', 'timestamp'),
|
||||
)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<CredentialAttempt(id={self.id}, ip='{self.ip}', username='{self.username}')>"
|
||||
|
||||
|
||||
class AttackDetection(Base):
|
||||
"""
|
||||
Records detected attack patterns in requests.
|
||||
|
||||
Linked to the parent AccessLog record. Multiple attack types can be
|
||||
detected in a single request.
|
||||
"""
|
||||
__tablename__ = 'attack_detections'
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
|
||||
access_log_id: Mapped[int] = mapped_column(
|
||||
Integer,
|
||||
ForeignKey('access_logs.id', ondelete='CASCADE'),
|
||||
nullable=False,
|
||||
index=True
|
||||
)
|
||||
attack_type: Mapped[str] = mapped_column(String(50), nullable=False)
|
||||
matched_pattern: Mapped[Optional[str]] = mapped_column(String(MAX_ATTACK_PATTERN_LENGTH), nullable=True)
|
||||
|
||||
# Relationship back to access log
|
||||
access_log: Mapped["AccessLog"] = relationship("AccessLog", back_populates="attack_detections")
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<AttackDetection(id={self.id}, type='{self.attack_type}')>"
|
||||
|
||||
|
||||
class IpStats(Base):
|
||||
"""
|
||||
Aggregated statistics per IP address.
|
||||
|
||||
Includes fields for future GeoIP and reputation enrichment.
|
||||
Updated on each request from an IP.
|
||||
"""
|
||||
__tablename__ = 'ip_stats'
|
||||
|
||||
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), primary_key=True)
|
||||
total_requests: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
|
||||
first_seen: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow)
|
||||
last_seen: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow)
|
||||
|
||||
# GeoIP fields (populated by future enrichment)
|
||||
country_code: Mapped[Optional[str]] = mapped_column(String(2), nullable=True)
|
||||
city: Mapped[Optional[str]] = mapped_column(String(MAX_CITY_LENGTH), nullable=True)
|
||||
asn: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
|
||||
asn_org: Mapped[Optional[str]] = mapped_column(String(MAX_ASN_ORG_LENGTH), nullable=True)
|
||||
|
||||
# Reputation fields (populated by future enrichment)
|
||||
reputation_score: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
|
||||
reputation_source: Mapped[Optional[str]] = mapped_column(String(MAX_REPUTATION_SOURCE_LENGTH), nullable=True)
|
||||
reputation_updated: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<IpStats(ip='{self.ip}', total_requests={self.total_requests})>"
|
||||
Reference in New Issue
Block a user