feat: add SQLite persistent storage for request logging
- Add SQLAlchemy-based database layer for persistent storage - Create models for access_logs, credential_attempts, attack_detections, ip_stats - Include fields for future GeoIP and reputation enrichment - Implement sanitization utilities to protect against malicious payloads - Fix XSS vulnerability in dashboard template (HTML escape all user data) - Add DATABASE_PATH and DATABASE_RETENTION_DAYS config options - Dual storage: in-memory for dashboard performance + SQLite for persistence New files: - src/models.py - SQLAlchemy ORM models - src/database.py - DatabaseManager singleton - src/sanitizer.py - Input sanitization and HTML escaping - requirements.txt - SQLAlchemy dependency Security protections: - Parameterized queries via SQLAlchemy ORM - Field length limits to prevent storage exhaustion - Null byte and control character stripping - HTML escaping on dashboard output
This commit is contained in:
361
src/database.py
Normal file
361
src/database.py
Normal file
@@ -0,0 +1,361 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
"""
|
||||
Database singleton module for the Krawl honeypot.
|
||||
Provides SQLAlchemy session management and database initialization.
|
||||
"""
|
||||
|
||||
import os
|
||||
import stat
|
||||
from datetime import datetime
|
||||
from typing import Optional, List, Dict, Any
|
||||
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker, scoped_session, Session
|
||||
|
||||
from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats
|
||||
from sanitizer import (
|
||||
sanitize_ip,
|
||||
sanitize_path,
|
||||
sanitize_user_agent,
|
||||
sanitize_credential,
|
||||
sanitize_attack_pattern,
|
||||
)
|
||||
|
||||
|
||||
class DatabaseManager:
|
||||
"""
|
||||
Singleton database manager for the Krawl honeypot.
|
||||
|
||||
Handles database initialization, session management, and provides
|
||||
methods for persisting access logs, credentials, and attack detections.
|
||||
"""
|
||||
_instance: Optional["DatabaseManager"] = None
|
||||
|
||||
def __new__(cls) -> "DatabaseManager":
|
||||
if cls._instance is None:
|
||||
cls._instance = super().__new__(cls)
|
||||
cls._instance._initialized = False
|
||||
return cls._instance
|
||||
|
||||
def initialize(self, database_path: str = "data/krawl.db") -> None:
|
||||
"""
|
||||
Initialize the database connection and create tables.
|
||||
|
||||
Args:
|
||||
database_path: Path to the SQLite database file
|
||||
"""
|
||||
if self._initialized:
|
||||
return
|
||||
|
||||
# Create data directory if it doesn't exist
|
||||
data_dir = os.path.dirname(database_path)
|
||||
if data_dir and not os.path.exists(data_dir):
|
||||
os.makedirs(data_dir, exist_ok=True)
|
||||
|
||||
# Create SQLite database with check_same_thread=False for multi-threaded access
|
||||
database_url = f"sqlite:///{database_path}"
|
||||
self._engine = create_engine(
|
||||
database_url,
|
||||
connect_args={"check_same_thread": False},
|
||||
echo=False # Set to True for SQL debugging
|
||||
)
|
||||
|
||||
# Create session factory with scoped_session for thread safety
|
||||
session_factory = sessionmaker(bind=self._engine)
|
||||
self._Session = scoped_session(session_factory)
|
||||
|
||||
# Create all tables
|
||||
Base.metadata.create_all(self._engine)
|
||||
|
||||
# Set restrictive file permissions (owner read/write only)
|
||||
if os.path.exists(database_path):
|
||||
try:
|
||||
os.chmod(database_path, stat.S_IRUSR | stat.S_IWUSR) # 600
|
||||
except OSError:
|
||||
# May fail on some systems, not critical
|
||||
pass
|
||||
|
||||
self._initialized = True
|
||||
|
||||
@property
|
||||
def session(self) -> Session:
|
||||
"""Get a thread-local database session."""
|
||||
if not self._initialized:
|
||||
raise RuntimeError("DatabaseManager not initialized. Call initialize() first.")
|
||||
return self._Session()
|
||||
|
||||
def close_session(self) -> None:
|
||||
"""Close the current thread-local session."""
|
||||
if self._initialized:
|
||||
self._Session.remove()
|
||||
|
||||
def persist_access(
|
||||
self,
|
||||
ip: str,
|
||||
path: str,
|
||||
user_agent: str = "",
|
||||
method: str = "GET",
|
||||
is_suspicious: bool = False,
|
||||
is_honeypot_trigger: bool = False,
|
||||
attack_types: Optional[List[str]] = None,
|
||||
matched_patterns: Optional[Dict[str, str]] = None
|
||||
) -> Optional[int]:
|
||||
"""
|
||||
Persist an access log entry to the database.
|
||||
|
||||
Args:
|
||||
ip: Client IP address
|
||||
path: Requested path
|
||||
user_agent: Client user agent string
|
||||
method: HTTP method (GET, POST, HEAD)
|
||||
is_suspicious: Whether the request was flagged as suspicious
|
||||
is_honeypot_trigger: Whether a honeypot path was accessed
|
||||
attack_types: List of detected attack types
|
||||
matched_patterns: Dict mapping attack_type to matched pattern
|
||||
|
||||
Returns:
|
||||
The ID of the created AccessLog record, or None on error
|
||||
"""
|
||||
session = self.session
|
||||
try:
|
||||
# Create access log with sanitized fields
|
||||
access_log = AccessLog(
|
||||
ip=sanitize_ip(ip),
|
||||
path=sanitize_path(path),
|
||||
user_agent=sanitize_user_agent(user_agent),
|
||||
method=method[:10],
|
||||
is_suspicious=is_suspicious,
|
||||
is_honeypot_trigger=is_honeypot_trigger,
|
||||
timestamp=datetime.utcnow()
|
||||
)
|
||||
session.add(access_log)
|
||||
session.flush() # Get the ID before committing
|
||||
|
||||
# Add attack detections if any
|
||||
if attack_types:
|
||||
matched_patterns = matched_patterns or {}
|
||||
for attack_type in attack_types:
|
||||
detection = AttackDetection(
|
||||
access_log_id=access_log.id,
|
||||
attack_type=attack_type[:50],
|
||||
matched_pattern=sanitize_attack_pattern(
|
||||
matched_patterns.get(attack_type, "")
|
||||
)
|
||||
)
|
||||
session.add(detection)
|
||||
|
||||
# Update IP stats
|
||||
self._update_ip_stats(session, ip)
|
||||
|
||||
session.commit()
|
||||
return access_log.id
|
||||
|
||||
except Exception as e:
|
||||
session.rollback()
|
||||
# Log error but don't crash - database persistence is secondary to honeypot function
|
||||
print(f"Database error persisting access: {e}")
|
||||
return None
|
||||
finally:
|
||||
self.close_session()
|
||||
|
||||
def persist_credential(
|
||||
self,
|
||||
ip: str,
|
||||
path: str,
|
||||
username: Optional[str] = None,
|
||||
password: Optional[str] = None
|
||||
) -> Optional[int]:
|
||||
"""
|
||||
Persist a credential attempt to the database.
|
||||
|
||||
Args:
|
||||
ip: Client IP address
|
||||
path: Login form path
|
||||
username: Submitted username
|
||||
password: Submitted password
|
||||
|
||||
Returns:
|
||||
The ID of the created CredentialAttempt record, or None on error
|
||||
"""
|
||||
session = self.session
|
||||
try:
|
||||
credential = CredentialAttempt(
|
||||
ip=sanitize_ip(ip),
|
||||
path=sanitize_path(path),
|
||||
username=sanitize_credential(username),
|
||||
password=sanitize_credential(password),
|
||||
timestamp=datetime.utcnow()
|
||||
)
|
||||
session.add(credential)
|
||||
session.commit()
|
||||
return credential.id
|
||||
|
||||
except Exception as e:
|
||||
session.rollback()
|
||||
print(f"Database error persisting credential: {e}")
|
||||
return None
|
||||
finally:
|
||||
self.close_session()
|
||||
|
||||
def _update_ip_stats(self, session: Session, ip: str) -> None:
|
||||
"""
|
||||
Update IP statistics (upsert pattern).
|
||||
|
||||
Args:
|
||||
session: Active database session
|
||||
ip: IP address to update
|
||||
"""
|
||||
sanitized_ip = sanitize_ip(ip)
|
||||
now = datetime.utcnow()
|
||||
|
||||
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
||||
|
||||
if ip_stats:
|
||||
ip_stats.total_requests += 1
|
||||
ip_stats.last_seen = now
|
||||
else:
|
||||
ip_stats = IpStats(
|
||||
ip=sanitized_ip,
|
||||
total_requests=1,
|
||||
first_seen=now,
|
||||
last_seen=now
|
||||
)
|
||||
session.add(ip_stats)
|
||||
|
||||
def get_access_logs(
|
||||
self,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
ip_filter: Optional[str] = None,
|
||||
suspicious_only: bool = False
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Retrieve access logs with optional filtering.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of records to return
|
||||
offset: Number of records to skip
|
||||
ip_filter: Filter by IP address
|
||||
suspicious_only: Only return suspicious requests
|
||||
|
||||
Returns:
|
||||
List of access log dictionaries
|
||||
"""
|
||||
session = self.session
|
||||
try:
|
||||
query = session.query(AccessLog).order_by(AccessLog.timestamp.desc())
|
||||
|
||||
if ip_filter:
|
||||
query = query.filter(AccessLog.ip == sanitize_ip(ip_filter))
|
||||
if suspicious_only:
|
||||
query = query.filter(AccessLog.is_suspicious == True)
|
||||
|
||||
logs = query.offset(offset).limit(limit).all()
|
||||
|
||||
return [
|
||||
{
|
||||
'id': log.id,
|
||||
'ip': log.ip,
|
||||
'path': log.path,
|
||||
'user_agent': log.user_agent,
|
||||
'method': log.method,
|
||||
'is_suspicious': log.is_suspicious,
|
||||
'is_honeypot_trigger': log.is_honeypot_trigger,
|
||||
'timestamp': log.timestamp.isoformat(),
|
||||
'attack_types': [d.attack_type for d in log.attack_detections]
|
||||
}
|
||||
for log in logs
|
||||
]
|
||||
finally:
|
||||
self.close_session()
|
||||
|
||||
def get_credential_attempts(
|
||||
self,
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
ip_filter: Optional[str] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Retrieve credential attempts with optional filtering.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of records to return
|
||||
offset: Number of records to skip
|
||||
ip_filter: Filter by IP address
|
||||
|
||||
Returns:
|
||||
List of credential attempt dictionaries
|
||||
"""
|
||||
session = self.session
|
||||
try:
|
||||
query = session.query(CredentialAttempt).order_by(
|
||||
CredentialAttempt.timestamp.desc()
|
||||
)
|
||||
|
||||
if ip_filter:
|
||||
query = query.filter(CredentialAttempt.ip == sanitize_ip(ip_filter))
|
||||
|
||||
attempts = query.offset(offset).limit(limit).all()
|
||||
|
||||
return [
|
||||
{
|
||||
'id': attempt.id,
|
||||
'ip': attempt.ip,
|
||||
'path': attempt.path,
|
||||
'username': attempt.username,
|
||||
'password': attempt.password,
|
||||
'timestamp': attempt.timestamp.isoformat()
|
||||
}
|
||||
for attempt in attempts
|
||||
]
|
||||
finally:
|
||||
self.close_session()
|
||||
|
||||
def get_ip_stats(self, limit: int = 100) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Retrieve IP statistics ordered by total requests.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of records to return
|
||||
|
||||
Returns:
|
||||
List of IP stats dictionaries
|
||||
"""
|
||||
session = self.session
|
||||
try:
|
||||
stats = session.query(IpStats).order_by(
|
||||
IpStats.total_requests.desc()
|
||||
).limit(limit).all()
|
||||
|
||||
return [
|
||||
{
|
||||
'ip': s.ip,
|
||||
'total_requests': s.total_requests,
|
||||
'first_seen': s.first_seen.isoformat(),
|
||||
'last_seen': s.last_seen.isoformat(),
|
||||
'country_code': s.country_code,
|
||||
'city': s.city,
|
||||
'asn': s.asn,
|
||||
'asn_org': s.asn_org,
|
||||
'reputation_score': s.reputation_score,
|
||||
'reputation_source': s.reputation_source
|
||||
}
|
||||
for s in stats
|
||||
]
|
||||
finally:
|
||||
self.close_session()
|
||||
|
||||
|
||||
# Module-level singleton instance
|
||||
_db_manager = DatabaseManager()
|
||||
|
||||
|
||||
def get_database() -> DatabaseManager:
|
||||
"""Get the database manager singleton instance."""
|
||||
return _db_manager
|
||||
|
||||
|
||||
def initialize_database(database_path: str = "data/krawl.db") -> None:
|
||||
"""Initialize the database system."""
|
||||
_db_manager.initialize(database_path)
|
||||
Reference in New Issue
Block a user