Merge branch 'dev' into feat/sqlite3-storage

This commit is contained in:
Phillip Tarrant
2025-12-28 10:56:37 -06:00
12 changed files with 253 additions and 62 deletions

View File

@@ -3,6 +3,8 @@
import os
from dataclasses import dataclass
from typing import Optional, Tuple
from zoneinfo import ZoneInfo
import time
@dataclass
@@ -25,6 +27,40 @@ class Config:
# Database settings
database_path: str = "data/krawl.db"
database_retention_days: int = 30
timezone: str = None # IANA timezone (e.g., 'America/New_York', 'Europe/Rome')
@staticmethod
# Try to fetch timezone before if not set
def get_system_timezone() -> str:
"""Get the system's default timezone"""
try:
if os.path.islink('/etc/localtime'):
tz_path = os.readlink('/etc/localtime')
if 'zoneinfo/' in tz_path:
return tz_path.split('zoneinfo/')[-1]
local_tz = time.tzname[time.daylight]
if local_tz and local_tz != 'UTC':
return local_tz
except Exception:
pass
# Default fallback to UTC
return 'UTC'
def get_timezone(self) -> ZoneInfo:
"""Get configured timezone as ZoneInfo object"""
if self.timezone:
try:
return ZoneInfo(self.timezone)
except Exception:
pass
system_tz = self.get_system_timezone()
try:
return ZoneInfo(system_tz)
except Exception:
return ZoneInfo('UTC')
@classmethod
def from_env(cls) -> 'Config':
@@ -48,8 +84,9 @@ class Config:
api_server_url=os.getenv('API_SERVER_URL'),
api_server_port=int(os.getenv('API_SERVER_PORT', 8080)),
api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'),
probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5)),
server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)'),
database_path=os.getenv('DATABASE_PATH', 'data/krawl.db'),
database_retention_days=int(os.getenv('DATABASE_RETENTION_DAYS', 30))
database_retention_days=int(os.getenv('DATABASE_RETENTION_DAYS', 30)),
probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 0)),
timezone=os.getenv('TIMEZONE') # If not set, will use system timezone
)

View File

@@ -8,6 +8,23 @@ Provides two loggers: app (application) and access (HTTP access logs).
import logging
import os
from logging.handlers import RotatingFileHandler
from typing import Optional
from zoneinfo import ZoneInfo
from datetime import datetime
class TimezoneFormatter(logging.Formatter):
"""Custom formatter that respects configured timezone"""
def __init__(self, fmt=None, datefmt=None, timezone: Optional[ZoneInfo] = None):
super().__init__(fmt, datefmt)
self.timezone = timezone or ZoneInfo('UTC')
def formatTime(self, record, datefmt=None):
"""Override formatTime to use configured timezone"""
dt = datetime.fromtimestamp(record.created, tz=self.timezone)
if datefmt:
return dt.strftime(datefmt)
return dt.isoformat()
class LoggerManager:
@@ -20,23 +37,27 @@ class LoggerManager:
cls._instance._initialized = False
return cls._instance
def initialize(self, log_dir: str = "logs") -> None:
def initialize(self, log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None:
"""
Initialize the logging system with rotating file handlers.
Args:
log_dir: Directory for log files (created if not exists)
timezone: ZoneInfo timezone for log timestamps (defaults to UTC)
"""
if self._initialized:
return
self.timezone = timezone or ZoneInfo('UTC')
# Create log directory if it doesn't exist
os.makedirs(log_dir, exist_ok=True)
# Common format for all loggers
log_format = logging.Formatter(
log_format = TimezoneFormatter(
"[%(asctime)s] %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
datefmt="%Y-%m-%d %H:%M:%S",
timezone=self.timezone
)
# Rotation settings: 1MB max, 5 backups
@@ -83,7 +104,7 @@ class LoggerManager:
self._credential_logger.handlers.clear()
# Credential logger uses a simple format: timestamp|ip|username|password|path
credential_format = logging.Formatter("%(message)s")
credential_format = TimezoneFormatter("%(message)s", timezone=self.timezone)
credential_file_handler = RotatingFileHandler(
os.path.join(log_dir, "credentials.log"),
@@ -136,6 +157,6 @@ def get_credential_logger() -> logging.Logger:
return _logger_manager.credentials
def initialize_logging(log_dir: str = "logs") -> None:
def initialize_logging(log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None:
"""Initialize the logging system."""
_logger_manager.initialize(log_dir)
_logger_manager.initialize(log_dir, timezone)

View File

@@ -36,6 +36,8 @@ def print_usage():
print(' SERVER_HEADER - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))')
print(' DATABASE_PATH - Path to SQLite database (default: data/krawl.db)')
print(' DATABASE_RETENTION_DAYS - Days to retain database records (default: 30)')
print(' TIMEZONE - IANA timezone for logs/dashboard (e.g., America/New_York, Europe/Rome)')
print(' If not set, system timezone will be used')
def main():
@@ -44,8 +46,13 @@ def main():
print_usage()
exit(0)
# Initialize logging
initialize_logging()
config = Config.from_env()
# Get timezone configuration
tz = config.get_timezone()
# Initialize logging with timezone
initialize_logging(timezone=tz)
app_logger = get_app_logger()
access_logger = get_access_logger()
credential_logger = get_credential_logger()
@@ -59,7 +66,7 @@ def main():
except Exception as e:
app_logger.warning(f'Database initialization failed: {e}. Continuing with in-memory only.')
tracker = AccessTracker()
tracker = AccessTracker(timezone=tz)
Handler.config = config
Handler.tracker = tracker
@@ -81,6 +88,7 @@ def main():
try:
app_logger.info(f'Starting deception server on port {config.port}...')
app_logger.info(f'Timezone configured: {tz.key}')
app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}')
if config.canary_token_url:
app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries')

View File

@@ -6,7 +6,7 @@ Customize this template to change the dashboard appearance.
"""
import html
from datetime import datetime
def _escape(value) -> str:
"""Escape HTML special characters to prevent XSS attacks."""
@@ -14,6 +14,15 @@ def _escape(value) -> str:
return ""
return html.escape(str(value))
def format_timestamp(iso_timestamp: str) -> str:
"""Format ISO timestamp for display (YYYY-MM-DD HH:MM:SS)"""
try:
dt = datetime.fromisoformat(iso_timestamp)
return dt.strftime("%Y-%m-%d %H:%M:%S")
except Exception:
# Fallback for old format
return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
def generate_dashboard(stats: dict) -> str:
"""Generate dashboard HTML with access statistics"""

View File

@@ -3,6 +3,7 @@
from typing import Dict, List, Tuple, Optional
from collections import defaultdict
from datetime import datetime
from zoneinfo import ZoneInfo
import re
import urllib.parse
@@ -16,7 +17,7 @@ class AccessTracker:
Maintains in-memory structures for fast dashboard access and
persists data to SQLite for long-term storage and analysis.
"""
def __init__(self, db_manager: Optional[DatabaseManager] = None):
def __init__(self, db_manager: Optional[DatabaseManager] = None, timezone: Optional[ZoneInfo] = None):
"""
Initialize the access tracker.
@@ -29,6 +30,7 @@ class AccessTracker:
self.user_agent_counts: Dict[str, int] = defaultdict(int)
self.access_log: List[Dict] = []
self.credential_attempts: List[Dict] = []
self.timezone = timezone or ZoneInfo('UTC')
self.suspicious_patterns = [
'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests',
'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
@@ -119,7 +121,7 @@ class AccessTracker:
'path': path,
'username': username,
'password': password,
'timestamp': datetime.now().isoformat()
'timestamp': datetime.now(self.timezone).isoformat()
})
# Persist to database
@@ -184,9 +186,9 @@ class AccessTracker:
'path': path,
'user_agent': user_agent,
'suspicious': is_suspicious,
'honeypot_triggered': is_honeypot,
'attack_types': attack_findings,
'timestamp': datetime.now().isoformat()
'honeypot_triggered': self.is_honeypot_path(path),
'attack_types':attack_findings,
'timestamp': datetime.now(self.timezone).isoformat()
})
# Persist to database