From 16aca9bba63a56eacde2eb0b9cb4d00b876f1f00 Mon Sep 17 00:00:00 2001 From: Phillip Tarrant Date: Fri, 26 Dec 2025 07:53:05 -0600 Subject: [PATCH 01/12] Add configurable HTTP Server header for deception Add SERVER_HEADER environment variable to customize the HTTP Server response header, defaulting to Apache/2.2.22 (Ubuntu). This allows the honeypot to masquerade as different web servers to attract attackers. - Add server_header field to Config dataclass - Override version_string() in Handler to return configured header - Update documentation and all deployment configs --- README.md | 1 + docker-compose.yaml | 1 + helm/templates/configmap.yaml | 1 + helm/values.yaml | 1 + kubernetes/manifests/configmap.yaml | 1 + src/config.py | 4 +++- src/handler.py | 4 ++++ src/server.py | 1 + 8 files changed, 13 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0cf8b96..b84d955 100644 --- a/README.md +++ b/README.md @@ -185,6 +185,7 @@ To customize the deception server installation several **environment variables** | `CANARY_TOKEN_URL` | External canary token URL | None | | `DASHBOARD_SECRET_PATH` | Custom dashboard path | Auto-generated | | `PROBABILITY_ERROR_CODES` | Error response probability (0-100%) | `0` | +| `SERVER_HEADER` | HTTP Server header for deception | `Apache/2.2.22 (Ubuntu)` | ## robots.txt The actual (juicy) robots.txt configuration is the following diff --git a/docker-compose.yaml b/docker-compose.yaml index 57c648d..1612864 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -20,6 +20,7 @@ services: - MAX_COUNTER=10 - CANARY_TOKEN_TRIES=10 - PROBABILITY_ERROR_CODES=0 + - SERVER_HEADER=Apache/2.2.22 (Ubuntu) # Optional: Set your canary token URL # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt # Optional: Set custom dashboard path (auto-generated if not set) diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index f6fe92c..c50ab75 100644 --- a/helm/templates/configmap.yaml +++ 
b/helm/templates/configmap.yaml @@ -14,4 +14,5 @@ data: MAX_COUNTER: {{ .Values.config.maxCounter | quote }} CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }} PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }} + SERVER_HEADER: {{ .Values.config.serverHeader | quote }} CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }} diff --git a/helm/values.yaml b/helm/values.yaml index 9ee9ca5..a095632 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -73,6 +73,7 @@ config: maxCounter: 10 canaryTokenTries: 10 probabilityErrorCodes: 0 + serverHeader: "Apache/2.2.22 (Ubuntu)" # canaryTokenUrl: set-your-canary-token-url-here networkPolicy: diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml index 42ba002..431b9a3 100644 --- a/kubernetes/manifests/configmap.yaml +++ b/kubernetes/manifests/configmap.yaml @@ -13,4 +13,5 @@ data: MAX_COUNTER: "10" CANARY_TOKEN_TRIES: "10" PROBABILITY_ERROR_CODES: "0" + SERVER_HEADER: "Apache/2.2.22 (Ubuntu)" # CANARY_TOKEN_URL: set-your-canary-token-url-here \ No newline at end of file diff --git a/src/config.py b/src/config.py index 51391a9..7c6714c 100644 --- a/src/config.py +++ b/src/config.py @@ -21,6 +21,7 @@ class Config: api_server_port: int = 8080 api_server_path: str = "/api/v2/users" probability_error_codes: int = 0 # Percentage (0-100) + server_header: str = "Apache/2.2.22 (Ubuntu)" @classmethod def from_env(cls) -> 'Config': @@ -44,5 +45,6 @@ class Config: api_server_url=os.getenv('API_SERVER_URL'), api_server_port=int(os.getenv('API_SERVER_PORT', 8080)), api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'), - probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5)) + probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5)), + server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)') ) diff --git a/src/handler.py b/src/handler.py index 81f48fa..bed3369 100644 --- a/src/handler.py +++ b/src/handler.py @@ 
-46,6 +46,10 @@ class Handler(BaseHTTPRequestHandler): """Extract user agent from request""" return self.headers.get('User-Agent', '') + def version_string(self) -> str: + """Return custom server version for deception.""" + return self.config.server_header + def _should_return_error(self) -> bool: """Check if we should return an error based on probability""" if self.config.probability_error_codes <= 0: diff --git a/src/server.py b/src/server.py index d10d33e..73f0ce9 100644 --- a/src/server.py +++ b/src/server.py @@ -31,6 +31,7 @@ def print_usage(): print(' DASHBOARD_SECRET_PATH - Secret path for dashboard (auto-generated if not set)') print(' PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)') print(' CHAR_SPACE - Characters for random links') + print(' SERVER_HEADER - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))') def main(): From d0101b34faf33bc85d3065051353da55bb0bf56f Mon Sep 17 00:00:00 2001 From: Phillip Tarrant Date: Fri, 26 Dec 2025 08:00:16 -0600 Subject: [PATCH 02/12] Added test script to show the server header --- tests/check_header.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100755 tests/check_header.sh diff --git a/tests/check_header.sh b/tests/check_header.sh new file mode 100755 index 0000000..78b8e5d --- /dev/null +++ b/tests/check_header.sh @@ -0,0 +1,3 @@ +#!/bin/env bash +# -s is for silent (no progress bar) | -I is to get the headers | grep is to find only the Server line +curl -s -I http://localhost:5000 | grep "Server:" \ No newline at end of file From 61ba574e92cc86444001a767508dc3e79f469247 Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio Date: Sat, 27 Dec 2025 19:17:27 +0100 Subject: [PATCH 03/12] Added POST log and dashboard for used credentials --- src/handler.py | 15 ++++++++ src/logger.py | 28 +++++++++++++++ src/server.py | 4 ++- src/templates/dashboard_template.py | 28 +++++++++++++++ src/tracker.py | 56 ++++++++++++++++++++++++++++- 5 files changed, 129 
insertions(+), 2 deletions(-) diff --git a/src/handler.py b/src/handler.py index 9d8abe2..ac7ca22 100644 --- a/src/handler.py +++ b/src/handler.py @@ -3,6 +3,7 @@ import logging import random import time +from datetime import datetime from http.server import BaseHTTPRequestHandler from typing import Optional, List @@ -25,6 +26,7 @@ class Handler(BaseHTTPRequestHandler): counter: int = 0 app_logger: logging.Logger = None access_logger: logging.Logger = None + credential_logger: logging.Logger = None def _get_client_ip(self) -> str: """Extract client IP address from request, checking proxy headers first""" @@ -213,6 +215,19 @@ class Handler(BaseHTTPRequestHandler): self.access_logger.warning(f"[POST DATA] {post_data[:200]}") + # Parse and log credentials + username, password = self.tracker.parse_credentials(post_data) + if username or password: + # Log to dedicated credentials.log file + timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") + credential_line = f"{timestamp}|{client_ip}|{username or 'N/A'}|{password or 'N/A'}|{self.path}" + self.credential_logger.info(credential_line) + + # Also record in tracker for dashboard + self.tracker.record_credential_attempt(client_ip, self.path, username or 'N/A', password or 'N/A') + + self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}") + # send the post data (body) to the record_access function so the post data can be used to detect suspicious things. 
self.tracker.record_access(client_ip, self.path, user_agent, post_data) diff --git a/src/logger.py b/src/logger.py index 68b8278..9f09236 100644 --- a/src/logger.py +++ b/src/logger.py @@ -77,6 +77,22 @@ class LoggerManager: access_stream_handler.setFormatter(log_format) self._access_logger.addHandler(access_stream_handler) + # Setup credential logger (special format, no stream handler) + self._credential_logger = logging.getLogger("krawl.credentials") + self._credential_logger.setLevel(logging.INFO) + self._credential_logger.handlers.clear() + + # Credential logger uses a simple format: timestamp|ip|username|password|path + credential_format = logging.Formatter("%(message)s") + + credential_file_handler = RotatingFileHandler( + os.path.join(log_dir, "credentials.log"), + maxBytes=max_bytes, + backupCount=backup_count + ) + credential_file_handler.setFormatter(credential_format) + self._credential_logger.addHandler(credential_file_handler) + self._initialized = True @property @@ -93,6 +109,13 @@ class LoggerManager: self.initialize() return self._access_logger + @property + def credentials(self) -> logging.Logger: + """Get the credentials logger.""" + if not self._initialized: + self.initialize() + return self._credential_logger + # Module-level singleton instance _logger_manager = LoggerManager() @@ -108,6 +131,11 @@ def get_access_logger() -> logging.Logger: return _logger_manager.access +def get_credential_logger() -> logging.Logger: + """Get the credential logger instance.""" + return _logger_manager.credentials + + def initialize_logging(log_dir: str = "logs") -> None: """Initialize the logging system.""" _logger_manager.initialize(log_dir) diff --git a/src/server.py b/src/server.py index 861e9f2..fd8f7d2 100644 --- a/src/server.py +++ b/src/server.py @@ -11,7 +11,7 @@ from http.server import HTTPServer from config import Config from tracker import AccessTracker from handler import Handler -from logger import initialize_logging, get_app_logger, 
get_access_logger +from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger def print_usage(): @@ -45,6 +45,7 @@ def main(): initialize_logging() app_logger = get_app_logger() access_logger = get_access_logger() + credential_logger = get_credential_logger() config = Config.from_env() @@ -55,6 +56,7 @@ def main(): Handler.counter = config.canary_token_tries Handler.app_logger = app_logger Handler.access_logger = access_logger + Handler.credential_logger = credential_logger if len(sys.argv) == 2: try: diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py index 3f5524d..a267278 100644 --- a/src/templates/dashboard_template.py +++ b/src/templates/dashboard_template.py @@ -45,6 +45,12 @@ def generate_dashboard(stats: dict) -> str: for log in stats.get('attack_types', [])[-10:] ]) or 'No attacks detected' + # Generate credential attempts rows + credential_rows = '\n'.join([ + f'{log["ip"]}{log["username"]}{log["password"]}{log["path"]}{log["timestamp"].split("T")[1][:8]}' + for log in stats.get('credential_attempts', [])[-20:] + ]) or 'No credentials captured yet' + return f""" @@ -159,6 +165,10 @@ def generate_dashboard(stats: dict) -> str:
{stats.get('honeypot_ips', 0)}
Honeypot Caught
+
+
{len(stats.get('credential_attempts', []))}
+
Credentials Captured
+
@@ -194,6 +204,24 @@ def generate_dashboard(stats: dict) -> str:
+
+

🔑 Captured Credentials

+ + + + + + + + + + + + {credential_rows} + +
IP AddressUsernamePasswordPathTime
+
+

😈 Detected Attack Types

diff --git a/src/tracker.py b/src/tracker.py index 6e733f4..717a4c3 100644 --- a/src/tracker.py +++ b/src/tracker.py @@ -4,6 +4,7 @@ from typing import Dict, List, Tuple from collections import defaultdict from datetime import datetime import re +import urllib.parse class AccessTracker: @@ -13,6 +14,7 @@ class AccessTracker: self.path_counts: Dict[str, int] = defaultdict(int) self.user_agent_counts: Dict[str, int] = defaultdict(int) self.access_log: List[Dict] = [] + self.credential_attempts: List[Dict] = [] self.suspicious_patterns = [ 'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests', 'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix', @@ -31,6 +33,57 @@ class AccessTracker: # Track IPs that accessed honeypot paths from robots.txt self.honeypot_triggered: Dict[str, List[str]] = defaultdict(list) + def parse_credentials(self, post_data: str) -> Tuple[str, str]: + """ + Parse username and password from POST data. + Returns tuple (username, password) or (None, None) if not found. 
+ """ + if not post_data: + return None, None + + username = None + password = None + + try: + # Parse URL-encoded form data + parsed = urllib.parse.parse_qs(post_data) + + # Common username field names + username_fields = ['username', 'user', 'login', 'email', 'log', 'userid', 'account'] + for field in username_fields: + if field in parsed and parsed[field]: + username = parsed[field][0] + break + + # Common password field names + password_fields = ['password', 'pass', 'passwd', 'pwd', 'passphrase'] + for field in password_fields: + if field in parsed and parsed[field]: + password = parsed[field][0] + break + + except Exception: + # If parsing fails, try simple regex patterns + username_match = re.search(r'(?:username|user|login|email|log)=([^&\s]+)', post_data, re.IGNORECASE) + password_match = re.search(r'(?:password|pass|passwd|pwd)=([^&\s]+)', post_data, re.IGNORECASE) + + if username_match: + username = urllib.parse.unquote_plus(username_match.group(1)) + if password_match: + password = urllib.parse.unquote_plus(password_match.group(1)) + + return username, password + + def record_credential_attempt(self, ip: str, path: str, username: str, password: str): + """Record a credential login attempt""" + self.credential_attempts.append({ + 'ip': ip, + 'path': path, + 'username': username, + 'password': password, + 'timestamp': datetime.now().isoformat() + }) + def record_access(self, ip: str, path: str, user_agent: str = '', body: str = ''): """Record an access attempt""" self.ip_counts[ip] += 1 @@ -146,5 +199,6 @@ class AccessTracker: 'top_user_agents': self.get_top_user_agents(10), 'recent_suspicious': self.get_suspicious_accesses(20), 'honeypot_triggered_ips': self.get_honeypot_triggered_ips(), - 'attack_types': self.get_attack_type_accesses(20) + 'attack_types': self.get_attack_type_accesses(20), + 'credential_attempts': self.credential_attempts[-50:] # Last 50 attempts } From 6556e17f91d53965f96e5d9f8b9e9f0ddd03e729 Mon Sep 17 00:00:00 2001 From: Patrick Di 
Fazio Date: Sun, 28 Dec 2025 17:07:18 +0100 Subject: [PATCH 04/12] Added timezone env variable handling --- README.md | 1 + deployment.yaml | 44 -------- docker-compose.yaml | 2 + helm/templates/configmap.yaml | 3 + helm/values.yaml | 1 + kubernetes/manifests/configmap.yaml | 3 +- src/config.py | 41 +++++++- src/logger.py | 33 ++++-- src/server.py | 17 +++- src/templates/dashboard_template.py | 18 +++- src/tracker.py | 10 +- tests/test_credentials.sh | 150 ++++++++++++++++++++++++++++ 12 files changed, 258 insertions(+), 65 deletions(-) delete mode 100644 deployment.yaml create mode 100755 tests/test_credentials.sh diff --git a/README.md b/README.md index b84d955..06157bd 100644 --- a/README.md +++ b/README.md @@ -186,6 +186,7 @@ To customize the deception server installation several **environment variables** | `DASHBOARD_SECRET_PATH` | Custom dashboard path | Auto-generated | | `PROBABILITY_ERROR_CODES` | Error response probability (0-100%) | `0` | | `SERVER_HEADER` | HTTP Server header for deception | `Apache/2.2.22 (Ubuntu)` | +| `TIMEZONE` | IANA timezone for logs and dashboard (e.g., `America/New_York`, `Europe/Rome`) | System timezone | ## robots.txt The actual (juicy) robots.txt configuration is the following diff --git a/deployment.yaml b/deployment.yaml deleted file mode 100644 index 4bf5189..0000000 --- a/deployment.yaml +++ /dev/null @@ -1,44 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: krawl-server - namespace: krawl - labels: - app: krawl-server -spec: - replicas: 1 - selector: - matchLabels: - app: krawl-server - template: - metadata: - labels: - app: krawl-server - spec: - containers: - - name: krawl - image: ghcr.io/blessedrebus/krawl:latest - imagePullPolicy: Always - ports: - - containerPort: 5000 - name: http - protocol: TCP - envFrom: - - configMapRef: - name: krawl-config - volumeMounts: - - name: wordlists - mountPath: /app/wordlists.json - subPath: wordlists.json - readOnly: true - resources: - requests: - memory: "64Mi" 
- cpu: "100m" - limits: - memory: "256Mi" - cpu: "500m" - volumes: - - name: wordlists - configMap: - name: krawl-wordlists diff --git a/docker-compose.yaml b/docker-compose.yaml index 1612864..600034d 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -25,6 +25,8 @@ services: # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt # Optional: Set custom dashboard path (auto-generated if not set) # - DASHBOARD_SECRET_PATH=/my-secret-dashboard + # Optional: Set timezone for logs and dashboard (e.g., America/New_York, Europe/Rome) + # - TIMEZONE=UTC restart: unless-stopped healthcheck: test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"] diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index c50ab75..c08aaa5 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -16,3 +16,6 @@ data: PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }} SERVER_HEADER: {{ .Values.config.serverHeader | quote }} CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }} + {{- if .Values.config.timezone }} + TIMEZONE: {{ .Values.config.timezone | quote }} + {{- end }} diff --git a/helm/values.yaml b/helm/values.yaml index a095632..ac51756 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -75,6 +75,7 @@ config: probabilityErrorCodes: 0 serverHeader: "Apache/2.2.22 (Ubuntu)" # canaryTokenUrl: set-your-canary-token-url-here +# timezone: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used. 
networkPolicy: enabled: true diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml index 431b9a3..073005f 100644 --- a/kubernetes/manifests/configmap.yaml +++ b/kubernetes/manifests/configmap.yaml @@ -14,4 +14,5 @@ data: CANARY_TOKEN_TRIES: "10" PROBABILITY_ERROR_CODES: "0" SERVER_HEADER: "Apache/2.2.22 (Ubuntu)" -# CANARY_TOKEN_URL: set-your-canary-token-url-here \ No newline at end of file +# CANARY_TOKEN_URL: set-your-canary-token-url-here +# TIMEZONE: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome") \ No newline at end of file diff --git a/src/config.py b/src/config.py index 7c6714c..741f01f 100644 --- a/src/config.py +++ b/src/config.py @@ -3,6 +3,8 @@ import os from dataclasses import dataclass from typing import Optional, Tuple +from zoneinfo import ZoneInfo +import time @dataclass @@ -22,6 +24,40 @@ class Config: api_server_path: str = "/api/v2/users" probability_error_codes: int = 0 # Percentage (0-100) server_header: str = "Apache/2.2.22 (Ubuntu)" + timezone: str = None # IANA timezone (e.g., 'America/New_York', 'Europe/Rome') + + @staticmethod + # Try to fetch timezone before if not set + def get_system_timezone() -> str: + """Get the system's default timezone""" + try: + if os.path.islink('/etc/localtime'): + tz_path = os.readlink('/etc/localtime') + if 'zoneinfo/' in tz_path: + return tz_path.split('zoneinfo/')[-1] + + local_tz = time.tzname[time.daylight] + if local_tz and local_tz != 'UTC': + return local_tz + except Exception: + pass + + # Default fallback to UTC + return 'UTC' + + def get_timezone(self) -> ZoneInfo: + """Get configured timezone as ZoneInfo object""" + if self.timezone: + try: + return ZoneInfo(self.timezone) + except Exception: + pass + + system_tz = self.get_system_timezone() + try: + return ZoneInfo(system_tz) + except Exception: + return ZoneInfo('UTC') @classmethod def from_env(cls) -> 'Config': @@ -45,6 +81,7 @@ class Config: api_server_url=os.getenv('API_SERVER_URL'), 
api_server_port=int(os.getenv('API_SERVER_PORT', 8080)), api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'), - probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5)), - server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)') + probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 0)), + server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)'), + timezone=os.getenv('TIMEZONE') # If not set, will use system timezone ) diff --git a/src/logger.py b/src/logger.py index 9f09236..992cad8 100644 --- a/src/logger.py +++ b/src/logger.py @@ -8,6 +8,23 @@ Provides two loggers: app (application) and access (HTTP access logs). import logging import os from logging.handlers import RotatingFileHandler +from typing import Optional +from zoneinfo import ZoneInfo +from datetime import datetime + + +class TimezoneFormatter(logging.Formatter): + """Custom formatter that respects configured timezone""" + def __init__(self, fmt=None, datefmt=None, timezone: Optional[ZoneInfo] = None): + super().__init__(fmt, datefmt) + self.timezone = timezone or ZoneInfo('UTC') + + def formatTime(self, record, datefmt=None): + """Override formatTime to use configured timezone""" + dt = datetime.fromtimestamp(record.created, tz=self.timezone) + if datefmt: + return dt.strftime(datefmt) + return dt.isoformat() class LoggerManager: @@ -20,23 +37,27 @@ class LoggerManager: cls._instance._initialized = False return cls._instance - def initialize(self, log_dir: str = "logs") -> None: + def initialize(self, log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None: """ Initialize the logging system with rotating file handlers. 
Args: log_dir: Directory for log files (created if not exists) + timezone: ZoneInfo timezone for log timestamps (defaults to UTC) """ if self._initialized: return + self.timezone = timezone or ZoneInfo('UTC') + # Create log directory if it doesn't exist os.makedirs(log_dir, exist_ok=True) # Common format for all loggers - log_format = logging.Formatter( + log_format = TimezoneFormatter( "[%(asctime)s] %(levelname)s - %(message)s", - datefmt="%Y-%m-%d %H:%M:%S" + datefmt="%Y-%m-%d %H:%M:%S", + timezone=self.timezone ) # Rotation settings: 1MB max, 5 backups @@ -83,7 +104,7 @@ class LoggerManager: self._credential_logger.handlers.clear() # Credential logger uses a simple format: timestamp|ip|username|password|path - credential_format = logging.Formatter("%(message)s") + credential_format = TimezoneFormatter("%(message)s", timezone=self.timezone) credential_file_handler = RotatingFileHandler( os.path.join(log_dir, "credentials.log"), @@ -136,6 +157,6 @@ def get_credential_logger() -> logging.Logger: return _logger_manager.credentials -def initialize_logging(log_dir: str = "logs") -> None: +def initialize_logging(log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None: """Initialize the logging system.""" - _logger_manager.initialize(log_dir) + _logger_manager.initialize(log_dir, timezone) diff --git a/src/server.py b/src/server.py index fd8f7d2..fcb794e 100644 --- a/src/server.py +++ b/src/server.py @@ -33,6 +33,8 @@ def print_usage(): print(' PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)') print(' CHAR_SPACE - Characters for random links') print(' SERVER_HEADER - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))') + print(' TIMEZONE - IANA timezone for logs/dashboard (e.g., America/New_York, Europe/Rome)') + print(' If not set, system timezone will be used') def main(): @@ -41,15 +43,19 @@ def main(): print_usage() exit(0) - # Initialize logging - initialize_logging() + config = Config.from_env() + 
+ # Get timezone configuration + tz = config.get_timezone() + + # Initialize logging with timezone + initialize_logging(timezone=tz) app_logger = get_app_logger() access_logger = get_access_logger() credential_logger = get_credential_logger() - config = Config.from_env() - - tracker = AccessTracker() + # Initialize tracker with timezone + tracker = AccessTracker(timezone=tz) Handler.config = config Handler.tracker = tracker @@ -71,6 +77,7 @@ def main(): try: app_logger.info(f'Starting deception server on port {config.port}...') + app_logger.info(f'Timezone configured: {tz.key}') app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}') if config.canary_token_url: app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries') diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py index a267278..9fc4111 100644 --- a/src/templates/dashboard_template.py +++ b/src/templates/dashboard_template.py @@ -5,6 +5,18 @@ Dashboard template for viewing honeypot statistics. Customize this template to change the dashboard appearance. 
""" +from datetime import datetime + + +def format_timestamp(iso_timestamp: str) -> str: + """Format ISO timestamp for display (YYYY-MM-DD HH:MM:SS)""" + try: + dt = datetime.fromisoformat(iso_timestamp) + return dt.strftime("%Y-%m-%d %H:%M:%S") + except Exception: + # Fallback for old format + return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp + def generate_dashboard(stats: dict) -> str: """Generate dashboard HTML with access statistics""" @@ -29,7 +41,7 @@ def generate_dashboard(stats: dict) -> str: # Generate suspicious accesses rows suspicious_rows = '\n'.join([ - f'' + f'' for log in stats['recent_suspicious'][-10:] ]) or '' @@ -41,13 +53,13 @@ def generate_dashboard(stats: dict) -> str: # Generate attack types rows attack_type_rows = '\n'.join([ - f'' + f'' for log in stats.get('attack_types', [])[-10:] ]) or '' # Generate credential attempts rows credential_rows = '\n'.join([ - f'' + f'' for log in stats.get('credential_attempts', [])[-20:] ]) or '' diff --git a/src/tracker.py b/src/tracker.py index 717a4c3..c9322ec 100644 --- a/src/tracker.py +++ b/src/tracker.py @@ -1,20 +1,22 @@ #!/usr/bin/env python3 -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Optional from collections import defaultdict from datetime import datetime +from zoneinfo import ZoneInfo import re import urllib.parse class AccessTracker: """Track IP addresses and paths accessed""" - def __init__(self): + def __init__(self, timezone: Optional[ZoneInfo] = None): self.ip_counts: Dict[str, int] = defaultdict(int) self.path_counts: Dict[str, int] = defaultdict(int) self.user_agent_counts: Dict[str, int] = defaultdict(int) self.access_log: List[Dict] = [] self.credential_attempts: List[Dict] = [] + self.timezone = timezone or ZoneInfo('UTC') self.suspicious_patterns = [ 'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests', 'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix', @@ -81,7 +83,7 @@ class 
AccessTracker: 'path': path, 'username': username, 'password': password, - 'timestamp': datetime.now().isoformat() + 'timestamp': datetime.now(self.timezone).isoformat() }) def record_access(self, ip: str, path: str, user_agent: str = '', body: str = ''): @@ -112,7 +114,7 @@ class AccessTracker: 'suspicious': is_suspicious, 'honeypot_triggered': self.is_honeypot_path(path), 'attack_types':attack_findings, - 'timestamp': datetime.now().isoformat() + 'timestamp': datetime.now(self.timezone).isoformat() }) def detect_attack_type(self, data:str) -> list[str]: diff --git a/tests/test_credentials.sh b/tests/test_credentials.sh new file mode 100755 index 0000000..6379b92 --- /dev/null +++ b/tests/test_credentials.sh @@ -0,0 +1,150 @@ +#!/bin/bash + +# This script sends various POST requests with credentials to the honeypot + +GREEN='\033[0;32m' +BLUE='\033[0;34m' +YELLOW='\033[1;33m' +RED='\033[0;31m' +NC='\033[0m' + +# Configuration +HOST="localhost" +PORT="5000" +BASE_URL="http://${HOST}:${PORT}" + +echo -e "${BLUE}========================================${NC}" +echo -e "${BLUE}Krawl Credential Logging Test Script${NC}" +echo -e "${BLUE}========================================${NC}\n" + +# Check if server is running +echo -e "${YELLOW}Checking if server is running on ${BASE_URL}...${NC}" +if ! curl -s -f "${BASE_URL}/health" > /dev/null 2>&1; then + echo -e "${RED}❌ Server is not running. 
Please start the Krawl server first.${NC}" + echo -e "${YELLOW}Run: python3 src/server.py${NC}" + exit 1 +fi +echo -e "${GREEN}✓ Server is running${NC}\n" + +# Test 1: Simple login form POST +echo -e "${YELLOW}Test 1: POST to /login with form data${NC}" +curl -s -X POST "${BASE_URL}/login" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "username=admin&password=admin123" \ + > /dev/null +echo -e "${GREEN}✓ Sent: admin / admin123${NC}\n" + +sleep 1 + +# Test 2: Admin panel login +echo -e "${YELLOW}Test 2: POST to /admin with credentials${NC}" +curl -s -X POST "${BASE_URL}/admin" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "user=root&pass=toor&submit=Login" \ + > /dev/null +echo -e "${GREEN}✓ Sent: root / toor${NC}\n" + +sleep 1 + +# Test 3: WordPress login attempt +echo -e "${YELLOW}Test 3: POST to /wp-login.php${NC}" +curl -s -X POST "${BASE_URL}/wp-login.php" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "log=wpuser&pwd=Password1&wp-submit=Log+In" \ + > /dev/null +echo -e "${GREEN}✓ Sent: wpuser / Password1${NC}\n" + +sleep 1 + +# Test 4: JSON formatted credentials +echo -e "${YELLOW}Test 4: POST to /api/login with JSON${NC}" +curl -s -X POST "${BASE_URL}/api/login" \ + -H "Content-Type: application/json" \ + -d '{"username":"apiuser","password":"apipass123","remember":true}' \ + > /dev/null +echo -e "${GREEN}✓ Sent: apiuser / apipass123${NC}\n" + +sleep 1 + +# Test 5: SSH-style login +echo -e "${YELLOW}Test 5: POST to /ssh with credentials${NC}" +curl -s -X POST "${BASE_URL}/ssh" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "username=sshuser&password=P@ssw0rd!" 
\ + > /dev/null +echo -e "${GREEN}✓ Sent: sshuser / P@ssw0rd!${NC}\n" + +sleep 1 + +# Test 6: Database admin +echo -e "${YELLOW}Test 6: POST to /phpmyadmin with credentials${NC}" +curl -s -X POST "${BASE_URL}/phpmyadmin" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "pma_username=dbadmin&pma_password=dbpass123&server=1" \ + > /dev/null +echo -e "${GREEN}✓ Sent: dbadmin / dbpass123${NC}\n" + +sleep 1 + +# Test 7: Multiple fields with email +echo -e "${YELLOW}Test 7: POST to /register with email${NC}" +curl -s -X POST "${BASE_URL}/register" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "email=test@example.com&username=newuser&password=NewPass123&confirm_password=NewPass123" \ + > /dev/null +echo -e "${GREEN}✓ Sent: newuser / NewPass123 (email: test@example.com)${NC}\n" + +sleep 1 + +# Test 8: FTP credentials +echo -e "${YELLOW}Test 8: POST to /ftp/login${NC}" +curl -s -X POST "${BASE_URL}/ftp/login" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "ftpuser=ftpadmin&ftppass=ftp123456" \ + > /dev/null +echo -e "${GREEN}✓ Sent: ftpadmin / ftp123456${NC}\n" + +sleep 1 + +# Test 9: Common brute force attempt +echo -e "${YELLOW}Test 9: Multiple attempts (simulating brute force)${NC}" +for i in {1..3}; do + curl -s -X POST "${BASE_URL}/login" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "username=admin&password=pass${i}" \ + > /dev/null + echo -e "${GREEN}✓ Attempt $i: admin / pass${i}${NC}" + sleep 0.5 +done +echo "" + +sleep 1 + +# Test 10: Special characters in credentials +echo -e "${YELLOW}Test 10: POST with special characters${NC}" +curl -s -X POST "${BASE_URL}/login" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + --data-urlencode "username=user@domain.com" \ + --data-urlencode "password=P@\$\$w0rd!#%" \ + > /dev/null +echo -e "${GREEN}✓ Sent: user@domain.com / P@\$\$w0rd!#%${NC}\n" + +echo -e "${BLUE}========================================${NC}" +echo -e "${GREEN}✓ All credential 
tests completed!${NC}" +echo -e "${BLUE}========================================${NC}\n" + +echo -e "${YELLOW}Check the results:${NC}" +echo -e " 1. View the log file: ${GREEN}cat src/logs/credentials.log${NC}" +echo -e " 2. View the dashboard: ${GREEN}${BASE_URL}/dashboard${NC}" +echo -e " 3. Check recent logs: ${GREEN}tail -20 src/logs/krawl.log${NC}\n" + +# Display last 10 credential entries if log file exists +if [ -f "src/logs/credentials.log" ]; then + echo -e "${BLUE}========================================${NC}" + echo -e "${BLUE}Last 10 Captured Credentials:${NC}" + echo -e "${BLUE}========================================${NC}" + tail -10 src/logs/credentials.log + echo "" +fi + +echo -e "${YELLOW}💡 Tip: Open ${BASE_URL}/dashboard in your browser to see the credentials in real-time!${NC}" From f1c142c53d7f40dc8eec68d886928542ac44e9b6 Mon Sep 17 00:00:00 2001 From: Phillip Tarrant Date: Sun, 28 Dec 2025 10:43:32 -0600 Subject: [PATCH 05/12] feat: add SQLite persistent storage for request logging - Add SQLAlchemy-based database layer for persistent storage - Create models for access_logs, credential_attempts, attack_detections, ip_stats - Include fields for future GeoIP and reputation enrichment - Implement sanitization utilities to protect against malicious payloads - Fix XSS vulnerability in dashboard template (HTML escape all user data) - Add DATABASE_PATH and DATABASE_RETENTION_DAYS config options - Dual storage: in-memory for dashboard performance + SQLite for persistence New files: - src/models.py - SQLAlchemy ORM models - src/database.py - DatabaseManager singleton - src/sanitizer.py - Input sanitization and HTML escaping - requirements.txt - SQLAlchemy dependency Security protections: - Parameterized queries via SQLAlchemy ORM - Field length limits to prevent storage exhaustion - Null byte and control character stripping - HTML escaping on dashboard output --- .gitignore | 4 + docs/coding-guidelines.md | 90 +++++++ requirements.txt | 5 + 
src/config.py | 7 +- src/database.py | 361 ++++++++++++++++++++++++++++ src/handler.py | 4 +- src/models.py | 141 +++++++++++ src/sanitizer.py | 113 +++++++++ src/server.py | 10 + src/templates/dashboard_template.py | 35 ++- src/tracker.py | 122 ++++++++-- 11 files changed, 860 insertions(+), 32 deletions(-) create mode 100644 docs/coding-guidelines.md create mode 100644 requirements.txt create mode 100644 src/database.py create mode 100644 src/models.py create mode 100644 src/sanitizer.py diff --git a/.gitignore b/.gitignore index 5d758cb..a36748e 100644 --- a/.gitignore +++ b/.gitignore @@ -61,6 +61,10 @@ secrets/ *.log logs/ +# Database +data/ +*.db + # Temporary files *.tmp *.temp diff --git a/docs/coding-guidelines.md b/docs/coding-guidelines.md new file mode 100644 index 0000000..1e13575 --- /dev/null +++ b/docs/coding-guidelines.md @@ -0,0 +1,90 @@ +### Coding Standards + +**Style & Structure** +- Prefer longer, explicit code over compact one-liners +- Always include docstrings for functions/classes + inline comments +- Strongly prefer OOP-style code (classes over functional/nested functions) +- Strong typing throughout (dataclasses, TypedDict, Enums, type hints) +- Value future-proofing and expanded usage insights + +**Data Design** +- Use dataclasses for internal data modeling +- Typed JSON structures +- Functions return fully typed objects (no loose dicts) +- Snapshot files in JSON or YAML +- Human-readable fields (e.g., `sql_injection`, `xss_attempt`) + +**Templates & UI** +- Don't mix large HTML/CSS blocks in Python code +- Prefer Jinja templates for HTML rendering +- Clean CSS, minimal inline clutter, readable template logic + +**Writing & Documentation** +- Markdown documentation +- Clear section headers +- Roadmap/Phase/Feature-Session style documents + +**Logging** +- Use the logging singleton found in `src/logger.py` +- Set up logging at app start: + ``` + initialize_logging() + app_logger = get_app_logger() + access_logger = get_access_logger() + 
credential_logger = get_credential_logger() + ``` + +**Preferred Pip Packages** +- API/Web Server: Simple Python +- HTTP: Requests +- SQLite: SQLAlchemy +- Database Migrations: Alembic + +### Error Handling +- Custom exception classes for domain-specific errors +- Consistent error response formats (JSON structure) +- Logging severity levels (ERROR vs WARNING) + +### Configuration +- `.env` for secrets (never committed) +- Maintain `.env.example` in each component for documentation +- Typed config loaders using dataclasses +- Validation on startup + +### Containerization & Deployment +- Explicit Dockerfiles +- Production-friendly hardening (distroless/slim when meaningful) +- Use git branch as tag + +### Dependency Management +- Use `requirements.txt` and virtual environments (`python3 -m venv venv`) +- Use path `venv` for all virtual environments +- Pin dependencies to version ranges (or to an exact version when a specific release is required) +- Activate venv before running code (unless in Docker) + +### Testing Standards +- Manual testing preferred for applications +- **Tests:** Use shell scripts with curl/httpie for simulation and attack scripts. +- Tests should be located in the `tests` directory + +### Git Standards + +**Branch Strategy:** +- `master` - Production-ready code only +- `beta` - Public pre-release testing +- `dev` - Main development branch, integration point + +**Workflow:** +- Feature work branches off `dev` (e.g., `feature/add-scheduler`) +- Merge features back to `dev` for testing +- Promote `dev` → `beta` for public testing (when applicable) +- Promote `beta` (or `dev`) → `master` for production + +**Commit Messages:** +- Use conventional commit format: `feat:`, `fix:`, `docs:`, `refactor:`, etc. 
+- Keep commits atomic and focused +- Write clear, descriptive messages + +**Tagging:** +- Tag releases on `master` with semantic versioning (e.g., `v1.2.3`) +- Optionally tag beta releases (e.g., `v1.2.3-beta.1`) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..94f74f2 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +# Krawl Honeypot Dependencies +# Install with: pip install -r requirements.txt + +# Database ORM +SQLAlchemy>=2.0.0,<3.0.0 diff --git a/src/config.py b/src/config.py index 7c6714c..76f1aed 100644 --- a/src/config.py +++ b/src/config.py @@ -22,6 +22,9 @@ class Config: api_server_path: str = "/api/v2/users" probability_error_codes: int = 0 # Percentage (0-100) server_header: str = "Apache/2.2.22 (Ubuntu)" + # Database settings + database_path: str = "data/krawl.db" + database_retention_days: int = 30 @classmethod def from_env(cls) -> 'Config': @@ -46,5 +49,7 @@ class Config: api_server_port=int(os.getenv('API_SERVER_PORT', 8080)), api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'), probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5)), - server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)') + server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)'), + database_path=os.getenv('DATABASE_PATH', 'data/krawl.db'), + database_retention_days=int(os.getenv('DATABASE_RETENTION_DAYS', 30)) ) diff --git a/src/database.py b/src/database.py new file mode 100644 index 0000000..58a4505 --- /dev/null +++ b/src/database.py @@ -0,0 +1,361 @@ +#!/usr/bin/env python3 + +""" +Database singleton module for the Krawl honeypot. +Provides SQLAlchemy session management and database initialization. 
+""" + +import os +import stat +from datetime import datetime +from typing import Optional, List, Dict, Any + +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker, scoped_session, Session + +from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats +from sanitizer import ( + sanitize_ip, + sanitize_path, + sanitize_user_agent, + sanitize_credential, + sanitize_attack_pattern, +) + + +class DatabaseManager: + """ + Singleton database manager for the Krawl honeypot. + + Handles database initialization, session management, and provides + methods for persisting access logs, credentials, and attack detections. + """ + _instance: Optional["DatabaseManager"] = None + + def __new__(cls) -> "DatabaseManager": + if cls._instance is None: + cls._instance = super().__new__(cls) + cls._instance._initialized = False + return cls._instance + + def initialize(self, database_path: str = "data/krawl.db") -> None: + """ + Initialize the database connection and create tables. 
+ + Args: + database_path: Path to the SQLite database file + """ + if self._initialized: + return + + # Create data directory if it doesn't exist + data_dir = os.path.dirname(database_path) + if data_dir and not os.path.exists(data_dir): + os.makedirs(data_dir, exist_ok=True) + + # Create SQLite database with check_same_thread=False for multi-threaded access + database_url = f"sqlite:///{database_path}" + self._engine = create_engine( + database_url, + connect_args={"check_same_thread": False}, + echo=False # Set to True for SQL debugging + ) + + # Create session factory with scoped_session for thread safety + session_factory = sessionmaker(bind=self._engine) + self._Session = scoped_session(session_factory) + + # Create all tables + Base.metadata.create_all(self._engine) + + # Set restrictive file permissions (owner read/write only) + if os.path.exists(database_path): + try: + os.chmod(database_path, stat.S_IRUSR | stat.S_IWUSR) # 600 + except OSError: + # May fail on some systems, not critical + pass + + self._initialized = True + + @property + def session(self) -> Session: + """Get a thread-local database session.""" + if not self._initialized: + raise RuntimeError("DatabaseManager not initialized. Call initialize() first.") + return self._Session() + + def close_session(self) -> None: + """Close the current thread-local session.""" + if self._initialized: + self._Session.remove() + + def persist_access( + self, + ip: str, + path: str, + user_agent: str = "", + method: str = "GET", + is_suspicious: bool = False, + is_honeypot_trigger: bool = False, + attack_types: Optional[List[str]] = None, + matched_patterns: Optional[Dict[str, str]] = None + ) -> Optional[int]: + """ + Persist an access log entry to the database. 
+ + Args: + ip: Client IP address + path: Requested path + user_agent: Client user agent string + method: HTTP method (GET, POST, HEAD) + is_suspicious: Whether the request was flagged as suspicious + is_honeypot_trigger: Whether a honeypot path was accessed + attack_types: List of detected attack types + matched_patterns: Dict mapping attack_type to matched pattern + + Returns: + The ID of the created AccessLog record, or None on error + """ + session = self.session + try: + # Create access log with sanitized fields + access_log = AccessLog( + ip=sanitize_ip(ip), + path=sanitize_path(path), + user_agent=sanitize_user_agent(user_agent), + method=method[:10], + is_suspicious=is_suspicious, + is_honeypot_trigger=is_honeypot_trigger, + timestamp=datetime.utcnow() + ) + session.add(access_log) + session.flush() # Get the ID before committing + + # Add attack detections if any + if attack_types: + matched_patterns = matched_patterns or {} + for attack_type in attack_types: + detection = AttackDetection( + access_log_id=access_log.id, + attack_type=attack_type[:50], + matched_pattern=sanitize_attack_pattern( + matched_patterns.get(attack_type, "") + ) + ) + session.add(detection) + + # Update IP stats + self._update_ip_stats(session, ip) + + session.commit() + return access_log.id + + except Exception as e: + session.rollback() + # Log error but don't crash - database persistence is secondary to honeypot function + print(f"Database error persisting access: {e}") + return None + finally: + self.close_session() + + def persist_credential( + self, + ip: str, + path: str, + username: Optional[str] = None, + password: Optional[str] = None + ) -> Optional[int]: + """ + Persist a credential attempt to the database. 
+ + Args: + ip: Client IP address + path: Login form path + username: Submitted username + password: Submitted password + + Returns: + The ID of the created CredentialAttempt record, or None on error + """ + session = self.session + try: + credential = CredentialAttempt( + ip=sanitize_ip(ip), + path=sanitize_path(path), + username=sanitize_credential(username), + password=sanitize_credential(password), + timestamp=datetime.utcnow() + ) + session.add(credential) + session.commit() + return credential.id + + except Exception as e: + session.rollback() + print(f"Database error persisting credential: {e}") + return None + finally: + self.close_session() + + def _update_ip_stats(self, session: Session, ip: str) -> None: + """ + Update IP statistics (upsert pattern). + + Args: + session: Active database session + ip: IP address to update + """ + sanitized_ip = sanitize_ip(ip) + now = datetime.utcnow() + + ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first() + + if ip_stats: + ip_stats.total_requests += 1 + ip_stats.last_seen = now + else: + ip_stats = IpStats( + ip=sanitized_ip, + total_requests=1, + first_seen=now, + last_seen=now + ) + session.add(ip_stats) + + def get_access_logs( + self, + limit: int = 100, + offset: int = 0, + ip_filter: Optional[str] = None, + suspicious_only: bool = False + ) -> List[Dict[str, Any]]: + """ + Retrieve access logs with optional filtering. 
+ + Args: + limit: Maximum number of records to return + offset: Number of records to skip + ip_filter: Filter by IP address + suspicious_only: Only return suspicious requests + + Returns: + List of access log dictionaries + """ + session = self.session + try: + query = session.query(AccessLog).order_by(AccessLog.timestamp.desc()) + + if ip_filter: + query = query.filter(AccessLog.ip == sanitize_ip(ip_filter)) + if suspicious_only: + query = query.filter(AccessLog.is_suspicious == True) + + logs = query.offset(offset).limit(limit).all() + + return [ + { + 'id': log.id, + 'ip': log.ip, + 'path': log.path, + 'user_agent': log.user_agent, + 'method': log.method, + 'is_suspicious': log.is_suspicious, + 'is_honeypot_trigger': log.is_honeypot_trigger, + 'timestamp': log.timestamp.isoformat(), + 'attack_types': [d.attack_type for d in log.attack_detections] + } + for log in logs + ] + finally: + self.close_session() + + def get_credential_attempts( + self, + limit: int = 100, + offset: int = 0, + ip_filter: Optional[str] = None + ) -> List[Dict[str, Any]]: + """ + Retrieve credential attempts with optional filtering. + + Args: + limit: Maximum number of records to return + offset: Number of records to skip + ip_filter: Filter by IP address + + Returns: + List of credential attempt dictionaries + """ + session = self.session + try: + query = session.query(CredentialAttempt).order_by( + CredentialAttempt.timestamp.desc() + ) + + if ip_filter: + query = query.filter(CredentialAttempt.ip == sanitize_ip(ip_filter)) + + attempts = query.offset(offset).limit(limit).all() + + return [ + { + 'id': attempt.id, + 'ip': attempt.ip, + 'path': attempt.path, + 'username': attempt.username, + 'password': attempt.password, + 'timestamp': attempt.timestamp.isoformat() + } + for attempt in attempts + ] + finally: + self.close_session() + + def get_ip_stats(self, limit: int = 100) -> List[Dict[str, Any]]: + """ + Retrieve IP statistics ordered by total requests. 
+ + Args: + limit: Maximum number of records to return + + Returns: + List of IP stats dictionaries + """ + session = self.session + try: + stats = session.query(IpStats).order_by( + IpStats.total_requests.desc() + ).limit(limit).all() + + return [ + { + 'ip': s.ip, + 'total_requests': s.total_requests, + 'first_seen': s.first_seen.isoformat(), + 'last_seen': s.last_seen.isoformat(), + 'country_code': s.country_code, + 'city': s.city, + 'asn': s.asn, + 'asn_org': s.asn_org, + 'reputation_score': s.reputation_score, + 'reputation_source': s.reputation_source + } + for s in stats + ] + finally: + self.close_session() + + +# Module-level singleton instance +_db_manager = DatabaseManager() + + +def get_database() -> DatabaseManager: + """Get the database manager singleton instance.""" + return _db_manager + + +def initialize_database(database_path: str = "data/krawl.db") -> None: + """Initialize the database system.""" + _db_manager.initialize(database_path) diff --git a/src/handler.py b/src/handler.py index ac7ca22..90214ac 100644 --- a/src/handler.py +++ b/src/handler.py @@ -229,7 +229,7 @@ class Handler(BaseHTTPRequestHandler): self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}") # send the post data (body) to the record_access function so the post data can be used to detect suspicious things. 
- self.tracker.record_access(client_ip, self.path, user_agent, post_data) + self.tracker.record_access(client_ip, self.path, user_agent, post_data, method='POST') time.sleep(1) @@ -347,7 +347,7 @@ class Handler(BaseHTTPRequestHandler): self.app_logger.error(f"Error generating dashboard: {e}") return - self.tracker.record_access(client_ip, self.path, user_agent) + self.tracker.record_access(client_ip, self.path, user_agent, method='GET') if self.tracker.is_suspicious_user_agent(user_agent): self.access_logger.warning(f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}") diff --git a/src/models.py b/src/models.py new file mode 100644 index 0000000..f6e7d30 --- /dev/null +++ b/src/models.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 + +""" +SQLAlchemy ORM models for the Krawl honeypot database. +Stores access logs, credential attempts, attack detections, and IP statistics. +""" + +from datetime import datetime +from typing import Optional, List + +from sqlalchemy import String, Integer, Boolean, DateTime, ForeignKey, Index +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship + +from sanitizer import ( + MAX_IP_LENGTH, + MAX_PATH_LENGTH, + MAX_USER_AGENT_LENGTH, + MAX_CREDENTIAL_LENGTH, + MAX_ATTACK_PATTERN_LENGTH, + MAX_CITY_LENGTH, + MAX_ASN_ORG_LENGTH, + MAX_REPUTATION_SOURCE_LENGTH, +) + + +class Base(DeclarativeBase): + """Base class for all ORM models.""" + pass + + +class AccessLog(Base): + """ + Records all HTTP requests to the honeypot. + + Stores request metadata, suspicious activity flags, and timestamps + for analysis and dashboard display. 
+ """ + __tablename__ = 'access_logs' + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True) + path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False) + user_agent: Mapped[Optional[str]] = mapped_column(String(MAX_USER_AGENT_LENGTH), nullable=True) + method: Mapped[str] = mapped_column(String(10), nullable=False, default='GET') + is_suspicious: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) + is_honeypot_trigger: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) + timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True) + + # Relationship to attack detections + attack_detections: Mapped[List["AttackDetection"]] = relationship( + "AttackDetection", + back_populates="access_log", + cascade="all, delete-orphan" + ) + + # Composite index for common queries + __table_args__ = ( + Index('ix_access_logs_ip_timestamp', 'ip', 'timestamp'), + ) + + def __repr__(self) -> str: + return f"" + + +class CredentialAttempt(Base): + """ + Records captured login attempts from honeypot login forms. + + Stores the submitted username and password along with request metadata. 
+ """ + __tablename__ = 'credential_attempts' + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True) + path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False) + username: Mapped[Optional[str]] = mapped_column(String(MAX_CREDENTIAL_LENGTH), nullable=True) + password: Mapped[Optional[str]] = mapped_column(String(MAX_CREDENTIAL_LENGTH), nullable=True) + timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True) + + # Composite index for common queries + __table_args__ = ( + Index('ix_credential_attempts_ip_timestamp', 'ip', 'timestamp'), + ) + + def __repr__(self) -> str: + return f"" + + +class AttackDetection(Base): + """ + Records detected attack patterns in requests. + + Linked to the parent AccessLog record. Multiple attack types can be + detected in a single request. + """ + __tablename__ = 'attack_detections' + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + access_log_id: Mapped[int] = mapped_column( + Integer, + ForeignKey('access_logs.id', ondelete='CASCADE'), + nullable=False, + index=True + ) + attack_type: Mapped[str] = mapped_column(String(50), nullable=False) + matched_pattern: Mapped[Optional[str]] = mapped_column(String(MAX_ATTACK_PATTERN_LENGTH), nullable=True) + + # Relationship back to access log + access_log: Mapped["AccessLog"] = relationship("AccessLog", back_populates="attack_detections") + + def __repr__(self) -> str: + return f"" + + +class IpStats(Base): + """ + Aggregated statistics per IP address. + + Includes fields for future GeoIP and reputation enrichment. + Updated on each request from an IP. 
+ """ + __tablename__ = 'ip_stats' + + ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), primary_key=True) + total_requests: Mapped[int] = mapped_column(Integer, nullable=False, default=0) + first_seen: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow) + last_seen: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow) + + # GeoIP fields (populated by future enrichment) + country_code: Mapped[Optional[str]] = mapped_column(String(2), nullable=True) + city: Mapped[Optional[str]] = mapped_column(String(MAX_CITY_LENGTH), nullable=True) + asn: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + asn_org: Mapped[Optional[str]] = mapped_column(String(MAX_ASN_ORG_LENGTH), nullable=True) + + # Reputation fields (populated by future enrichment) + reputation_score: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) + reputation_source: Mapped[Optional[str]] = mapped_column(String(MAX_REPUTATION_SOURCE_LENGTH), nullable=True) + reputation_updated: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True) + + def __repr__(self) -> str: + return f"" diff --git a/src/sanitizer.py b/src/sanitizer.py new file mode 100644 index 0000000..f783129 --- /dev/null +++ b/src/sanitizer.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 + +""" +Sanitization utilities for safe database storage and HTML output. +Protects against SQL injection payloads, XSS, and storage exhaustion attacks. +""" + +import html +import re +from typing import Optional + + +# Field length limits for database storage +MAX_IP_LENGTH = 45 # IPv6 max length +MAX_PATH_LENGTH = 2048 # URL max practical length +MAX_USER_AGENT_LENGTH = 512 +MAX_CREDENTIAL_LENGTH = 256 +MAX_ATTACK_PATTERN_LENGTH = 256 +MAX_CITY_LENGTH = 128 +MAX_ASN_ORG_LENGTH = 256 +MAX_REPUTATION_SOURCE_LENGTH = 64 + + +def sanitize_for_storage(value: Optional[str], max_length: int) -> str: + """ + Sanitize and truncate string for safe database storage. 
+ + Removes null bytes and control characters that could cause issues + with database storage or log processing. + + Args: + value: The string to sanitize + max_length: Maximum length to truncate to + + Returns: + Sanitized and truncated string, empty string if input is None/empty + """ + if not value: + return "" + + # Convert to string if not already + value = str(value) + + # Remove null bytes and control characters (except newline \n, tab \t, carriage return \r) + # Control chars are 0x00-0x1F and 0x7F, we keep 0x09 (tab), 0x0A (newline), 0x0D (carriage return) + cleaned = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', value) + + # Truncate to max length + return cleaned[:max_length] + + +def sanitize_ip(value: Optional[str]) -> str: + """Sanitize IP address for storage.""" + return sanitize_for_storage(value, MAX_IP_LENGTH) + + +def sanitize_path(value: Optional[str]) -> str: + """Sanitize URL path for storage.""" + return sanitize_for_storage(value, MAX_PATH_LENGTH) + + +def sanitize_user_agent(value: Optional[str]) -> str: + """Sanitize user agent string for storage.""" + return sanitize_for_storage(value, MAX_USER_AGENT_LENGTH) + + +def sanitize_credential(value: Optional[str]) -> str: + """Sanitize username or password for storage.""" + return sanitize_for_storage(value, MAX_CREDENTIAL_LENGTH) + + +def sanitize_attack_pattern(value: Optional[str]) -> str: + """Sanitize matched attack pattern for storage.""" + return sanitize_for_storage(value, MAX_ATTACK_PATTERN_LENGTH) + + +def escape_html(value: Optional[str]) -> str: + """ + Escape HTML special characters for safe display in web pages. + + Prevents stored XSS attacks when displaying user-controlled data + in the dashboard. 
+ + Args: + value: The string to escape + + Returns: + HTML-escaped string, empty string if input is None/empty + """ + if not value: + return "" + return html.escape(str(value)) + + +def escape_html_truncated(value: Optional[str], max_display_length: int) -> str: + """ + Escape HTML and truncate for display. + + Args: + value: The string to escape and truncate + max_display_length: Maximum display length (truncation happens before escaping) + + Returns: + HTML-escaped and truncated string + """ + if not value: + return "" + + value_str = str(value) + if len(value_str) > max_display_length: + value_str = value_str[:max_display_length] + "..." + + return html.escape(value_str) diff --git a/src/server.py b/src/server.py index fd8f7d2..a0b5ec3 100644 --- a/src/server.py +++ b/src/server.py @@ -12,6 +12,7 @@ from config import Config from tracker import AccessTracker from handler import Handler from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger +from database import initialize_database def print_usage(): @@ -33,6 +34,8 @@ def print_usage(): print(' PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)') print(' CHAR_SPACE - Characters for random links') print(' SERVER_HEADER - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))') + print(' DATABASE_PATH - Path to SQLite database (default: data/krawl.db)') + print(' DATABASE_RETENTION_DAYS - Days to retain database records (default: 30)') def main(): @@ -49,6 +52,13 @@ def main(): config = Config.from_env() + # Initialize database for persistent storage + try: + initialize_database(config.database_path) + app_logger.info(f'Database initialized at: {config.database_path}') + except Exception as e: + app_logger.warning(f'Database initialization failed: {e}. 
Continuing with in-memory only.') + tracker = AccessTracker() Handler.config = config diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py index a267278..92e950d 100644 --- a/src/templates/dashboard_template.py +++ b/src/templates/dashboard_template.py @@ -5,49 +5,58 @@ Dashboard template for viewing honeypot statistics. Customize this template to change the dashboard appearance. """ +import html + + +def _escape(value) -> str: + """Escape HTML special characters to prevent XSS attacks.""" + if value is None: + return "" + return html.escape(str(value)) + def generate_dashboard(stats: dict) -> str: """Generate dashboard HTML with access statistics""" - # Generate IP rows + # Generate IP rows (IPs are generally safe but escape for consistency) top_ips_rows = '\n'.join([ - f'' + f'' for i, (ip, count) in enumerate(stats['top_ips']) ]) or '' - # Generate paths rows + # Generate paths rows (CRITICAL: paths can contain XSS payloads) top_paths_rows = '\n'.join([ - f'' + f'' for i, (path, count) in enumerate(stats['top_paths']) ]) or '' - # Generate User-Agent rows + # Generate User-Agent rows (CRITICAL: user agents can contain XSS payloads) top_ua_rows = '\n'.join([ - f'' + f'' for i, (ua, count) in enumerate(stats['top_user_agents']) ]) or '' - # Generate suspicious accesses rows + # Generate suspicious accesses rows (CRITICAL: multiple user-controlled fields) suspicious_rows = '\n'.join([ - f'' + f'' for log in stats['recent_suspicious'][-10:] ]) or '' # Generate honeypot triggered IPs rows honeypot_rows = '\n'.join([ - f'' + f'' for ip, paths in stats.get('honeypot_triggered_ips', []) ]) or '' - # Generate attack types rows + # Generate attack types rows (CRITICAL: paths and user agents are user-controlled) attack_type_rows = '\n'.join([ - f'' + f'' for log in stats.get('attack_types', [])[-10:] ]) or '' - # Generate credential attempts rows + # Generate credential attempts rows (CRITICAL: usernames and passwords are user-controlled) 
credential_rows = '\n'.join([ - f'' + f'' for log in stats.get('credential_attempts', [])[-20:] ]) or '' diff --git a/src/tracker.py b/src/tracker.py index 717a4c3..04ded3b 100644 --- a/src/tracker.py +++ b/src/tracker.py @@ -1,15 +1,29 @@ #!/usr/bin/env python3 -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Optional from collections import defaultdict from datetime import datetime import re import urllib.parse +from database import get_database, DatabaseManager + class AccessTracker: - """Track IP addresses and paths accessed""" - def __init__(self): + """ + Track IP addresses and paths accessed. + + Maintains in-memory structures for fast dashboard access and + persists data to SQLite for long-term storage and analysis. + """ + def __init__(self, db_manager: Optional[DatabaseManager] = None): + """ + Initialize the access tracker. + + Args: + db_manager: Optional DatabaseManager for persistence. + If None, will use the global singleton. + """ self.ip_counts: Dict[str, int] = defaultdict(int) self.path_counts: Dict[str, int] = defaultdict(int) self.user_agent_counts: Dict[str, int] = defaultdict(int) @@ -21,7 +35,7 @@ class AccessTracker: 'burp', 'zap', 'w3af', 'metasploit', 'nuclei', 'gobuster', 'dirbuster' ] - # common attack types such as xss, shell injection, probes + # Common attack types such as xss, shell injection, probes self.attack_types = { 'path_traversal': r'\.\.', 'sql_injection': r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)", @@ -33,6 +47,25 @@ class AccessTracker: # Track IPs that accessed honeypot paths from robots.txt self.honeypot_triggered: Dict[str, List[str]] = defaultdict(list) + # Database manager for persistence (lazily initialized) + self._db_manager = db_manager + + @property + def db(self) -> Optional[DatabaseManager]: + """ + Get the database manager, lazily initializing if needed. 
+ + Returns: + DatabaseManager instance or None if not available + """ + if self._db_manager is None: + try: + self._db_manager = get_database() + except Exception: + # Database not initialized, persistence disabled + pass + return self._db_manager + def parse_credentials(self, post_data: str) -> Tuple[str, str]: """ Parse username and password from POST data. @@ -75,7 +108,12 @@ class AccessTracker: return username, password def record_credential_attempt(self, ip: str, path: str, username: str, password: str): - """Record a credential login attempt""" + """ + Record a credential login attempt. + + Stores in both in-memory list and SQLite database. + """ + # In-memory storage for dashboard self.credential_attempts.append({ 'ip': ip, 'path': path, @@ -84,37 +122,89 @@ class AccessTracker: 'timestamp': datetime.now().isoformat() }) - def record_access(self, ip: str, path: str, user_agent: str = '', body: str = ''): - """Record an access attempt""" + # Persist to database + if self.db: + try: + self.db.persist_credential( + ip=ip, + path=path, + username=username, + password=password + ) + except Exception: + # Don't crash if database persistence fails + pass + + def record_access( + self, + ip: str, + path: str, + user_agent: str = '', + body: str = '', + method: str = 'GET' + ): + """ + Record an access attempt. + + Stores in both in-memory structures and SQLite database. 
+ + Args: + ip: Client IP address + path: Requested path + user_agent: Client user agent string + body: Request body (for POST/PUT) + method: HTTP method + """ self.ip_counts[ip] += 1 self.path_counts[path] += 1 if user_agent: self.user_agent_counts[user_agent] += 1 - - # path attack type detection + + # Path attack type detection attack_findings = self.detect_attack_type(path) - # post / put data + # POST/PUT body attack detection if len(body) > 0: attack_findings.extend(self.detect_attack_type(body)) - is_suspicious = self.is_suspicious_user_agent(user_agent) or self.is_honeypot_path(path) or len(attack_findings) > 0 + is_suspicious = ( + self.is_suspicious_user_agent(user_agent) or + self.is_honeypot_path(path) or + len(attack_findings) > 0 + ) + is_honeypot = self.is_honeypot_path(path) - # Track if this IP accessed a honeypot path - if self.is_honeypot_path(path): + if is_honeypot: self.honeypot_triggered[ip].append(path) - + + # In-memory storage for dashboard self.access_log.append({ 'ip': ip, 'path': path, 'user_agent': user_agent, 'suspicious': is_suspicious, - 'honeypot_triggered': self.is_honeypot_path(path), - 'attack_types':attack_findings, + 'honeypot_triggered': is_honeypot, + 'attack_types': attack_findings, 'timestamp': datetime.now().isoformat() }) + # Persist to database + if self.db: + try: + self.db.persist_access( + ip=ip, + path=path, + user_agent=user_agent, + method=method, + is_suspicious=is_suspicious, + is_honeypot_trigger=is_honeypot, + attack_types=attack_findings if attack_findings else None + ) + except Exception: + # Don't crash if database persistence fails + pass + def detect_attack_type(self, data:str) -> list[str]: """ Returns a list of all attack types found in path data From a4baedffd958b83da700431b53f91eb63c858803 Mon Sep 17 00:00:00 2001 From: Phillip Tarrant Date: Sun, 28 Dec 2025 13:52:46 -0600 Subject: [PATCH 06/12] updated dashboard to pull data from db. 
This closes issue #10 --- src/database.py | 196 +++++++++++++++++++++++++++- src/models.py | 4 +- src/templates/dashboard_template.py | 2 +- src/tracker.py | 35 +++-- tests/test_credentials.sh | 4 +- 5 files changed, 218 insertions(+), 23 deletions(-) diff --git a/src/database.py b/src/database.py index 58a4505..e0de320 100644 --- a/src/database.py +++ b/src/database.py @@ -10,7 +10,7 @@ import stat from datetime import datetime from typing import Optional, List, Dict, Any -from sqlalchemy import create_engine +from sqlalchemy import create_engine, func, distinct, case from sqlalchemy.orm import sessionmaker, scoped_session, Session from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats @@ -346,6 +346,200 @@ class DatabaseManager: finally: self.close_session() + def get_dashboard_counts(self) -> Dict[str, int]: + """ + Get aggregate statistics for the dashboard. + + Returns: + Dictionary with total_accesses, unique_ips, unique_paths, + suspicious_accesses, honeypot_triggered, honeypot_ips + """ + session = self.session + try: + # Get main aggregate counts in one query + result = session.query( + func.count(AccessLog.id).label('total_accesses'), + func.count(distinct(AccessLog.ip)).label('unique_ips'), + func.count(distinct(AccessLog.path)).label('unique_paths'), + func.sum(case((AccessLog.is_suspicious == True, 1), else_=0)).label('suspicious_accesses'), + func.sum(case((AccessLog.is_honeypot_trigger == True, 1), else_=0)).label('honeypot_triggered') + ).first() + + # Get unique IPs that triggered honeypots + honeypot_ips = session.query( + func.count(distinct(AccessLog.ip)) + ).filter(AccessLog.is_honeypot_trigger == True).scalar() or 0 + + return { + 'total_accesses': result.total_accesses or 0, + 'unique_ips': result.unique_ips or 0, + 'unique_paths': result.unique_paths or 0, + 'suspicious_accesses': int(result.suspicious_accesses or 0), + 'honeypot_triggered': int(result.honeypot_triggered or 0), + 'honeypot_ips': honeypot_ips + } + 
finally: + self.close_session() + + def get_top_ips(self, limit: int = 10) -> List[tuple]: + """ + Get top IP addresses by access count. + + Args: + limit: Maximum number of results + + Returns: + List of (ip, count) tuples ordered by count descending + """ + session = self.session + try: + results = session.query( + AccessLog.ip, + func.count(AccessLog.id).label('count') + ).group_by(AccessLog.ip).order_by( + func.count(AccessLog.id).desc() + ).limit(limit).all() + + return [(row.ip, row.count) for row in results] + finally: + self.close_session() + + def get_top_paths(self, limit: int = 10) -> List[tuple]: + """ + Get top paths by access count. + + Args: + limit: Maximum number of results + + Returns: + List of (path, count) tuples ordered by count descending + """ + session = self.session + try: + results = session.query( + AccessLog.path, + func.count(AccessLog.id).label('count') + ).group_by(AccessLog.path).order_by( + func.count(AccessLog.id).desc() + ).limit(limit).all() + + return [(row.path, row.count) for row in results] + finally: + self.close_session() + + def get_top_user_agents(self, limit: int = 10) -> List[tuple]: + """ + Get top user agents by access count. + + Args: + limit: Maximum number of results + + Returns: + List of (user_agent, count) tuples ordered by count descending + """ + session = self.session + try: + results = session.query( + AccessLog.user_agent, + func.count(AccessLog.id).label('count') + ).filter( + AccessLog.user_agent.isnot(None), + AccessLog.user_agent != '' + ).group_by(AccessLog.user_agent).order_by( + func.count(AccessLog.id).desc() + ).limit(limit).all() + + return [(row.user_agent, row.count) for row in results] + finally: + self.close_session() + + def get_recent_suspicious(self, limit: int = 20) -> List[Dict[str, Any]]: + """ + Get recent suspicious access attempts. 
+ + Args: + limit: Maximum number of results + + Returns: + List of access log dictionaries with is_suspicious=True + """ + session = self.session + try: + logs = session.query(AccessLog).filter( + AccessLog.is_suspicious == True + ).order_by(AccessLog.timestamp.desc()).limit(limit).all() + + return [ + { + 'ip': log.ip, + 'path': log.path, + 'user_agent': log.user_agent, + 'timestamp': log.timestamp.isoformat() + } + for log in logs + ] + finally: + self.close_session() + + def get_honeypot_triggered_ips(self) -> List[tuple]: + """ + Get IPs that triggered honeypot paths with the paths they accessed. + + Returns: + List of (ip, [paths]) tuples + """ + session = self.session + try: + # Get all honeypot triggers grouped by IP + results = session.query( + AccessLog.ip, + AccessLog.path + ).filter( + AccessLog.is_honeypot_trigger == True + ).all() + + # Group paths by IP + ip_paths: Dict[str, List[str]] = {} + for row in results: + if row.ip not in ip_paths: + ip_paths[row.ip] = [] + if row.path not in ip_paths[row.ip]: + ip_paths[row.ip].append(row.path) + + return [(ip, paths) for ip, paths in ip_paths.items()] + finally: + self.close_session() + + def get_recent_attacks(self, limit: int = 20) -> List[Dict[str, Any]]: + """ + Get recent access logs that have attack detections. 
+ + Args: + limit: Maximum number of results + + Returns: + List of access log dicts with attack_types included + """ + session = self.session + try: + # Get access logs that have attack detections + logs = session.query(AccessLog).join( + AttackDetection + ).order_by(AccessLog.timestamp.desc()).limit(limit).all() + + return [ + { + 'ip': log.ip, + 'path': log.path, + 'user_agent': log.user_agent, + 'timestamp': log.timestamp.isoformat(), + 'attack_types': [d.attack_type for d in log.attack_detections] + } + for log in logs + ] + finally: + self.close_session() + # Module-level singleton instance _db_manager = DatabaseManager() diff --git a/src/models.py b/src/models.py index f6e7d30..40dae0b 100644 --- a/src/models.py +++ b/src/models.py @@ -53,9 +53,11 @@ class AccessLog(Base): cascade="all, delete-orphan" ) - # Composite index for common queries + # Indexes for common queries __table_args__ = ( Index('ix_access_logs_ip_timestamp', 'ip', 'timestamp'), + Index('ix_access_logs_is_suspicious', 'is_suspicious'), + Index('ix_access_logs_is_honeypot_trigger', 'is_honeypot_trigger'), ) def __repr__(self) -> str: diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py index 2323843..455833d 100644 --- a/src/templates/dashboard_template.py +++ b/src/templates/dashboard_template.py @@ -190,7 +190,7 @@ def generate_dashboard(stats: dict) -> str:
-

🍯 Honeypot Triggers

+

🍯 Honeypot Triggers by IP

{log["ip"]}{log["path"]}{log["user_agent"][:60]}{log["timestamp"].split("T")[1][:8]}
{log["ip"]}{log["path"]}{log["user_agent"][:60]}{format_timestamp(log["timestamp"])}
No suspicious activity detected
{log["ip"]}{log["path"]}{", ".join(log["attack_types"])}{log["user_agent"][:60]}{log["timestamp"].split("T")[1][:8]}
{log["ip"]}{log["path"]}{", ".join(log["attack_types"])}{log["user_agent"][:60]}{format_timestamp(log["timestamp"])}
No attacks detected
{log["ip"]}{log["username"]}{log["password"]}{log["path"]}{log["timestamp"].split("T")[1][:8]}
{log["ip"]}{log["username"]}{log["password"]}{log["path"]}{format_timestamp(log["timestamp"])}
No credentials captured yet
{i+1}{ip}{count}
{i+1}{_escape(ip)}{count}
No data
{i+1}{path}{count}
{i+1}{_escape(path)}{count}
No data
{i+1}{ua[:80]}{count}
{i+1}{_escape(ua[:80])}{count}
No data
{log["ip"]}{log["path"]}{log["user_agent"][:60]}{log["timestamp"].split("T")[1][:8]}
{_escape(log["ip"])}{_escape(log["path"])}{_escape(log["user_agent"][:60])}{_escape(log["timestamp"].split("T")[1][:8])}
No suspicious activity detected
{ip}{", ".join(paths)}{len(paths)}
{_escape(ip)}{_escape(", ".join(paths))}{len(paths)}
No honeypot triggers yet
{log["ip"]}{log["path"]}{", ".join(log["attack_types"])}{log["user_agent"][:60]}{log["timestamp"].split("T")[1][:8]}
{_escape(log["ip"])}{_escape(log["path"])}{_escape(", ".join(log["attack_types"]))}{_escape(log["user_agent"][:60])}{_escape(log["timestamp"].split("T")[1][:8])}
No attacks detected
{log["ip"]}{log["username"]}{log["password"]}{log["path"]}{log["timestamp"].split("T")[1][:8]}
{_escape(log["ip"])}{_escape(log["username"])}{_escape(log["password"])}{_escape(log["path"])}{_escape(log["timestamp"].split("T")[1][:8])}
No credentials captured yet
diff --git a/src/tracker.py b/src/tracker.py index 2d3d34a..4c89c0b 100644 --- a/src/tracker.py +++ b/src/tracker.py @@ -276,21 +276,20 @@ class AccessTracker: return [(ip, paths) for ip, paths in self.honeypot_triggered.items()] def get_stats(self) -> Dict: - """Get statistics summary""" - suspicious_count = sum(1 for log in self.access_log if log.get('suspicious', False)) - honeypot_count = sum(1 for log in self.access_log if log.get('honeypot_triggered', False)) - return { - 'total_accesses': len(self.access_log), - 'unique_ips': len(self.ip_counts), - 'unique_paths': len(self.path_counts), - 'suspicious_accesses': suspicious_count, - 'honeypot_triggered': honeypot_count, - 'honeypot_ips': len(self.honeypot_triggered), - 'top_ips': self.get_top_ips(10), - 'top_paths': self.get_top_paths(10), - 'top_user_agents': self.get_top_user_agents(10), - 'recent_suspicious': self.get_suspicious_accesses(20), - 'honeypot_triggered_ips': self.get_honeypot_triggered_ips(), - 'attack_types': self.get_attack_type_accesses(20), - 'credential_attempts': self.credential_attempts[-50:] # Last 50 attempts - } + """Get statistics summary from database.""" + if not self.db: + raise RuntimeError("Database not available for dashboard stats") + + # Get aggregate counts from database + stats = self.db.get_dashboard_counts() + + # Add detailed lists from database + stats['top_ips'] = self.db.get_top_ips(10) + stats['top_paths'] = self.db.get_top_paths(10) + stats['top_user_agents'] = self.db.get_top_user_agents(10) + stats['recent_suspicious'] = self.db.get_recent_suspicious(20) + stats['honeypot_triggered_ips'] = self.db.get_honeypot_triggered_ips() + stats['attack_types'] = self.db.get_recent_attacks(20) + stats['credential_attempts'] = self.db.get_credential_attempts(limit=50) + + return stats diff --git a/tests/test_credentials.sh b/tests/test_credentials.sh index 6379b92..68ee2c0 100755 --- a/tests/test_credentials.sh +++ b/tests/test_credentials.sh @@ -134,9 +134,9 @@ echo -e 
"${GREEN}✓ All credential tests completed!${NC}" echo -e "${BLUE}========================================${NC}\n" echo -e "${YELLOW}Check the results:${NC}" -echo -e " 1. View the log file: ${GREEN}cat src/logs/credentials.log${NC}" +echo -e " 1. View the log file: ${GREEN}tail -20 logs/credentials.log${NC}" echo -e " 2. View the dashboard: ${GREEN}${BASE_URL}/dashboard${NC}" -echo -e " 3. Check recent logs: ${GREEN}tail -20 src/logs/krawl.log${NC}\n" +echo -e " 3. Check recent logs: ${GREEN}tail -20 logs/access.log ${NC}\n" # Display last 10 credential entries if log file exists if [ -f "src/logs/credentials.log" ]; then From c2c43ac98500d20fb19cf8cf9dedc04f6a08d123 Mon Sep 17 00:00:00 2001 From: Leonardo Bambini Date: Mon, 29 Dec 2025 18:51:37 +0100 Subject: [PATCH 07/12] Added randomized server header and changed behavior of SERVER_HEADER env var --- src/config.py | 2 +- src/generators.py | 13 ++++++++++++- src/handler.py | 4 ++-- src/wordlists.py | 7 ++++++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/config.py b/src/config.py index 7c6714c..ef78935 100644 --- a/src/config.py +++ b/src/config.py @@ -46,5 +46,5 @@ class Config: api_server_port=int(os.getenv('API_SERVER_PORT', 8080)), api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'), probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5)), - server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)') + server_header=os.getenv('SERVER_HEADER') ) diff --git a/src/generators.py b/src/generators.py index 16c0c32..6e24ba8 100644 --- a/src/generators.py +++ b/src/generators.py @@ -9,7 +9,8 @@ import string import json from templates import html_templates from wordlists import get_wordlists - +from config import Config +from logger import get_app_logger def random_username() -> str: """Generate random username""" @@ -36,6 +37,16 @@ def random_email(username: str = None) -> str: username = random_username() return f"{username}@{random.choice(wl.email_domains)}" +def 
random_server_header() -> str: + """Generate random server header""" + + if Config.from_env().server_header: + server_header = Config.from_env().server_header + else: + wl = get_wordlists() + server_header = random.choice(wl.server_headers) + + return server_header def random_api_key() -> str: """Generate random API key""" diff --git a/src/handler.py b/src/handler.py index ac7ca22..7c44726 100644 --- a/src/handler.py +++ b/src/handler.py @@ -13,7 +13,7 @@ from templates import html_templates from templates.dashboard_template import generate_dashboard from generators import ( credentials_txt, passwords_txt, users_json, api_keys_json, - api_response, directory_listing + api_response, directory_listing, random_server_header ) from wordlists import get_wordlists @@ -52,7 +52,7 @@ class Handler(BaseHTTPRequestHandler): def version_string(self) -> str: """Return custom server version for deception.""" - return self.config.server_header + return random_server_header() def _should_return_error(self) -> bool: """Check if we should return an error based on probability""" diff --git a/src/wordlists.py b/src/wordlists.py index 62e4045..342930a 100644 --- a/src/wordlists.py +++ b/src/wordlists.py @@ -57,7 +57,8 @@ class Wordlists: }, "users": { "roles": ["Administrator", "User"] - } + }, + "server_headers": ["Apache/2.4.41 (Ubuntu)", "nginx/1.18.0"] } @property @@ -111,6 +112,10 @@ class Wordlists: @property def error_codes(self): return self._data.get("error_codes", []) + + @property + def server_headers(self): + return self._data.get("server_headers", []) _wordlists_instance = None From a9808599dc3870f66241111fcfdd6defbd4da42f Mon Sep 17 00:00:00 2001 From: Leonardo Bambini Date: Mon, 29 Dec 2025 18:55:44 +0100 Subject: [PATCH 08/12] Added random server header and changed behavior of SERVER_HEADER env var --- README.md | 2 +- wordlists.json | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index b84d955..0d3efe7 100644 --- 
a/README.md +++ b/README.md @@ -185,7 +185,7 @@ To customize the deception server installation several **environment variables** | `CANARY_TOKEN_URL` | External canary token URL | None | | `DASHBOARD_SECRET_PATH` | Custom dashboard path | Auto-generated | | `PROBABILITY_ERROR_CODES` | Error response probability (0-100%) | `0` | -| `SERVER_HEADER` | HTTP Server header for deception | `Apache/2.2.22 (Ubuntu)` | +| `SERVER_HEADER` | HTTP Server header for deception, if not set use random server header | | ## robots.txt The actual (juicy) robots.txt configuration is the following diff --git a/wordlists.json b/wordlists.json index f1aae81..fddf3d3 100644 --- a/wordlists.json +++ b/wordlists.json @@ -193,5 +193,13 @@ 500, 502, 503 + ], + "server_headers": [ + "Apache/2.4.41 (Ubuntu)", + "nginx/1.18.0", + "Microsoft-IIS/10.0", + "cloudflare", + "AmazonS3", + "gunicorn/20.1.0" ] } From 06ffa2c480f9991ea391913be918bc59a65e636f Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio Date: Mon, 29 Dec 2025 23:57:37 +0100 Subject: [PATCH 09/12] Added wordlists and server header logic to helm --- docker-compose.yaml | 2 +- helm/templates/configmap.yaml | 7 ++++++- helm/values.yaml | 14 +++++++++++++- src/config.py | 2 +- 4 files changed, 21 insertions(+), 4 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 1612864..7026f11 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -20,7 +20,7 @@ services: - MAX_COUNTER=10 - CANARY_TOKEN_TRIES=10 - PROBABILITY_ERROR_CODES=0 - - SERVER_HEADER=Apache/2.2.22 (Ubuntu) + # - SERVER_HEADER=Apache/2.2.22 (Ubuntu) # Optional: Set your canary token URL # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt # Optional: Set custom dashboard path (auto-generated if not set) diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index c50ab75..fb590b0 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -14,5 +14,10 @@ data: MAX_COUNTER: {{ 
.Values.config.maxCounter | quote }} CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }} PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }} - SERVER_HEADER: {{ .Values.config.serverHeader | quote }} CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }} + {{- if .Values.config.dashboardSecretPath }} + DASHBOARD_SECRET_PATH: {{ .Values.config.dashboardSecretPath | quote }} + {{- end }} + {{- if .Values.config.serverHeader }} + SERVER_HEADER: {{ .Values.config.serverHeader | quote }} + {{- end }} \ No newline at end of file diff --git a/helm/values.yaml b/helm/values.yaml index a095632..217e9a6 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -73,7 +73,8 @@ config: maxCounter: 10 canaryTokenTries: 10 probabilityErrorCodes: 0 - serverHeader: "Apache/2.2.22 (Ubuntu)" +# serverHeader: "Apache/2.2.22 (Ubuntu)" +# dashboardSecretPath: "/my-secret-dashboard" # canaryTokenUrl: set-your-canary-token-url-here networkPolicy: @@ -268,6 +269,17 @@ wordlists: - .git/ - keys/ - credentials/ + server_headers: + - Apache/2.2.22 (Ubuntu) + - nginx/1.18.0 + - Microsoft-IIS/10.0 + - LiteSpeed + - Caddy + - Gunicorn/20.0.4 + - uvicorn/0.13.4 + - Express + - Flask/1.1.2 + - Django/3.1 error_codes: - 400 - 401 diff --git a/src/config.py b/src/config.py index ef78935..3fc5dd8 100644 --- a/src/config.py +++ b/src/config.py @@ -21,7 +21,7 @@ class Config: api_server_port: int = 8080 api_server_path: str = "/api/v2/users" probability_error_codes: int = 0 # Percentage (0-100) - server_header: str = "Apache/2.2.22 (Ubuntu)" + server_header: Optional[str] = None @classmethod def from_env(cls) -> 'Config': From cddad984c3b556cbc990bd989f7f2295eb404394 Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio Date: Tue, 30 Dec 2025 00:03:44 +0100 Subject: [PATCH 10/12] Added timezone to helm values --- helm/values.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/helm/values.yaml b/helm/values.yaml index dc18d4a..8a6bc1d 100644 --- a/helm/values.yaml 
+++ b/helm/values.yaml @@ -73,6 +73,7 @@ config: maxCounter: 10 canaryTokenTries: 10 probabilityErrorCodes: 0 +# timezone: "UTC" # serverHeader: "Apache/2.2.22 (Ubuntu)" # dashboardSecretPath: "/my-secret-dashboard" # canaryTokenUrl: set-your-canary-token-url-here From 354f8bf8954e76faefbfa7750d2b3537ee7d3443 Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio <50186694+BlessedRebuS@users.noreply.github.com> Date: Tue, 30 Dec 2025 00:24:36 +0100 Subject: [PATCH 11/12] Fix indentation for server_header in config.py --- src/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/config.py b/src/config.py index f3bf7f0..87fca1c 100644 --- a/src/config.py +++ b/src/config.py @@ -85,7 +85,7 @@ class Config: api_server_port=int(os.getenv('API_SERVER_PORT', 8080)), api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'), probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 0)), - server_header=os.getenv('SERVER_HEADER') + server_header=os.getenv('SERVER_HEADER'), database_path=os.getenv('DATABASE_PATH', 'data/krawl.db'), database_retention_days=int(os.getenv('DATABASE_RETENTION_DAYS', 30)), timezone=os.getenv('TIMEZONE') # If not set, will use system timezone From c55b1375adbd6f21fa1712f9bd9c05026fa34207 Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio Date: Tue, 30 Dec 2025 12:12:42 +0100 Subject: [PATCH 12/12] added db config for kubernetes and helm --- helm/templates/configmap.yaml | 3 +++ helm/templates/deployment.yaml | 14 ++++++++++++++ helm/templates/pvc.yaml | 17 +++++++++++++++++ helm/values.yaml | 18 ++++++++++++++++++ kubernetes/krawl-all-in-one-deploy.yaml | 22 ++++++++++++++++++++++ kubernetes/manifests/configmap.yaml | 5 ++++- kubernetes/manifests/deployment.yaml | 5 +++++ kubernetes/manifests/kustomization.yaml | 1 + kubernetes/manifests/pvc.yaml | 13 +++++++++++++ 9 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 helm/templates/pvc.yaml create mode 100644 kubernetes/manifests/pvc.yaml diff --git 
a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index 2990f61..17cd952 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -24,3 +24,6 @@ data: {{- if .Values.config.timezone }} TIMEZONE: {{ .Values.config.timezone | quote }} {{- end }} + # Database configuration + DATABASE_PATH: {{ .Values.database.path | quote }} + DATABASE_RETENTION_DAYS: {{ .Values.database.retentionDays | quote }} diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index b0aeb6d..ecc9655 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -54,6 +54,10 @@ spec: mountPath: /app/wordlists.json subPath: wordlists.json readOnly: true + {{- if .Values.database.persistence.enabled }} + - name: database + mountPath: /app/data + {{- end }} {{- with .Values.resources }} resources: {{- toYaml . | nindent 12 }} @@ -62,6 +66,16 @@ spec: - name: wordlists configMap: name: {{ include "krawl.fullname" . }}-wordlists + {{- if .Values.database.persistence.enabled }} + - name: database + {{- if .Values.database.persistence.existingClaim }} + persistentVolumeClaim: + claimName: {{ .Values.database.persistence.existingClaim }} + {{- else }} + persistentVolumeClaim: + claimName: {{ include "krawl.fullname" . }}-db + {{- end }} + {{- end }} {{- with .Values.nodeSelector }} nodeSelector: {{- toYaml . | nindent 8 }} diff --git a/helm/templates/pvc.yaml b/helm/templates/pvc.yaml new file mode 100644 index 0000000..ec73af2 --- /dev/null +++ b/helm/templates/pvc.yaml @@ -0,0 +1,17 @@ +{{- if and .Values.database.persistence.enabled (not .Values.database.persistence.existingClaim) }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "krawl.fullname" . }}-db + labels: + {{- include "krawl.labels" . 
| nindent 4 }} +spec: + accessModes: + - {{ .Values.database.persistence.accessMode }} + {{- if .Values.database.persistence.storageClassName }} + storageClassName: {{ .Values.database.persistence.storageClassName }} + {{- end }} + resources: + requests: + storage: {{ .Values.database.persistence.size }} +{{- end }} diff --git a/helm/values.yaml b/helm/values.yaml index 8a6bc1d..c92bc0b 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -79,6 +79,24 @@ config: # canaryTokenUrl: set-your-canary-token-url-here # timezone: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used. +# Database configuration +database: + # Path to the SQLite database file + path: "data/krawl.db" + # Number of days to retain access logs and attack data + retentionDays: 30 + # Persistence configuration + persistence: + enabled: true + # Storage class name (use default if not specified) + # storageClassName: "" + # Access mode for the persistent volume + accessMode: ReadWriteOnce + # Size of the persistent volume + size: 1Gi + # Optional: Use existing PVC + # existingClaim: "" + networkPolicy: enabled: true policyTypes: diff --git a/kubernetes/krawl-all-in-one-deploy.yaml b/kubernetes/krawl-all-in-one-deploy.yaml index 0362220..d1a026c 100644 --- a/kubernetes/krawl-all-in-one-deploy.yaml +++ b/kubernetes/krawl-all-in-one-deploy.yaml @@ -20,6 +20,9 @@ data: CANARY_TOKEN_TRIES: "10" PROBABILITY_ERROR_CODES: "0" # CANARY_TOKEN_URL: set-your-canary-token-url-here + # Database configuration + DATABASE_PATH: "data/krawl.db" + DATABASE_RETENTION_DAYS: "30" --- apiVersion: v1 kind: ConfigMap @@ -227,6 +230,20 @@ data: ] } --- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: krawl-db + namespace: krawl-system + labels: + app: krawl-server +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi +--- apiVersion: apps/v1 kind: Deployment metadata: @@ -260,6 +277,8 @@ spec: mountPath: /app/wordlists.json subPath: 
wordlists.json readOnly: true + - name: database + mountPath: /app/data resources: requests: memory: "64Mi" @@ -271,6 +290,9 @@ spec: - name: wordlists configMap: name: krawl-wordlists + - name: database + persistentVolumeClaim: + claimName: krawl-db --- apiVersion: v1 kind: Service diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml index 073005f..ef357b0 100644 --- a/kubernetes/manifests/configmap.yaml +++ b/kubernetes/manifests/configmap.yaml @@ -15,4 +15,7 @@ data: PROBABILITY_ERROR_CODES: "0" SERVER_HEADER: "Apache/2.2.22 (Ubuntu)" # CANARY_TOKEN_URL: set-your-canary-token-url-here -# TIMEZONE: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome") \ No newline at end of file +# TIMEZONE: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome") + # Database configuration + DATABASE_PATH: "data/krawl.db" + DATABASE_RETENTION_DAYS: "30" \ No newline at end of file diff --git a/kubernetes/manifests/deployment.yaml b/kubernetes/manifests/deployment.yaml index 0552eba..1650721 100644 --- a/kubernetes/manifests/deployment.yaml +++ b/kubernetes/manifests/deployment.yaml @@ -31,6 +31,8 @@ spec: mountPath: /app/wordlists.json subPath: wordlists.json readOnly: true + - name: database + mountPath: /app/data resources: requests: memory: "64Mi" @@ -42,3 +44,6 @@ spec: - name: wordlists configMap: name: krawl-wordlists + - name: database + persistentVolumeClaim: + claimName: krawl-db diff --git a/kubernetes/manifests/kustomization.yaml b/kubernetes/manifests/kustomization.yaml index 8f41776..4a5fcd9 100644 --- a/kubernetes/manifests/kustomization.yaml +++ b/kubernetes/manifests/kustomization.yaml @@ -5,6 +5,7 @@ resources: - namespace.yaml - configmap.yaml - wordlists-configmap.yaml + - pvc.yaml - deployment.yaml - service.yaml - network-policy.yaml diff --git a/kubernetes/manifests/pvc.yaml b/kubernetes/manifests/pvc.yaml new file mode 100644 index 0000000..6b771ff --- /dev/null +++ b/kubernetes/manifests/pvc.yaml @@ -0,0 
+1,13 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: krawl-db + namespace: krawl-system + labels: + app: krawl-server +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 1Gi