From 16aca9bba63a56eacde2eb0b9cb4d00b876f1f00 Mon Sep 17 00:00:00 2001
From: Phillip Tarrant <ptarrant@gmail.com>
Date: Fri, 26 Dec 2025 07:53:05 -0600
Subject: [PATCH 01/12] Add configurable HTTP Server header for deception

  Add SERVER_HEADER environment variable to customize the HTTP Server
  response header, defaulting to Apache/2.2.22 (Ubuntu). This allows the
  honeypot to masquerade as different web servers to attract attackers.

  - Add server_header field to Config dataclass
  - Override version_string() in Handler to return configured header
  - Update documentation and all deployment configs
---
 README.md                           | 1 +
 docker-compose.yaml                 | 1 +
 helm/templates/configmap.yaml       | 1 +
 helm/values.yaml                    | 1 +
 kubernetes/manifests/configmap.yaml | 1 +
 src/config.py                       | 4 +++-
 src/handler.py                      | 4 ++++
 src/server.py                       | 1 +
 8 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0cf8b96..b84d955 100644
--- a/README.md
+++ b/README.md
@@ -185,6 +185,7 @@ To customize the deception server installation several **environment variables**
 | `CANARY_TOKEN_URL` | External canary token URL | None |
 | `DASHBOARD_SECRET_PATH` | Custom dashboard path | Auto-generated |
 | `PROBABILITY_ERROR_CODES` | Error response probability (0-100%) | `0` |
+| `SERVER_HEADER` | HTTP Server header for deception | `Apache/2.2.22 (Ubuntu)` |
 
 ## robots.txt
 The actual (juicy) robots.txt configuration is the following
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 57c648d..1612864 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -20,6 +20,7 @@ services:
       - MAX_COUNTER=10
       - CANARY_TOKEN_TRIES=10
       - PROBABILITY_ERROR_CODES=0
+      - SERVER_HEADER=Apache/2.2.22 (Ubuntu)
       # Optional: Set your canary token URL
       # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt
       # Optional: Set custom dashboard path (auto-generated if not set)
diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml
index f6fe92c..c50ab75 100644
--- a/helm/templates/configmap.yaml
+++ b/helm/templates/configmap.yaml
@@ -14,4 +14,5 @@ data:
   MAX_COUNTER: {{ .Values.config.maxCounter | quote }}
   CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }}
   PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }}
+  SERVER_HEADER: {{ .Values.config.serverHeader | quote }}
   CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }}
diff --git a/helm/values.yaml b/helm/values.yaml
index 9ee9ca5..a095632 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -73,6 +73,7 @@ config:
   maxCounter: 10
   canaryTokenTries: 10
   probabilityErrorCodes: 0
+  serverHeader: "Apache/2.2.22 (Ubuntu)"
 #  canaryTokenUrl: set-your-canary-token-url-here
 
 networkPolicy:
diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml
index 42ba002..431b9a3 100644
--- a/kubernetes/manifests/configmap.yaml
+++ b/kubernetes/manifests/configmap.yaml
@@ -13,4 +13,5 @@ data:
   MAX_COUNTER: "10"
   CANARY_TOKEN_TRIES: "10"
   PROBABILITY_ERROR_CODES: "0"
+  SERVER_HEADER: "Apache/2.2.22 (Ubuntu)"
 #  CANARY_TOKEN_URL: set-your-canary-token-url-here
\ No newline at end of file
diff --git a/src/config.py b/src/config.py
index 51391a9..7c6714c 100644
--- a/src/config.py
+++ b/src/config.py
@@ -21,6 +21,7 @@ class Config:
     api_server_port: int = 8080
     api_server_path: str = "/api/v2/users"
     probability_error_codes: int = 0  # Percentage (0-100)
+    server_header: str = "Apache/2.2.22 (Ubuntu)"
 
     @classmethod
     def from_env(cls) -> 'Config':
@@ -44,5 +45,6 @@ class Config:
             api_server_url=os.getenv('API_SERVER_URL'),
             api_server_port=int(os.getenv('API_SERVER_PORT', 8080)),
             api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'),
-            probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5))
+            probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5)),
+            server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)')
         )
diff --git a/src/handler.py b/src/handler.py
index 81f48fa..bed3369 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -46,6 +46,10 @@ class Handler(BaseHTTPRequestHandler):
         """Extract user agent from request"""
         return self.headers.get('User-Agent', '')
 
+    def version_string(self) -> str:
+        """Return custom server version for deception."""
+        return self.config.server_header
+
     def _should_return_error(self) -> bool:
         """Check if we should return an error based on probability"""
         if self.config.probability_error_codes <= 0:
diff --git a/src/server.py b/src/server.py
index d10d33e..73f0ce9 100644
--- a/src/server.py
+++ b/src/server.py
@@ -31,6 +31,7 @@ def print_usage():
     print('  DASHBOARD_SECRET_PATH - Secret path for dashboard (auto-generated if not set)')
     print('  PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)')
     print('  CHAR_SPACE            - Characters for random links')
+    print('  SERVER_HEADER         - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))')
 
 
 def main():

From d0101b34faf33bc85d3065051353da55bb0bf56f Mon Sep 17 00:00:00 2001
From: Phillip Tarrant <ptarrant@gmail.com>
Date: Fri, 26 Dec 2025 08:00:16 -0600
Subject: [PATCH 02/12] Added test script to show the server header

---
 tests/check_header.sh | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100755 tests/check_header.sh

diff --git a/tests/check_header.sh b/tests/check_header.sh
new file mode 100755
index 0000000..78b8e5d
--- /dev/null
+++ b/tests/check_header.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# Print the Server header returned by a local instance: -s silences the progress bar, -I fetches only the response headers, grep keeps just the Server line
+curl -s -I http://localhost:5000 | grep "Server:"
\ No newline at end of file

From 61ba574e92cc86444001a767508dc3e79f469247 Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Sat, 27 Dec 2025 19:17:27 +0100
Subject: [PATCH 03/12] Added POST log and dashboard for used credentials

---
 src/handler.py                      | 15 ++++++++
 src/logger.py                       | 28 +++++++++++++++
 src/server.py                       |  4 ++-
 src/templates/dashboard_template.py | 28 +++++++++++++++
 src/tracker.py                      | 56 ++++++++++++++++++++++++++++-
 5 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/src/handler.py b/src/handler.py
index 9d8abe2..ac7ca22 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -3,6 +3,7 @@
 import logging
 import random
 import time
+from datetime import datetime
 from http.server import BaseHTTPRequestHandler
 from typing import Optional, List
 
@@ -25,6 +26,7 @@ class Handler(BaseHTTPRequestHandler):
     counter: int = 0
     app_logger: logging.Logger = None
     access_logger: logging.Logger = None
+    credential_logger: logging.Logger = None
 
     def _get_client_ip(self) -> str:
         """Extract client IP address from request, checking proxy headers first"""
@@ -213,6 +215,19 @@ class Handler(BaseHTTPRequestHandler):
 
             self.access_logger.warning(f"[POST DATA] {post_data[:200]}")
 
+            # Parse and log credentials
+            username, password = self.tracker.parse_credentials(post_data)
+            if username or password:
+                # Log to dedicated credentials.log file
+                timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
+                credential_line = f"{timestamp}|{client_ip}|{username or 'N/A'}|{password or 'N/A'}|{self.path}"
+                self.credential_logger.info(credential_line)
+                
+                # Also record in tracker for dashboard
+                self.tracker.record_credential_attempt(client_ip, self.path, username or 'N/A', password or 'N/A')
+                
+                self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}")
+
         # send the post data (body) to the record_access function so the post data can be used to detect suspicious things.
         self.tracker.record_access(client_ip, self.path, user_agent, post_data)
         
diff --git a/src/logger.py b/src/logger.py
index 68b8278..9f09236 100644
--- a/src/logger.py
+++ b/src/logger.py
@@ -77,6 +77,22 @@ class LoggerManager:
         access_stream_handler.setFormatter(log_format)
         self._access_logger.addHandler(access_stream_handler)
 
+        # Setup credential logger (special format, no stream handler)
+        self._credential_logger = logging.getLogger("krawl.credentials")
+        self._credential_logger.setLevel(logging.INFO)
+        self._credential_logger.handlers.clear()
+
+        # Credential logger uses a simple format: timestamp|ip|username|password|path
+        credential_format = logging.Formatter("%(message)s")
+        
+        credential_file_handler = RotatingFileHandler(
+            os.path.join(log_dir, "credentials.log"),
+            maxBytes=max_bytes,
+            backupCount=backup_count
+        )
+        credential_file_handler.setFormatter(credential_format)
+        self._credential_logger.addHandler(credential_file_handler)
+
         self._initialized = True
 
     @property
@@ -93,6 +109,13 @@ class LoggerManager:
             self.initialize()
         return self._access_logger
 
+    @property
+    def credentials(self) -> logging.Logger:
+        """Get the credentials logger."""
+        if not self._initialized:
+            self.initialize()
+        return self._credential_logger
+
 
 # Module-level singleton instance
 _logger_manager = LoggerManager()
@@ -108,6 +131,11 @@ def get_access_logger() -> logging.Logger:
     return _logger_manager.access
 
 
+def get_credential_logger() -> logging.Logger:
+    """Get the credential logger instance."""
+    return _logger_manager.credentials
+
+
 def initialize_logging(log_dir: str = "logs") -> None:
     """Initialize the logging system."""
     _logger_manager.initialize(log_dir)
diff --git a/src/server.py b/src/server.py
index 861e9f2..fd8f7d2 100644
--- a/src/server.py
+++ b/src/server.py
@@ -11,7 +11,7 @@ from http.server import HTTPServer
 from config import Config
 from tracker import AccessTracker
 from handler import Handler
-from logger import initialize_logging, get_app_logger, get_access_logger
+from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
 
 
 def print_usage():
@@ -45,6 +45,7 @@ def main():
     initialize_logging()
     app_logger = get_app_logger()
     access_logger = get_access_logger()
+    credential_logger = get_credential_logger()
 
     config = Config.from_env()
 
@@ -55,6 +56,7 @@ def main():
     Handler.counter = config.canary_token_tries
     Handler.app_logger = app_logger
     Handler.access_logger = access_logger
+    Handler.credential_logger = credential_logger
 
     if len(sys.argv) == 2:
         try:
diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py
index 3f5524d..a267278 100644
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -45,6 +45,12 @@ def generate_dashboard(stats: dict) -> str:
         for log in stats.get('attack_types', [])[-10:]
     ]) or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
 
+    # Generate credential attempts rows
+    credential_rows = '\n'.join([
+        f'<tr><td>{log["ip"]}</td><td>{log["username"]}</td><td>{log["password"]}</td><td>{log["path"]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
+        for log in stats.get('credential_attempts', [])[-20:]
+    ]) or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'
+
     return f"""<!DOCTYPE html>
 <html>
 <head>
@@ -159,6 +165,10 @@ def generate_dashboard(stats: dict) -> str:
                 <div class="stat-value alert">{stats.get('honeypot_ips', 0)}</div>
                 <div class="stat-label">Honeypot Caught</div>
             </div>
+            <div class="stat-card alert">
+                <div class="stat-value alert">{len(stats.get('credential_attempts', []))}</div>
+                <div class="stat-label">Credentials Captured</div>
+            </div>
         </div>
 
         <div class="table-container alert-section">
@@ -194,6 +204,24 @@ def generate_dashboard(stats: dict) -> str:
             </table>
         </div>
 
+        <div class="table-container alert-section">
+            <h2>🔑 Captured Credentials</h2>
+            <table>
+                <thead>
+                    <tr>
+                        <th>IP Address</th>
+                        <th>Username</th>
+                        <th>Password</th>
+                        <th>Path</th>
+                        <th>Time</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {credential_rows}
+                </tbody>
+            </table>
+        </div>
+
         <div class="table-container alert-section">
             <h2>&#128520; Detected Attack Types</h2>
             <table>
diff --git a/src/tracker.py b/src/tracker.py
index 6e733f4..717a4c3 100644
--- a/src/tracker.py
+++ b/src/tracker.py
@@ -4,6 +4,7 @@ from typing import Dict, List, Tuple
 from collections import defaultdict
 from datetime import datetime
 import re
+import urllib.parse
 
 
 class AccessTracker:
@@ -13,6 +14,7 @@ class AccessTracker:
         self.path_counts: Dict[str, int] = defaultdict(int)
         self.user_agent_counts: Dict[str, int] = defaultdict(int)
         self.access_log: List[Dict] = []
+        self.credential_attempts: List[Dict] = []
         self.suspicious_patterns = [
             'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests',
             'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
@@ -31,6 +33,57 @@ class AccessTracker:
         # Track IPs that accessed honeypot paths from robots.txt
         self.honeypot_triggered: Dict[str, List[str]] = defaultdict(list)
 
+    def parse_credentials(self, post_data: str) -> Tuple[str, str]:
+        """
+        Parse username and password from POST data.
+        Returns tuple (username, password) or (None, None) if not found.
+        """
+        if not post_data:
+            return None, None
+        
+        username = None
+        password = None
+        
+        try:
+            # Parse URL-encoded form data
+            parsed = urllib.parse.parse_qs(post_data)
+            
+            # Common username field names
+            username_fields = ['username', 'user', 'login', 'email', 'log', 'userid', 'account']
+            for field in username_fields:
+                if field in parsed and parsed[field]:
+                    username = parsed[field][0]
+                    break
+            
+            # Common password field names
+            password_fields = ['password', 'pass', 'passwd', 'pwd', 'passphrase']
+            for field in password_fields:
+                if field in parsed and parsed[field]:
+                    password = parsed[field][0]
+                    break
+                    
+        except Exception:
+            # If parsing fails, try simple regex patterns
+            username_match = re.search(r'(?:username|user|login|email|log)=([^&\s]+)', post_data, re.IGNORECASE)
+            password_match = re.search(r'(?:password|pass|passwd|pwd)=([^&\s]+)', post_data, re.IGNORECASE)
+            
+            if username_match:
+                username = urllib.parse.unquote_plus(username_match.group(1))
+            if password_match:
+                password = urllib.parse.unquote_plus(password_match.group(1))
+        
+        return username, password
+
+    def record_credential_attempt(self, ip: str, path: str, username: str, password: str):
+        """Record a credential login attempt"""
+        self.credential_attempts.append({
+            'ip': ip,
+            'path': path,
+            'username': username,
+            'password': password,
+            'timestamp': datetime.now().isoformat()
+        })
+
     def record_access(self, ip: str, path: str, user_agent: str = '', body: str = ''):
         """Record an access attempt"""
         self.ip_counts[ip] += 1
@@ -146,5 +199,6 @@ class AccessTracker:
             'top_user_agents': self.get_top_user_agents(10),
             'recent_suspicious': self.get_suspicious_accesses(20),
             'honeypot_triggered_ips': self.get_honeypot_triggered_ips(),
-            'attack_types': self.get_attack_type_accesses(20)
+            'attack_types': self.get_attack_type_accesses(20),
+            'credential_attempts': self.credential_attempts[-50:]  # Last 50 attempts
         }

From 6556e17f91d53965f96e5d9f8b9e9f0ddd03e729 Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Sun, 28 Dec 2025 17:07:18 +0100
Subject: [PATCH 04/12] Added timezone env variable handling

---
 README.md                           |   1 +
 deployment.yaml                     |  44 --------
 docker-compose.yaml                 |   2 +
 helm/templates/configmap.yaml       |   3 +
 helm/values.yaml                    |   1 +
 kubernetes/manifests/configmap.yaml |   3 +-
 src/config.py                       |  41 +++++++-
 src/logger.py                       |  33 ++++--
 src/server.py                       |  17 +++-
 src/templates/dashboard_template.py |  18 +++-
 src/tracker.py                      |  10 +-
 tests/test_credentials.sh           | 150 ++++++++++++++++++++++++++++
 12 files changed, 258 insertions(+), 65 deletions(-)
 delete mode 100644 deployment.yaml
 create mode 100755 tests/test_credentials.sh

diff --git a/README.md b/README.md
index b84d955..06157bd 100644
--- a/README.md
+++ b/README.md
@@ -186,6 +186,7 @@ To customize the deception server installation several **environment variables**
 | `DASHBOARD_SECRET_PATH` | Custom dashboard path | Auto-generated |
 | `PROBABILITY_ERROR_CODES` | Error response probability (0-100%) | `0` |
 | `SERVER_HEADER` | HTTP Server header for deception | `Apache/2.2.22 (Ubuntu)` |
+| `TIMEZONE` | IANA timezone for logs and dashboard (e.g., `America/New_York`, `Europe/Rome`) | System timezone |
 
 ## robots.txt
 The actual (juicy) robots.txt configuration is the following
diff --git a/deployment.yaml b/deployment.yaml
deleted file mode 100644
index 4bf5189..0000000
--- a/deployment.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: krawl-server
-  namespace: krawl
-  labels:
-    app: krawl-server
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: krawl-server
-  template:
-    metadata:
-      labels:
-        app: krawl-server
-    spec:
-      containers:
-      - name: krawl
-        image: ghcr.io/blessedrebus/krawl:latest
-        imagePullPolicy: Always
-        ports:
-        - containerPort: 5000
-          name: http
-          protocol: TCP
-        envFrom:
-        - configMapRef:
-            name: krawl-config
-        volumeMounts:
-        - name: wordlists
-          mountPath: /app/wordlists.json
-          subPath: wordlists.json
-          readOnly: true
-        resources:
-          requests:
-            memory: "64Mi"
-            cpu: "100m"
-          limits:
-            memory: "256Mi"
-            cpu: "500m"
-      volumes:
-      - name: wordlists
-        configMap:
-          name: krawl-wordlists
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 1612864..600034d 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -25,6 +25,8 @@ services:
       # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt
       # Optional: Set custom dashboard path (auto-generated if not set)
       # - DASHBOARD_SECRET_PATH=/my-secret-dashboard
+      # Optional: Set timezone for logs and dashboard (e.g., America/New_York, Europe/Rome)
+      # - TIMEZONE=UTC
     restart: unless-stopped
     healthcheck:
       test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"]
diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml
index c50ab75..c08aaa5 100644
--- a/helm/templates/configmap.yaml
+++ b/helm/templates/configmap.yaml
@@ -16,3 +16,6 @@ data:
   PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }}
   SERVER_HEADER: {{ .Values.config.serverHeader | quote }}
   CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }}
+  {{- if .Values.config.timezone }}
+  TIMEZONE: {{ .Values.config.timezone | quote }}
+  {{- end }}
diff --git a/helm/values.yaml b/helm/values.yaml
index a095632..ac51756 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -75,6 +75,7 @@ config:
   probabilityErrorCodes: 0
   serverHeader: "Apache/2.2.22 (Ubuntu)"
 #  canaryTokenUrl: set-your-canary-token-url-here
+#  timezone: "UTC"  # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used.
 
 networkPolicy:
   enabled: true
diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml
index 431b9a3..073005f 100644
--- a/kubernetes/manifests/configmap.yaml
+++ b/kubernetes/manifests/configmap.yaml
@@ -14,4 +14,5 @@ data:
   CANARY_TOKEN_TRIES: "10"
   PROBABILITY_ERROR_CODES: "0"
   SERVER_HEADER: "Apache/2.2.22 (Ubuntu)"
-#  CANARY_TOKEN_URL: set-your-canary-token-url-here
\ No newline at end of file
+#  CANARY_TOKEN_URL: set-your-canary-token-url-here
+#  TIMEZONE: "UTC"  # IANA timezone (e.g., "America/New_York", "Europe/Rome")
\ No newline at end of file
diff --git a/src/config.py b/src/config.py
index 7c6714c..741f01f 100644
--- a/src/config.py
+++ b/src/config.py
@@ -3,6 +3,8 @@
 import os
 from dataclasses import dataclass
 from typing import Optional, Tuple
+from zoneinfo import ZoneInfo
+import time
 
 
 @dataclass
@@ -22,6 +24,40 @@ class Config:
     api_server_path: str = "/api/v2/users"
     probability_error_codes: int = 0  # Percentage (0-100)
     server_header: str = "Apache/2.2.22 (Ubuntu)"
+    timezone: str = None  # IANA timezone (e.g., 'America/New_York', 'Europe/Rome')
+    
+    @staticmethod
+    # Best-effort detection of the host timezone; used when TIMEZONE is unset or invalid
+    def get_system_timezone() -> str:
+        """Get the system's default timezone"""
+        try:
+            if os.path.islink('/etc/localtime'):
+                tz_path = os.readlink('/etc/localtime')
+                if 'zoneinfo/' in tz_path:
+                    return tz_path.split('zoneinfo/')[-1]
+            
+            local_tz = time.tzname[time.daylight]
+            if local_tz and local_tz != 'UTC':
+                return local_tz
+        except Exception:
+            pass
+        
+        # Default fallback to UTC
+        return 'UTC'
+    
+    def get_timezone(self) -> ZoneInfo:
+        """Get configured timezone as ZoneInfo object"""
+        if self.timezone:
+            try:
+                return ZoneInfo(self.timezone)
+            except Exception:
+                pass
+        
+        system_tz = self.get_system_timezone()
+        try:
+            return ZoneInfo(system_tz)
+        except Exception:
+            return ZoneInfo('UTC')
 
     @classmethod
     def from_env(cls) -> 'Config':
@@ -45,6 +81,7 @@ class Config:
             api_server_url=os.getenv('API_SERVER_URL'),
             api_server_port=int(os.getenv('API_SERVER_PORT', 8080)),
             api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'),
-            probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5)),
-            server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)')
+            probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 0)),
+            server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)'),
+            timezone=os.getenv('TIMEZONE')  # If not set, will use system timezone
         )
diff --git a/src/logger.py b/src/logger.py
index 9f09236..992cad8 100644
--- a/src/logger.py
+++ b/src/logger.py
@@ -8,6 +8,23 @@ Provides two loggers: app (application) and access (HTTP access logs).
 import logging
 import os
 from logging.handlers import RotatingFileHandler
+from typing import Optional
+from zoneinfo import ZoneInfo
+from datetime import datetime
+
+
+class TimezoneFormatter(logging.Formatter):
+    """Custom formatter that respects configured timezone"""
+    def __init__(self, fmt=None, datefmt=None, timezone: Optional[ZoneInfo] = None):
+        super().__init__(fmt, datefmt)
+        self.timezone = timezone or ZoneInfo('UTC')
+    
+    def formatTime(self, record, datefmt=None):
+        """Override formatTime to use configured timezone"""
+        dt = datetime.fromtimestamp(record.created, tz=self.timezone)
+        if datefmt:
+            return dt.strftime(datefmt)
+        return dt.isoformat()
 
 
 class LoggerManager:
@@ -20,23 +37,27 @@ class LoggerManager:
             cls._instance._initialized = False
         return cls._instance
 
-    def initialize(self, log_dir: str = "logs") -> None:
+    def initialize(self, log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None:
         """
         Initialize the logging system with rotating file handlers.
 
         Args:
             log_dir: Directory for log files (created if not exists)
+            timezone: ZoneInfo timezone for log timestamps (defaults to UTC)
         """
         if self._initialized:
             return
 
+        self.timezone = timezone or ZoneInfo('UTC')
+
         # Create log directory if it doesn't exist
         os.makedirs(log_dir, exist_ok=True)
 
         # Common format for all loggers
-        log_format = logging.Formatter(
+        log_format = TimezoneFormatter(
             "[%(asctime)s] %(levelname)s - %(message)s",
-            datefmt="%Y-%m-%d %H:%M:%S"
+            datefmt="%Y-%m-%d %H:%M:%S",
+            timezone=self.timezone
         )
 
         # Rotation settings: 1MB max, 5 backups
@@ -83,7 +104,7 @@ class LoggerManager:
         self._credential_logger.handlers.clear()
 
         # Credential logger uses a simple format: timestamp|ip|username|password|path
-        credential_format = logging.Formatter("%(message)s")
+        credential_format = TimezoneFormatter("%(message)s", timezone=self.timezone)
         
         credential_file_handler = RotatingFileHandler(
             os.path.join(log_dir, "credentials.log"),
@@ -136,6 +157,6 @@ def get_credential_logger() -> logging.Logger:
     return _logger_manager.credentials
 
 
-def initialize_logging(log_dir: str = "logs") -> None:
+def initialize_logging(log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None:
     """Initialize the logging system."""
-    _logger_manager.initialize(log_dir)
+    _logger_manager.initialize(log_dir, timezone)
diff --git a/src/server.py b/src/server.py
index fd8f7d2..fcb794e 100644
--- a/src/server.py
+++ b/src/server.py
@@ -33,6 +33,8 @@ def print_usage():
     print('  PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)')
     print('  CHAR_SPACE            - Characters for random links')
     print('  SERVER_HEADER         - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))')
+    print('  TIMEZONE              - IANA timezone for logs/dashboard (e.g., America/New_York, Europe/Rome)')
+    print('                          If not set, system timezone will be used')
 
 
 def main():
@@ -41,15 +43,19 @@ def main():
         print_usage()
         exit(0)
 
-    # Initialize logging
-    initialize_logging()
+    config = Config.from_env()
+    
+    # Get timezone configuration
+    tz = config.get_timezone()
+    
+    # Initialize logging with timezone
+    initialize_logging(timezone=tz)
     app_logger = get_app_logger()
     access_logger = get_access_logger()
     credential_logger = get_credential_logger()
 
-    config = Config.from_env()
-
-    tracker = AccessTracker()
+    # Initialize tracker with timezone
+    tracker = AccessTracker(timezone=tz)
 
     Handler.config = config
     Handler.tracker = tracker
@@ -71,6 +77,7 @@ def main():
 
     try:
         app_logger.info(f'Starting deception server on port {config.port}...')
+        app_logger.info(f'Timezone configured: {tz.key}')
         app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}')
         if config.canary_token_url:
             app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries')
diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py
index a267278..9fc4111 100644
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -5,6 +5,18 @@ Dashboard template for viewing honeypot statistics.
 Customize this template to change the dashboard appearance.
 """
 
+from datetime import datetime
+
+
+def format_timestamp(iso_timestamp: str) -> str:
+    """Format ISO timestamp for display (YYYY-MM-DD HH:MM:SS)"""
+    try:
+        dt = datetime.fromisoformat(iso_timestamp)
+        return dt.strftime("%Y-%m-%d %H:%M:%S")
+    except Exception:
+        # Fallback for old format
+        return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
+
 
 def generate_dashboard(stats: dict) -> str:
     """Generate dashboard HTML with access statistics"""
@@ -29,7 +41,7 @@ def generate_dashboard(stats: dict) -> str:
 
     # Generate suspicious accesses rows
     suspicious_rows = '\n'.join([
-        f'<tr><td>{log["ip"]}</td><td>{log["path"]}</td><td style="word-break: break-all;">{log["user_agent"][:60]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
+        f'<tr><td>{log["ip"]}</td><td>{log["path"]}</td><td style="word-break: break-all;">{log["user_agent"][:60]}</td><td>{format_timestamp(log["timestamp"])}</td></tr>'
         for log in stats['recent_suspicious'][-10:]
     ]) or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
 
@@ -41,13 +53,13 @@ def generate_dashboard(stats: dict) -> str:
 
     # Generate attack types rows
     attack_type_rows = '\n'.join([
-        f'<tr><td>{log["ip"]}</td><td>{log["path"]}</td><td>{", ".join(log["attack_types"])}</td><td style="word-break: break-all;">{log["user_agent"][:60]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
+        f'<tr><td>{log["ip"]}</td><td>{log["path"]}</td><td>{", ".join(log["attack_types"])}</td><td style="word-break: break-all;">{log["user_agent"][:60]}</td><td>{format_timestamp(log["timestamp"])}</td></tr>'
         for log in stats.get('attack_types', [])[-10:]
     ]) or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
 
     # Generate credential attempts rows
     credential_rows = '\n'.join([
-        f'<tr><td>{log["ip"]}</td><td>{log["username"]}</td><td>{log["password"]}</td><td>{log["path"]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
+        f'<tr><td>{log["ip"]}</td><td>{log["username"]}</td><td>{log["password"]}</td><td>{log["path"]}</td><td>{format_timestamp(log["timestamp"])}</td></tr>'
         for log in stats.get('credential_attempts', [])[-20:]
     ]) or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'
 
diff --git a/src/tracker.py b/src/tracker.py
index 717a4c3..c9322ec 100644
--- a/src/tracker.py
+++ b/src/tracker.py
@@ -1,20 +1,22 @@
 #!/usr/bin/env python3
 
-from typing import Dict, List, Tuple
+from typing import Dict, List, Tuple, Optional
 from collections import defaultdict
 from datetime import datetime
+from zoneinfo import ZoneInfo
 import re
 import urllib.parse
 
 
 class AccessTracker:
     """Track IP addresses and paths accessed"""
-    def __init__(self):
+    def __init__(self, timezone: Optional[ZoneInfo] = None):
         self.ip_counts: Dict[str, int] = defaultdict(int)
         self.path_counts: Dict[str, int] = defaultdict(int)
         self.user_agent_counts: Dict[str, int] = defaultdict(int)
         self.access_log: List[Dict] = []
         self.credential_attempts: List[Dict] = []
+        self.timezone = timezone or ZoneInfo('UTC')
         self.suspicious_patterns = [
             'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests',
             'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
@@ -81,7 +83,7 @@ class AccessTracker:
             'path': path,
             'username': username,
             'password': password,
-            'timestamp': datetime.now().isoformat()
+            'timestamp': datetime.now(self.timezone).isoformat()
         })
 
     def record_access(self, ip: str, path: str, user_agent: str = '', body: str = ''):
@@ -112,7 +114,7 @@ class AccessTracker:
             'suspicious': is_suspicious,
             'honeypot_triggered': self.is_honeypot_path(path),
             'attack_types':attack_findings,
-            'timestamp': datetime.now().isoformat()
+            'timestamp': datetime.now(self.timezone).isoformat()
         })
 
     def detect_attack_type(self, data:str) -> list[str]:
diff --git a/tests/test_credentials.sh b/tests/test_credentials.sh
new file mode 100755
index 0000000..6379b92
--- /dev/null
+++ b/tests/test_credentials.sh
@@ -0,0 +1,150 @@
+#!/bin/bash
+
+# This script sends various POST requests with credentials to the honeypot
+
+GREEN='\033[0;32m'
+BLUE='\033[0;34m'
+YELLOW='\033[1;33m'
+RED='\033[0;31m'
+NC='\033[0m'
+
+# Configuration
+HOST="localhost"
+PORT="5000"
+BASE_URL="http://${HOST}:${PORT}"
+
+echo -e "${BLUE}========================================${NC}"
+echo -e "${BLUE}Krawl Credential Logging Test Script${NC}"
+echo -e "${BLUE}========================================${NC}\n"
+
+# Check if server is running
+echo -e "${YELLOW}Checking if server is running on ${BASE_URL}...${NC}"
+if ! curl -s -f "${BASE_URL}/health" > /dev/null 2>&1; then
+    echo -e "${RED}❌ Server is not running. Please start the Krawl server first.${NC}"
+    echo -e "${YELLOW}Run: python3 src/server.py${NC}"
+    exit 1
+fi
+echo -e "${GREEN}✓ Server is running${NC}\n"
+
+# Test 1: Simple login form POST
+echo -e "${YELLOW}Test 1: POST to /login with form data${NC}"
+curl -s -X POST "${BASE_URL}/login" \
+    -H "Content-Type: application/x-www-form-urlencoded" \
+    -d "username=admin&password=admin123" \
+    > /dev/null
+echo -e "${GREEN}✓ Sent: admin / admin123${NC}\n"
+
+sleep 1
+
+# Test 2: Admin panel login
+echo -e "${YELLOW}Test 2: POST to /admin with credentials${NC}"
+curl -s -X POST "${BASE_URL}/admin" \
+    -H "Content-Type: application/x-www-form-urlencoded" \
+    -d "user=root&pass=toor&submit=Login" \
+    > /dev/null
+echo -e "${GREEN}✓ Sent: root / toor${NC}\n"
+
+sleep 1
+
+# Test 3: WordPress login attempt
+echo -e "${YELLOW}Test 3: POST to /wp-login.php${NC}"
+curl -s -X POST "${BASE_URL}/wp-login.php" \
+    -H "Content-Type: application/x-www-form-urlencoded" \
+    -d "log=wpuser&pwd=Password1&wp-submit=Log+In" \
+    > /dev/null
+echo -e "${GREEN}✓ Sent: wpuser / Password1${NC}\n"
+
+sleep 1
+
+# Test 4: JSON formatted credentials
+echo -e "${YELLOW}Test 4: POST to /api/login with JSON${NC}"
+curl -s -X POST "${BASE_URL}/api/login" \
+    -H "Content-Type: application/json" \
+    -d '{"username":"apiuser","password":"apipass123","remember":true}' \
+    > /dev/null
+echo -e "${GREEN}✓ Sent: apiuser / apipass123${NC}\n"
+
+sleep 1
+
+# Test 5: SSH-style login
+echo -e "${YELLOW}Test 5: POST to /ssh with credentials${NC}"
+curl -s -X POST "${BASE_URL}/ssh" \
+    -H "Content-Type: application/x-www-form-urlencoded" \
+    -d "username=sshuser&password=P@ssw0rd!" \
+    > /dev/null
+echo -e "${GREEN}✓ Sent: sshuser / P@ssw0rd!${NC}\n"
+
+sleep 1
+
+# Test 6: Database admin
+echo -e "${YELLOW}Test 6: POST to /phpmyadmin with credentials${NC}"
+curl -s -X POST "${BASE_URL}/phpmyadmin" \
+    -H "Content-Type: application/x-www-form-urlencoded" \
+    -d "pma_username=dbadmin&pma_password=dbpass123&server=1" \
+    > /dev/null
+echo -e "${GREEN}✓ Sent: dbadmin / dbpass123${NC}\n"
+
+sleep 1
+
+# Test 7: Multiple fields with email
+echo -e "${YELLOW}Test 7: POST to /register with email${NC}"
+curl -s -X POST "${BASE_URL}/register" \
+    -H "Content-Type: application/x-www-form-urlencoded" \
+    -d "email=test@example.com&username=newuser&password=NewPass123&confirm_password=NewPass123" \
+    > /dev/null
+echo -e "${GREEN}✓ Sent: newuser / NewPass123 (email: test@example.com)${NC}\n"
+
+sleep 1
+
+# Test 8: FTP credentials
+echo -e "${YELLOW}Test 8: POST to /ftp/login${NC}"
+curl -s -X POST "${BASE_URL}/ftp/login" \
+    -H "Content-Type: application/x-www-form-urlencoded" \
+    -d "ftpuser=ftpadmin&ftppass=ftp123456" \
+    > /dev/null
+echo -e "${GREEN}✓ Sent: ftpadmin / ftp123456${NC}\n"
+
+sleep 1
+
+# Test 9: Common brute force attempt
+echo -e "${YELLOW}Test 9: Multiple attempts (simulating brute force)${NC}"
+for i in {1..3}; do
+    curl -s -X POST "${BASE_URL}/login" \
+        -H "Content-Type: application/x-www-form-urlencoded" \
+        -d "username=admin&password=pass${i}" \
+        > /dev/null
+    echo -e "${GREEN}✓ Attempt $i: admin / pass${i}${NC}"
+    sleep 0.5
+done
+echo ""
+
+sleep 1
+
+# Test 10: Special characters in credentials
+echo -e "${YELLOW}Test 10: POST with special characters${NC}"
+curl -s -X POST "${BASE_URL}/login" \
+    -H "Content-Type: application/x-www-form-urlencoded" \
+    --data-urlencode "username=user@domain.com" \
+    --data-urlencode "password=P@\$\$w0rd!#%" \
+    > /dev/null
+echo -e "${GREEN}✓ Sent: user@domain.com / P@\$\$w0rd!#%${NC}\n"
+
+echo -e "${BLUE}========================================${NC}"
+echo -e "${GREEN}✓ All credential tests completed!${NC}"
+echo -e "${BLUE}========================================${NC}\n"
+
+echo -e "${YELLOW}Check the results:${NC}"
+echo -e "  1. View the log file: ${GREEN}cat src/logs/credentials.log${NC}"
+echo -e "  2. View the dashboard: ${GREEN}${BASE_URL}/dashboard${NC}"
+echo -e "  3. Check recent logs: ${GREEN}tail -20 src/logs/krawl.log${NC}\n"
+
+# Display last 10 credential entries if log file exists
+if [ -f "src/logs/credentials.log" ]; then
+    echo -e "${BLUE}========================================${NC}"
+    echo -e "${BLUE}Last 10 Captured Credentials:${NC}"
+    echo -e "${BLUE}========================================${NC}"
+    tail -10 src/logs/credentials.log
+    echo ""
+fi
+
+echo -e "${YELLOW}💡 Tip: Open ${BASE_URL}/dashboard in your browser to see the credentials in real-time!${NC}"

From f1c142c53d7f40dc8eec68d886928542ac44e9b6 Mon Sep 17 00:00:00 2001
From: Phillip Tarrant <ptarrant@gmail.com>
Date: Sun, 28 Dec 2025 10:43:32 -0600
Subject: [PATCH 05/12] feat: add SQLite persistent storage for request logging

  - Add SQLAlchemy-based database layer for persistent storage
  - Create models for access_logs, credential_attempts, attack_detections, ip_stats
  - Include fields for future GeoIP and reputation enrichment
  - Implement sanitization utilities to protect against malicious payloads
  - Fix XSS vulnerability in dashboard template (HTML escape all user data)
  - Add DATABASE_PATH and DATABASE_RETENTION_DAYS config options
  - Dual storage: in-memory for dashboard performance + SQLite for persistence

  New files:
  - src/models.py - SQLAlchemy ORM models
  - src/database.py - DatabaseManager singleton
  - src/sanitizer.py - Input sanitization and HTML escaping
  - requirements.txt - SQLAlchemy dependency

  Security protections:
  - Parameterized queries via SQLAlchemy ORM
  - Field length limits to prevent storage exhaustion
  - Null byte and control character stripping
  - HTML escaping on dashboard output
---
 .gitignore                          |   4 +
 docs/coding-guidelines.md           |  90 +++++++
 requirements.txt                    |   5 +
 src/config.py                       |   7 +-
 src/database.py                     | 361 ++++++++++++++++++++++++++++
 src/handler.py                      |   4 +-
 src/models.py                       | 141 +++++++++++
 src/sanitizer.py                    | 113 +++++++++
 src/server.py                       |  10 +
 src/templates/dashboard_template.py |  35 ++-
 src/tracker.py                      | 122 ++++++++--
 11 files changed, 860 insertions(+), 32 deletions(-)
 create mode 100644 docs/coding-guidelines.md
 create mode 100644 requirements.txt
 create mode 100644 src/database.py
 create mode 100644 src/models.py
 create mode 100644 src/sanitizer.py

diff --git a/.gitignore b/.gitignore
index 5d758cb..a36748e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,6 +61,10 @@ secrets/
 *.log
 logs/
 
+# Database
+data/
+*.db
+
 # Temporary files
 *.tmp
 *.temp
diff --git a/docs/coding-guidelines.md b/docs/coding-guidelines.md
new file mode 100644
index 0000000..1e13575
--- /dev/null
+++ b/docs/coding-guidelines.md
@@ -0,0 +1,90 @@
+### Coding Standards
+
+**Style & Structure**
+- Prefer longer, explicit code over compact one-liners
+- Always include docstrings for functions/classes + inline comments
+- Strongly prefer OOP-style code (classes over functional/nested functions)
+- Strong typing throughout (dataclasses, TypedDict, Enums, type hints)
+- Value future-proofing and expanded usage insights
+
+**Data Design**
+- Use dataclasses for internal data modeling
+- Typed JSON structures
+- Functions return fully typed objects (no loose dicts)
+- Snapshot files in JSON or YAML
+- Human-readable fields (e.g., `sql_injection`, `xss_attempt`)
+
+**Templates & UI**
+- Don't mix large HTML/CSS blocks in Python code
+- Prefer Jinja templates for HTML rendering
+- Clean CSS, minimal inline clutter, readable template logic
+
+**Writing & Documentation**
+- Markdown documentation
+- Clear section headers
+- Roadmap/Phase/Feature-Session style documents
+
+**Logging**
+- Use the logging singleton found in `src/logger.py`
+- Setup logging at app start: 
+    ```
+    initialize_logging()
+    app_logger = get_app_logger()
+    access_logger = get_access_logger()
+    credential_logger = get_credential_logger()
+    ```
+
+**Preferred Pip Packages**
+- API/Web Server: Simple Python
+- HTTP: Requests
+- SQLite: SQLAlchemy
+- Database Migrations: Alembic
+
+### Error Handling
+- Custom exception classes for domain-specific errors
+- Consistent error response formats (JSON structure)
+- Logging severity levels (ERROR vs WARNING)
+
+### Configuration
+- `.env` for secrets (never committed)
+- Maintain `.env.example` in each component for documentation
+- Typed config loaders using dataclasses
+- Validation on startup
+
+### Containerization & Deployment
+- Explicit Dockerfiles
+- Production-friendly hardening (distroless/slim when meaningful)
+- Use git branch as tag
+
+### Dependency Management
+- Use `requirements.txt` and virtual environments (`python3 -m venv venv`)
+- Use path `venv` for all virtual environments
+- Pin dependencies to version ranges (or to an exact version when a specific release is required)
+- Activate venv before running code (unless in Docker)
+
+### Testing Standards
+- Manual testing preferred for applications
+- **tests:** Use shell scripts with curl/httpie for simulation and attack scripts.
+- Tests should be located in the `tests` directory
+
+### Git Standards
+
+**Branch Strategy:**
+- `master` - Production-ready code only
+- `beta` - Public pre-release testing
+- `dev` - Main development branch, integration point
+
+**Workflow:**
+- Feature work branches off `dev` (e.g., `feature/add-scheduler`)
+- Merge features back to `dev` for testing
+- Promote `dev` → `beta` for public testing (when applicable)
+- Promote `beta` (or `dev`) → `master` for production
+
+**Commit Messages:**
+- Use conventional commit format: `feat:`, `fix:`, `docs:`, `refactor:`, etc.
+- Keep commits atomic and focused
+- Write clear, descriptive messages
+
+**Tagging:**
+- Tag releases on `master` with semantic versioning (e.g., `v1.2.3`)
+- Optionally tag beta releases (e.g., `v1.2.3-beta.1`)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..94f74f2
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+# Krawl Honeypot Dependencies
+# Install with: pip install -r requirements.txt
+
+# Database ORM
+SQLAlchemy>=2.0.0,<3.0.0
diff --git a/src/config.py b/src/config.py
index 7c6714c..76f1aed 100644
--- a/src/config.py
+++ b/src/config.py
@@ -22,6 +22,9 @@ class Config:
     api_server_path: str = "/api/v2/users"
     probability_error_codes: int = 0  # Percentage (0-100)
     server_header: str = "Apache/2.2.22 (Ubuntu)"
+    # Database settings
+    database_path: str = "data/krawl.db"
+    database_retention_days: int = 30
 
     @classmethod
     def from_env(cls) -> 'Config':
@@ -46,5 +49,7 @@ class Config:
             api_server_port=int(os.getenv('API_SERVER_PORT', 8080)),
             api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'),
             probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5)),
-            server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)')
+            server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)'),
+            database_path=os.getenv('DATABASE_PATH', 'data/krawl.db'),
+            database_retention_days=int(os.getenv('DATABASE_RETENTION_DAYS', 30))
         )
diff --git a/src/database.py b/src/database.py
new file mode 100644
index 0000000..58a4505
--- /dev/null
+++ b/src/database.py
@@ -0,0 +1,361 @@
+#!/usr/bin/env python3
+
+"""
+Database singleton module for the Krawl honeypot.
+Provides SQLAlchemy session management and database initialization.
+"""
+
+import os
+import stat
+from datetime import datetime
+from typing import Optional, List, Dict, Any
+
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker, scoped_session, Session
+
+from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats
+from sanitizer import (
+    sanitize_ip,
+    sanitize_path,
+    sanitize_user_agent,
+    sanitize_credential,
+    sanitize_attack_pattern,
+)
+
+
+class DatabaseManager:
+    """
+    Singleton database manager for the Krawl honeypot.
+
+    Handles database initialization, session management, and provides
+    methods for persisting access logs, credentials, and attack detections.
+    """
+    _instance: Optional["DatabaseManager"] = None
+
+    def __new__(cls) -> "DatabaseManager":
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+            cls._instance._initialized = False
+        return cls._instance
+
+    def initialize(self, database_path: str = "data/krawl.db") -> None:
+        """
+        Initialize the database connection and create tables.
+
+        Args:
+            database_path: Path to the SQLite database file
+        """
+        if self._initialized:
+            return
+
+        # Create data directory if it doesn't exist
+        data_dir = os.path.dirname(database_path)
+        if data_dir and not os.path.exists(data_dir):
+            os.makedirs(data_dir, exist_ok=True)
+
+        # Create SQLite database with check_same_thread=False for multi-threaded access
+        database_url = f"sqlite:///{database_path}"
+        self._engine = create_engine(
+            database_url,
+            connect_args={"check_same_thread": False},
+            echo=False  # Set to True for SQL debugging
+        )
+
+        # Create session factory with scoped_session for thread safety
+        session_factory = sessionmaker(bind=self._engine)
+        self._Session = scoped_session(session_factory)
+
+        # Create all tables
+        Base.metadata.create_all(self._engine)
+
+        # Set restrictive file permissions (owner read/write only)
+        if os.path.exists(database_path):
+            try:
+                os.chmod(database_path, stat.S_IRUSR | stat.S_IWUSR)  # 600
+            except OSError:
+                # May fail on some systems, not critical
+                pass
+
+        self._initialized = True
+
+    @property
+    def session(self) -> Session:
+        """Get a thread-local database session."""
+        if not self._initialized:
+            raise RuntimeError("DatabaseManager not initialized. Call initialize() first.")
+        return self._Session()
+
+    def close_session(self) -> None:
+        """Close the current thread-local session."""
+        if self._initialized:
+            self._Session.remove()
+
+    def persist_access(
+        self,
+        ip: str,
+        path: str,
+        user_agent: str = "",
+        method: str = "GET",
+        is_suspicious: bool = False,
+        is_honeypot_trigger: bool = False,
+        attack_types: Optional[List[str]] = None,
+        matched_patterns: Optional[Dict[str, str]] = None
+    ) -> Optional[int]:
+        """
+        Persist an access log entry to the database.
+
+        Args:
+            ip: Client IP address
+            path: Requested path
+            user_agent: Client user agent string
+            method: HTTP method (GET, POST, HEAD)
+            is_suspicious: Whether the request was flagged as suspicious
+            is_honeypot_trigger: Whether a honeypot path was accessed
+            attack_types: List of detected attack types
+            matched_patterns: Dict mapping attack_type to matched pattern
+
+        Returns:
+            The ID of the created AccessLog record, or None on error
+        """
+        session = self.session
+        try:
+            # Create access log with sanitized fields
+            access_log = AccessLog(
+                ip=sanitize_ip(ip),
+                path=sanitize_path(path),
+                user_agent=sanitize_user_agent(user_agent),
+                method=method[:10],
+                is_suspicious=is_suspicious,
+                is_honeypot_trigger=is_honeypot_trigger,
+                timestamp=datetime.utcnow()
+            )
+            session.add(access_log)
+            session.flush()  # Get the ID before committing
+
+            # Add attack detections if any
+            if attack_types:
+                matched_patterns = matched_patterns or {}
+                for attack_type in attack_types:
+                    detection = AttackDetection(
+                        access_log_id=access_log.id,
+                        attack_type=attack_type[:50],
+                        matched_pattern=sanitize_attack_pattern(
+                            matched_patterns.get(attack_type, "")
+                        )
+                    )
+                    session.add(detection)
+
+            # Update IP stats
+            self._update_ip_stats(session, ip)
+
+            session.commit()
+            return access_log.id
+
+        except Exception as e:
+            session.rollback()
+            # Log error but don't crash - database persistence is secondary to honeypot function
+            print(f"Database error persisting access: {e}")
+            return None
+        finally:
+            self.close_session()
+
+    def persist_credential(
+        self,
+        ip: str,
+        path: str,
+        username: Optional[str] = None,
+        password: Optional[str] = None
+    ) -> Optional[int]:
+        """
+        Persist a credential attempt to the database.
+
+        Args:
+            ip: Client IP address
+            path: Login form path
+            username: Submitted username
+            password: Submitted password
+
+        Returns:
+            The ID of the created CredentialAttempt record, or None on error
+        """
+        session = self.session
+        try:
+            credential = CredentialAttempt(
+                ip=sanitize_ip(ip),
+                path=sanitize_path(path),
+                username=sanitize_credential(username),
+                password=sanitize_credential(password),
+                timestamp=datetime.utcnow()
+            )
+            session.add(credential)
+            session.commit()
+            return credential.id
+
+        except Exception as e:
+            session.rollback()
+            print(f"Database error persisting credential: {e}")
+            return None
+        finally:
+            self.close_session()
+
+    def _update_ip_stats(self, session: Session, ip: str) -> None:
+        """
+        Update IP statistics (upsert pattern).
+
+        Args:
+            session: Active database session
+            ip: IP address to update
+        """
+        sanitized_ip = sanitize_ip(ip)
+        now = datetime.utcnow()
+
+        ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
+
+        if ip_stats:
+            ip_stats.total_requests += 1
+            ip_stats.last_seen = now
+        else:
+            ip_stats = IpStats(
+                ip=sanitized_ip,
+                total_requests=1,
+                first_seen=now,
+                last_seen=now
+            )
+            session.add(ip_stats)
+
+    def get_access_logs(
+        self,
+        limit: int = 100,
+        offset: int = 0,
+        ip_filter: Optional[str] = None,
+        suspicious_only: bool = False
+    ) -> List[Dict[str, Any]]:
+        """
+        Retrieve access logs with optional filtering.
+
+        Args:
+            limit: Maximum number of records to return
+            offset: Number of records to skip
+            ip_filter: Filter by IP address
+            suspicious_only: Only return suspicious requests
+
+        Returns:
+            List of access log dictionaries
+        """
+        session = self.session
+        try:
+            query = session.query(AccessLog).order_by(AccessLog.timestamp.desc())
+
+            if ip_filter:
+                query = query.filter(AccessLog.ip == sanitize_ip(ip_filter))
+            if suspicious_only:
+                query = query.filter(AccessLog.is_suspicious == True)
+
+            logs = query.offset(offset).limit(limit).all()
+
+            return [
+                {
+                    'id': log.id,
+                    'ip': log.ip,
+                    'path': log.path,
+                    'user_agent': log.user_agent,
+                    'method': log.method,
+                    'is_suspicious': log.is_suspicious,
+                    'is_honeypot_trigger': log.is_honeypot_trigger,
+                    'timestamp': log.timestamp.isoformat(),
+                    'attack_types': [d.attack_type for d in log.attack_detections]
+                }
+                for log in logs
+            ]
+        finally:
+            self.close_session()
+
+    def get_credential_attempts(
+        self,
+        limit: int = 100,
+        offset: int = 0,
+        ip_filter: Optional[str] = None
+    ) -> List[Dict[str, Any]]:
+        """
+        Retrieve credential attempts with optional filtering.
+
+        Args:
+            limit: Maximum number of records to return
+            offset: Number of records to skip
+            ip_filter: Filter by IP address
+
+        Returns:
+            List of credential attempt dictionaries
+        """
+        session = self.session
+        try:
+            query = session.query(CredentialAttempt).order_by(
+                CredentialAttempt.timestamp.desc()
+            )
+
+            if ip_filter:
+                query = query.filter(CredentialAttempt.ip == sanitize_ip(ip_filter))
+
+            attempts = query.offset(offset).limit(limit).all()
+
+            return [
+                {
+                    'id': attempt.id,
+                    'ip': attempt.ip,
+                    'path': attempt.path,
+                    'username': attempt.username,
+                    'password': attempt.password,
+                    'timestamp': attempt.timestamp.isoformat()
+                }
+                for attempt in attempts
+            ]
+        finally:
+            self.close_session()
+
+    def get_ip_stats(self, limit: int = 100) -> List[Dict[str, Any]]:
+        """
+        Retrieve IP statistics ordered by total requests.
+
+        Args:
+            limit: Maximum number of records to return
+
+        Returns:
+            List of IP stats dictionaries
+        """
+        session = self.session
+        try:
+            stats = session.query(IpStats).order_by(
+                IpStats.total_requests.desc()
+            ).limit(limit).all()
+
+            return [
+                {
+                    'ip': s.ip,
+                    'total_requests': s.total_requests,
+                    'first_seen': s.first_seen.isoformat(),
+                    'last_seen': s.last_seen.isoformat(),
+                    'country_code': s.country_code,
+                    'city': s.city,
+                    'asn': s.asn,
+                    'asn_org': s.asn_org,
+                    'reputation_score': s.reputation_score,
+                    'reputation_source': s.reputation_source
+                }
+                for s in stats
+            ]
+        finally:
+            self.close_session()
+
+
+# Module-level singleton instance
+_db_manager = DatabaseManager()
+
+
+def get_database() -> DatabaseManager:
+    """Get the database manager singleton instance."""
+    return _db_manager
+
+
+def initialize_database(database_path: str = "data/krawl.db") -> None:
+    """Initialize the database system."""
+    _db_manager.initialize(database_path)
diff --git a/src/handler.py b/src/handler.py
index ac7ca22..90214ac 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -229,7 +229,7 @@ class Handler(BaseHTTPRequestHandler):
                 self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}")
 
         # send the post data (body) to the record_access function so the post data can be used to detect suspicious things.
-        self.tracker.record_access(client_ip, self.path, user_agent, post_data)
+        self.tracker.record_access(client_ip, self.path, user_agent, post_data, method='POST')
         
         time.sleep(1)
         
@@ -347,7 +347,7 @@ class Handler(BaseHTTPRequestHandler):
                 self.app_logger.error(f"Error generating dashboard: {e}")
             return
 
-        self.tracker.record_access(client_ip, self.path, user_agent)
+        self.tracker.record_access(client_ip, self.path, user_agent, method='GET')
 
         if self.tracker.is_suspicious_user_agent(user_agent):
             self.access_logger.warning(f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}")
diff --git a/src/models.py b/src/models.py
new file mode 100644
index 0000000..f6e7d30
--- /dev/null
+++ b/src/models.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+
+"""
+SQLAlchemy ORM models for the Krawl honeypot database.
+Stores access logs, credential attempts, attack detections, and IP statistics.
+"""
+
+from datetime import datetime
+from typing import Optional, List
+
+from sqlalchemy import String, Integer, Boolean, DateTime, ForeignKey, Index
+from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, relationship
+
+from sanitizer import (
+    MAX_IP_LENGTH,
+    MAX_PATH_LENGTH,
+    MAX_USER_AGENT_LENGTH,
+    MAX_CREDENTIAL_LENGTH,
+    MAX_ATTACK_PATTERN_LENGTH,
+    MAX_CITY_LENGTH,
+    MAX_ASN_ORG_LENGTH,
+    MAX_REPUTATION_SOURCE_LENGTH,
+)
+
+
+class Base(DeclarativeBase):
+    """Base class for all ORM models."""
+    pass
+
+
+class AccessLog(Base):
+    """
+    Records all HTTP requests to the honeypot.
+
+    Stores request metadata, suspicious activity flags, and timestamps
+    for analysis and dashboard display.
+    """
+    __tablename__ = 'access_logs'
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
+    path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False)
+    user_agent: Mapped[Optional[str]] = mapped_column(String(MAX_USER_AGENT_LENGTH), nullable=True)
+    method: Mapped[str] = mapped_column(String(10), nullable=False, default='GET')
+    is_suspicious: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    is_honeypot_trigger: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
+
+    # Relationship to attack detections
+    attack_detections: Mapped[List["AttackDetection"]] = relationship(
+        "AttackDetection",
+        back_populates="access_log",
+        cascade="all, delete-orphan"
+    )
+
+    # Composite index for common queries
+    __table_args__ = (
+        Index('ix_access_logs_ip_timestamp', 'ip', 'timestamp'),
+    )
+
+    def __repr__(self) -> str:
+        return f"<AccessLog(id={self.id}, ip='{self.ip}', path='{self.path[:50]}')>"
+
+
+class CredentialAttempt(Base):
+    """
+    Records captured login attempts from honeypot login forms.
+
+    Stores the submitted username and password along with request metadata.
+    """
+    __tablename__ = 'credential_attempts'
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
+    path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False)
+    username: Mapped[Optional[str]] = mapped_column(String(MAX_CREDENTIAL_LENGTH), nullable=True)
+    password: Mapped[Optional[str]] = mapped_column(String(MAX_CREDENTIAL_LENGTH), nullable=True)
+    timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
+
+    # Composite index for common queries
+    __table_args__ = (
+        Index('ix_credential_attempts_ip_timestamp', 'ip', 'timestamp'),
+    )
+
+    def __repr__(self) -> str:
+        return f"<CredentialAttempt(id={self.id}, ip='{self.ip}', username='{self.username}')>"
+
+
+class AttackDetection(Base):
+    """
+    Records detected attack patterns in requests.
+
+    Linked to the parent AccessLog record. Multiple attack types can be
+    detected in a single request.
+    """
+    __tablename__ = 'attack_detections'
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    access_log_id: Mapped[int] = mapped_column(
+        Integer,
+        ForeignKey('access_logs.id', ondelete='CASCADE'),
+        nullable=False,
+        index=True
+    )
+    attack_type: Mapped[str] = mapped_column(String(50), nullable=False)
+    matched_pattern: Mapped[Optional[str]] = mapped_column(String(MAX_ATTACK_PATTERN_LENGTH), nullable=True)
+
+    # Relationship back to access log
+    access_log: Mapped["AccessLog"] = relationship("AccessLog", back_populates="attack_detections")
+
+    def __repr__(self) -> str:
+        return f"<AttackDetection(id={self.id}, type='{self.attack_type}')>"
+
+
+class IpStats(Base):
+    """
+    Aggregated statistics per IP address.
+
+    Includes fields for future GeoIP and reputation enrichment.
+    Updated on each request from an IP.
+    """
+    __tablename__ = 'ip_stats'
+
+    ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), primary_key=True)
+    total_requests: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
+    first_seen: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow)
+    last_seen: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow)
+
+    # GeoIP fields (populated by future enrichment)
+    country_code: Mapped[Optional[str]] = mapped_column(String(2), nullable=True)
+    city: Mapped[Optional[str]] = mapped_column(String(MAX_CITY_LENGTH), nullable=True)
+    asn: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
+    asn_org: Mapped[Optional[str]] = mapped_column(String(MAX_ASN_ORG_LENGTH), nullable=True)
+
+    # Reputation fields (populated by future enrichment)
+    reputation_score: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
+    reputation_source: Mapped[Optional[str]] = mapped_column(String(MAX_REPUTATION_SOURCE_LENGTH), nullable=True)
+    reputation_updated: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True)
+
+    def __repr__(self) -> str:
+        return f"<IpStats(ip='{self.ip}', total_requests={self.total_requests})>"
diff --git a/src/sanitizer.py b/src/sanitizer.py
new file mode 100644
index 0000000..f783129
--- /dev/null
+++ b/src/sanitizer.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python3
+
+"""
+Sanitization utilities for safe database storage and HTML output.
+Strips control characters, caps field lengths (storage exhaustion), and HTML-escapes output (XSS).
+"""
+
+import html
+import re
+from typing import Optional
+
+
+# Field length limits for database storage
+MAX_IP_LENGTH = 45  # IPv6 max length
+MAX_PATH_LENGTH = 2048  # URL max practical length
+MAX_USER_AGENT_LENGTH = 512
+MAX_CREDENTIAL_LENGTH = 256
+MAX_ATTACK_PATTERN_LENGTH = 256
+MAX_CITY_LENGTH = 128
+MAX_ASN_ORG_LENGTH = 256
+MAX_REPUTATION_SOURCE_LENGTH = 64
+
+
+def sanitize_for_storage(value: Optional[str], max_length: int) -> str:
+    """
+    Sanitize and truncate string for safe database storage.
+
+    Removes null bytes and control characters that could cause issues
+    with database storage or log processing.
+
+    Args:
+        value: The string to sanitize
+        max_length: Maximum length to truncate to
+
+    Returns:
+        Sanitized and truncated string, empty string if input is None/empty
+    """
+    if not value:
+        return ""
+
+    # Convert to string if not already
+    value = str(value)
+
+    # Remove null bytes and control characters (except newline \n, tab \t, carriage return \r)
+    # Control chars are 0x00-0x1F and 0x7F, we keep 0x09 (tab), 0x0A (newline), 0x0D (carriage return)
+    cleaned = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', value)
+
+    # Truncate to max length
+    return cleaned[:max_length]
+
+
+def sanitize_ip(value: Optional[str]) -> str:
+    """Sanitize IP address for storage."""
+    return sanitize_for_storage(value, MAX_IP_LENGTH)
+
+
+def sanitize_path(value: Optional[str]) -> str:
+    """Sanitize URL path for storage."""
+    return sanitize_for_storage(value, MAX_PATH_LENGTH)
+
+
+def sanitize_user_agent(value: Optional[str]) -> str:
+    """Sanitize user agent string for storage."""
+    return sanitize_for_storage(value, MAX_USER_AGENT_LENGTH)
+
+
+def sanitize_credential(value: Optional[str]) -> str:
+    """Sanitize username or password for storage."""
+    return sanitize_for_storage(value, MAX_CREDENTIAL_LENGTH)
+
+
+def sanitize_attack_pattern(value: Optional[str]) -> str:
+    """Sanitize matched attack pattern for storage."""
+    return sanitize_for_storage(value, MAX_ATTACK_PATTERN_LENGTH)
+
+
+def escape_html(value: Optional[str]) -> str:
+    """
+    Escape HTML special characters for safe display in web pages.
+
+    Prevents stored XSS attacks when displaying user-controlled data
+    in the dashboard.
+
+    Args:
+        value: The string to escape
+
+    Returns:
+        HTML-escaped string, empty string if input is None/empty
+    """
+    if not value:
+        return ""
+    return html.escape(str(value))
+
+
+def escape_html_truncated(value: Optional[str], max_display_length: int) -> str:
+    """
+    Escape HTML and truncate for display.
+
+    Args:
+        value: The string to escape and truncate
+        max_display_length: Characters kept before "..." is appended (applied before escaping)
+
+    Returns:
+        HTML-escaped string, ending in "..." when truncated; "" if input is None/empty
+    """
+    if not value:
+        return ""
+
+    value_str = str(value)
+    if len(value_str) > max_display_length:
+        value_str = value_str[:max_display_length] + "..."
+
+    return html.escape(value_str)
diff --git a/src/server.py b/src/server.py
index fd8f7d2..a0b5ec3 100644
--- a/src/server.py
+++ b/src/server.py
@@ -12,6 +12,7 @@ from config import Config
 from tracker import AccessTracker
 from handler import Handler
 from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
+from database import initialize_database
 
 
 def print_usage():
@@ -33,6 +34,8 @@ def print_usage():
     print('  PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)')
     print('  CHAR_SPACE            - Characters for random links')
     print('  SERVER_HEADER         - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))')
+    print('  DATABASE_PATH         - Path to SQLite database (default: data/krawl.db)')
+    print('  DATABASE_RETENTION_DAYS - Days to retain database records (default: 30)')
 
 
 def main():
@@ -49,6 +52,13 @@ def main():
 
     config = Config.from_env()
 
+    # Initialize database for persistent storage
+    try:
+        initialize_database(config.database_path)
+        app_logger.info(f'Database initialized at: {config.database_path}')
+    except Exception as e:
+        app_logger.warning(f'Database initialization failed: {e}. Continuing with in-memory only.')
+
     tracker = AccessTracker()
 
     Handler.config = config
diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py
index a267278..92e950d 100644
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -5,49 +5,58 @@ Dashboard template for viewing honeypot statistics.
 Customize this template to change the dashboard appearance.
 """
 
+import html
+
+
+def _escape(value) -> str:
+    """Escape HTML special characters to prevent XSS attacks."""
+    if value is None:
+        return ""
+    return html.escape(str(value))
+
 
 def generate_dashboard(stats: dict) -> str:
     """Generate dashboard HTML with access statistics"""
     
-    # Generate IP rows
+    # Generate IP rows (IPs are generally safe but escape for consistency)
     top_ips_rows = '\n'.join([
-        f'<tr><td class="rank">{i+1}</td><td>{ip}</td><td>{count}</td></tr>'
+        f'<tr><td class="rank">{i+1}</td><td>{_escape(ip)}</td><td>{count}</td></tr>'
         for i, (ip, count) in enumerate(stats['top_ips'])
     ]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
 
-    # Generate paths rows
+    # Generate paths rows (CRITICAL: paths can contain XSS payloads)
     top_paths_rows = '\n'.join([
-        f'<tr><td class="rank">{i+1}</td><td>{path}</td><td>{count}</td></tr>'
+        f'<tr><td class="rank">{i+1}</td><td>{_escape(path)}</td><td>{count}</td></tr>'
         for i, (path, count) in enumerate(stats['top_paths'])
     ]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
 
-    # Generate User-Agent rows
+    # Generate User-Agent rows (CRITICAL: user agents can contain XSS payloads)
     top_ua_rows = '\n'.join([
-        f'<tr><td class="rank">{i+1}</td><td style="word-break: break-all;">{ua[:80]}</td><td>{count}</td></tr>'
+        f'<tr><td class="rank">{i+1}</td><td style="word-break: break-all;">{_escape(ua[:80])}</td><td>{count}</td></tr>'
         for i, (ua, count) in enumerate(stats['top_user_agents'])
     ]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
 
-    # Generate suspicious accesses rows
+    # Generate suspicious accesses rows (CRITICAL: multiple user-controlled fields)
     suspicious_rows = '\n'.join([
-        f'<tr><td>{log["ip"]}</td><td>{log["path"]}</td><td style="word-break: break-all;">{log["user_agent"][:60]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
+        f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["path"])}</td><td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
         for log in stats['recent_suspicious'][-10:]
     ]) or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
 
     # Generate honeypot triggered IPs rows
     honeypot_rows = '\n'.join([
-        f'<tr><td>{ip}</td><td style="word-break: break-all;">{", ".join(paths)}</td><td>{len(paths)}</td></tr>'
+        f'<tr><td>{_escape(ip)}</td><td style="word-break: break-all;">{_escape(", ".join(paths))}</td><td>{len(paths)}</td></tr>'
         for ip, paths in stats.get('honeypot_triggered_ips', [])
     ]) or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>'
 
-    # Generate attack types rows
+    # Generate attack types rows (CRITICAL: paths and user agents are user-controlled)
     attack_type_rows = '\n'.join([
-        f'<tr><td>{log["ip"]}</td><td>{log["path"]}</td><td>{", ".join(log["attack_types"])}</td><td style="word-break: break-all;">{log["user_agent"][:60]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
+        f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["path"])}</td><td>{_escape(", ".join(log["attack_types"]))}</td><td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
         for log in stats.get('attack_types', [])[-10:]
     ]) or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
 
-    # Generate credential attempts rows
+    # Generate credential attempts rows (CRITICAL: usernames and passwords are user-controlled)
     credential_rows = '\n'.join([
-        f'<tr><td>{log["ip"]}</td><td>{log["username"]}</td><td>{log["password"]}</td><td>{log["path"]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
+        f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["username"])}</td><td>{_escape(log["password"])}</td><td>{_escape(log["path"])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
         for log in stats.get('credential_attempts', [])[-20:]
     ]) or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'
 
diff --git a/src/tracker.py b/src/tracker.py
index 717a4c3..04ded3b 100644
--- a/src/tracker.py
+++ b/src/tracker.py
@@ -1,15 +1,29 @@
 #!/usr/bin/env python3
 
-from typing import Dict, List, Tuple
+from typing import Dict, List, Tuple, Optional
 from collections import defaultdict
 from datetime import datetime
 import re
 import urllib.parse
 
+from database import get_database, DatabaseManager
+
 
 class AccessTracker:
-    """Track IP addresses and paths accessed"""
-    def __init__(self):
+    """
+    Track IP addresses and paths accessed.
+
+    Maintains in-memory structures for fast dashboard access and
+    persists data to SQLite for long-term storage and analysis.
+    """
+    def __init__(self, db_manager: Optional[DatabaseManager] = None):
+        """
+        Initialize the access tracker.
+
+        Args:
+            db_manager: Optional DatabaseManager for persistence.
+                        If None, will use the global singleton.
+        """
         self.ip_counts: Dict[str, int] = defaultdict(int)
         self.path_counts: Dict[str, int] = defaultdict(int)
         self.user_agent_counts: Dict[str, int] = defaultdict(int)
@@ -21,7 +35,7 @@ class AccessTracker:
             'burp', 'zap', 'w3af', 'metasploit', 'nuclei', 'gobuster', 'dirbuster'
         ]
 
-        # common attack types such as xss, shell injection, probes
+        # Common attack types such as xss, shell injection, probes
         self.attack_types = {
             'path_traversal': r'\.\.',
             'sql_injection': r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
@@ -33,6 +47,25 @@ class AccessTracker:
         # Track IPs that accessed honeypot paths from robots.txt
         self.honeypot_triggered: Dict[str, List[str]] = defaultdict(list)
 
+        # Database manager for persistence (lazily initialized)
+        self._db_manager = db_manager
+
+    @property
+    def db(self) -> Optional[DatabaseManager]:
+        """
+        Get the database manager, lazily initializing if needed.
+
+        Returns:
+            DatabaseManager instance or None if not available
+        """
+        if self._db_manager is None:
+            try:
+                self._db_manager = get_database()
+            except Exception:
+                # Database not initialized, persistence disabled
+                pass
+        return self._db_manager
+
     def parse_credentials(self, post_data: str) -> Tuple[str, str]:
         """
         Parse username and password from POST data.
@@ -75,7 +108,12 @@ class AccessTracker:
         return username, password
 
     def record_credential_attempt(self, ip: str, path: str, username: str, password: str):
-        """Record a credential login attempt"""
+        """
+        Record a credential login attempt.
+
+        Stores in both in-memory list and SQLite database.
+        """
+        # In-memory storage for dashboard
         self.credential_attempts.append({
             'ip': ip,
             'path': path,
@@ -84,37 +122,89 @@ class AccessTracker:
             'timestamp': datetime.now().isoformat()
         })
 
-    def record_access(self, ip: str, path: str, user_agent: str = '', body: str = ''):
-        """Record an access attempt"""
+        # Persist to database
+        if self.db:
+            try:
+                self.db.persist_credential(
+                    ip=ip,
+                    path=path,
+                    username=username,
+                    password=password
+                )
+            except Exception:
+                # Don't crash if database persistence fails
+                pass
+
+    def record_access(
+        self,
+        ip: str,
+        path: str,
+        user_agent: str = '',
+        body: str = '',
+        method: str = 'GET'
+    ):
+        """
+        Record an access attempt.
+
+        Stores in both in-memory structures and SQLite database.
+
+        Args:
+            ip: Client IP address
+            path: Requested path
+            user_agent: Client user agent string
+            body: Request body (for POST/PUT)
+            method: HTTP method
+        """
         self.ip_counts[ip] += 1
         self.path_counts[path] += 1
         if user_agent:
             self.user_agent_counts[user_agent] += 1
-        
-        # path attack type detection
+
+        # Path attack type detection
         attack_findings = self.detect_attack_type(path)
 
-        # post / put data
+        # POST/PUT body attack detection
         if len(body) > 0:
             attack_findings.extend(self.detect_attack_type(body))
 
-        is_suspicious = self.is_suspicious_user_agent(user_agent) or self.is_honeypot_path(path) or len(attack_findings) > 0
+        is_suspicious = (
+            self.is_suspicious_user_agent(user_agent) or
+            self.is_honeypot_path(path) or
+            len(attack_findings) > 0
+        )
+        is_honeypot = self.is_honeypot_path(path)
 
-        
         # Track if this IP accessed a honeypot path
-        if self.is_honeypot_path(path):
+        if is_honeypot:
             self.honeypot_triggered[ip].append(path)
-        
+
+        # In-memory storage for dashboard
         self.access_log.append({
             'ip': ip,
             'path': path,
             'user_agent': user_agent,
             'suspicious': is_suspicious,
-            'honeypot_triggered': self.is_honeypot_path(path),
-            'attack_types':attack_findings,
+            'honeypot_triggered': is_honeypot,
+            'attack_types': attack_findings,
             'timestamp': datetime.now().isoformat()
         })
 
+        # Persist to database
+        if self.db:
+            try:
+                self.db.persist_access(
+                    ip=ip,
+                    path=path,
+                    user_agent=user_agent,
+                    method=method,
+                    is_suspicious=is_suspicious,
+                    is_honeypot_trigger=is_honeypot,
+                    attack_types=attack_findings if attack_findings else None
+                )
+            except Exception:
+                # Don't crash if database persistence fails
+                pass
+
     def detect_attack_type(self, data:str) -> list[str]:
         """
         Returns a list of all attack types found in path data

From a4baedffd958b83da700431b53f91eb63c858803 Mon Sep 17 00:00:00 2001
From: Phillip Tarrant <ptarrant@gmail.com>
Date: Sun, 28 Dec 2025 13:52:46 -0600
Subject: [PATCH 06/12] updated dashboard to pull data from db. This closes
 issue #10

---
 src/database.py                     | 196 +++++++++++++++++++++++++++-
 src/models.py                       |   4 +-
 src/templates/dashboard_template.py |   2 +-
 src/tracker.py                      |  35 +++--
 tests/test_credentials.sh           |   4 +-
 5 files changed, 218 insertions(+), 23 deletions(-)

diff --git a/src/database.py b/src/database.py
index 58a4505..e0de320 100644
--- a/src/database.py
+++ b/src/database.py
@@ -10,7 +10,7 @@ import stat
 from datetime import datetime
 from typing import Optional, List, Dict, Any
 
-from sqlalchemy import create_engine
+from sqlalchemy import create_engine, func, distinct, case
 from sqlalchemy.orm import sessionmaker, scoped_session, Session
 
 from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats
@@ -346,6 +346,200 @@ class DatabaseManager:
         finally:
             self.close_session()
 
+    def get_dashboard_counts(self) -> Dict[str, int]:
+        """
+        Get aggregate statistics for the dashboard.
+
+        Returns:
+            Dictionary with total_accesses, unique_ips, unique_paths,
+            suspicious_accesses, honeypot_triggered, honeypot_ips
+        """
+        session = self.session
+        try:
+            # Get main aggregate counts in one query
+            result = session.query(
+                func.count(AccessLog.id).label('total_accesses'),
+                func.count(distinct(AccessLog.ip)).label('unique_ips'),
+                func.count(distinct(AccessLog.path)).label('unique_paths'),
+                func.sum(case((AccessLog.is_suspicious == True, 1), else_=0)).label('suspicious_accesses'),
+                func.sum(case((AccessLog.is_honeypot_trigger == True, 1), else_=0)).label('honeypot_triggered')
+            ).first()
+
+            # Get unique IPs that triggered honeypots
+            honeypot_ips = session.query(
+                func.count(distinct(AccessLog.ip))
+            ).filter(AccessLog.is_honeypot_trigger == True).scalar() or 0
+
+            return {
+                'total_accesses': result.total_accesses or 0,
+                'unique_ips': result.unique_ips or 0,
+                'unique_paths': result.unique_paths or 0,
+                'suspicious_accesses': int(result.suspicious_accesses or 0),
+                'honeypot_triggered': int(result.honeypot_triggered or 0),
+                'honeypot_ips': honeypot_ips
+            }
+        finally:
+            self.close_session()
+
+    def get_top_ips(self, limit: int = 10) -> List[tuple]:
+        """
+        Get top IP addresses by access count.
+
+        Args:
+            limit: Maximum number of results
+
+        Returns:
+            List of (ip, count) tuples ordered by count descending
+        """
+        session = self.session
+        try:
+            results = session.query(
+                AccessLog.ip,
+                func.count(AccessLog.id).label('count')
+            ).group_by(AccessLog.ip).order_by(
+                func.count(AccessLog.id).desc()
+            ).limit(limit).all()
+
+            return [(row.ip, row.count) for row in results]
+        finally:
+            self.close_session()
+
+    def get_top_paths(self, limit: int = 10) -> List[tuple]:
+        """
+        Get top paths by access count.
+
+        Args:
+            limit: Maximum number of results
+
+        Returns:
+            List of (path, count) tuples ordered by count descending
+        """
+        session = self.session
+        try:
+            results = session.query(
+                AccessLog.path,
+                func.count(AccessLog.id).label('count')
+            ).group_by(AccessLog.path).order_by(
+                func.count(AccessLog.id).desc()
+            ).limit(limit).all()
+
+            return [(row.path, row.count) for row in results]
+        finally:
+            self.close_session()
+
+    def get_top_user_agents(self, limit: int = 10) -> List[tuple]:
+        """
+        Get top user agents by access count.
+
+        Args:
+            limit: Maximum number of results
+
+        Returns:
+            List of (user_agent, count) tuples ordered by count descending
+        """
+        session = self.session
+        try:
+            results = session.query(
+                AccessLog.user_agent,
+                func.count(AccessLog.id).label('count')
+            ).filter(
+                AccessLog.user_agent.isnot(None),
+                AccessLog.user_agent != ''
+            ).group_by(AccessLog.user_agent).order_by(
+                func.count(AccessLog.id).desc()
+            ).limit(limit).all()
+
+            return [(row.user_agent, row.count) for row in results]
+        finally:
+            self.close_session()
+
+    def get_recent_suspicious(self, limit: int = 20) -> List[Dict[str, Any]]:
+        """
+        Get recent suspicious access attempts.
+
+        Args:
+            limit: Maximum number of results
+
+        Returns:
+            List of access log dictionaries with is_suspicious=True
+        """
+        session = self.session
+        try:
+            logs = session.query(AccessLog).filter(
+                AccessLog.is_suspicious == True
+            ).order_by(AccessLog.timestamp.desc()).limit(limit).all()
+
+            return [
+                {
+                    'ip': log.ip,
+                    'path': log.path,
+                    'user_agent': log.user_agent,
+                    'timestamp': log.timestamp.isoformat()
+                }
+                for log in logs
+            ]
+        finally:
+            self.close_session()
+
+    def get_honeypot_triggered_ips(self) -> List[tuple]:
+        """
+        Get IPs that triggered honeypot paths with the paths they accessed.
+
+        Returns:
+            List of (ip, [paths]) tuples
+        """
+        session = self.session
+        try:
+            # Get all honeypot triggers grouped by IP
+            results = session.query(
+                AccessLog.ip,
+                AccessLog.path
+            ).filter(
+                AccessLog.is_honeypot_trigger == True
+            ).all()
+
+            # Group paths by IP
+            ip_paths: Dict[str, List[str]] = {}
+            for row in results:
+                if row.ip not in ip_paths:
+                    ip_paths[row.ip] = []
+                if row.path not in ip_paths[row.ip]:
+                    ip_paths[row.ip].append(row.path)
+
+            return [(ip, paths) for ip, paths in ip_paths.items()]
+        finally:
+            self.close_session()
+
+    def get_recent_attacks(self, limit: int = 20) -> List[Dict[str, Any]]:
+        """
+        Get recent access logs that have attack detections.
+
+        Args:
+            limit: Maximum number of access logs to return
+
+        Returns:
+            List of access log dicts with attack_types included
+        """
+        session = self.session
+        try:
+            # join() yields one row per detection; DISTINCT makes LIMIT count logs
+            logs = session.query(AccessLog).join(
+                AttackDetection
+            ).distinct().order_by(AccessLog.timestamp.desc()).limit(limit).all()
+
+            return [
+                {
+                    'ip': log.ip,
+                    'path': log.path,
+                    'user_agent': log.user_agent,
+                    'timestamp': log.timestamp.isoformat(),
+                    'attack_types': [d.attack_type for d in log.attack_detections]
+                }
+                for log in logs
+            ]
+        finally:
+            self.close_session()
+
 
 # Module-level singleton instance
 _db_manager = DatabaseManager()
diff --git a/src/models.py b/src/models.py
index f6e7d30..40dae0b 100644
--- a/src/models.py
+++ b/src/models.py
@@ -53,9 +53,11 @@ class AccessLog(Base):
         cascade="all, delete-orphan"
     )
 
-    # Composite index for common queries
+    # Indexes for common queries
     __table_args__ = (
         Index('ix_access_logs_ip_timestamp', 'ip', 'timestamp'),
+        Index('ix_access_logs_is_suspicious', 'is_suspicious'),
+        Index('ix_access_logs_is_honeypot_trigger', 'is_honeypot_trigger'),
     )
 
     def __repr__(self) -> str:
diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py
index 2323843..455833d 100644
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -190,7 +190,7 @@ def generate_dashboard(stats: dict) -> str:
         </div>
 
         <div class="table-container alert-section">
-            <h2>🍯 Honeypot Triggers</h2>
+            <h2>🍯 Honeypot Triggers by IP</h2>
             <table>
                 <thead>
                     <tr>
diff --git a/src/tracker.py b/src/tracker.py
index 2d3d34a..4c89c0b 100644
--- a/src/tracker.py
+++ b/src/tracker.py
@@ -276,21 +276,20 @@ class AccessTracker:
         return [(ip, paths) for ip, paths in self.honeypot_triggered.items()]
 
     def get_stats(self) -> Dict:
-        """Get statistics summary"""
-        suspicious_count = sum(1 for log in self.access_log if log.get('suspicious', False))
-        honeypot_count = sum(1 for log in self.access_log if log.get('honeypot_triggered', False))
-        return {
-            'total_accesses': len(self.access_log),
-            'unique_ips': len(self.ip_counts),
-            'unique_paths': len(self.path_counts),
-            'suspicious_accesses': suspicious_count,
-            'honeypot_triggered': honeypot_count,
-            'honeypot_ips': len(self.honeypot_triggered),
-            'top_ips': self.get_top_ips(10),
-            'top_paths': self.get_top_paths(10),
-            'top_user_agents': self.get_top_user_agents(10),
-            'recent_suspicious': self.get_suspicious_accesses(20),
-            'honeypot_triggered_ips': self.get_honeypot_triggered_ips(),
-            'attack_types': self.get_attack_type_accesses(20),
-            'credential_attempts': self.credential_attempts[-50:]  # Last 50 attempts
-        }
+        """Get statistics summary from database."""
+        if not self.db:
+            raise RuntimeError("Database not available for dashboard stats")
+
+        # Get aggregate counts from database
+        stats = self.db.get_dashboard_counts()
+
+        # Add detailed lists from database
+        stats['top_ips'] = self.db.get_top_ips(10)
+        stats['top_paths'] = self.db.get_top_paths(10)
+        stats['top_user_agents'] = self.db.get_top_user_agents(10)
+        stats['recent_suspicious'] = self.db.get_recent_suspicious(20)
+        stats['honeypot_triggered_ips'] = self.db.get_honeypot_triggered_ips()
+        stats['attack_types'] = self.db.get_recent_attacks(20)
+        stats['credential_attempts'] = self.db.get_credential_attempts(limit=50)
+
+        return stats
diff --git a/tests/test_credentials.sh b/tests/test_credentials.sh
index 6379b92..68ee2c0 100755
--- a/tests/test_credentials.sh
+++ b/tests/test_credentials.sh
@@ -134,9 +134,9 @@ echo -e "${GREEN}✓ All credential tests completed!${NC}"
 echo -e "${BLUE}========================================${NC}\n"
 
 echo -e "${YELLOW}Check the results:${NC}"
-echo -e "  1. View the log file: ${GREEN}cat src/logs/credentials.log${NC}"
+echo -e "  1. View the log file: ${GREEN}tail -20 logs/credentials.log${NC}"
 echo -e "  2. View the dashboard: ${GREEN}${BASE_URL}/dashboard${NC}"
-echo -e "  3. Check recent logs: ${GREEN}tail -20 src/logs/krawl.log${NC}\n"
+echo -e "  3. Check recent logs: ${GREEN}tail -20 logs/access.log ${NC}\n"
 
 # Display last 10 credential entries if log file exists
 if [ -f "src/logs/credentials.log" ]; then

From c2c43ac98500d20fb19cf8cf9dedc04f6a08d123 Mon Sep 17 00:00:00 2001
From: Leonardo Bambini <lbambini@Leonardos-MacBook-Air.local>
Date: Mon, 29 Dec 2025 18:51:37 +0100
Subject: [PATCH 07/12] Added randomized server header and changed behavior of
 SERVER_HEADER env var

---
 src/config.py     |  2 +-
 src/generators.py | 13 ++++++++++++-
 src/handler.py    |  4 ++--
 src/wordlists.py  |  7 ++++++-
 4 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/src/config.py b/src/config.py
index 7c6714c..ef78935 100644
--- a/src/config.py
+++ b/src/config.py
@@ -46,5 +46,5 @@ class Config:
             api_server_port=int(os.getenv('API_SERVER_PORT', 8080)),
             api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'),
             probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5)),
-            server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)')
+            server_header=os.getenv('SERVER_HEADER')
         )
diff --git a/src/generators.py b/src/generators.py
index 16c0c32..6e24ba8 100644
--- a/src/generators.py
+++ b/src/generators.py
@@ -9,7 +9,8 @@ import string
 import json
 from templates import html_templates
 from wordlists import get_wordlists
-
+from config import Config
+from logger import get_app_logger
 
 def random_username() -> str:
     """Generate random username"""
@@ -36,6 +37,16 @@ def random_email(username: str = None) -> str:
         username = random_username()
     return f"{username}@{random.choice(wl.email_domains)}"
 
+def random_server_header() -> str:
+    """Return the SERVER_HEADER override if set, else a random wordlist banner."""
+    # Read the environment-backed config once instead of parsing it twice.
+    configured = Config.from_env().server_header
+    if configured:
+        return configured
+
+    # random.choice() raises IndexError on an empty sequence, so keep a fixed
+    # banner for wordlists that define no "server_headers" entries.
+    return random.choice(get_wordlists().server_headers or ["Apache/2.4.41 (Ubuntu)"])
 
 def random_api_key() -> str:
     """Generate random API key"""
diff --git a/src/handler.py b/src/handler.py
index ac7ca22..7c44726 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -13,7 +13,7 @@ from templates import html_templates
 from templates.dashboard_template import generate_dashboard
 from generators import (
     credentials_txt, passwords_txt, users_json, api_keys_json,
-    api_response, directory_listing
+    api_response, directory_listing, random_server_header
 )
 from wordlists import get_wordlists
 
@@ -52,7 +52,7 @@ class Handler(BaseHTTPRequestHandler):
 
     def version_string(self) -> str:
         """Return custom server version for deception."""
-        return self.config.server_header
+        return random_server_header()
 
     def _should_return_error(self) -> bool:
         """Check if we should return an error based on probability"""
diff --git a/src/wordlists.py b/src/wordlists.py
index 62e4045..342930a 100644
--- a/src/wordlists.py
+++ b/src/wordlists.py
@@ -57,7 +57,8 @@ class Wordlists:
             },
             "users": {
                 "roles": ["Administrator", "User"]
-            }
+            },
+            "server_headers": ["Apache/2.4.41 (Ubuntu)", "nginx/1.18.0"]
         }
     
     @property
@@ -111,6 +112,10 @@ class Wordlists:
     @property
     def error_codes(self):
         return self._data.get("error_codes", [])
+    
+    @property
+    def server_headers(self):
+        return self._data.get("server_headers", [])
 
 
 _wordlists_instance = None

From a9808599dc3870f66241111fcfdd6defbd4da42f Mon Sep 17 00:00:00 2001
From: Leonardo Bambini <lbambini@Leonardos-MacBook-Air.local>
Date: Mon, 29 Dec 2025 18:55:44 +0100
Subject: [PATCH 08/12] Added random server header and changed behavior of
 SERVER_HEADER env var

---
 README.md      | 2 +-
 wordlists.json | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b84d955..0d3efe7 100644
--- a/README.md
+++ b/README.md
@@ -185,7 +185,7 @@ To customize the deception server installation several **environment variables**
 | `CANARY_TOKEN_URL` | External canary token URL | None |
 | `DASHBOARD_SECRET_PATH` | Custom dashboard path | Auto-generated |
 | `PROBABILITY_ERROR_CODES` | Error response probability (0-100%) | `0` |
-| `SERVER_HEADER` | HTTP Server header for deception | `Apache/2.2.22 (Ubuntu)` |
+| `SERVER_HEADER` | HTTP Server header for deception; if unset, a random header from the wordlist is used | |
 
 ## robots.txt
 The actual (juicy) robots.txt configuration is the following
diff --git a/wordlists.json b/wordlists.json
index f1aae81..fddf3d3 100644
--- a/wordlists.json
+++ b/wordlists.json
@@ -193,5 +193,13 @@
     500,
     502,
     503
+  ],
+  "server_headers": [
+    "Apache/2.4.41 (Ubuntu)",
+    "nginx/1.18.0",
+    "Microsoft-IIS/10.0",
+    "cloudflare",
+    "AmazonS3",
+    "gunicorn/20.1.0"
   ]
 }

From 06ffa2c480f9991ea391913be918bc59a65e636f Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Mon, 29 Dec 2025 23:57:37 +0100
Subject: [PATCH 09/12] Added wordlists and server header logic to helm

---
 docker-compose.yaml           |  2 +-
 helm/templates/configmap.yaml |  7 ++++++-
 helm/values.yaml              | 14 +++++++++++++-
 src/config.py                 |  2 +-
 4 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 1612864..7026f11 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -20,7 +20,7 @@ services:
       - MAX_COUNTER=10
       - CANARY_TOKEN_TRIES=10
       - PROBABILITY_ERROR_CODES=0
-      - SERVER_HEADER=Apache/2.2.22 (Ubuntu)
+      # - SERVER_HEADER=Apache/2.2.22 (Ubuntu)
       # Optional: Set your canary token URL
       # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt
       # Optional: Set custom dashboard path (auto-generated if not set)
diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml
index c50ab75..fb590b0 100644
--- a/helm/templates/configmap.yaml
+++ b/helm/templates/configmap.yaml
@@ -14,5 +14,10 @@ data:
   MAX_COUNTER: {{ .Values.config.maxCounter | quote }}
   CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }}
   PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }}
-  SERVER_HEADER: {{ .Values.config.serverHeader | quote }}
   CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }}
+  {{- if .Values.config.dashboardSecretPath }}
+  DASHBOARD_SECRET_PATH: {{ .Values.config.dashboardSecretPath | quote }}
+  {{- end }}
+  {{- if .Values.config.serverHeader }}
+  SERVER_HEADER: {{ .Values.config.serverHeader | quote }}
+  {{- end }}
\ No newline at end of file
diff --git a/helm/values.yaml b/helm/values.yaml
index a095632..217e9a6 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -73,7 +73,8 @@ config:
   maxCounter: 10
   canaryTokenTries: 10
   probabilityErrorCodes: 0
-  serverHeader: "Apache/2.2.22 (Ubuntu)"
+#  serverHeader: "Apache/2.2.22 (Ubuntu)"
+#  dashboardSecretPath: "/my-secret-dashboard"
 #  canaryTokenUrl: set-your-canary-token-url-here
 
 networkPolicy:
@@ -268,6 +269,17 @@ wordlists:
       - .git/
       - keys/
       - credentials/
+  server_headers:
+    - Apache/2.2.22 (Ubuntu)
+    - nginx/1.18.0
+    - Microsoft-IIS/10.0
+    - LiteSpeed
+    - Caddy
+    - gunicorn/20.0.4
+    - uvicorn/0.13.4
+    - Express
+    - Flask/1.1.2
+    - Django/3.1
   error_codes:
     - 400
     - 401
diff --git a/src/config.py b/src/config.py
index ef78935..3fc5dd8 100644
--- a/src/config.py
+++ b/src/config.py
@@ -21,7 +21,7 @@ class Config:
     api_server_port: int = 8080
     api_server_path: str = "/api/v2/users"
     probability_error_codes: int = 0  # Percentage (0-100)
-    server_header: str = "Apache/2.2.22 (Ubuntu)"
+    server_header: Optional[str] = None
 
     @classmethod
     def from_env(cls) -> 'Config':

From cddad984c3b556cbc990bd989f7f2295eb404394 Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Tue, 30 Dec 2025 00:03:44 +0100
Subject: [PATCH 10/12] Added timezone to helm values

---
 helm/values.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/helm/values.yaml b/helm/values.yaml
index dc18d4a..8a6bc1d 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -73,6 +73,7 @@ config:
   maxCounter: 10
   canaryTokenTries: 10
   probabilityErrorCodes: 0
+#  timezone: "UTC"
 #  serverHeader: "Apache/2.2.22 (Ubuntu)"
 #  dashboardSecretPath: "/my-secret-dashboard"
 #  canaryTokenUrl: set-your-canary-token-url-here

From 354f8bf8954e76faefbfa7750d2b3537ee7d3443 Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <50186694+BlessedRebuS@users.noreply.github.com>
Date: Tue, 30 Dec 2025 00:24:36 +0100
Subject: [PATCH 11/12] Fix missing comma after server_header in config.py

---
 src/config.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/config.py b/src/config.py
index f3bf7f0..87fca1c 100644
--- a/src/config.py
+++ b/src/config.py
@@ -85,7 +85,7 @@ class Config:
             api_server_port=int(os.getenv('API_SERVER_PORT', 8080)),
             api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'),
             probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 0)),
-            server_header=os.getenv('SERVER_HEADER')
+            server_header=os.getenv('SERVER_HEADER'),
             database_path=os.getenv('DATABASE_PATH', 'data/krawl.db'),
             database_retention_days=int(os.getenv('DATABASE_RETENTION_DAYS', 30)),
             timezone=os.getenv('TIMEZONE')  # If not set, will use system timezone

From c55b1375adbd6f21fa1712f9bd9c05026fa34207 Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Tue, 30 Dec 2025 12:12:42 +0100
Subject: [PATCH 12/12] added db config for kubernetes and helm

---
 helm/templates/configmap.yaml           |  3 +++
 helm/templates/deployment.yaml          | 14 ++++++++++++++
 helm/templates/pvc.yaml                 | 17 +++++++++++++++++
 helm/values.yaml                        | 18 ++++++++++++++++++
 kubernetes/krawl-all-in-one-deploy.yaml | 22 ++++++++++++++++++++++
 kubernetes/manifests/configmap.yaml     |  5 ++++-
 kubernetes/manifests/deployment.yaml    |  5 +++++
 kubernetes/manifests/kustomization.yaml |  1 +
 kubernetes/manifests/pvc.yaml           | 13 +++++++++++++
 9 files changed, 97 insertions(+), 1 deletion(-)
 create mode 100644 helm/templates/pvc.yaml
 create mode 100644 kubernetes/manifests/pvc.yaml

diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml
index 2990f61..17cd952 100644
--- a/helm/templates/configmap.yaml
+++ b/helm/templates/configmap.yaml
@@ -24,3 +24,6 @@ data:
   {{- if .Values.config.timezone }}
   TIMEZONE: {{ .Values.config.timezone | quote }}
   {{- end }}
+  # Database configuration
+  DATABASE_PATH: {{ .Values.database.path | quote }}
+  DATABASE_RETENTION_DAYS: {{ .Values.database.retentionDays | quote }}
diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml
index b0aeb6d..ecc9655 100644
--- a/helm/templates/deployment.yaml
+++ b/helm/templates/deployment.yaml
@@ -54,6 +54,10 @@ spec:
           mountPath: /app/wordlists.json
           subPath: wordlists.json
           readOnly: true
+        {{- if .Values.database.persistence.enabled }}
+        - name: database
+          mountPath: /app/data
+        {{- end }}
         {{- with .Values.resources }}
         resources:
           {{- toYaml . | nindent 12 }}
@@ -62,6 +66,16 @@ spec:
       - name: wordlists
         configMap:
           name: {{ include "krawl.fullname" . }}-wordlists
+      {{- if .Values.database.persistence.enabled }}
+      - name: database
+        {{- if .Values.database.persistence.existingClaim }}
+        persistentVolumeClaim:
+          claimName: {{ .Values.database.persistence.existingClaim }}
+        {{- else }}
+        persistentVolumeClaim:
+          claimName: {{ include "krawl.fullname" . }}-db
+        {{- end }}
+      {{- end }}
       {{- with .Values.nodeSelector }}
       nodeSelector:
         {{- toYaml . | nindent 8 }}
diff --git a/helm/templates/pvc.yaml b/helm/templates/pvc.yaml
new file mode 100644
index 0000000..ec73af2
--- /dev/null
+++ b/helm/templates/pvc.yaml
@@ -0,0 +1,17 @@
+{{- if and .Values.database.persistence.enabled (not .Values.database.persistence.existingClaim) }}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: {{ include "krawl.fullname" . }}-db
+  labels:
+    {{- include "krawl.labels" . | nindent 4 }}
+spec:
+  accessModes:
+    - {{ .Values.database.persistence.accessMode }}
+  {{- if .Values.database.persistence.storageClassName }}
+  storageClassName: {{ .Values.database.persistence.storageClassName }}
+  {{- end }}
+  resources:
+    requests:
+      storage: {{ .Values.database.persistence.size }}
+{{- end }}
diff --git a/helm/values.yaml b/helm/values.yaml
index 8a6bc1d..c92bc0b 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -79,6 +79,24 @@ config:
 #  canaryTokenUrl: set-your-canary-token-url-here
 #  timezone: "UTC"  # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used.
 
+# Database configuration
+database:
+  # Path to the SQLite database file
+  path: "data/krawl.db"
+  # Number of days to retain access logs and attack data
+  retentionDays: 30
+  # Persistence configuration
+  persistence:
+    enabled: true
+    # Storage class name (use default if not specified)
+    # storageClassName: ""
+    # Access mode for the persistent volume
+    accessMode: ReadWriteOnce
+    # Size of the persistent volume
+    size: 1Gi
+    # Optional: Use existing PVC
+    # existingClaim: ""
+
 networkPolicy:
   enabled: true
   policyTypes:
diff --git a/kubernetes/krawl-all-in-one-deploy.yaml b/kubernetes/krawl-all-in-one-deploy.yaml
index 0362220..d1a026c 100644
--- a/kubernetes/krawl-all-in-one-deploy.yaml
+++ b/kubernetes/krawl-all-in-one-deploy.yaml
@@ -20,6 +20,9 @@ data:
   CANARY_TOKEN_TRIES: "10"
   PROBABILITY_ERROR_CODES: "0"
 #  CANARY_TOKEN_URL: set-your-canary-token-url-here
+  # Database configuration
+  DATABASE_PATH: "data/krawl.db"
+  DATABASE_RETENTION_DAYS: "30"
 ---
 apiVersion: v1
 kind: ConfigMap
@@ -227,6 +230,20 @@ data:
       ]
     }
 ---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: krawl-db
+  namespace: krawl-system
+  labels:
+    app: krawl-server
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 1Gi
+---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
@@ -260,6 +277,8 @@ spec:
           mountPath: /app/wordlists.json
           subPath: wordlists.json
           readOnly: true
+        - name: database
+          mountPath: /app/data
         resources:
           requests:
             memory: "64Mi"
@@ -271,6 +290,9 @@ spec:
       - name: wordlists
         configMap:
           name: krawl-wordlists
+      - name: database
+        persistentVolumeClaim:
+          claimName: krawl-db
 ---
 apiVersion: v1
 kind: Service
diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml
index 073005f..ef357b0 100644
--- a/kubernetes/manifests/configmap.yaml
+++ b/kubernetes/manifests/configmap.yaml
@@ -15,4 +15,7 @@ data:
   PROBABILITY_ERROR_CODES: "0"
   SERVER_HEADER: "Apache/2.2.22 (Ubuntu)"
 #  CANARY_TOKEN_URL: set-your-canary-token-url-here
-#  TIMEZONE: "UTC"  # IANA timezone (e.g., "America/New_York", "Europe/Rome")
\ No newline at end of file
+#  TIMEZONE: "UTC"  # IANA timezone (e.g., "America/New_York", "Europe/Rome")
+  # Database configuration
+  DATABASE_PATH: "data/krawl.db"
+  DATABASE_RETENTION_DAYS: "30"
\ No newline at end of file
diff --git a/kubernetes/manifests/deployment.yaml b/kubernetes/manifests/deployment.yaml
index 0552eba..1650721 100644
--- a/kubernetes/manifests/deployment.yaml
+++ b/kubernetes/manifests/deployment.yaml
@@ -31,6 +31,8 @@ spec:
           mountPath: /app/wordlists.json
           subPath: wordlists.json
           readOnly: true
+        - name: database
+          mountPath: /app/data
         resources:
           requests:
             memory: "64Mi"
@@ -42,3 +44,6 @@ spec:
       - name: wordlists
         configMap:
           name: krawl-wordlists
+      - name: database
+        persistentVolumeClaim:
+          claimName: krawl-db
diff --git a/kubernetes/manifests/kustomization.yaml b/kubernetes/manifests/kustomization.yaml
index 8f41776..4a5fcd9 100644
--- a/kubernetes/manifests/kustomization.yaml
+++ b/kubernetes/manifests/kustomization.yaml
@@ -5,6 +5,7 @@ resources:
   - namespace.yaml
   - configmap.yaml
   - wordlists-configmap.yaml
+  - pvc.yaml
   - deployment.yaml
   - service.yaml
   - network-policy.yaml
diff --git a/kubernetes/manifests/pvc.yaml b/kubernetes/manifests/pvc.yaml
new file mode 100644
index 0000000..6b771ff
--- /dev/null
+++ b/kubernetes/manifests/pvc.yaml
@@ -0,0 +1,13 @@
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: krawl-db
+  namespace: krawl-system
+  labels:
+    app: krawl-server
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 1Gi