From 66b4d8fe6a5b2319670001dff61102793a759a7c Mon Sep 17 00:00:00 2001
From: Phillip Tarrant <ptarrant@gmail.com>
Date: Sun, 28 Dec 2025 14:24:52 -0600
Subject: [PATCH 01/21] adding pip and requirements to docker install and
 exposing data/krawl.db via docker-compose.yaml

---
 Dockerfile          | 4 ++++
 docker-compose.yaml | 1 +
 2 files changed, 5 insertions(+)

diff --git a/Dockerfile b/Dockerfile
index adac20f..63d90bf 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,6 +4,10 @@ LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl
 
 WORKDIR /app
 
+# Install Python dependencies
+COPY requirements.txt /app/
+RUN pip install --no-cache-dir -r requirements.txt
+
 COPY src/ /app/src/
 COPY wordlists.json /app/
 
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 600034d..7d519ab 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -10,6 +10,7 @@ services:
       - "5000:5000"
     volumes:
       - ./wordlists.json:/app/wordlists.json:ro
+      - ./data:/app/data
     environment:
       - PORT=5000
       - DELAY=100

From 1486dfc913046837a69e4bd191931ab82e803a67 Mon Sep 17 00:00:00 2001
From: Phillip Tarrant <ptarrant@gmail.com>
Date: Fri, 26 Dec 2025 07:53:05 -0600
Subject: [PATCH 02/21] Add configurable HTTP Server header for deception

  Add SERVER_HEADER environment variable to customize the HTTP Server
  response header, defaulting to Apache/2.2.22 (Ubuntu). This allows the
  honeypot to masquerade as different web servers to attract attackers.

  - Add server_header field to Config dataclass
  - Override version_string() in Handler to return configured header
  - Update documentation and all deployment configs
---
 README.md                           | 1 +
 docker-compose.yaml                 | 1 +
 helm/templates/configmap.yaml       | 1 +
 helm/values.yaml                    | 1 +
 kubernetes/manifests/configmap.yaml | 1 +
 src/config.py                       | 4 +++-
 src/handler.py                      | 4 ++++
 src/server.py                       | 1 +
 8 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0cf8b96..b84d955 100644
--- a/README.md
+++ b/README.md
@@ -185,6 +185,7 @@ To customize the deception server installation several **environment variables**
 | `CANARY_TOKEN_URL` | External canary token URL | None |
 | `DASHBOARD_SECRET_PATH` | Custom dashboard path | Auto-generated |
 | `PROBABILITY_ERROR_CODES` | Error response probability (0-100%) | `0` |
+| `SERVER_HEADER` | HTTP Server header for deception | `Apache/2.2.22 (Ubuntu)` |
 
 ## robots.txt
 The actual (juicy) robots.txt configuration is the following
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 57c648d..1612864 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -20,6 +20,7 @@ services:
       - MAX_COUNTER=10
       - CANARY_TOKEN_TRIES=10
       - PROBABILITY_ERROR_CODES=0
+      - SERVER_HEADER=Apache/2.2.22 (Ubuntu)
       # Optional: Set your canary token URL
       # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt
       # Optional: Set custom dashboard path (auto-generated if not set)
diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml
index f6fe92c..c50ab75 100644
--- a/helm/templates/configmap.yaml
+++ b/helm/templates/configmap.yaml
@@ -14,4 +14,5 @@ data:
   MAX_COUNTER: {{ .Values.config.maxCounter | quote }}
   CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }}
   PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }}
+  SERVER_HEADER: {{ .Values.config.serverHeader | quote }}
   CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }}
diff --git a/helm/values.yaml b/helm/values.yaml
index 9ee9ca5..a095632 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -73,6 +73,7 @@ config:
   maxCounter: 10
   canaryTokenTries: 10
   probabilityErrorCodes: 0
+  serverHeader: "Apache/2.2.22 (Ubuntu)"
 #  canaryTokenUrl: set-your-canary-token-url-here
 
 networkPolicy:
diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml
index 42ba002..431b9a3 100644
--- a/kubernetes/manifests/configmap.yaml
+++ b/kubernetes/manifests/configmap.yaml
@@ -13,4 +13,5 @@ data:
   MAX_COUNTER: "10"
   CANARY_TOKEN_TRIES: "10"
   PROBABILITY_ERROR_CODES: "0"
+  SERVER_HEADER: "Apache/2.2.22 (Ubuntu)"
 #  CANARY_TOKEN_URL: set-your-canary-token-url-here
\ No newline at end of file
diff --git a/src/config.py b/src/config.py
index 51391a9..7c6714c 100644
--- a/src/config.py
+++ b/src/config.py
@@ -21,6 +21,7 @@ class Config:
     api_server_port: int = 8080
     api_server_path: str = "/api/v2/users"
     probability_error_codes: int = 0  # Percentage (0-100)
+    server_header: str = "Apache/2.2.22 (Ubuntu)"
 
     @classmethod
     def from_env(cls) -> 'Config':
@@ -44,5 +45,6 @@ class Config:
             api_server_url=os.getenv('API_SERVER_URL'),
             api_server_port=int(os.getenv('API_SERVER_PORT', 8080)),
             api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'),
-            probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5))
+            probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5)),
+            server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)')
         )
diff --git a/src/handler.py b/src/handler.py
index 8fef2aa..9d8abe2 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -48,6 +48,10 @@ class Handler(BaseHTTPRequestHandler):
         """Extract user agent from request"""
         return self.headers.get('User-Agent', '')
 
+    def version_string(self) -> str:
+        """Return custom server version for deception."""
+        return self.config.server_header
+
     def _should_return_error(self) -> bool:
         """Check if we should return an error based on probability"""
         if self.config.probability_error_codes <= 0:
diff --git a/src/server.py b/src/server.py
index 0105f6d..861e9f2 100644
--- a/src/server.py
+++ b/src/server.py
@@ -32,6 +32,7 @@ def print_usage():
     print('  DASHBOARD_SECRET_PATH - Secret path for dashboard (auto-generated if not set)')
     print('  PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)')
     print('  CHAR_SPACE            - Characters for random links')
+    print('  SERVER_HEADER         - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))')
 
 
 def main():

From d13ceb4888bbe6701b8985d0886648f6c8cda53f Mon Sep 17 00:00:00 2001
From: Phillip Tarrant <ptarrant@gmail.com>
Date: Fri, 26 Dec 2025 08:00:16 -0600
Subject: [PATCH 03/21] Added test script to show the server header

---
 tests/check_header.sh | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100755 tests/check_header.sh

diff --git a/tests/check_header.sh b/tests/check_header.sh
new file mode 100755
index 0000000..78b8e5d
--- /dev/null
+++ b/tests/check_header.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# -s is for silent (no progress bar) | -I is to get the headers | grep is to find only the Server line
+curl -s -I http://localhost:5000 | grep "Server:"
\ No newline at end of file

From 828f04261fd16ebc21884d16026765cb6830983a Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Sat, 27 Dec 2025 19:17:27 +0100
Subject: [PATCH 04/21] Added POST log and dashboard for used credentials

---
 src/handler.py                      | 15 ++++++++
 src/logger.py                       | 28 +++++++++++++++
 src/server.py                       |  4 ++-
 src/templates/dashboard_template.py | 28 +++++++++++++++
 src/tracker.py                      | 56 ++++++++++++++++++++++++++++-
 5 files changed, 129 insertions(+), 2 deletions(-)

diff --git a/src/handler.py b/src/handler.py
index 9d8abe2..ac7ca22 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -3,6 +3,7 @@
 import logging
 import random
 import time
+from datetime import datetime
 from http.server import BaseHTTPRequestHandler
 from typing import Optional, List
 
@@ -25,6 +26,7 @@ class Handler(BaseHTTPRequestHandler):
     counter: int = 0
     app_logger: logging.Logger = None
     access_logger: logging.Logger = None
+    credential_logger: logging.Logger = None
 
     def _get_client_ip(self) -> str:
         """Extract client IP address from request, checking proxy headers first"""
@@ -213,6 +215,19 @@ class Handler(BaseHTTPRequestHandler):
 
             self.access_logger.warning(f"[POST DATA] {post_data[:200]}")
 
+            # Parse and log credentials
+            username, password = self.tracker.parse_credentials(post_data)
+            if username or password:
+                # Log to dedicated credentials.log file
+                timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
+                credential_line = f"{timestamp}|{client_ip}|{username or 'N/A'}|{password or 'N/A'}|{self.path}"
+                self.credential_logger.info(credential_line)
+                
+                # Also record in tracker for dashboard
+                self.tracker.record_credential_attempt(client_ip, self.path, username or 'N/A', password or 'N/A')
+                
+                self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}")
+
         # send the post data (body) to the record_access function so the post data can be used to detect suspicious things.
         self.tracker.record_access(client_ip, self.path, user_agent, post_data)
         
diff --git a/src/logger.py b/src/logger.py
index 68b8278..9f09236 100644
--- a/src/logger.py
+++ b/src/logger.py
@@ -77,6 +77,22 @@ class LoggerManager:
         access_stream_handler.setFormatter(log_format)
         self._access_logger.addHandler(access_stream_handler)
 
+        # Setup credential logger (special format, no stream handler)
+        self._credential_logger = logging.getLogger("krawl.credentials")
+        self._credential_logger.setLevel(logging.INFO)
+        self._credential_logger.handlers.clear()
+
+        # Credential logger uses a simple format: timestamp|ip|username|password|path
+        credential_format = logging.Formatter("%(message)s")
+        
+        credential_file_handler = RotatingFileHandler(
+            os.path.join(log_dir, "credentials.log"),
+            maxBytes=max_bytes,
+            backupCount=backup_count
+        )
+        credential_file_handler.setFormatter(credential_format)
+        self._credential_logger.addHandler(credential_file_handler)
+
         self._initialized = True
 
     @property
@@ -93,6 +109,13 @@ class LoggerManager:
             self.initialize()
         return self._access_logger
 
+    @property
+    def credentials(self) -> logging.Logger:
+        """Get the credentials logger."""
+        if not self._initialized:
+            self.initialize()
+        return self._credential_logger
+
 
 # Module-level singleton instance
 _logger_manager = LoggerManager()
@@ -108,6 +131,11 @@ def get_access_logger() -> logging.Logger:
     return _logger_manager.access
 
 
+def get_credential_logger() -> logging.Logger:
+    """Get the credential logger instance."""
+    return _logger_manager.credentials
+
+
 def initialize_logging(log_dir: str = "logs") -> None:
     """Initialize the logging system."""
     _logger_manager.initialize(log_dir)
diff --git a/src/server.py b/src/server.py
index 861e9f2..fd8f7d2 100644
--- a/src/server.py
+++ b/src/server.py
@@ -11,7 +11,7 @@ from http.server import HTTPServer
 from config import Config
 from tracker import AccessTracker
 from handler import Handler
-from logger import initialize_logging, get_app_logger, get_access_logger
+from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
 
 
 def print_usage():
@@ -45,6 +45,7 @@ def main():
     initialize_logging()
     app_logger = get_app_logger()
     access_logger = get_access_logger()
+    credential_logger = get_credential_logger()
 
     config = Config.from_env()
 
@@ -55,6 +56,7 @@ def main():
     Handler.counter = config.canary_token_tries
     Handler.app_logger = app_logger
     Handler.access_logger = access_logger
+    Handler.credential_logger = credential_logger
 
     if len(sys.argv) == 2:
         try:
diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py
index 3f5524d..a267278 100644
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -45,6 +45,12 @@ def generate_dashboard(stats: dict) -> str:
         for log in stats.get('attack_types', [])[-10:]
     ]) or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
 
+    # Generate credential attempts rows
+    credential_rows = '\n'.join([
+        f'<tr><td>{log["ip"]}</td><td>{log["username"]}</td><td>{log["password"]}</td><td>{log["path"]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
+        for log in stats.get('credential_attempts', [])[-20:]
+    ]) or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'
+
     return f"""<!DOCTYPE html>
 <html>
 <head>
@@ -159,6 +165,10 @@ def generate_dashboard(stats: dict) -> str:
                 <div class="stat-value alert">{stats.get('honeypot_ips', 0)}</div>
                 <div class="stat-label">Honeypot Caught</div>
             </div>
+            <div class="stat-card alert">
+                <div class="stat-value alert">{len(stats.get('credential_attempts', []))}</div>
+                <div class="stat-label">Credentials Captured</div>
+            </div>
         </div>
 
         <div class="table-container alert-section">
@@ -194,6 +204,24 @@ def generate_dashboard(stats: dict) -> str:
             </table>
         </div>
 
+        <div class="table-container alert-section">
+            <h2>🔑 Captured Credentials</h2>
+            <table>
+                <thead>
+                    <tr>
+                        <th>IP Address</th>
+                        <th>Username</th>
+                        <th>Password</th>
+                        <th>Path</th>
+                        <th>Time</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {credential_rows}
+                </tbody>
+            </table>
+        </div>
+
         <div class="table-container alert-section">
             <h2>&#128520; Detected Attack Types</h2>
             <table>
diff --git a/src/tracker.py b/src/tracker.py
index 6e733f4..717a4c3 100644
--- a/src/tracker.py
+++ b/src/tracker.py
@@ -4,6 +4,7 @@ from typing import Dict, List, Tuple
 from collections import defaultdict
 from datetime import datetime
 import re
+import urllib.parse
 
 
 class AccessTracker:
@@ -13,6 +14,7 @@ class AccessTracker:
         self.path_counts: Dict[str, int] = defaultdict(int)
         self.user_agent_counts: Dict[str, int] = defaultdict(int)
         self.access_log: List[Dict] = []
+        self.credential_attempts: List[Dict] = []
         self.suspicious_patterns = [
             'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests',
             'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
@@ -31,6 +33,57 @@ class AccessTracker:
         # Track IPs that accessed honeypot paths from robots.txt
         self.honeypot_triggered: Dict[str, List[str]] = defaultdict(list)
 
+    def parse_credentials(self, post_data: str) -> Tuple[str, str]:
+        """
+        Parse username and password from POST data.
+        Returns tuple (username, password) or (None, None) if not found.
+        """
+        if not post_data:
+            return None, None
+        
+        username = None
+        password = None
+        
+        try:
+            # Parse URL-encoded form data
+            parsed = urllib.parse.parse_qs(post_data)
+            
+            # Common username field names
+            username_fields = ['username', 'user', 'login', 'email', 'log', 'userid', 'account']
+            for field in username_fields:
+                if field in parsed and parsed[field]:
+                    username = parsed[field][0]
+                    break
+            
+            # Common password field names
+            password_fields = ['password', 'pass', 'passwd', 'pwd', 'passphrase']
+            for field in password_fields:
+                if field in parsed and parsed[field]:
+                    password = parsed[field][0]
+                    break
+                    
+        except Exception:
+            # If parsing fails, try simple regex patterns
+            username_match = re.search(r'(?:username|user|login|email|log)=([^&\s]+)', post_data, re.IGNORECASE)
+            password_match = re.search(r'(?:password|pass|passwd|pwd)=([^&\s]+)', post_data, re.IGNORECASE)
+            
+            if username_match:
+                username = urllib.parse.unquote_plus(username_match.group(1))
+            if password_match:
+                password = urllib.parse.unquote_plus(password_match.group(1))
+        
+        return username, password
+
+    def record_credential_attempt(self, ip: str, path: str, username: str, password: str):
+        """Record a credential login attempt"""
+        self.credential_attempts.append({
+            'ip': ip,
+            'path': path,
+            'username': username,
+            'password': password,
+            'timestamp': datetime.now().isoformat()
+        })
+
     def record_access(self, ip: str, path: str, user_agent: str = '', body: str = ''):
         """Record an access attempt"""
         self.ip_counts[ip] += 1
@@ -146,5 +199,6 @@ class AccessTracker:
             'top_user_agents': self.get_top_user_agents(10),
             'recent_suspicious': self.get_suspicious_accesses(20),
             'honeypot_triggered_ips': self.get_honeypot_triggered_ips(),
-            'attack_types': self.get_attack_type_accesses(20)
+            'attack_types': self.get_attack_type_accesses(20),
+            'credential_attempts': self.credential_attempts[-50:]  # Last 50 attempts
         }

From 852a15976f1dd6d16eee32e90567ee25ea4ea47a Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Mon, 29 Dec 2025 23:13:02 +0100
Subject: [PATCH 05/21] Added demo

---
 README.md         |   7 ++++++-
 deployment.yaml   |  44 --------------------------------------------
 src/data/krawl.db | Bin 0 -> 61440 bytes
 3 files changed, 6 insertions(+), 45 deletions(-)
 delete mode 100644 deployment.yaml
 create mode 100644 src/data/krawl.db

diff --git a/README.md b/README.md
index b84d955..9954cc0 100644
--- a/README.md
+++ b/README.md
@@ -51,7 +51,12 @@
 ## Star History
 <img src="https://api.star-history.com/svg?repos=BlessedRebuS/Krawl&type=Date" width="600" alt="Star History Chart" />
 
- 
+## Try Krawl
+Tip: crawl the `robots.txt` paths for additional fun
+###  http://demo.krawlme.com
+## View the dashboard 
+### http://demo.krawlme.com/das_dashboard
+
 ## What is Krawl?
 
 **Krawl** is a cloud‑native deception server designed to detect, delay, and analyze malicious web crawlers and automated scanners.
diff --git a/deployment.yaml b/deployment.yaml
deleted file mode 100644
index 4bf5189..0000000
--- a/deployment.yaml
+++ /dev/null
@@ -1,44 +0,0 @@
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: krawl-server
-  namespace: krawl
-  labels:
-    app: krawl-server
-spec:
-  replicas: 1
-  selector:
-    matchLabels:
-      app: krawl-server
-  template:
-    metadata:
-      labels:
-        app: krawl-server
-    spec:
-      containers:
-      - name: krawl
-        image: ghcr.io/blessedrebus/krawl:latest
-        imagePullPolicy: Always
-        ports:
-        - containerPort: 5000
-          name: http
-          protocol: TCP
-        envFrom:
-        - configMapRef:
-            name: krawl-config
-        volumeMounts:
-        - name: wordlists
-          mountPath: /app/wordlists.json
-          subPath: wordlists.json
-          readOnly: true
-        resources:
-          requests:
-            memory: "64Mi"
-            cpu: "100m"
-          limits:
-            memory: "256Mi"
-            cpu: "500m"
-      volumes:
-      - name: wordlists
-        configMap:
-          name: krawl-wordlists
diff --git a/src/data/krawl.db b/src/data/krawl.db
new file mode 100644
index 0000000000000000000000000000000000000000..88f7d5763f00e73bcacf6af9ef16e63c68c0e9e5
GIT binary patch
literal 61440
zcmeI5U2Gd!6~{gCx8s?cG^vb0CDW~{#;A2>#vac&rLf-AP0QwEn*`J%A<MBRiPgk*
zJ(Jyb;l<r9Z!5tAFNhTgt;EYBSgArFq}8rgJODx<v4Z6Vp&}$cMcEbX1N*=oe~f4B
zaa?KJIz|6Ua^ri?x%Zy=-Fs&2J7+xWmzT<h#&1>Ydj*3}dL}#!<9U+jJs!^!^pDWL
z&c@OgzD|bzXKb$ry?nxxxb-9({)UGOe8R*1kozS2I=d9T7tN1fj=UZ@9)2ZwFMKWV
zCKX}>0T2KI5ICd+8q-`bc6p9z$mL2&yHUQ8FBnFlcr9Pj46SICs}((8C>Awc&+k^Z
z^W{>Xm@KU2vKu-6(#rYV6@GXF{OStd*9brB=p=sYG#iY~%`vxwMqzVT>(lQm;M!Jf
zWS?5fxwRh&mrK;fjoe~xjbB;a;8#Aov~-$}gdH8bYqW?MH)|UIx$N4)#q8RwEW{nH
z(DQ6@M=RxP)H1DJvGgQ49XES<X6@2)cI_(vbnYrYTP`)rF08KQE-kK@Wj(vkujMY}
z)^aNgxpls4pWdu#@;jef%2DJC+4Y6&`CKG^>&AF6cI+7Q;zAQwxt7<Bf}wZ*M;&lF
zrTi>qQnM^g$<948szzZqU)QehYr3Jkg4rt9btA89T7^GPr@+Rg<(vb`Zs9<2vASO|
z>NoSnYN-oWYN9EYjhmKbqNFwp3wotvMswiwO1@g(w$wVP>soD}?meBldA(SzYxX*J
zS#`f&?Dm^ZHT$qB?APdI*Gkr|^$%k$9*NII1F?l;jHl^7`t@D<zAInYH>%BizQZ~%
zc3zq9w?gK@;ER!9?9)l6G1+mvVqGg~6{Ae4o0Dj-X6UrOOX+y8$GPaxn&w1tk<dw0
zi#I+R4#w7L?`IG0J#Uov=<pZz-0X<lpB>#c4zTDnZ}k*!OoW25Woq1+gN(EEUG_12
zKbBby3^dEqh~F9y24l%2^TN5NgSm9+QWSEyor^-#1Kp2G2RJ4@#O!n@y^vC!e!H*J
z<4<9)JF`1}Q7h>BbJcpOTep2|vUby&>JBgNJ#<Cl4K6@W8Yk>04eJy^=d7OJsaCX`
zwW^Uf>gDZit!_Jp1$&3aCUdK54vtNQ&ffm!<Dd5jVsj^$>z(;xt3VyWmJ1x%d(&lX
z+D%uf`GT=?bYEGw#w~QVTK<FjuqyQJmnDgR$s3I2XphI-dZfFr@9VX4v0UBPZN2&q
z>eW%HuV;re{`^=lwm^ITm}~E~t~c8{_a4}}txkX6HM{5E$}z#%i4)B2%dOLXcUIXt
z8*6vgKGk=19-2Gl3)}R-Z265WwoX!enz2*ux|7)RmcW6Pe`<AgDVJSwcox;@^%Pe<
zYiiuDgPJFqY2Uv*<OX+-+hPB}UWxuHdU5=XaW(S&$ZYsq;g5t~3VsmW4!j*W?Y~VG
zv4H>xfWYHQpfNEW@ywiT$Wx(ER+N;4K!3!!t-`bAVzrW>$#V7i^6qY7PEH8?>~f)4
zt{7E)=PZAzVraWOE#Ozz`769gtI22i`m^(5DxIK5^hK?BtvV+OqCkH|{zAE~ZB=i~
zHKmKW4M~vXGoo}x%J8B(pA_bkQbJMa8A5HGoT4VJ_L`L5)RdIEnNd=ujI1hYp|qta
z1#v6AnchqZTZKnpr!p_6>Fy=eOj43Ejgv<XwVh%@m4uWiH6|w8LmUp<hPbv<TfUhs
z?UgHYj|!cTNzhSO#8l(R1U2bYLp~a|&${PoXXsi<xsot#dkE9+nbmd2yrbhO&&#QV
zl%k}>21lq-$LYLtaH2*P9w8-akdzRUNjWJuLL4>dXs;s_^wPtC)8ms(Hpz5C5tO8o
zY4}-c3h6cFk=c<<WYkPr6dL~Mi0x1XMG@78e|*Gtq@`3+5gY!<i0w$piX^5R{_u$H
zkW@(#<c2>qVmkztx(l5j!I9b_B~q!Rno%15z=-XT#H5&}<KiEw9bzJrk<%)*!#83(
zR6$jf)L*<KwL?g#a!NACWo*QDq~&B%qCA*U+99cl4E2{(GD6?SPe*^@A@7jykuQ@k
zkQ5o`?s7lkzQS#DpW=?Pe`nufzs_D~FS0Yy|4>D2AOHd&00JNY0w4eaAOHd&a2N>8
z`ew3>wWdFK!F_tLqI1Q2YM5eY$@$0-MQb5=@<2t~;_SpQ#m=HB8KP({X>!99J8O>Y
zaK+Z5V05@*Yt?RixMFKrEizoOweA%juGm_b3Jp{2tQ-Z0E4G$)0>czLYcKxcimk;F
z-!R3_>V<c>VrzL}>=bo{u&XOH*SVda|H;c9@&S3Dyi5K;{!IQzeouZ!-X^~!KO;XO
zZ;&68?~rekm&p&vYvk3#;O4La1V8`;KmY_l00ck)1V8`;KmY{(uL$_OA!gc4rp)As
z&l_eY&1Aw%h*`#&32P=%GZ{CNh?#`VBxEK*GYOc9-%NaF;x&^ouQx=4nCSI?_G=z;
zm%K$@Aup0GvPfo0nENaDbM95{C9cA)asoHWzRUiay+akTfdB}A00@8p2!H?xfB*=9
z00=xd0w4EgnFFF1dc{343+`eoQo&W+jZbh9+oKZP#a1kWtGFA1;4Ze}4hD(a(FTLW
z?HGeW;&x=gAaOgMV34>SMc^*BVh09^+YtlqVk=HykhmQk;4Zde0tSiOkpRcM*|2>w
zxm^F}{^B9;kw1~&lAqG6{of_uBwr;jkbSaEu9Bz8IWk9%lPTio-lvM#KmY_l00ck)
z1V8`;KmY_l00cnbFcV<CGu>N5rX9H{NA8FtH|fYtIC8|1;~Y8Gk&8NV<BnX!kqbL=
zAxAFg$ORlZza!^!<h+jDSkybyy)&SB{eM{e|7+GA0EquT%yS&JfdB}A00@8p2!H?x
zfB*=900=yu1k5P?{`mjUgN*+-zyF^hZ+hr2HV^;-5C8!X009sH0T2KI5C8!X0D;3w
z;2aaA4;Gr5vOVa?{!}6Q>>T>r7ysXU29T6dCG+|}L+;Z1{}1a9!Y&X10T2KI5C8!X
z009sH0T2KI5CDPU1YGa`@A>@y4t@UL@Rpzf0w4eaAOHd&00JNY0w4eaAOHd&aJUEz
zdH!F|=l}QU_x~L(`(X_TfB*=900@8p2!H?xfB*=900@A<ha%wk{=e4!|93p(&WCCf
zl|cXmKmY_l00ck)1V8`;KmY_l00bT|fzL3}877-;w<=0XLZCn5bl+Y6lA6fK89`3l
zCF*=qm`_RxMM*1)+`9iC*Z&{z5TQ8`009sH0T2KI5C8!X009sH0T389fxheiUi16^
z|3-d;X8??PXwW|hfB*=900@8p2!H?xfB*=900;~qFy-}om{aUzvAVZct>kO<>ZYbo
zjWr7=oP|uYkT?p>>;GQzPY?Mo`8WNC4Fo^{1V8`;KmY_l00ck)1V8`;K;ZEw;P*1j
Oq}NO*#(Z8TO#TNKFY4$3

literal 0
HcmV?d00001


From 0b1e9537d2eafb0835989921c8b8401e6c4aba22 Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Mon, 29 Dec 2025 23:13:39 +0100
Subject: [PATCH 06/21] Remove demo database src/data/krawl.db

---
 src/data/krawl.db | Bin 61440 -> 0 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 src/data/krawl.db

diff --git a/src/data/krawl.db b/src/data/krawl.db
deleted file mode 100644
index 88f7d5763f00e73bcacf6af9ef16e63c68c0e9e5..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 61440
zcmeI5U2Gd!6~{gCx8s?cG^vb0CDW~{#;A2>#vac&rLf-AP0QwEn*`J%A<MBRiPgk*
zJ(Jyb;l<r9Z!5tAFNhTgt;EYBSgArFq}8rgJODx<v4Z6Vp&}$cMcEbX1N*=oe~f4B
zaa?KJIz|6Ua^ri?x%Zy=-Fs&2J7+xWmzT<h#&1>Ydj*3}dL}#!<9U+jJs!^!^pDWL
z&c@OgzD|bzXKb$ry?nxxxb-9({)UGOe8R*1kozS2I=d9T7tN1fj=UZ@9)2ZwFMKWV
zCKX}>0T2KI5ICd+8q-`bc6p9z$mL2&yHUQ8FBnFlcr9Pj46SICs}((8C>Awc&+k^Z
z^W{>Xm@KU2vKu-6(#rYV6@GXF{OStd*9brB=p=sYG#iY~%`vxwMqzVT>(lQm;M!Jf
zWS?5fxwRh&mrK;fjoe~xjbB;a;8#Aov~-$}gdH8bYqW?MH)|UIx$N4)#q8RwEW{nH
z(DQ6@M=RxP)H1DJvGgQ49XES<X6@2)cI_(vbnYrYTP`)rF08KQE-kK@Wj(vkujMY}
z)^aNgxpls4pWdu#@;jef%2DJC+4Y6&`CKG^>&AF6cI+7Q;zAQwxt7<Bf}wZ*M;&lF
zrTi>qQnM^g$<948szzZqU)QehYr3Jkg4rt9btA89T7^GPr@+Rg<(vb`Zs9<2vASO|
z>NoSnYN-oWYN9EYjhmKbqNFwp3wotvMswiwO1@g(w$wVP>soD}?meBldA(SzYxX*J
zS#`f&?Dm^ZHT$qB?APdI*Gkr|^$%k$9*NII1F?l;jHl^7`t@D<zAInYH>%BizQZ~%
zc3zq9w?gK@;ER!9?9)l6G1+mvVqGg~6{Ae4o0Dj-X6UrOOX+y8$GPaxn&w1tk<dw0
zi#I+R4#w7L?`IG0J#Uov=<pZz-0X<lpB>#c4zTDnZ}k*!OoW25Woq1+gN(EEUG_12
zKbBby3^dEqh~F9y24l%2^TN5NgSm9+QWSEyor^-#1Kp2G2RJ4@#O!n@y^vC!e!H*J
z<4<9)JF`1}Q7h>BbJcpOTep2|vUby&>JBgNJ#<Cl4K6@W8Yk>04eJy^=d7OJsaCX`
zwW^Uf>gDZit!_Jp1$&3aCUdK54vtNQ&ffm!<Dd5jVsj^$>z(;xt3VyWmJ1x%d(&lX
z+D%uf`GT=?bYEGw#w~QVTK<FjuqyQJmnDgR$s3I2XphI-dZfFr@9VX4v0UBPZN2&q
z>eW%HuV;re{`^=lwm^ITm}~E~t~c8{_a4}}txkX6HM{5E$}z#%i4)B2%dOLXcUIXt
z8*6vgKGk=19-2Gl3)}R-Z265WwoX!enz2*ux|7)RmcW6Pe`<AgDVJSwcox;@^%Pe<
zYiiuDgPJFqY2Uv*<OX+-+hPB}UWxuHdU5=XaW(S&$ZYsq;g5t~3VsmW4!j*W?Y~VG
zv4H>xfWYHQpfNEW@ywiT$Wx(ER+N;4K!3!!t-`bAVzrW>$#V7i^6qY7PEH8?>~f)4
zt{7E)=PZAzVraWOE#Ozz`769gtI22i`m^(5DxIK5^hK?BtvV+OqCkH|{zAE~ZB=i~
zHKmKW4M~vXGoo}x%J8B(pA_bkQbJMa8A5HGoT4VJ_L`L5)RdIEnNd=ujI1hYp|qta
z1#v6AnchqZTZKnpr!p_6>Fy=eOj43Ejgv<XwVh%@m4uWiH6|w8LmUp<hPbv<TfUhs
z?UgHYj|!cTNzhSO#8l(R1U2bYLp~a|&${PoXXsi<xsot#dkE9+nbmd2yrbhO&&#QV
zl%k}>21lq-$LYLtaH2*P9w8-akdzRUNjWJuLL4>dXs;s_^wPtC)8ms(Hpz5C5tO8o
zY4}-c3h6cFk=c<<WYkPr6dL~Mi0x1XMG@78e|*Gtq@`3+5gY!<i0w$piX^5R{_u$H
zkW@(#<c2>qVmkztx(l5j!I9b_B~q!Rno%15z=-XT#H5&}<KiEw9bzJrk<%)*!#83(
zR6$jf)L*<KwL?g#a!NACWo*QDq~&B%qCA*U+99cl4E2{(GD6?SPe*^@A@7jykuQ@k
zkQ5o`?s7lkzQS#DpW=?Pe`nufzs_D~FS0Yy|4>D2AOHd&00JNY0w4eaAOHd&a2N>8
z`ew3>wWdFK!F_tLqI1Q2YM5eY$@$0-MQb5=@<2t~;_SpQ#m=HB8KP({X>!99J8O>Y
zaK+Z5V05@*Yt?RixMFKrEizoOweA%juGm_b3Jp{2tQ-Z0E4G$)0>czLYcKxcimk;F
z-!R3_>V<c>VrzL}>=bo{u&XOH*SVda|H;c9@&S3Dyi5K;{!IQzeouZ!-X^~!KO;XO
zZ;&68?~rekm&p&vYvk3#;O4La1V8`;KmY_l00ck)1V8`;KmY{(uL$_OA!gc4rp)As
z&l_eY&1Aw%h*`#&32P=%GZ{CNh?#`VBxEK*GYOc9-%NaF;x&^ouQx=4nCSI?_G=z;
zm%K$@Aup0GvPfo0nENaDbM95{C9cA)asoHWzRUiay+akTfdB}A00@8p2!H?xfB*=9
z00=xd0w4EgnFFF1dc{343+`eoQo&W+jZbh9+oKZP#a1kWtGFA1;4Ze}4hD(a(FTLW
z?HGeW;&x=gAaOgMV34>SMc^*BVh09^+YtlqVk=HykhmQk;4Zde0tSiOkpRcM*|2>w
zxm^F}{^B9;kw1~&lAqG6{of_uBwr;jkbSaEu9Bz8IWk9%lPTio-lvM#KmY_l00ck)
z1V8`;KmY_l00cnbFcV<CGu>N5rX9H{NA8FtH|fYtIC8|1;~Y8Gk&8NV<BnX!kqbL=
zAxAFg$ORlZza!^!<h+jDSkybyy)&SB{eM{e|7+GA0EquT%yS&JfdB}A00@8p2!H?x
zfB*=900=yu1k5P?{`mjUgN*+-zyF^hZ+hr2HV^;-5C8!X009sH0T2KI5C8!X0D;3w
z;2aaA4;Gr5vOVa?{!}6Q>>T>r7ysXU29T6dCG+|}L+;Z1{}1a9!Y&X10T2KI5C8!X
z009sH0T2KI5CDPU1YGa`@A>@y4t@UL@Rpzf0w4eaAOHd&00JNY0w4eaAOHd&aJUEz
zdH!F|=l}QU_x~L(`(X_TfB*=900@8p2!H?xfB*=900@A<ha%wk{=e4!|93p(&WCCf
zl|cXmKmY_l00ck)1V8`;KmY_l00bT|fzL3}877-;w<=0XLZCn5bl+Y6lA6fK89`3l
zCF*=qm`_RxMM*1)+`9iC*Z&{z5TQ8`009sH0T2KI5C8!X009sH0T389fxheiUi16^
z|3-d;X8??PXwW|hfB*=900@8p2!H?xfB*=900;~qFy-}om{aUzvAVZct>kO<>ZYbo
zjWr7=oP|uYkT?p>>;GQzPY?Mo`8WNC4Fo^{1V8`;KmY_l00ck)1V8`;K;ZEw;P*1j
Oq}NO*#(Z8TO#TNKFY4$3


From 5a808c330ce38e9385fb9ac6b38901cacbd04ffc Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <50186694+BlessedRebuS@users.noreply.github.com>
Date: Tue, 30 Dec 2025 00:29:58 +0100
Subject: [PATCH 07/21] Update README.md

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 9954cc0..8f794ee 100644
--- a/README.md
+++ b/README.md
@@ -53,9 +53,9 @@
 
 ## Try Krawl
 Tip: crawl the `robots.txt` paths for additional fun
-###  http://demo.krawlme.com
+Demo URL: [http://demo.krawlme.com](http://demo.krawlme.com)
 ## View the dashboard 
-### http://demo.krawlme.com/das_dashboard
+Dashboard URL: [http://demo.krawlme.com/das_dashboard](http://demo.krawlme.com/das_dashboard)
 
 ## What is Krawl?
 

From 5ba02d3d0c3f702950afdf5d71ac4aaac2b767ab Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <50186694+BlessedRebuS@users.noreply.github.com>
Date: Tue, 30 Dec 2025 00:30:06 +0100
Subject: [PATCH 08/21] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8f794ee..c68b543 100644
--- a/README.md
+++ b/README.md
@@ -54,7 +54,7 @@
 ## Try Krawl
 Tip: crawl the `robots.txt` paths for additional fun
 Demo URL: [http://demo.krawlme.com](http://demo.krawlme.com)
-## View the dashboard 
+## View the dashboard
 Dashboard URL: [http://demo.krawlme.com/das_dashboard](http://demo.krawlme.com/das_dashboard)
 
 ## What is Krawl?

From bf73bc7e2cb8b9e47a19396cca005bcd6d8f48cd Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <50186694+BlessedRebuS@users.noreply.github.com>
Date: Tue, 30 Dec 2025 00:38:19 +0100
Subject: [PATCH 09/21] Update README with demo and dashboard information

Removed old sections and reorganized demo and dashboard links.
---
 README.md | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index c68b543..58e0b06 100644
--- a/README.md
+++ b/README.md
@@ -48,14 +48,10 @@
 <br>
 </div>
 
-## Star History
-<img src="https://api.star-history.com/svg?repos=BlessedRebuS/Krawl&type=Date" width="600" alt="Star History Chart" />
-
-## Try Krawl
+## Demo
 Tip: crawl the `robots.txt` paths for additional fun
-Demo URL: [http://demo.krawlme.com](http://demo.krawlme.com)
-## View the dashboard
-Dashboard URL: [http://demo.krawlme.com/das_dashboard](http://demo.krawlme.com/das_dashboard)
+### Krawl URL: [http://demo.krawlme.com](http://demo.krawlme.com)
+### View the dashboard [http://demo.krawlme.com/das_dashboard](http://demo.krawlme.com/das_dashboard)
 
 ## What is Krawl?
 
@@ -321,3 +317,6 @@ Contributions welcome! Please:
 **This is a deception/honeypot system.**  
 Deploy in isolated environments and monitor carefully for security events.  
 Use responsibly and in compliance with applicable laws and regulations.
+
+## Star History
+<img src="https://api.star-history.com/svg?repos=BlessedRebuS/Krawl&type=Date" width="600" alt="Star History Chart" />

From d458eb471db47ffae2ce6b72ff15228c790017e8 Mon Sep 17 00:00:00 2001
From: Phillip Tarrant <ptarrant@gmail.com>
Date: Fri, 2 Jan 2026 13:39:54 -0600
Subject: [PATCH 10/21] Migrate configuration from environment variables to
 YAML file

  - Add YAML-based configuration loaded from config.yaml (CONFIG_LOCATION env var)
  - Add PyYAML dependency and install requirements in Dockerfile
  - Replace Config.from_env() with get_config() singleton pattern
  - Remove server_header from config (now randomized from wordlists only)
  - Update docker-compose.yaml to mount config.yaml read-only
  - Update Helm chart: restructure values.yaml, generate config.yaml in ConfigMap
  - Update Kubernetes manifests: ConfigMap now contains config.yaml, deployments mount it
  - Remove Helm secret.yaml (dashboard path now auto-generated in config.yaml)
---
 Dockerfile                              |  3 +
 config.yaml                             | 35 +++++++++
 docker-compose.yaml                     | 18 +----
 helm/templates/configmap.yaml           | 49 ++++++------
 helm/templates/deployment.yaml          | 19 ++---
 helm/templates/secret.yaml              | 16 ----
 helm/values.yaml                        | 47 +++++++-----
 kubernetes/krawl-all-in-one-deploy.yaml | 71 +++++++++++++-----
 kubernetes/manifests/configmap.yaml     | 50 +++++++++----
 kubernetes/manifests/deployment.yaml    | 13 +++-
 requirements.txt                        |  3 +
 src/config.py                           | 99 ++++++++++++++++++-------
 src/generators.py                       | 14 +---
 src/server.py                           | 51 +++++++------
 14 files changed, 307 insertions(+), 181 deletions(-)
 create mode 100644 config.yaml
 delete mode 100644 helm/templates/secret.yaml

diff --git a/Dockerfile b/Dockerfile
index adac20f..e0fb6af 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,6 +4,9 @@ LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl
 
 WORKDIR /app
 
+COPY requirements.txt /app/
+RUN pip install --no-cache-dir -r requirements.txt
+
 COPY src/ /app/src/
 COPY wordlists.json /app/
 
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..c4faa8f
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,35 @@
+# Krawl Honeypot Configuration
+
+server:
+  port: 5000
+  delay: 100  # Response delay in milliseconds
+  timezone: null  # e.g., "America/New_York" or null for system default
+
+links:
+  min_length: 5
+  max_length: 15
+  min_per_page: 10
+  max_per_page: 15
+  char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+  max_counter: 10
+
+canary:
+  token_url: null  # Optional canary token URL
+  token_tries: 10
+
+dashboard:
+  # If set to null (or omitted), a random path is auto-generated.
+  # Can be set to a fixed value such as "dashboard".
+  secret_path: dashboard
+
+api:
+  server_url: null
+  server_port: 8080
+  server_path: "/api/v2/users"
+
+database:
+  path: "data/krawl.db"
+  retention_days: 30
+
+behavior:
+  probability_error_codes: 0  # 0-100 percentage
\ No newline at end of file
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 6f81a47..776e919 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -10,23 +10,9 @@ services:
       - "5000:5000"
     volumes:
       - ./wordlists.json:/app/wordlists.json:ro
+      - ./config.yaml:/app/config.yaml:ro
     environment:
-      - PORT=5000
-      - DELAY=100
-      - LINKS_MIN_LENGTH=5
-      - LINKS_MAX_LENGTH=15
-      - LINKS_MIN_PER_PAGE=10
-      - LINKS_MAX_PER_PAGE=15
-      - MAX_COUNTER=10
-      - CANARY_TOKEN_TRIES=10
-      - PROBABILITY_ERROR_CODES=0
-      # - SERVER_HEADER=Apache/2.2.22 (Ubuntu)
-      # Optional: Set your canary token URL
-      # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt
-      # Optional: Set custom dashboard path (auto-generated if not set)
-      # - DASHBOARD_SECRET_PATH=/my-secret-dashboard
-      # Optional: Set timezone for logs and dashboard (e.g., America/New_York, Europe/Rome)
-      # - TIMEZONE=UTC
+      - CONFIG_LOCATION=config.yaml
     restart: unless-stopped
     healthcheck:
       test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"]
diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml
index 17cd952..808d9f5 100644
--- a/helm/templates/configmap.yaml
+++ b/helm/templates/configmap.yaml
@@ -5,25 +5,30 @@ metadata:
   labels:
     {{- include "krawl.labels" . | nindent 4 }}
 data:
-  PORT: {{ .Values.config.port | quote }}
-  DELAY: {{ .Values.config.delay | quote }}
-  LINKS_MIN_LENGTH: {{ .Values.config.linksMinLength | quote }}
-  LINKS_MAX_LENGTH: {{ .Values.config.linksMaxLength | quote }}
-  LINKS_MIN_PER_PAGE: {{ .Values.config.linksMinPerPage | quote }}
-  LINKS_MAX_PER_PAGE: {{ .Values.config.linksMaxPerPage | quote }}
-  MAX_COUNTER: {{ .Values.config.maxCounter | quote }}
-  CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }}
-  PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }}
-  CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }}
-  {{- if .Values.config.dashboardSecretPath }}
-  DASHBOARD_SECRET_PATH: {{ .Values.config.dashboardSecretPath | quote }}
-  {{- end }}
-  {{- if .Values.config.serverHeader }}
-  SERVER_HEADER: {{ .Values.config.serverHeader | quote }}
-  {{- end }}
-  {{- if .Values.config.timezone }}
-  TIMEZONE: {{ .Values.config.timezone | quote }}
-  {{- end }}
-  # Database configuration
-  DATABASE_PATH: {{ .Values.database.path | quote }}
-  DATABASE_RETENTION_DAYS: {{ .Values.database.retentionDays | quote }}
+  config.yaml: |
+    # Krawl Honeypot Configuration
+    server:
+      port: {{ .Values.config.server.port }}
+      delay: {{ .Values.config.server.delay }}
+      timezone: {{ .Values.config.server.timezone | toYaml }}
+    links:
+      min_length: {{ .Values.config.links.min_length }}
+      max_length: {{ .Values.config.links.max_length }}
+      min_per_page: {{ .Values.config.links.min_per_page }}
+      max_per_page: {{ .Values.config.links.max_per_page }}
+      char_space: {{ .Values.config.links.char_space | quote }}
+      max_counter: {{ .Values.config.links.max_counter }}
+    canary:
+      token_url: {{ .Values.config.canary.token_url | toYaml }}
+      token_tries: {{ .Values.config.canary.token_tries }}
+    dashboard:
+      secret_path: {{ .Values.config.dashboard.secret_path | toYaml }}
+    api:
+      server_url: {{ .Values.config.api.server_url | toYaml }}
+      server_port: {{ .Values.config.api.server_port }}
+      server_path: {{ .Values.config.api.server_path | quote }}
+    database:
+      path: {{ .Values.config.database.path | quote }}
+      retention_days: {{ .Values.config.database.retention_days }}
+    behavior:
+      probability_error_codes: {{ .Values.config.behavior.probability_error_codes }}
diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml
index ecc9655..5635fa3 100644
--- a/helm/templates/deployment.yaml
+++ b/helm/templates/deployment.yaml
@@ -38,18 +38,16 @@ spec:
         imagePullPolicy: {{ .Values.image.pullPolicy }}
         ports:
         - name: http
-          containerPort: {{ .Values.config.port }}
+          containerPort: {{ .Values.config.server.port }}
           protocol: TCP
-        envFrom:
-        - configMapRef:
-            name: {{ include "krawl.fullname" . }}-config
         env:
-        - name: DASHBOARD_SECRET_PATH
-          valueFrom:
-            secretKeyRef:
-              name: {{ include "krawl.fullname" . }}
-              key: dashboard-path
+        - name: CONFIG_LOCATION
+          value: "config.yaml"
         volumeMounts:
+        - name: config
+          mountPath: /app/config.yaml
+          subPath: config.yaml
+          readOnly: true
         - name: wordlists
           mountPath: /app/wordlists.json
           subPath: wordlists.json
@@ -63,6 +61,9 @@ spec:
           {{- toYaml . | nindent 12 }}
         {{- end }}
       volumes:
+      - name: config
+        configMap:
+          name: {{ include "krawl.fullname" . }}-config
       - name: wordlists
         configMap:
           name: {{ include "krawl.fullname" . }}-wordlists
diff --git a/helm/templates/secret.yaml b/helm/templates/secret.yaml
deleted file mode 100644
index 798289c..0000000
--- a/helm/templates/secret.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-{{- $secret := (lookup "v1" "Secret" .Release.Namespace (include "krawl.fullname" .)) -}}
-{{- $dashboardPath := "" -}}
-{{- if and $secret $secret.data -}}
-  {{- $dashboardPath = index $secret.data "dashboard-path" | b64dec -}}
-{{- else -}}
-  {{- $dashboardPath = printf "/%s" (randAlphaNum 32) -}}
-{{- end -}}
-apiVersion: v1
-kind: Secret
-metadata:
-  name: {{ include "krawl.fullname" . }}
-  labels:
-    {{- include "krawl.labels" . | nindent 4 }}
-type: Opaque
-stringData:
-  dashboard-path: {{ $dashboardPath | quote }}
diff --git a/helm/values.yaml b/helm/values.yaml
index c92bc0b..60b1a66 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -62,29 +62,36 @@ tolerations: []
 
 affinity: {}
 
-# Application configuration
+# Application configuration (config.yaml structure)
 config:
-  port: 5000
-  delay: 100
-  linksMinLength: 5
-  linksMaxLength: 15
-  linksMinPerPage: 10
-  linksMaxPerPage: 15
-  maxCounter: 10
-  canaryTokenTries: 10
-  probabilityErrorCodes: 0
-# timezone: "UTC"
-#  serverHeader: "Apache/2.2.22 (Ubuntu)"
-#  dashboardSecretPath: "/my-secret-dashboard"
-#  canaryTokenUrl: set-your-canary-token-url-here
-#  timezone: "UTC"  # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used.
+  server:
+    port: 5000
+    delay: 100
+    timezone: null  # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used.
+  links:
+    min_length: 5
+    max_length: 15
+    min_per_page: 10
+    max_per_page: 15
+    char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+    max_counter: 10
+  canary:
+    token_url: null  # Set your canary token URL here
+    token_tries: 10
+  dashboard:
+    secret_path: null  # Auto-generated when null; set a fixed path such as "/my-secret-dashboard" to override
+  api:
+    server_url: null
+    server_port: 8080
+    server_path: "/api/v2/users"
+  database:
+    path: "data/krawl.db"
+    retention_days: 30
+  behavior:
+    probability_error_codes: 0
 
-# Database configuration
+# Database persistence configuration
 database:
-  # Path to the SQLite database file
-  path: "data/krawl.db"
-  # Number of days to retain access logs and attack data
-  retentionDays: 30
   # Persistence configuration
   persistence:
     enabled: true
diff --git a/kubernetes/krawl-all-in-one-deploy.yaml b/kubernetes/krawl-all-in-one-deploy.yaml
index d1a026c..3344260 100644
--- a/kubernetes/krawl-all-in-one-deploy.yaml
+++ b/kubernetes/krawl-all-in-one-deploy.yaml
@@ -10,19 +10,41 @@ metadata:
   name: krawl-config
   namespace: krawl-system
 data:
-  PORT: "5000"
-  DELAY: "100"
-  LINKS_MIN_LENGTH: "5"
-  LINKS_MAX_LENGTH: "15"
-  LINKS_MIN_PER_PAGE: "10"
-  LINKS_MAX_PER_PAGE: "15"
-  MAX_COUNTER: "10"
-  CANARY_TOKEN_TRIES: "10"
-  PROBABILITY_ERROR_CODES: "0"
-#  CANARY_TOKEN_URL: set-your-canary-token-url-here
-  # Database configuration
-  DATABASE_PATH: "data/krawl.db"
-  DATABASE_RETENTION_DAYS: "30"
+  config.yaml: |
+    # Krawl Honeypot Configuration
+    server:
+      port: 5000
+      delay: 100
+      timezone: null  # e.g., "America/New_York" or null for system default
+
+    links:
+      min_length: 5
+      max_length: 15
+      min_per_page: 10
+      max_per_page: 15
+      char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+      max_counter: 10
+
+    canary:
+      token_url: null  # Optional canary token URL
+      token_tries: 10
+
+    dashboard:
+      # Auto-generates random path if null
+      # Can be set to "/dashboard" or similar
+      secret_path: null
+
+    api:
+      server_url: null
+      server_port: 8080
+      server_path: "/api/v2/users"
+
+    database:
+      path: "data/krawl.db"
+      retention_days: 30
+
+    behavior:
+      probability_error_codes: 0  # 0-100 percentage
 ---
 apiVersion: v1
 kind: ConfigMap
@@ -227,6 +249,14 @@ data:
         500,
         502,
         503
+      ],
+      "server_headers": [
+        "Apache/2.4.41 (Ubuntu)",
+        "nginx/1.18.0",
+        "Microsoft-IIS/10.0",
+        "cloudflare",
+        "AmazonS3",
+        "gunicorn/20.1.0"
       ]
     }
 ---
@@ -269,10 +299,14 @@ spec:
         - containerPort: 5000
           name: http
           protocol: TCP
-        envFrom:
-        - configMapRef:
-            name: krawl-config
+        env:
+        - name: CONFIG_LOCATION
+          value: "config.yaml"
         volumeMounts:
+        - name: config
+          mountPath: /app/config.yaml
+          subPath: config.yaml
+          readOnly: true
         - name: wordlists
           mountPath: /app/wordlists.json
           subPath: wordlists.json
@@ -287,6 +321,9 @@ spec:
             memory: "256Mi"
             cpu: "500m"
       volumes:
+      - name: config
+        configMap:
+          name: krawl-config
       - name: wordlists
         configMap:
           name: krawl-wordlists
@@ -353,7 +390,7 @@ spec:
     - podSelector: {}
     - namespaceSelector: {}
     - ipBlock:
-        cidr: 0.0.0.0/0 
+        cidr: 0.0.0.0/0
     ports:
     - protocol: TCP
       port: 5000
diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml
index ef357b0..38a287b 100644
--- a/kubernetes/manifests/configmap.yaml
+++ b/kubernetes/manifests/configmap.yaml
@@ -4,18 +4,38 @@ metadata:
   name: krawl-config
   namespace: krawl-system
 data:
-  PORT: "5000"
-  DELAY: "100"
-  LINKS_MIN_LENGTH: "5"
-  LINKS_MAX_LENGTH: "15"
-  LINKS_MIN_PER_PAGE: "10"
-  LINKS_MAX_PER_PAGE: "15"
-  MAX_COUNTER: "10"
-  CANARY_TOKEN_TRIES: "10"
-  PROBABILITY_ERROR_CODES: "0"
-  SERVER_HEADER: "Apache/2.2.22 (Ubuntu)"
-#  CANARY_TOKEN_URL: set-your-canary-token-url-here
-#  TIMEZONE: "UTC"  # IANA timezone (e.g., "America/New_York", "Europe/Rome")
-  # Database configuration
-  DATABASE_PATH: "data/krawl.db"
-  DATABASE_RETENTION_DAYS: "30"
\ No newline at end of file
+  config.yaml: |
+    # Krawl Honeypot Configuration
+    server:
+      port: 5000
+      delay: 100
+      timezone: null  # e.g., "America/New_York" or null for system default
+
+    links:
+      min_length: 5
+      max_length: 15
+      min_per_page: 10
+      max_per_page: 15
+      char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+      max_counter: 10
+
+    canary:
+      token_url: null  # Optional canary token URL
+      token_tries: 10
+
+    dashboard:
+      # Auto-generates random path if null
+      # Can be set to "/dashboard" or similar
+      secret_path: null
+
+    api:
+      server_url: null
+      server_port: 8080
+      server_path: "/api/v2/users"
+
+    database:
+      path: "data/krawl.db"
+      retention_days: 30
+
+    behavior:
+      probability_error_codes: 0  # 0-100 percentage
diff --git a/kubernetes/manifests/deployment.yaml b/kubernetes/manifests/deployment.yaml
index 1650721..f970625 100644
--- a/kubernetes/manifests/deployment.yaml
+++ b/kubernetes/manifests/deployment.yaml
@@ -23,10 +23,14 @@ spec:
         - containerPort: 5000
           name: http
           protocol: TCP
-        envFrom:
-        - configMapRef:
-            name: krawl-config
+        env:
+        - name: CONFIG_LOCATION
+          value: "config.yaml"
         volumeMounts:
+        - name: config
+          mountPath: /app/config.yaml
+          subPath: config.yaml
+          readOnly: true
         - name: wordlists
           mountPath: /app/wordlists.json
           subPath: wordlists.json
@@ -41,6 +45,9 @@ spec:
             memory: "256Mi"
             cpu: "500m"
       volumes:
+      - name: config
+        configMap:
+          name: krawl-config
       - name: wordlists
         configMap:
           name: krawl-wordlists
diff --git a/requirements.txt b/requirements.txt
index 94f74f2..8cb6dc5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,8 @@
 # Krawl Honeypot Dependencies
 # Install with: pip install -r requirements.txt
 
+# Configuration
+PyYAML>=6.0
+
 # Database ORM
 SQLAlchemy>=2.0.0,<3.0.0
diff --git a/src/config.py b/src/config.py
index 87fca1c..fb679b4 100644
--- a/src/config.py
+++ b/src/config.py
@@ -1,11 +1,15 @@
 #!/usr/bin/env python3
 
 import os
+import sys
 from dataclasses import dataclass
+from pathlib import Path
 from typing import Optional, Tuple
 from zoneinfo import ZoneInfo
 import time
 
+import yaml
+
 
 @dataclass
 class Config:
@@ -23,12 +27,11 @@ class Config:
     api_server_port: int = 8080
     api_server_path: str = "/api/v2/users"
     probability_error_codes: int = 0  # Percentage (0-100)
-    server_header: Optional[str] = None
     # Database settings
     database_path: str = "data/krawl.db"
     database_retention_days: int = 30
     timezone: str = None  # IANA timezone (e.g., 'America/New_York', 'Europe/Rome')
-    
+
     @staticmethod
     # Try to fetch timezone before if not set
     def get_system_timezone() -> str:
@@ -38,16 +41,16 @@ class Config:
                 tz_path = os.readlink('/etc/localtime')
                 if 'zoneinfo/' in tz_path:
                     return tz_path.split('zoneinfo/')[-1]
-            
+
             local_tz = time.tzname[time.daylight]
             if local_tz and local_tz != 'UTC':
                 return local_tz
         except Exception:
             pass
-        
+
         # Default fallback to UTC
         return 'UTC'
-    
+
     def get_timezone(self) -> ZoneInfo:
         """Get configured timezone as ZoneInfo object"""
         if self.timezone:
@@ -55,7 +58,7 @@ class Config:
                 return ZoneInfo(self.timezone)
             except Exception:
                 pass
-        
+
         system_tz = self.get_system_timezone()
         try:
             return ZoneInfo(system_tz)
@@ -63,31 +66,71 @@ class Config:
             return ZoneInfo('UTC')
 
     @classmethod
-    def from_env(cls) -> 'Config':
-        """Create configuration from environment variables"""
+    def from_yaml(cls) -> 'Config':
+        """Create configuration from YAML file"""
+        config_location = os.getenv('CONFIG_LOCATION', 'config.yaml')
+        config_path = Path(__file__).parent.parent / config_location
+
+        try:
+            with open(config_path, 'r') as f:
+                data = yaml.safe_load(f)
+        except FileNotFoundError:
+            print(f"Error: Configuration file '{config_path}' not found.", file=sys.stderr)
+            print(f"Please create a config.yaml file or set CONFIG_LOCATION environment variable.", file=sys.stderr)
+            sys.exit(1)
+        except yaml.YAMLError as e:
+            print(f"Error: Invalid YAML in configuration file '{config_path}': {e}", file=sys.stderr)
+            sys.exit(1)
+
+        if data is None:
+            data = {}
+
+        # Extract nested values with defaults
+        server = data.get('server', {})
+        links = data.get('links', {})
+        canary = data.get('canary', {})
+        dashboard = data.get('dashboard', {})
+        api = data.get('api', {})
+        database = data.get('database', {})
+        behavior = data.get('behavior', {})
+
+        # Handle dashboard_secret_path - auto-generate if null/not set
+        dashboard_path = dashboard.get('secret_path')
+        if dashboard_path is None:
+            dashboard_path = f'/{os.urandom(16).hex()}'
+
         return cls(
-            port=int(os.getenv('PORT', 5000)),
-            delay=int(os.getenv('DELAY', 100)),
+            port=server.get('port', 5000),
+            delay=server.get('delay', 100),
+            timezone=server.get('timezone'),
             links_length_range=(
-                int(os.getenv('LINKS_MIN_LENGTH', 5)),
-                int(os.getenv('LINKS_MAX_LENGTH', 15))
+                links.get('min_length', 5),
+                links.get('max_length', 15)
             ),
             links_per_page_range=(
-                int(os.getenv('LINKS_MIN_PER_PAGE', 10)),
-                int(os.getenv('LINKS_MAX_PER_PAGE', 15))
+                links.get('min_per_page', 10),
+                links.get('max_per_page', 15)
             ),
-            char_space=os.getenv('CHAR_SPACE', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
-            max_counter=int(os.getenv('MAX_COUNTER', 10)),
-            canary_token_url=os.getenv('CANARY_TOKEN_URL'),
-            canary_token_tries=int(os.getenv('CANARY_TOKEN_TRIES', 10)),
-            dashboard_secret_path=os.getenv('DASHBOARD_SECRET_PATH', f'/{os.urandom(16).hex()}'),
-            api_server_url=os.getenv('API_SERVER_URL'),
-            api_server_port=int(os.getenv('API_SERVER_PORT', 8080)),
-            api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'),
-            probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 0)),
-            server_header=os.getenv('SERVER_HEADER'),
-            database_path=os.getenv('DATABASE_PATH', 'data/krawl.db'),
-            database_retention_days=int(os.getenv('DATABASE_RETENTION_DAYS', 30)),
-            timezone=os.getenv('TIMEZONE')  # If not set, will use system timezone
-
+            char_space=links.get('char_space', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
+            max_counter=links.get('max_counter', 10),
+            canary_token_url=canary.get('token_url'),
+            canary_token_tries=canary.get('token_tries', 10),
+            dashboard_secret_path=dashboard_path,
+            api_server_url=api.get('server_url'),
+            api_server_port=api.get('server_port', 8080),
+            api_server_path=api.get('server_path', '/api/v2/users'),
+            probability_error_codes=behavior.get('probability_error_codes', 0),
+            database_path=database.get('path', 'data/krawl.db'),
+            database_retention_days=database.get('retention_days', 30),
         )
+
+
+_config_instance = None
+
+
+def get_config() -> Config:
+    """Get the singleton Config instance"""
+    global _config_instance
+    if _config_instance is None:
+        _config_instance = Config.from_yaml()
+    return _config_instance
diff --git a/src/generators.py b/src/generators.py
index 6e24ba8..6eca9fd 100644
--- a/src/generators.py
+++ b/src/generators.py
@@ -9,8 +9,6 @@ import string
 import json
 from templates import html_templates
 from wordlists import get_wordlists
-from config import Config
-from logger import get_app_logger
 
 def random_username() -> str:
     """Generate random username"""
@@ -38,15 +36,9 @@ def random_email(username: str = None) -> str:
     return f"{username}@{random.choice(wl.email_domains)}"
 
 def random_server_header() -> str:
-    """Generate random server header"""
-    
-    if Config.from_env().server_header:
-        server_header = Config.from_env().server_header
-    else:
-        wl = get_wordlists()
-        server_header = random.choice(wl.server_headers)
-    
-    return server_header
+    """Generate random server header from wordlists"""
+    wl = get_wordlists()
+    return random.choice(wl.server_headers)
 
 def random_api_key() -> str:
     """Generate random API key"""
diff --git a/src/server.py b/src/server.py
index 06b7c82..7a59c73 100644
--- a/src/server.py
+++ b/src/server.py
@@ -8,7 +8,7 @@ Run this file to start the server.
 import sys
 from http.server import HTTPServer
 
-from config import Config
+from config import get_config
 from tracker import AccessTracker
 from handler import Handler
 from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
@@ -20,24 +20,29 @@ def print_usage():
     print(f'Usage: {sys.argv[0]} [FILE]\n')
     print('FILE is file containing a list of webpage names to serve, one per line.')
     print('If no file is provided, random links will be generated.\n')
-    print('Environment Variables:')
-    print('  PORT                  - Server port (default: 5000)')
-    print('  DELAY                 - Response delay in ms (default: 100)')
-    print('  LINKS_MIN_LENGTH      - Min link length (default: 5)')
-    print('  LINKS_MAX_LENGTH      - Max link length (default: 15)')
-    print('  LINKS_MIN_PER_PAGE    - Min links per page (default: 10)')
-    print('  LINKS_MAX_PER_PAGE    - Max links per page (default: 15)')
-    print('  MAX_COUNTER           - Max counter value (default: 10)')
-    print('  CANARY_TOKEN_URL      - Canary token URL to display')
-    print('  CANARY_TOKEN_TRIES    - Number of tries before showing token (default: 10)')
-    print('  DASHBOARD_SECRET_PATH - Secret path for dashboard (auto-generated if not set)')
-    print('  PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)')
-    print('  CHAR_SPACE            - Characters for random links')
-    print('  SERVER_HEADER         - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))')
-    print('  DATABASE_PATH         - Path to SQLite database (default: data/krawl.db)')
-    print('  DATABASE_RETENTION_DAYS - Days to retain database records (default: 30)')
-    print('  TIMEZONE              - IANA timezone for logs/dashboard (e.g., America/New_York, Europe/Rome)')
-    print('                          If not set, system timezone will be used')
+    print('Configuration:')
+    print('  Configuration is loaded from a YAML file (default: config.yaml)')
+    print('  Set CONFIG_LOCATION environment variable to use a different file.\n')
+    print('  Example config.yaml structure:')
+    print('    server:')
+    print('      port: 5000')
+    print('      delay: 100')
+    print('      timezone: null  # or "America/New_York"')
+    print('    links:')
+    print('      min_length: 5')
+    print('      max_length: 15')
+    print('      min_per_page: 10')
+    print('      max_per_page: 15')
+    print('    canary:')
+    print('      token_url: null')
+    print('      token_tries: 10')
+    print('    dashboard:')
+    print('      secret_path: null  # auto-generated if not set')
+    print('    database:')
+    print('      path: "data/krawl.db"')
+    print('      retention_days: 30')
+    print('    behavior:')
+    print('      probability_error_codes: 0')
 
 
 def main():
@@ -46,19 +51,17 @@ def main():
         print_usage()
         exit(0)
 
-    config = Config.from_env()
-    
+    config = get_config()
+
     # Get timezone configuration
     tz = config.get_timezone()
-    
+
     # Initialize logging with timezone
     initialize_logging(timezone=tz)
     app_logger = get_app_logger()
     access_logger = get_access_logger()
     credential_logger = get_credential_logger()
 
-    config = Config.from_env()
-
     # Initialize database for persistent storage
     try:
         initialize_database(config.database_path)

From 349c14933529cd1fd24a0bfebd31f99e0425c3cc Mon Sep 17 00:00:00 2001
From: Phillip Tarrant <ptarrant@gmail.com>
Date: Fri, 2 Jan 2026 13:52:51 -0600
Subject: [PATCH 11/21] Add logs directory bind mount with entrypoint
 permission fix

  - Add ./logs:/app/logs volume mount to docker-compose.yaml for log access
  - Create entrypoint.sh script that fixes directory ownership at startup
  - Install gosu in Dockerfile for secure privilege dropping
  - Use ENTRYPOINT to run permission fix as root, then drop to krawl user

  This ensures bind-mounted directories have correct permissions even when
  Docker creates them as root on the host.
---
 Dockerfile          | 12 +++++++++---
 docker-compose.yaml |  1 +
 entrypoint.sh       |  8 ++++++++
 3 files changed, 18 insertions(+), 3 deletions(-)
 create mode 100644 entrypoint.sh

diff --git a/Dockerfile b/Dockerfile
index e0fb6af..2c7b954 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,19 +4,25 @@ LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl
 
 WORKDIR /app
 
+# Install gosu for dropping privileges
+RUN apt-get update && apt-get install -y --no-install-recommends gosu && \
+    rm -rf /var/lib/apt/lists/*
+
 COPY requirements.txt /app/
 RUN pip install --no-cache-dir -r requirements.txt
 
 COPY src/ /app/src/
 COPY wordlists.json /app/
+COPY entrypoint.sh /app/
 
 RUN useradd -m -u 1000 krawl && \
-    chown -R krawl:krawl /app
-
-USER krawl
+    mkdir -p /app/logs /app/data && \
+    chown -R krawl:krawl /app && \
+    chmod +x /app/entrypoint.sh
 
 EXPOSE 5000
 
 ENV PYTHONUNBUFFERED=1
 
+ENTRYPOINT ["/app/entrypoint.sh"]
 CMD ["python3", "src/server.py"]
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 776e919..02b6ae7 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -11,6 +11,7 @@ services:
     volumes:
       - ./wordlists.json:/app/wordlists.json:ro
       - ./config.yaml:/app/config.yaml:ro
+      - ./logs:/app/logs
     environment:
       - CONFIG_LOCATION=config.yaml
     restart: unless-stopped
diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100644
index 0000000..28b5fc0
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+set -e
+
+# Fix ownership of mounted directories
+chown -R krawl:krawl /app/logs /app/data 2>/dev/null || true
+
+# Drop to krawl user and run the application
+exec gosu krawl "$@"

From 5f8bb73546a9447fdf855134b0c7c42244810d42 Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Sat, 3 Jan 2026 17:14:58 +0100
Subject: [PATCH 12/21] added random SQL errors, random server errors, XSS
 baits

---
 src/data/krawl.db                      | Bin 0 -> 69632 bytes
 src/handler.py                         | 158 ++++++++++++++++++++++++-
 src/server_errors.py                   |  65 ++++++++++
 src/sql_errors.py                      | 112 ++++++++++++++++++
 src/templates/html/generic_search.html |  66 +++++++++++
 src/templates/html/input_form.html     |  74 ++++++++++++
 src/templates/html/robots.txt          |  10 ++
 src/templates/html_templates.py        |  10 ++
 src/tracker.py                         |  22 ++--
 src/wordlists.py                       |  12 ++
 src/xss_detector.py                    |  73 ++++++++++++
 11 files changed, 589 insertions(+), 13 deletions(-)
 create mode 100644 src/data/krawl.db
 create mode 100644 src/server_errors.py
 create mode 100644 src/sql_errors.py
 create mode 100644 src/templates/html/generic_search.html
 create mode 100644 src/templates/html/input_form.html
 create mode 100644 src/xss_detector.py

diff --git a/src/data/krawl.db b/src/data/krawl.db
new file mode 100644
index 0000000000000000000000000000000000000000..759ffb958d54f426a0a424446aa7baec7af9d275
GIT binary patch
literal 69632
zcmeI5eQX=&eaA^r6m{hB9=)2DZAE7riKR6@-aC2nvPHES$E$56lD8y73W6qSnUzG+
zypwFFLxyvb7X4%0hM;H=v?#Cz+x}=f6bRNN8@6`E3hbpAunkDJV8PaAz*cl<&;kvx
z_K*D@iag#!ox~KG&Mx*l+mHFY+&$m>JkLEZe15!V_3?`}rz&1+v^GnQs71yi(P-ph
zQH(?)D*PLVf88SuABMXg_&@6Vyw^uHBHej7&3!H+jQm1GcujaZ{cq`I{$KfG>ha{O
z$sG4e;$3ci<m(WL2NFO6NB{}kqXgV3A(44J8+DCZy;8kZyHzYXPN}?JtW=$9*{L<^
zcCl10S8cnv(O4_iD!Zb|*_Fc5m4bNT^0~s3;@}I2%a_Gn7ZK-~Ocw4um`-G}+352L
zr*w6rx-0#z0HLGe%F>4~7J{yy<Z2brc%^W@up(Ywz9L@!)r%J&6q6j2ad3(qBF^n*
zRs6N3m9vj7t;`$p0&^AUXUo^Cm0}Z=Ra<p0oobj1G?&L#E?in#c}o09;VE&xR%wSl
zvb<8baQ-q4>$`n%rSM2$rEvLdVO8wuv)gA<e&-4o3xNFW((2izbA{x>om;6yW_C9E
z!r3;iTC-?7CCBdm&ogkkq2fF+X?jX^gS{iC;gmLtt?G@fs_oc;V6N3#wo|mL)w*~N
zs=$>CmkJD&jncl+<;GUsY27ZC8<iedwT-4+b8dUe6gA%tEZOz0jCR4<^<ty7<|XT*
zZdIFGaQ9Gki*~uus`}6I$2GQE<zBv~-p<39V5<q0y;|{ZZFey?7m^EEek5~tHX3O^
zKK6|b7<Uy*TTY|xFLqfMmF_1x-WsKifjgE=WG-n@_jLEkE4QkZYTc;;c3O)zn~q&{
zYMX$+wAl=ZO!Yr`0cm;{p!FsofjIAFvG8#&k@;m%G`^prT9B0F`;h{bZ7&VA=7M`<
zG?7__+|TTvd+&|{?uL7RI(paGN6~{~kIbD^B9YOw=yMOXAFzO&1A<1G$1Wg2`w0wg
zmG*fsp^3TPtMsy-XP?_G8`_`JX0K*<pNnS6wr@6CmEO6Xw#mCquc|Xoaevd5T=<QV
zM5b^u>dtmc%GX8}Yj)AzvYWMXt+8eMA`<;Y<BMZYqGjTXgjUag$%SS-k$DJmPY31h
z-HY!6Bm3p-ov<rcFJ{3NhM|*$EPI`#*CpCi`}Ib>db`<hicYJxwpMNVGLP?<dBCZ=
zWCWyHcqW#}oCQ5cgAhPV<4N}k8Qxcj@0{HVn*K-O*M~+j*^|*5!CCt<jP0LcKsF0^
z3eiO7<jLsskMDH*J<7i84t1{E>FRrF4(xF%me!zM_MRI<+3BP<tIqXC?{QN4h6GHl
z6+gVZe6g@}nR&7TiZ(EvSDf$n0gXfd5+y&O-OM=oN&1cS1N_T8pL#a=gXAjr8aF@s
z(r7YqbL5?oOYzs^Cx<^f9Eq(Dy*+d>`qk(Ikr(M{|L2E0b~G88yWce?M@N?wH7CjN
zN6B6*ZP&_;x&+Rp#xu2zjZ)T-WO4pdsa&f&4g2~T@j~6HZio;dF0YDDiVB=&oDo~w
zi;8Ya!2Eo*yxz#FvLeG@MSP^zs$Of{%C=+A7p|zXYMz#r)3PQi`l7BaDyo#1Wz*1H
z;Rr}N4w5*|C+TMMbg8mgt7k(Llv7nLZ@LpRAZVeVQRQm!Dh$cDni7mJ4o;MDT2W7H
zvM5`WlA%h9YU#4#lEWbCWIv_w73q0NR`k4LxycMDnxTsPRY_W_IR{u~EFN8#<eaI<
zId|evr_7j2)a`6-$+_i(Rc4wcnU<xSmOC{KqE7dhS#Jmut}H_`4Oz}<E;j|TGX1io
zYJEE_L@HDwQ_*sYJ2?qjJ_K5r_NdgXWw+I=T?JN-9hxdyoAi9naK|S=)u}*LN|p}o
zLer!elB(zPM&6wo52{Q%Z6tgtP^(ML>&>uG$vH_=piR%aV`CtTb|~zF2U2b?9dP^2
zwgs{#DVCbgYwj2clH?;54kdNbuq0VGX-kk2K+@6v3e_&QkQz%+qe*H`HJ|}Vr$NnJ
zznZP)Mx#`*!_lKziv|oaa=E;&yJ^0ohpj`c+BQu6L(`*Ml40taV!DS@pvUO1(=b4~
zR$I&Zn(y$WDbS_p&|tbVNsyM?LmD)j#r5iKTl$#YsE1O?p%|gNGfc%$b1u(;w%I+j
zg(k<8a=NPNhMO7%IrN!hYSqq&DKs^@q?xh;gUVFm?yFICNj37Ol5@EcP(z0RySm-5
z$S8RzCoN~l?sy!uKo2s_l+yt>*N-i)(m9E*3xTQ(a}wDwjU2Ss!$GCBqbV#x(8ua#
zPB&rf9|JXX?9Y@|t8umAgg1l)KC&uViYXf~+#Ui&FhH7S#{Mv8JJ69xV4=w{g|%b@
zL<vz4bvGuG{#uk@G;)#&6L&h(jzmrf_eaQ&$bXPG$zPK{C0`<6AiqnVBU@yRJVic2
z9wJ$CoJ^9q@SgBr!dt?(guf8JBK*Ga8R3({ZDC#bsIV+72^w4k4<vvDkN^@u0!RP}
zAOR$R1dsp{`1vM~j?G2Ah1e5}?>OTV8Q&b^yN~hBGQMMs?<nIt!uVzw-(kj=VSI-e
z-!$W!VtkW~Z-ViSGrlp#M;M>L_|lAzXM8Efmt=e#;~Qms3C1_V_~MLjnDNCJ-w+?0
z<Gj*r&;OIZ9U<Q)e@s3>K1y_w622q6F8q$LCj62xo&J9M8|jzRH`0%$=lCD<-{N24
zKhCf43w$K?X6nnS7gA5Bv{W+r@5$HTB6uJHB!C2v01`j~NB{{SfuD`Q{P5gTZ>rH}
zkO?-Kc}dj_!!l0{PAHp}V&#tyN|=*WOH*Y-+&3XuIf7-1Z0LqIHz1*6Nm|~pbnCuB
z2@OeCEYs9x2PM=bQ<il_KQ<^~*IxDL;DkGNq(^oo^sN*6Z98WMB=k%!4-ZP%HHXX$
zNa&d;9vYOeYcx1LAfac`H#I0>*T`;iKtj**ZDK$|&lYWbP{OW-*x0@aeRf-9P{OWB
zm9SqzzpYbxKtj(5i64}(YZ8<ikkGTwNe)WbwVdGwCF~ltj1EZX*_$K=B=n3yMg}G9
zT4=-vB=n3Yh6g3=+BC!lCG6T644s08v^UG|vm)79|5q%_FxvBf@<fEZM}A1YOTGr{
z|9?n+k9?XuPo5!7vQDm%5_y_DL4Hd9oBStvoBT8RC-V2?Z^&PfKOtWsUnHL=FOe6?
zr^s)Tt)H!_$J9sw2_OL^fCP{L5<mh-00|%gB!C2d?g<RXMx!UFbDTONb>^sZA9ZGj
zV_ftY{dAOmIzm6q&`*cyrwsjch&t2MnWABn)R~~(aq5gwhtM#AI%(?Vsgt5kk~$o9
zMyZpaNk-_WICX}pH%6VISd4>TLxKH&>bnv09r6wG3V8wM`{&6#;e_uAe=B@Z_${Fh
z*8Q?Dp8jF_AJea<Ka<{0KatL-XZZi&-{Sv_|2+RJU*sR=kMl!t5j>Cp5<mh-00|%g
zB!C2v01~*{1n!G1Mfb6!*ekkYKM@@5Sxp2)`)wrxqk9$-!O@;wL~yic4G|pe*+A?S
z-LZV=MDJzq5E$LFatMz0Y#Rcjdln7B(ViVcV06!VAvoHzSqO~oSt<lZ_v{mbqdlvH
zfM~xhLU6Qafv`K8u{#Ki?pYfIM|(B~fzdt7g5YS+o*+2dvmywL?%56mM)xcRf}=e<
zfxzgVbwF^mXA=+{?O6iM#+JDEvHtI0{ikdH{+0jt_Fen`^v>!(Y2NLI4)aF>NB{{S
z0VIF~kN^@u0!RP}AOR#0G6A~2zQ_7Mx9|Et7uNbex3m6FH<^i%3lZ{l@=5p)4<vvD
zkN^@u0!RP}AOR$R1dsp{Kmtf$KLWoPOTcae<CE<6aNbq}rAo=J^jYD+26NQZ1vZ#5
z@<VX;{e<|=_H8avy4tsZ-`NI2GgbMaD9v|r&wTAp#y&e2x=R9>2jmUd+=|-&$H*r-
z@BiEH1~Co^AOR$R1dsp{Kmter2_OL^fCP{L68K;QcD>fGZy);Z*93#!|JU9el71_&
z|NH-T&Hvx*?Em+{?gGz70!RP}AOR$R1dsp{Kmter2_OL^fCNG%!0h-pX#c+$d8PCI
zzfd(|W=H@DAOR$R1dsp{Kmter2_OL^fCP|$k3i=Ie|y;k1it^TX9*DS{y*RPKmGlG
zl)M^&zjz=4B!C2v01`j~NB{{S0VIF~kN^@u0{0MsUy1T_(WRx1B1O$f@N0faInno$
zfc&D7lT1yOP5428>i_mYUxbY;R4tcRWcvI6DESfC|KCG5h*FRM5<mh-00|%gB!C2v
z01`j~NB{{Sfe%W6kIix3mj-nGp9tTKknfW>$zPH$k(bDGq)wh9=SY?uA#ved;Vp>9
z0|_7jB!C2v01`j~NB{{S0VIF~kib1aV3Ncm(NpQ!a$|F|Q7<-IjjL6AQfLPrV*}Ie
zz$0uR-ww>MfvI-j6dRaq2adCWTsv@#4IFI;5;ic=4otCuBkjN>8yIf~a%|vmJ21`$
z#@d0yY~WBkaEJ+<h{hw_sWcNt*Z&FmqX_va`7wEye2=_C-X`BB|3LnZe3N{Qyg^<i
zUxsKrkN^@u0!RP}AOR$R1dsp{Kmter2_S(G2*hL2=q#b`F@d^A($vlH)SXIEcRWem
zF^;-ql)9+|b(161<>J(h4^wwIM%|&ISd8Q7`v1|$ixJWg{zZ5+{YLr${$-v|J)8VN
za+Q0Hn;(5?G?}<L^3KSm`0MeL!=D|F#MX!29=aI)YV-j(?H~?!>}V1;Lv)SF(NW)i
zh}TNnwQ{2_fpe+xOl@PMlr<z-oWE2m*XmBgzJ5l$P<N^uA_R!btKyTQ0zZl{&WNq;
zMMXCyc|km1Ew4ASs;tQHR}mknwW`+|x3cZn^MxyYzmkCM8C5AS%ch~Z!V!>k93*j^
zPtwii>788~Lll%#RV{D26Eh%ap`TIZYVm5RyuQ_x>>C>gC(5^%q^>O*s-&ovE-NlM
z45Ci<Qwm=Z{JKR}^t@uZ$qXo(p^E%fNm{Ep*|6~Fx+LdJ*yYomIMgXKrV=$vwtchF
zs@Rfq%Lz-AW=W=H>89mQO@pY@{bg2eRV&rHQ!8zRE6b2fLzZ)z%T0l-OusCtTHg)}
zkqVW_RA7r#cXAT6d<e8K?NO;&%WkV#y9%rvJ2X|aHtG4C;f_y$s#Afgl<byWZH1-@
zcCl6UeBQ{rQ{zFE$+wfO)V$sd3zeLcGzHr9ygN1qvS^3GK6oJI_EKfDRu8Q$kTprM
z)O=oZ$4HPQAE|ICsf&gs$+}5ff|LM~j`mlm_727&0RlCeq~=ru8h~^f)Xep(*=lYy
zN)<aCJ+M!*0Yi*jF0boun(ydg>rkt<U2>|S>Cr98Fm+8a-NPx+WAxW)7$9A%tz~`9
zcX-ki=u&iOFx{CXNXzXZ4Vum3diAy~eavpuL#gCYjL_W~redf$m*+s+>>k=elVeIb
zUDb5MO^t#a`phx4YG=e0ni^fwOj&_JWh!y^)u_6p8hKO6x!ee-p+kUO-ELT9lsuG^
zmNR5`JPul*2bpHd>42N-$Cg*=oWwWUhN|3tv72e+puHXrDy<z&VG)8pRyT9H31j~l
zsG(zjrnF!ozTt#7grqy$tCFRdvH`>GAy5PZq-kdC4|BG5mv&Gik-$Qe;RpJbY=9^s
z3Zm}DMABc2cHRu2Se9YX`G0~xvNQi*PXBpY<^KS@cpw2JfCP{L5<mh-00|%gB=G-=
zfXkhLsmiYTw^OwbaFrt9EoFvbnJ#zy9?&A2mSW{yPQ2?{a*}Gni`@)YnCna@nOS$G
zWM9A9D77jF_cegOpkN6QzP6PO9TrMQ?*l>8{a*u=?9ikrmZaqkOSjyKS&(wK=Km}o
dSRv6B%QRsj`B<mGm@ksTUr5#@Q<il_|6d`vyO#g}

literal 0
HcmV?d00001

diff --git a/src/handler.py b/src/handler.py
index ac7ca22..c93b78b 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -6,6 +6,7 @@ import time
 from datetime import datetime
 from http.server import BaseHTTPRequestHandler
 from typing import Optional, List
+from urllib.parse import urlparse, parse_qs
 
 from config import Config
 from tracker import AccessTracker
@@ -16,6 +17,9 @@ from generators import (
     api_response, directory_listing
 )
 from wordlists import get_wordlists
+from sql_errors import generate_sql_error_response, get_sql_response_with_data
+from xss_detector import detect_xss_pattern, generate_xss_response
+from server_errors import generate_server_error
 
 
 class Handler(BaseHTTPRequestHandler):
@@ -67,6 +71,67 @@ class Handler(BaseHTTPRequestHandler):
         if not error_codes:
             error_codes = [400, 401, 403, 404, 500, 502, 503]
         return random.choice(error_codes)
+    
+    def _parse_query_string(self) -> str:
+        """Return the query component of ``self.path`` exactly as urlparse reports it."""
+        # No decoding is applied here; a path without "?" gives an empty string.
+        return urlparse(self.path).query
+    
+    def _handle_sql_endpoint(self, path: str) -> bool:
+        """
+        Serve the SQL injection honeypot endpoints.
+
+        ``path`` is matched (query string ignored) against the fake SQL API routes; the
+        query itself is read from ``self.path``. A detected injection pattern gets a fake
+        database error, anything else gets a fake JSON result.
+
+        Returns True if the path was handled (even when the client disconnected or an
+        internal error occurred), False if it is not a SQL honeypot endpoint.
+        """
+        sql_endpoints = ['/api/search', '/api/sql', '/api/database']
+        base_path = urlparse(path).path
+        if base_path not in sql_endpoints:
+            return False
+        
+        try:
+            query_string = self._parse_query_string()
+            client_ip = self._get_client_ip()
+            # Log at most 100 characters of attacker-controlled input per request.
+            query_preview = query_string[:100] if query_string else 'empty'
+            
+            # error_msg is None when no injection pattern was recognised.
+            error_msg, content_type, status_code = generate_sql_error_response(query_string or "")
+            
+            if error_msg:
+                self.access_logger.warning(f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_preview}")
+                self.send_response(status_code)
+                self.send_header('Content-type', content_type)
+                self.end_headers()
+                self.wfile.write(error_msg.encode())
+            else:
+                self.access_logger.info(f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {query_preview}")
+                self.send_response(200)
+                self.send_header('Content-type', 'application/json')
+                self.end_headers()
+                response_data = get_sql_response_with_data(base_path, query_string or "")
+                self.wfile.write(response_data.encode())
+            
+            return True
+        except (BrokenPipeError, ConnectionResetError):
+            # Client went away mid-response; nothing left to send.
+            return True
+        except Exception as e:
+            self.app_logger.error(f"Error handling SQL endpoint {path}: {str(e)}")
+            # Best effort: still try to answer so the connection does not hang.
+            try:
+                self.send_response(500)
+                self.send_header('Content-type', 'application/json')
+                self.end_headers()
+                self.wfile.write(b'{"error": "Internal server error"}')
+            except Exception as send_err:
+                # Was a bare ``except:``, which also swallowed KeyboardInterrupt/SystemExit.
+                self.app_logger.error(f"Could not send SQL endpoint error response: {send_err}")
+            return True
 
     def generate_page(self, seed: str) -> str:
         """Generate a webpage containing random links or canary token"""
@@ -207,6 +272,68 @@ class Handler(BaseHTTPRequestHandler):
         user_agent = self._get_user_agent()
         post_data = ""
 
+        from urllib.parse import urlparse
+        base_path = urlparse(self.path).path
+        
+        if base_path in ['/api/search', '/api/sql', '/api/database']:
+            content_length = int(self.headers.get('Content-Length', 0))
+            if content_length > 0:
+                post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
+            
+            self.access_logger.info(f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}")
+            
+            error_msg, content_type, status_code = generate_sql_error_response(post_data)
+            
+            try:
+                if error_msg:
+                    self.access_logger.warning(f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}")
+                    self.send_response(status_code)
+                    self.send_header('Content-type', content_type)
+                    self.end_headers()
+                    self.wfile.write(error_msg.encode())
+                else:
+                    self.send_response(200)
+                    self.send_header('Content-type', 'application/json')
+                    self.end_headers()
+                    response_data = get_sql_response_with_data(base_path, post_data)
+                    self.wfile.write(response_data.encode())
+            except BrokenPipeError:
+                pass
+            except Exception as e:
+                self.app_logger.error(f"Error in SQL POST handler: {str(e)}")
+            return
+        
+        if base_path == '/api/contact':
+            content_length = int(self.headers.get('Content-Length', 0))
+            if content_length > 0:
+                post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
+            
+            parsed_data = {}
+            for pair in post_data.split('&'):
+                if '=' in pair:
+                    key, value = pair.split('=', 1)
+                    from urllib.parse import unquote_plus
+                    parsed_data[unquote_plus(key)] = unquote_plus(value)
+            
+            xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values())
+            
+            if xss_detected:
+                self.access_logger.warning(f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}")
+            else:
+                self.access_logger.info(f"[XSS ENDPOINT POST] {client_ip} - {base_path}")
+            
+            try:
+                self.send_response(200)
+                self.send_header('Content-type', 'text/html')
+                self.end_headers()
+                response_html = generate_xss_response(parsed_data)
+                self.wfile.write(response_html.encode())
+            except BrokenPipeError:
+                pass
+            except Exception as e:
+                self.app_logger.error(f"Error in XSS POST handler: {str(e)}")
+            return
+
         self.access_logger.warning(f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}")
 
         content_length = int(self.headers.get('Content-Length', 0))
@@ -215,20 +342,16 @@ class Handler(BaseHTTPRequestHandler):
 
             self.access_logger.warning(f"[POST DATA] {post_data[:200]}")
 
-            # Parse and log credentials
             username, password = self.tracker.parse_credentials(post_data)
             if username or password:
-                # Log to dedicated credentials.log file
                 timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
                 credential_line = f"{timestamp}|{client_ip}|{username or 'N/A'}|{password or 'N/A'}|{self.path}"
                 self.credential_logger.info(credential_line)
                 
-                # Also record in tracker for dashboard
                 self.tracker.record_credential_attempt(client_ip, self.path, username or 'N/A', password or 'N/A')
                 
                 self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}")
 
-        # send the post data (body) to the record_access function so the post data can be used to detect suspicious things.
         self.tracker.record_access(client_ip, self.path, user_agent, post_data)
         
         time.sleep(1)
@@ -248,6 +371,10 @@ class Handler(BaseHTTPRequestHandler):
     def serve_special_path(self, path: str) -> bool:
         """Serve special paths like robots.txt, API endpoints, etc."""
         
+        # Check SQL injection honeypot endpoints first
+        if self._handle_sql_endpoint(path):
+            return True
+        
         try:
             if path == '/robots.txt':
                 self.send_response(200)
@@ -285,7 +412,28 @@ class Handler(BaseHTTPRequestHandler):
                 self.wfile.write(html_templates.login_form().encode())
                 return True
             
-            # WordPress login page
+            if path in ['/users', '/user', '/database', '/db', '/search']:
+                self.send_response(200)
+                self.send_header('Content-type', 'text/html')
+                self.end_headers()
+                self.wfile.write(html_templates.product_search().encode())
+                return True
+            
+            if path in ['/info', '/input', '/contact', '/feedback', '/comment']:
+                self.send_response(200)
+                self.send_header('Content-type', 'text/html')
+                self.end_headers()
+                self.wfile.write(html_templates.input_form().encode())
+                return True
+            
+            if path == '/server':
+                error_html, content_type = generate_server_error()
+                self.send_response(500)
+                self.send_header('Content-type', content_type)
+                self.end_headers()
+                self.wfile.write(error_html.encode())
+                return True
+            
             if path in ['/wp-login.php', '/wp-login', '/wp-admin', '/wp-admin/']:
                 self.send_response(200)
                 self.send_header('Content-type', 'text/html')
diff --git a/src/server_errors.py b/src/server_errors.py
new file mode 100644
index 0000000..7591c64
--- /dev/null
+++ b/src/server_errors.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+
+import random
+from wordlists import get_wordlists
+
+
+def generate_server_error() -> tuple[str, str]:
+    """Build a fake web-server error page and return ``(html, content_type)``.
+
+    A server flavour is chosen at random from the wordlist's ``server_errors`` data and its
+    template is filled in with a random HTTP error code, its message and a version string.
+    """
+    server_errors = get_wordlists().server_errors
+    if not server_errors:
+        return ("500 Internal Server Error", "text/html")
+    
+    server_type = random.choice(list(server_errors.keys()))
+    server_config = server_errors[server_type]
+    error_codes = {
+        400: "Bad Request",
+        401: "Unauthorized",
+        403: "Forbidden",
+        404: "Not Found",
+        500: "Internal Server Error",
+        502: "Bad Gateway",
+        503: "Service Unavailable"
+    }
+    code = random.choice(list(error_codes.keys()))
+    message = error_codes[code]
+    
+    template = server_config.get('template') or ''
+    # ``or`` also covers an empty list, which random.choice() rejects with IndexError.
+    version = random.choice(server_config.get('versions') or ['1.0'])
+    html = template.replace('{code}', str(code))
+    html = html.replace('{message}', message)
+    html = html.replace('{version}', version)
+    
+    if server_type == 'apache':
+        os_name = random.choice(server_config.get('os') or ['Ubuntu'])
+        html = html.replace('{os}', os_name)
+        html = html.replace('{host}', 'localhost')
+    return (html, "text/html")
+
+
+def get_server_header(server_type: str = None) -> str:
+    """Return a server banner string for ``server_type`` (a random configured type when omitted)."""
+    server_errors = get_wordlists().server_errors
+    if not server_errors:
+        return "nginx/1.18.0"
+    
+    if not server_type:
+        server_type = random.choice(list(server_errors.keys()))
+    
+    server_config = server_errors.get(server_type, {})
+    # ``or`` also covers an empty list, which random.choice() rejects with IndexError.
+    version = random.choice(server_config.get('versions') or ['1.0'])
+    
+    server_headers = {
+        'nginx': f"nginx/{version}",
+        'apache': f"Apache/{version}",
+        'iis': f"Microsoft-IIS/{version}",
+        'tomcat': "Apache-Coyote/1.1",  # fixed string; the picked version is not used here
+    }
+    # Types without an entry above fall back to a plain nginx banner.
+    return server_headers.get(server_type, "nginx/1.18.0")
diff --git a/src/sql_errors.py b/src/sql_errors.py
new file mode 100644
index 0000000..dc84886
--- /dev/null
+++ b/src/sql_errors.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+
+import random
+import re
+from typing import Optional, Tuple
+from wordlists import get_wordlists
+
+
+def detect_sql_injection_pattern(query_string: str) -> Optional[str]:
+    """Return the first injection category matched by the (percent-decoded) input, or None."""
+    if not query_string:
+        return None
+    from urllib.parse import unquote_plus  # local import: the file-level import block is outside this function
+    # Input arrives percent-encoded ("1%20UNION%20SELECT"); undecoded, the \b-anchored patterns cannot match.
+    query_lower = unquote_plus(query_string).lower()
+    patterns = {
+        'quote': [r"'", r'"', r'`'],
+        'comment': [r'--', r'#', r'/\*', r'\*/'],
+        'union': [r'\bunion\b', r'\bunion\s+select\b'],
+        'boolean': [r'\bor\b.*=.*', r'\band\b.*=.*', r"'.*or.*'.*=.*'"],
+        'time_based': [r'\bsleep\b', r'\bwaitfor\b', r'\bdelay\b', r'\bbenchmark\b'],
+        'stacked': [r';.*select', r';.*drop', r';.*insert', r';.*update', r';.*delete'],
+        'command': [r'\bexec\b', r'\bexecute\b', r'\bxp_cmdshell\b'],
+        'info_schema': [r'information_schema', r'table_schema', r'table_name'],
+    }
+    # Categories are tried in dict order, so a payload matching several is reported as the earliest one.
+    for injection_type, pattern_list in patterns.items():
+        for pattern in pattern_list:
+            if re.search(pattern, query_lower):
+                return injection_type
+    return None
+
+
+def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tuple[str, str]:
+    """Pick a fake database error message and return ``(message, "text/plain")``.
+
+    Messages come from the wordlist's ``sql_errors`` data, preferring the entry for
+    ``injection_type``, then ``generic``, then every list defined for the database.
+    """
+    fallback = "Database error occurred"
+    catalog = get_wordlists().sql_errors
+    if not catalog:
+        return (fallback, "text/plain")
+    
+    if not db_type:
+        db_type = random.choice(list(catalog.keys()))
+    per_db = catalog.get(db_type, {})
+    
+    if injection_type and injection_type in per_db:
+        candidates = per_db[injection_type]
+    elif 'generic' in per_db:
+        candidates = per_db['generic']
+    else:
+        # Flatten all list-valued entries; non-list values are ignored.
+        pooled = [msg for group in per_db.values() if isinstance(group, list) for msg in group]
+        candidates = pooled if pooled else [fallback]
+    
+    error_message = random.choice(candidates) if candidates else fallback
+    # Fill each placeholder with one random name (the same name for every occurrence).
+    placeholders = {
+        '{table}': ['users', 'products', 'orders', 'customers', 'accounts', 'sessions'],
+        '{column}': ['id', 'name', 'email', 'password', 'username', 'created_at'],
+    }
+    for token, names in placeholders.items():
+        if token in error_message:
+            error_message = error_message.replace(token, random.choice(names))
+    return (error_message, "text/plain")
+
+
+def generate_sql_error_response(query_string: str, db_type: str = None) -> Tuple[Optional[str], Optional[str], Optional[int]]:
+    """Return ``(error_message, content_type, status_code)``, or ``(None, None, None)`` if no injection pattern matches."""
+    injection_type = detect_sql_injection_pattern(query_string)
+    if not injection_type:
+        return (None, None, None)
+    
+    error_message, content_type = get_random_sql_error(db_type, injection_type)
+    # The fake database error is reported as HTTP 500 by default...
+    status_code = 500
+    # ...but as 200 whenever random.random() falls below 0.3.
+    if random.random() < 0.3:
+        status_code = 200
+    
+    return (error_message, content_type, status_code)
+
+
+def get_sql_response_with_data(path: str, params: str) -> str:
+    """Return a fake JSON query result; made-up user rows are included for union/boolean/stacked payloads."""
+    import json
+    from generators import random_username, random_email, random_password
+    
+    if detect_sql_injection_pattern(params) not in ['union', 'boolean', 'stacked']:
+        # Not a union/boolean/stacked payload: pretend the query ran and matched nothing.
+        payload = {
+            "success": True,
+            "message": "Query executed successfully",
+            "results": []
+        }
+        return json.dumps(payload, indent=2)
+    
+    # One to four fabricated rows; ``path`` is accepted but not used.
+    row_count = random.randint(2, 5) - 1
+    rows = []
+    for row_id in range(1, row_count + 1):
+        rows.append({
+            "id": row_id,
+            "username": random_username(),
+            "email": random_email(),
+            "password_hash": random_password(),
+            "role": random.choice(["admin", "user", "moderator"])
+        })
+    
+    return json.dumps({"success": True, "results": rows}, indent=2)
diff --git a/src/templates/html/generic_search.html b/src/templates/html/generic_search.html
new file mode 100644
index 0000000..90171bc
--- /dev/null
+++ b/src/templates/html/generic_search.html
@@ -0,0 +1,66 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Search</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            max-width: 600px;
+            margin: 50px auto;
+            padding: 20px;
+        }
+        h1 {
+            color: #333;
+        }
+        input {
+            width: 100%;
+            padding: 8px;
+            margin: 10px 0;
+            box-sizing: border-box;
+        }
+        button {
+            background: #4CAF50;
+            color: white;
+            padding: 10px 20px;
+            border: none;
+            cursor: pointer;
+        }
+        button:hover {
+            background: #45a049;
+        }
+        #results {
+            margin-top: 20px;
+            padding: 10px;
+            border: 1px solid #ddd;
+            background: #f9f9f9;
+            display: none;
+        }
+    </style>
+</head>
+<body>
+    <h1>Search</h1>
+    <form id="searchForm">
+        <input type="text" id="searchQuery" placeholder="Enter search query..." required>
+        <button type="submit">Search</button>
+    </form>
+    <div id="results"></div>
+    
+    <script>
+        document.getElementById('searchForm').addEventListener('submit', async (e) => {
+            e.preventDefault();
+            const query = document.getElementById('searchQuery').value;
+            const results = document.getElementById('results');
+            
+            try {
+                const response = await fetch(`/api/search?q=${encodeURIComponent(query)}`);
+                const text = await response.text();
+                results.innerHTML = `<pre>${text}</pre>`;
+                results.style.display = 'block';
+            } catch (err) {
+                results.innerHTML = `<p>Error: ${err.message}</p>`;
+                results.style.display = 'block';
+            }
+        });
+    </script>
+</body>
+</html>
diff --git a/src/templates/html/input_form.html b/src/templates/html/input_form.html
new file mode 100644
index 0000000..c03b1a8
--- /dev/null
+++ b/src/templates/html/input_form.html
@@ -0,0 +1,74 @@
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Contact</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            max-width: 500px;
+            margin: 50px auto;
+            padding: 20px;
+        }
+        h1 {
+            color: #333;
+        }
+        input, textarea {
+            width: 100%;
+            padding: 8px;
+            margin: 10px 0;
+            border: 1px solid #ddd;
+            box-sizing: border-box;
+        }
+        textarea {
+            min-height: 100px;
+        }
+        button {
+            background: #4CAF50;
+            color: white;
+            padding: 10px 20px;
+            border: none;
+            cursor: pointer;
+        }
+        button:hover {
+            background: #45a049;
+        }
+        #response {
+            margin-top: 20px;
+            padding: 10px;
+            display: none;
+        }
+    </style>
+</head>
+<body>
+    <h1>Contact</h1>
+    <form id="contactForm">
+        <input type="text" name="name" placeholder="Name" required>
+        <input type="email" name="email" placeholder="Email" required>
+        <textarea name="message" placeholder="Message" required></textarea>
+        <button type="submit">Submit</button>
+    </form>
+    <div id="response"></div>
+    
+    <script>
+        document.getElementById('contactForm').addEventListener('submit', function(e) {
+            e.preventDefault();
+            const formData = new FormData(this);
+            
+            fetch('/api/contact', {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
+                body: new URLSearchParams(formData)
+            })
+            .then(response => response.text())
+            .then(text => {
+                document.getElementById('response').innerHTML = text;
+                document.getElementById('response').style.display = 'block';
+            })
+            .catch(error => {
+                document.getElementById('response').innerHTML = 'Error: ' + error.message;
+                document.getElementById('response').style.display = 'block';
+            });
+        });
+    </script>
+</body>
+</html>
diff --git a/src/templates/html/robots.txt b/src/templates/html/robots.txt
index 2bae8ca..3618937 100644
--- a/src/templates/html/robots.txt
+++ b/src/templates/html/robots.txt
@@ -11,8 +11,18 @@ Disallow: /login/
 Disallow: /admin/login
 Disallow: /phpMyAdmin/
 Disallow: /admin/login.php
+Disallow: /users
+Disallow: /search
+Disallow: /contact
+Disallow: /info
+Disallow: /input
+Disallow: /feedback
+Disallow: /server
 Disallow: /api/v1/users
 Disallow: /api/v2/secrets
+Disallow: /api/search
+Disallow: /api/sql
+Disallow: /api/database
 Disallow: /.env
 Disallow: /credentials.txt
 Disallow: /passwords.txt
diff --git a/src/templates/html_templates.py b/src/templates/html_templates.py
index c6ad09a..a7cefbc 100644
--- a/src/templates/html_templates.py
+++ b/src/templates/html_templates.py
@@ -50,3 +50,13 @@ def directory_listing(path: str, dirs: list, files: list) -> str:
         rows += row_template.format(href=f, name=f, date="2024-12-01 14:22", size=size)
 
     return load_template("directory_listing", path=path, rows=rows)
+
+
+def product_search() -> str:
+    """Return the "generic_search" template (the search page of the SQL injection honeypot)."""
+    return load_template("generic_search")
+
+
+def input_form() -> str:
+    """Return the "input_form" template (the input form page of the XSS honeypot)."""
+    return load_template("input_form")
diff --git a/src/tracker.py b/src/tracker.py
index 717a4c3..8465031 100644
--- a/src/tracker.py
+++ b/src/tracker.py
@@ -5,6 +5,7 @@ from collections import defaultdict
 from datetime import datetime
 import re
 import urllib.parse
+from wordlists import get_wordlists
 
 
 class AccessTracker:
@@ -21,14 +22,19 @@ class AccessTracker:
             'burp', 'zap', 'w3af', 'metasploit', 'nuclei', 'gobuster', 'dirbuster'
         ]
 
-        # common attack types such as xss, shell injection, probes
-        self.attack_types = {
-            'path_traversal': r'\.\.',
-            'sql_injection': r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
-            'xss_attempt': r'(<script|javascript:|onerror=|onload=)',
-            'common_probes': r'(wp-admin|phpmyadmin|\.env|\.git|/admin|/config)',
-            'shell_injection': r'(\||;|`|\$\(|&&)',
-        }
+        # Load attack patterns from wordlists
+        wl = get_wordlists()
+        self.attack_types = wl.attack_patterns
+        
+        # Fallback if wordlists not loaded
+        if not self.attack_types:
+            self.attack_types = {
+                'path_traversal': r'\.\.',
+                'sql_injection': r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
+                'xss_attempt': r'(<script|javascript:|onerror=|onload=)',
+                'common_probes': r'(wp-admin|phpmyadmin|\.env|\.git|/admin|/config)',
+                'shell_injection': r'(\||;|`|\$\(|&&)',
+            }
 
         # Track IPs that accessed honeypot paths from robots.txt
         self.honeypot_triggered: Dict[str, List[str]] = defaultdict(list)
diff --git a/src/wordlists.py b/src/wordlists.py
index 62e4045..bfa6f1a 100644
--- a/src/wordlists.py
+++ b/src/wordlists.py
@@ -111,6 +111,18 @@ class Wordlists:
     @property
     def error_codes(self):
         return self._data.get("error_codes", [])
+    
+    @property
+    def sql_errors(self):  # "sql_errors" entry of self._data; {} when the key is missing
+        return self._data.get("sql_errors", {})
+    
+    @property
+    def attack_patterns(self):  # "attack_patterns" entry of self._data; {} when the key is missing
+        return self._data.get("attack_patterns", {})
+    
+    @property
+    def server_errors(self):  # "server_errors" entry of self._data; {} when the key is missing
+        return self._data.get("server_errors", {})
 
 
 _wordlists_instance = None
diff --git a/src/xss_detector.py b/src/xss_detector.py
new file mode 100644
index 0000000..0f3da14
--- /dev/null
+++ b/src/xss_detector.py
@@ -0,0 +1,73 @@
+#!/usr/bin/env python3
+
+import re
+from typing import Optional
+from wordlists import get_wordlists
+
+
+def detect_xss_pattern(input_string: str) -> bool:
+    """Return True when ``input_string`` matches the XSS pattern (case-insensitive search)."""
+    if not input_string:
+        return False
+    
+    # Prefer the pattern configured in the wordlists; use the built-in one when it is missing or empty.
+    default_pattern = r'(<script|</script|javascript:|onerror=|onload=|onclick=|<iframe|<img|<svg|eval\(|alert\()'
+    configured = get_wordlists().attack_patterns.get('xss_attempt', '')
+    active_pattern = configured if configured else default_pattern
+    
+    return re.search(active_pattern, input_string, re.IGNORECASE) is not None
+
+
+def generate_xss_response(input_data: dict) -> str:
+    """Build the HTML confirmation page returned after a form submission.
+
+    When any value matches ``detect_xss_pattern``, every key/value pair is echoed into the
+    page without HTML escaping; otherwise a fixed confirmation page is returned.
+    """
+    # NOTE(review): the unescaped echo looks like deliberate honeypot bait - confirm before changing.
+    hits = [detect_xss_pattern(field_value) for field_value in input_data.values()]
+    if not any(hits):
+        return """
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Submission Received</title>
+    <style>
+        body { font-family: Arial, sans-serif; max-width: 600px; margin: 50px auto; padding: 20px; }
+        .success { background: #d4edda; padding: 20px; border-radius: 8px; border: 1px solid #c3e6cb; }
+        h2 { color: #155724; }
+    </style>
+</head>
+<body>
+    <div class="success">
+        <h2>Thank you for your submission!</h2>
+        <p>Your message has been received and we will respond soon.</p>
+    </div>
+</body>
+</html>
+"""
+    
+    # One paragraph per submitted field, in the order the dict yields them.
+    reflected_content = [f"<p><strong>{key}:</strong> {value}</p>" for key, value in input_data.items()]
+    return f"""
+<!DOCTYPE html>
+<html>
+<head>
+    <title>Submission Received</title>
+    <style>
+        body {{ font-family: Arial, sans-serif; max-width: 600px; margin: 50px auto; padding: 20px; }}
+        .success {{ background: #d4edda; padding: 20px; border-radius: 8px; border: 1px solid #c3e6cb; }}
+        h2 {{ color: #155724; }}
+        p {{ margin: 10px 0; }}
+    </style>
+</head>
+<body>
+    <div class="success">
+        <h2>Thank you for your submission!</h2>
+        <p>We have received your information:</p>
+        {''.join(reflected_content)}
+        <p><em>We will get back to you shortly.</em></p>
+    </div>
+</body>
+</html>
+"""

From 4a1d1cf7be3e0a4515b5821e22b01a39c00d142f Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Sat, 3 Jan 2026 17:16:37 +0100
Subject: [PATCH 13/21] added random SQL errors, random server errors, XSS
 baits

---
 .gitignore                  |   7 ++
 src/data/krawl.db           | Bin 69632 -> 0 bytes
 tests/sim_attacks.sh        |   2 +-
 tests/test_sql_injection.sh |  78 +++++++++++++++++
 wordlists.json              | 167 +++++++++++++++++++++++++++++++++++-
 5 files changed, 252 insertions(+), 2 deletions(-)
 delete mode 100644 src/data/krawl.db
 create mode 100644 tests/test_sql_injection.sh

diff --git a/.gitignore b/.gitignore
index 5d758cb..70b93e4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,6 +61,13 @@ secrets/
 *.log
 logs/
 
+# Data and databases
+data/
+**/data/
+*.db
+*.sqlite
+*.sqlite3
+
 # Temporary files
 *.tmp
 *.temp
diff --git a/src/data/krawl.db b/src/data/krawl.db
deleted file mode 100644
index 759ffb958d54f426a0a424446aa7baec7af9d275..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 69632
zcmeI5eQX=&eaA^r6m{hB9=)2DZAE7riKR6@-aC2nvPHES$E$56lD8y73W6qSnUzG+
zypwFFLxyvb7X4%0hM;H=v?#Cz+x}=f6bRNN8@6`E3hbpAunkDJV8PaAz*cl<&;kvx
z_K*D@iag#!ox~KG&Mx*l+mHFY+&$m>JkLEZe15!V_3?`}rz&1+v^GnQs71yi(P-ph
zQH(?)D*PLVf88SuABMXg_&@6Vyw^uHBHej7&3!H+jQm1GcujaZ{cq`I{$KfG>ha{O
z$sG4e;$3ci<m(WL2NFO6NB{}kqXgV3A(44J8+DCZy;8kZyHzYXPN}?JtW=$9*{L<^
zcCl10S8cnv(O4_iD!Zb|*_Fc5m4bNT^0~s3;@}I2%a_Gn7ZK-~Ocw4um`-G}+352L
zr*w6rx-0#z0HLGe%F>4~7J{yy<Z2brc%^W@up(Ywz9L@!)r%J&6q6j2ad3(qBF^n*
zRs6N3m9vj7t;`$p0&^AUXUo^Cm0}Z=Ra<p0oobj1G?&L#E?in#c}o09;VE&xR%wSl
zvb<8baQ-q4>$`n%rSM2$rEvLdVO8wuv)gA<e&-4o3xNFW((2izbA{x>om;6yW_C9E
z!r3;iTC-?7CCBdm&ogkkq2fF+X?jX^gS{iC;gmLtt?G@fs_oc;V6N3#wo|mL)w*~N
zs=$>CmkJD&jncl+<;GUsY27ZC8<iedwT-4+b8dUe6gA%tEZOz0jCR4<^<ty7<|XT*
zZdIFGaQ9Gki*~uus`}6I$2GQE<zBv~-p<39V5<q0y;|{ZZFey?7m^EEek5~tHX3O^
zKK6|b7<Uy*TTY|xFLqfMmF_1x-WsKifjgE=WG-n@_jLEkE4QkZYTc;;c3O)zn~q&{
zYMX$+wAl=ZO!Yr`0cm;{p!FsofjIAFvG8#&k@;m%G`^prT9B0F`;h{bZ7&VA=7M`<
zG?7__+|TTvd+&|{?uL7RI(paGN6~{~kIbD^B9YOw=yMOXAFzO&1A<1G$1Wg2`w0wg
zmG*fsp^3TPtMsy-XP?_G8`_`JX0K*<pNnS6wr@6CmEO6Xw#mCquc|Xoaevd5T=<QV
zM5b^u>dtmc%GX8}Yj)AzvYWMXt+8eMA`<;Y<BMZYqGjTXgjUag$%SS-k$DJmPY31h
z-HY!6Bm3p-ov<rcFJ{3NhM|*$EPI`#*CpCi`}Ib>db`<hicYJxwpMNVGLP?<dBCZ=
zWCWyHcqW#}oCQ5cgAhPV<4N}k8Qxcj@0{HVn*K-O*M~+j*^|*5!CCt<jP0LcKsF0^
z3eiO7<jLsskMDH*J<7i84t1{E>FRrF4(xF%me!zM_MRI<+3BP<tIqXC?{QN4h6GHl
z6+gVZe6g@}nR&7TiZ(EvSDf$n0gXfd5+y&O-OM=oN&1cS1N_T8pL#a=gXAjr8aF@s
z(r7YqbL5?oOYzs^Cx<^f9Eq(Dy*+d>`qk(Ikr(M{|L2E0b~G88yWce?M@N?wH7CjN
zN6B6*ZP&_;x&+Rp#xu2zjZ)T-WO4pdsa&f&4g2~T@j~6HZio;dF0YDDiVB=&oDo~w
zi;8Ya!2Eo*yxz#FvLeG@MSP^zs$Of{%C=+A7p|zXYMz#r)3PQi`l7BaDyo#1Wz*1H
z;Rr}N4w5*|C+TMMbg8mgt7k(Llv7nLZ@LpRAZVeVQRQm!Dh$cDni7mJ4o;MDT2W7H
zvM5`WlA%h9YU#4#lEWbCWIv_w73q0NR`k4LxycMDnxTsPRY_W_IR{u~EFN8#<eaI<
zId|evr_7j2)a`6-$+_i(Rc4wcnU<xSmOC{KqE7dhS#Jmut}H_`4Oz}<E;j|TGX1io
zYJEE_L@HDwQ_*sYJ2?qjJ_K5r_NdgXWw+I=T?JN-9hxdyoAi9naK|S=)u}*LN|p}o
zLer!elB(zPM&6wo52{Q%Z6tgtP^(ML>&>uG$vH_=piR%aV`CtTb|~zF2U2b?9dP^2
zwgs{#DVCbgYwj2clH?;54kdNbuq0VGX-kk2K+@6v3e_&QkQz%+qe*H`HJ|}Vr$NnJ
zznZP)Mx#`*!_lKziv|oaa=E;&yJ^0ohpj`c+BQu6L(`*Ml40taV!DS@pvUO1(=b4~
zR$I&Zn(y$WDbS_p&|tbVNsyM?LmD)j#r5iKTl$#YsE1O?p%|gNGfc%$b1u(;w%I+j
zg(k<8a=NPNhMO7%IrN!hYSqq&DKs^@q?xh;gUVFm?yFICNj37Ol5@EcP(z0RySm-5
z$S8RzCoN~l?sy!uKo2s_l+yt>*N-i)(m9E*3xTQ(a}wDwjU2Ss!$GCBqbV#x(8ua#
zPB&rf9|JXX?9Y@|t8umAgg1l)KC&uViYXf~+#Ui&FhH7S#{Mv8JJ69xV4=w{g|%b@
zL<vz4bvGuG{#uk@G;)#&6L&h(jzmrf_eaQ&$bXPG$zPK{C0`<6AiqnVBU@yRJVic2
z9wJ$CoJ^9q@SgBr!dt?(guf8JBK*Ga8R3({ZDC#bsIV+72^w4k4<vvDkN^@u0!RP}
zAOR$R1dsp{`1vM~j?G2Ah1e5}?>OTV8Q&b^yN~hBGQMMs?<nIt!uVzw-(kj=VSI-e
z-!$W!VtkW~Z-ViSGrlp#M;M>L_|lAzXM8Efmt=e#;~Qms3C1_V_~MLjnDNCJ-w+?0
z<Gj*r&;OIZ9U<Q)e@s3>K1y_w622q6F8q$LCj62xo&J9M8|jzRH`0%$=lCD<-{N24
zKhCf43w$K?X6nnS7gA5Bv{W+r@5$HTB6uJHB!C2v01`j~NB{{SfuD`Q{P5gTZ>rH}
zkO?-Kc}dj_!!l0{PAHp}V&#tyN|=*WOH*Y-+&3XuIf7-1Z0LqIHz1*6Nm|~pbnCuB
z2@OeCEYs9x2PM=bQ<il_KQ<^~*IxDL;DkGNq(^oo^sN*6Z98WMB=k%!4-ZP%HHXX$
zNa&d;9vYOeYcx1LAfac`H#I0>*T`;iKtj**ZDK$|&lYWbP{OW-*x0@aeRf-9P{OWB
zm9SqzzpYbxKtj(5i64}(YZ8<ikkGTwNe)WbwVdGwCF~ltj1EZX*_$K=B=n3yMg}G9
zT4=-vB=n3Yh6g3=+BC!lCG6T644s08v^UG|vm)79|5q%_FxvBf@<fEZM}A1YOTGr{
z|9?n+k9?XuPo5!7vQDm%5_y_DL4Hd9oBStvoBT8RC-V2?Z^&PfKOtWsUnHL=FOe6?
zr^s)Tt)H!_$J9sw2_OL^fCP{L5<mh-00|%gB!C2d?g<RXMx!UFbDTONb>^sZA9ZGj
zV_ftY{dAOmIzm6q&`*cyrwsjch&t2MnWABn)R~~(aq5gwhtM#AI%(?Vsgt5kk~$o9
zMyZpaNk-_WICX}pH%6VISd4>TLxKH&>bnv09r6wG3V8wM`{&6#;e_uAe=B@Z_${Fh
z*8Q?Dp8jF_AJea<Ka<{0KatL-XZZi&-{Sv_|2+RJU*sR=kMl!t5j>Cp5<mh-00|%g
zB!C2v01~*{1n!G1Mfb6!*ekkYKM@@5Sxp2)`)wrxqk9$-!O@;wL~yic4G|pe*+A?S
z-LZV=MDJzq5E$LFatMz0Y#Rcjdln7B(ViVcV06!VAvoHzSqO~oSt<lZ_v{mbqdlvH
zfM~xhLU6Qafv`K8u{#Ki?pYfIM|(B~fzdt7g5YS+o*+2dvmywL?%56mM)xcRf}=e<
zfxzgVbwF^mXA=+{?O6iM#+JDEvHtI0{ikdH{+0jt_Fen`^v>!(Y2NLI4)aF>NB{{S
z0VIF~kN^@u0!RP}AOR#0G6A~2zQ_7Mx9|Et7uNbex3m6FH<^i%3lZ{l@=5p)4<vvD
zkN^@u0!RP}AOR$R1dsp{Kmtf$KLWoPOTcae<CE<6aNbq}rAo=J^jYD+26NQZ1vZ#5
z@<VX;{e<|=_H8avy4tsZ-`NI2GgbMaD9v|r&wTAp#y&e2x=R9>2jmUd+=|-&$H*r-
z@BiEH1~Co^AOR$R1dsp{Kmter2_OL^fCP{L68K;QcD>fGZy);Z*93#!|JU9el71_&
z|NH-T&Hvx*?Em+{?gGz70!RP}AOR$R1dsp{Kmter2_OL^fCNG%!0h-pX#c+$d8PCI
zzfd(|W=H@DAOR$R1dsp{Kmter2_OL^fCP|$k3i=Ie|y;k1it^TX9*DS{y*RPKmGlG
zl)M^&zjz=4B!C2v01`j~NB{{S0VIF~kN^@u0{0MsUy1T_(WRx1B1O$f@N0faInno$
zfc&D7lT1yOP5428>i_mYUxbY;R4tcRWcvI6DESfC|KCG5h*FRM5<mh-00|%gB!C2v
z01`j~NB{{Sfe%W6kIix3mj-nGp9tTKknfW>$zPH$k(bDGq)wh9=SY?uA#ved;Vp>9
z0|_7jB!C2v01`j~NB{{S0VIF~kib1aV3Ncm(NpQ!a$|F|Q7<-IjjL6AQfLPrV*}Ie
zz$0uR-ww>MfvI-j6dRaq2adCWTsv@#4IFI;5;ic=4otCuBkjN>8yIf~a%|vmJ21`$
z#@d0yY~WBkaEJ+<h{hw_sWcNt*Z&FmqX_va`7wEye2=_C-X`BB|3LnZe3N{Qyg^<i
zUxsKrkN^@u0!RP}AOR$R1dsp{Kmter2_S(G2*hL2=q#b`F@d^A($vlH)SXIEcRWem
zF^;-ql)9+|b(161<>J(h4^wwIM%|&ISd8Q7`v1|$ixJWg{zZ5+{YLr${$-v|J)8VN
za+Q0Hn;(5?G?}<L^3KSm`0MeL!=D|F#MX!29=aI)YV-j(?H~?!>}V1;Lv)SF(NW)i
zh}TNnwQ{2_fpe+xOl@PMlr<z-oWE2m*XmBgzJ5l$P<N^uA_R!btKyTQ0zZl{&WNq;
zMMXCyc|km1Ew4ASs;tQHR}mknwW`+|x3cZn^MxyYzmkCM8C5AS%ch~Z!V!>k93*j^
zPtwii>788~Lll%#RV{D26Eh%ap`TIZYVm5RyuQ_x>>C>gC(5^%q^>O*s-&ovE-NlM
z45Ci<Qwm=Z{JKR}^t@uZ$qXo(p^E%fNm{Ep*|6~Fx+LdJ*yYomIMgXKrV=$vwtchF
zs@Rfq%Lz-AW=W=H>89mQO@pY@{bg2eRV&rHQ!8zRE6b2fLzZ)z%T0l-OusCtTHg)}
zkqVW_RA7r#cXAT6d<e8K?NO;&%WkV#y9%rvJ2X|aHtG4C;f_y$s#Afgl<byWZH1-@
zcCl6UeBQ{rQ{zFE$+wfO)V$sd3zeLcGzHr9ygN1qvS^3GK6oJI_EKfDRu8Q$kTprM
z)O=oZ$4HPQAE|ICsf&gs$+}5ff|LM~j`mlm_727&0RlCeq~=ru8h~^f)Xep(*=lYy
zN)<aCJ+M!*0Yi*jF0boun(ydg>rkt<U2>|S>Cr98Fm+8a-NPx+WAxW)7$9A%tz~`9
zcX-ki=u&iOFx{CXNXzXZ4Vum3diAy~eavpuL#gCYjL_W~redf$m*+s+>>k=elVeIb
zUDb5MO^t#a`phx4YG=e0ni^fwOj&_JWh!y^)u_6p8hKO6x!ee-p+kUO-ELT9lsuG^
zmNR5`JPul*2bpHd>42N-$Cg*=oWwWUhN|3tv72e+puHXrDy<z&VG)8pRyT9H31j~l
zsG(zjrnF!ozTt#7grqy$tCFRdvH`>GAy5PZq-kdC4|BG5mv&Gik-$Qe;RpJbY=9^s
z3Zm}DMABc2cHRu2Se9YX`G0~xvNQi*PXBpY<^KS@cpw2JfCP{L5<mh-00|%gB=G-=
zfXkhLsmiYTw^OwbaFrt9EoFvbnJ#zy9?&A2mSW{yPQ2?{a*}Gni`@)YnCna@nOS$G
zWM9A9D77jF_cegOpkN6QzP6PO9TrMQ?*l>8{a*u=?9ikrmZaqkOSjyKS&(wK=Km}o
dSRv6B%QRsj`B<mGm@ksTUr5#@Q<il_|6d`vyO#g}

diff --git a/tests/sim_attacks.sh b/tests/sim_attacks.sh
index d4a72b2..3502c3a 100755
--- a/tests/sim_attacks.sh
+++ b/tests/sim_attacks.sh
@@ -17,4 +17,4 @@ curl -s "$TARGET/wp-admin/"
 echo -e "\n=== Testing Shell Injection ==="
 curl -s -X POST "$TARGET/ping" -d "host=127.0.0.1; cat /etc/passwd"
 
-echo -e "\n=== Done ==="
\ No newline at end of file
+echo -e "\n=== Done ==="
diff --git a/tests/test_sql_injection.sh b/tests/test_sql_injection.sh
new file mode 100644
index 0000000..e178b3c
--- /dev/null
+++ b/tests/test_sql_injection.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+# Test script for SQL injection honeypot endpoints (set BASE_URL to target another host/port)
+
+BASE_URL="${BASE_URL:-http://localhost:5000}"
+
+echo "========================================="
+echo "Testing SQL Injection Honeypot Endpoints"
+echo "========================================="
+echo ""
+
+# Test 1: Normal query
+echo "Test 1: Normal GET request to /api/search"
+curl -s "${BASE_URL}/api/search?q=test" | head -20
+echo ""
+echo "---"
+echo ""
+
+# Test 2: SQL injection with single quote
+echo "Test 2: SQL injection with single quote"
+curl -s "${BASE_URL}/api/search?id=1'" | head -20
+echo ""
+echo "---"
+echo ""
+
+# Test 3: UNION-based injection
+echo "Test 3: UNION-based SQL injection"
+curl -s "${BASE_URL}/api/search?id=1%20UNION%20SELECT%20*" | head -20
+echo ""
+echo "---"
+echo ""
+
+# Test 4: Boolean-based injection
+echo "Test 4: Boolean-based SQL injection"
+curl -s "${BASE_URL}/api/sql?user=admin'%20OR%201=1--" | head -20
+echo ""
+echo "---"
+echo ""
+
+# Test 5: Comment-based injection
+echo "Test 5: Comment-based SQL injection"
+curl -s "${BASE_URL}/api/database?q=test'--" | head -20
+echo ""
+echo "---"
+echo ""
+
+# Test 6: Time-based injection
+echo "Test 6: Time-based SQL injection"
+curl -s "${BASE_URL}/api/search?id=1%20AND%20SLEEP(5)" | head -20
+echo ""
+echo "---"
+echo ""
+
+# Test 7: POST request with SQL injection
+echo "Test 7: POST request with SQL injection"
+curl -s -X POST "${BASE_URL}/api/search" -d "username=admin'%20OR%201=1--&password=test" | head -20
+echo ""
+echo "---"
+echo ""
+
+# Test 8: Information schema query
+echo "Test 8: Information schema injection"
+curl -s "${BASE_URL}/api/sql?table=information_schema.tables" | head -20
+echo ""
+echo "---"
+echo ""
+
+# Test 9: Stacked queries
+echo "Test 9: Stacked queries injection"
+curl -s "${BASE_URL}/api/database?id=1;DROP%20TABLE%20users" | head -20
+echo ""
+echo "---"
+echo ""
+
+echo "========================================="
+echo "Tests completed!"
+echo "Check logs for detailed attack detection"
+echo "========================================="
diff --git a/wordlists.json b/wordlists.json
index f1aae81..c0f1a17 100644
--- a/wordlists.json
+++ b/wordlists.json
@@ -193,5 +193,170 @@
     500,
     502,
     503
-  ]
+  ],
+  "server_errors": {
+    "nginx": {
+      "versions": ["1.18.0", "1.20.1", "1.22.0", "1.24.0"],
+      "template": "<!DOCTYPE html>\n<html>\n<head>\n<title>{code} {message}</title>\n<style>\nbody {{\n    width: 35em;\n    margin: 0 auto;\n    font-family: Tahoma, Verdana, Arial, sans-serif;\n}}\n</style>\n</head>\n<body>\n<h1>An error occurred.</h1>\n<p>Sorry, the page you are looking for is currently unavailable.<br/>\nPlease try again later.</p>\n<p>If you are the system administrator of this resource then you should check the error log for details.</p>\n<p><em>Faithfully yours, nginx/{version}.</em></p>\n</body>\n</html>"
+    },
+    "apache": {
+      "versions": ["2.4.41", "2.4.52", "2.4.54", "2.4.57"],
+      "os": ["Ubuntu", "Debian", "CentOS"],
+      "template": "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n<html><head>\n<title>{code} {message}</title>\n</head><body>\n<h1>{message}</h1>\n<p>The requested URL was not found on this server.</p>\n<hr>\n<address>Apache/{version} ({os}) Server at {host} Port 80</address>\n</body></html>"
+    },
+    "iis": {
+      "versions": ["10.0", "8.5", "8.0"],
+      "template": "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n<html xmlns=\"http://www.w3.org/1999/xhtml\">\n<head>\n<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\"/>\n<title>{code} - {message}</title>\n<style type=\"text/css\">\nbody{{margin:0;font-size:.7em;font-family:Verdana, Arial, Helvetica, sans-serif;background:#EEEEEE;}}\nfieldset{{padding:0 15px 10px 15px;}}\nh1{{font-size:2.4em;margin:0;color:#FFF;}}\nh2{{font-size:1.7em;margin:0;color:#CC0000;}}\nh3{{font-size:1.2em;margin:10px 0 0 0;color:#000000;}}\n#header{{width:96%;margin:0 0 0 0;padding:6px 2% 6px 2%;font-family:\"trebuchet MS\", Verdana, sans-serif;color:#FFF;\nbackground-color:#555555;}}\n#content{{margin:0 0 0 2%;position:relative;}}\n</style>\n</head>\n<body>\n<div id=\"header\"><h1>Server Error</h1></div>\n<div id=\"content\">\n <div class=\"content-container\"><fieldset>\n  <h2>{code} - {message}</h2>\n  <h3>The page cannot be displayed because an internal server error has occurred.</h3>\n </fieldset></div>\n</div>\n</body>\n</html>"
+    },
+    "tomcat": {
+      "versions": ["9.0.65", "10.0.27", "10.1.5"],
+      "template": "<!doctype html><html lang=\"en\"><head><title>HTTP Status {code} - {message}</title><style type=\"text/css\">body {{font-family:Tahoma,Arial,sans-serif;}} h1, h2, h3, b {{color:white;background-color:#525D76;}} h1 {{font-size:22px;}} h2 {{font-size:16px;}} h3 {{font-size:14px;}} p {{font-size:12px;}} a {{color:black;}} .line {{height:1px;background-color:#525D76;border:none;}}</style></head><body><h1>HTTP Status {code} - {message}</h1><hr class=\"line\" /><p><b>Type</b> Status Report</p><p><b>Description</b> The server encountered an internal error that prevented it from fulfilling this request.</p><hr class=\"line\" /><h3>Apache Tomcat/{version}</h3></body></html>"
+    }
+  },
+  "sql_errors": {
+    "mysql": {
+      "generic": [
+        "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ''1'' at line 1",
+        "Unknown column '{column}' in 'where clause'",
+        "Table '{table}' doesn't exist",
+        "Operand should contain 1 column(s)",
+        "Subquery returns more than 1 row",
+        "Duplicate entry 'admin' for key 'PRIMARY'"
+      ],
+      "quote": [
+        "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ''''' at line 1",
+        "Unclosed quotation mark after the character string ''",
+        "You have an error in your SQL syntax near '\\'' LIMIT 0,30'"
+      ],
+      "union": [
+        "The used SELECT statements have a different number of columns",
+        "Operand should contain 1 column(s)",
+        "Mixing of GROUP columns (MIN(),MAX(),COUNT(),...) with no GROUP columns is illegal"
+      ],
+      "boolean": [
+        "You have an error in your SQL syntax near 'OR 1=1' at line 1",
+        "Unknown column '1' in 'where clause'"
+      ],
+      "time_based": [
+        "Query execution was interrupted",
+        "Lock wait timeout exceeded; try restarting transaction"
+      ],
+      "comment": [
+        "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '--' at line 1"
+      ]
+    },
+    "postgresql": {
+      "generic": [
+        "ERROR: syntax error at or near \"1\"",
+        "ERROR: column \"{column}\" does not exist",
+        "ERROR: relation \"{table}\" does not exist",
+        "ERROR: operator does not exist: integer = text",
+        "ERROR: invalid input syntax for type integer: \"admin\""
+      ],
+      "quote": [
+        "ERROR: unterminated quoted string at or near \"'\"",
+        "ERROR: syntax error at or near \"'\"",
+        "ERROR: unterminated quoted identifier at or near \"'\""
+      ],
+      "union": [
+        "ERROR: each UNION query must have the same number of columns",
+        "ERROR: UNION types integer and text cannot be matched"
+      ],
+      "boolean": [
+        "ERROR: syntax error at or near \"OR\"",
+        "ERROR: invalid input syntax for type boolean: \"1=1\""
+      ],
+      "time_based": [
+        "ERROR: canceling statement due to user request",
+        "ERROR: function pg_sleep(integer) does not exist"
+      ],
+      "info_schema": [
+        "ERROR: permission denied for table {table}",
+        "ERROR: permission denied for schema information_schema"
+      ]
+    },
+    "mssql": {
+      "generic": [
+        "Msg 102, Level 15, State 1, Line 1\nIncorrect syntax near '1'.",
+        "Msg 207, Level 16, State 1, Line 1\nInvalid column name '{column}'.",
+        "Msg 208, Level 16, State 1, Line 1\nInvalid object name '{table}'.",
+        "Msg 245, Level 16, State 1, Line 1\nConversion failed when converting the varchar value 'admin' to data type int."
+      ],
+      "quote": [
+        "Msg 105, Level 15, State 1, Line 1\nUnclosed quotation mark after the character string ''.",
+        "Msg 102, Level 15, State 1, Line 1\nIncorrect syntax near '''."
+      ],
+      "union": [
+        "Msg 205, Level 16, State 1, Line 1\nAll queries combined using a UNION, INTERSECT or EXCEPT operator must have an equal number of expressions in their target lists.",
+        "Msg 8167, Level 16, State 1, Line 1\nThe type of column \"{column}\" conflicts with the type of other columns specified in the UNION, INTERSECT, or EXCEPT list."
+      ],
+      "boolean": [
+        "Msg 102, Level 15, State 1, Line 1\nIncorrect syntax near 'OR'."
+      ],
+      "command": [
+        "Msg 15281, Level 16, State 1, Procedure xp_cmdshell, Line 1\nSQL Server blocked access to procedure 'sys.xp_cmdshell' of component 'xp_cmdshell'"
+      ]
+    },
+    "oracle": {
+      "generic": [
+        "ORA-00933: SQL command not properly ended",
+        "ORA-00904: \"{column}\": invalid identifier",
+        "ORA-00942: table or view \"{table}\" does not exist",
+        "ORA-01722: invalid number",
+        "ORA-01756: quoted string not properly terminated"
+      ],
+      "quote": [
+        "ORA-01756: quoted string not properly terminated",
+        "ORA-00933: SQL command not properly ended"
+      ],
+      "union": [
+        "ORA-01789: query block has incorrect number of result columns",
+        "ORA-01790: expression must have same datatype as corresponding expression"
+      ],
+      "boolean": [
+        "ORA-00933: SQL command not properly ended",
+        "ORA-00920: invalid relational operator"
+      ]
+    },
+    "sqlite": {
+      "generic": [
+        "near \"1\": syntax error",
+        "no such column: {column}",
+        "no such table: {table}",
+        "unrecognized token: \"'\"",
+        "incomplete input"
+      ],
+      "quote": [
+        "unrecognized token: \"'\"",
+        "incomplete input",
+        "near \"'\": syntax error"
+      ],
+      "union": [
+        "SELECTs to the left and right of UNION do not have the same number of result columns"
+      ]
+    },
+    "mongodb": {
+      "generic": [
+        "MongoError: Can't canonicalize query: BadValue unknown operator: $where",
+        "MongoError: Failed to parse: { $where: \"this.{column} == '1'\" }",
+        "SyntaxError: unterminated string literal",
+        "MongoError: exception: invalid operator: $gt"
+      ],
+      "quote": [
+        "SyntaxError: unterminated string literal",
+        "SyntaxError: missing } after property list"
+      ],
+      "command": [
+        "MongoError: $where is not allowed in this context",
+        "MongoError: can't eval: security"
+      ]
+    }
+  },
+  "attack_patterns": {
+    "path_traversal": "\\.\\.",
+    "sql_injection": "('|\"|`|--|#|/\\*|\\*/|\\bunion\\b|\\bunion\\s+select\\b|\\bor\\b.*=.*|\\band\\b.*=.*|'.*or.*'.*=.*'|\\bsleep\\b|\\bwaitfor\\b|\\bdelay\\b|\\bbenchmark\\b|;.*select|;.*drop|;.*insert|;.*update|;.*delete|\\bexec\\b|\\bexecute\\b|\\bxp_cmdshell\\b|information_schema|table_schema|table_name)",
+    "xss_attempt": "(<script|</script|javascript:|onerror=|onload=|onclick=|onmouseover=|onfocus=|onblur=|<iframe|<img|<svg|<embed|<object|<body|<input|eval\\(|alert\\(|prompt\\(|confirm\\(|document\\.|window\\.|<style|expression\\(|vbscript:|data:text/html)",
+    "common_probes": "(wp-admin|phpmyadmin|\\.env|\\.git|/admin|/config)",
+    "shell_injection": "(\\||;|`|\\$\\(|&&)"
+  }
 }

From 4c490e30cb75b73ae8eaa17a39df7c354608332d Mon Sep 17 00:00:00 2001
From: Phillip Tarrant <ptarrant@gmail.com>
Date: Sat, 3 Jan 2026 13:56:16 -0600
Subject: [PATCH 14/21] fixing dashboard to ensure starts with forward slash,
 put back the server_header option to allow pinning

---
 config.yaml       | 5 ++++-
 src/config.py     | 9 ++++++++-
 src/generators.py | 4 ++++
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/config.yaml b/config.yaml
index c4faa8f..f9825a0 100644
--- a/config.yaml
+++ b/config.yaml
@@ -5,6 +5,9 @@ server:
   delay: 100  # Response delay in milliseconds
   timezone: null  # e.g., "America/New_York" or null for system default
 
+  # manually set the server header, if null a random one will be used.
+  server_header: "Apache/2.2.22 (Ubuntu)" 
+
 links:
   min_length: 5
   max_length: 15
@@ -19,7 +22,7 @@ canary:
 
 dashboard:
   # if set to "null" this will Auto-generates random path if not set
-  # can be set to "dashboard" or similar
+  # can be set to "/dashboard" or similar; a missing leading forward slash is added automatically
   secret_path: dashboard
 
 api:
diff --git a/src/config.py b/src/config.py
index fb679b4..d8aa2f2 100644
--- a/src/config.py
+++ b/src/config.py
@@ -16,6 +16,7 @@ class Config:
     """Configuration class for the deception server"""
     port: int = 5000
     delay: int = 100  # milliseconds
+    server_header: str = ""
     links_length_range: Tuple[int, int] = (5, 15)
     links_per_page_range: Tuple[int, int] = (10, 15)
     char_space: str = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
@@ -27,6 +28,7 @@ class Config:
     api_server_port: int = 8080
     api_server_path: str = "/api/v2/users"
     probability_error_codes: int = 0  # Percentage (0-100)
+
     # Database settings
     database_path: str = "data/krawl.db"
     database_retention_days: int = 30
@@ -98,10 +100,15 @@ class Config:
         dashboard_path = dashboard.get('secret_path')
         if dashboard_path is None:
             dashboard_path = f'/{os.urandom(16).hex()}'
-
+        else:
+            # ensure the dashboard path starts with a /
+            if dashboard_path[:1] != "/":
+                dashboard_path = f"/{dashboard_path}"
+                
         return cls(
             port=server.get('port', 5000),
             delay=server.get('delay', 100),
+            server_header=server.get('server_header',""),
             timezone=server.get('timezone'),
             links_length_range=(
                 links.get('min_length', 5),
diff --git a/src/generators.py b/src/generators.py
index 6eca9fd..92eb590 100644
--- a/src/generators.py
+++ b/src/generators.py
@@ -9,6 +9,7 @@ import string
 import json
 from templates import html_templates
 from wordlists import get_wordlists
+from config import get_config
 
 def random_username() -> str:
     """Generate random username"""
@@ -37,6 +38,9 @@ def random_email(username: str = None) -> str:
 
 def random_server_header() -> str:
     """Generate random server header from wordlists"""
+    config = get_config()
+    if config.server_header:
+        return config.server_header
     wl = get_wordlists()
     return random.choice(wl.server_headers)
 

From 7d9f0616b77a924c42b8c6efbdede6bf376c4ffd Mon Sep 17 00:00:00 2001
From: Phillip Tarrant <ptarrant@gmail.com>
Date: Mon, 5 Jan 2026 11:54:02 -0600
Subject: [PATCH 15/21] Add background task to export suspicious IPs to text
 file

  - Implement export-malicious-ips task that queries distinct IPs flagged
    as is_suspicious from database and writes to exports/malicious_ips.txt
  - Add exports volume mount to docker-compose.yaml for host persistence
  - Update entrypoint.sh to fix ownership of exports directory for krawl user
  - Update Dockerfile to create /app/exports directory during build

Other tasks can be added by creating them in the tasks dir using the same setup as this task.
All tasks *MUST* include a TASK_CONFIG dict and a main method in the file to work correctly.
---
 Dockerfile                     |   2 +-
 docker-compose.yaml            |   1 +
 entrypoint.sh                  |   2 +-
 exports/.gitkeep               |   0
 requirements.txt               |   3 +
 src/server.py                  |   5 +
 src/tasks/top_attacking_ips.py |  57 +++++++
 src/tasks_master.py            | 288 +++++++++++++++++++++++++++++++++
 8 files changed, 356 insertions(+), 2 deletions(-)
 create mode 100644 exports/.gitkeep
 create mode 100644 src/tasks/top_attacking_ips.py
 create mode 100644 src/tasks_master.py

diff --git a/Dockerfile b/Dockerfile
index 2c7b954..92c2d9f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -16,7 +16,7 @@ COPY wordlists.json /app/
 COPY entrypoint.sh /app/
 
 RUN useradd -m -u 1000 krawl && \
-    mkdir -p /app/logs /app/data && \
+    mkdir -p /app/logs /app/data /app/exports && \
     chown -R krawl:krawl /app && \
     chmod +x /app/entrypoint.sh
 
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 02b6ae7..08bcec9 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -12,6 +12,7 @@ services:
       - ./wordlists.json:/app/wordlists.json:ro
       - ./config.yaml:/app/config.yaml:ro
       - ./logs:/app/logs
+      - ./exports:/app/exports
     environment:
       - CONFIG_LOCATION=config.yaml
     restart: unless-stopped
diff --git a/entrypoint.sh b/entrypoint.sh
index 28b5fc0..fe3ef45 100644
--- a/entrypoint.sh
+++ b/entrypoint.sh
@@ -2,7 +2,7 @@
 set -e
 
 # Fix ownership of mounted directories
-chown -R krawl:krawl /app/logs /app/data 2>/dev/null || true
+chown -R krawl:krawl /app/logs /app/data /app/exports 2>/dev/null || true
 
 # Drop to krawl user and run the application
 exec gosu krawl "$@"
diff --git a/exports/.gitkeep b/exports/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/requirements.txt b/requirements.txt
index 8cb6dc5..cafbb7d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,6 @@ PyYAML>=6.0
 
 # Database ORM
 SQLAlchemy>=2.0.0,<3.0.0
+
+# Scheduling (kept below 4.0: src/tasks_master.py imports the 3.x BackgroundScheduler API)
+APScheduler>=3.11.2,<4.0.0
\ No newline at end of file
diff --git a/src/server.py b/src/server.py
index 7a59c73..135284c 100644
--- a/src/server.py
+++ b/src/server.py
@@ -13,6 +13,7 @@ from tracker import AccessTracker
 from handler import Handler
 from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
 from database import initialize_database
+from tasks_master import get_tasksmaster
 
 
 def print_usage():
@@ -89,6 +90,10 @@ def main():
         except IOError:
             app_logger.warning("Can't read input file. Using randomly generated links.")
 
+    # tasks master init
+    tasks_master = get_tasksmaster()
+    tasks_master.run_scheduled_tasks()
+
     try:
         app_logger.info(f'Starting deception server on port {config.port}...')
         app_logger.info(f'Timezone configured: {tz.key}')
diff --git a/src/tasks/top_attacking_ips.py b/src/tasks/top_attacking_ips.py
new file mode 100644
index 0000000..d9e18d3
--- /dev/null
+++ b/src/tasks/top_attacking_ips.py
@@ -0,0 +1,57 @@
+# tasks/top_attacking_ips.py - exports suspicious IPs (task name: export-malicious-ips)
+
+import os
+from logger import get_app_logger
+from database import get_database
+from models import AccessLog
+from sqlalchemy import distinct
+
+app_logger = get_app_logger()
+
+# ----------------------
+# TASK CONFIG
+# ----------------------
+TASK_CONFIG = {
+    "name": "export-malicious-ips",
+    "cron": "*/5 * * * *",
+    "enabled": True,
+    "run_when_loaded": True
+}
+
+EXPORTS_DIR = "exports"
+OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
+
+# ----------------------
+# TASK LOGIC
+# ----------------------
+def main():
+    """
+    Export all distinct IPs flagged as suspicious to OUTPUT_FILE (one per line).
+    TasksMaster calls this on the TASK_CONFIG cron schedule; errors are logged, not raised.
+    """
+    task_name = TASK_CONFIG.get("name")
+    app_logger.info(f"[Background Task] {task_name} starting...")
+
+    db = None  # bound before the try so the finally block never sees an unbound name
+    try:
+        db = get_database()
+        session = db.session
+        # Query distinct suspicious IPs
+        results = session.query(distinct(AccessLog.ip)).filter(
+            AccessLog.is_suspicious == True
+        ).all()
+
+        # Ensure exports directory exists
+        os.makedirs(EXPORTS_DIR, exist_ok=True)
+
+        # Write IPs to file (one per line)
+        with open(OUTPUT_FILE, 'w') as f:
+            for (ip,) in results:
+                f.write(f"{ip}\n")
+
+        app_logger.info(f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}")
+    except Exception as e:
+        app_logger.error(f"[Background Task] {task_name} failed: {e}")
+    finally:
+        if db is not None:  # get_database() itself may have failed
+            db.close_session()
diff --git a/src/tasks_master.py b/src/tasks_master.py
new file mode 100644
index 0000000..264471c
--- /dev/null
+++ b/src/tasks_master.py
@@ -0,0 +1,288 @@
+import os
+import sys
+import datetime
+import functools
+import threading
+import importlib
+import importlib.util
+
+from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
+
+app_logger = get_app_logger()
+
+try:  # APScheduler is a hard dependency: fail fast with install instructions if it is missing
+    from apscheduler.schedulers.background import BackgroundScheduler
+    from apscheduler.triggers.cron import CronTrigger
+    from apscheduler.events import EVENT_JOB_EXECUTED, EVENT_JOB_ERROR
+except ModuleNotFoundError:
+    msg = (
+        "Required modules are not installed. "
+        "Can not continue with module / application loading.\n"
+        "Install them with: pip install -r requirements.txt"
+    )
+    print(msg, file=sys.stderr)
+    app_logger.error(msg)
+    sys.exit(1)  # non-zero status; the bare exit() helper is only injected by the site module
+
+
+# ---------- TASKSMASTER CLASS ----------
+class TasksMaster:
+    
+    TASK_DEFAULT_CRON = '*/15 * * * *'
+    TASK_JITTER = 240
+    TASKS_FOLDER = os.path.join(os.path.dirname(__file__), "tasks")
+
+    def __init__(self, scheduler: BackgroundScheduler):
+        self.tasks = self._config_tasks()
+        self.scheduler = scheduler
+        self.last_run_times = {}
+        self.scheduler.add_listener(self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)
+
+    def _config_tasks(self):
+        """
+        Loads task definitions from TASKS_FOLDER, logs the folder used and returns the list.
+        """
+        tasks_defined = self._load_tasks_from_folder(self.TASKS_FOLDER)
+        app_logger.info(f"Scheduled Tasks Loaded from folder: {self.TASKS_FOLDER}")
+        return tasks_defined
+
+    def _load_tasks_from_folder(self, folder_path):
+        """
+        Loads and registers task modules from a specified folder.
+
+        This function scans the given folder for Python (.py) files, dynamically
+        imports each as a module, and looks for two attributes:
+        - TASK_CONFIG: A dictionary containing task metadata, specifically the
+        'name' and 'cron' (cron schedule string).
+        - main: A callable function that represents the task's execution logic.
+
+        Tasks with both attributes are added to a list with their configuration and
+        execution function.
+
+        Args:
+            folder_path (str): Path to the folder containing task scripts.
+
+        Returns:
+            list[dict]: A list of task definitions with keys:
+            - 'name' (str): The name of the task.
+            - 'filename' (str): The file the task was loaded from.
+            - 'cron' (str): The crontab string for scheduling.
+            - 'enabled' (bool): Whether the task is enabled.
+            - 'run_when_loaded' (bool): Whether to run the task immediately.
+        """
+        tasks = []
+
+        if not os.path.exists(folder_path):
+            app_logger.error(f"{folder_path} does not exist! Unable to load tasks!")
+            return tasks
+
+        # we sort the files so that we have a set order, which helps with debugging
+        for filename in sorted(os.listdir(folder_path)):
+
+            # skip any non python files, as well as any __pycache__ or .pyc files that might creep in there
+            if not filename.endswith('.py') or filename.startswith("__"):
+                continue
+
+            path = os.path.join(folder_path, filename)
+            module_name = filename[:-3]
+            spec = importlib.util.spec_from_file_location(f"tasks.{module_name}", path)
+            module = importlib.util.module_from_spec(spec)
+            try:
+                spec.loader.exec_module(module)
+                sys.modules[f"tasks.{module_name}"] = module
+            except Exception as e:
+                app_logger.error(f"Failed to import {filename}: {e}")
+                continue
+            
+            # if we have a tasks config and a main function, we attempt to schedule it
+            if hasattr(module, 'TASK_CONFIG') and hasattr(module, 'main'):
+
+                # ensure task_config is a dict
+                if not isinstance(module.TASK_CONFIG, dict):
+                    app_logger.error(f"TASK_CONFIG is not a dict in {filename}. Skipping task.")
+                    continue
+
+                task_cron = module.TASK_CONFIG.get("cron") or self.TASK_DEFAULT_CRON
+                task_name = module.TASK_CONFIG.get("name", module_name)
+
+                # ensure the task_cron is a valid cron value
+                try:
+                    CronTrigger.from_crontab(task_cron)
+                except ValueError as ve:
+                    app_logger.error(f"Invalid cron format for task {task_name}: {ve} - Skipping this task")
+                    continue
+
+                task = {
+                    'name': module.TASK_CONFIG.get('name', module_name),
+                    'filename': filename,
+                    'cron': task_cron,
+                    "enabled": module.TASK_CONFIG.get("enabled", False),
+                    "run_when_loaded": module.TASK_CONFIG.get("run_when_loaded", False)
+                }
+
+                tasks.append(task)
+
+            # we are missing things, and we log what's missing
+            else:
+                if not hasattr(module, 'TASK_CONFIG'):
+                    app_logger.warning(f"Missing TASK_CONFIG in {filename}")
+                elif not hasattr(module, 'main'):
+                    app_logger.warning(f"Missing main() in {filename}")
+
+        return tasks
+
+    def _add_jobs(self):
+        """Schedule every enabled task in self.tasks; errors raised while scheduling one are logged."""
+        for task_to_run in self.tasks:
+
+            # remember, these task dicts are built by the "_load_tasks_from_folder" method;
+            # to use a value from a module's TASK_CONFIG dict here, it must be copied into the task dict there.
+            task_name = task_to_run.get("name")
+            run_when_loaded = task_to_run.get("run_when_loaded")
+            module_name = os.path.splitext(task_to_run.get("filename"))[0]
+            task_enabled = task_to_run.get("enabled", False)
+
+            # if no crontab is set for this task, fall back to TASK_DEFAULT_CRON (every 15 minutes).
+            task_cron = task_to_run.get("cron") or self.TASK_DEFAULT_CRON
+
+            # if task is disabled, skip this one
+            if not task_enabled:
+                app_logger.info(f"{task_name} is disabled in client config. Skipping task")
+                continue
+            try:
+                if os.path.isfile(os.path.join(self.TASKS_FOLDER, task_to_run.get("filename"))):
+                    # schedule the task now that everything has checked out above...
+                    self._schedule_task(task_name, module_name, task_cron, run_when_loaded)
+                    app_logger.info(f"Scheduled {module_name} cron is set to {task_cron}.", extra={"task": task_to_run})
+                else:
+                    app_logger.info(f"Skipping invalid or unsafe file: {task_to_run.get('filename')}", extra={"task": task_to_run})
+
+            except Exception as e:
+                app_logger.error(f"Error scheduling task: {e}", extra={"tasks": task_to_run})
+    
+    def _schedule_task(self, task_name, module_name, task_cron, run_when_loaded):
+        try:
+            # Dynamically import the module
+            module = importlib.import_module(f"tasks.{module_name}")
+
+            # Check if the module has a 'main' function
+            if hasattr(module, 'main'):
+                app_logger.info(f"Scheduling {task_name} - {module_name} Main Function")
+                
+                # unique_job_id
+                job_identifier = f"{module_name}__{task_name}"
+
+                # little insurance to make sure the cron is set to something and not none
+                if task_cron is None:
+                    task_cron = self.TASK_DEFAULT_CRON
+
+                trigger = CronTrigger.from_crontab(task_cron)
+
+                # schedule the task / job
+                if run_when_loaded:
+                    app_logger.info(f"Task: {task_name} is set to run instantly. Scheduling to run on scheduler start")
+
+                    self.scheduler.add_job(
+                        module.main, 
+                        trigger, 
+                        id=job_identifier,
+                        jitter=self.TASK_JITTER, 
+                        name=task_name, 
+                        next_run_time=datetime.datetime.now(),
+                        max_instances=1
+                    )
+                else:
+                    self.scheduler.add_job(
+                        module.main, 
+                        trigger, 
+                        id=job_identifier,
+                        jitter=self.TASK_JITTER,
+                        name=task_name,
+                        max_instances=1
+                    )
+            else:
+                app_logger.error(f"{module_name} does not define a 'main' function.")
+                
+        except Exception as e:
+            app_logger.error(f"Failed to load {module_name}: {e}")
+
+    def job_listener(self, event):
+        job_id = event.job_id
+        self.last_run_times[job_id] = datetime.datetime.now()
+
+        if event.exception:
+            app_logger.error(f"Job {event.job_id} failed: {event.exception}")
+        else:
+            app_logger.info(f"Job {event.job_id} completed successfully.")
+
+    def list_jobs(self):
+        scheduled_jobs = self.scheduler.get_jobs()
+        jobs_list = []
+    
+        for job in scheduled_jobs:
+            jobs_list.append({
+                    "id": job.id,
+                    "name": job.name,
+                    "next_run": job.next_run_time,
+                })
+        return jobs_list
+
+    def run_scheduled_tasks(self):
+        """
+        Registers the enabled tasks with the scheduler and makes sure it is running.
+
+        This method performs the following:
+        1. Adds a job for every enabled task in self.tasks (loaded once, at
+           construction time, from TASKS_FOLDER) by calling _add_jobs().
+        2. Starts the scheduler if it is not already running.
+
+        The tasks folder is not re-scanned here; calling this method again
+        attempts to add the same job ids a second time.
+        """
+
+        # Add enabled tasks to the scheduler
+        self._add_jobs()
+
+        # Start the scheduler to begin executing the scheduled tasks (if not already running)
+        if not self.scheduler.running:
+            self.scheduler.start()
+
+
+# ---------- SINGLETON WRAPPER ----------
+T = type
+
+def singleton_loader(func):
+    """Decorator to ensure only one instance exists."""
+    cache: dict[str, T] = {}
+    lock = threading.Lock()
+
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs) -> T:
+        with lock:
+            if func.__name__ not in cache:
+                cache[func.__name__] = func(*args, **kwargs)
+            return cache[func.__name__]
+    return wrapper
+
+
+@singleton_loader
+def get_tasksmaster(scheduler: BackgroundScheduler | None = None) -> TasksMaster:
+    """
+    Returns the singleton TasksMaster instance.
+
+    - Automatically creates a BackgroundScheduler if none is provided.
+    - Automatically starts the scheduler when the singleton is created.
+
+    :param scheduler: Optional APScheduler instance. If None, a new BackgroundScheduler will be created.
+    """
+    if scheduler is None:
+        scheduler = BackgroundScheduler()
+
+    tm_instance = TasksMaster(scheduler)
+
+    # Auto-start scheduler if not already running
+    if not scheduler.running:
+        scheduler.start()
+        app_logger.info("TasksMaster scheduler started automatically with singleton creation.")
+
+    return tm_instance

From 02aed9e65abffd19361b63067d6936e6a4400c32 Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Tue, 6 Jan 2026 18:50:36 +0100
Subject: [PATCH 16/21] added drop down menu and scoring graph to the dashboard

---
 src/database.py                     |  39 ++-
 src/handler.py                      |  27 ++
 src/templates/dashboard_template.py | 398 +++++++++++++++++++++++++++-
 src/templates/html/main_page.html   |  13 +-
 4 files changed, 455 insertions(+), 22 deletions(-)

diff --git a/src/database.py b/src/database.py
index 9d8e444..e60348a 100644
--- a/src/database.py
+++ b/src/database.py
@@ -256,7 +256,7 @@ class DatabaseManager:
         
         """
         session = self.session
-
+        sanitized_ip = sanitize_ip(ip)
         ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
 
         ip_stats.category = category
@@ -439,6 +439,43 @@ class DatabaseManager:
         finally:
             self.close_session()
 
+    def get_ip_stats_by_ip(self, ip: str) -> Optional[Dict[str, Any]]:
+        """
+        Retrieve IP statistics for a specific IP address.
+
+        Args:
+            ip: The IP address to look up (passed through sanitize_ip before querying)
+
+        Returns:
+            Dictionary with IP stats or None if not found
+        """
+        session = self.session
+        try:
+            stat = session.query(IpStats).filter(IpStats.ip == sanitize_ip(ip)).first()
+            
+            if not stat:
+                return None
+            
+            return {
+                'ip': stat.ip,
+                'total_requests': stat.total_requests,
+                'first_seen': stat.first_seen.isoformat() if stat.first_seen else None,
+                'last_seen': stat.last_seen.isoformat() if stat.last_seen else None,
+                'country_code': stat.country_code,
+                'city': stat.city,
+                'asn': stat.asn,
+                'asn_org': stat.asn_org,
+                'reputation_score': stat.reputation_score,
+                'reputation_source': stat.reputation_source,
+                'analyzed_metrics': stat.analyzed_metrics or {},
+                'category': stat.category,
+                'category_scores': stat.category_scores or {},
+                'manual_category': stat.manual_category,
+                'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None
+            }
+        finally:
+            self.close_session()
+
     def get_dashboard_counts(self) -> Dict[str, int]:
         """
         Get aggregate statistics for the dashboard.
diff --git a/src/handler.py b/src/handler.py
index eef528d..2598706 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -413,6 +413,33 @@ class Handler(BaseHTTPRequestHandler):
             except Exception as e:
                 self.app_logger.error(f"Error generating dashboard: {e}")
             return
+        
+        # API endpoint for fetching IP stats (JSON); served only under the secret dashboard path
+        if self.config.dashboard_secret_path and self.path.startswith(f"{self.config.dashboard_secret_path}/api/ip-stats/"):
+            ip_address = self.path[len(f"{self.config.dashboard_secret_path}/api/ip-stats/"):]  # strip only the leading prefix
+            self.send_response(200)
+            self.send_header('Content-type', 'application/json')
+            self.send_header('Access-Control-Allow-Origin', '*')
+            # Prevent browser caching - force fresh data from database every time
+            self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0')
+            self.send_header('Pragma', 'no-cache')
+            self.send_header('Expires', '0')
+            self.end_headers()
+            try:
+                import json  # imported first so the generic handler below can always serialize its error
+                from database import get_database
+                db = get_database()
+                ip_stats = db.get_ip_stats_by_ip(ip_address)
+                if ip_stats:
+                    self.wfile.write(json.dumps(ip_stats).encode())
+                else:
+                    self.wfile.write(json.dumps({'error': 'IP not found'}).encode())
+            except BrokenPipeError:
+                pass  # client disconnected before the body was written; nothing left to send
+            except Exception as e:
+                self.app_logger.error(f"Error fetching IP stats: {e}")
+                self.wfile.write(json.dumps({'error': str(e)}).encode())
+            return
 
         self.tracker.record_access(client_ip, self.path, user_agent, method='GET')
         
diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py
index dfad3dd..df0378a 100644
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -27,9 +27,20 @@ def format_timestamp(iso_timestamp: str) -> str:
 def generate_dashboard(stats: dict) -> str:
     """Generate dashboard HTML with access statistics"""
     
-    # Generate IP rows (IPs are generally safe but escape for consistency)
+    # Generate IP rows with clickable functionality for dropdown stats
     top_ips_rows = '\n'.join([
-        f'<tr><td class="rank">{i+1}</td><td>{_escape(ip)}</td><td>{count}</td></tr>'
+        f'''<tr class="ip-row" data-ip="{_escape(ip)}">
+            <td class="rank">{i+1}</td>
+            <td class="ip-clickable">{_escape(ip)}</td>
+            <td>{count}</td>
+        </tr>
+        <tr class="ip-stats-row" id="stats-row-{_escape(ip).replace(".", "-")}" style="display: none;">
+            <td colspan="3" class="ip-stats-cell">
+                <div class="ip-stats-dropdown">
+                    <div class="loading">Loading stats...</div>
+                </div>
+            </td>
+        </tr>'''
         for i, (ip, count) in enumerate(stats['top_ips'])
     ]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
 
@@ -45,27 +56,76 @@ def generate_dashboard(stats: dict) -> str:
         for i, (ua, count) in enumerate(stats['top_user_agents'])
     ]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
 
-    # Generate suspicious accesses rows (CRITICAL: multiple user-controlled fields)
+    # Generate suspicious accesses rows with clickable IPs
     suspicious_rows = '\n'.join([
-        f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["path"])}</td><td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
+        f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
+            <td class="ip-clickable">{_escape(log["ip"])}</td>
+            <td>{_escape(log["path"])}</td>
+            <td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
+            <td>{_escape(log["timestamp"].split("T")[1][:8])}</td>
+        </tr>
+        <tr class="ip-stats-row" id="stats-row-suspicious-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
+            <td colspan="4" class="ip-stats-cell">
+                <div class="ip-stats-dropdown">
+                    <div class="loading">Loading stats...</div>
+                </div>
+            </td>
+        </tr>'''
         for log in stats['recent_suspicious'][-10:]
     ]) or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
 
-    # Generate honeypot triggered IPs rows
+    # Generate honeypot triggered IPs rows with clickable IPs
     honeypot_rows = '\n'.join([
-        f'<tr><td>{_escape(ip)}</td><td style="word-break: break-all;">{_escape(", ".join(paths))}</td><td>{len(paths)}</td></tr>'
+        f'''<tr class="ip-row" data-ip="{_escape(ip)}">
+            <td class="ip-clickable">{_escape(ip)}</td>
+            <td style="word-break: break-all;">{_escape(", ".join(paths))}</td>
+            <td>{len(paths)}</td>
+        </tr>
+        <tr class="ip-stats-row" id="stats-row-honeypot-{_escape(ip).replace(".", "-")}" style="display: none;">
+            <td colspan="3" class="ip-stats-cell">
+                <div class="ip-stats-dropdown">
+                    <div class="loading">Loading stats...</div>
+                </div>
+            </td>
+        </tr>'''
         for ip, paths in stats.get('honeypot_triggered_ips', [])
     ]) or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>'
 
-    # Generate attack types rows (CRITICAL: paths and user agents are user-controlled)
+    # Generate attack types rows with clickable IPs
     attack_type_rows = '\n'.join([
-        f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["path"])}</td><td>{_escape(", ".join(log["attack_types"]))}</td><td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
+        f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
+            <td class="ip-clickable">{_escape(log["ip"])}</td>
+            <td>{_escape(log["path"])}</td>
+            <td>{_escape(", ".join(log["attack_types"]))}</td>
+            <td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
+            <td>{_escape(log["timestamp"].split("T")[1][:8])}</td>
+        </tr>
+        <tr class="ip-stats-row" id="stats-row-attack-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
+            <td colspan="5" class="ip-stats-cell">
+                <div class="ip-stats-dropdown">
+                    <div class="loading">Loading stats...</div>
+                </div>
+            </td>
+        </tr>'''
         for log in stats.get('attack_types', [])[-10:]
     ]) or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
 
-    # Generate credential attempts rows (CRITICAL: usernames and passwords are user-controlled)
+    # Generate credential attempts rows with clickable IPs
     credential_rows = '\n'.join([
-        f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["username"])}</td><td>{_escape(log["password"])}</td><td>{_escape(log["path"])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
+        f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
+            <td class="ip-clickable">{_escape(log["ip"])}</td>
+            <td>{_escape(log["username"])}</td>
+            <td>{_escape(log["password"])}</td>
+            <td>{_escape(log["path"])}</td>
+            <td>{_escape(log["timestamp"].split("T")[1][:8])}</td>
+        </tr>
+        <tr class="ip-stats-row" id="stats-row-cred-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
+            <td colspan="5" class="ip-stats-cell">
+                <div class="ip-stats-dropdown">
+                    <div class="loading">Loading stats...</div>
+                </div>
+            </td>
+        </tr>'''
         for log in stats.get('credential_attempts', [])[-20:]
     ]) or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'
 
@@ -180,6 +240,119 @@ def generate_dashboard(stats: dict) -> str:
             content: '▼';
             opacity: 1;
         }}
+        .ip-row {{
+            transition: background-color 0.2s;
+        }}
+        .ip-clickable {{
+            cursor: pointer;
+            color: #58a6ff !important;
+            font-weight: 500;
+            text-decoration: underline;
+            text-decoration-style: dotted;
+            text-underline-offset: 3px;
+        }}
+        .ip-clickable:hover {{
+            color: #79c0ff !important;
+            text-decoration-style: solid;
+            background: #1c2128;
+        }}
+        .ip-stats-row {{
+            background: #0d1117;
+        }}
+        .ip-stats-cell {{
+            padding: 0 !important;
+        }}
+        .ip-stats-dropdown {{
+            margin-top: 10px;
+            padding: 15px;
+            background: #0d1117;
+            border: 1px solid #30363d;
+            border-radius: 6px;
+            font-size: 13px;
+            display: flex;
+            gap: 20px;
+        }}
+        .stats-left {{
+            flex: 1;
+        }}
+        .stats-right {{
+            flex: 0 0 200px;
+            display: flex;
+            flex-direction: column;
+            align-items: center;
+            justify-content: center;
+        }}
+        .radar-chart {{
+            position: relative;
+            width: 180px;
+            height: 180px;
+            overflow: visible;
+        }}
+        .radar-legend {{
+            margin-top: 10px;
+            font-size: 11px;
+        }}
+        .radar-legend-item {{
+            display: flex;
+            align-items: center;
+            gap: 6px;
+            margin: 3px 0;
+        }}
+        .radar-legend-color {{
+            width: 12px;
+            height: 12px;
+            border-radius: 2px;
+        }}
+        .ip-stats-dropdown .loading {{
+            color: #8b949e;
+            font-style: italic;
+        }}
+        .stat-row {{
+            display: flex;
+            justify-content: space-between;
+            padding: 5px 0;
+            border-bottom: 1px solid #21262d;
+        }}
+        .stat-row:last-child {{
+            border-bottom: none;
+        }}
+        .stat-label-sm {{
+            color: #8b949e;
+            font-weight: 500;
+        }}
+        .stat-value-sm {{
+            color: #58a6ff;
+            font-weight: 600;
+        }}
+        .category-badge {{
+            display: inline-block;
+            padding: 4px 8px;
+            border-radius: 4px;
+            font-size: 12px;
+            font-weight: 600;
+            text-transform: uppercase;
+        }}
+        .category-attacker {{
+            background: #f851491a;
+            color: #f85149;
+            border: 1px solid #f85149;
+        }}
+        .category-good-crawler {{
+            background: #3fb9501a;
+            color: #3fb950;
+            border: 1px solid #3fb950;
+        }}
+        .category-bad-crawler {{
+            background: #f0883e1a;
+            color: #f0883e;
+            border: 1px solid #f0883e;
+        }}
+        .category-regular-user {{
+            background: #58a6ff1a;
+            color: #58a6ff;
+            border: 1px solid #58a6ff;
+        }}
+
     </style>
 </head>
 <body>
@@ -387,6 +560,211 @@ def generate_dashboard(stats: dict) -> str:
                 rows.forEach(row => tbody.appendChild(row));
             }});
         }});
+
+        // IP stats dropdown functionality
+        document.querySelectorAll('.ip-clickable').forEach(cell => {{
+            cell.addEventListener('click', async function(e) {{
+                const row = e.currentTarget.closest('.ip-row');
+                if (!row) return;
+
+                const ip = row.getAttribute('data-ip');
+                const statsRow = row.nextElementSibling;
+                if (!statsRow || !statsRow.classList.contains('ip-stats-row')) return;
+
+                const isVisible = getComputedStyle(statsRow).display !== 'none';
+
+                document.querySelectorAll('.ip-stats-row').forEach(r => {{
+                    r.style.display = 'none';
+                }});
+
+                if (isVisible) return;
+
+                statsRow.style.display = 'table-row';
+
+                const dropdown = statsRow.querySelector('.ip-stats-dropdown');
+
+                // Always fetch fresh data from database
+                if (dropdown) {{
+                    dropdown.innerHTML = '<div class="loading">Loading stats...</div>';
+                    try {{
+                        const response = await fetch(`${{window.location.pathname}}/api/ip-stats/${{ip}}`, {{
+                            cache: 'no-store',
+                            headers: {{
+                                'Cache-Control': 'no-cache',
+                                'Pragma': 'no-cache'
+                            }}
+                        }});
+                        if (!response.ok) throw new Error(`HTTP ${{response.status}}`);
+
+                        const data = await response.json();
+                        dropdown.innerHTML = data.error
+                            ? `<div style="color:#f85149;">Error: ${{data.error}}</div>`
+                            : formatIpStats(data);
+                    }} catch (err) {{
+                        dropdown.innerHTML = `<div style="color:#f85149;">Failed to load stats: ${{err.message}}</div>`;
+                    }}
+                }}
+            }});
+        }});
+        
+        function formatIpStats(stats) {{
+            let html = '<div class="stats-left">';
+            
+            // Basic info
+            html += '<div class="stat-row">';
+            html += '<span class="stat-label-sm">Total Requests:</span>';
+            html += `<span class="stat-value-sm">${{stats.total_requests || 0}}</span>`;
+            html += '</div>';
+            
+            html += '<div class="stat-row">';
+            html += '<span class="stat-label-sm">First Seen:</span>';
+            html += `<span class="stat-value-sm">${{stats.first_seen ? new Date(stats.first_seen).toLocaleString() : 'N/A'}}</span>`;
+            html += '</div>';
+            
+            html += '<div class="stat-row">';
+            html += '<span class="stat-label-sm">Last Seen:</span>';
+            html += `<span class="stat-value-sm">${{stats.last_seen ? new Date(stats.last_seen).toLocaleString() : 'N/A'}}</span>`;
+            html += '</div>';
+            
+            // Category
+            if (stats.category) {{
+                html += '<div class="stat-row">';
+                html += '<span class="stat-label-sm">Category:</span>';
+                const categoryClass = 'category-' + stats.category.toLowerCase().replace('_', '-');
+                html += `<span class="category-badge ${{categoryClass}}">${{stats.category}}</span>`;
+                html += '</div>';
+            }}
+            
+            // GeoIP info if available
+            if (stats.country_code || stats.city) {{
+                html += '<div class="stat-row">';
+                html += '<span class="stat-label-sm">Location:</span>';
+                html += `<span class="stat-value-sm">${{stats.city || ''}}${{stats.city && stats.country_code ? ', ' : ''}}${{stats.country_code || 'Unknown'}}</span>`;
+                html += '</div>';
+            }}
+            
+            if (stats.asn_org) {{
+                html += '<div class="stat-row">';
+                html += '<span class="stat-label-sm">ASN Org:</span>';
+                html += `<span class="stat-value-sm">${{stats.asn_org}}</span>`;
+                html += '</div>';
+            }}
+            
+            // Reputation score if available
+            if (stats.reputation_score !== null && stats.reputation_score !== undefined) {{
+                html += '<div class="stat-row">';
+                html += '<span class="stat-label-sm">Reputation Score:</span>';
+                html += `<span class="stat-value-sm">${{stats.reputation_score}} ${{stats.reputation_source ? '(' + stats.reputation_source + ')' : ''}}</span>`;
+                html += '</div>';
+            }}
+            
+            html += '</div>';
+            
+            // Radar chart on the right
+            if (stats.category_scores && Object.keys(stats.category_scores).length > 0) {{
+                html += '<div class="stats-right">';
+                html += '<svg class="radar-chart" viewBox="-30 -30 260 260" preserveAspectRatio="xMidYMid meet">';
+                
+                const scores = {{
+                    attacker: stats.category_scores.attacker || 0,
+                    good_crawler: stats.category_scores.good_crawler || 0,
+                    bad_crawler: stats.category_scores.bad_crawler || 0,
+                    regular_user: stats.category_scores.regular_user || 0
+                }};
+                
+                // Normalize scores for better visualization
+                const maxScore = Math.max(...Object.values(scores), 1);
+                const minVisibleRadius = 0.15; // Minimum 15% visibility even for 0 values
+                const normalizedScores = {{}};
+                
+                Object.keys(scores).forEach(key => {{
+                    // Scale values: ensure minimum visibility + proportional to max
+                    normalizedScores[key] = minVisibleRadius + (scores[key] / maxScore) * (1 - minVisibleRadius);
+                }});
+                
+                const colors = {{
+                    attacker: '#f85149',
+                    good_crawler: '#3fb950',
+                    bad_crawler: '#f0883e',
+                    regular_user: '#58a6ff'
+                }};
+                
+                const labels = {{
+                    attacker: 'Attacker',
+                    good_crawler: 'Good Bot',
+                    bad_crawler: 'Bad Bot',
+                    regular_user: 'User'
+                }};
+                
+                // Draw radar background grid
+                const cx = 100, cy = 100, maxRadius = 75;
+                for (let i = 1; i <= 5; i++) {{
+                    const r = (maxRadius / 5) * i;
+                    html += `<circle cx="${{cx}}" cy="${{cy}}" r="${{r}}" fill="none" stroke="#30363d" stroke-width="0.5"/>`;
+                }}
+                
+                // Draw axes
+                const angles = [0, 90, 180, 270];
+                const keys = ['attacker', 'good_crawler', 'bad_crawler', 'regular_user'];
+                
+                angles.forEach((angle, i) => {{
+                    const rad = (angle - 90) * Math.PI / 180;
+                    const x2 = cx + maxRadius * Math.cos(rad);
+                    const y2 = cy + maxRadius * Math.sin(rad);
+                    html += `<line x1="${{cx}}" y1="${{cy}}" x2="${{x2}}" y2="${{y2}}" stroke="#30363d" stroke-width="0.5"/>`;
+                    
+                    // Add labels
+                    const labelDist = maxRadius + 30;
+                    const lx = cx + labelDist * Math.cos(rad);
+                    const ly = cy + labelDist * Math.sin(rad);
+                    html += `<text x="${{lx}}" y="${{ly}}" fill="#8b949e" font-size="12" text-anchor="middle" dominant-baseline="middle">${{labels[keys[i]]}}</text>`;
+                }});
+                
+                // Draw filled polygon for scores
+                let points = [];
+                angles.forEach((angle, i) => {{
+                    const normalizedScore = normalizedScores[keys[i]];
+                    const rad = (angle - 90) * Math.PI / 180;
+                    const r = normalizedScore * maxRadius;
+                    const x = cx + r * Math.cos(rad);
+                    const y = cy + r * Math.sin(rad);
+                    points.push(`${{x}},${{y}}`);
+                }});
+                
+                // Determine dominant category color
+                const dominantKey = Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? a : b);
+                const dominantColor = colors[dominantKey];
+                
+                // Draw single colored area
+                html += `<polygon points="${{points.join(' ')}}" fill="${{dominantColor}}" fill-opacity="0.4" stroke="${{dominantColor}}" stroke-width="2.5"/>`;
+                
+                // Draw points
+                angles.forEach((angle, i) => {{
+                    const normalizedScore = normalizedScores[keys[i]];
+                    const rad = (angle - 90) * Math.PI / 180;
+                    const r = normalizedScore * maxRadius;
+                    const x = cx + r * Math.cos(rad);
+                    const y = cy + r * Math.sin(rad);
+                    html += `<circle cx="${{x}}" cy="${{y}}" r="4.5" fill="${{colors[keys[i]]}}" stroke="#0d1117" stroke-width="2"/>`;
+                }});
+                
+                html += '</svg>';
+                
+                // Legend
+                html += '<div class="radar-legend">';
+                keys.forEach(key => {{
+                    html += '<div class="radar-legend-item">';
+                    html += `<div class="radar-legend-color" style="background: ${{colors[key]}};"></div>`;
+                    html += `<span style="color: #8b949e;">${{labels[key]}}: ${{scores[key]}}%</span>`;
+                    html += '</div>';
+                }});
+                html += '</div>';
+                
+                html += '</div>';
+            }}
+            
+            return html;
+        }}
     </script>
 </body>
 </html>
diff --git a/src/templates/html/main_page.html b/src/templates/html/main_page.html
index d0b39de..ac154e8 100644
--- a/src/templates/html/main_page.html
+++ b/src/templates/html/main_page.html
@@ -46,21 +46,12 @@
             gap: 10px;
             align-items: center;
             overflow-y: auto;
+            overflow-x: hidden;
             flex: 1;
             padding-top: 10px;
         }}
         .links-container::-webkit-scrollbar {{
-            width: 8px;
-        }}
-        .links-container::-webkit-scrollbar-track {{
-            background: #0d1117;
-        }}
-        .links-container::-webkit-scrollbar-thumb {{
-            background: #30363d;
-            border-radius: 4px;
-        }}
-        .links-container::-webkit-scrollbar-thumb:hover {{
-            background: #484f58;
+            width: 0px;
         }}
         .link-box {{
             background: #161b22;

From 769084102925b3fc7b9ca2486c653037b8a4de4c Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Wed, 7 Jan 2026 18:24:43 +0100
Subject: [PATCH 17/21] added categorization visualization and timeline

---
 src/database.py                        |  71 +++++++++++++++-
 src/migrations/add_category_history.py |  40 +++++++++
 src/models.py                          |  25 ++++++
 src/templates/dashboard_template.py    | 112 +++++++++++++++++++++++--
 4 files changed, 240 insertions(+), 8 deletions(-)
 create mode 100644 src/migrations/add_category_history.py

diff --git a/src/database.py b/src/database.py
index e60348a..0245105 100644
--- a/src/database.py
+++ b/src/database.py
@@ -13,7 +13,7 @@ from typing import Optional, List, Dict, Any
 from sqlalchemy import create_engine, func, distinct, case
 from sqlalchemy.orm import sessionmaker, scoped_session, Session
 
-from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats
+from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats, CategoryHistory
 from sanitizer import (
     sanitize_ip,
     sanitize_path,
@@ -226,6 +226,7 @@ class DatabaseManager:
     def  update_ip_stats_analysis(self, ip: str, analyzed_metrics: Dict[str, object], category: str, category_scores: Dict[str, int], last_analysis: datetime) -> None:
         """
         Update IP statistics (ip is already persisted).
+        Records category change in history if category has changed.
 
         Args:
             ip: IP address to update
@@ -241,6 +242,11 @@ class DatabaseManager:
         sanitized_ip = sanitize_ip(ip)
         ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
 
+        # Check if category has changed and record it
+        old_category = ip_stats.category
+        if old_category != category:
+            self._record_category_change(sanitized_ip, old_category, category, last_analysis)
+
         ip_stats.analyzed_metrics = analyzed_metrics
         ip_stats.category = category
         ip_stats.category_scores = category_scores
@@ -259,9 +265,66 @@ class DatabaseManager:
         sanitized_ip = sanitize_ip(ip)
         ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
 
+        # Record the manual category change
+        old_category = ip_stats.category
+        if old_category != category:
+            self._record_category_change(sanitized_ip, old_category, category, datetime.utcnow())
+
         ip_stats.category = category
         ip_stats.manual_category = True
 
+    def _record_category_change(self, ip: str, old_category: Optional[str], new_category: str, timestamp: datetime) -> None:
+        """
+        Internal method to record category changes in history.
+
+        Args:
+            ip: IP address
+            old_category: Previous category (None if first categorization)
+            new_category: New category
+            timestamp: When the change occurred
+        """
+        session = self.session
+        try:
+            history_entry = CategoryHistory(
+                ip=ip,
+                old_category=old_category,
+                new_category=new_category,
+                timestamp=timestamp
+            )
+            session.add(history_entry)
+            session.commit()
+        except Exception as e:
+            session.rollback()
+            print(f"Error recording category change: {e}")
+
+    def get_category_history(self, ip: str) -> List[Dict[str, Any]]:
+        """
+        Retrieve category change history for a specific IP.
+
+        Args:
+            ip: IP address to get history for
+
+        Returns:
+            List of category change records ordered by timestamp
+        """
+        session = self.session
+        try:
+            sanitized_ip = sanitize_ip(ip)
+            history = session.query(CategoryHistory).filter(
+                CategoryHistory.ip == sanitized_ip
+            ).order_by(CategoryHistory.timestamp.asc()).all()
+
+            return [
+                {
+                    'old_category': h.old_category,
+                    'new_category': h.new_category,
+                    'timestamp': h.timestamp.isoformat()
+                }
+                for h in history
+            ]
+        finally:
+            self.close_session()
+
     def get_access_logs(
         self,
         limit: int = 100,
@@ -456,6 +519,9 @@ class DatabaseManager:
             if not stat:
                 return None
             
+            # Get category history for this IP
+            category_history = self.get_category_history(ip)
+            
             return {
                 'ip': stat.ip,
                 'total_requests': stat.total_requests,
@@ -471,7 +537,8 @@ class DatabaseManager:
                 'category': stat.category,
                 'category_scores': stat.category_scores or {},
                 'manual_category': stat.manual_category,
-                'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None
+                'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None,
+                'category_history': category_history
             }
         finally:
             self.close_session()
diff --git a/src/migrations/add_category_history.py b/src/migrations/add_category_history.py
new file mode 100644
index 0000000..654204e
--- /dev/null
+++ b/src/migrations/add_category_history.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python3
+"""
+Migration script to add CategoryHistory table to existing databases.
+Run this once to upgrade your database schema.
+"""
+
+import sys
+from pathlib import Path
+
+# Add parent directory to path to import modules
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from database import get_database, DatabaseManager
+from models import Base, CategoryHistory
+
+
+def migrate():
+    """Create CategoryHistory table if it doesn't exist."""
+    print("Starting migration: Adding CategoryHistory table...")
+    
+    try:
+        db = get_database()
+        
+        # Initialize database if not already done
+        if not db._initialized:
+            db.initialize()
+        
+        # Create only the CategoryHistory table
+        CategoryHistory.__table__.create(db._engine, checkfirst=True)
+        
+        print("✓ Migration completed successfully!")
+        print("  - CategoryHistory table created")
+        
+    except Exception as e:
+        print(f"✗ Migration failed: {e}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    migrate()
diff --git a/src/models.py b/src/models.py
index 190ef26..2b86fd5 100644
--- a/src/models.py
+++ b/src/models.py
@@ -151,6 +151,31 @@ class IpStats(Base):
     def __repr__(self) -> str:
         return f"<IpStats(ip='{self.ip}', total_requests={self.total_requests})>"
 
+
+class CategoryHistory(Base):
+    """
+    Records category changes for IP addresses over time.
+
+    Tracks when an IP's category changes, storing both the previous
+    and new category along with timestamp for timeline visualization.
+    """
+    __tablename__ = 'category_history'
+
+    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
+    ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
+    old_category: Mapped[Optional[str]] = mapped_column(String(50), nullable=True)
+    new_category: Mapped[str] = mapped_column(String(50), nullable=False)
+    timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
+
+    # Composite index for efficient IP-based timeline queries
+    __table_args__ = (
+        Index('ix_category_history_ip_timestamp', 'ip', 'timestamp'),
+    )
+
+    def __repr__(self) -> str:
+        return f"<CategoryHistory(ip='{self.ip}', {self.old_category} -> {self.new_category})>"
+
+
 # class IpLog(Base):
 #     """
 #     Records all IPs that have accessed the honeypot, along with aggregated stats and inferred user category.
diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py
index df0378a..332288c 100644
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -284,8 +284,8 @@ def generate_dashboard(stats: dict) -> str:
         }}
         .radar-chart {{
             position: relative;
-            width: 180px;
-            height: 180px;
+            width: 220px;
+            height: 220px;
             overflow: visible;
         }}
         .radar-legend {{
@@ -352,6 +352,72 @@ def generate_dashboard(stats: dict) -> str:
             color: #58a6ff;
             border: 1px solid #58a6ff;
         }}
+        .timeline-container {{
+            margin-top: 15px;
+            padding-top: 15px;
+            border-top: 1px solid #30363d;
+        }}
+        .timeline-title {{
+            color: #58a6ff;
+            font-size: 13px;
+            font-weight: 600;
+            margin-bottom: 10px;
+        }}
+        .timeline {{
+            position: relative;
+            padding-left: 30px;
+        }}
+        .timeline::before {{
+            content: '';
+            position: absolute;
+            left: 12px;
+            top: 5px;
+            bottom: 5px;
+            width: 3px;
+            background: #30363d;
+        }}
+        .timeline-item {{
+            position: relative;
+            padding-bottom: 15px;
+        }}
+        .timeline-item:last-child {{
+            padding-bottom: 0;
+        }}
+        .timeline-marker {{
+            position: absolute;
+            left: -26px;
+            width: 16px;
+            height: 16px;
+            border-radius: 50%;
+            border: 2px solid #0d1117;
+        }}
+        .timeline-marker.attacker {{
+            background: #f85149;
+        }}
+        .timeline-marker.good-crawler {{
+            background: #3fb950;
+        }}
+        .timeline-marker.bad-crawler {{
+            background: #f0883e;
+        }}
+        .timeline-marker.regular-user {{
+            background: #58a6ff;
+        }}
+        .timeline-content {{
+            font-size: 12px;
+        }}
+        .timeline-category {{
+            font-weight: 600;
+        }}
+        .timeline-timestamp {{
+            color: #8b949e;
+            font-size: 11px;
+            margin-top: 2px;
+        }}
+        .timeline-arrow {{
+            color: #8b949e;
+            margin: 0 7px;
+        }}
 
     </style>
 </head>
@@ -658,11 +724,45 @@ def generate_dashboard(stats: dict) -> str:
                 html += '</div>';
             }}
             
+            // Category History Timeline
+            if (stats.category_history && stats.category_history.length > 0) {{
+                html += '<div class="timeline-container">';
+                html += '<div class="timeline-title">Behavior Timeline</div>';
+                html += '<div class="timeline">';
+                
+                stats.category_history.forEach((change, index) => {{
+                    const categoryClass = change.new_category.toLowerCase().replace('_', '-');
+                    const timestamp = new Date(change.timestamp).toLocaleString();
+                    
+                    html += '<div class="timeline-item">';
+                    html += `<div class="timeline-marker ${{categoryClass}}"></div>`;
+                    html += '<div class="timeline-content">';
+                    
+                    if (change.old_category) {{
+                        const oldCategoryBadge = 'category-' + change.old_category.toLowerCase().replace('_', '-');
+                        html += `<span class="category-badge ${{oldCategoryBadge}}">${{change.old_category}}</span>`;
+                        html += '<span class="timeline-arrow">→</span>';
+                    }} else {{
+                        html += '<span style="color: #8b949e;">Initial:</span> ';
+                    }}
+                    
+                    const newCategoryBadge = 'category-' + change.new_category.toLowerCase().replace('_', '-');
+                    html += `<span class="category-badge ${{newCategoryBadge}}">${{change.new_category}}</span>`;
+                    html += `<div class="timeline-timestamp">${{timestamp}}</div>`;
+                    html += '</div>';
+                    html += '</div>';
+                }});
+                
+                html += '</div>';
+                html += '</div>';
+            }}
+            
             html += '</div>';
             
             // Radar chart on the right
             if (stats.category_scores && Object.keys(stats.category_scores).length > 0) {{
                 html += '<div class="stats-right">';
+                html += '<div style="font-size: 13px; font-weight: 600; color: #58a6ff; margin-bottom: 10px;">Category Score</div>';
                 html += '<svg class="radar-chart" viewBox="-30 -30 260 260" preserveAspectRatio="xMidYMid meet">';
                 
                 const scores = {{
@@ -705,7 +805,7 @@ def generate_dashboard(stats: dict) -> str:
                 
                 // Draw axes
                 const angles = [0, 90, 180, 270];
-                const keys = ['attacker', 'good_crawler', 'bad_crawler', 'regular_user'];
+                const keys = ['good_crawler', 'regular_user', 'bad_crawler', 'attacker'];
                 
                 angles.forEach((angle, i) => {{
                     const rad = (angle - 90) * Math.PI / 180;
@@ -713,8 +813,8 @@ def generate_dashboard(stats: dict) -> str:
                     const y2 = cy + maxRadius * Math.sin(rad);
                     html += `<line x1="${{cx}}" y1="${{cy}}" x2="${{x2}}" y2="${{y2}}" stroke="#30363d" stroke-width="0.5"/>`;
                     
-                    // Add labels
-                    const labelDist = maxRadius + 30;
+                    // Add labels at consistent distance
+                    const labelDist = maxRadius + 35;
                     const lx = cx + labelDist * Math.cos(rad);
                     const ly = cy + labelDist * Math.sin(rad);
                     html += `<text x="${{lx}}" y="${{ly}}" fill="#8b949e" font-size="12" text-anchor="middle" dominant-baseline="middle">${{labels[keys[i]]}}</text>`;
@@ -755,7 +855,7 @@ def generate_dashboard(stats: dict) -> str:
                 keys.forEach(key => {{
                     html += '<div class="radar-legend-item">';
                     html += `<div class="radar-legend-color" style="background: ${{colors[key]}};"></div>`;
-                    html += `<span style="color: #8b949e;">${{labels[key]}}: ${{scores[key]}}%</span>`;
+                    html += `<span style="color: #8b949e;">${{labels[key]}}: ${{scores[key]}} pt</span>`;
                     html += '</div>';
                 }});
                 html += '</div>';

From edb288a27157cf85993dad9940f90c053caa3ae1 Mon Sep 17 00:00:00 2001
From: Phillip Tarrant <ptarrant@gmail.com>
Date: Wed, 7 Jan 2026 12:33:43 -0600
Subject: [PATCH 18/21] Fixed some print statements to leverage logging, pulled
 in most recent dev edits, added exports to gitignore

---
 .gitignore      |  3 +++
 src/analyzer.py | 18 ++++++++++++------
 src/database.py | 12 ++++++++----
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/.gitignore b/.gitignore
index 70b93e4..63ae0e9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -76,3 +76,6 @@ data/
 # Personal canary tokens or sensitive configs
 *canary*token*.yaml
 personal-values.yaml
+
+#exports dir (keeping .gitkeep so we have the dir)
+/exports/*
\ No newline at end of file
diff --git a/src/analyzer.py b/src/analyzer.py
index a745813..b10e4e7 100644
--- a/src/analyzer.py
+++ b/src/analyzer.py
@@ -8,10 +8,13 @@ from datetime import datetime, timedelta
 import re
 from wordlists import get_wordlists
 from config import get_config
+from logger import get_app_logger
 """
 Functions for user activity analysis
 """
 
+app_logger = get_app_logger()
+
 class Analyzer:
     """
     Analyzes users activity and produces aggregated insights
@@ -56,7 +59,7 @@ class Analyzer:
         attack_urls_threshold = config.attack_urls_threshold
         uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
 
-        print(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
+        app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
 
         score = {}
         score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
@@ -185,7 +188,7 @@ class Analyzer:
             variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
             std = variance ** 0.5
             cv = std/mean
-            print(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
+            app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
 
         if cv >= uneven_request_timing_threshold:
             score["attacker"]["uneven_request_timing"] = True
@@ -268,10 +271,13 @@ class Analyzer:
         regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
         regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
 
-        print(f"Attacker score: {attacker_score}")
-        print(f"Good Crawler score: {good_crawler_score}")
-        print(f"Bad Crawler score: {bad_crawler_score}")
-        print(f"Regular User score: {regular_user_score}")
+        score_details = f"""
+        Attacker score: {attacker_score}
+        Good Crawler score: {good_crawler_score}
+        Bad Crawler score: {bad_crawler_score}
+        Regular User score: {regular_user_score}
+        """
+        app_logger.debug(score_details)
 
         analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
         category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
diff --git a/src/database.py b/src/database.py
index 0245105..c184e9e 100644
--- a/src/database.py
+++ b/src/database.py
@@ -22,6 +22,9 @@ from sanitizer import (
     sanitize_attack_pattern,
 )
 
+from logger import get_app_logger
+
+applogger = get_app_logger()
 
 class DatabaseManager:
     """
@@ -154,7 +157,7 @@ class DatabaseManager:
         except Exception as e:
             session.rollback()
             # Log error but don't crash - database persistence is secondary to honeypot function
-            print(f"Database error persisting access: {e}")
+            applogger.critical(f"Database error persisting access: {e}")
             return None
         finally:
             self.close_session()
@@ -193,7 +196,7 @@ class DatabaseManager:
 
         except Exception as e:
             session.rollback()
-            print(f"Database error persisting credential: {e}")
+            applogger.critical(f"Database error persisting credential: {e}")
             return None
         finally:
             self.close_session()
@@ -236,7 +239,8 @@ class DatabaseManager:
             last_analysis: timestamp of last analysis
 
         """
-        print(f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}")
+        applogger.debug(f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}")
+        applogger.info(f"IP: {ip} category has been updated to {category}")
 
         session = self.session
         sanitized_ip = sanitize_ip(ip)
@@ -295,7 +299,7 @@ class DatabaseManager:
             session.commit()
         except Exception as e:
             session.rollback()
-            print(f"Error recording category change: {e}")
+            applogger.error(f"Error recording category change: {e}")
 
     def get_category_history(self, ip: str) -> List[Dict[str, Any]]:
         """

From b61461d0282f7d3b775f66c65412124040e95d89 Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Thu, 8 Jan 2026 19:20:22 +0100
Subject: [PATCH 19/21] fixed categorization visualization, fixed date in the
 dashboard, fixed attack regex detection

---
 Dockerfile                          |  1 +
 config.yaml                         | 18 +++---
 src/analyzer.py                     | 37 ++++++++++---
 src/database.py                     | 47 +++++++++++-----
 src/handler.py                      |  3 +-
 src/templates/dashboard_template.py | 86 +++++++++++++++++++++++------
 src/wordlists.py                    |  3 +-
 wordlists.json                      | 17 +++---
 8 files changed, 154 insertions(+), 58 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 2c7b954..78023a7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -14,6 +14,7 @@ RUN pip install --no-cache-dir -r requirements.txt
 COPY src/ /app/src/
 COPY wordlists.json /app/
 COPY entrypoint.sh /app/
+COPY config.yaml /app/
 
 RUN useradd -m -u 1000 krawl && \
     mkdir -p /app/logs /app/data && \
diff --git a/config.yaml b/config.yaml
index 2150e1f..52daa09 100644
--- a/config.yaml
+++ b/config.yaml
@@ -3,7 +3,7 @@
 server:
   port: 5000
   delay: 100  # Response delay in milliseconds
-  timezone: null  # e.g., "America/New_York" or null for system default
+  timezone: null  # e.g., "America/New_York", "Europe/Paris" or null for system default
 
   # manually set the server header, if null a random one will be used.
   server_header: null
@@ -11,8 +11,8 @@ server:
 links:
   min_length: 5
   max_length: 15
-  min_per_page: 10
-  max_per_page: 15
+  min_per_page: 5
+  max_per_page: 10
   char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
   max_counter: 10
 
@@ -38,9 +38,9 @@ behavior:
   probability_error_codes: 0  # 0-100 percentage
 
 analyzer:
-  # http_risky_methods_threshold: 0.1
-  # violated_robots_threshold: 0.1
-  # uneven_request_timing_threshold: 5
-  # uneven_request_timing_time_window_seconds: 300
-  # user_agents_used_threshold: 2
-  # attack_urls_threshold: 1
+  http_risky_methods_threshold: 0.1
+  violated_robots_threshold: 0.1
+  uneven_request_timing_threshold: 2
+  uneven_request_timing_time_window_seconds: 300
+  user_agents_used_threshold: 2
+  attack_urls_threshold: 1
diff --git a/src/analyzer.py b/src/analyzer.py
index a745813..b63cd5e 100644
--- a/src/analyzer.py
+++ b/src/analyzer.py
@@ -6,6 +6,7 @@ from zoneinfo import ZoneInfo
 from pathlib import Path
 from datetime import datetime, timedelta
 import re
+import urllib.parse
 from wordlists import get_wordlists
 from config import get_config
 """
@@ -101,6 +102,15 @@ class Analyzer:
         total_accesses_count = len(accesses)
         if total_accesses_count <= 0:
             return
+        
+        # Set category as "unknown" until the IP has at least 3 recorded requests
+        if total_accesses_count < 3:
+            category = "unknown"
+            analyzed_metrics = {}
+            category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
+            last_analysis = datetime.now(tz=ZoneInfo('UTC'))
+            self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
+            return 0
 
         #--------------------- HTTP Methods ---------------------
 
@@ -147,7 +157,7 @@ class Analyzer:
                     robots_disallows.append(parts[1].strip())
 
         #if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
-        violated_robots_count = len([item for item in accesses if item["path"].rstrip("/") in tuple(robots_disallows)])
+        violated_robots_count = len([item for item in accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
         #print(f"Violated robots count: {violated_robots_count}")
         if total_accesses_count > 0:
             violated_robots_ratio = violated_robots_count / total_accesses_count
@@ -168,7 +178,8 @@ class Analyzer:
         #--------------------- Requests Timing ---------------------
         #Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
         timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses]
-        timestamps = [ts for ts in timestamps if datetime.utcnow() - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
+        now_utc = datetime.now(tz=ZoneInfo('UTC'))
+        timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
         timestamps = sorted(timestamps, reverse=True)
 
         time_diffs = []
@@ -221,13 +232,25 @@ class Analyzer:
         attack_urls_found_list = []
 
         wl = get_wordlists()
-        if wl.attack_urls:
+        if wl.attack_patterns:
             queried_paths = [item["path"] for item in accesses]
 
             for queried_path in queried_paths:
-                for name, pattern in wl.attack_urls.items():
-                    if re.search(pattern, queried_path, re.IGNORECASE):
-                        attack_urls_found_list.append(pattern)
+                # URL decode the path to catch encoded attacks
+                try:
+                    decoded_path = urllib.parse.unquote(queried_path)
+                    # Double decode to catch double-encoded attacks
+                    decoded_path_twice = urllib.parse.unquote(decoded_path)
+                except Exception:
+                    decoded_path = queried_path
+                    decoded_path_twice = queried_path
+                
+                for name, pattern in wl.attack_patterns.items():
+                    # Check original, decoded, and double-decoded paths
+                    if (re.search(pattern, queried_path, re.IGNORECASE) or 
+                        re.search(pattern, decoded_path, re.IGNORECASE) or
+                        re.search(pattern, decoded_path_twice, re.IGNORECASE)):
+                        attack_urls_found_list.append(f"{name}: {pattern}")
             
             if len(attack_urls_found_list) > attack_urls_threshold:
                 score["attacker"]["attack_url"] = True
@@ -276,7 +299,7 @@ class Analyzer:
         analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
         category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
         category = max(category_scores, key=category_scores.get)
-        last_analysis = datetime.utcnow()
+        last_analysis = datetime.now(tz=ZoneInfo('UTC'))
 
         self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
 
diff --git a/src/database.py b/src/database.py
index 0245105..35a6e2e 100644
--- a/src/database.py
+++ b/src/database.py
@@ -9,6 +9,7 @@ import os
 import stat
 from datetime import datetime
 from typing import Optional, List, Dict, Any
+from zoneinfo import ZoneInfo
 
 from sqlalchemy import create_engine, func, distinct, case
 from sqlalchemy.orm import sessionmaker, scoped_session, Session
@@ -127,7 +128,7 @@ class DatabaseManager:
                 method=method[:10],
                 is_suspicious=is_suspicious,
                 is_honeypot_trigger=is_honeypot_trigger,
-                timestamp=datetime.utcnow()
+                timestamp=datetime.now(tz=ZoneInfo('UTC'))
             )
             session.add(access_log)
             session.flush()  # Get the ID before committing
@@ -185,7 +186,7 @@ class DatabaseManager:
                 path=sanitize_path(path),
                 username=sanitize_credential(username),
                 password=sanitize_credential(password),
-                timestamp=datetime.utcnow()
+                timestamp=datetime.now(tz=ZoneInfo('UTC'))
             )
             session.add(credential)
             session.commit()
@@ -207,7 +208,7 @@ class DatabaseManager:
             ip: IP address to update
         """
         sanitized_ip = sanitize_ip(ip)
-        now = datetime.utcnow()
+        now = datetime.now(tz=ZoneInfo('UTC'))
 
         ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
 
@@ -251,6 +252,12 @@ class DatabaseManager:
         ip_stats.category = category
         ip_stats.category_scores = category_scores
         ip_stats.last_analysis = last_analysis
+        
+        try:
+            session.commit()
+        except Exception as e:
+            session.rollback()
+            print(f"Error updating IP stats analysis: {e}")
 
     def  manual_update_category(self, ip: str, category: str) -> None:
         """
@@ -268,14 +275,21 @@ class DatabaseManager:
         # Record the manual category change
         old_category = ip_stats.category
         if old_category != category:
-            self._record_category_change(sanitized_ip, old_category, category, datetime.utcnow())
+            self._record_category_change(sanitized_ip, old_category, category, datetime.now(tz=ZoneInfo('UTC')))
 
         ip_stats.category = category
         ip_stats.manual_category = True
+        
+        try:
+            session.commit()
+        except Exception as e:
+            session.rollback()
+            print(f"Error updating manual category: {e}")
 
     def _record_category_change(self, ip: str, old_category: Optional[str], new_category: str, timestamp: datetime) -> None:
         """
         Internal method to record category changes in history.
+        Only records if there's an actual change from a previous category.
 
         Args:
             ip: IP address
@@ -283,6 +297,11 @@ class DatabaseManager:
             new_category: New category
             timestamp: When the change occurred
         """
+        # Don't record initial categorization (when old_category is None)
+        # Only record actual category changes
+        if old_category is None:
+            return
+            
         session = self.session
         try:
             history_entry = CategoryHistory(
@@ -318,7 +337,7 @@ class DatabaseManager:
                 {
                     'old_category': h.old_category,
                     'new_category': h.new_category,
-                    'timestamp': h.timestamp.isoformat()
+                    'timestamp': h.timestamp.isoformat() + '+00:00'
                 }
                 for h in history
             ]
@@ -364,7 +383,7 @@ class DatabaseManager:
                     'method': log.method,
                     'is_suspicious': log.is_suspicious,
                     'is_honeypot_trigger': log.is_honeypot_trigger,
-                    'timestamp': log.timestamp.isoformat(),
+                    'timestamp': log.timestamp.isoformat() + '+00:00',
                     'attack_types': [d.attack_type for d in log.attack_detections]
                 }
                 for log in logs
@@ -457,7 +476,7 @@ class DatabaseManager:
                     'path': attempt.path,
                     'username': attempt.username,
                     'password': attempt.password,
-                    'timestamp': attempt.timestamp.isoformat()
+                    'timestamp': attempt.timestamp.isoformat() + '+00:00'
                 }
                 for attempt in attempts
             ]
@@ -484,8 +503,8 @@ class DatabaseManager:
                 {
                     'ip': s.ip,
                     'total_requests': s.total_requests,
-                    'first_seen': s.first_seen.isoformat(),
-                    'last_seen': s.last_seen.isoformat(),
+                    'first_seen': s.first_seen.isoformat() + '+00:00',
+                    'last_seen': s.last_seen.isoformat() + '+00:00',
                     'country_code': s.country_code,
                     'city': s.city,
                     'asn': s.asn,
@@ -525,8 +544,8 @@ class DatabaseManager:
             return {
                 'ip': stat.ip,
                 'total_requests': stat.total_requests,
-                'first_seen': stat.first_seen.isoformat() if stat.first_seen else None,
-                'last_seen': stat.last_seen.isoformat() if stat.last_seen else None,
+                'first_seen': stat.first_seen.isoformat() + '+00:00' if stat.first_seen else None,
+                'last_seen': stat.last_seen.isoformat() + '+00:00' if stat.last_seen else None,
                 'country_code': stat.country_code,
                 'city': stat.city,
                 'asn': stat.asn,
@@ -537,7 +556,7 @@ class DatabaseManager:
                 'category': stat.category,
                 'category_scores': stat.category_scores or {},
                 'manual_category': stat.manual_category,
-                'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None,
+                'last_analysis': stat.last_analysis.isoformat() + '+00:00' if stat.last_analysis else None,
                 'category_history': category_history
             }
         finally:
@@ -671,7 +690,7 @@ class DatabaseManager:
                     'ip': log.ip,
                     'path': log.path,
                     'user_agent': log.user_agent,
-                    'timestamp': log.timestamp.isoformat()
+                    'timestamp': log.timestamp.isoformat() + '+00:00'
                 }
                 for log in logs
             ]
@@ -729,7 +748,7 @@ class DatabaseManager:
                     'ip': log.ip,
                     'path': log.path,
                     'user_agent': log.user_agent,
-                    'timestamp': log.timestamp.isoformat(),
+                    'timestamp': log.timestamp.isoformat() + '+00:00',
                     'attack_types': [d.attack_type for d in log.attack_detections]
                 }
                 for log in logs
diff --git a/src/handler.py b/src/handler.py
index 2598706..ebc0b66 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -407,7 +407,8 @@ class Handler(BaseHTTPRequestHandler):
             self.end_headers()
             try:
                 stats = self.tracker.get_stats()
-                self.wfile.write(generate_dashboard(stats).encode())
+                timezone = str(self.config.timezone) if self.config.timezone else 'UTC'
+                self.wfile.write(generate_dashboard(stats, timezone).encode())
             except BrokenPipeError:
                 pass
             except Exception as e:
diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py
index 332288c..bbb6ad9 100644
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -7,6 +7,7 @@ Customize this template to change the dashboard appearance.
 
 import html
 from datetime import datetime
+from zoneinfo import ZoneInfo
 
 def _escape(value) -> str:
     """Escape HTML special characters to prevent XSS attacks."""
@@ -14,18 +15,36 @@ def _escape(value) -> str:
         return ""
     return html.escape(str(value))
 
-def format_timestamp(iso_timestamp: str) -> str:
-    """Format ISO timestamp for display (YYYY-MM-DD HH:MM:SS)"""
+def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool = False) -> str:
+    """Format ISO timestamp for display with timezone conversion
+    
+    Args:
+        iso_timestamp: ISO format timestamp string (UTC)
+        timezone: IANA timezone string to convert to
+        time_only: If True, return only HH:MM:SS, otherwise full datetime
+    """
     try:
+        # Parse UTC timestamp
         dt = datetime.fromisoformat(iso_timestamp)
+        # Convert to target timezone
+        if dt.tzinfo is not None:
+            dt = dt.astimezone(ZoneInfo(timezone))
+        
+        if time_only:
+            return dt.strftime("%H:%M:%S")
         return dt.strftime("%Y-%m-%d %H:%M:%S")
     except Exception:
         # Fallback for old format
         return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
 
 
-def generate_dashboard(stats: dict) -> str:
-    """Generate dashboard HTML with access statistics"""
+def generate_dashboard(stats: dict, timezone: str = 'UTC') -> str:
+    """Generate dashboard HTML with access statistics
+    
+    Args:
+        stats: Statistics dictionary
+        timezone: IANA timezone string (e.g., 'Europe/Paris', 'America/New_York')
+    """
     
     # Generate IP rows with clickable functionality for dropdown stats
     top_ips_rows = '\n'.join([
@@ -62,7 +81,7 @@ def generate_dashboard(stats: dict) -> str:
             <td class="ip-clickable">{_escape(log["ip"])}</td>
             <td>{_escape(log["path"])}</td>
             <td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
-            <td>{_escape(log["timestamp"].split("T")[1][:8])}</td>
+            <td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
         </tr>
         <tr class="ip-stats-row" id="stats-row-suspicious-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
             <td colspan="4" class="ip-stats-cell">
@@ -98,7 +117,7 @@ def generate_dashboard(stats: dict) -> str:
             <td>{_escape(log["path"])}</td>
             <td>{_escape(", ".join(log["attack_types"]))}</td>
             <td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
-            <td>{_escape(log["timestamp"].split("T")[1][:8])}</td>
+            <td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
         </tr>
         <tr class="ip-stats-row" id="stats-row-attack-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
             <td colspan="5" class="ip-stats-cell">
@@ -117,7 +136,7 @@ def generate_dashboard(stats: dict) -> str:
             <td>{_escape(log["username"])}</td>
             <td>{_escape(log["password"])}</td>
             <td>{_escape(log["path"])}</td>
-            <td>{_escape(log["timestamp"].split("T")[1][:8])}</td>
+            <td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
         </tr>
         <tr class="ip-stats-row" id="stats-row-cred-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
             <td colspan="5" class="ip-stats-cell">
@@ -352,6 +371,11 @@ def generate_dashboard(stats: dict) -> str:
             color: #58a6ff;
             border: 1px solid #58a6ff;
         }}
+        .category-unknown {{
+            background: #8b949e1a;
+            color: #8b949e;
+            border: 1px solid #8b949e;
+        }}
         .timeline-container {{
             margin-top: 15px;
             padding-top: 15px;
@@ -403,6 +427,9 @@ def generate_dashboard(stats: dict) -> str:
         .timeline-marker.regular-user {{
             background: #58a6ff;
         }}
+        .timeline-marker.unknown {{
+            background: #8b949e;
+        }}
         .timeline-content {{
             font-size: 12px;
         }}
@@ -570,6 +597,30 @@ def generate_dashboard(stats: dict) -> str:
         </div>
     </div>
     <script>
+        // Server timezone configuration
+        const SERVER_TIMEZONE = '{timezone}';
+        
+        // Convert UTC timestamp to configured timezone
+        function formatTimestamp(isoTimestamp) {{
+            if (!isoTimestamp) return 'N/A';
+            try {{
+                const date = new Date(isoTimestamp);
+                return date.toLocaleString('en-US', {{ 
+                    timeZone: SERVER_TIMEZONE,
+                    year: 'numeric',
+                    month: '2-digit',
+                    day: '2-digit',
+                    hour: '2-digit',
+                    minute: '2-digit',
+                    second: '2-digit',
+                    hour12: false
+                }});
+            }} catch (err) {{
+                console.error('Error formatting timestamp:', err);
+                return new Date(isoTimestamp).toLocaleString();
+            }}
+        }}
+        
         // Add sorting functionality to tables
         document.querySelectorAll('th.sortable').forEach(header => {{
             header.addEventListener('click', function() {{
@@ -684,12 +735,12 @@ def generate_dashboard(stats: dict) -> str:
             
             html += '<div class="stat-row">';
             html += '<span class="stat-label-sm">First Seen:</span>';
-            html += `<span class="stat-value-sm">${{stats.first_seen ? new Date(stats.first_seen).toLocaleString() : 'N/A'}}</span>`;
+            html += `<span class="stat-value-sm">${{formatTimestamp(stats.first_seen)}}</span>`;
             html += '</div>';
             
             html += '<div class="stat-row">';
             html += '<span class="stat-label-sm">Last Seen:</span>';
-            html += `<span class="stat-value-sm">${{stats.last_seen ? new Date(stats.last_seen).toLocaleString() : 'N/A'}}</span>`;
+            html += `<span class="stat-value-sm">${{formatTimestamp(stats.last_seen)}}</span>`;
             html += '</div>';
             
             // Category
@@ -732,7 +783,7 @@ def generate_dashboard(stats: dict) -> str:
                 
                 stats.category_history.forEach((change, index) => {{
                     const categoryClass = change.new_category.toLowerCase().replace('_', '-');
-                    const timestamp = new Date(change.timestamp).toLocaleString();
+                    const timestamp = formatTimestamp(change.timestamp);
                     
                     html += '<div class="timeline-item">';
                     html += `<div class="timeline-marker ${{categoryClass}}"></div>`;
@@ -769,7 +820,8 @@ def generate_dashboard(stats: dict) -> str:
                     attacker: stats.category_scores.attacker || 0,
                     good_crawler: stats.category_scores.good_crawler || 0,
                     bad_crawler: stats.category_scores.bad_crawler || 0,
-                    regular_user: stats.category_scores.regular_user || 0
+                    regular_user: stats.category_scores.regular_user || 0,
+                    unknown: stats.category_scores.unknown || 0
                 }};
                 
                 // Normalize scores for better visualization
@@ -786,14 +838,16 @@ def generate_dashboard(stats: dict) -> str:
                     attacker: '#f85149',
                     good_crawler: '#3fb950',
                     bad_crawler: '#f0883e',
-                    regular_user: '#58a6ff'
+                    regular_user: '#58a6ff',
+                    unknown: '#8b949e'
                 }};
                 
                 const labels = {{
                     attacker: 'Attacker',
                     good_crawler: 'Good Bot',
                     bad_crawler: 'Bad Bot',
-                    regular_user: 'User'
+                    regular_user: 'User',
+                    unknown: 'Unknown'
                 }};
                 
                 // Draw radar background grid
@@ -803,9 +857,9 @@ def generate_dashboard(stats: dict) -> str:
                     html += `<circle cx="${{cx}}" cy="${{cy}}" r="${{r}}" fill="none" stroke="#30363d" stroke-width="0.5"/>`;
                 }}
                 
-                // Draw axes
-                const angles = [0, 90, 180, 270];
-                const keys = ['good_crawler', 'regular_user', 'bad_crawler', 'attacker'];
+                // Draw axes (now with 5 points for pentagon)
+                const angles = [0, 72, 144, 216, 288];
+                const keys = ['good_crawler', 'regular_user', 'unknown', 'bad_crawler', 'attacker'];
                 
                 angles.forEach((angle, i) => {{
                     const rad = (angle - 90) * Math.PI / 180;
diff --git a/src/wordlists.py b/src/wordlists.py
index 0b0a0e9..81f2022 100644
--- a/src/wordlists.py
+++ b/src/wordlists.py
@@ -131,7 +131,8 @@ class Wordlists:
     
     @property
     def attack_urls(self):
-        return self._data.get("attack_urls", [])
+        """Deprecated: use attack_patterns instead. Returns the 'attack_patterns' entry of the loaded data (an empty dict if absent) for backward compatibility."""
+        return self._data.get("attack_patterns", {})
 
 
 _wordlists_instance = None
diff --git a/wordlists.json b/wordlists.json
index 833f1eb..3ea6f40 100644
--- a/wordlists.json
+++ b/wordlists.json
@@ -353,11 +353,14 @@
     }
   },
   "attack_patterns": {
-    "path_traversal": "\\.\\.",
+    "path_traversal": "(\\.\\.|%2e%2e|%252e%252e|\\.{2,}|%c0%ae|%c1%9c)",
     "sql_injection": "('|\"|`|--|#|/\\*|\\*/|\\bunion\\b|\\bunion\\s+select\\b|\\bor\\b.*=.*|\\band\\b.*=.*|'.*or.*'.*=.*'|\\bsleep\\b|\\bwaitfor\\b|\\bdelay\\b|\\bbenchmark\\b|;.*select|;.*drop|;.*insert|;.*update|;.*delete|\\bexec\\b|\\bexecute\\b|\\bxp_cmdshell\\b|information_schema|table_schema|table_name)",
     "xss_attempt": "(<script|</script|javascript:|onerror=|onload=|onclick=|onmouseover=|onfocus=|onblur=|<iframe|<img|<svg|<embed|<object|<body|<input|eval\\(|alert\\(|prompt\\(|confirm\\(|document\\.|window\\.|<style|expression\\(|vbscript:|data:text/html)",
-    "common_probes": "(wp-admin|phpmyadmin|\\.env|\\.git|/admin|/config)",
-    "shell_injection": "(\\||;|`|\\$\\(|&&)"
+    "shell_injection": "(\\||;|`|\\$\\(|&&|\\bnc\\b|\\bnetcat\\b|\\bwget\\b|\\bcurl\\b|/bin/bash|/bin/sh|cmd\\.exe)",
+    "lfi_rfi": "(file://|php://|expect://|data://|zip://|phar://|/etc/passwd|/etc/shadow|/proc/self|c:\\\\windows)",
+    "xxe_injection": "(<!ENTITY|<!DOCTYPE|SYSTEM|PUBLIC)",
+    "ldap_injection": "(\\*\\)|\\(\\||\\(&)",
+    "command_injection": "(&&|\\|\\||;|\\$\\{|\\$\\(|`)"
   },
   "server_headers": [
     "Apache/2.4.41 (Ubuntu)",
@@ -366,11 +369,5 @@
     "cloudflare",
     "AmazonS3",
     "gunicorn/20.1.0"
-  ],
-  "attack_urls": { 
-    "path_traversal": "\\.\\.",
-    "sql_injection": "('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
-    "xss_attempt": "(<script|javascript:|onerror=|onload=)",
-    "shell_injection": "(\\||;|`|\\$\\(|&&)"
-  }
+  ]
 }

From be7ba1f820258342f728b9b9deb73428f7f0c5c7 Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Fri, 9 Jan 2026 20:37:20 +0100
Subject: [PATCH 20/21] added download button

---
 config.yaml                         |  2 +-
 src/exports/malicious_ips.txt       |  1 +
 src/handler.py                      | 32 +++++++++++++++++++++++++-
 src/templates/dashboard_template.py | 35 +++++++++++++++++++++++++++--
 4 files changed, 66 insertions(+), 4 deletions(-)
 create mode 100644 src/exports/malicious_ips.txt

diff --git a/config.yaml b/config.yaml
index 52daa09..a2f6b58 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,7 +1,7 @@
 # Krawl Honeypot Configuration
 
 server:
-  port: 5000
+  port: 1234
   delay: 100  # Response delay in milliseconds
   timezone: null  # e.g., "America/New_York", "Europe/Paris" or null for system default
 
diff --git a/src/exports/malicious_ips.txt b/src/exports/malicious_ips.txt
new file mode 100644
index 0000000..7b9ad53
--- /dev/null
+++ b/src/exports/malicious_ips.txt
@@ -0,0 +1 @@
+127.0.0.1
diff --git a/src/handler.py b/src/handler.py
index ebc0b66..1f96d6c 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -408,7 +408,8 @@ class Handler(BaseHTTPRequestHandler):
             try:
                 stats = self.tracker.get_stats()
                 timezone = str(self.config.timezone) if self.config.timezone else 'UTC'
-                self.wfile.write(generate_dashboard(stats, timezone).encode())
+                dashboard_path = self.config.dashboard_secret_path
+                self.wfile.write(generate_dashboard(stats, timezone, dashboard_path).encode())
             except BrokenPipeError:
                 pass
             except Exception as e:
@@ -442,6 +443,35 @@ class Handler(BaseHTTPRequestHandler):
                 self.wfile.write(json.dumps({'error': str(e)}).encode())
             return
 
+        # API endpoint for downloading malicious IPs file
+        if self.config.dashboard_secret_path and self.path == f"{self.config.dashboard_secret_path}/api/download/malicious_ips.txt":
+            import os
+            file_path = os.path.join(os.path.dirname(__file__), 'exports', 'malicious_ips.txt')
+            try:
+                if os.path.exists(file_path):
+                    with open(file_path, 'rb') as f:
+                        content = f.read()
+                    self.send_response(200)
+                    self.send_header('Content-type', 'text/plain')
+                    self.send_header('Content-Disposition', 'attachment; filename="malicious_ips.txt"')
+                    self.send_header('Content-Length', str(len(content)))
+                    self.end_headers()
+                    self.wfile.write(content)
+                else:
+                    self.send_response(404)
+                    self.send_header('Content-type', 'text/plain')
+                    self.end_headers()
+                    self.wfile.write(b'File not found')
+            except BrokenPipeError:
+                pass
+            except Exception as e:
+                self.app_logger.error(f"Error serving malicious IPs file: {e}")
+                self.send_response(500)
+                self.send_header('Content-type', 'text/plain')
+                self.end_headers()
+                self.wfile.write(b'Internal server error')
+            return
+
         self.tracker.record_access(client_ip, self.path, user_agent, method='GET')
         
         self.analyzer.infer_user_category(client_ip)
diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py
index bbb6ad9..4e7005c 100644
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -38,12 +38,13 @@ def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool
         return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
 
 
-def generate_dashboard(stats: dict, timezone: str = 'UTC') -> str:
+def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str = '') -> str:
     """Generate dashboard HTML with access statistics
     
     Args:
         stats: Statistics dictionary
         timezone: IANA timezone string (e.g., 'Europe/Paris', 'America/New_York')
+        dashboard_path: The secret dashboard path for generating API URLs
     """
     
     # Generate IP rows with clickable functionality for dropdown stats
@@ -164,12 +165,36 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC') -> str:
         .container {{
             max-width: 1400px;
             margin: 0 auto;
+            position: relative;
         }}
         h1 {{
             color: #58a6ff;
             text-align: center;
             margin-bottom: 40px;
         }}
+        .download-section {{
+            position: absolute;
+            top: 0;
+            right: 0;
+        }}
+        .download-btn {{
+            display: inline-block;
+            padding: 8px 14px;
+            background: #238636;
+            color: #ffffff;
+            text-decoration: none;
+            border-radius: 6px;
+            font-weight: 500;
+            font-size: 13px;
+            transition: background 0.2s;
+            border: 1px solid #2ea043;
+        }}
+        .download-btn:hover {{
+            background: #2ea043;
+        }}
+        .download-btn:active {{
+            background: #1f7a2f;
+        }}
         .stats-grid {{
             display: grid;
             grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
@@ -450,6 +475,11 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC') -> str:
 </head>
 <body>
     <div class="container">
+        <div class="download-section">
+            <a href="{dashboard_path}/api/download/malicious_ips.txt" class="download-btn" download>
+            Export Malicious IPs
+            </a>
+        </div>
         <h1>Krawl Dashboard</h1>
         
         <div class="stats-grid">
@@ -599,6 +629,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC') -> str:
     <script>
         // Server timezone configuration
         const SERVER_TIMEZONE = '{timezone}';
+        const DASHBOARD_PATH = '{dashboard_path}';
         
         // Convert UTC timestamp to configured timezone
         function formatTimestamp(isoTimestamp) {{
@@ -704,7 +735,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC') -> str:
                 if (dropdown) {{
                     dropdown.innerHTML = '<div class="loading">Loading stats...</div>';
                     try {{
-                        const response = await fetch(`${{window.location.pathname}}/api/ip-stats/${{ip}}`, {{
+                        const response = await fetch(`${{DASHBOARD_PATH}}/api/ip-stats/${{ip}}`, {{
                             cache: 'no-store',
                             headers: {{
                                 'Cache-Control': 'no-cache',

From 431a42aa0ac8740fb161aeb7ae751f6b5b98dd65 Mon Sep 17 00:00:00 2001
From: Patrick Di Fazio <patrick.difazio@studio.unibo.it>
Date: Fri, 9 Jan 2026 20:39:25 +0100
Subject: [PATCH 21/21] removed exports

---
 config.yaml                   | 2 +-
 src/exports/malicious_ips.txt | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)
 delete mode 100644 src/exports/malicious_ips.txt

diff --git a/config.yaml b/config.yaml
index a2f6b58..52daa09 100644
--- a/config.yaml
+++ b/config.yaml
@@ -1,7 +1,7 @@
 # Krawl Honeypot Configuration
 
 server:
-  port: 1234
+  port: 5000
   delay: 100  # Response delay in milliseconds
   timezone: null  # e.g., "America/New_York", "Europe/Paris" or null for system default
 
diff --git a/src/exports/malicious_ips.txt b/src/exports/malicious_ips.txt
deleted file mode 100644
index 7b9ad53..0000000
--- a/src/exports/malicious_ips.txt
+++ /dev/null
@@ -1 +0,0 @@
-127.0.0.1