From 66b4d8fe6a5b2319670001dff61102793a759a7c Mon Sep 17 00:00:00 2001 From: Phillip Tarrant Date: Sun, 28 Dec 2025 14:24:52 -0600 Subject: [PATCH 01/21] adding pip and requirements to docker install and exposing data/krawl.db via docker-compose.yaml --- Dockerfile | 4 ++++ docker-compose.yaml | 1 + 2 files changed, 5 insertions(+) diff --git a/Dockerfile b/Dockerfile index adac20f..63d90bf 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,6 +4,10 @@ LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl WORKDIR /app +# Install Python dependencies +COPY requirements.txt /app/ +RUN pip install --no-cache-dir -r requirements.txt + COPY src/ /app/src/ COPY wordlists.json /app/ diff --git a/docker-compose.yaml b/docker-compose.yaml index 600034d..7d519ab 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -10,6 +10,7 @@ services: - "5000:5000" volumes: - ./wordlists.json:/app/wordlists.json:ro + - ./data:/app/data environment: - PORT=5000 - DELAY=100 From 1486dfc913046837a69e4bd191931ab82e803a67 Mon Sep 17 00:00:00 2001 From: Phillip Tarrant Date: Fri, 26 Dec 2025 07:53:05 -0600 Subject: [PATCH 02/21] Add configurable HTTP Server header for deception Add SERVER_HEADER environment variable to customize the HTTP Server response header, defaulting to Apache/2.2.22 (Ubuntu). This allows the honeypot to masquerade as different web servers to attract attackers. - Add server_header field to Config dataclass - Override version_string() in Handler to return configured header - Update documentation and all deployment configs --- README.md | 1 + docker-compose.yaml | 1 + helm/templates/configmap.yaml | 1 + helm/values.yaml | 1 + kubernetes/manifests/configmap.yaml | 1 + src/config.py | 4 +++- src/handler.py | 4 ++++ src/server.py | 1 + 8 files changed, 13 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 0cf8b96..b84d955 100644 --- a/README.md +++ b/README.md @@ -185,6 +185,7 @@ To customize the deception server installation several **environment variables** | `CANARY_TOKEN_URL` | External canary token URL | None | | `DASHBOARD_SECRET_PATH` | Custom dashboard path | Auto-generated | | `PROBABILITY_ERROR_CODES` | Error response probability (0-100%) | `0` | +| `SERVER_HEADER` | HTTP Server header for deception | `Apache/2.2.22 (Ubuntu)` | ## robots.txt The actual (juicy) robots.txt configuration is the following diff --git a/docker-compose.yaml b/docker-compose.yaml index 57c648d..1612864 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -20,6 +20,7 @@ services: - MAX_COUNTER=10 - CANARY_TOKEN_TRIES=10 - PROBABILITY_ERROR_CODES=0 + - SERVER_HEADER=Apache/2.2.22 (Ubuntu) # Optional: Set your canary token URL # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt # Optional: Set custom dashboard path (auto-generated if not set) diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index f6fe92c..c50ab75 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -14,4 +14,5 @@ data: MAX_COUNTER: {{ .Values.config.maxCounter | quote }} CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }} PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }} + SERVER_HEADER: {{ .Values.config.serverHeader | quote }} CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }} diff --git a/helm/values.yaml b/helm/values.yaml index 9ee9ca5..a095632 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -73,6 +73,7 @@ config: maxCounter: 10 canaryTokenTries: 10 probabilityErrorCodes: 0 + serverHeader: "Apache/2.2.22 (Ubuntu)" # canaryTokenUrl: set-your-canary-token-url-here networkPolicy: diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml index 42ba002..431b9a3 100644 --- a/kubernetes/manifests/configmap.yaml +++ b/kubernetes/manifests/configmap.yaml @@ -13,4 +13,5 @@ data: MAX_COUNTER: "10" CANARY_TOKEN_TRIES: "10" PROBABILITY_ERROR_CODES: "0" + SERVER_HEADER: "Apache/2.2.22 (Ubuntu)" # CANARY_TOKEN_URL: set-your-canary-token-url-here \ No newline at end of file diff --git a/src/config.py b/src/config.py index 51391a9..7c6714c 100644 --- a/src/config.py +++ b/src/config.py @@ -21,6 +21,7 @@ class Config: api_server_port: int = 8080 api_server_path: str = "/api/v2/users" probability_error_codes: int = 0 # Percentage (0-100) + server_header: str = "Apache/2.2.22 (Ubuntu)" @classmethod def from_env(cls) -> 'Config': @@ -44,5 +45,6 @@ class Config: api_server_url=os.getenv('API_SERVER_URL'), api_server_port=int(os.getenv('API_SERVER_PORT', 8080)), api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'), - probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5)) + probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 5)), + server_header=os.getenv('SERVER_HEADER', 'Apache/2.2.22 (Ubuntu)') ) diff --git a/src/handler.py b/src/handler.py index 8fef2aa..9d8abe2 100644 --- a/src/handler.py +++ b/src/handler.py @@ -48,6 +48,10 @@ class Handler(BaseHTTPRequestHandler): """Extract user agent from request""" return self.headers.get('User-Agent', '') + def version_string(self) -> str: + """Return custom server version for deception.""" + return self.config.server_header + def _should_return_error(self) -> bool: """Check if we should return an error based on probability""" if self.config.probability_error_codes <= 0: diff --git a/src/server.py b/src/server.py index 0105f6d..861e9f2 100644 --- a/src/server.py +++ b/src/server.py @@ -32,6 +32,7 @@ def print_usage(): print(' DASHBOARD_SECRET_PATH - Secret path for dashboard (auto-generated if not set)') print(' PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)') print(' CHAR_SPACE - Characters for random links') + print(' SERVER_HEADER - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))') def main(): From d13ceb4888bbe6701b8985d0886648f6c8cda53f Mon Sep 17 00:00:00 2001 From: Phillip Tarrant Date: Fri, 26 Dec 2025 08:00:16 -0600 Subject: [PATCH 03/21] Added test script to show the server header --- tests/check_header.sh | 3 +++ 1 file changed, 3 insertions(+) create mode 100755 tests/check_header.sh diff --git a/tests/check_header.sh b/tests/check_header.sh new file mode 100755 index 0000000..78b8e5d --- /dev/null +++ b/tests/check_header.sh @@ -0,0 +1,3 @@ +#!/bin/env bash +# -s is for silent (no progress bar) | -I is to get the headers | grep is to find only the Server line +curl -s -I http://localhost:5000 | grep "Server:" \ No newline at end of file From 828f04261fd16ebc21884d16026765cb6830983a Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio Date: Sat, 27 Dec 2025 19:17:27 +0100 Subject: [PATCH 04/21] Added POST log and dashboard for used credentials --- src/handler.py | 15 ++++++++ src/logger.py | 28 +++++++++++++++ src/server.py | 4 ++- src/templates/dashboard_template.py | 28 +++++++++++++++ src/tracker.py | 56 ++++++++++++++++++++++++++++- 5 files changed, 129 insertions(+), 2 deletions(-) diff --git a/src/handler.py b/src/handler.py index 9d8abe2..ac7ca22 100644 --- a/src/handler.py +++ b/src/handler.py @@ -3,6 +3,7 @@ import logging import random import time +from datetime import datetime from http.server import BaseHTTPRequestHandler from typing import Optional, List @@ -25,6 +26,7 @@ class Handler(BaseHTTPRequestHandler): counter: int = 0 app_logger: logging.Logger = None access_logger: logging.Logger = None + credential_logger: logging.Logger = None def _get_client_ip(self) -> str: """Extract client IP address from request, checking proxy headers first""" @@ -213,6 +215,19 @@ class Handler(BaseHTTPRequestHandler): self.access_logger.warning(f"[POST DATA] {post_data[:200]}") + # Parse and log credentials + username, password = self.tracker.parse_credentials(post_data) + if username or password: + # Log to dedicated credentials.log file + timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") + credential_line = f"{timestamp}|{client_ip}|{username or 'N/A'}|{password or 'N/A'}|{self.path}" + self.credential_logger.info(credential_line) + + # Also record in tracker for dashboard + self.tracker.record_credential_attempt(client_ip, self.path, username or 'N/A', password or 'N/A') + + self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}") + # send the post data (body) to the record_access function so the post data can be used to detect suspicious things. self.tracker.record_access(client_ip, self.path, user_agent, post_data) diff --git a/src/logger.py b/src/logger.py index 68b8278..9f09236 100644 --- a/src/logger.py +++ b/src/logger.py @@ -77,6 +77,22 @@ class LoggerManager: access_stream_handler.setFormatter(log_format) self._access_logger.addHandler(access_stream_handler) + # Setup credential logger (special format, no stream handler) + self._credential_logger = logging.getLogger("krawl.credentials") + self._credential_logger.setLevel(logging.INFO) + self._credential_logger.handlers.clear() + + # Credential logger uses a simple format: timestamp|ip|username|password|path + credential_format = logging.Formatter("%(message)s") + + credential_file_handler = RotatingFileHandler( + os.path.join(log_dir, "credentials.log"), + maxBytes=max_bytes, + backupCount=backup_count + ) + credential_file_handler.setFormatter(credential_format) + self._credential_logger.addHandler(credential_file_handler) + self._initialized = True @property @@ -93,6 +109,13 @@ class LoggerManager: self.initialize() return self._access_logger + @property + def credentials(self) -> logging.Logger: + """Get the credentials logger.""" + if not self._initialized: + self.initialize() + return self._credential_logger + # Module-level singleton instance _logger_manager = LoggerManager() @@ -108,6 +131,11 @@ def get_access_logger() -> logging.Logger: return _logger_manager.access +def get_credential_logger() -> logging.Logger: + """Get the credential logger instance.""" + return _logger_manager.credentials + + def initialize_logging(log_dir: str = "logs") -> None: """Initialize the logging system.""" _logger_manager.initialize(log_dir) diff --git a/src/server.py b/src/server.py index 861e9f2..fd8f7d2 100644 --- a/src/server.py +++ b/src/server.py @@ -11,7 +11,7 @@ from http.server import HTTPServer from config import Config from tracker import AccessTracker from handler import Handler -from logger import initialize_logging, get_app_logger, get_access_logger +from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger def print_usage(): @@ -45,6 +45,7 @@ def main(): initialize_logging() app_logger = get_app_logger() access_logger = get_access_logger() + credential_logger = get_credential_logger() config = Config.from_env() @@ -55,6 +56,7 @@ def main(): Handler.counter = config.canary_token_tries Handler.app_logger = app_logger Handler.access_logger = access_logger + Handler.credential_logger = credential_logger if len(sys.argv) == 2: try: diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py index 3f5524d..a267278 100644 --- a/src/templates/dashboard_template.py +++ b/src/templates/dashboard_template.py @@ -45,6 +45,12 @@ def generate_dashboard(stats: dict) -> str: for log in stats.get('attack_types', [])[-10:] ]) or 'No attacks detected' + # Generate credential attempts rows + credential_rows = '\n'.join([ + f'{log["ip"]}{log["username"]}{log["password"]}{log["path"]}{log["timestamp"].split("T")[1][:8]}' + for log in stats.get('credential_attempts', [])[-20:] + ]) or 'No credentials captured yet' + return f""" @@ -159,6 +165,10 @@ def generate_dashboard(stats: dict) -> str:
{stats.get('honeypot_ips', 0)}
Honeypot Caught
+
+
{len(stats.get('credential_attempts', []))}
+
Credentials Captured
+
@@ -194,6 +204,24 @@ def generate_dashboard(stats: dict) -> str:
+
+

🔑 Captured Credentials

+ + + + + + + + + + + + {credential_rows} + +
IP AddressUsernamePasswordPathTime
+
+

😈 Detected Attack Types

diff --git a/src/tracker.py b/src/tracker.py index 6e733f4..717a4c3 100644 --- a/src/tracker.py +++ b/src/tracker.py @@ -4,6 +4,7 @@ from typing import Dict, List, Tuple from collections import defaultdict from datetime import datetime import re +import urllib.parse class AccessTracker: @@ -13,6 +14,7 @@ class AccessTracker: self.path_counts: Dict[str, int] = defaultdict(int) self.user_agent_counts: Dict[str, int] = defaultdict(int) self.access_log: List[Dict] = [] + self.credential_attempts: List[Dict] = [] self.suspicious_patterns = [ 'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests', 'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix', @@ -31,6 +33,57 @@ class AccessTracker: # Track IPs that accessed honeypot paths from robots.txt self.honeypot_triggered: Dict[str, List[str]] = defaultdict(list) + def parse_credentials(self, post_data: str) -> Tuple[str, str]: + """ + Parse username and password from POST data. + Returns tuple (username, password) or (None, None) if not found. + """ + if not post_data: + return None, None + + username = None + password = None + + try: + # Parse URL-encoded form data + parsed = urllib.parse.parse_qs(post_data) + + # Common username field names + username_fields = ['username', 'user', 'login', 'email', 'log', 'userid', 'account'] + for field in username_fields: + if field in parsed and parsed[field]: + username = parsed[field][0] + break + + # Common password field names + password_fields = ['password', 'pass', 'passwd', 'pwd', 'passphrase'] + for field in password_fields: + if field in parsed and parsed[field]: + password = parsed[field][0] + break + + except Exception: + # If parsing fails, try simple regex patterns + username_match = re.search(r'(?:username|user|login|email|log)=([^&\s]+)', post_data, re.IGNORECASE) + password_match = re.search(r'(?:password|pass|passwd|pwd)=([^&\s]+)', post_data, re.IGNORECASE) + + if username_match: + username = urllib.parse.unquote_plus(username_match.group(1)) + if password_match: + password = urllib.parse.unquote_plus(password_match.group(1)) + + return username, password + + def record_credential_attempt(self, ip: str, path: str, username: str, password: str): + """Record a credential login attempt""" + self.credential_attempts.append({ + 'ip': ip, + 'path': path, + 'username': username, + 'password': password, + 'timestamp': datetime.now().isoformat() + }) + def record_access(self, ip: str, path: str, user_agent: str = '', body: str = ''): """Record an access attempt""" self.ip_counts[ip] += 1 @@ -146,5 +199,6 @@ class AccessTracker: 'top_user_agents': self.get_top_user_agents(10), 'recent_suspicious': self.get_suspicious_accesses(20), 'honeypot_triggered_ips': self.get_honeypot_triggered_ips(), - 'attack_types': self.get_attack_type_accesses(20) + 'attack_types': self.get_attack_type_accesses(20), + 'credential_attempts': self.credential_attempts[-50:] # Last 50 attempts } From 852a15976f1dd6d16eee32e90567ee25ea4ea47a Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio Date: Mon, 29 Dec 2025 23:13:02 +0100 Subject: [PATCH 05/21] Added demo --- README.md | 7 ++++++- deployment.yaml | 44 -------------------------------------------- src/data/krawl.db | Bin 0 -> 61440 bytes 3 files changed, 6 insertions(+), 45 deletions(-) delete mode 100644 deployment.yaml create mode 100644 src/data/krawl.db diff --git a/README.md b/README.md index b84d955..9954cc0 100644 --- a/README.md +++ b/README.md @@ -51,7 +51,12 @@ ## Star History Star History Chart - +## Try Krawl +Tip: crawl the `robots.txt` paths for additional fun +### http://demo.krawlme.com +## View the dashboard +### http://demo.krawlme.com/das_dashboard + ## What is Krawl? **Krawl** is a cloud‑native deception server designed to detect, delay, and analyze malicious web crawlers and automated scanners. diff --git a/deployment.yaml b/deployment.yaml deleted file mode 100644 index 4bf5189..0000000 --- a/deployment.yaml +++ /dev/null @@ -1,44 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: krawl-server - namespace: krawl - labels: - app: krawl-server -spec: - replicas: 1 - selector: - matchLabels: - app: krawl-server - template: - metadata: - labels: - app: krawl-server - spec: - containers: - - name: krawl - image: ghcr.io/blessedrebus/krawl:latest - imagePullPolicy: Always - ports: - - containerPort: 5000 - name: http - protocol: TCP - envFrom: - - configMapRef: - name: krawl-config - volumeMounts: - - name: wordlists - mountPath: /app/wordlists.json - subPath: wordlists.json - readOnly: true - resources: - requests: - memory: "64Mi" - cpu: "100m" - limits: - memory: "256Mi" - cpu: "500m" - volumes: - - name: wordlists - configMap: - name: krawl-wordlists diff --git a/src/data/krawl.db b/src/data/krawl.db new file mode 100644 index 0000000000000000000000000000000000000000..88f7d5763f00e73bcacf6af9ef16e63c68c0e9e5 GIT binary patch literal 61440 zcmeI5U2Gd!6~{gCx8s?cG^vb0CDW~{#;A2>#vac&rLf-AP0QwEn*`J%AYdj*3}dL}#!<9U+jJs!^!^pDWL z&c@OgzD|bzXKb$ry?nxxxb-9({)UGOe8R*1kozS2I=d9T7tN1fj=UZ@9)2ZwFMKWV zCKX}>0T2KI5ICd+8q-`bc6p9z$mL2&yHUQ8FBnFlcr9Pj46SICs}((8C>Awc&+k^Z z^W{>Xm@KU2vKu-6(#rYV6@GXF{OStd*9brB=p=sYG#iY~%`vxwMqzVT>(lQm;M!Jf zWS?5fxwRh&mrK;fjoe~xjbB;a;8#Aov~-$}gdH8bYqW?MH)|UIx$N4)#q8RwEW{nH z(DQ6@M=RxP)H1DJvGgQ49XES&AF6cI+7Q;zAQwxt7qQnM^g$<948szzZqU)QehYr3Jkg4rt9btA89T7^GPr@+Rg<(vb`Zs9<2vASO| z>NoSnYN-oWYN9EYjhmKbqNFwp3wotvMswiwO1@g(w$wVP>soD}?meBldA(SzYxX*J zS#`f&?Dm^ZHT$qB?APdI*Gkr|^$%k$9*NII1F?l;jHl^7`t@D%BizQZ~% zc3zq9w?gK@;ER!9?9)l6G1+mvVqGg~6{Ae4o0Dj-X6UrOOX+y8$GPaxn&w1tk#c4zTDnZ}k*!OoW25Woq1+gN(EEUG_12 zKbBby3^dEqh~F9y24l%2^TN5NgSm9+QWSEyor^-#1Kp2G2RJ4@#O!n@y^vC!e!H*J z<4<9)JF`1}Q7h>BbJcpOTep2|vUby&>JBgNJ#04eJy^=d7OJsaCX` zwW^Uf>gDZit!_Jp1$&3aCUdK54vtNQ&ffm!z(;xt3VyWmJ1x%d(&lX z+D%uf`GT=?bYEGw#w~QVTKeW%HuV;re{`^=lwm^ITm}~E~t~c8{_a4}}txkX6HM{5E$}z#%i4)B2%dOLXcUIXt z8*6vgKGk=19-2Gl3)}R-Z265WwoX!enz2*ux|7)RmcW6Pe`xfWYHQpfNEW@ywiT$Wx(ER+N;4K!3!!t-`bAVzrW>$#V7i^6qY7PEH8?>~f)4 zt{7E)=PZAzVraWOE#Ozz`769gtI22i`m^(5DxIK5^hK?BtvV+OqCkH|{zAE~ZB=i~ zHKmKW4M~vXGoo}x%J8B(pA_bkQbJMa8A5HGoT4VJ_L`L5)RdIEnNd=ujI1hYp|qta z1#v6AnchqZTZKnpr!p_6>Fy=eOj43Ejgv21lq-$LYLtaH2*P9w8-akdzRUNjWJuLL4>dXs;s_^wPtC)8ms(Hpz5C5tO8o zY4}-c3h6cFk=c<qVmkztx(l5j!I9b_B~q!Rno%15z=-XT#H5&}D2AOHd&00JNY0w4eaAOHd&a2N>8 z`ew3>wWdFK!F_tLqI1Q2YM5eY$@$0-MQb5=@<2t~;_SpQ#m=HB8KP({X>!99J8O>Y zaK+Z5V05@*Yt?RixMFKrEizoOweA%juGm_b3Jp{2tQ-Z0E4G$)0>czLYcKxcimk;F z-!R3_>VVrzL}>=bo{u&XOH*SVda|H;c9@&S3Dyi5K;{!IQzeouZ!-X^~!KO;XO zZ;&68?~rekm&p&vYvk3#;O4La1V8`;KmY_l00ck)1V8`;KmY{(uL$_OA!gc4rp)As z&l_eY&1Aw%h*`#&32P=%GZ{CNh?#`VBxEK*GYOc9-%NaF;x&^ouQx=4nCSI?_G=z; zm%K$@Aup0GvPfo0nENaDbM95{C9cA)asoHWzRUiay+akTfdB}A00@8p2!H?xfB*=9 z00=xd0w4EgnFFF1dc{343+`eoQo&W+jZbh9+oKZP#a1kWtGFA1;4Ze}4hD(a(FTLW z?HGeW;&x=gAaOgMV34>SMc^*BVh09^+YtlqVk=HykhmQk;4Zde0tSiOkpRcM*|2>w zxm^F}{^B9;kw1~&lAqG6{of_uBwr;jkbSaEu9Bz8IWk9%lPTio-lvM#KmY_l00ck) z1V8`;KmY_l00cnbFcVN5rX9H{NA8FtH|fYtIC8|1;~Y8Gk&8NV>T>r7ysXU29T6dCG+|}L+;Z1{}1a9!Y&X10T2KI5C8!X z009sH0T2KI5CDPU1YGa`@A>@y4t@UL@Rpzf0w4eaAOHd&00JNY0w4eaAOHd&aJUEz zdH!F|=l}QU_x~L(`(X_TfB*=900@8p2!H?xfB*=900@AkO<>ZYbo zjWr7=oP|uYkT?p>>;GQzPY?Mo`8WNC4Fo^{1V8`;KmY_l00ck)1V8`;K;ZEw;P*1j Oq}NO*#(Z8TO#TNKFY4$3 literal 0 HcmV?d00001 From 0b1e9537d2eafb0835989921c8b8401e6c4aba22 Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio Date: Mon, 29 Dec 2025 23:13:39 +0100 Subject: [PATCH 06/21] Added demo --- src/data/krawl.db | Bin 61440 -> 0 bytes 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 src/data/krawl.db diff --git a/src/data/krawl.db b/src/data/krawl.db deleted file mode 100644 index 88f7d5763f00e73bcacf6af9ef16e63c68c0e9e5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 61440 zcmeI5U2Gd!6~{gCx8s?cG^vb0CDW~{#;A2>#vac&rLf-AP0QwEn*`J%AYdj*3}dL}#!<9U+jJs!^!^pDWL z&c@OgzD|bzXKb$ry?nxxxb-9({)UGOe8R*1kozS2I=d9T7tN1fj=UZ@9)2ZwFMKWV zCKX}>0T2KI5ICd+8q-`bc6p9z$mL2&yHUQ8FBnFlcr9Pj46SICs}((8C>Awc&+k^Z z^W{>Xm@KU2vKu-6(#rYV6@GXF{OStd*9brB=p=sYG#iY~%`vxwMqzVT>(lQm;M!Jf zWS?5fxwRh&mrK;fjoe~xjbB;a;8#Aov~-$}gdH8bYqW?MH)|UIx$N4)#q8RwEW{nH z(DQ6@M=RxP)H1DJvGgQ49XES&AF6cI+7Q;zAQwxt7qQnM^g$<948szzZqU)QehYr3Jkg4rt9btA89T7^GPr@+Rg<(vb`Zs9<2vASO| z>NoSnYN-oWYN9EYjhmKbqNFwp3wotvMswiwO1@g(w$wVP>soD}?meBldA(SzYxX*J zS#`f&?Dm^ZHT$qB?APdI*Gkr|^$%k$9*NII1F?l;jHl^7`t@D%BizQZ~% zc3zq9w?gK@;ER!9?9)l6G1+mvVqGg~6{Ae4o0Dj-X6UrOOX+y8$GPaxn&w1tk#c4zTDnZ}k*!OoW25Woq1+gN(EEUG_12 zKbBby3^dEqh~F9y24l%2^TN5NgSm9+QWSEyor^-#1Kp2G2RJ4@#O!n@y^vC!e!H*J z<4<9)JF`1}Q7h>BbJcpOTep2|vUby&>JBgNJ#04eJy^=d7OJsaCX` zwW^Uf>gDZit!_Jp1$&3aCUdK54vtNQ&ffm!z(;xt3VyWmJ1x%d(&lX z+D%uf`GT=?bYEGw#w~QVTKeW%HuV;re{`^=lwm^ITm}~E~t~c8{_a4}}txkX6HM{5E$}z#%i4)B2%dOLXcUIXt z8*6vgKGk=19-2Gl3)}R-Z265WwoX!enz2*ux|7)RmcW6Pe`xfWYHQpfNEW@ywiT$Wx(ER+N;4K!3!!t-`bAVzrW>$#V7i^6qY7PEH8?>~f)4 zt{7E)=PZAzVraWOE#Ozz`769gtI22i`m^(5DxIK5^hK?BtvV+OqCkH|{zAE~ZB=i~ zHKmKW4M~vXGoo}x%J8B(pA_bkQbJMa8A5HGoT4VJ_L`L5)RdIEnNd=ujI1hYp|qta z1#v6AnchqZTZKnpr!p_6>Fy=eOj43Ejgv21lq-$LYLtaH2*P9w8-akdzRUNjWJuLL4>dXs;s_^wPtC)8ms(Hpz5C5tO8o zY4}-c3h6cFk=c<qVmkztx(l5j!I9b_B~q!Rno%15z=-XT#H5&}D2AOHd&00JNY0w4eaAOHd&a2N>8 z`ew3>wWdFK!F_tLqI1Q2YM5eY$@$0-MQb5=@<2t~;_SpQ#m=HB8KP({X>!99J8O>Y zaK+Z5V05@*Yt?RixMFKrEizoOweA%juGm_b3Jp{2tQ-Z0E4G$)0>czLYcKxcimk;F z-!R3_>VVrzL}>=bo{u&XOH*SVda|H;c9@&S3Dyi5K;{!IQzeouZ!-X^~!KO;XO zZ;&68?~rekm&p&vYvk3#;O4La1V8`;KmY_l00ck)1V8`;KmY{(uL$_OA!gc4rp)As z&l_eY&1Aw%h*`#&32P=%GZ{CNh?#`VBxEK*GYOc9-%NaF;x&^ouQx=4nCSI?_G=z; zm%K$@Aup0GvPfo0nENaDbM95{C9cA)asoHWzRUiay+akTfdB}A00@8p2!H?xfB*=9 z00=xd0w4EgnFFF1dc{343+`eoQo&W+jZbh9+oKZP#a1kWtGFA1;4Ze}4hD(a(FTLW z?HGeW;&x=gAaOgMV34>SMc^*BVh09^+YtlqVk=HykhmQk;4Zde0tSiOkpRcM*|2>w zxm^F}{^B9;kw1~&lAqG6{of_uBwr;jkbSaEu9Bz8IWk9%lPTio-lvM#KmY_l00ck) z1V8`;KmY_l00cnbFcVN5rX9H{NA8FtH|fYtIC8|1;~Y8Gk&8NV>T>r7ysXU29T6dCG+|}L+;Z1{}1a9!Y&X10T2KI5C8!X z009sH0T2KI5CDPU1YGa`@A>@y4t@UL@Rpzf0w4eaAOHd&00JNY0w4eaAOHd&aJUEz zdH!F|=l}QU_x~L(`(X_TfB*=900@8p2!H?xfB*=900@AkO<>ZYbo zjWr7=oP|uYkT?p>>;GQzPY?Mo`8WNC4Fo^{1V8`;KmY_l00ck)1V8`;K;ZEw;P*1j Oq}NO*#(Z8TO#TNKFY4$3 From 5a808c330ce38e9385fb9ac6b38901cacbd04ffc Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio <50186694+BlessedRebuS@users.noreply.github.com> Date: Tue, 30 Dec 2025 00:29:58 +0100 Subject: [PATCH 07/21] Update README.md --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9954cc0..8f794ee 100644 --- a/README.md +++ b/README.md @@ -53,9 +53,9 @@ ## Try Krawl Tip: crawl the `robots.txt` paths for additional fun -### http://demo.krawlme.com +Demo URL: [http://demo.krawlme.com](http://demo.krawlme.com) ## View the dashboard -### http://demo.krawlme.com/das_dashboard +Dashboard URL: [http://demo.krawlme.com/das_dashboard](http://demo.krawlme.com/das_dashboard) ## What is Krawl? From 5ba02d3d0c3f702950afdf5d71ac4aaac2b767ab Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio <50186694+BlessedRebuS@users.noreply.github.com> Date: Tue, 30 Dec 2025 00:30:06 +0100 Subject: [PATCH 08/21] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8f794ee..c68b543 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ ## Try Krawl Tip: crawl the `robots.txt` paths for additional fun Demo URL: [http://demo.krawlme.com](http://demo.krawlme.com) -## View the dashboard +## View the dashboard Dashboard URL: [http://demo.krawlme.com/das_dashboard](http://demo.krawlme.com/das_dashboard) ## What is Krawl? From bf73bc7e2cb8b9e47a19396cca005bcd6d8f48cd Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio <50186694+BlessedRebuS@users.noreply.github.com> Date: Tue, 30 Dec 2025 00:38:19 +0100 Subject: [PATCH 09/21] Update README with demo and dashboard information Removed old sections and reorganized demo and dashboard links. --- README.md | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index c68b543..58e0b06 100644 --- a/README.md +++ b/README.md @@ -48,14 +48,10 @@
-## Star History -Star History Chart - -## Try Krawl +## Demo Tip: crawl the `robots.txt` paths for additional fun -Demo URL: [http://demo.krawlme.com](http://demo.krawlme.com) -## View the dashboard -Dashboard URL: [http://demo.krawlme.com/das_dashboard](http://demo.krawlme.com/das_dashboard) +### Krawl URL: [http://demo.krawlme.com](http://demo.krawlme.com) +### View the dashboard [http://demo.krawlme.com/das_dashboard](http://demo.krawlme.com/das_dashboard) ## What is Krawl? @@ -321,3 +317,6 @@ Contributions welcome! Please: **This is a deception/honeypot system.** Deploy in isolated environments and monitor carefully for security events. Use responsibly and in compliance with applicable laws and regulations. + +## Star History +Star History Chart From d458eb471db47ffae2ce6b72ff15228c790017e8 Mon Sep 17 00:00:00 2001 From: Phillip Tarrant Date: Fri, 2 Jan 2026 13:39:54 -0600 Subject: [PATCH 10/21] Migrate configuration from environment variables to YAML file - Add YAML-based configuration loaded from config.yaml (CONFIG_LOCATION env var) - Add PyYAML dependency and install requirements in Dockerfile - Replace Config.from_env() with get_config() singleton pattern - Remove server_header from config (now randomized from wordlists only) - Update docker-compose.yaml to mount config.yaml read-only - Update Helm chart: restructure values.yaml, generate config.yaml in ConfigMap - Update Kubernetes manifests: ConfigMap now contains config.yaml, deployments mount it - Remove Helm secret.yaml (dashboard path now auto-generated in config.yaml) --- Dockerfile | 3 + config.yaml | 35 +++++++++ docker-compose.yaml | 18 +---- helm/templates/configmap.yaml | 49 ++++++------ helm/templates/deployment.yaml | 19 ++--- helm/templates/secret.yaml | 16 ---- helm/values.yaml | 47 +++++++----- kubernetes/krawl-all-in-one-deploy.yaml | 71 +++++++++++++----- kubernetes/manifests/configmap.yaml | 50 +++++++++---- kubernetes/manifests/deployment.yaml | 13 +++- requirements.txt | 3 + src/config.py | 99 ++++++++++++++++++------- src/generators.py | 14 +--- src/server.py | 51 +++++++------ 14 files changed, 307 insertions(+), 181 deletions(-) create mode 100644 config.yaml delete mode 100644 helm/templates/secret.yaml diff --git a/Dockerfile b/Dockerfile index adac20f..e0fb6af 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,6 +4,9 @@ LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl WORKDIR /app +COPY requirements.txt /app/ +RUN pip install --no-cache-dir -r requirements.txt + COPY src/ /app/src/ COPY wordlists.json /app/ diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..c4faa8f --- /dev/null +++ b/config.yaml @@ -0,0 +1,35 @@ +# Krawl Honeypot Configuration + +server: + port: 5000 + delay: 100 # Response delay in milliseconds + timezone: null # e.g., "America/New_York" or null for system default + +links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + +canary: + token_url: null # Optional canary token URL + token_tries: 10 + +dashboard: + # if set to "null" this will Auto-generates random path if not set + # can be set to "dashboard" or similar + secret_path: dashboard + +api: + server_url: null + server_port: 8080 + server_path: "/api/v2/users" + +database: + path: "data/krawl.db" + retention_days: 30 + +behavior: + probability_error_codes: 0 # 0-100 percentage \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index 6f81a47..776e919 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -10,23 +10,9 @@ services: - "5000:5000" volumes: - ./wordlists.json:/app/wordlists.json:ro + - ./config.yaml:/app/config.yaml:ro environment: - - PORT=5000 - - DELAY=100 - - LINKS_MIN_LENGTH=5 - - LINKS_MAX_LENGTH=15 - - LINKS_MIN_PER_PAGE=10 - - LINKS_MAX_PER_PAGE=15 - - MAX_COUNTER=10 - - CANARY_TOKEN_TRIES=10 - - PROBABILITY_ERROR_CODES=0 - # - SERVER_HEADER=Apache/2.2.22 (Ubuntu) - # Optional: Set your canary token URL - # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt - # Optional: Set custom dashboard path (auto-generated if not set) - # - DASHBOARD_SECRET_PATH=/my-secret-dashboard - # Optional: Set timezone for logs and dashboard (e.g., America/New_York, Europe/Rome) - # - TIMEZONE=UTC + - CONFIG_LOCATION=config.yaml restart: unless-stopped healthcheck: test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"] diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index 17cd952..808d9f5 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -5,25 +5,30 @@ metadata: labels: {{- include "krawl.labels" . | nindent 4 }} data: - PORT: {{ .Values.config.port | quote }} - DELAY: {{ .Values.config.delay | quote }} - LINKS_MIN_LENGTH: {{ .Values.config.linksMinLength | quote }} - LINKS_MAX_LENGTH: {{ .Values.config.linksMaxLength | quote }} - LINKS_MIN_PER_PAGE: {{ .Values.config.linksMinPerPage | quote }} - LINKS_MAX_PER_PAGE: {{ .Values.config.linksMaxPerPage | quote }} - MAX_COUNTER: {{ .Values.config.maxCounter | quote }} - CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }} - PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }} - CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }} - {{- if .Values.config.dashboardSecretPath }} - DASHBOARD_SECRET_PATH: {{ .Values.config.dashboardSecretPath | quote }} - {{- end }} - {{- if .Values.config.serverHeader }} - SERVER_HEADER: {{ .Values.config.serverHeader | quote }} - {{- end }} - {{- if .Values.config.timezone }} - TIMEZONE: {{ .Values.config.timezone | quote }} - {{- end }} - # Database configuration - DATABASE_PATH: {{ .Values.database.path | quote }} - DATABASE_RETENTION_DAYS: {{ .Values.database.retentionDays | quote }} + config.yaml: | + # Krawl Honeypot Configuration + server: + port: {{ .Values.config.server.port }} + delay: {{ .Values.config.server.delay }} + timezone: {{ .Values.config.server.timezone | toYaml }} + links: + min_length: {{ .Values.config.links.min_length }} + max_length: {{ .Values.config.links.max_length }} + min_per_page: {{ .Values.config.links.min_per_page }} + max_per_page: {{ .Values.config.links.max_per_page }} + char_space: {{ .Values.config.links.char_space | quote }} + max_counter: {{ .Values.config.links.max_counter }} + canary: + token_url: {{ .Values.config.canary.token_url | toYaml }} + token_tries: {{ .Values.config.canary.token_tries }} + dashboard: + secret_path: {{ .Values.config.dashboard.secret_path | toYaml }} + api: + server_url: {{ .Values.config.api.server_url | toYaml }} + server_port: {{ .Values.config.api.server_port }} + server_path: {{ .Values.config.api.server_path | quote }} + database: + path: {{ .Values.config.database.path | quote }} + retention_days: {{ .Values.config.database.retention_days }} + behavior: + probability_error_codes: {{ .Values.config.behavior.probability_error_codes }} diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index ecc9655..5635fa3 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -38,18 +38,16 @@ spec: imagePullPolicy: {{ .Values.image.pullPolicy }} ports: - name: http - containerPort: {{ .Values.config.port }} + containerPort: {{ .Values.config.server.port }} protocol: TCP - envFrom: - - configMapRef: - name: {{ include "krawl.fullname" . }}-config env: - - name: DASHBOARD_SECRET_PATH - valueFrom: - secretKeyRef: - name: {{ include "krawl.fullname" . }} - key: dashboard-path + - name: CONFIG_LOCATION + value: "config.yaml" volumeMounts: + - name: config + mountPath: /app/config.yaml + subPath: config.yaml + readOnly: true - name: wordlists mountPath: /app/wordlists.json subPath: wordlists.json @@ -63,6 +61,9 @@ spec: {{- toYaml . | nindent 12 }} {{- end }} volumes: + - name: config + configMap: + name: {{ include "krawl.fullname" . }}-config - name: wordlists configMap: name: {{ include "krawl.fullname" . }}-wordlists diff --git a/helm/templates/secret.yaml b/helm/templates/secret.yaml deleted file mode 100644 index 798289c..0000000 --- a/helm/templates/secret.yaml +++ /dev/null @@ -1,16 +0,0 @@ -{{- $secret := (lookup "v1" "Secret" .Release.Namespace (include "krawl.fullname" .)) -}} -{{- $dashboardPath := "" -}} -{{- if and $secret $secret.data -}} - {{- $dashboardPath = index $secret.data "dashboard-path" | b64dec -}} -{{- else -}} - {{- $dashboardPath = printf "/%s" (randAlphaNum 32) -}} -{{- end -}} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "krawl.fullname" . }} - labels: - {{- include "krawl.labels" . | nindent 4 }} -type: Opaque -stringData: - dashboard-path: {{ $dashboardPath | quote }} diff --git a/helm/values.yaml b/helm/values.yaml index c92bc0b..60b1a66 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -62,29 +62,36 @@ tolerations: [] affinity: {} -# Application configuration +# Application configuration (config.yaml structure) config: - port: 5000 - delay: 100 - linksMinLength: 5 - linksMaxLength: 15 - linksMinPerPage: 10 - linksMaxPerPage: 15 - maxCounter: 10 - canaryTokenTries: 10 - probabilityErrorCodes: 0 -# timezone: "UTC" -# serverHeader: "Apache/2.2.22 (Ubuntu)" -# dashboardSecretPath: "/my-secret-dashboard" -# canaryTokenUrl: set-your-canary-token-url-here -# timezone: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used. + server: + port: 5000 + delay: 100 + timezone: null # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used. + links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + canary: + token_url: null # Set your canary token URL here + token_tries: 10 + dashboard: + secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard" + api: + server_url: null + server_port: 8080 + server_path: "/api/v2/users" + database: + path: "data/krawl.db" + retention_days: 30 + behavior: + probability_error_codes: 0 -# Database configuration +# Database persistence configuration database: - # Path to the SQLite database file - path: "data/krawl.db" - # Number of days to retain access logs and attack data - retentionDays: 30 # Persistence configuration persistence: enabled: true diff --git a/kubernetes/krawl-all-in-one-deploy.yaml b/kubernetes/krawl-all-in-one-deploy.yaml index d1a026c..3344260 100644 --- a/kubernetes/krawl-all-in-one-deploy.yaml +++ b/kubernetes/krawl-all-in-one-deploy.yaml @@ -10,19 +10,41 @@ metadata: name: krawl-config namespace: krawl-system data: - PORT: "5000" - DELAY: "100" - LINKS_MIN_LENGTH: "5" - LINKS_MAX_LENGTH: "15" - LINKS_MIN_PER_PAGE: "10" - LINKS_MAX_PER_PAGE: "15" - MAX_COUNTER: "10" - CANARY_TOKEN_TRIES: "10" - PROBABILITY_ERROR_CODES: "0" -# CANARY_TOKEN_URL: set-your-canary-token-url-here - # Database configuration - DATABASE_PATH: "data/krawl.db" - DATABASE_RETENTION_DAYS: "30" + config.yaml: | + # Krawl Honeypot Configuration + server: + port: 5000 + delay: 100 + timezone: null # e.g., "America/New_York" or null for system default + + links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + + canary: + token_url: null # Optional canary token URL + token_tries: 10 + + dashboard: + # Auto-generates random path if null + # Can be set to "/dashboard" or similar + secret_path: null + + api: + server_url: null + server_port: 8080 + server_path: "/api/v2/users" + + database: + path: "data/krawl.db" + retention_days: 30 + + behavior: + probability_error_codes: 0 # 0-100 percentage --- apiVersion: v1 kind: ConfigMap @@ -227,6 +249,14 @@ data: 500, 502, 503 + ], + "server_headers": [ + "Apache/2.4.41 (Ubuntu)", + "nginx/1.18.0", + "Microsoft-IIS/10.0", + "cloudflare", + "AmazonS3", + "gunicorn/20.1.0" ] } --- @@ -269,10 +299,14 @@ spec: - containerPort: 5000 name: http protocol: TCP - envFrom: - - configMapRef: - name: krawl-config + env: + - name: CONFIG_LOCATION + value: "config.yaml" volumeMounts: + - name: config + mountPath: /app/config.yaml + subPath: config.yaml + readOnly: true - name: wordlists mountPath: /app/wordlists.json subPath: wordlists.json @@ -287,6 +321,9 @@ spec: memory: "256Mi" cpu: "500m" volumes: + - name: config + configMap: + name: krawl-config - name: wordlists configMap: name: krawl-wordlists @@ -353,7 +390,7 @@ spec: - podSelector: {} - namespaceSelector: {} - ipBlock: - cidr: 0.0.0.0/0 + cidr: 0.0.0.0/0 ports: - protocol: TCP port: 5000 diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml index ef357b0..38a287b 100644 --- a/kubernetes/manifests/configmap.yaml +++ b/kubernetes/manifests/configmap.yaml @@ -4,18 +4,38 @@ metadata: name: krawl-config namespace: krawl-system data: - PORT: "5000" - DELAY: "100" - LINKS_MIN_LENGTH: "5" - LINKS_MAX_LENGTH: "15" - LINKS_MIN_PER_PAGE: "10" - LINKS_MAX_PER_PAGE: "15" - MAX_COUNTER: "10" - CANARY_TOKEN_TRIES: "10" - PROBABILITY_ERROR_CODES: "0" - SERVER_HEADER: "Apache/2.2.22 (Ubuntu)" -# CANARY_TOKEN_URL: set-your-canary-token-url-here -# TIMEZONE: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome") - # Database configuration - DATABASE_PATH: "data/krawl.db" - DATABASE_RETENTION_DAYS: "30" \ No newline at end of file + config.yaml: | + # Krawl Honeypot Configuration + server: + port: 5000 + delay: 100 + timezone: null # e.g., "America/New_York" or null for system default + + links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + + canary: + token_url: null # Optional canary token URL + token_tries: 10 + + dashboard: + # Auto-generates random path if null + # Can be set to "/dashboard" or similar + secret_path: null + + api: + server_url: null + server_port: 8080 + server_path: "/api/v2/users" + + database: + path: "data/krawl.db" + retention_days: 30 + + behavior: + probability_error_codes: 0 # 0-100 percentage diff --git a/kubernetes/manifests/deployment.yaml b/kubernetes/manifests/deployment.yaml index 1650721..f970625 100644 --- a/kubernetes/manifests/deployment.yaml +++ b/kubernetes/manifests/deployment.yaml @@ -23,10 +23,14 @@ spec: - containerPort: 5000 name: http protocol: TCP - envFrom: - - configMapRef: - name: krawl-config + env: + - name: CONFIG_LOCATION + value: "config.yaml" volumeMounts: + - name: config + mountPath: /app/config.yaml + subPath: config.yaml + readOnly: true - name: wordlists mountPath: /app/wordlists.json subPath: wordlists.json @@ -41,6 +45,9 @@ spec: memory: "256Mi" cpu: "500m" volumes: + - name: config + configMap: + name: krawl-config - name: wordlists configMap: name: krawl-wordlists diff --git a/requirements.txt b/requirements.txt index 94f74f2..8cb6dc5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,8 @@ # Krawl Honeypot Dependencies # Install with: pip install -r requirements.txt +# Configuration +PyYAML>=6.0 + # Database ORM SQLAlchemy>=2.0.0,<3.0.0 diff --git a/src/config.py b/src/config.py index 87fca1c..fb679b4 100644 --- a/src/config.py +++ b/src/config.py @@ -1,11 +1,15 @@ #!/usr/bin/env python3 import os +import sys from dataclasses import dataclass +from pathlib import Path from typing import Optional, Tuple from zoneinfo import ZoneInfo import time +import yaml + @dataclass class Config: @@ -23,12 +27,11 @@ class Config: api_server_port: int = 8080 api_server_path: str = "/api/v2/users" probability_error_codes: int = 0 # Percentage (0-100) - server_header: Optional[str] = None # Database settings database_path: str = "data/krawl.db" database_retention_days: int = 30 timezone: str = None # IANA timezone (e.g., 'America/New_York', 'Europe/Rome') - + @staticmethod # Try to fetch timezone before if not set def get_system_timezone() -> str: @@ -38,16 +41,16 @@ class Config: tz_path = os.readlink('/etc/localtime') if 'zoneinfo/' in tz_path: return tz_path.split('zoneinfo/')[-1] - + local_tz = time.tzname[time.daylight] if local_tz and local_tz != 'UTC': return local_tz except Exception: pass - + # Default fallback to UTC return 'UTC' - + def get_timezone(self) -> ZoneInfo: """Get configured timezone as ZoneInfo object""" if self.timezone: @@ -55,7 +58,7 @@ class Config: return ZoneInfo(self.timezone) except Exception: pass - + system_tz = self.get_system_timezone() try: return ZoneInfo(system_tz) @@ -63,31 +66,71 @@ class Config: return ZoneInfo('UTC') @classmethod - def from_env(cls) -> 'Config': - """Create configuration from environment variables""" + def from_yaml(cls) -> 'Config': + """Create configuration from YAML file""" + config_location = os.getenv('CONFIG_LOCATION', 'config.yaml') + config_path = Path(__file__).parent.parent / config_location + + try: + with open(config_path, 'r') as f: + data = yaml.safe_load(f) + except FileNotFoundError: + print(f"Error: Configuration file '{config_path}' not found.", file=sys.stderr) + print(f"Please create a config.yaml file or set CONFIG_LOCATION environment variable.", file=sys.stderr) + sys.exit(1) + except yaml.YAMLError as e: + print(f"Error: Invalid YAML in configuration file '{config_path}': {e}", file=sys.stderr) + sys.exit(1) + + if data is None: + data = {} + + # Extract nested values with defaults + server = data.get('server', {}) + links = data.get('links', {}) + canary = data.get('canary', {}) + dashboard = data.get('dashboard', {}) + api = data.get('api', {}) + database = data.get('database', {}) + behavior = data.get('behavior', {}) + + # Handle dashboard_secret_path - auto-generate if null/not set + dashboard_path = dashboard.get('secret_path') + if dashboard_path is None: + dashboard_path = f'/{os.urandom(16).hex()}' + return cls( - port=int(os.getenv('PORT', 5000)), - delay=int(os.getenv('DELAY', 100)), + port=server.get('port', 5000), + delay=server.get('delay', 100), + timezone=server.get('timezone'), links_length_range=( - int(os.getenv('LINKS_MIN_LENGTH', 5)), - int(os.getenv('LINKS_MAX_LENGTH', 15)) + links.get('min_length', 5), + links.get('max_length', 15) ), links_per_page_range=( - int(os.getenv('LINKS_MIN_PER_PAGE', 10)), - int(os.getenv('LINKS_MAX_PER_PAGE', 15)) + links.get('min_per_page', 10), + links.get('max_per_page', 15) ), - char_space=os.getenv('CHAR_SPACE', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'), - max_counter=int(os.getenv('MAX_COUNTER', 10)), - canary_token_url=os.getenv('CANARY_TOKEN_URL'), - canary_token_tries=int(os.getenv('CANARY_TOKEN_TRIES', 10)), - dashboard_secret_path=os.getenv('DASHBOARD_SECRET_PATH', f'/{os.urandom(16).hex()}'), - api_server_url=os.getenv('API_SERVER_URL'), - api_server_port=int(os.getenv('API_SERVER_PORT', 8080)), - api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'), - probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 0)), - server_header=os.getenv('SERVER_HEADER'), - database_path=os.getenv('DATABASE_PATH', 'data/krawl.db'), - database_retention_days=int(os.getenv('DATABASE_RETENTION_DAYS', 30)), - timezone=os.getenv('TIMEZONE') # If not set, will use system timezone - + char_space=links.get('char_space', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'), + max_counter=links.get('max_counter', 10), + canary_token_url=canary.get('token_url'), + canary_token_tries=canary.get('token_tries', 10), + dashboard_secret_path=dashboard_path, + api_server_url=api.get('server_url'), + api_server_port=api.get('server_port', 8080), + api_server_path=api.get('server_path', '/api/v2/users'), + probability_error_codes=behavior.get('probability_error_codes', 0), + database_path=database.get('path', 'data/krawl.db'), + database_retention_days=database.get('retention_days', 30), ) + + +_config_instance = None + + +def get_config() -> Config: + """Get the singleton Config instance""" + global _config_instance + if _config_instance is None: + _config_instance = Config.from_yaml() + return _config_instance diff --git a/src/generators.py b/src/generators.py index 6e24ba8..6eca9fd 100644 --- a/src/generators.py +++ b/src/generators.py @@ -9,8 +9,6 @@ import string import json from templates import html_templates from wordlists import get_wordlists -from config import Config -from logger import get_app_logger def random_username() -> str: """Generate random username""" @@ -38,15 +36,9 @@ def random_email(username: str = None) -> str: return f"{username}@{random.choice(wl.email_domains)}" def random_server_header() -> str: - """Generate random server header""" - - if Config.from_env().server_header: - server_header = Config.from_env().server_header - else: - wl = get_wordlists() - server_header = random.choice(wl.server_headers) - - return server_header + """Generate random server header from wordlists""" + wl = get_wordlists() + return random.choice(wl.server_headers) def random_api_key() -> str: """Generate random API key""" diff --git a/src/server.py b/src/server.py index 06b7c82..7a59c73 100644 --- a/src/server.py +++ b/src/server.py @@ -8,7 +8,7 @@ Run this file to start the server. import sys from http.server import HTTPServer -from config import Config +from config import get_config from tracker import AccessTracker from handler import Handler from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger @@ -20,24 +20,29 @@ def print_usage(): print(f'Usage: {sys.argv[0]} [FILE]\n') print('FILE is file containing a list of webpage names to serve, one per line.') print('If no file is provided, random links will be generated.\n') - print('Environment Variables:') - print(' PORT - Server port (default: 5000)') - print(' DELAY - Response delay in ms (default: 100)') - print(' LINKS_MIN_LENGTH - Min link length (default: 5)') - print(' LINKS_MAX_LENGTH - Max link length (default: 15)') - print(' LINKS_MIN_PER_PAGE - Min links per page (default: 10)') - print(' LINKS_MAX_PER_PAGE - Max links per page (default: 15)') - print(' MAX_COUNTER - Max counter value (default: 10)') - print(' CANARY_TOKEN_URL - Canary token URL to display') - print(' CANARY_TOKEN_TRIES - Number of tries before showing token (default: 10)') - print(' DASHBOARD_SECRET_PATH - Secret path for dashboard (auto-generated if not set)') - print(' PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)') - print(' CHAR_SPACE - Characters for random links') - print(' SERVER_HEADER - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))') - print(' DATABASE_PATH - Path to SQLite database (default: data/krawl.db)') - print(' DATABASE_RETENTION_DAYS - Days to retain database records (default: 30)') - print(' TIMEZONE - IANA timezone for logs/dashboard (e.g., America/New_York, Europe/Rome)') - print(' If not set, system timezone will be used') + print('Configuration:') + print(' Configuration is loaded from a YAML file (default: config.yaml)') + print(' Set CONFIG_LOCATION environment variable to use a different file.\n') + print(' Example config.yaml structure:') + print(' server:') + print(' port: 5000') + print(' delay: 100') + print(' timezone: null # or "America/New_York"') + print(' links:') + print(' min_length: 5') + print(' max_length: 15') + print(' min_per_page: 10') + print(' max_per_page: 15') + print(' canary:') + print(' token_url: null') + print(' token_tries: 10') + print(' dashboard:') + print(' secret_path: null # auto-generated if not set') + print(' database:') + print(' path: "data/krawl.db"') + print(' retention_days: 30') + print(' behavior:') + print(' probability_error_codes: 0') def main(): @@ -46,19 +51,17 @@ def main(): print_usage() exit(0) - config = Config.from_env() - + config = get_config() + # Get timezone configuration tz = config.get_timezone() - + # Initialize logging with timezone initialize_logging(timezone=tz) app_logger = get_app_logger() access_logger = get_access_logger() credential_logger = get_credential_logger() - config = Config.from_env() - # Initialize database for persistent storage try: initialize_database(config.database_path) From 349c14933529cd1fd24a0bfebd31f99e0425c3cc Mon Sep 17 00:00:00 2001 From: Phillip Tarrant Date: Fri, 2 Jan 2026 13:52:51 -0600 Subject: [PATCH 11/21] Add logs directory bind mount with entrypoint permission fix - Add ./logs:/app/logs volume mount to docker-compose.yaml for log access - Create entrypoint.sh script that fixes directory ownership at startup - Install gosu in Dockerfile for secure privilege dropping - Use ENTRYPOINT to run permission fix as root, then drop to krawl user This ensures bind-mounted directories have correct permissions even when Docker creates them as root on the host. --- Dockerfile | 12 +++++++++--- docker-compose.yaml | 1 + entrypoint.sh | 8 ++++++++ 3 files changed, 18 insertions(+), 3 deletions(-) create mode 100644 entrypoint.sh diff --git a/Dockerfile b/Dockerfile index e0fb6af..2c7b954 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,19 +4,25 @@ LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl WORKDIR /app +# Install gosu for dropping privileges +RUN apt-get update && apt-get install -y --no-install-recommends gosu && \ + rm -rf /var/lib/apt/lists/* + COPY requirements.txt /app/ RUN pip install --no-cache-dir -r requirements.txt COPY src/ /app/src/ COPY wordlists.json /app/ +COPY entrypoint.sh /app/ RUN useradd -m -u 1000 krawl && \ - chown -R krawl:krawl /app - -USER krawl + mkdir -p /app/logs /app/data && \ + chown -R krawl:krawl /app && \ + chmod +x /app/entrypoint.sh EXPOSE 5000 ENV PYTHONUNBUFFERED=1 +ENTRYPOINT ["/app/entrypoint.sh"] CMD ["python3", "src/server.py"] diff --git a/docker-compose.yaml b/docker-compose.yaml index 776e919..02b6ae7 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -11,6 +11,7 @@ services: volumes: - ./wordlists.json:/app/wordlists.json:ro - ./config.yaml:/app/config.yaml:ro + - ./logs:/app/logs environment: - CONFIG_LOCATION=config.yaml restart: unless-stopped diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100644 index 0000000..28b5fc0 --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,8 @@ +#!/bin/sh +set -e + +# Fix ownership of mounted directories +chown -R krawl:krawl /app/logs /app/data 2>/dev/null || true + +# Drop to krawl user and run the application +exec gosu krawl "$@" From 5f8bb73546a9447fdf855134b0c7c42244810d42 Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio Date: Sat, 3 Jan 2026 17:14:58 +0100 Subject: [PATCH 12/21] added random SQL errors, random server errors, XSS baits --- src/data/krawl.db | Bin 0 -> 69632 bytes src/handler.py | 158 ++++++++++++++++++++++++- src/server_errors.py | 65 ++++++++++ src/sql_errors.py | 112 ++++++++++++++++++ src/templates/html/generic_search.html | 66 +++++++++++ src/templates/html/input_form.html | 74 ++++++++++++ src/templates/html/robots.txt | 10 ++ src/templates/html_templates.py | 10 ++ src/tracker.py | 22 ++-- src/wordlists.py | 12 ++ src/xss_detector.py | 73 ++++++++++++ 11 files changed, 589 insertions(+), 13 deletions(-) create mode 100644 src/data/krawl.db create mode 100644 src/server_errors.py create mode 100644 src/sql_errors.py create mode 100644 src/templates/html/generic_search.html create mode 100644 src/templates/html/input_form.html create mode 100644 src/xss_detector.py diff --git a/src/data/krawl.db b/src/data/krawl.db new file mode 100644 index 0000000000000000000000000000000000000000..759ffb958d54f426a0a424446aa7baec7af9d275 GIT binary patch literal 69632 zcmeI5eQX=&eaA^r6m{hB9=)2DZAE7riKR6@-aC2nvPHES$E$56lD8y73W6qSnUzG+ zypwFFLxyvb7X4%0hM;H=v?#Cz+x}=f6bRNN8@6`E3hbpAunkDJV8PaAz*cl<&;kvx z_K*D@iag#!ox~KG&Mx*l+mHFY+&$m>JkLEZe15!V_3?`}rz&1+v^GnQs71yi(P-ph zQH(?)D*PLVf88SuABMXg_&@6Vyw^uHBHej7&3!H+jQm1GcujaZ{cq`I{$KfG>ha{O z$sG4e;$3ci4~7J{yy$`n%rSM2$rEvLdVO8wuv)gAom;6yW_C9E z!r3;iTC-?7CCBdm&ogkkq2fF+X?jX^gS{iC;gmLtt?G@fs_oc;V6N3#wo|mL)w*~N zs=$>CmkJD&jncl+<;GUsY27ZC8dtmc%GX8}Yj)AzvYWMXt+8eMA`<;Ydb`FRrF4(xF%me!zM_MRI<+3BP`qk(Ikr(M{|L2E0b~G88yWce?M@N?wH7CjN zN6B6*ZP&_;x&+Rp#xu2zjZ)T-WO4pdsa&f&4g2~T@j~6HZio;dF0YDDiVB=&oDo~w zi;8Ya!2Eo*yxz#FvLeG@MSP^zs$Of{%C=+A7p|zXYMz#r)3PQi`l7BaDyo#1Wz*1H z;Rr}N4w5*|C+TMMbg8mgt7k(Llv7nLZ@LpRAZVeVQRQm!Dh$cDni7mJ4o;MDT2W7H zvM5`WlA%h9YU#4#lEWbCWIv_w73q0NR`k4LxycMDnxTsPRY_W_IR{u~EFN8#&>uG$vH_=piR%aV`CtTb|~zF2U2b?9dP^2 zwgs{#DVCbgYwj2clH?;54kdNbuq0VGX-kk2K+@6v3e_&QkQz%+qe*H`HJ|}Vr$NnJ zznZP)Mx#`*!_lKziv|oaa=E;&yJ^0ohpj`c+BQu6L(`*Ml40taV!DS@pvUO1(=b4~ zR$I&Zn(y$WDbS_p&|tbVNsyM?LmD)j#r5iKTl$#YsE1O?p%|gNGfc%$b1u(;w%I+j zg(k<8a=NPNhMO7%IrN!hYSqq&DKs^@q?xh;gUVFm?yFICNj37Ol5@EcP(z0RySm-5 z$S8RzCoN~l?sy!uKo2s_l+yt>*N-i)(m9E*3xTQ(a}wDwjU2Ss!$GCBqbV#x(8ua# zPB&rf9|JXX?9Y@|t8umAgg1l)KC&uViYXf~+#Ui&FhH7S#{Mv8JJ69xV4=w{g|%b@ zLOTV8Q&b^yN~hBGQMMs?L_|lAzXM8Efmt=e#;~Qms3C1_V_~MLjnDNCJ-w+?0 zK1y_w622q6F8q$LCj62xo&J9M8|jzRH`0%$=lCD<-{N24 zKhCf43w$K?X6nnS7gA5Bv{W+r@5$HTB6uJHB!C2v01`j~NB{{SfuD`Q{P5gTZ>rH} zkO?-Kc}dj_!!l0{PAHp}V&#tyN|=*WOH*Y-+&3XuIf7-1Z0LqIHz1*6Nm|~pbnCuB z2@OeCEYs9x2PM=bQ*T`;iKtj**ZDK$|&lYWbP{OW-*x0@aeRf-9P{OWB zm9SqzzpYbxKtj(5i64}(YZ8^sZA9ZGj zV_ftY{dAOmIzm6q&`*cyrwsjch&t2MnWABn)R~~(aq5gwhtM#AI%(?Vsgt5kk~$o9 zMyZpaNk-_WICX}pH%6VISd4>TLxKH&>bnv09r6wG3V8wM`{&6#;e_uAe=B@Z_${Fh z*8Q?Dp8jF_AJeaCp5!(Y2NLI4)aF>NB{{S z0VIF~kN^@u0!RP}AOR#0G6A~2zQ_7Mx9|Et7uNbex3m6FH<^i%3lZ{l@=5p)42jmUd+=|-&$H*r- z@BiEH1~Co^AOR$R1dsp{Kmter2_OL^fCP{L68K;QcD>fGZy);Z*93#!|JU9el71_& z|NH-T&Hvx*?Em+{?gGz70!RP}AOR$R1dsp{Kmter2_OL^fCNG%!0h-pX#c+$d8PCI zzfd(|W=H@DAOR$R1dsp{Kmter2_OL^fCP|$k3i=Ie|y;k1it^TX9*DS{y*RPKmGlG zl)M^&zjz=4B!C2v01`j~NB{{S0VIF~kN^@u0{0MsUy1T_(WRx1B1O$f@N0faInno$ zfc&D7lT1yOP5428>i_mYUxbY;R4tcRWcvI6DESfC|KCG5h*FRM5$zPH$k(bDGq)wh9=SY?uA#ved;Vp>9 z0|_7jB!C2v01`j~NB{{S0VIF~kib1aV3Ncm(NpQ!a$|F|Q7<-IjjL6AQfLPrV*}Ie zz$0uR-ww>MfvI-j6dRaq2adCWTsv@#4IFI;5;ic=4otCuBkjN>8yIf~a%|vmJ21`$ z#@d0yY~WBkaEJ+J(h4^wwIM%|&ISd8Q7`v1|$ixJWg{zZ5+{YLr${$-v|J)8VN za+Q0Hn;(5?G?}}V1;Lv)SF(NW)i zh}TNnwQ{2_fpe+xOl@PMlrk93*j^ zPtwii>788~Lll%#RV{D26Eh%ap`TIZYVm5RyuQ_x>>C>gC(5^%q^>O*s-&ovE-NlM z45Ci89mQO@pY@{bg2eRV&rHQ!8zRE6b2fLzZ)z%T0l-OusCtTHg)} zkqVW_RA7r#cXAT6drkt|S>Cr98Fm+8a-NPx+WAxW)7$9A%tz~`9 zcX-ki=u&iOFx{CXNXzXZ4Vum3diAy~eavpuL#gCYjL_W~redf$m*+s+>>k=elVeIb zUDb5MO^t#a`phx4YG=e0ni^fwOj&_JWh!y^)u_6p8hKO6x!ee-p+kUO-ELT9lsuG^ zmNR5`JPul*2bpHd>42N-$Cg*=oWwWUhN|3tv72e+puHXrDyGAy5PZq-kdC4|BG5mv&Gik-$Qe;RpJbY=9^s z3Zm}DMABc2cHRu2Se9YX`G0~xvNQi*PXBpY<^KS@cpw2JfCP{L58{a*u=?9ikrmZaqkOSjyKS&(wK=Km}o dSRv6B%QRsj`B str: + """Extract query string from the request path""" + parsed = urlparse(self.path) + return parsed.query + + def _handle_sql_endpoint(self, path: str) -> bool: + """ + Handle SQL injection honeypot endpoints. + Returns True if the path was handled, False otherwise. + """ + # SQL-vulnerable endpoints + sql_endpoints = ['/api/search', '/api/sql', '/api/database'] + + base_path = urlparse(path).path + if base_path not in sql_endpoints: + return False + + try: + # Get query parameters + query_string = self._parse_query_string() + + # Log SQL injection attempt + client_ip = self._get_client_ip() + user_agent = self._get_user_agent() + + # Always check for SQL injection patterns + error_msg, content_type, status_code = generate_sql_error_response(query_string or "") + + if error_msg: + # SQL injection detected - log and return error + self.access_logger.warning(f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}") + self.send_response(status_code) + self.send_header('Content-type', content_type) + self.end_headers() + self.wfile.write(error_msg.encode()) + else: + # No injection detected - return fake data + self.access_logger.info(f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}") + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + response_data = get_sql_response_with_data(base_path, query_string or "") + self.wfile.write(response_data.encode()) + + return True + + except BrokenPipeError: + # Client disconnected + return True + except Exception as e: + self.app_logger.error(f"Error handling SQL endpoint {path}: {str(e)}") + # Still send a response even on error + try: + self.send_response(500) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(b'{"error": "Internal server error"}') + except: + pass + return True def generate_page(self, seed: str) -> str: """Generate a webpage containing random links or canary token""" @@ -207,6 +272,68 @@ class Handler(BaseHTTPRequestHandler): user_agent = self._get_user_agent() post_data = "" + from urllib.parse import urlparse + base_path = urlparse(self.path).path + + if base_path in ['/api/search', '/api/sql', '/api/database']: + content_length = int(self.headers.get('Content-Length', 0)) + if content_length > 0: + post_data = self.rfile.read(content_length).decode('utf-8', errors="replace") + + self.access_logger.info(f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}") + + error_msg, content_type, status_code = generate_sql_error_response(post_data) + + try: + if error_msg: + self.access_logger.warning(f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}") + self.send_response(status_code) + self.send_header('Content-type', content_type) + self.end_headers() + self.wfile.write(error_msg.encode()) + else: + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + response_data = get_sql_response_with_data(base_path, post_data) + self.wfile.write(response_data.encode()) + except BrokenPipeError: + pass + except Exception as e: + self.app_logger.error(f"Error in SQL POST handler: {str(e)}") + return + + if base_path == '/api/contact': + content_length = int(self.headers.get('Content-Length', 0)) + if content_length > 0: + post_data = self.rfile.read(content_length).decode('utf-8', errors="replace") + + parsed_data = {} + for pair in post_data.split('&'): + if '=' in pair: + key, value = pair.split('=', 1) + from urllib.parse import unquote_plus + parsed_data[unquote_plus(key)] = unquote_plus(value) + + xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values()) + + if xss_detected: + self.access_logger.warning(f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}") + else: + self.access_logger.info(f"[XSS ENDPOINT POST] {client_ip} - {base_path}") + + try: + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + response_html = generate_xss_response(parsed_data) + self.wfile.write(response_html.encode()) + except BrokenPipeError: + pass + except Exception as e: + self.app_logger.error(f"Error in XSS POST handler: {str(e)}") + return + self.access_logger.warning(f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}") content_length = int(self.headers.get('Content-Length', 0)) @@ -215,20 +342,16 @@ class Handler(BaseHTTPRequestHandler): self.access_logger.warning(f"[POST DATA] {post_data[:200]}") - # Parse and log credentials username, password = self.tracker.parse_credentials(post_data) if username or password: - # Log to dedicated credentials.log file timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") credential_line = f"{timestamp}|{client_ip}|{username or 'N/A'}|{password or 'N/A'}|{self.path}" self.credential_logger.info(credential_line) - # Also record in tracker for dashboard self.tracker.record_credential_attempt(client_ip, self.path, username or 'N/A', password or 'N/A') self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}") - # send the post data (body) to the record_access function so the post data can be used to detect suspicious things. self.tracker.record_access(client_ip, self.path, user_agent, post_data) time.sleep(1) @@ -248,6 +371,10 @@ class Handler(BaseHTTPRequestHandler): def serve_special_path(self, path: str) -> bool: """Serve special paths like robots.txt, API endpoints, etc.""" + # Check SQL injection honeypot endpoints first + if self._handle_sql_endpoint(path): + return True + try: if path == '/robots.txt': self.send_response(200) @@ -285,7 +412,28 @@ class Handler(BaseHTTPRequestHandler): self.wfile.write(html_templates.login_form().encode()) return True - # WordPress login page + if path in ['/users', '/user', '/database', '/db', '/search']: + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + self.wfile.write(html_templates.product_search().encode()) + return True + + if path in ['/info', '/input', '/contact', '/feedback', '/comment']: + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + self.wfile.write(html_templates.input_form().encode()) + return True + + if path == '/server': + error_html, content_type = generate_server_error() + self.send_response(500) + self.send_header('Content-type', content_type) + self.end_headers() + self.wfile.write(error_html.encode()) + return True + if path in ['/wp-login.php', '/wp-login', '/wp-admin', '/wp-admin/']: self.send_response(200) self.send_header('Content-type', 'text/html') diff --git a/src/server_errors.py b/src/server_errors.py new file mode 100644 index 0000000..7591c64 --- /dev/null +++ b/src/server_errors.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 + +import random +from wordlists import get_wordlists + + +def generate_server_error() -> tuple[str, str]: + wl = get_wordlists() + server_errors = wl.server_errors + + if not server_errors: + return ("500 Internal Server Error", "text/html") + + server_type = random.choice(list(server_errors.keys())) + server_config = server_errors[server_type] + + error_codes = { + 400: "Bad Request", + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 500: "Internal Server Error", + 502: "Bad Gateway", + 503: "Service Unavailable" + } + + code = random.choice(list(error_codes.keys())) + message = error_codes[code] + + template = server_config.get('template', '') + version = random.choice(server_config.get('versions', ['1.0'])) + + html = template.replace('{code}', str(code)) + html = html.replace('{message}', message) + html = html.replace('{version}', version) + + if server_type == 'apache': + os = random.choice(server_config.get('os', ['Ubuntu'])) + html = html.replace('{os}', os) + html = html.replace('{host}', 'localhost') + + return (html, "text/html") + + +def get_server_header(server_type: str = None) -> str: + wl = get_wordlists() + server_errors = wl.server_errors + + if not server_errors: + return "nginx/1.18.0" + + if not server_type: + server_type = random.choice(list(server_errors.keys())) + + server_config = server_errors.get(server_type, {}) + version = random.choice(server_config.get('versions', ['1.0'])) + + server_headers = { + 'nginx': f"nginx/{version}", + 'apache': f"Apache/{version}", + 'iis': f"Microsoft-IIS/{version}", + 'tomcat': f"Apache-Coyote/1.1" + } + + return server_headers.get(server_type, "nginx/1.18.0") diff --git a/src/sql_errors.py b/src/sql_errors.py new file mode 100644 index 0000000..dc84886 --- /dev/null +++ b/src/sql_errors.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 + +import random +import re +from typing import Optional, Tuple +from wordlists import get_wordlists + + +def detect_sql_injection_pattern(query_string: str) -> Optional[str]: + if not query_string: + return None + + query_lower = query_string.lower() + + patterns = { + 'quote': [r"'", r'"', r'`'], + 'comment': [r'--', r'#', r'/\*', r'\*/'], + 'union': [r'\bunion\b', r'\bunion\s+select\b'], + 'boolean': [r'\bor\b.*=.*', r'\band\b.*=.*', r"'.*or.*'.*=.*'"], + 'time_based': [r'\bsleep\b', r'\bwaitfor\b', r'\bdelay\b', r'\bbenchmark\b'], + 'stacked': [r';.*select', r';.*drop', r';.*insert', r';.*update', r';.*delete'], + 'command': [r'\bexec\b', r'\bexecute\b', r'\bxp_cmdshell\b'], + 'info_schema': [r'information_schema', r'table_schema', r'table_name'], + } + + for injection_type, pattern_list in patterns.items(): + for pattern in pattern_list: + if re.search(pattern, query_lower): + return injection_type + + return None + + +def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tuple[str, str]: + wl = get_wordlists() + sql_errors = wl.sql_errors + + if not sql_errors: + return ("Database error occurred", "text/plain") + + if not db_type: + db_type = random.choice(list(sql_errors.keys())) + + db_errors = sql_errors.get(db_type, {}) + + if injection_type and injection_type in db_errors: + errors = db_errors[injection_type] + elif 'generic' in db_errors: + errors = db_errors['generic'] + else: + all_errors = [] + for error_list in db_errors.values(): + if isinstance(error_list, list): + all_errors.extend(error_list) + errors = all_errors if all_errors else ["Database error occurred"] + + error_message = random.choice(errors) if errors else "Database error occurred" + + if '{table}' in error_message: + tables = ['users', 'products', 'orders', 'customers', 'accounts', 'sessions'] + error_message = error_message.replace('{table}', random.choice(tables)) + + if '{column}' in error_message: + columns = ['id', 'name', 'email', 'password', 'username', 'created_at'] + error_message = error_message.replace('{column}', random.choice(columns)) + + return (error_message, "text/plain") + + +def generate_sql_error_response(query_string: str, db_type: str = None) -> Tuple[str, str, int]: + injection_type = detect_sql_injection_pattern(query_string) + + if not injection_type: + return (None, None, None) + + error_message, content_type = get_random_sql_error(db_type, injection_type) + + status_code = 500 + + if random.random() < 0.3: + status_code = 200 + + return (error_message, content_type, status_code) + + +def get_sql_response_with_data(path: str, params: str) -> str: + import json + from generators import random_username, random_email, random_password + + injection_type = detect_sql_injection_pattern(params) + + if injection_type in ['union', 'boolean', 'stacked']: + data = { + "success": True, + "results": [ + { + "id": i, + "username": random_username(), + "email": random_email(), + "password_hash": random_password(), + "role": random.choice(["admin", "user", "moderator"]) + } + for i in range(1, random.randint(2, 5)) + ] + } + return json.dumps(data, indent=2) + + return json.dumps({ + "success": True, + "message": "Query executed successfully", + "results": [] + }, indent=2) diff --git a/src/templates/html/generic_search.html b/src/templates/html/generic_search.html new file mode 100644 index 0000000..90171bc --- /dev/null +++ b/src/templates/html/generic_search.html @@ -0,0 +1,66 @@ + + + + Search + + + +

Search

+
+ + + +
+ + + + diff --git a/src/templates/html/input_form.html b/src/templates/html/input_form.html new file mode 100644 index 0000000..c03b1a8 --- /dev/null +++ b/src/templates/html/input_form.html @@ -0,0 +1,74 @@ + + + + Contact + + + +

Contact

+
+ + + + + +
+ + + + diff --git a/src/templates/html/robots.txt b/src/templates/html/robots.txt index 2bae8ca..3618937 100644 --- a/src/templates/html/robots.txt +++ b/src/templates/html/robots.txt @@ -11,8 +11,18 @@ Disallow: /login/ Disallow: /admin/login Disallow: /phpMyAdmin/ Disallow: /admin/login.php +Disallow: /users +Disallow: /search +Disallow: /contact +Disallow: /info +Disallow: /input +Disallow: /feedback +Disallow: /server Disallow: /api/v1/users Disallow: /api/v2/secrets +Disallow: /api/search +Disallow: /api/sql +Disallow: /api/database Disallow: /.env Disallow: /credentials.txt Disallow: /passwords.txt diff --git a/src/templates/html_templates.py b/src/templates/html_templates.py index c6ad09a..a7cefbc 100644 --- a/src/templates/html_templates.py +++ b/src/templates/html_templates.py @@ -50,3 +50,13 @@ def directory_listing(path: str, dirs: list, files: list) -> str: rows += row_template.format(href=f, name=f, date="2024-12-01 14:22", size=size) return load_template("directory_listing", path=path, rows=rows) + + +def product_search() -> str: + """Generate product search page with SQL injection honeypot""" + return load_template("generic_search") + + +def input_form() -> str: + """Generate input form page for XSS honeypot""" + return load_template("input_form") diff --git a/src/tracker.py b/src/tracker.py index 717a4c3..8465031 100644 --- a/src/tracker.py +++ b/src/tracker.py @@ -5,6 +5,7 @@ from collections import defaultdict from datetime import datetime import re import urllib.parse +from wordlists import get_wordlists class AccessTracker: @@ -21,14 +22,19 @@ class AccessTracker: 'burp', 'zap', 'w3af', 'metasploit', 'nuclei', 'gobuster', 'dirbuster' ] - # common attack types such as xss, shell injection, probes - self.attack_types = { - 'path_traversal': r'\.\.', - 'sql_injection': r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)", - 'xss_attempt': r'( bool: + if not input_string: + return False + + wl = get_wordlists() + xss_pattern = wl.attack_patterns.get('xss_attempt', '') + + if not xss_pattern: + xss_pattern = r'( str: + xss_detected = False + reflected_content = [] + + for key, value in input_data.items(): + if detect_xss_pattern(value): + xss_detected = True + reflected_content.append(f"

{key}: {value}

") + + if xss_detected: + html = f""" + + + + Submission Received + + + +
+

Thank you for your submission!

+

We have received your information:

+ {''.join(reflected_content)} +

We will get back to you shortly.

+
+ + +""" + return html + + return """ + + + + Submission Received + + + +
+

Thank you for your submission!

+

Your message has been received and we will respond soon.

+
+ + +""" From 4a1d1cf7be3e0a4515b5821e22b01a39c00d142f Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio Date: Sat, 3 Jan 2026 17:16:37 +0100 Subject: [PATCH 13/21] added random SQL errors, random server errors, XSS baits --- .gitignore | 7 ++ src/data/krawl.db | Bin 69632 -> 0 bytes tests/sim_attacks.sh | 2 +- tests/test_sql_injection.sh | 78 +++++++++++++++++ wordlists.json | 167 +++++++++++++++++++++++++++++++++++- 5 files changed, 252 insertions(+), 2 deletions(-) delete mode 100644 src/data/krawl.db create mode 100644 tests/test_sql_injection.sh diff --git a/.gitignore b/.gitignore index 5d758cb..70b93e4 100644 --- a/.gitignore +++ b/.gitignore @@ -61,6 +61,13 @@ secrets/ *.log logs/ +# Data and databases +data/ +**/data/ +*.db +*.sqlite +*.sqlite3 + # Temporary files *.tmp *.temp diff --git a/src/data/krawl.db b/src/data/krawl.db deleted file mode 100644 index 759ffb958d54f426a0a424446aa7baec7af9d275..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 69632 zcmeI5eQX=&eaA^r6m{hB9=)2DZAE7riKR6@-aC2nvPHES$E$56lD8y73W6qSnUzG+ zypwFFLxyvb7X4%0hM;H=v?#Cz+x}=f6bRNN8@6`E3hbpAunkDJV8PaAz*cl<&;kvx z_K*D@iag#!ox~KG&Mx*l+mHFY+&$m>JkLEZe15!V_3?`}rz&1+v^GnQs71yi(P-ph zQH(?)D*PLVf88SuABMXg_&@6Vyw^uHBHej7&3!H+jQm1GcujaZ{cq`I{$KfG>ha{O z$sG4e;$3ci4~7J{yy$`n%rSM2$rEvLdVO8wuv)gAom;6yW_C9E z!r3;iTC-?7CCBdm&ogkkq2fF+X?jX^gS{iC;gmLtt?G@fs_oc;V6N3#wo|mL)w*~N zs=$>CmkJD&jncl+<;GUsY27ZC8dtmc%GX8}Yj)AzvYWMXt+8eMA`<;Ydb`FRrF4(xF%me!zM_MRI<+3BP`qk(Ikr(M{|L2E0b~G88yWce?M@N?wH7CjN zN6B6*ZP&_;x&+Rp#xu2zjZ)T-WO4pdsa&f&4g2~T@j~6HZio;dF0YDDiVB=&oDo~w zi;8Ya!2Eo*yxz#FvLeG@MSP^zs$Of{%C=+A7p|zXYMz#r)3PQi`l7BaDyo#1Wz*1H z;Rr}N4w5*|C+TMMbg8mgt7k(Llv7nLZ@LpRAZVeVQRQm!Dh$cDni7mJ4o;MDT2W7H zvM5`WlA%h9YU#4#lEWbCWIv_w73q0NR`k4LxycMDnxTsPRY_W_IR{u~EFN8#&>uG$vH_=piR%aV`CtTb|~zF2U2b?9dP^2 zwgs{#DVCbgYwj2clH?;54kdNbuq0VGX-kk2K+@6v3e_&QkQz%+qe*H`HJ|}Vr$NnJ zznZP)Mx#`*!_lKziv|oaa=E;&yJ^0ohpj`c+BQu6L(`*Ml40taV!DS@pvUO1(=b4~ zR$I&Zn(y$WDbS_p&|tbVNsyM?LmD)j#r5iKTl$#YsE1O?p%|gNGfc%$b1u(;w%I+j zg(k<8a=NPNhMO7%IrN!hYSqq&DKs^@q?xh;gUVFm?yFICNj37Ol5@EcP(z0RySm-5 z$S8RzCoN~l?sy!uKo2s_l+yt>*N-i)(m9E*3xTQ(a}wDwjU2Ss!$GCBqbV#x(8ua# zPB&rf9|JXX?9Y@|t8umAgg1l)KC&uViYXf~+#Ui&FhH7S#{Mv8JJ69xV4=w{g|%b@ zLOTV8Q&b^yN~hBGQMMs?L_|lAzXM8Efmt=e#;~Qms3C1_V_~MLjnDNCJ-w+?0 zK1y_w622q6F8q$LCj62xo&J9M8|jzRH`0%$=lCD<-{N24 zKhCf43w$K?X6nnS7gA5Bv{W+r@5$HTB6uJHB!C2v01`j~NB{{SfuD`Q{P5gTZ>rH} zkO?-Kc}dj_!!l0{PAHp}V&#tyN|=*WOH*Y-+&3XuIf7-1Z0LqIHz1*6Nm|~pbnCuB z2@OeCEYs9x2PM=bQ*T`;iKtj**ZDK$|&lYWbP{OW-*x0@aeRf-9P{OWB zm9SqzzpYbxKtj(5i64}(YZ8^sZA9ZGj zV_ftY{dAOmIzm6q&`*cyrwsjch&t2MnWABn)R~~(aq5gwhtM#AI%(?Vsgt5kk~$o9 zMyZpaNk-_WICX}pH%6VISd4>TLxKH&>bnv09r6wG3V8wM`{&6#;e_uAe=B@Z_${Fh z*8Q?Dp8jF_AJeaCp5!(Y2NLI4)aF>NB{{S z0VIF~kN^@u0!RP}AOR#0G6A~2zQ_7Mx9|Et7uNbex3m6FH<^i%3lZ{l@=5p)42jmUd+=|-&$H*r- z@BiEH1~Co^AOR$R1dsp{Kmter2_OL^fCP{L68K;QcD>fGZy);Z*93#!|JU9el71_& z|NH-T&Hvx*?Em+{?gGz70!RP}AOR$R1dsp{Kmter2_OL^fCNG%!0h-pX#c+$d8PCI zzfd(|W=H@DAOR$R1dsp{Kmter2_OL^fCP|$k3i=Ie|y;k1it^TX9*DS{y*RPKmGlG zl)M^&zjz=4B!C2v01`j~NB{{S0VIF~kN^@u0{0MsUy1T_(WRx1B1O$f@N0faInno$ zfc&D7lT1yOP5428>i_mYUxbY;R4tcRWcvI6DESfC|KCG5h*FRM5$zPH$k(bDGq)wh9=SY?uA#ved;Vp>9 z0|_7jB!C2v01`j~NB{{S0VIF~kib1aV3Ncm(NpQ!a$|F|Q7<-IjjL6AQfLPrV*}Ie zz$0uR-ww>MfvI-j6dRaq2adCWTsv@#4IFI;5;ic=4otCuBkjN>8yIf~a%|vmJ21`$ z#@d0yY~WBkaEJ+J(h4^wwIM%|&ISd8Q7`v1|$ixJWg{zZ5+{YLr${$-v|J)8VN za+Q0Hn;(5?G?}}V1;Lv)SF(NW)i zh}TNnwQ{2_fpe+xOl@PMlrk93*j^ zPtwii>788~Lll%#RV{D26Eh%ap`TIZYVm5RyuQ_x>>C>gC(5^%q^>O*s-&ovE-NlM z45Ci89mQO@pY@{bg2eRV&rHQ!8zRE6b2fLzZ)z%T0l-OusCtTHg)} zkqVW_RA7r#cXAT6drkt|S>Cr98Fm+8a-NPx+WAxW)7$9A%tz~`9 zcX-ki=u&iOFx{CXNXzXZ4Vum3diAy~eavpuL#gCYjL_W~redf$m*+s+>>k=elVeIb zUDb5MO^t#a`phx4YG=e0ni^fwOj&_JWh!y^)u_6p8hKO6x!ee-p+kUO-ELT9lsuG^ zmNR5`JPul*2bpHd>42N-$Cg*=oWwWUhN|3tv72e+puHXrDyGAy5PZq-kdC4|BG5mv&Gik-$Qe;RpJbY=9^s z3Zm}DMABc2cHRu2Se9YX`G0~xvNQi*PXBpY<^KS@cpw2JfCP{L58{a*u=?9ikrmZaqkOSjyKS&(wK=Km}o dSRv6B%QRsj`B\n\n\n{code} {message}\n\n\n\n

An error occurred.

\n

Sorry, the page you are looking for is currently unavailable.
\nPlease try again later.

\n

If you are the system administrator of this resource then you should check the error log for details.

\n

Faithfully yours, nginx/{version}.

\n\n" + }, + "apache": { + "versions": ["2.4.41", "2.4.52", "2.4.54", "2.4.57"], + "os": ["Ubuntu", "Debian", "CentOS"], + "template": "\n\n{code} {message}\n\n

{message}

\n

The requested URL was not found on this server.

\n
\n
Apache/{version} ({os}) Server at {host} Port 80
\n" + }, + "iis": { + "versions": ["10.0", "8.5", "8.0"], + "template": "\n\n\n\n{code} - {message}\n\n\n\n

Server Error

\n
\n
\n

{code} - {message}

\n

The page cannot be displayed because an internal server error has occurred.

\n
\n
\n\n" + }, + "tomcat": { + "versions": ["9.0.65", "10.0.27", "10.1.5"], + "template": "HTTP Status {code} - {message}

HTTP Status {code} - {message}


Type Status Report

Description The server encountered an internal error that prevented it from fulfilling this request.


Apache Tomcat/{version}

" + } + }, + "sql_errors": { + "mysql": { + "generic": [ + "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ''1'' at line 1", + "Unknown column '{column}' in 'where clause'", + "Table '{table}' doesn't exist", + "Operand should contain 1 column(s)", + "Subquery returns more than 1 row", + "Duplicate entry 'admin' for key 'PRIMARY'" + ], + "quote": [ + "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ''''' at line 1", + "Unclosed quotation mark after the character string ''", + "You have an error in your SQL syntax near '\\'' LIMIT 0,30'" + ], + "union": [ + "The used SELECT statements have a different number of columns", + "Operand should contain 1 column(s)", + "Mixing of GROUP columns (MIN(),MAX(),COUNT(),...) with no GROUP columns is illegal" + ], + "boolean": [ + "You have an error in your SQL syntax near 'OR 1=1' at line 1", + "Unknown column '1' in 'where clause'" + ], + "time_based": [ + "Query execution was interrupted", + "Lock wait timeout exceeded; try restarting transaction" + ], + "comment": [ + "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '--' at line 1" + ] + }, + "postgresql": { + "generic": [ + "ERROR: syntax error at or near \"1\"", + "ERROR: column \"{column}\" does not exist", + "ERROR: relation \"{table}\" does not exist", + "ERROR: operator does not exist: integer = text", + "ERROR: invalid input syntax for type integer: \"admin\"" + ], + "quote": [ + "ERROR: unterminated quoted string at or near \"'\"", + "ERROR: syntax error at or near \"'\"", + "ERROR: unterminated quoted identifier at or near \"'\"" + ], + "union": [ + "ERROR: each UNION query must have the same number of columns", + "ERROR: UNION types integer and text cannot be matched" + ], + "boolean": [ + "ERROR: syntax error at or near \"OR\"", + "ERROR: invalid input syntax for type boolean: \"1=1\"" + ], + "time_based": [ + "ERROR: canceling statement due to user request", + "ERROR: function pg_sleep(integer) does not exist" + ], + "info_schema": [ + "ERROR: permission denied for table {table}", + "ERROR: permission denied for schema information_schema" + ] + }, + "mssql": { + "generic": [ + "Msg 102, Level 15, State 1, Line 1\nIncorrect syntax near '1'.", + "Msg 207, Level 16, State 1, Line 1\nInvalid column name '{column}'.", + "Msg 208, Level 16, State 1, Line 1\nInvalid object name '{table}'.", + "Msg 245, Level 16, State 1, Line 1\nConversion failed when converting the varchar value 'admin' to data type int." + ], + "quote": [ + "Msg 105, Level 15, State 1, Line 1\nUnclosed quotation mark after the character string ''.", + "Msg 102, Level 15, State 1, Line 1\nIncorrect syntax near '''." + ], + "union": [ + "Msg 205, Level 16, State 1, Line 1\nAll queries combined using a UNION, INTERSECT or EXCEPT operator must have an equal number of expressions in their target lists.", + "Msg 8167, Level 16, State 1, Line 1\nThe type of column \"{column}\" conflicts with the type of other columns specified in the UNION, INTERSECT, or EXCEPT list." + ], + "boolean": [ + "Msg 102, Level 15, State 1, Line 1\nIncorrect syntax near 'OR'." + ], + "command": [ + "Msg 15281, Level 16, State 1, Procedure xp_cmdshell, Line 1\nSQL Server blocked access to procedure 'sys.xp_cmdshell' of component 'xp_cmdshell'" + ] + }, + "oracle": { + "generic": [ + "ORA-00933: SQL command not properly ended", + "ORA-00904: \"{column}\": invalid identifier", + "ORA-00942: table or view \"{table}\" does not exist", + "ORA-01722: invalid number", + "ORA-01756: quoted string not properly terminated" + ], + "quote": [ + "ORA-01756: quoted string not properly terminated", + "ORA-00933: SQL command not properly ended" + ], + "union": [ + "ORA-01789: query block has incorrect number of result columns", + "ORA-01790: expression must have same datatype as corresponding expression" + ], + "boolean": [ + "ORA-00933: SQL command not properly ended", + "ORA-00920: invalid relational operator" + ] + }, + "sqlite": { + "generic": [ + "near \"1\": syntax error", + "no such column: {column}", + "no such table: {table}", + "unrecognized token: \"'\"", + "incomplete input" + ], + "quote": [ + "unrecognized token: \"'\"", + "incomplete input", + "near \"'\": syntax error" + ], + "union": [ + "SELECTs to the left and right of UNION do not have the same number of result columns" + ] + }, + "mongodb": { + "generic": [ + "MongoError: Can't canonicalize query: BadValue unknown operator: $where", + "MongoError: Failed to parse: { $where: \"this.{column} == '1'\" }", + "SyntaxError: unterminated string literal", + "MongoError: exception: invalid operator: $gt" + ], + "quote": [ + "SyntaxError: unterminated string literal", + "SyntaxError: missing } after property list" + ], + "command": [ + "MongoError: $where is not allowed in this context", + "MongoError: can't eval: security" + ] + } + }, + "attack_patterns": { + "path_traversal": "\\.\\.", + "sql_injection": "('|\"|`|--|#|/\\*|\\*/|\\bunion\\b|\\bunion\\s+select\\b|\\bor\\b.*=.*|\\band\\b.*=.*|'.*or.*'.*=.*'|\\bsleep\\b|\\bwaitfor\\b|\\bdelay\\b|\\bbenchmark\\b|;.*select|;.*drop|;.*insert|;.*update|;.*delete|\\bexec\\b|\\bexecute\\b|\\bxp_cmdshell\\b|information_schema|table_schema|table_name)", + "xss_attempt": "( Date: Sat, 3 Jan 2026 13:56:16 -0600 Subject: [PATCH 14/21] fixing dashboard to ensure starts with forward slash, put back the server_header option to allow pinning --- config.yaml | 5 ++++- src/config.py | 9 ++++++++- src/generators.py | 4 ++++ 3 files changed, 16 insertions(+), 2 deletions(-) diff --git a/config.yaml b/config.yaml index c4faa8f..f9825a0 100644 --- a/config.yaml +++ b/config.yaml @@ -5,6 +5,9 @@ server: delay: 100 # Response delay in milliseconds timezone: null # e.g., "America/New_York" or null for system default + # manually set the server header, if null a random one will be used. + server_header: "Apache/2.2.22 (Ubuntu)" + links: min_length: 5 max_length: 15 @@ -19,7 +22,7 @@ canary: dashboard: # if set to "null" this will Auto-generates random path if not set - # can be set to "dashboard" or similar + # can be set to "/dashboard" or similar <-- note this MUST include a forward slash secret_path: dashboard api: diff --git a/src/config.py b/src/config.py index fb679b4..d8aa2f2 100644 --- a/src/config.py +++ b/src/config.py @@ -16,6 +16,7 @@ class Config: """Configuration class for the deception server""" port: int = 5000 delay: int = 100 # milliseconds + server_header: str = "" links_length_range: Tuple[int, int] = (5, 15) links_per_page_range: Tuple[int, int] = (10, 15) char_space: str = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' @@ -27,6 +28,7 @@ class Config: api_server_port: int = 8080 api_server_path: str = "/api/v2/users" probability_error_codes: int = 0 # Percentage (0-100) + # Database settings database_path: str = "data/krawl.db" database_retention_days: int = 30 @@ -98,10 +100,15 @@ class Config: dashboard_path = dashboard.get('secret_path') if dashboard_path is None: dashboard_path = f'/{os.urandom(16).hex()}' - + else: + # ensure the dashboard path starts with a / + if dashboard_path[:1] != "/": + dashboard_path = f"/{dashboard_path}" + return cls( port=server.get('port', 5000), delay=server.get('delay', 100), + server_header=server.get('server_header',""), timezone=server.get('timezone'), links_length_range=( links.get('min_length', 5), diff --git a/src/generators.py b/src/generators.py index 6eca9fd..92eb590 100644 --- a/src/generators.py +++ b/src/generators.py @@ -9,6 +9,7 @@ import string import json from templates import html_templates from wordlists import get_wordlists +from config import get_config def random_username() -> str: """Generate random username""" @@ -37,6 +38,9 @@ def random_email(username: str = None) -> str: def random_server_header() -> str: """Generate random server header from wordlists""" + config = get_config() + if config.server_header: + return config.server_header wl = get_wordlists() return random.choice(wl.server_headers) From 7d9f0616b77a924c42b8c6efbdede6bf376c4ffd Mon Sep 17 00:00:00 2001 From: Phillip Tarrant Date: Mon, 5 Jan 2026 11:54:02 -0600 Subject: [PATCH 15/21] Add background task to export suspicious IPs to text file - Implement export-malicious-ips task that queries distinct IPs flagged as is_suspicious from database and writes to exports/malicious_ips.txt - Add exports volume mount to docker-compose.yaml for host persistence - Update entrypoint.sh to fix ownership of exports directory for krawl user - Update Dockerfile to create /app/exports directory during build Other tasks can be added by creating them in the tasks dir using the same setup as this task. All tasks *MUST* include a TASK_CONFIG dict and a main method in the file to work correctly. --- Dockerfile | 2 +- docker-compose.yaml | 1 + entrypoint.sh | 2 +- exports/.gitkeep | 0 requirements.txt | 3 + src/server.py | 5 + src/tasks/top_attacking_ips.py | 57 +++++++ src/tasks_master.py | 288 +++++++++++++++++++++++++++++++++ 8 files changed, 356 insertions(+), 2 deletions(-) create mode 100644 exports/.gitkeep create mode 100644 src/tasks/top_attacking_ips.py create mode 100644 src/tasks_master.py diff --git a/Dockerfile b/Dockerfile index 2c7b954..92c2d9f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,7 +16,7 @@ COPY wordlists.json /app/ COPY entrypoint.sh /app/ RUN useradd -m -u 1000 krawl && \ - mkdir -p /app/logs /app/data && \ + mkdir -p /app/logs /app/data /app/exports && \ chown -R krawl:krawl /app && \ chmod +x /app/entrypoint.sh diff --git a/docker-compose.yaml b/docker-compose.yaml index 02b6ae7..08bcec9 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -12,6 +12,7 @@ services: - ./wordlists.json:/app/wordlists.json:ro - ./config.yaml:/app/config.yaml:ro - ./logs:/app/logs + - ./exports:/app/exports environment: - CONFIG_LOCATION=config.yaml restart: unless-stopped diff --git a/entrypoint.sh b/entrypoint.sh index 28b5fc0..fe3ef45 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -2,7 +2,7 @@ set -e # Fix ownership of mounted directories -chown -R krawl:krawl /app/logs /app/data 2>/dev/null || true +chown -R krawl:krawl /app/logs /app/data /app/exports 2>/dev/null || true # Drop to krawl user and run the application exec gosu krawl "$@" diff --git a/exports/.gitkeep b/exports/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt index 8cb6dc5..cafbb7d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,6 @@ PyYAML>=6.0 # Database ORM SQLAlchemy>=2.0.0,<3.0.0 + +# Scheduling +APScheduler>=3.11.2 \ No newline at end of file diff --git a/src/server.py b/src/server.py index 7a59c73..135284c 100644 --- a/src/server.py +++ b/src/server.py @@ -13,6 +13,7 @@ from tracker import AccessTracker from handler import Handler from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger from database import initialize_database +from tasks_master import get_tasksmaster def print_usage(): @@ -89,6 +90,10 @@ def main(): except IOError: app_logger.warning("Can't read input file. Using randomly generated links.") + # tasks master init + tasks_master = get_tasksmaster() + tasks_master.run_scheduled_tasks() + try: app_logger.info(f'Starting deception server on port {config.port}...') app_logger.info(f'Timezone configured: {tz.key}') diff --git a/src/tasks/top_attacking_ips.py b/src/tasks/top_attacking_ips.py new file mode 100644 index 0000000..d9e18d3 --- /dev/null +++ b/src/tasks/top_attacking_ips.py @@ -0,0 +1,57 @@ +# tasks/export_malicious_ips.py + +import os +from logger import get_app_logger +from database import get_database +from models import AccessLog +from sqlalchemy import distinct + +app_logger = get_app_logger() + +# ---------------------- +# TASK CONFIG +# ---------------------- +TASK_CONFIG = { + "name": "export-malicious-ips", + "cron": "*/5 * * * *", + "enabled": True, + "run_when_loaded": True +} + +EXPORTS_DIR = "exports" +OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt") + +# ---------------------- +# TASK LOGIC +# ---------------------- +def main(): + """ + Export all IPs flagged as suspicious to a text file. + TasksMaster will call this function based on the cron schedule. + """ + task_name = TASK_CONFIG.get("name") + app_logger.info(f"[Background Task] {task_name} starting...") + + try: + db = get_database() + session = db.session + + # Query distinct suspicious IPs + results = session.query(distinct(AccessLog.ip)).filter( + AccessLog.is_suspicious == True + ).all() + + # Ensure exports directory exists + os.makedirs(EXPORTS_DIR, exist_ok=True) + + # Write IPs to file (one per line) + with open(OUTPUT_FILE, 'w') as f: + for (ip,) in results: + f.write(f"{ip}\n") + + app_logger.info(f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}") + + except Exception as e: + app_logger.error(f"[Background Task] {task_name} failed: {e}") + finally: + db.close_session() diff --git a/src/tasks_master.py b/src/tasks_master.py new file mode 100644 index 0000000..264471c --- /dev/null +++ b/src/tasks_master.py @@ -0,0 +1,288 @@ +import os +import sys +import datetime +import functools +import threading +import importlib +import importlib.util + +from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger + +app_logger = get_app_logger() + +try: + from apscheduler.schedulers.background import BackgroundScheduler + from apscheduler.triggers.cron import CronTrigger + from apscheduler.events import EVENT_JOB_EXECUTED, EVENT_JOB_ERROR +except ModuleNotFoundError: + msg = ( + "Required modules are not installed. " + "Can not continue with module / application loading.\n" + "Install it with: pip install -r requirements" + ) + print(msg, file=sys.stderr) + app_logger.error(msg) + exit() + + +# ---------- TASKSMASTER CLASS ---------- +class TasksMaster: + + TASK_DEFAULT_CRON = '*/15 * * * *' + TASK_JITTER = 240 + TASKS_FOLDER = os.path.join(os.path.dirname(__file__), "tasks") + + def __init__(self, scheduler: BackgroundScheduler): + self.tasks = self._config_tasks() + self.scheduler = scheduler + self.last_run_times = {} + self.scheduler.add_listener(self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR) + + def _config_tasks(self): + """ + Loads tasks from the TASKS_FOLDER and logs how many were found. + """ + tasks_defined = self._load_tasks_from_folder(self.TASKS_FOLDER) + app_logger.info(f"Scheduled Tasks Loaded from folder: {self.TASKS_FOLDER}") + return tasks_defined + + def _load_tasks_from_folder(self, folder_path): + """ + Loads and registers task modules from a specified folder. + + This function scans the given folder for Python (.py) files, dynamically + imports each as a module, and looks for two attributes: + - TASK_CONFIG: A dictionary containing task metadata, specifically the + 'name' and 'cron' (cron schedule string). + - main: A callable function that represents the task's execution logic. + + Tasks with both attributes are added to a list with their configuration and + execution function. + + Args: + folder_path (str): Path to the folder containing task scripts. + + Returns: + list[dict]: A list of task definitions with keys: + - 'name' (str): The name of the task. + - 'filename' (str): The file the task was loaded from. + - 'cron' (str): The crontab string for scheduling. + - 'enabled' (bool): Whether the task is enabled. + - 'run_when_loaded' (bool): Whether to run the task immediately. + """ + tasks = [] + + if not os.path.exists(folder_path): + app_logger.error(f"{folder_path} does not exist! Unable to load tasks!") + return tasks + + # we sort the files so that we have a set order, which helps with debugging + for filename in sorted(os.listdir(folder_path)): + + # skip any non python files, as well as any __pycache__ or .pyc files that might creep in there + if not filename.endswith('.py') or filename.startswith("__"): + continue + + path = os.path.join(folder_path, filename) + module_name = filename[:-3] + spec = importlib.util.spec_from_file_location(f"tasks.{module_name}", path) + module = importlib.util.module_from_spec(spec) + try: + spec.loader.exec_module(module) + sys.modules[f"tasks.{module_name}"] = module + except Exception as e: + app_logger.error(f"Failed to import {filename}: {e}") + continue + + # if we have a tasks config and a main function, we attempt to schedule it + if hasattr(module, 'TASK_CONFIG') and hasattr(module, 'main'): + + # ensure task_config is a dict + if not isinstance(module.TASK_CONFIG, dict): + app_logger.error(f"TASK_CONFIG is not a dict in {filename}. Skipping task.") + continue + + task_cron = module.TASK_CONFIG.get("cron") or self.TASK_DEFAULT_CRON + task_name = module.TASK_CONFIG.get("name", module_name) + + # ensure the task_cron is a valid cron value + try: + CronTrigger.from_crontab(task_cron) + except ValueError as ve: + app_logger.error(f"Invalid cron format for task {task_name}: {ve} - Skipping this task") + continue + + task = { + 'name': module.TASK_CONFIG.get('name', module_name), + 'filename': filename, + 'cron': task_cron, + "enabled": module.TASK_CONFIG.get("enabled", False), + "run_when_loaded": module.TASK_CONFIG.get("run_when_loaded", False) + } + + tasks.append(task) + + # we are missing things, and we log what's missing + else: + if not hasattr(module, 'TASK_CONFIG'): + app_logger.warning(f"Missing TASK_CONFIG in {filename}") + elif not hasattr(module, 'main'): + app_logger.warning(f"Missing main() in {filename}") + + return tasks + + def _add_jobs(self): + # for each task in the tasks config file... + for task_to_run in self.tasks: + + # remember, these tasks, are built from the "load_tasks_from_folder" function, + # if you want to pass data from the TASKS_CONFIG dict, you need to pass it there to get it here. + task_name = task_to_run.get("name") + run_when_loaded = task_to_run.get("run_when_loaded") + module_name = os.path.splitext(task_to_run.get("filename"))[0] + task_enabled = task_to_run.get("enabled", False) + + # if no crontab set for this task, we use 15 as the default. + task_cron = task_to_run.get("cron") or self.TASK_DEFAULT_CRON + + # if task is disabled, skip this one + if not task_enabled: + app_logger.info(f"{task_name} is disabled in client config. Skipping task") + continue + try: + if os.path.isfile(os.path.join(self.TASKS_FOLDER, task_to_run.get("filename"))): + # schedule the task now that everything has checked out above... + self._schedule_task(task_name, module_name, task_cron, run_when_loaded) + app_logger.info(f"Scheduled {module_name} cron is set to {task_cron}.", extra={"task": task_to_run}) + else: + app_logger.info(f"Skipping invalid or unsafe file: {task_to_run.get('filename')}", extra={"task": task_to_run}) + + except Exception as e: + app_logger.error(f"Error scheduling task: {e}", extra={"tasks": task_to_run}) + + def _schedule_task(self, task_name, module_name, task_cron, run_when_loaded): + try: + # Dynamically import the module + module = importlib.import_module(f"tasks.{module_name}") + + # Check if the module has a 'main' function + if hasattr(module, 'main'): + app_logger.info(f"Scheduling {task_name} - {module_name} Main Function") + + # unique_job_id + job_identifier = f"{module_name}__{task_name}" + + # little insurance to make sure the cron is set to something and not none + if task_cron is None: + task_cron = self.TASK_DEFAULT_CRON + + trigger = CronTrigger.from_crontab(task_cron) + + # schedule the task / job + if run_when_loaded: + app_logger.info(f"Task: {task_name} is set to run instantly. Scheduling to run on scheduler start") + + self.scheduler.add_job( + module.main, + trigger, + id=job_identifier, + jitter=self.TASK_JITTER, + name=task_name, + next_run_time=datetime.datetime.now(), + max_instances=1 + ) + else: + self.scheduler.add_job( + module.main, + trigger, + id=job_identifier, + jitter=self.TASK_JITTER, + name=task_name, + max_instances=1 + ) + else: + app_logger.error(f"{module_name} does not define a 'main' function.") + + except Exception as e: + app_logger.error(f"Failed to load {module_name}: {e}") + + def job_listener(self, event): + job_id = event.job_id + self.last_run_times[job_id] = datetime.datetime.now() + + if event.exception: + app_logger.error(f"Job {event.job_id} failed: {event.exception}") + else: + app_logger.info(f"Job {event.job_id} completed successfully.") + + def list_jobs(self): + scheduled_jobs = self.scheduler.get_jobs() + jobs_list = [] + + for job in scheduled_jobs: + jobs_list.append({ + "id": job.id, + "name": job.name, + "next_run": job.next_run_time, + }) + return jobs_list + + def run_scheduled_tasks(self): + """ + Runs and schedules enabled tasks using the background scheduler. + + This method performs the following: + 1. Retrieves the current task configurations and updates internal state. + 2. Adds new jobs to the scheduler based on the latest configuration. + 3. Starts the scheduler to begin executing tasks at their defined intervals. + + This ensures the scheduler is always running with the most up-to-date + task definitions and enabled status. + """ + + # Add enabled tasks to the scheduler + self._add_jobs() + + # Start the scheduler to begin executing the scheduled tasks (if not already running) + if not self.scheduler.running: + self.scheduler.start() + + +# ---------- SINGLETON WRAPPER ---------- +T = type + +def singleton_loader(func): + """Decorator to ensure only one instance exists.""" + cache: dict[str, T] = {} + lock = threading.Lock() + + @functools.wraps(func) + def wrapper(*args, **kwargs) -> T: + with lock: + if func.__name__ not in cache: + cache[func.__name__] = func(*args, **kwargs) + return cache[func.__name__] + return wrapper + + +@singleton_loader +def get_tasksmaster(scheduler: BackgroundScheduler | None = None) -> TasksMaster: + """ + Returns the singleton TasksMaster instance. + + - Automatically creates a BackgroundScheduler if none is provided. + - Automatically starts the scheduler when the singleton is created. + + :param scheduler: Optional APScheduler instance. If None, a new BackgroundScheduler will be created. + """ + if scheduler is None: + scheduler = BackgroundScheduler() + + tm_instance = TasksMaster(scheduler) + + # Auto-start scheduler if not already running + if not scheduler.running: + scheduler.start() + app_logger.info("TasksMaster scheduler started automatically with singleton creation.") + + return tm_instance From 02aed9e65abffd19361b63067d6936e6a4400c32 Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio Date: Tue, 6 Jan 2026 18:50:36 +0100 Subject: [PATCH 16/21] added drop down menu and scoring graph to the dashboard --- src/database.py | 39 ++- src/handler.py | 27 ++ src/templates/dashboard_template.py | 398 +++++++++++++++++++++++++++- src/templates/html/main_page.html | 13 +- 4 files changed, 455 insertions(+), 22 deletions(-) diff --git a/src/database.py b/src/database.py index 9d8e444..e60348a 100644 --- a/src/database.py +++ b/src/database.py @@ -256,7 +256,7 @@ class DatabaseManager: """ session = self.session - + sanitized_ip = sanitize_ip(ip) ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first() ip_stats.category = category @@ -439,6 +439,43 @@ class DatabaseManager: finally: self.close_session() + def get_ip_stats_by_ip(self, ip: str) -> Optional[Dict[str, Any]]: + """ + Retrieve IP statistics for a specific IP address. + + Args: + ip: The IP address to look up + + Returns: + Dictionary with IP stats or None if not found + """ + session = self.session + try: + stat = session.query(IpStats).filter(IpStats.ip == ip).first() + + if not stat: + return None + + return { + 'ip': stat.ip, + 'total_requests': stat.total_requests, + 'first_seen': stat.first_seen.isoformat() if stat.first_seen else None, + 'last_seen': stat.last_seen.isoformat() if stat.last_seen else None, + 'country_code': stat.country_code, + 'city': stat.city, + 'asn': stat.asn, + 'asn_org': stat.asn_org, + 'reputation_score': stat.reputation_score, + 'reputation_source': stat.reputation_source, + 'analyzed_metrics': stat.analyzed_metrics or {}, + 'category': stat.category, + 'category_scores': stat.category_scores or {}, + 'manual_category': stat.manual_category, + 'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None + } + finally: + self.close_session() + def get_dashboard_counts(self) -> Dict[str, int]: """ Get aggregate statistics for the dashboard. diff --git a/src/handler.py b/src/handler.py index eef528d..2598706 100644 --- a/src/handler.py +++ b/src/handler.py @@ -413,6 +413,33 @@ class Handler(BaseHTTPRequestHandler): except Exception as e: self.app_logger.error(f"Error generating dashboard: {e}") return + + # API endpoint for fetching IP stats + if self.config.dashboard_secret_path and self.path.startswith(f"{self.config.dashboard_secret_path}/api/ip-stats/"): + ip_address = self.path.replace(f"{self.config.dashboard_secret_path}/api/ip-stats/", "") + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.send_header('Access-Control-Allow-Origin', '*') + # Prevent browser caching - force fresh data from database every time + self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0') + self.send_header('Pragma', 'no-cache') + self.send_header('Expires', '0') + self.end_headers() + try: + from database import get_database + import json + db = get_database() + ip_stats = db.get_ip_stats_by_ip(ip_address) + if ip_stats: + self.wfile.write(json.dumps(ip_stats).encode()) + else: + self.wfile.write(json.dumps({'error': 'IP not found'}).encode()) + except BrokenPipeError: + pass + except Exception as e: + self.app_logger.error(f"Error fetching IP stats: {e}") + self.wfile.write(json.dumps({'error': str(e)}).encode()) + return self.tracker.record_access(client_ip, self.path, user_agent, method='GET') diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py index dfad3dd..df0378a 100644 --- a/src/templates/dashboard_template.py +++ b/src/templates/dashboard_template.py @@ -27,9 +27,20 @@ def format_timestamp(iso_timestamp: str) -> str: def generate_dashboard(stats: dict) -> str: """Generate dashboard HTML with access statistics""" - # Generate IP rows (IPs are generally safe but escape for consistency) + # Generate IP rows with clickable functionality for dropdown stats top_ips_rows = '\n'.join([ - f'
' + f''' + + + + + + + ''' for i, (ip, count) in enumerate(stats['top_ips']) ]) or '' @@ -45,27 +56,76 @@ def generate_dashboard(stats: dict) -> str: for i, (ua, count) in enumerate(stats['top_user_agents']) ]) or '' - # Generate suspicious accesses rows (CRITICAL: multiple user-controlled fields) + # Generate suspicious accesses rows with clickable IPs suspicious_rows = '\n'.join([ - f'' + f''' + + + + + + + + ''' for log in stats['recent_suspicious'][-10:] ]) or '' - # Generate honeypot triggered IPs rows + # Generate honeypot triggered IPs rows with clickable IPs honeypot_rows = '\n'.join([ - f'' + f''' + + + + + + + ''' for ip, paths in stats.get('honeypot_triggered_ips', []) ]) or '' - # Generate attack types rows (CRITICAL: paths and user agents are user-controlled) + # Generate attack types rows with clickable IPs attack_type_rows = '\n'.join([ - f'' + f''' + + + + + + + + + ''' for log in stats.get('attack_types', [])[-10:] ]) or '' - # Generate credential attempts rows (CRITICAL: usernames and passwords are user-controlled) + # Generate credential attempts rows with clickable IPs credential_rows = '\n'.join([ - f'' + f''' + + + + + + + + + ''' for log in stats.get('credential_attempts', [])[-20:] ]) or '' @@ -180,6 +240,119 @@ def generate_dashboard(stats: dict) -> str: content: 'â–¼'; opacity: 1; }} + .ip-row {{ + transition: background-color 0.2s; + }} + .ip-clickable {{ + cursor: pointer; + color: #58a6ff !important; + font-weight: 500; + text-decoration: underline; + text-decoration-style: dotted; + text-underline-offset: 3px; + }} + .ip-clickable:hover {{ + color: #79c0ff !important; + text-decoration-style: solid; + background: #1c2128; + }} + .ip-stats-row {{ + background: #0d1117; + }} + .ip-stats-cell {{ + padding: 0 !important; + }} + .ip-stats-dropdown {{ + margin-top: 10px; + padding: 15px; + background: #0d1117; + border: 1px solid #30363d; + border-radius: 6px; + font-size: 13px; + display: flex; + gap: 20px; + }} + .stats-left {{ + flex: 1; + }} + .stats-right {{ + flex: 0 0 200px; + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + }} + .radar-chart {{ + position: relative; + width: 180px; + height: 180px; + overflow: visible; + }} + .radar-legend {{ + margin-top: 10px; + font-size: 11px; + }} + .radar-legend-item {{ + display: flex; + align-items: center; + gap: 6px; + margin: 3px 0; + }} + .radar-legend-color {{ + width: 12px; + height: 12px; + border-radius: 2px; + }} + .ip-stats-dropdown .loading {{ + color: #8b949e; + font-style: italic; + }} + .stat-row {{ + display: flex; + justify-content: space-between; + padding: 5px 0; + border-bottom: 1px solid #21262d; + }} + .stat-row:last-child {{ + border-bottom: none; + }} + .stat-label-sm {{ + color: #8b949e; + font-weight: 500; + }} + .stat-value-sm {{ + color: #58a6ff; + font-weight: 600; + }} + .category-badge {{ + display: inline-block; + padding: 4px 8px; + border-radius: 4px; + font-size: 12px; + font-weight: 600; + text-transform: uppercase; + }} + .category-attacker {{ + background: #f851491a; + color: #f85149; + border: 1px solid #f85149; + }} + .category-good-crawler {{ + background: #3fb9501a; + color: #3fb950; + border: 1px solid #3fb950; + }} + .category-bad-crawler {{ + background: #f0883e1a; + color: #f0883e; + border: 1px solid #f0883e; + }} + .category-regular-user {{ + background: #58a6ff1a; + color: #58a6ff; + border: 1px solid #58a6ff; + }} + @@ -387,6 +560,211 @@ def generate_dashboard(stats: dict) -> str: rows.forEach(row => tbody.appendChild(row)); }}); }}); + + // IP stats dropdown functionality + document.querySelectorAll('.ip-clickable').forEach(cell => {{ + cell.addEventListener('click', async function(e) {{ + const row = e.currentTarget.closest('.ip-row'); + if (!row) return; + + const ip = row.getAttribute('data-ip'); + const statsRow = row.nextElementSibling; + if (!statsRow || !statsRow.classList.contains('ip-stats-row')) return; + + const isVisible = getComputedStyle(statsRow).display !== 'none'; + + document.querySelectorAll('.ip-stats-row').forEach(r => {{ + r.style.display = 'none'; + }}); + + if (isVisible) return; + + statsRow.style.display = 'table-row'; + + const dropdown = statsRow.querySelector('.ip-stats-dropdown'); + + // Always fetch fresh data from database + if (dropdown) {{ + dropdown.innerHTML = '
Loading stats...
'; + try {{ + const response = await fetch(`${{window.location.pathname}}/api/ip-stats/${{ip}}`, {{ + cache: 'no-store', + headers: {{ + 'Cache-Control': 'no-cache', + 'Pragma': 'no-cache' + }} + }}); + if (!response.ok) throw new Error(`HTTP ${{response.status}}`); + + const data = await response.json(); + dropdown.innerHTML = data.error + ? `
Error: ${{data.error}}
` + : formatIpStats(data); + }} catch (err) {{ + dropdown.innerHTML = `
Failed to load stats: ${{err.message}}
`; + }} + }} + }}); + }}); + + function formatIpStats(stats) {{ + let html = '
'; + + // Basic info + html += '
'; + html += 'Total Requests:'; + html += `${{stats.total_requests || 0}}`; + html += '
'; + + html += '
'; + html += 'First Seen:'; + html += `${{stats.first_seen ? new Date(stats.first_seen).toLocaleString() : 'N/A'}}`; + html += '
'; + + html += '
'; + html += 'Last Seen:'; + html += `${{stats.last_seen ? new Date(stats.last_seen).toLocaleString() : 'N/A'}}`; + html += '
'; + + // Category + if (stats.category) {{ + html += '
'; + html += 'Category:'; + const categoryClass = 'category-' + stats.category.toLowerCase().replace('_', '-'); + html += `${{stats.category}}`; + html += '
'; + }} + + // GeoIP info if available + if (stats.country_code || stats.city) {{ + html += '
'; + html += 'Location:'; + html += `${{stats.city || ''}}${{stats.city && stats.country_code ? ', ' : ''}}${{stats.country_code || 'Unknown'}}`; + html += '
'; + }} + + if (stats.asn_org) {{ + html += '
'; + html += 'ASN Org:'; + html += `${{stats.asn_org}}`; + html += '
'; + }} + + // Reputation score if available + if (stats.reputation_score !== null && stats.reputation_score !== undefined) {{ + html += '
'; + html += 'Reputation Score:'; + html += `${{stats.reputation_score}} ${{stats.reputation_source ? '(' + stats.reputation_source + ')' : ''}}`; + html += '
'; + }} + + html += '
'; + + // Radar chart on the right + if (stats.category_scores && Object.keys(stats.category_scores).length > 0) {{ + html += '
'; + html += ''; + + const scores = {{ + attacker: stats.category_scores.attacker || 0, + good_crawler: stats.category_scores.good_crawler || 0, + bad_crawler: stats.category_scores.bad_crawler || 0, + regular_user: stats.category_scores.regular_user || 0 + }}; + + // Normalize scores for better visualization + const maxScore = Math.max(...Object.values(scores), 1); + const minVisibleRadius = 0.15; // Minimum 15% visibility even for 0 values + const normalizedScores = {{}}; + + Object.keys(scores).forEach(key => {{ + // Scale values: ensure minimum visibility + proportional to max + normalizedScores[key] = minVisibleRadius + (scores[key] / maxScore) * (1 - minVisibleRadius); + }}); + + const colors = {{ + attacker: '#f85149', + good_crawler: '#3fb950', + bad_crawler: '#f0883e', + regular_user: '#58a6ff' + }}; + + const labels = {{ + attacker: 'Attacker', + good_crawler: 'Good Bot', + bad_crawler: 'Bad Bot', + regular_user: 'User' + }}; + + // Draw radar background grid + const cx = 100, cy = 100, maxRadius = 75; + for (let i = 1; i <= 5; i++) {{ + const r = (maxRadius / 5) * i; + html += ``; + }} + + // Draw axes + const angles = [0, 90, 180, 270]; + const keys = ['attacker', 'good_crawler', 'bad_crawler', 'regular_user']; + + angles.forEach((angle, i) => {{ + const rad = (angle - 90) * Math.PI / 180; + const x2 = cx + maxRadius * Math.cos(rad); + const y2 = cy + maxRadius * Math.sin(rad); + html += ``; + + // Add labels + const labelDist = maxRadius + 30; + const lx = cx + labelDist * Math.cos(rad); + const ly = cy + labelDist * Math.sin(rad); + html += `${{labels[keys[i]]}}`; + }}); + + // Draw filled polygon for scores + let points = []; + angles.forEach((angle, i) => {{ + const normalizedScore = normalizedScores[keys[i]]; + const rad = (angle - 90) * Math.PI / 180; + const r = normalizedScore * maxRadius; + const x = cx + r * Math.cos(rad); + const y = cy + r * Math.sin(rad); + points.push(`${{x}},${{y}}`); + }}); + + // Determine dominant category color + const dominantKey = Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? a : b); + const dominantColor = colors[dominantKey]; + + // Draw single colored area + html += ``; + + // Draw points + angles.forEach((angle, i) => {{ + const normalizedScore = normalizedScores[keys[i]]; + const rad = (angle - 90) * Math.PI / 180; + const r = normalizedScore * maxRadius; + const x = cx + r * Math.cos(rad); + const y = cy + r * Math.sin(rad); + html += ``; + }}); + + html += ''; + + // Legend + html += '
'; + keys.forEach(key => {{ + html += '
'; + html += `
`; + html += `${{labels[key]}}: ${{scores[key]}}%`; + html += '
'; + }}); + html += '
'; + + html += '
'; + }} + + return html; + }} diff --git a/src/templates/html/main_page.html b/src/templates/html/main_page.html index d0b39de..ac154e8 100644 --- a/src/templates/html/main_page.html +++ b/src/templates/html/main_page.html @@ -46,21 +46,12 @@ gap: 10px; align-items: center; overflow-y: auto; + overflow-x: hidden; flex: 1; padding-top: 10px; }} .links-container::-webkit-scrollbar {{ - width: 8px; - }} - .links-container::-webkit-scrollbar-track {{ - background: #0d1117; - }} - .links-container::-webkit-scrollbar-thumb {{ - background: #30363d; - border-radius: 4px; - }} - .links-container::-webkit-scrollbar-thumb:hover {{ - background: #484f58; + width: 0px; }} .link-box {{ background: #161b22; From 769084102925b3fc7b9ca2486c653037b8a4de4c Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio Date: Wed, 7 Jan 2026 18:24:43 +0100 Subject: [PATCH 17/21] added categorization visualization and itmeline --- src/database.py | 71 +++++++++++++++- src/migrations/add_category_history.py | 40 +++++++++ src/models.py | 25 ++++++ src/templates/dashboard_template.py | 112 +++++++++++++++++++++++-- 4 files changed, 240 insertions(+), 8 deletions(-) create mode 100644 src/migrations/add_category_history.py diff --git a/src/database.py b/src/database.py index e60348a..0245105 100644 --- a/src/database.py +++ b/src/database.py @@ -13,7 +13,7 @@ from typing import Optional, List, Dict, Any from sqlalchemy import create_engine, func, distinct, case from sqlalchemy.orm import sessionmaker, scoped_session, Session -from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats +from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats, CategoryHistory from sanitizer import ( sanitize_ip, sanitize_path, @@ -226,6 +226,7 @@ class DatabaseManager: def update_ip_stats_analysis(self, ip: str, analyzed_metrics: Dict[str, object], category: str, category_scores: Dict[str, int], last_analysis: datetime) -> None: """ Update IP statistics (ip is already persisted). + Records category change in history if category has changed. Args: ip: IP address to update @@ -241,6 +242,11 @@ class DatabaseManager: sanitized_ip = sanitize_ip(ip) ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first() + # Check if category has changed and record it + old_category = ip_stats.category + if old_category != category: + self._record_category_change(sanitized_ip, old_category, category, last_analysis) + ip_stats.analyzed_metrics = analyzed_metrics ip_stats.category = category ip_stats.category_scores = category_scores @@ -259,9 +265,66 @@ class DatabaseManager: sanitized_ip = sanitize_ip(ip) ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first() + # Record the manual category change + old_category = ip_stats.category + if old_category != category: + self._record_category_change(sanitized_ip, old_category, category, datetime.utcnow()) + ip_stats.category = category ip_stats.manual_category = True + def _record_category_change(self, ip: str, old_category: Optional[str], new_category: str, timestamp: datetime) -> None: + """ + Internal method to record category changes in history. + + Args: + ip: IP address + old_category: Previous category (None if first categorization) + new_category: New category + timestamp: When the change occurred + """ + session = self.session + try: + history_entry = CategoryHistory( + ip=ip, + old_category=old_category, + new_category=new_category, + timestamp=timestamp + ) + session.add(history_entry) + session.commit() + except Exception as e: + session.rollback() + print(f"Error recording category change: {e}") + + def get_category_history(self, ip: str) -> List[Dict[str, Any]]: + """ + Retrieve category change history for a specific IP. + + Args: + ip: IP address to get history for + + Returns: + List of category change records ordered by timestamp + """ + session = self.session + try: + sanitized_ip = sanitize_ip(ip) + history = session.query(CategoryHistory).filter( + CategoryHistory.ip == sanitized_ip + ).order_by(CategoryHistory.timestamp.asc()).all() + + return [ + { + 'old_category': h.old_category, + 'new_category': h.new_category, + 'timestamp': h.timestamp.isoformat() + } + for h in history + ] + finally: + self.close_session() + def get_access_logs( self, limit: int = 100, @@ -456,6 +519,9 @@ class DatabaseManager: if not stat: return None + # Get category history for this IP + category_history = self.get_category_history(ip) + return { 'ip': stat.ip, 'total_requests': stat.total_requests, @@ -471,7 +537,8 @@ class DatabaseManager: 'category': stat.category, 'category_scores': stat.category_scores or {}, 'manual_category': stat.manual_category, - 'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None + 'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None, + 'category_history': category_history } finally: self.close_session() diff --git a/src/migrations/add_category_history.py b/src/migrations/add_category_history.py new file mode 100644 index 0000000..654204e --- /dev/null +++ b/src/migrations/add_category_history.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 +""" +Migration script to add CategoryHistory table to existing databases. +Run this once to upgrade your database schema. +""" + +import sys +from pathlib import Path + +# Add parent directory to path to import modules +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from database import get_database, DatabaseManager +from models import Base, CategoryHistory + + +def migrate(): + """Create CategoryHistory table if it doesn't exist.""" + print("Starting migration: Adding CategoryHistory table...") + + try: + db = get_database() + + # Initialize database if not already done + if not db._initialized: + db.initialize() + + # Create only the CategoryHistory table + CategoryHistory.__table__.create(db._engine, checkfirst=True) + + print("✓ Migration completed successfully!") + print(" - CategoryHistory table created") + + except Exception as e: + print(f"✗ Migration failed: {e}") + sys.exit(1) + + +if __name__ == "__main__": + migrate() diff --git a/src/models.py b/src/models.py index 190ef26..2b86fd5 100644 --- a/src/models.py +++ b/src/models.py @@ -151,6 +151,31 @@ class IpStats(Base): def __repr__(self) -> str: return f"" + +class CategoryHistory(Base): + """ + Records category changes for IP addresses over time. + + Tracks when an IP's category changes, storing both the previous + and new category along with timestamp for timeline visualization. + """ + __tablename__ = 'category_history' + + id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) + ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True) + old_category: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) + new_category: Mapped[str] = mapped_column(String(50), nullable=False) + timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True) + + # Composite index for efficient IP-based timeline queries + __table_args__ = ( + Index('ix_category_history_ip_timestamp', 'ip', 'timestamp'), + ) + + def __repr__(self) -> str: + return f" {self.new_category})>" + + # class IpLog(Base): # """ # Records all IPs that have accessed the honeypot, along with aggregated stats and inferred user category. diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py index df0378a..332288c 100644 --- a/src/templates/dashboard_template.py +++ b/src/templates/dashboard_template.py @@ -284,8 +284,8 @@ def generate_dashboard(stats: dict) -> str: }} .radar-chart {{ position: relative; - width: 180px; - height: 180px; + width: 220px; + height: 220px; overflow: visible; }} .radar-legend {{ @@ -352,6 +352,72 @@ def generate_dashboard(stats: dict) -> str: color: #58a6ff; border: 1px solid #58a6ff; }} + .timeline-container {{ + margin-top: 15px; + padding-top: 15px; + border-top: 1px solid #30363d; + }} + .timeline-title {{ + color: #58a6ff; + font-size: 13px; + font-weight: 600; + margin-bottom: 10px; + }} + .timeline {{ + position: relative; + padding-left: 30px; + }} + .timeline::before {{ + content: ''; + position: absolute; + left: 12px; + top: 5px; + bottom: 5px; + width: 3px; + background: #30363d; + }} + .timeline-item {{ + position: relative; + padding-bottom: 15px; + }} + .timeline-item:last-child {{ + padding-bottom: 0; + }} + .timeline-marker {{ + position: absolute; + left: -26px; + width: 16px; + height: 16px; + border-radius: 50%; + border: 2px solid #0d1117; + }} + .timeline-marker.attacker {{ + background: #f85149; + }} + .timeline-marker.good-crawler {{ + background: #3fb950; + }} + .timeline-marker.bad-crawler {{ + background: #f0883e; + }} + .timeline-marker.regular-user {{ + background: #58a6ff; + }} + .timeline-content {{ + font-size: 12px; + }} + .timeline-category {{ + font-weight: 600; + }} + .timeline-timestamp {{ + color: #8b949e; + font-size: 11px; + margin-top: 2px; + }} + .timeline-arrow {{ + color: #8b949e; + margin: 0 7px; + }} @@ -658,11 +724,45 @@ def generate_dashboard(stats: dict) -> str: html += ''; }} + // Category History Timeline + if (stats.category_history && stats.category_history.length > 0) {{ + html += '
'; + html += '
Behavior Timeline
'; + html += '
'; + + stats.category_history.forEach((change, index) => {{ + const categoryClass = change.new_category.toLowerCase().replace('_', '-'); + const timestamp = new Date(change.timestamp).toLocaleString(); + + html += '
'; + html += `
`; + html += '
'; + + if (change.old_category) {{ + const oldCategoryBadge = 'category-' + change.old_category.toLowerCase().replace('_', '-'); + html += `${{change.old_category}}`; + html += '→'; + }} else {{ + html += 'Initial: '; + }} + + const newCategoryBadge = 'category-' + change.new_category.toLowerCase().replace('_', '-'); + html += `${{change.new_category}}`; + html += `
${{timestamp}}
`; + html += '
'; + html += '
'; + }}); + + html += '
'; + html += '
'; + }} + html += ''; // Radar chart on the right if (stats.category_scores && Object.keys(stats.category_scores).length > 0) {{ html += '
'; + html += '
Category Score
'; html += ''; const scores = {{ @@ -705,7 +805,7 @@ def generate_dashboard(stats: dict) -> str: // Draw axes const angles = [0, 90, 180, 270]; - const keys = ['attacker', 'good_crawler', 'bad_crawler', 'regular_user']; + const keys = ['good_crawler', 'regular_user', 'bad_crawler', 'attacker']; angles.forEach((angle, i) => {{ const rad = (angle - 90) * Math.PI / 180; @@ -713,8 +813,8 @@ def generate_dashboard(stats: dict) -> str: const y2 = cy + maxRadius * Math.sin(rad); html += ``; - // Add labels - const labelDist = maxRadius + 30; + // Add labels at consistent distance + const labelDist = maxRadius + 35; const lx = cx + labelDist * Math.cos(rad); const ly = cy + labelDist * Math.sin(rad); html += `${{labels[keys[i]]}}`; @@ -755,7 +855,7 @@ def generate_dashboard(stats: dict) -> str: keys.forEach(key => {{ html += '
'; html += `
`; - html += `${{labels[key]}}: ${{scores[key]}}%`; + html += `${{labels[key]}}: ${{scores[key]}} pt`; html += '
'; }}); html += '
'; From edb288a27157cf85993dad9940f90c053caa3ae1 Mon Sep 17 00:00:00 2001 From: Phillip Tarrant Date: Wed, 7 Jan 2026 12:33:43 -0600 Subject: [PATCH 18/21] Fixed some print statements to leverage logging, pulled in most recent dev edits, added exports to gitignore --- .gitignore | 3 +++ src/analyzer.py | 18 ++++++++++++------ src/database.py | 12 ++++++++---- 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 70b93e4..63ae0e9 100644 --- a/.gitignore +++ b/.gitignore @@ -76,3 +76,6 @@ data/ # Personal canary tokens or sensitive configs *canary*token*.yaml personal-values.yaml + +#exports dir (keeping .gitkeep so we have the dir) +/exports/* \ No newline at end of file diff --git a/src/analyzer.py b/src/analyzer.py index a745813..b10e4e7 100644 --- a/src/analyzer.py +++ b/src/analyzer.py @@ -8,10 +8,13 @@ from datetime import datetime, timedelta import re from wordlists import get_wordlists from config import get_config +from logger import get_app_logger """ Functions for user activity analysis """ +app_logger = get_app_logger() + class Analyzer: """ Analyzes users activity and produces aggregated insights @@ -56,7 +59,7 @@ class Analyzer: attack_urls_threshold = config.attack_urls_threshold uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds - print(f"http_risky_methods_threshold: {http_risky_methods_threshold}") + app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}") score = {} score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} @@ -185,7 +188,7 @@ class Analyzer: variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs) std = variance ** 0.5 cv = std/mean - print(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}") + app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}") if cv >= uneven_request_timing_threshold: score["attacker"]["uneven_request_timing"] = True @@ -268,10 +271,13 @@ class Analyzer: regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"] regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"] - print(f"Attacker score: {attacker_score}") - print(f"Good Crawler score: {good_crawler_score}") - print(f"Bad Crawler score: {bad_crawler_score}") - print(f"Regular User score: {regular_user_score}") + score_details = f""" + Attacker score: {attacker_score} + Good Crawler score: {good_crawler_score} + Bad Crawler score: {bad_crawler_score} + Regular User score: {regular_user_score} + """ + app_logger.debug(score_details) analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list} category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score} diff --git a/src/database.py b/src/database.py index 0245105..c184e9e 100644 --- a/src/database.py +++ b/src/database.py @@ -22,6 +22,9 @@ from sanitizer import ( sanitize_attack_pattern, ) +from logger import get_app_logger + +applogger = get_app_logger() class DatabaseManager: """ @@ -154,7 +157,7 @@ class DatabaseManager: except Exception as e: session.rollback() # Log error but don't crash - database persistence is secondary to honeypot function - print(f"Database error persisting access: {e}") + applogger.critical(f"Database error persisting access: {e}") return None finally: self.close_session() @@ -193,7 +196,7 @@ class DatabaseManager: except Exception as e: session.rollback() - print(f"Database error persisting credential: {e}") + applogger.critical(f"Database error persisting credential: {e}") return None finally: self.close_session() @@ -236,7 +239,8 @@ class DatabaseManager: last_analysis: timestamp of last analysis """ - print(f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}") + applogger.debug(f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}") + applogger.info(f"IP: {ip} category has been updated to {category}") session = self.session sanitized_ip = sanitize_ip(ip) @@ -295,7 +299,7 @@ class DatabaseManager: session.commit() except Exception as e: session.rollback() - print(f"Error recording category change: {e}") + applogger.error(f"Error recording category change: {e}") def get_category_history(self, ip: str) -> List[Dict[str, Any]]: """ From b61461d0282f7d3b775f66c65412124040e95d89 Mon Sep 17 00:00:00 2001 From: Patrick Di Fazio Date: Thu, 8 Jan 2026 19:20:22 +0100 Subject: [PATCH 19/21] fixed categorization visualization, fixed date in the dashboard, fixed attack regex detection --- Dockerfile | 1 + config.yaml | 18 +++--- src/analyzer.py | 37 ++++++++++--- src/database.py | 47 +++++++++++----- src/handler.py | 3 +- src/templates/dashboard_template.py | 86 +++++++++++++++++++++++------ src/wordlists.py | 3 +- wordlists.json | 17 +++--- 8 files changed, 154 insertions(+), 58 deletions(-) diff --git a/Dockerfile b/Dockerfile index 2c7b954..78023a7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,6 +14,7 @@ RUN pip install --no-cache-dir -r requirements.txt COPY src/ /app/src/ COPY wordlists.json /app/ COPY entrypoint.sh /app/ +COPY config.yaml /app/ RUN useradd -m -u 1000 krawl && \ mkdir -p /app/logs /app/data && \ diff --git a/config.yaml b/config.yaml index 2150e1f..52daa09 100644 --- a/config.yaml +++ b/config.yaml @@ -3,7 +3,7 @@ server: port: 5000 delay: 100 # Response delay in milliseconds - timezone: null # e.g., "America/New_York" or null for system default + timezone: null # e.g., "America/New_York", "Europe/Paris" or null for system default # manually set the server header, if null a random one will be used. server_header: null @@ -11,8 +11,8 @@ server: links: min_length: 5 max_length: 15 - min_per_page: 10 - max_per_page: 15 + min_per_page: 5 + max_per_page: 10 char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" max_counter: 10 @@ -38,9 +38,9 @@ behavior: probability_error_codes: 0 # 0-100 percentage analyzer: - # http_risky_methods_threshold: 0.1 - # violated_robots_threshold: 0.1 - # uneven_request_timing_threshold: 5 - # uneven_request_timing_time_window_seconds: 300 - # user_agents_used_threshold: 2 - # attack_urls_threshold: 1 + http_risky_methods_threshold: 0.1 + violated_robots_threshold: 0.1 + uneven_request_timing_threshold: 2 + uneven_request_timing_time_window_seconds: 300 + user_agents_used_threshold: 2 + attack_urls_threshold: 1 diff --git a/src/analyzer.py b/src/analyzer.py index a745813..b63cd5e 100644 --- a/src/analyzer.py +++ b/src/analyzer.py @@ -6,6 +6,7 @@ from zoneinfo import ZoneInfo from pathlib import Path from datetime import datetime, timedelta import re +import urllib.parse from wordlists import get_wordlists from config import get_config """ @@ -101,6 +102,15 @@ class Analyzer: total_accesses_count = len(accesses) if total_accesses_count <= 0: return + + # Set category as "unknown" for the first 5 requests + if total_accesses_count < 3: + category = "unknown" + analyzed_metrics = {} + category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0} + last_analysis = datetime.now(tz=ZoneInfo('UTC')) + self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis) + return 0 #--------------------- HTTP Methods --------------------- @@ -147,7 +157,7 @@ class Analyzer: robots_disallows.append(parts[1].strip()) #if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker - violated_robots_count = len([item for item in accesses if item["path"].rstrip("/") in tuple(robots_disallows)]) + violated_robots_count = len([item for item in accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)]) #print(f"Violated robots count: {violated_robots_count}") if total_accesses_count > 0: violated_robots_ratio = violated_robots_count / total_accesses_count @@ -168,7 +178,8 @@ class Analyzer: #--------------------- Requests Timing --------------------- #Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses] - timestamps = [ts for ts in timestamps if datetime.utcnow() - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)] + now_utc = datetime.now(tz=ZoneInfo('UTC')) + timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)] timestamps = sorted(timestamps, reverse=True) time_diffs = [] @@ -221,13 +232,25 @@ class Analyzer: attack_urls_found_list = [] wl = get_wordlists() - if wl.attack_urls: + if wl.attack_patterns: queried_paths = [item["path"] for item in accesses] for queried_path in queried_paths: - for name, pattern in wl.attack_urls.items(): - if re.search(pattern, queried_path, re.IGNORECASE): - attack_urls_found_list.append(pattern) + # URL decode the path to catch encoded attacks + try: + decoded_path = urllib.parse.unquote(queried_path) + # Double decode to catch double-encoded attacks + decoded_path_twice = urllib.parse.unquote(decoded_path) + except Exception: + decoded_path = queried_path + decoded_path_twice = queried_path + + for name, pattern in wl.attack_patterns.items(): + # Check original, decoded, and double-decoded paths + if (re.search(pattern, queried_path, re.IGNORECASE) or + re.search(pattern, decoded_path, re.IGNORECASE) or + re.search(pattern, decoded_path_twice, re.IGNORECASE)): + attack_urls_found_list.append(f"{name}: {pattern}") if len(attack_urls_found_list) > attack_urls_threshold: score["attacker"]["attack_url"] = True @@ -276,7 +299,7 @@ class Analyzer: analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list} category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score} category = max(category_scores, key=category_scores.get) - last_analysis = datetime.utcnow() + last_analysis = datetime.now(tz=ZoneInfo('UTC')) self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis) diff --git a/src/database.py b/src/database.py index 0245105..35a6e2e 100644 --- a/src/database.py +++ b/src/database.py @@ -9,6 +9,7 @@ import os import stat from datetime import datetime from typing import Optional, List, Dict, Any +from zoneinfo import ZoneInfo from sqlalchemy import create_engine, func, distinct, case from sqlalchemy.orm import sessionmaker, scoped_session, Session @@ -127,7 +128,7 @@ class DatabaseManager: method=method[:10], is_suspicious=is_suspicious, is_honeypot_trigger=is_honeypot_trigger, - timestamp=datetime.utcnow() + timestamp=datetime.now(tz=ZoneInfo('UTC')) ) session.add(access_log) session.flush() # Get the ID before committing @@ -185,7 +186,7 @@ class DatabaseManager: path=sanitize_path(path), username=sanitize_credential(username), password=sanitize_credential(password), - timestamp=datetime.utcnow() + timestamp=datetime.now(tz=ZoneInfo('UTC')) ) session.add(credential) session.commit() @@ -207,7 +208,7 @@ class DatabaseManager: ip: IP address to update """ sanitized_ip = sanitize_ip(ip) - now = datetime.utcnow() + now = datetime.now(tz=ZoneInfo('UTC')) ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first() @@ -251,6 +252,12 @@ class DatabaseManager: ip_stats.category = category ip_stats.category_scores = category_scores ip_stats.last_analysis = last_analysis + + try: + session.commit() + except Exception as e: + session.rollback() + print(f"Error updating IP stats analysis: {e}") def manual_update_category(self, ip: str, category: str) -> None: """ @@ -268,14 +275,21 @@ class DatabaseManager: # Record the manual category change old_category = ip_stats.category if old_category != category: - self._record_category_change(sanitized_ip, old_category, category, datetime.utcnow()) + self._record_category_change(sanitized_ip, old_category, category, datetime.now(tz=ZoneInfo('UTC'))) ip_stats.category = category ip_stats.manual_category = True + + try: + session.commit() + except Exception as e: + session.rollback() + print(f"Error updating manual category: {e}") def _record_category_change(self, ip: str, old_category: Optional[str], new_category: str, timestamp: datetime) -> None: """ Internal method to record category changes in history. + Only records if there's an actual change from a previous category. Args: ip: IP address @@ -283,6 +297,11 @@ class DatabaseManager: new_category: New category timestamp: When the change occurred """ + # Don't record initial categorization (when old_category is None) + # Only record actual category changes + if old_category is None: + return + session = self.session try: history_entry = CategoryHistory( @@ -318,7 +337,7 @@ class DatabaseManager: { 'old_category': h.old_category, 'new_category': h.new_category, - 'timestamp': h.timestamp.isoformat() + 'timestamp': h.timestamp.isoformat() + '+00:00' } for h in history ] @@ -364,7 +383,7 @@ class DatabaseManager: 'method': log.method, 'is_suspicious': log.is_suspicious, 'is_honeypot_trigger': log.is_honeypot_trigger, - 'timestamp': log.timestamp.isoformat(), + 'timestamp': log.timestamp.isoformat() + '+00:00', 'attack_types': [d.attack_type for d in log.attack_detections] } for log in logs @@ -457,7 +476,7 @@ class DatabaseManager: 'path': attempt.path, 'username': attempt.username, 'password': attempt.password, - 'timestamp': attempt.timestamp.isoformat() + 'timestamp': attempt.timestamp.isoformat() + '+00:00' } for attempt in attempts ] @@ -484,8 +503,8 @@ class DatabaseManager: { 'ip': s.ip, 'total_requests': s.total_requests, - 'first_seen': s.first_seen.isoformat(), - 'last_seen': s.last_seen.isoformat(), + 'first_seen': s.first_seen.isoformat() + '+00:00', + 'last_seen': s.last_seen.isoformat() + '+00:00', 'country_code': s.country_code, 'city': s.city, 'asn': s.asn, @@ -525,8 +544,8 @@ class DatabaseManager: return { 'ip': stat.ip, 'total_requests': stat.total_requests, - 'first_seen': stat.first_seen.isoformat() if stat.first_seen else None, - 'last_seen': stat.last_seen.isoformat() if stat.last_seen else None, + 'first_seen': stat.first_seen.isoformat() + '+00:00' if stat.first_seen else None, + 'last_seen': stat.last_seen.isoformat() + '+00:00' if stat.last_seen else None, 'country_code': stat.country_code, 'city': stat.city, 'asn': stat.asn, @@ -537,7 +556,7 @@ class DatabaseManager: 'category': stat.category, 'category_scores': stat.category_scores or {}, 'manual_category': stat.manual_category, - 'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None, + 'last_analysis': stat.last_analysis.isoformat() + '+00:00' if stat.last_analysis else None, 'category_history': category_history } finally: @@ -671,7 +690,7 @@ class DatabaseManager: 'ip': log.ip, 'path': log.path, 'user_agent': log.user_agent, - 'timestamp': log.timestamp.isoformat() + 'timestamp': log.timestamp.isoformat() + '+00:00' } for log in logs ] @@ -729,7 +748,7 @@ class DatabaseManager: 'ip': log.ip, 'path': log.path, 'user_agent': log.user_agent, - 'timestamp': log.timestamp.isoformat(), + 'timestamp': log.timestamp.isoformat() + '+00:00', 'attack_types': [d.attack_type for d in log.attack_detections] } for log in logs diff --git a/src/handler.py b/src/handler.py index 2598706..ebc0b66 100644 --- a/src/handler.py +++ b/src/handler.py @@ -407,7 +407,8 @@ class Handler(BaseHTTPRequestHandler): self.end_headers() try: stats = self.tracker.get_stats() - self.wfile.write(generate_dashboard(stats).encode()) + timezone = str(self.config.timezone) if self.config.timezone else 'UTC' + self.wfile.write(generate_dashboard(stats, timezone).encode()) except BrokenPipeError: pass except Exception as e: diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py index 332288c..bbb6ad9 100644 --- a/src/templates/dashboard_template.py +++ b/src/templates/dashboard_template.py @@ -7,6 +7,7 @@ Customize this template to change the dashboard appearance. import html from datetime import datetime +from zoneinfo import ZoneInfo def _escape(value) -> str: """Escape HTML special characters to prevent XSS attacks.""" @@ -14,18 +15,36 @@ def _escape(value) -> str: return "" return html.escape(str(value)) -def format_timestamp(iso_timestamp: str) -> str: - """Format ISO timestamp for display (YYYY-MM-DD HH:MM:SS)""" +def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool = False) -> str: + """Format ISO timestamp for display with timezone conversion + + Args: + iso_timestamp: ISO format timestamp string (UTC) + timezone: IANA timezone string to convert to + time_only: If True, return only HH:MM:SS, otherwise full datetime + """ try: + # Parse UTC timestamp dt = datetime.fromisoformat(iso_timestamp) + # Convert to target timezone + if dt.tzinfo is not None: + dt = dt.astimezone(ZoneInfo(timezone)) + + if time_only: + return dt.strftime("%H:%M:%S") return dt.strftime("%Y-%m-%d %H:%M:%S") except Exception: # Fallback for old format return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp -def generate_dashboard(stats: dict) -> str: - """Generate dashboard HTML with access statistics""" +def generate_dashboard(stats: dict, timezone: str = 'UTC') -> str: + """Generate dashboard HTML with access statistics + + Args: + stats: Statistics dictionary + timezone: IANA timezone string (e.g., 'Europe/Paris', 'America/New_York') + """ # Generate IP rows with clickable functionality for dropdown stats top_ips_rows = '\n'.join([ @@ -62,7 +81,7 @@ def generate_dashboard(stats: dict) -> str:
- + - + - +
{i+1}{_escape(ip)}{count}
{i+1}{_escape(ip)}{count}
No data
No data
{_escape(log["ip"])}{_escape(log["path"])}{_escape(log["user_agent"][:60])}{_escape(log["timestamp"].split("T")[1][:8])}
{_escape(log["ip"])}{_escape(log["path"])}{_escape(log["user_agent"][:60])}{_escape(log["timestamp"].split("T")[1][:8])}
No suspicious activity detected
{_escape(ip)}{_escape(", ".join(paths))}{len(paths)}
{_escape(ip)}{_escape(", ".join(paths))}{len(paths)}
No honeypot triggers yet
{_escape(log["ip"])}{_escape(log["path"])}{_escape(", ".join(log["attack_types"]))}{_escape(log["user_agent"][:60])}{_escape(log["timestamp"].split("T")[1][:8])}
{_escape(log["ip"])}{_escape(log["path"])}{_escape(", ".join(log["attack_types"]))}{_escape(log["user_agent"][:60])}{_escape(log["timestamp"].split("T")[1][:8])}
No attacks detected
{_escape(log["ip"])}{_escape(log["username"])}{_escape(log["password"])}{_escape(log["path"])}{_escape(log["timestamp"].split("T")[1][:8])}
{_escape(log["ip"])}{_escape(log["username"])}{_escape(log["password"])}{_escape(log["path"])}{_escape(log["timestamp"].split("T")[1][:8])}
No credentials captured yet
{_escape(log["ip"])} {_escape(log["path"])} {_escape(log["user_agent"][:60])}{_escape(log["timestamp"].split("T")[1][:8])}{format_timestamp(log["timestamp"], timezone, time_only=True)}