diff --git a/.tmux.conf b/.tmux.conf
deleted file mode 100644
index 701ea12..0000000
--- a/.tmux.conf
+++ /dev/null
@@ -1,6 +0,0 @@
-splitw -v -p 10
-neww -n worker
-select-window -t 1
-select-pane -t 0
-send-keys -t 0 "nvim" C-m
-send-keys -t 1 "docker compose watch" C-m
diff --git a/README.md b/README.md
index 038d023..b975134 100644
--- a/README.md
+++ b/README.md
@@ -264,35 +264,51 @@ Below is a complete overview of the Krawl honeypot’s capabilities
The actual (juicy) robots.txt configuration [is the following](src/templates/html/robots.txt).
## Honeypot pages
+
+### Common Login Attempts
Requests to common admin endpoints (`/admin/`, `/wp-admin/`, `/phpMyAdmin/`) return a fake login page. Any login attempt triggers a 1-second delay to simulate real processing and is fully logged in the dashboard (credentials, IP, headers, timing).

-
+### Common Misconfiguration Paths
Requests to paths like `/backup/`, `/config/`, `/database/`, `/private/`, or `/uploads/` return a fake directory listing populated with “interesting” files, each assigned a random file size to look realistic.

-The `.env` endpoint exposes fake database connection strings, **AWS API keys**, and **Stripe secrets**. It intentionally returns an error due to the `Content-Type` being `application/json` instead of plain text, mimicking a “juicy” misconfiguration that crawlers and scanners often flag as information leakage.
+### Environment File Leakage
+The `.env` endpoint exposes fake database connection strings, **AWS API keys**, and **Stripe secrets**. It intentionally returns an error due to the `Content-Type` being `application/json` instead of plain text, mimicking a "juicy" misconfiguration that crawlers and scanners often flag as information leakage.
+### Server Error Information
The `/server` page displays randomly generated fake error information for each known server.

+### API Endpoints with Sensitive Data
+The pages `/api/v1/users` and `/api/v2/secrets` show fake users and random secrets in JSON format.

+### Exposed Credential Files
+The pages `/credentials.txt` and `/passwords.txt` show fake users and random secrets.

+### SQL Injection and XSS Detection
Pages such as `/users`, `/search`, `/contact`, `/info`, `/input`, and `/feedback`, along with APIs like `/api/sql` and `/api/database`, are designed to lure attackers into performing attacks such as **SQL injection** or **XSS**.

Automated tools like **SQLMap** will receive a different randomized database error on each request, increasing scan noise and confusing the attacker. All detected attacks are logged and displayed in the dashboard.
+### Path Traversal Detection
+Krawl detects and responds to **path traversal** attempts targeting common system files like `/etc/passwd`, `/etc/shadow`, or Windows system paths. When an attacker tries to access sensitive files using patterns like `../../../etc/passwd` or encoded variants (`%2e%2e/`, `%252e`), Krawl returns convincing fake file contents with realistic system users, UIDs, GIDs, and shell configurations. This wastes attacker time while logging the full attack pattern.
+
+### XXE (XML External Entity) Injection
+The `/api/xml` and `/api/parser` endpoints accept XML input and are designed to detect **XXE injection** attempts. When attackers try to exploit external entity declarations (`
+
+ success
+ {content}
+
+ entity_processed:
+ template: |
+
+
+ success
+ Entity processed successfully
+ {entity_value}
+
+ entity_values:
+ - "admin_credentials"
+ - "database_connection"
+ - "api_secret_key"
+ - "internal_server_ip"
+ - "encrypted_password"
+ error:
+ template: |
+
+
+ error
+ {message}
+
+ messages:
+ - "External entity not allowed"
+ - "XML parsing error"
+ - "Invalid entity reference"
+ default_content: "root:x:0:0:root:/root:/bin/bash\nwww-data:x:33:33:www-data:/var/www:/usr/sbin/nologin"
+ command_outputs:
+ id:
+ - "uid={uid}(www-data) gid={gid}(www-data) groups={gid}(www-data)"
+ - "uid={uid}(nginx) gid={gid}(nginx) groups={gid}(nginx)"
+ - "uid={uid}(apache) gid={gid}(apache) groups={gid}(apache)"
+ whoami:
+ - www-data
+ - nginx
+ - apache
+ - webapp
+ - nobody
+ uname:
+ - "Linux webserver 5.4.0-42-generic #46-Ubuntu SMP Fri Jul 10 00:24:02 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux"
+ - "Linux app-server 4.15.0-112-generic #113-Ubuntu SMP Thu Jul 9 23:41:39 UTC 2020 x86_64 GNU/Linux"
+ - "Linux prod-server 5.15.0-56-generic #62-Ubuntu SMP Tue Nov 22 19:54:14 UTC 2022 x86_64 GNU/Linux"
+ pwd:
+ - /var/www/html
+ - /home/webapp/public_html
+ - /usr/share/nginx/html
+ - /opt/app/public
+ ls:
+ - ["index.php", "config.php", "uploads", "assets", "README.md", ".htaccess", "admin"]
+ - ["app.js", "package.json", "node_modules", "public", "views", "routes"]
+ - ["index.html", "css", "js", "images", "data", "api"]
+ cat_config: |
+
+ network_commands:
+ - "bash: wget: command not found"
+ - "curl: (6) Could not resolve host: example.com"
+ - "Connection timeout"
+ - "bash: nc: command not found"
+ - "Downloaded {size} bytes"
+ generic:
+ - "sh: 1: syntax error: unexpected end of file"
+ - "Command executed successfully"
+ - ""
+ - "/bin/sh: {num}: not found"
+ - "bash: command not found"
+ uid_min: 1000
+ uid_max: 2000
+ gid_min: 1000
+ gid_max: 2000
+ download_size_min: 100
+ download_size_max: 10000
+ sql_errors:
+ mysql:
+ syntax_errors:
+ - "You have an error in your SQL syntax"
+ - "check the manual that corresponds to your MySQL server version"
+ table_errors:
+ - "Table '{table}' doesn't exist"
+ - "Unknown table '{table}'"
+ column_errors:
+ - "Unknown column '{column}' in 'field list'"
+ - "Unknown column '{column}' in 'where clause'"
+ postgresql:
+ syntax_errors:
+ - "ERROR: syntax error at or near"
+ - "ERROR: unterminated quoted string"
+ relation_errors:
+ - "ERROR: relation \"{table}\" does not exist"
+ column_errors:
+ - "ERROR: column \"{column}\" does not exist"
+ mssql:
+ syntax_errors:
+ - "Incorrect syntax near"
+ - "Unclosed quotation mark"
+ object_errors:
+ - "Invalid object name '{table}'"
+ column_errors:
+ - "Invalid column name '{column}'"
+ oracle:
+ syntax_errors:
+ - "ORA-00933: SQL command not properly ended"
+ - "ORA-00904: invalid identifier"
+ table_errors:
+ - "ORA-00942: table or view does not exist"
+ sqlite:
+ syntax_errors:
+ - "near \"{token}\": syntax error"
+ table_errors:
+ - "no such table: {table}"
+ column_errors:
+ - "no such column: {column}"
+ mongodb:
+ query_errors:
+ - "Failed to parse"
+ - "unknown operator"
+ collection_errors:
+ - "ns not found"
+ server_errors:
+ nginx:
+ versions:
+ - "1.18.0"
+ - "1.20.1"
+ - "1.22.0"
+ - "1.24.0"
+ template: |
+
+
+
+ {code} {message}
+
+
+
+ An error occurred.
+ Sorry, the page you are looking for is currently unavailable.
+ Please try again later.
+ If you are the system administrator of this resource then you should check the error log for details.
+ Faithfully yours, nginx/{version}.
+
+
+ apache:
+ versions:
+ - "2.4.41"
+ - "2.4.52"
+ - "2.4.54"
+ - "2.4.57"
+ os:
+ - Ubuntu
+ - Debian
+ - CentOS
+ template: |
+
+
+ {code} {message}
+
+ {message}
+ The requested URL was not found on this server.
+
+ Apache/{version} ({os}) Server at {host} Port 80
+
+ iis:
+ versions:
+ - "10.0"
+ - "8.5"
+ - "8.0"
+ template: |
+
+
+
+
+ {code} - {message}
+
+
+
+
+
{code} - {message}
+ The page cannot be displayed because an internal server error has occurred.
+
+
+
+ attack_patterns:
+ path_traversal: "(\\.\\.| %2e%2e|%252e|/etc/passwd|/etc/shadow|\\.\\.\\\\/|\\.\\./|/windows/system32|c:\\\\windows|/proc/self|\\.\\.\\.%2f|\\.\\.\\.%5c|etc/passwd|etc/shadow)"
+ sql_injection: "('|\"|`|--|#|/\\*|\\*/|\\bunion\\b|\\bunion\\s+select\\b|\\bor\\b.*=.*|\\band\\b.*=.*|'.*or.*'.*=.*'|\\bsleep\\b|\\bwaitfor\\b|\\bdelay\\b|\\bbenchmark\\b|;.*select|;.*drop|;.*insert|;.*update|;.*delete|\\bexec\\b|\\bexecute\\b|\\bxp_cmdshell\\b|information_schema|table_schema|table_name)"
+ xss_attempt: "( str:
+ """Generate response for XSS attempts with reflected content"""
+ xss_detected = False
+ reflected_content = []
+
+ for key, value in input_data.items():
+ if detect_xss_pattern(value):
+ xss_detected = True
+ reflected_content.append(f"{key}: {value}
")
+
+ if xss_detected:
+ logger.info("XSS attempt detected and reflected")
+ html = f"""
+
+
+
+ Submission Received
+
+
+
+
+
Thank you for your submission!
+
We have received your information:
+ {''.join(reflected_content)}
+
We will get back to you shortly.
+
+
+
+"""
+ return html
+
+ return """
+
+
+
+ Submission Received
+
+
+
+
+
Thank you for your submission!
+
Your message has been received and we will respond soon.
+
+
+
+"""
+
+
+def generate_server_error() -> Tuple[str, str]:
+ """Generate fake server error page"""
+ wl = get_wordlists()
+ server_errors = wl.server_errors
+
+ if not server_errors:
+ return ("500 Internal Server Error", "text/html")
+
+ server_type = random.choice(list(server_errors.keys()))
+ server_config = server_errors[server_type]
+
+ error_codes = {
+ 400: "Bad Request",
+ 401: "Unauthorized",
+ 403: "Forbidden",
+ 404: "Not Found",
+ 500: "Internal Server Error",
+ 502: "Bad Gateway",
+ 503: "Service Unavailable",
+ }
+
+ code = random.choice(list(error_codes.keys()))
+ message = error_codes[code]
+
+ template = server_config.get("template", "")
+ version = random.choice(server_config.get("versions", ["1.0"]))
+
+ html = template.replace("{code}", str(code))
+ html = html.replace("{message}", message)
+ html = html.replace("{version}", version)
+
+ if server_type == "apache":
+ os = random.choice(server_config.get("os", ["Ubuntu"]))
+ html = html.replace("{os}", os)
+ html = html.replace("{host}", "localhost")
+
+ logger.debug(f"Generated {server_type} server error: {code}")
+ return (html, "text/html")
+
+
+def get_server_header(server_type: str = None) -> str:
+ """Get a fake server header string"""
+ wl = get_wordlists()
+ server_errors = wl.server_errors
+
+ if not server_errors:
+ return "nginx/1.18.0"
+
+ if not server_type:
+ server_type = random.choice(list(server_errors.keys()))
+
+ server_config = server_errors.get(server_type, {})
+ version = random.choice(server_config.get("versions", ["1.0"]))
+
+ server_headers = {
+ "nginx": f"nginx/{version}",
+ "apache": f"Apache/{version}",
+ "iis": f"Microsoft-IIS/{version}",
+ "tomcat": f"Apache-Coyote/1.1",
+ }
+
+ return server_headers.get(server_type, "nginx/1.18.0")
+
+
+def detect_and_respond_deception(path: str, query: str = "", body: str = "", method: str = "GET") -> Optional[Tuple[str, str, int]]:
+ """
+ Main deception detection and response function.
+ Returns (response_body, content_type, status_code) if deception should be applied, None otherwise.
+ """
+
+ logger.debug(f"Checking deception for {method} {path} query={query[:50] if query else 'empty'}")
+
+ if detect_path_traversal(path, query, body):
+ logger.info(f"Path traversal detected in: {path}")
+ return generate_path_traversal_response(f"{path}?{query}" if query else path)
+
+ if body and detect_xxe_injection(body):
+ logger.info(f"XXE injection detected")
+ return generate_xxe_response(body)
+
+ if detect_command_injection(path, query, body):
+ logger.info(f"Command injection detected in: {path}")
+ full_input = f"{path} {query} {body}"
+ return generate_command_injection_response(full_input)
+
+ return None
diff --git a/src/handler.py b/src/handler.py
index e6da601..45e9de3 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -6,7 +6,7 @@ import time
from datetime import datetime
from http.server import BaseHTTPRequestHandler
from typing import Optional, List
-from urllib.parse import urlparse, parse_qs
+from urllib.parse import urlparse, parse_qs, unquote_plus
import json
import os
@@ -19,7 +19,6 @@ from firewall.iptables import Iptables
from firewall.raw import Raw
from tracker import AccessTracker
-from analyzer import Analyzer
from templates import html_templates
from templates.dashboard_template import generate_dashboard
from generators import (
@@ -32,9 +31,14 @@ from generators import (
random_server_header,
)
from wordlists import get_wordlists
-from sql_errors import generate_sql_error_response, get_sql_response_with_data
-from xss_detector import detect_xss_pattern, generate_xss_response
-from server_errors import generate_server_error
+from deception_responses import (
+ detect_and_respond_deception,
+ generate_sql_error_response,
+ get_sql_response_with_data,
+ detect_xss_pattern,
+ generate_xss_response,
+ generate_server_error,
+)
from models import AccessLog
from ip_utils import is_valid_public_ip
from sqlalchemy import distinct
@@ -46,7 +50,6 @@ class Handler(BaseHTTPRequestHandler):
webpages: Optional[List[str]] = None
config: Config = None
tracker: AccessTracker = None
- analyzer: Analyzer = None
counter: int = 0
app_logger: logging.Logger = None
access_logger: logging.Logger = None
@@ -70,6 +73,28 @@ class Handler(BaseHTTPRequestHandler):
# Fallback to direct connection IP
return self.client_address[0]
+ def _build_raw_request(self, body: str = "") -> str:
+ """Build raw HTTP request string for forensic analysis"""
+ try:
+ # Request line
+ raw = f"{self.command} {self.path} {self.request_version}\r\n"
+
+ # Headers
+ if hasattr(self, "headers") and self.headers:
+ for header, value in self.headers.items():
+ raw += f"{header}: {value}\r\n"
+
+ raw += "\r\n"
+
+ # Body (if present)
+ if body:
+ raw += body
+
+ return raw
+ except Exception as e:
+ # Fallback to minimal representation if building fails
+ return f"{self.command} {self.path} (error building full request: {str(e)})"
+
def _get_category_by_ip(self, client_ip: str) -> str:
"""Get the category of an IP from the database"""
return self.tracker.get_category_by_ip(client_ip)
@@ -113,7 +138,8 @@ class Handler(BaseHTTPRequestHandler):
return False
try:
- # Get query parameters
+ parsed_url = urlparse(path)
+ request_query = parsed_url.query
# Log SQL injection attempt
client_ip = self._get_client_ip()
@@ -163,6 +189,64 @@ class Handler(BaseHTTPRequestHandler):
pass
return True
+ def _handle_deception_response(self, path: str, query: str = "", body: str = "", method: str = "GET") -> bool:
+ """
+ Handle deception responses for path traversal, XXE, and command injection.
+ Returns True if a deception response was sent, False otherwise.
+ """
+ try:
+ self.app_logger.debug(f"Checking deception for: {method} {path}")
+ result = detect_and_respond_deception(path, query, body, method)
+
+ if result:
+ response_body, content_type, status_code = result
+ client_ip = self._get_client_ip()
+ user_agent = self.headers.get("User-Agent", "")
+
+ # Determine attack type using standardized names from wordlists
+ full_input = f"{path} {query} {body}".lower()
+ attack_type_db = None # For database (standardized)
+ attack_type_log = "UNKNOWN" # For logging (human-readable)
+
+ if "passwd" in path.lower() or "shadow" in path.lower() or ".." in path or ".." in query:
+ attack_type_db = "path_traversal"
+ attack_type_log = "PATH_TRAVERSAL"
+ elif body and (" str:
"""Generate a webpage containing random links or canary token"""
@@ -245,14 +329,20 @@ class Handler(BaseHTTPRequestHandler):
post_data = ""
base_path = urlparse(self.path).path
+
+ content_length = int(self.headers.get("Content-Length", 0))
+ if content_length > 0:
+ post_data = self.rfile.read(content_length).decode(
+ "utf-8", errors="replace"
+ )
+
+ parsed_url = urlparse(self.path)
+ query_string = parsed_url.query
+
+ if self._handle_deception_response(self.path, query_string, post_data, "POST"):
+ return
if base_path in ["/api/search", "/api/sql", "/api/database"]:
- content_length = int(self.headers.get("Content-Length", 0))
- if content_length > 0:
- post_data = self.rfile.read(content_length).decode(
- "utf-8", errors="replace"
- )
-
self.access_logger.info(
f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}"
)
@@ -283,20 +373,17 @@ class Handler(BaseHTTPRequestHandler):
return
if base_path == "/api/contact":
- content_length = int(self.headers.get("Content-Length", 0))
- if content_length > 0:
- post_data = self.rfile.read(content_length).decode(
- "utf-8", errors="replace"
- )
-
+ # Parse URL-encoded POST data properly
parsed_data = {}
- for pair in post_data.split("&"):
- if "=" in pair:
- key, value = pair.split("=", 1)
+ if post_data:
+ # Use parse_qs for proper URL decoding
+ parsed_qs = parse_qs(post_data)
+ # parse_qs returns lists, get first value of each
+ parsed_data = {k: v[0] if v else '' for k, v in parsed_qs.items()}
+
+ self.app_logger.debug(f"Parsed contact data: {parsed_data}")
- parsed_data[unquote_plus(key)] = unquote_plus(value)
-
- xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values())
+ xss_detected = any(detect_xss_pattern(str(v)) for v in parsed_data.values())
if xss_detected:
self.access_logger.warning(
@@ -307,6 +394,16 @@ class Handler(BaseHTTPRequestHandler):
f"[XSS ENDPOINT POST] {client_ip} - {base_path}"
)
+ # Record access for dashboard tracking (including XSS detection)
+ self.tracker.record_access(
+ ip=client_ip,
+ path=self.path,
+ user_agent=user_agent,
+ body=post_data,
+ method="POST",
+ raw_request=self._build_raw_request(post_data)
+ )
+
try:
self.send_response(200)
self.send_header("Content-type", "text/html")
@@ -323,12 +420,8 @@ class Handler(BaseHTTPRequestHandler):
f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}"
)
- content_length = int(self.headers.get("Content-Length", 0))
- if content_length > 0:
- post_data = self.rfile.read(content_length).decode(
- "utf-8", errors="replace"
- )
-
+ # post_data was already read at the beginning of do_POST, don't read again
+ if post_data:
self.access_logger.warning(f"[POST DATA] {post_data[:200]}")
# Parse and log credentials
@@ -350,7 +443,8 @@ class Handler(BaseHTTPRequestHandler):
# send the post data (body) to the record_access function so the post data can be used to detect suspicious things.
self.tracker.record_access(
- client_ip, self.path, user_agent, post_data, method="POST"
+ client_ip, self.path, user_agent, post_data, method="POST",
+ raw_request=self._build_raw_request(post_data)
)
time.sleep(1)
@@ -498,8 +592,13 @@ class Handler(BaseHTTPRequestHandler):
user_agent = self.headers.get("User-Agent", "")
request_path = urlparse(self.path).path
self.app_logger.info(f"request_query: {request_path}")
- query_params = parse_qs(urlparse(self.path).query)
+ parsed_url = urlparse(self.path)
+ query_string = parsed_url.query
+ query_params = parse_qs(query_string)
self.app_logger.info(f"query_params: {query_params}")
+
+ if self._handle_deception_response(self.path, query_string, "", "GET"):
+ return
# get database reference
db = get_database()
@@ -934,6 +1033,68 @@ class Handler(BaseHTTPRequestHandler):
self.wfile.write(json.dumps({"error": str(e)}).encode())
return
+ # API endpoint for attack types statistics (aggregated)
+ if self.config.dashboard_secret_path and self.path.startswith(
+ f"{self.config.dashboard_secret_path}/api/attack-types-stats"
+ ):
+ self.send_response(200)
+ self.send_header("Content-type", "application/json")
+ self.send_header("Access-Control-Allow-Origin", "*")
+ self.send_header(
+ "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0"
+ )
+ self.send_header("Pragma", "no-cache")
+ self.send_header("Expires", "0")
+ self.end_headers()
+ try:
+ parsed_url = urlparse(self.path)
+ query_params = parse_qs(parsed_url.query)
+ limit = int(query_params.get("limit", ["20"])[0])
+ limit = min(max(1, limit), 100) # Cap at 100
+
+ result = db.get_attack_types_stats(limit=limit)
+ self.wfile.write(json.dumps(result).encode())
+ except BrokenPipeError:
+ pass
+ except Exception as e:
+ self.app_logger.error(f"Error fetching attack types stats: {e}")
+ self.wfile.write(json.dumps({"error": str(e)}).encode())
+ return
+
+ # API endpoint for fetching raw request by log ID
+ if self.config.dashboard_secret_path and self.path.startswith(
+ f"{self.config.dashboard_secret_path}/api/raw-request/"
+ ):
+ try:
+ # Extract log ID from path: /api/raw-request/123
+ log_id = int(self.path.split("/")[-1])
+ raw_request = db.get_raw_request_by_id(log_id)
+
+ if raw_request is None:
+ self.send_response(404)
+ self.send_header("Content-type", "application/json")
+ self.end_headers()
+ self.wfile.write(json.dumps({"error": "Raw request not found"}).encode())
+ else:
+ self.send_response(200)
+ self.send_header("Content-type", "application/json")
+ self.send_header("Access-Control-Allow-Origin", "*")
+ self.send_header("Cache-Control", "no-store, no-cache, must-revalidate, max-age=0")
+ self.end_headers()
+ self.wfile.write(json.dumps({"raw_request": raw_request}).encode())
+ except (ValueError, IndexError):
+ self.send_response(400)
+ self.send_header("Content-type", "application/json")
+ self.end_headers()
+ self.wfile.write(json.dumps({"error": "Invalid log ID"}).encode())
+ except Exception as e:
+ self.app_logger.error(f"Error fetching raw request: {e}")
+ self.send_response(500)
+ self.send_header("Content-type", "application/json")
+ self.end_headers()
+ self.wfile.write(json.dumps({"error": str(e)}).encode())
+ return
+
# API endpoint for downloading malicious IPs blocklist file
if (
self.config.dashboard_secret_path
@@ -1014,10 +1175,9 @@ class Handler(BaseHTTPRequestHandler):
self.wfile.write(b"Internal server error")
return
- self.tracker.record_access(client_ip, self.path, user_agent, method="GET")
+ self.tracker.record_access(client_ip, self.path, user_agent, method="GET",
+ raw_request=self._build_raw_request())
- # self.analyzer.infer_user_category(client_ip)
- # self.analyzer.update_ip_rep_infos(client_ip)
if self.tracker.is_suspicious_user_agent(user_agent):
self.access_logger.warning(
diff --git a/src/migrations/README.md b/src/migrations/README.md
new file mode 100644
index 0000000..5f07c58
--- /dev/null
+++ b/src/migrations/README.md
@@ -0,0 +1,60 @@
+# Database Migrations
+
+This directory contains database migration scripts for Krawl.
+Since the 1.0.0 stable release, we have added features that require schema changes and performance optimizations. These migration scripts ensure that existing users can upgrade seamlessly, without data loss or downtime.
+
+## Available Migrations
+
+### add_raw_request_column.py
+
+Adds the `raw_request` column to the `access_logs` table to store complete HTTP requests for forensic analysis.
+
+**Usage:**
+```bash
+# Run with default database path (src/data/krawl.db)
+python3 migrations/add_raw_request_column.py
+
+# Run with custom database path
+python3 migrations/add_raw_request_column.py /path/to/krawl.db
+```
+
+### add_performance_indexes.py
+
+Adds critical performance indexes to the `attack_detections` table for efficient aggregation and filtering with large datasets (100k+ records).
+
+**Indexes Added:**
+- `ix_attack_detections_attack_type` - Speeds up GROUP BY on attack_type
+- `ix_attack_detections_type_log` - Composite index for attack_type + access_log_id
+
+**Usage:**
+```bash
+# Run with default database path
+python3 migrations/add_performance_indexes.py
+
+# Run with custom database path
+python3 migrations/add_performance_indexes.py /path/to/krawl.db
+```
+
+**Post-Migration Optimization:**
+```bash
+# Compact database and update query planner statistics
+sqlite3 /path/to/krawl.db "VACUUM; ANALYZE;"
+```
+
+## Running Migrations
+
+All migration scripts are designed to be idempotent and safe to run multiple times. They will:
+1. Check if the migration is already applied
+2. Skip if already applied
+3. Apply the migration if needed
+4. Report the result
+
+## Creating New Migrations
+
+When creating a new migration:
+1. Name the file descriptively: `action_description.py`
+2. Make it idempotent (safe to run multiple times)
+3. Add checks before making changes
+4. Provide clear error messages
+5. Support custom database paths via command line
+6. Update this README with usage instructions
diff --git a/src/migrations/add_performance_indexes.py b/src/migrations/add_performance_indexes.py
new file mode 100644
index 0000000..3359612
--- /dev/null
+++ b/src/migrations/add_performance_indexes.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+
+"""
+Migration script to add performance indexes to attack_detections table.
+This dramatically improves query performance with large datasets (100k+ records).
+"""
+
+import sqlite3
+import sys
+import os
+
+
+def index_exists(cursor, index_name: str) -> bool:
+ """Check if an index exists."""
+ cursor.execute("SELECT name FROM sqlite_master WHERE type='index' AND name=?", (index_name,))
+ return cursor.fetchone() is not None
+
+
+def add_performance_indexes(db_path: str) -> bool:
+ """
+ Add performance indexes to optimize queries.
+
+ Args:
+ db_path: Path to the SQLite database file
+
+ Returns:
+ True if indexes were added or already exist, False on error
+ """
+ try:
+ # Check if database exists
+ if not os.path.exists(db_path):
+ print(f"Database file not found: {db_path}")
+ return False
+
+ # Connect to database
+ conn = sqlite3.connect(db_path)
+ cursor = conn.cursor()
+
+ indexes_added = []
+ indexes_existed = []
+
+ # Index 1: attack_type for efficient GROUP BY operations
+ if not index_exists(cursor, "ix_attack_detections_attack_type"):
+ print("Adding index on attack_detections.attack_type...")
+ cursor.execute("""
+ CREATE INDEX ix_attack_detections_attack_type
+ ON attack_detections(attack_type)
+ """)
+ indexes_added.append("ix_attack_detections_attack_type")
+ else:
+ indexes_existed.append("ix_attack_detections_attack_type")
+
+ # Index 2: Composite index for attack_type + access_log_id
+ if not index_exists(cursor, "ix_attack_detections_type_log"):
+ print("Adding composite index on attack_detections(attack_type, access_log_id)...")
+ cursor.execute("""
+ CREATE INDEX ix_attack_detections_type_log
+ ON attack_detections(attack_type, access_log_id)
+ """)
+ indexes_added.append("ix_attack_detections_type_log")
+ else:
+ indexes_existed.append("ix_attack_detections_type_log")
+
+ conn.commit()
+ conn.close()
+
+ # Report results
+ if indexes_added:
+ print(f"Successfully added {len(indexes_added)} index(es):")
+ for idx in indexes_added:
+ print(f" - {idx}")
+
+ if indexes_existed:
+ print(f"ℹ️ {len(indexes_existed)} index(es) already existed:")
+ for idx in indexes_existed:
+ print(f" - {idx}")
+
+ if not indexes_added and not indexes_existed:
+ print("No indexes processed")
+
+ return True
+
+ except sqlite3.Error as e:
+ print(f"SQLite error: {e}")
+ return False
+ except Exception as e:
+ print(f"Unexpected error: {e}")
+ return False
+
+
+def main():
+ """Main migration function."""
+ # Default database path
+ default_db_path = os.path.join(
+ os.path.dirname(os.path.dirname(__file__)),
+ "data",
+ "krawl.db"
+ )
+
+ # Allow custom path as command line argument
+ db_path = sys.argv[1] if len(sys.argv) > 1 else default_db_path
+
+ print(f"Adding performance indexes to database: {db_path}")
+ print("=" * 60)
+
+ success = add_performance_indexes(db_path)
+
+ print("=" * 60)
+ if success:
+ print("Migration completed successfully")
+ print("\n💡 Performance tip: Run 'VACUUM' and 'ANALYZE' on your database")
+ print(" to optimize query planner statistics after adding indexes.")
+ sys.exit(0)
+ else:
+ print("Migration failed")
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/src/migrations/add_raw_request_column.py b/src/migrations/add_raw_request_column.py
new file mode 100644
index 0000000..8cb63ee
--- /dev/null
+++ b/src/migrations/add_raw_request_column.py
@@ -0,0 +1,95 @@
+#!/usr/bin/env python3
+
+"""
+Migration script to add raw_request column to access_logs table.
+This script is safe to run multiple times - it checks if the column exists before adding it.
+"""
+
+import sqlite3
+import sys
+import os
+from pathlib import Path
+
+
+def column_exists(cursor, table_name: str, column_name: str) -> bool:
+ """Check if a column exists in a table."""
+ cursor.execute(f"PRAGMA table_info({table_name})")
+ columns = [row[1] for row in cursor.fetchall()]
+ return column_name in columns
+
+
+def add_raw_request_column(db_path: str) -> bool:
+ """
+ Add raw_request column to access_logs table if it doesn't exist.
+
+ Args:
+ db_path: Path to the SQLite database file
+
+ Returns:
+ True if column was added or already exists, False on error
+ """
+ try:
+ # Check if database exists
+ if not os.path.exists(db_path):
+ print(f"Database file not found: {db_path}")
+ return False
+
+ # Connect to database
+ conn = sqlite3.connect(db_path)
+ cursor = conn.cursor()
+
+ # Check if column already exists
+ if column_exists(cursor, "access_logs", "raw_request"):
+ print("Column 'raw_request' already exists in access_logs table")
+ conn.close()
+ return True
+
+ # Add the column
+ print("Adding 'raw_request' column to access_logs table...")
+ cursor.execute("""
+ ALTER TABLE access_logs
+ ADD COLUMN raw_request TEXT
+ """)
+
+ conn.commit()
+ conn.close()
+
+ print("✅ Successfully added 'raw_request' column to access_logs table")
+ return True
+
+ except sqlite3.Error as e:
+ print(f"SQLite error: {e}")
+ return False
+ except Exception as e:
+ print(f"Unexpected error: {e}")
+ return False
+
+
+def main():
+ """Main migration function."""
+ # Default database path
+ default_db_path = os.path.join(
+ os.path.dirname(os.path.dirname(__file__)),
+ "data",
+ "krawl.db"
+ )
+
+ # Allow custom path as command line argument
+ db_path = sys.argv[1] if len(sys.argv) > 1 else default_db_path
+
+ print(f"🔄 Running migration on database: {db_path}")
+ print("=" * 60)
+
+ success = add_raw_request_column(db_path)
+
+ print("=" * 60)
+ if success:
+ print("Migration completed successfully")
+ sys.exit(0)
+ else:
+ print("Migration failed")
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/src/models.py b/src/models.py
index 30e8a92..c9d6a26 100644
--- a/src/models.py
+++ b/src/models.py
@@ -63,6 +63,10 @@ class AccessLog(Base):
timestamp: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow, index=True
)
+ # Raw HTTP request for forensic analysis (nullable for backward compatibility)
+ raw_request: Mapped[Optional[str]] = mapped_column(
+ String, nullable=True
+ )
# Relationship to attack detections
attack_detections: Mapped[List["AttackDetection"]] = relationship(
@@ -126,7 +130,7 @@ class AttackDetection(Base):
nullable=False,
index=True,
)
- attack_type: Mapped[str] = mapped_column(String(50), nullable=False)
+ attack_type: Mapped[str] = mapped_column(String(50), nullable=False, index=True)
matched_pattern: Mapped[Optional[str]] = mapped_column(
String(MAX_ATTACK_PATTERN_LENGTH), nullable=True
)
@@ -136,6 +140,9 @@ class AttackDetection(Base):
"AccessLog", back_populates="attack_detections"
)
+ # Composite index for efficient aggregation queries
+ __table_args__ = (Index("ix_attack_detections_type_log", "attack_type", "access_log_id"),)
+
def __repr__(self) -> str:
return f""
diff --git a/src/server.py b/src/server.py
index 94f1d1e..ed7ecad 100644
--- a/src/server.py
+++ b/src/server.py
@@ -10,7 +10,6 @@ from http.server import HTTPServer
from config import get_config
from tracker import AccessTracker
-from analyzer import Analyzer
from handler import Handler
from logger import (
initialize_logging,
@@ -75,11 +74,9 @@ def main():
)
tracker = AccessTracker(config.max_pages_limit, config.ban_duration_seconds)
- analyzer = Analyzer()
Handler.config = config
Handler.tracker = tracker
- Handler.analyzer = analyzer
Handler.counter = config.canary_token_tries
Handler.app_logger = app_logger
Handler.access_logger = access_logger
diff --git a/src/server_errors.py b/src/server_errors.py
deleted file mode 100644
index 7b55654..0000000
--- a/src/server_errors.py
+++ /dev/null
@@ -1,65 +0,0 @@
-#!/usr/bin/env python3
-
-import random
-from wordlists import get_wordlists
-
-
-def generate_server_error() -> tuple[str, str]:
- wl = get_wordlists()
- server_errors = wl.server_errors
-
- if not server_errors:
- return ("500 Internal Server Error", "text/html")
-
- server_type = random.choice(list(server_errors.keys()))
- server_config = server_errors[server_type]
-
- error_codes = {
- 400: "Bad Request",
- 401: "Unauthorized",
- 403: "Forbidden",
- 404: "Not Found",
- 500: "Internal Server Error",
- 502: "Bad Gateway",
- 503: "Service Unavailable",
- }
-
- code = random.choice(list(error_codes.keys()))
- message = error_codes[code]
-
- template = server_config.get("template", "")
- version = random.choice(server_config.get("versions", ["1.0"]))
-
- html = template.replace("{code}", str(code))
- html = html.replace("{message}", message)
- html = html.replace("{version}", version)
-
- if server_type == "apache":
- os = random.choice(server_config.get("os", ["Ubuntu"]))
- html = html.replace("{os}", os)
- html = html.replace("{host}", "localhost")
-
- return (html, "text/html")
-
-
-def get_server_header(server_type: str = None) -> str:
- wl = get_wordlists()
- server_errors = wl.server_errors
-
- if not server_errors:
- return "nginx/1.18.0"
-
- if not server_type:
- server_type = random.choice(list(server_errors.keys()))
-
- server_config = server_errors.get(server_type, {})
- version = random.choice(server_config.get("versions", ["1.0"]))
-
- server_headers = {
- "nginx": f"nginx/{version}",
- "apache": f"Apache/{version}",
- "iis": f"Microsoft-IIS/{version}",
- "tomcat": f"Apache-Coyote/1.1",
- }
-
- return server_headers.get(server_type, "nginx/1.18.0")
diff --git a/src/sql_errors.py b/src/sql_errors.py
deleted file mode 100644
index 583f7ed..0000000
--- a/src/sql_errors.py
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/usr/bin/env python3
-
-import random
-import re
-from typing import Optional, Tuple
-from wordlists import get_wordlists
-
-
-def detect_sql_injection_pattern(query_string: str) -> Optional[str]:
- if not query_string:
- return None
-
- query_lower = query_string.lower()
-
- patterns = {
- "quote": [r"'", r'"', r"`"],
- "comment": [r"--", r"#", r"/\*", r"\*/"],
- "union": [r"\bunion\b", r"\bunion\s+select\b"],
- "boolean": [r"\bor\b.*=.*", r"\band\b.*=.*", r"'.*or.*'.*=.*'"],
- "time_based": [r"\bsleep\b", r"\bwaitfor\b", r"\bdelay\b", r"\bbenchmark\b"],
- "stacked": [r";.*select", r";.*drop", r";.*insert", r";.*update", r";.*delete"],
- "command": [r"\bexec\b", r"\bexecute\b", r"\bxp_cmdshell\b"],
- "info_schema": [r"information_schema", r"table_schema", r"table_name"],
- }
-
- for injection_type, pattern_list in patterns.items():
- for pattern in pattern_list:
- if re.search(pattern, query_lower):
- return injection_type
-
- return None
-
-
-def get_random_sql_error(
- db_type: str = None, injection_type: str = None
-) -> Tuple[str, str]:
- wl = get_wordlists()
- sql_errors = wl.sql_errors
-
- if not sql_errors:
- return ("Database error occurred", "text/plain")
-
- if not db_type:
- db_type = random.choice(list(sql_errors.keys()))
-
- db_errors = sql_errors.get(db_type, {})
-
- if injection_type and injection_type in db_errors:
- errors = db_errors[injection_type]
- elif "generic" in db_errors:
- errors = db_errors["generic"]
- else:
- all_errors = []
- for error_list in db_errors.values():
- if isinstance(error_list, list):
- all_errors.extend(error_list)
- errors = all_errors if all_errors else ["Database error occurred"]
-
- error_message = random.choice(errors) if errors else "Database error occurred"
-
- if "{table}" in error_message:
- tables = ["users", "products", "orders", "customers", "accounts", "sessions"]
- error_message = error_message.replace("{table}", random.choice(tables))
-
- if "{column}" in error_message:
- columns = ["id", "name", "email", "password", "username", "created_at"]
- error_message = error_message.replace("{column}", random.choice(columns))
-
- return (error_message, "text/plain")
-
-
-def generate_sql_error_response(
- query_string: str, db_type: str = None
-) -> Tuple[str, str, int]:
- injection_type = detect_sql_injection_pattern(query_string)
-
- if not injection_type:
- return (None, None, None)
-
- error_message, content_type = get_random_sql_error(db_type, injection_type)
-
- status_code = 500
-
- if random.random() < 0.3:
- status_code = 200
-
- return (error_message, content_type, status_code)
-
-
-def get_sql_response_with_data(path: str, params: str) -> str:
- import json
- from generators import random_username, random_email, random_password
-
- injection_type = detect_sql_injection_pattern(params)
-
- if injection_type in ["union", "boolean", "stacked"]:
- data = {
- "success": True,
- "results": [
- {
- "id": i,
- "username": random_username(),
- "email": random_email(),
- "password_hash": random_password(),
- "role": random.choice(["admin", "user", "moderator"]),
- }
- for i in range(1, random.randint(2, 5))
- ],
- }
- return json.dumps(data, indent=2)
-
- return json.dumps(
- {"success": True, "message": "Query executed successfully", "results": []},
- indent=2,
- )
diff --git a/src/tasks/analyze_ips.py b/src/tasks/analyze_ips.py
index 7602f18..e51ab9a 100644
--- a/src/tasks/analyze_ips.py
+++ b/src/tasks/analyze_ips.py
@@ -112,24 +112,8 @@ def main():
ip_accesses = db_manager.get_access_logs(limit=999999999, ip_filter=ip)
total_accesses_count = len(ip_accesses)
if total_accesses_count <= 0:
- return
+ continue
- # Set category as "unknown" for the first 3 requests
- if total_accesses_count < 3:
- category = "unknown"
- analyzed_metrics = {}
- category_scores = {
- "attacker": 0,
- "good_crawler": 0,
- "bad_crawler": 0,
- "regular_user": 0,
- "unknown": 0,
- }
- last_analysis = datetime.now()
- db_manager.update_ip_stats_analysis(
- ip, analyzed_metrics, category, category_scores, last_analysis
- )
- return 0
# --------------------- HTTP Methods ---------------------
get_accesses_count = len(
[item for item in ip_accesses if item["method"] == "GET"]
diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py
index 30628c7..0521881 100644
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -48,22 +48,67 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
dashboard_path: The secret dashboard path for generating API URLs
"""
- # Generate suspicious accesses rows with clickable IPs
+ # Generate comprehensive suspicious activity rows combining all suspicious events
+ suspicious_activities = []
+
+ # Add recent suspicious accesses (attacks)
+ for log in stats.get("recent_suspicious", [])[-20:]:
+ suspicious_activities.append({
+ "type": "Attack",
+ "ip": log["ip"],
+ "path": log["path"],
+ "user_agent": log["user_agent"][:60],
+ "timestamp": log["timestamp"],
+ "details": ", ".join(log.get("attack_types", [])) if log.get("attack_types") else "Suspicious behavior"
+ })
+
+ # Add credential attempts
+ for cred in stats.get("credential_attempts", [])[-20:]:
+ suspicious_activities.append({
+ "type": "Credentials",
+ "ip": cred["ip"],
+ "path": cred["path"],
+ "user_agent": "",
+ "timestamp": cred["timestamp"],
+ "details": f"User: {cred.get('username', 'N/A')}"
+ })
+
+ # Add honeypot triggers
+ for honeypot in stats.get("honeypot_triggered_ips", [])[-20:]:
+ paths = honeypot.get("paths", []) if isinstance(honeypot.get("paths"), list) else []
+ suspicious_activities.append({
+ "type": "Honeypot",
+ "ip": honeypot["ip"],
+ "path": paths[0] if paths else "Multiple",
+ "user_agent": "",
+ "timestamp": honeypot.get("last_seen", honeypot.get("timestamp", "")),
+ "details": f"{len(paths)} trap(s) triggered"
+ })
+
+ # Sort by timestamp (most recent first) and keep only the 20 most recent
+ try:
+ suspicious_activities.sort(key=lambda x: x["timestamp"], reverse=True)
+ except:
+ pass
+ suspicious_activities = suspicious_activities[:20]
+
+ # Generate table rows
suspicious_rows = (
- "\n".join([f"""
- | {_escape(log["ip"])} |
- {_escape(log["path"])} |
- {_escape(log["user_agent"][:60])} |
- {format_timestamp(log["timestamp"], time_only=True)} |
+ "\n".join([f"""
+ | {_escape(activity["ip"])} |
+ {_escape(activity["type"])} |
+ {_escape(activity["path"])} |
+ {_escape(activity["details"])} |
+ {format_timestamp(activity["timestamp"], time_only=True)} |
-
- |
+ |
+ |
|
-
""" for log in stats["recent_suspicious"][-10:]])
- or '| No suspicious activity detected |
'
+ """ for activity in suspicious_activities])
+ or '| No suspicious activity detected |
'
)
return f"""
@@ -708,6 +753,91 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
max-height: 400px;
}}
+ /* Raw Request Modal */
+ .raw-request-modal {{
+ display: none;
+ position: fixed;
+ z-index: 1000;
+ left: 0;
+ top: 0;
+ width: 100%;
+ height: 100%;
+ background-color: rgba(0, 0, 0, 0.7);
+ overflow: auto;
+ }}
+ .raw-request-modal-content {{
+ background-color: #161b22;
+ margin: 5% auto;
+ padding: 0;
+ border: 1px solid #30363d;
+ border-radius: 6px;
+ width: 80%;
+ max-width: 900px;
+ box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5);
+ }}
+ .raw-request-modal-header {{
+ padding: 16px 20px;
+ background-color: #21262d;
+ border-bottom: 1px solid #30363d;
+ border-radius: 6px 6px 0 0;
+ display: flex;
+ justify-content: space-between;
+ align-items: center;
+ }}
+ .raw-request-modal-header h3 {{
+ margin: 0;
+ color: #58a6ff;
+ font-size: 16px;
+ }}
+ .raw-request-modal-close {{
+ color: #8b949e;
+ font-size: 28px;
+ font-weight: bold;
+ cursor: pointer;
+ line-height: 20px;
+ transition: color 0.2s;
+ }}
+ .raw-request-modal-close:hover {{
+ color: #c9d1d9;
+ }}
+ .raw-request-modal-body {{
+ padding: 20px;
+ }}
+ .raw-request-content {{
+ background-color: #0d1117;
+ border: 1px solid #30363d;
+ border-radius: 6px;
+ padding: 16px;
+ font-family: 'Courier New', Courier, monospace;
+ font-size: 12px;
+ color: #c9d1d9;
+ white-space: pre-wrap;
+ word-wrap: break-word;
+ max-height: 400px;
+ overflow-y: auto;
+ }}
+ .raw-request-modal-footer {{
+ padding: 16px 20px;
+ background-color: #21262d;
+ border-top: 1px solid #30363d;
+ border-radius: 0 0 6px 6px;
+ text-align: right;
+ }}
+ .raw-request-download-btn {{
+ padding: 8px 16px;
+ background: #238636;
+ color: #ffffff;
+ border: none;
+ border-radius: 6px;
+ font-weight: 500;
+ font-size: 13px;
+ cursor: pointer;
+ transition: background 0.2s;
+ }}
+ .raw-request-download-btn:hover {{
+ background: #2ea043;
+ }}
+
/* Mobile Optimization - Tablets (768px and down) */
@media (max-width: 768px) {{
body {{
@@ -1100,8 +1230,9 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
| IP Address |
+ Type |
Path |
- User-Agent |
+ Details |
Time |
@@ -1311,10 +1442,11 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
Attack Types |
User-Agent |
Time |
+ Actions |
- | Loading... |
+ | Loading... |
@@ -1338,6 +1470,23 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
+
+