Merge pull request #85 from BlessedRebuS/fix/backup-dir-absent

Fixed nonexistent backup directory
This commit is contained in:
Patrick Di Fazio
2026-02-15 18:29:29 +01:00
committed by GitHub
11 changed files with 296 additions and 204 deletions

View File

@@ -1,7 +1,7 @@
# Krawl Honeypot Configuration
server:
port: 1234
port: 5000
delay: 100 # Response delay in milliseconds
# Manually set the server header; if null, a random one will be used.

View File

@@ -1697,7 +1697,7 @@ class DatabaseManager:
results = (
session.query(
AttackDetection.attack_type,
func.count(AttackDetection.id).label('count')
func.count(AttackDetection.id).label("count"),
)
.group_by(AttackDetection.attack_type)
.order_by(func.count(AttackDetection.id).desc())
@@ -1707,8 +1707,7 @@ class DatabaseManager:
return {
"attack_types": [
{"type": row.attack_type, "count": row.count}
for row in results
{"type": row.attack_type, "count": row.count} for row in results
]
}
finally:

View File

@@ -8,7 +8,7 @@ from typing import Optional, Tuple, Dict
from generators import random_username, random_password, random_email
from wordlists import get_wordlists
logger = logging.getLogger('krawl')
logger = logging.getLogger("krawl")
def detect_path_traversal(path: str, query: str = "", body: str = "") -> bool:
@@ -20,7 +20,7 @@ def detect_path_traversal(path: str, query: str = "", body: str = "") -> bool:
if not pattern:
# Fallback pattern if wordlists not loaded
pattern = r'(\.\.|%2e%2e|/etc/passwd|/etc/shadow)'
pattern = r"(\.\.|%2e%2e|/etc/passwd|/etc/shadow)"
if re.search(pattern, full_input, re.IGNORECASE):
logger.debug(f"Path traversal detected in {full_input[:100]}")
@@ -38,7 +38,7 @@ def detect_xxe_injection(body: str) -> bool:
if not pattern:
# Fallback pattern if wordlists not loaded
pattern = r'(<!ENTITY|<!DOCTYPE|SYSTEM|PUBLIC|file://)'
pattern = r"(<!ENTITY|<!DOCTYPE|SYSTEM|PUBLIC|file://)"
if re.search(pattern, body, re.IGNORECASE):
return True
@@ -49,7 +49,9 @@ def detect_command_injection(path: str, query: str = "", body: str = "") -> bool
"""Detect command injection attempts"""
full_input = f"{path} {query} {body}"
logger.debug(f"[CMD_INJECTION_CHECK] path='{path}' query='{query}' body='{body[:50] if body else ''}'")
logger.debug(
f"[CMD_INJECTION_CHECK] path='{path}' query='{query}' body='{body[:50] if body else ''}'"
)
logger.debug(f"[CMD_INJECTION_CHECK] full_input='{full_input[:200]}'")
wl = get_wordlists()
@@ -57,7 +59,7 @@ def detect_command_injection(path: str, query: str = "", body: str = "") -> bool
if not pattern:
# Fallback pattern if wordlists not loaded
pattern = r'(cmd=|exec=|command=|&&|;|\||whoami|id|uname|cat|ls)'
pattern = r"(cmd=|exec=|command=|&&|;|\||whoami|id|uname|cat|ls)"
if re.search(pattern, full_input, re.IGNORECASE):
logger.debug(f"[CMD_INJECTION_CHECK] Command injection pattern matched!")
@@ -198,7 +200,9 @@ def generate_path_traversal_response(path: str) -> Tuple[str, str, int]:
logger.debug("Returning fake shadow file")
return (generate_fake_shadow(), "text/plain", 200)
if any(ext in path_lower for ext in [".conf", ".config", ".php", ".env", ".properties"]):
if any(
ext in path_lower for ext in [".conf", ".config", ".php", ".env", ".properties"]
):
logger.debug("Returning fake config file")
return (generate_fake_config_file(path), "text/plain", 200)
@@ -221,7 +225,11 @@ def generate_xxe_response(body: str) -> Tuple[str, str, int]:
elif "shadow" in body:
content = generate_fake_shadow()
else:
content = xxe_config.get("default_content", "root:x:0:0:root:/root:/bin/bash") if xxe_config else "root:x:0:0:root:/root:/bin/bash"
content = (
xxe_config.get("default_content", "root:x:0:0:root:/root:/bin/bash")
if xxe_config
else "root:x:0:0:root:/root:/bin/bash"
)
if xxe_config and "file_access" in xxe_config:
template = xxe_config["file_access"]["template"]
@@ -271,10 +279,14 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int]
input_lower = input_text.lower()
# id command
if re.search(r'\bid\b', input_lower):
if re.search(r"\bid\b", input_lower):
if cmd_config and "id" in cmd_config:
uid = random.randint(cmd_config.get("uid_min", 1000), cmd_config.get("uid_max", 2000))
gid = random.randint(cmd_config.get("gid_min", 1000), cmd_config.get("gid_max", 2000))
uid = random.randint(
cmd_config.get("uid_min", 1000), cmd_config.get("uid_max", 2000)
)
gid = random.randint(
cmd_config.get("gid_min", 1000), cmd_config.get("gid_max", 2000)
)
template = random.choice(cmd_config["id"])
output = template.replace("{uid}", str(uid)).replace("{gid}", str(gid))
else:
@@ -282,52 +294,69 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int]
return (output, "text/plain", 200)
# whoami command
if re.search(r'\bwhoami\b', input_lower):
if re.search(r"\bwhoami\b", input_lower):
users = cmd_config.get("whoami", ["www-data"]) if cmd_config else ["www-data"]
return (random.choice(users), "text/plain", 200)
# uname command
if re.search(r'\buname\b', input_lower):
outputs = cmd_config.get("uname", ["Linux server 5.4.0 x86_64"]) if cmd_config else ["Linux server 5.4.0 x86_64"]
if re.search(r"\buname\b", input_lower):
outputs = (
cmd_config.get("uname", ["Linux server 5.4.0 x86_64"])
if cmd_config
else ["Linux server 5.4.0 x86_64"]
)
return (random.choice(outputs), "text/plain", 200)
# pwd command
if re.search(r'\bpwd\b', input_lower):
paths = cmd_config.get("pwd", ["/var/www/html"]) if cmd_config else ["/var/www/html"]
if re.search(r"\bpwd\b", input_lower):
paths = (
cmd_config.get("pwd", ["/var/www/html"])
if cmd_config
else ["/var/www/html"]
)
return (random.choice(paths), "text/plain", 200)
# ls command
if re.search(r'\bls\b', input_lower):
if re.search(r"\bls\b", input_lower):
if cmd_config and "ls" in cmd_config:
files = random.choice(cmd_config["ls"])
else:
files = ["index.php", "config.php", "uploads"]
output = "\n".join(random.sample(files, k=random.randint(3, min(6, len(files)))))
output = "\n".join(
random.sample(files, k=random.randint(3, min(6, len(files))))
)
return (output, "text/plain", 200)
# cat command
if re.search(r'\bcat\b', input_lower):
if re.search(r"\bcat\b", input_lower):
if "passwd" in input_lower:
return (generate_fake_passwd(), "text/plain", 200)
if "shadow" in input_lower:
return (generate_fake_shadow(), "text/plain", 200)
cat_content = cmd_config.get("cat_config", "<?php\n$config = 'fake';\n?>") if cmd_config else "<?php\n$config = 'fake';\n?>"
cat_content = (
cmd_config.get("cat_config", "<?php\n$config = 'fake';\n?>")
if cmd_config
else "<?php\n$config = 'fake';\n?>"
)
return (cat_content, "text/plain", 200)
# echo command
if re.search(r'\becho\b', input_lower):
match = re.search(r'echo\s+(.+?)(?:[;&|]|$)', input_text, re.IGNORECASE)
if re.search(r"\becho\b", input_lower):
match = re.search(r"echo\s+(.+?)(?:[;&|]|$)", input_text, re.IGNORECASE)
if match:
return (match.group(1).strip('"\''), "text/plain", 200)
return (match.group(1).strip("\"'"), "text/plain", 200)
return ("", "text/plain", 200)
# network commands
if any(cmd in input_lower for cmd in ['wget', 'curl', 'nc', 'netcat']):
if any(cmd in input_lower for cmd in ["wget", "curl", "nc", "netcat"]):
if cmd_config and "network_commands" in cmd_config:
outputs = cmd_config["network_commands"]
output = random.choice(outputs)
if "{size}" in output:
size = random.randint(cmd_config.get("download_size_min", 100), cmd_config.get("download_size_max", 10000))
size = random.randint(
cmd_config.get("download_size_min", 100),
cmd_config.get("download_size_max", 10000),
)
output = output.replace("{size}", str(size))
else:
outputs = ["bash: command not found", "Connection timeout"]
@@ -374,7 +403,9 @@ def detect_sql_injection_pattern(query_string: str) -> Optional[str]:
return None
def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tuple[str, str]:
def get_random_sql_error(
db_type: str = None, injection_type: str = None
) -> Tuple[str, str]:
"""Generate a random SQL error message"""
wl = get_wordlists()
sql_errors = wl.sql_errors
@@ -411,7 +442,9 @@ def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tup
return (error_message, "text/plain")
def generate_sql_error_response(query_string: str, db_type: str = None) -> Tuple[Optional[str], Optional[str], Optional[int]]:
def generate_sql_error_response(
query_string: str, db_type: str = None
) -> Tuple[Optional[str], Optional[str], Optional[int]]:
"""Generate SQL error response for detected injection attempts"""
injection_type = detect_sql_injection_pattern(query_string)
@@ -593,13 +626,17 @@ def get_server_header(server_type: str = None) -> str:
return server_headers.get(server_type, "nginx/1.18.0")
def detect_and_respond_deception(path: str, query: str = "", body: str = "", method: str = "GET") -> Optional[Tuple[str, str, int]]:
def detect_and_respond_deception(
path: str, query: str = "", body: str = "", method: str = "GET"
) -> Optional[Tuple[str, str, int]]:
"""
Main deception detection and response function.
Returns (response_body, content_type, status_code) if deception should be applied, None otherwise.
"""
logger.debug(f"Checking deception for {method} {path} query={query[:50] if query else 'empty'}")
logger.debug(
f"Checking deception for {method} {path} query={query[:50] if query else 'empty'}"
)
if detect_path_traversal(path, query, body):
logger.info(f"Path traversal detected in: {path}")

View File

@@ -189,7 +189,9 @@ class Handler(BaseHTTPRequestHandler):
pass
return True
def _handle_deception_response(self, path: str, query: str = "", body: str = "", method: str = "GET") -> bool:
def _handle_deception_response(
self, path: str, query: str = "", body: str = "", method: str = "GET"
) -> bool:
"""
Handle deception responses for path traversal, XXE, and command injection.
Returns True if a deception response was sent, False otherwise.
@@ -208,13 +210,36 @@ class Handler(BaseHTTPRequestHandler):
attack_type_db = None # For database (standardized)
attack_type_log = "UNKNOWN" # For logging (human-readable)
if "passwd" in path.lower() or "shadow" in path.lower() or ".." in path or ".." in query:
if (
"passwd" in path.lower()
or "shadow" in path.lower()
or ".." in path
or ".." in query
):
attack_type_db = "path_traversal"
attack_type_log = "PATH_TRAVERSAL"
elif body and ("<!DOCTYPE" in body or "<!ENTITY" in body):
attack_type_db = "xxe_injection"
attack_type_log = "XXE_INJECTION"
elif any(pattern in full_input for pattern in ['cmd=', 'exec=', 'command=', 'execute=', 'system=', ';', '|', '&&', 'whoami', 'id', 'uname', 'cat', 'ls', 'pwd']):
elif any(
pattern in full_input
for pattern in [
"cmd=",
"exec=",
"command=",
"execute=",
"system=",
";",
"|",
"&&",
"whoami",
"id",
"uname",
"cat",
"ls",
"pwd",
]
):
attack_type_db = "command_injection"
attack_type_log = "COMMAND_INJECTION"
@@ -230,7 +255,7 @@ class Handler(BaseHTTPRequestHandler):
user_agent=user_agent,
body=body,
method=method,
raw_request=self._build_raw_request(body)
raw_request=self._build_raw_request(body),
)
# Send the deception response
@@ -243,7 +268,9 @@ class Handler(BaseHTTPRequestHandler):
except BrokenPipeError:
return True
except Exception as e:
self.app_logger.error(f"Error handling deception response for {path}: {str(e)}")
self.app_logger.error(
f"Error handling deception response for {path}: {str(e)}"
)
return False
@@ -379,7 +406,7 @@ class Handler(BaseHTTPRequestHandler):
# Use parse_qs for proper URL decoding
parsed_qs = parse_qs(post_data)
# parse_qs returns lists, get first value of each
parsed_data = {k: v[0] if v else '' for k, v in parsed_qs.items()}
parsed_data = {k: v[0] if v else "" for k, v in parsed_qs.items()}
self.app_logger.debug(f"Parsed contact data: {parsed_data}")
@@ -401,7 +428,7 @@ class Handler(BaseHTTPRequestHandler):
user_agent=user_agent,
body=post_data,
method="POST",
raw_request=self._build_raw_request(post_data)
raw_request=self._build_raw_request(post_data),
)
try:
@@ -443,8 +470,12 @@ class Handler(BaseHTTPRequestHandler):
# Pass the POST body to record_access so it can be inspected for suspicious content.
self.tracker.record_access(
client_ip, self.path, user_agent, post_data, method="POST",
raw_request=self._build_raw_request(post_data)
client_ip,
self.path,
user_agent,
post_data,
method="POST",
raw_request=self._build_raw_request(post_data),
)
time.sleep(1)
@@ -1074,12 +1105,17 @@ class Handler(BaseHTTPRequestHandler):
self.send_response(404)
self.send_header("Content-type", "application/json")
self.end_headers()
self.wfile.write(json.dumps({"error": "Raw request not found"}).encode())
self.wfile.write(
json.dumps({"error": "Raw request not found"}).encode()
)
else:
self.send_response(200)
self.send_header("Content-type", "application/json")
self.send_header("Access-Control-Allow-Origin", "*")
self.send_header("Cache-Control", "no-store, no-cache, must-revalidate, max-age=0")
self.send_header(
"Cache-Control",
"no-store, no-cache, must-revalidate, max-age=0",
)
self.end_headers()
self.wfile.write(json.dumps({"raw_request": raw_request}).encode())
except (ValueError, IndexError):
@@ -1175,9 +1211,13 @@ class Handler(BaseHTTPRequestHandler):
self.wfile.write(b"Internal server error")
return
self.tracker.record_access(client_ip, self.path, user_agent, method="GET",
raw_request=self._build_raw_request())
self.tracker.record_access(
client_ip,
self.path,
user_agent,
method="GET",
raw_request=self._build_raw_request(),
)
if self.tracker.is_suspicious_user_agent(user_agent):
self.access_logger.warning(

View File

@@ -12,7 +12,9 @@ import os
def index_exists(cursor, index_name: str) -> bool:
"""Check if an index exists."""
cursor.execute("SELECT name FROM sqlite_master WHERE type='index' AND name=?", (index_name,))
cursor.execute(
"SELECT name FROM sqlite_master WHERE type='index' AND name=?", (index_name,)
)
return cursor.fetchone() is not None
@@ -52,7 +54,9 @@ def add_performance_indexes(db_path: str) -> bool:
# Index 2: Composite index for attack_type + access_log_id
if not index_exists(cursor, "ix_attack_detections_type_log"):
print("Adding composite index on attack_detections(attack_type, access_log_id)...")
print(
"Adding composite index on attack_detections(attack_type, access_log_id)..."
)
cursor.execute("""
CREATE INDEX ix_attack_detections_type_log
ON attack_detections(attack_type, access_log_id)
@@ -92,9 +96,7 @@ def main():
"""Main migration function."""
# Default database path
default_db_path = os.path.join(
os.path.dirname(os.path.dirname(__file__)),
"data",
"krawl.db"
os.path.dirname(os.path.dirname(__file__)), "data", "krawl.db"
)
# Allow custom path as command line argument

View File

@@ -69,9 +69,7 @@ def main():
"""Main migration function."""
# Default database path
default_db_path = os.path.join(
os.path.dirname(os.path.dirname(__file__)),
"data",
"krawl.db"
os.path.dirname(os.path.dirname(__file__)), "data", "krawl.db"
)
# Allow custom path as command line argument

View File

@@ -64,9 +64,7 @@ class AccessLog(Base):
DateTime, nullable=False, default=datetime.utcnow, index=True
)
# Raw HTTP request for forensic analysis (nullable for backward compatibility)
raw_request: Mapped[Optional[str]] = mapped_column(
String, nullable=True
)
raw_request: Mapped[Optional[str]] = mapped_column(String, nullable=True)
# Relationship to attack detections
attack_detections: Mapped[List["AttackDetection"]] = relationship(
@@ -141,7 +139,9 @@ class AttackDetection(Base):
)
# Composite index for efficient aggregation queries
__table_args__ = (Index("ix_attack_detections_type_log", "attack_type", "access_log_id"),)
__table_args__ = (
Index("ix_attack_detections_type_log", "attack_type", "access_log_id"),
)
def __repr__(self) -> str:
return f"<AttackDetection(id={self.id}, type='{self.attack_type}')>"

View File

@@ -1,7 +1,4 @@
from sqlalchemy import select
from typing import Optional
from database import get_database, DatabaseManager
from zoneinfo import ZoneInfo
from database import get_database
from pathlib import Path
from datetime import datetime, timedelta
import re
@@ -9,8 +6,6 @@ import urllib.parse
from wordlists import get_wordlists
from config import get_config
from logger import get_app_logger
import requests
from sanitizer import sanitize_for_storage, sanitize_dict
# ----------------------
# TASK CONFIG

View File

@@ -3,7 +3,7 @@
from logger import get_app_logger
from database import get_database
from config import get_config
from sqlalchemy import MetaData, inspect
from sqlalchemy import MetaData
from sqlalchemy.schema import CreateTable
import os
@@ -36,18 +36,16 @@ def main():
engine = db._engine
metadata = MetaData()
# Reflect the database structure
metadata.reflect(bind=engine)
# create backup directory
os.makedirs(config.backups_path, exist_ok=True)
output_file = os.path.join(config.backups_path, "db_dump.sql")
with open(output_file, "w") as f:
# Write header
app_logger.info(f"[Background Task] {task_name} started database dump")
# Get inspector for additional metadata
inspector = inspect(engine)
# Dump schema (CREATE TABLE statements)
f.write("-- Schema\n")
f.write("-- " + "=" * 70 + "\n\n")

View File

@@ -53,44 +53,56 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
# Add recent suspicious accesses (attacks)
for log in stats.get("recent_suspicious", [])[-20:]:
suspicious_activities.append({
"type": "Attack",
"ip": log["ip"],
"path": log["path"],
"user_agent": log["user_agent"][:60],
"timestamp": log["timestamp"],
"details": ", ".join(log.get("attack_types", [])) if log.get("attack_types") else "Suspicious behavior"
})
suspicious_activities.append(
{
"type": "Attack",
"ip": log["ip"],
"path": log["path"],
"user_agent": log["user_agent"][:60],
"timestamp": log["timestamp"],
"details": (
", ".join(log.get("attack_types", []))
if log.get("attack_types")
else "Suspicious behavior"
),
}
)
# Add credential attempts
for cred in stats.get("credential_attempts", [])[-20:]:
suspicious_activities.append({
"type": "Credentials",
"ip": cred["ip"],
"path": cred["path"],
"user_agent": "",
"timestamp": cred["timestamp"],
"details": f"User: {cred.get('username', 'N/A')}"
})
suspicious_activities.append(
{
"type": "Credentials",
"ip": cred["ip"],
"path": cred["path"],
"user_agent": "",
"timestamp": cred["timestamp"],
"details": f"User: {cred.get('username', 'N/A')}",
}
)
# Add honeypot triggers
for honeypot in stats.get("honeypot_triggered_ips", [])[-20:]:
# honeypot is a tuple (ip, paths)
ip = honeypot[0]
paths = honeypot[1] if isinstance(honeypot[1], list) else []
suspicious_activities.append({
"type": "Honeypot",
"ip": ip,
"path": paths[0] if paths else "Multiple",
"user_agent": "",
"timestamp": "", # Tuples don't have timestamp
"details": f"{len(paths)} trap(s) triggered"
})
suspicious_activities.append(
{
"type": "Honeypot",
"ip": ip,
"path": paths[0] if paths else "Multiple",
"user_agent": "",
"timestamp": "", # Tuples don't have timestamp
"details": f"{len(paths)} trap(s) triggered",
}
)
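# Sort by timestamp (most recent first) and keep the first 20 entries.
# NOTE(review): with reverse=True, the (timestamp == "") key sorts entries with empty timestamps first, not last — confirm the intended order.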
try:
suspicious_activities.sort(key=lambda x: (x["timestamp"] == "", x["timestamp"]), reverse=True)
suspicious_activities.sort(
key=lambda x: (x["timestamp"] == "", x["timestamp"]), reverse=True
)
except:
pass
suspicious_activities = suspicious_activities[:20]

View File

@@ -161,8 +161,19 @@ class AccessTracker:
except Exception:
# If parsing fails, try simple regex patterns
wl = get_wordlists()
username_fields = wl.username_fields or ["username", "user", "login", "email", "log"]
password_fields = wl.password_fields or ["password", "pass", "passwd", "pwd"]
username_fields = wl.username_fields or [
"username",
"user",
"login",
"email",
"log",
]
password_fields = wl.password_fields or [
"password",
"pass",
"passwd",
"pwd",
]
# Build regex pattern from wordlist fields
username_pattern = "(?:" + "|".join(username_fields) + ")=([^&\\s]+)"