Merge pull request #85 from BlessedRebuS/fix/backup-dir-absent

Fixed non-existent backup dir
2026-02-15 18:29:29 +01:00
parent b62803fa2f 17b657744b
commit 04823dab63
11 changed files with 296 additions and 204 deletions
--- a/config.yaml
+++ b/config.yaml
@@ -1,7 +1,7 @@
 # Krawl Honeypot Configuration

 server:
-  port: 1234
+  port: 5000
  delay: 100  # Response delay in milliseconds

  # manually set the server header, if null a random one will be used.
--- a/src/database.py
+++ b/src/database.py
@@ -1697,7 +1697,7 @@ class DatabaseManager:
            results = (
                session.query(
                    AttackDetection.attack_type,
-                    func.count(AttackDetection.id).label('count')
+                    func.count(AttackDetection.id).label("count"),
                )
                .group_by(AttackDetection.attack_type)
                .order_by(func.count(AttackDetection.id).desc())
@@ -1707,8 +1707,7 @@ class DatabaseManager:

            return {
                "attack_types": [
-                    {"type": row.attack_type, "count": row.count}
-                    for row in results
+                    {"type": row.attack_type, "count": row.count} for row in results
                ]
            }
        finally:
--- a/src/deception_responses.py
+++ b/src/deception_responses.py
@@ -8,7 +8,7 @@ from typing import Optional, Tuple, Dict
 from generators import random_username, random_password, random_email
 from wordlists import get_wordlists

-logger = logging.getLogger('krawl')
+logger = logging.getLogger("krawl")


 def detect_path_traversal(path: str, query: str = "", body: str = "") -> bool:
@@ -20,7 +20,7 @@ def detect_path_traversal(path: str, query: str = "", body: str = "") -> bool:

    if not pattern:
        # Fallback pattern if wordlists not loaded
-        pattern = r'(\.\.|%2e%2e|/etc/passwd|/etc/shadow)'
+        pattern = r"(\.\.|%2e%2e|/etc/passwd|/etc/shadow)"

    if re.search(pattern, full_input, re.IGNORECASE):
        logger.debug(f"Path traversal detected in {full_input[:100]}")
@@ -38,7 +38,7 @@ def detect_xxe_injection(body: str) -> bool:

    if not pattern:
        # Fallback pattern if wordlists not loaded
-        pattern = r'(<!ENTITY|<!DOCTYPE|SYSTEM|PUBLIC|file://)'
+        pattern = r"(<!ENTITY|<!DOCTYPE|SYSTEM|PUBLIC|file://)"

    if re.search(pattern, body, re.IGNORECASE):
        return True
@@ -49,7 +49,9 @@ def detect_command_injection(path: str, query: str = "", body: str = "") -> bool
    """Detect command injection attempts"""
    full_input = f"{path} {query} {body}"

-    logger.debug(f"[CMD_INJECTION_CHECK] path='{path}' query='{query}' body='{body[:50] if body else ''}'")
+    logger.debug(
+        f"[CMD_INJECTION_CHECK] path='{path}' query='{query}' body='{body[:50] if body else ''}'"
+    )
    logger.debug(f"[CMD_INJECTION_CHECK] full_input='{full_input[:200]}'")

    wl = get_wordlists()
@@ -57,7 +59,7 @@ def detect_command_injection(path: str, query: str = "", body: str = "") -> bool

    if not pattern:
        # Fallback pattern if wordlists not loaded
-        pattern = r'(cmd=|exec=|command=|&&|;|\||whoami|id|uname|cat|ls)'
+        pattern = r"(cmd=|exec=|command=|&&|;|\||whoami|id|uname|cat|ls)"

    if re.search(pattern, full_input, re.IGNORECASE):
        logger.debug(f"[CMD_INJECTION_CHECK] Command injection pattern matched!")
@@ -198,7 +200,9 @@ def generate_path_traversal_response(path: str) -> Tuple[str, str, int]:
        logger.debug("Returning fake shadow file")
        return (generate_fake_shadow(), "text/plain", 200)

-    if any(ext in path_lower for ext in [".conf", ".config", ".php", ".env", ".properties"]):
+    if any(
+        ext in path_lower for ext in [".conf", ".config", ".php", ".env", ".properties"]
+    ):
        logger.debug("Returning fake config file")
        return (generate_fake_config_file(path), "text/plain", 200)

@@ -221,7 +225,11 @@ def generate_xxe_response(body: str) -> Tuple[str, str, int]:
        elif "shadow" in body:
            content = generate_fake_shadow()
        else:
-            content = xxe_config.get("default_content", "root:x:0:0:root:/root:/bin/bash") if xxe_config else "root:x:0:0:root:/root:/bin/bash"
+            content = (
+                xxe_config.get("default_content", "root:x:0:0:root:/root:/bin/bash")
+                if xxe_config
+                else "root:x:0:0:root:/root:/bin/bash"
+            )

        if xxe_config and "file_access" in xxe_config:
            template = xxe_config["file_access"]["template"]
@@ -271,10 +279,14 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int]
    input_lower = input_text.lower()

    # id command
-    if re.search(r'\bid\b', input_lower):
+    if re.search(r"\bid\b", input_lower):
        if cmd_config and "id" in cmd_config:
-            uid = random.randint(cmd_config.get("uid_min", 1000), cmd_config.get("uid_max", 2000))
-            gid = random.randint(cmd_config.get("gid_min", 1000), cmd_config.get("gid_max", 2000))
+            uid = random.randint(
+                cmd_config.get("uid_min", 1000), cmd_config.get("uid_max", 2000)
+            )
+            gid = random.randint(
+                cmd_config.get("gid_min", 1000), cmd_config.get("gid_max", 2000)
+            )
            template = random.choice(cmd_config["id"])
            output = template.replace("{uid}", str(uid)).replace("{gid}", str(gid))
        else:
@@ -282,52 +294,69 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int]
        return (output, "text/plain", 200)

    # whoami command
-    if re.search(r'\bwhoami\b', input_lower):
+    if re.search(r"\bwhoami\b", input_lower):
        users = cmd_config.get("whoami", ["www-data"]) if cmd_config else ["www-data"]
        return (random.choice(users), "text/plain", 200)

    # uname command
-    if re.search(r'\buname\b', input_lower):
-        outputs = cmd_config.get("uname", ["Linux server 5.4.0 x86_64"]) if cmd_config else ["Linux server 5.4.0 x86_64"]
+    if re.search(r"\buname\b", input_lower):
+        outputs = (
+            cmd_config.get("uname", ["Linux server 5.4.0 x86_64"])
+            if cmd_config
+            else ["Linux server 5.4.0 x86_64"]
+        )
        return (random.choice(outputs), "text/plain", 200)

    # pwd command
-    if re.search(r'\bpwd\b', input_lower):
-        paths = cmd_config.get("pwd", ["/var/www/html"]) if cmd_config else ["/var/www/html"]
+    if re.search(r"\bpwd\b", input_lower):
+        paths = (
+            cmd_config.get("pwd", ["/var/www/html"])
+            if cmd_config
+            else ["/var/www/html"]
+        )
        return (random.choice(paths), "text/plain", 200)

    # ls command
-    if re.search(r'\bls\b', input_lower):
+    if re.search(r"\bls\b", input_lower):
        if cmd_config and "ls" in cmd_config:
            files = random.choice(cmd_config["ls"])
        else:
            files = ["index.php", "config.php", "uploads"]
-        output = "\n".join(random.sample(files, k=random.randint(3, min(6, len(files)))))
+        output = "\n".join(
+            random.sample(files, k=random.randint(3, min(6, len(files))))
+        )
        return (output, "text/plain", 200)

    # cat command
-    if re.search(r'\bcat\b', input_lower):
+    if re.search(r"\bcat\b", input_lower):
        if "passwd" in input_lower:
            return (generate_fake_passwd(), "text/plain", 200)
        if "shadow" in input_lower:
            return (generate_fake_shadow(), "text/plain", 200)
-        cat_content = cmd_config.get("cat_config", "<?php\n$config = 'fake';\n?>") if cmd_config else "<?php\n$config = 'fake';\n?>"
+        cat_content = (
+            cmd_config.get("cat_config", "<?php\n$config = 'fake';\n?>")
+            if cmd_config
+            else "<?php\n$config = 'fake';\n?>"
+        )
        return (cat_content, "text/plain", 200)

    # echo command
-    if re.search(r'\becho\b', input_lower):
-        match = re.search(r'echo\s+(.+?)(?:[;&|]|$)', input_text, re.IGNORECASE)
+    if re.search(r"\becho\b", input_lower):
+        match = re.search(r"echo\s+(.+?)(?:[;&|]|$)", input_text, re.IGNORECASE)
        if match:
-            return (match.group(1).strip('"\''), "text/plain", 200)
+            return (match.group(1).strip("\"'"), "text/plain", 200)
        return ("", "text/plain", 200)

    # network commands
-    if any(cmd in input_lower for cmd in ['wget', 'curl', 'nc', 'netcat']):
+    if any(cmd in input_lower for cmd in ["wget", "curl", "nc", "netcat"]):
        if cmd_config and "network_commands" in cmd_config:
            outputs = cmd_config["network_commands"]
            output = random.choice(outputs)
            if "{size}" in output:
-                size = random.randint(cmd_config.get("download_size_min", 100), cmd_config.get("download_size_max", 10000))
+                size = random.randint(
+                    cmd_config.get("download_size_min", 100),
+                    cmd_config.get("download_size_max", 10000),
+                )
                output = output.replace("{size}", str(size))
        else:
            outputs = ["bash: command not found", "Connection timeout"]
@@ -374,7 +403,9 @@ def detect_sql_injection_pattern(query_string: str) -> Optional[str]:
    return None


-def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tuple[str, str]:
+def get_random_sql_error(
+    db_type: str = None, injection_type: str = None
+) -> Tuple[str, str]:
    """Generate a random SQL error message"""
    wl = get_wordlists()
    sql_errors = wl.sql_errors
@@ -411,7 +442,9 @@ def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tup
    return (error_message, "text/plain")


-def generate_sql_error_response(query_string: str, db_type: str = None) -> Tuple[Optional[str], Optional[str], Optional[int]]:
+def generate_sql_error_response(
+    query_string: str, db_type: str = None
+) -> Tuple[Optional[str], Optional[str], Optional[int]]:
    """Generate SQL error response for detected injection attempts"""
    injection_type = detect_sql_injection_pattern(query_string)

@@ -593,13 +626,17 @@ def get_server_header(server_type: str = None) -> str:
    return server_headers.get(server_type, "nginx/1.18.0")


-def detect_and_respond_deception(path: str, query: str = "", body: str = "", method: str = "GET") -> Optional[Tuple[str, str, int]]:
+def detect_and_respond_deception(
+    path: str, query: str = "", body: str = "", method: str = "GET"
+) -> Optional[Tuple[str, str, int]]:
    """
    Main deception detection and response function.
    Returns (response_body, content_type, status_code) if deception should be applied, None otherwise.
    """

-    logger.debug(f"Checking deception for {method} {path} query={query[:50] if query else 'empty'}")
+    logger.debug(
+        f"Checking deception for {method} {path} query={query[:50] if query else 'empty'}"
+    )

    if detect_path_traversal(path, query, body):
        logger.info(f"Path traversal detected in: {path}")
--- a/src/handler.py
+++ b/src/handler.py
@@ -189,7 +189,9 @@ class Handler(BaseHTTPRequestHandler):
                pass
            return True

-    def _handle_deception_response(self, path: str, query: str = "", body: str = "", method: str = "GET") -> bool:
+    def _handle_deception_response(
+        self, path: str, query: str = "", body: str = "", method: str = "GET"
+    ) -> bool:
        """
        Handle deception responses for path traversal, XXE, and command injection.
        Returns True if a deception response was sent, False otherwise.
@@ -208,13 +210,36 @@ class Handler(BaseHTTPRequestHandler):
                attack_type_db = None  # For database (standardized)
                attack_type_log = "UNKNOWN"  # For logging (human-readable)

-                if "passwd" in path.lower() or "shadow" in path.lower() or ".." in path or ".." in query:
+                if (
+                    "passwd" in path.lower()
+                    or "shadow" in path.lower()
+                    or ".." in path
+                    or ".." in query
+                ):
                    attack_type_db = "path_traversal"
                    attack_type_log = "PATH_TRAVERSAL"
                elif body and ("<!DOCTYPE" in body or "<!ENTITY" in body):
                    attack_type_db = "xxe_injection"
                    attack_type_log = "XXE_INJECTION"
-                elif any(pattern in full_input for pattern in ['cmd=', 'exec=', 'command=', 'execute=', 'system=', ';', '|', '&&', 'whoami', 'id', 'uname', 'cat', 'ls', 'pwd']):
+                elif any(
+                    pattern in full_input
+                    for pattern in [
+                        "cmd=",
+                        "exec=",
+                        "command=",
+                        "execute=",
+                        "system=",
+                        ";",
+                        "|",
+                        "&&",
+                        "whoami",
+                        "id",
+                        "uname",
+                        "cat",
+                        "ls",
+                        "pwd",
+                    ]
+                ):
                    attack_type_db = "command_injection"
                    attack_type_log = "COMMAND_INJECTION"

@@ -230,7 +255,7 @@ class Handler(BaseHTTPRequestHandler):
                    user_agent=user_agent,
                    body=body,
                    method=method,
-                    raw_request=self._build_raw_request(body)
+                    raw_request=self._build_raw_request(body),
                )

                # Send the deception response
@@ -243,7 +268,9 @@ class Handler(BaseHTTPRequestHandler):
        except BrokenPipeError:
            return True
        except Exception as e:
-            self.app_logger.error(f"Error handling deception response for {path}: {str(e)}")
+            self.app_logger.error(
+                f"Error handling deception response for {path}: {str(e)}"
+            )

        return False

@@ -379,7 +406,7 @@ class Handler(BaseHTTPRequestHandler):
                # Use parse_qs for proper URL decoding
                parsed_qs = parse_qs(post_data)
                # parse_qs returns lists, get first value of each
-                parsed_data = {k: v[0] if v else '' for k, v in parsed_qs.items()}
+                parsed_data = {k: v[0] if v else "" for k, v in parsed_qs.items()}

            self.app_logger.debug(f"Parsed contact data: {parsed_data}")

@@ -401,7 +428,7 @@ class Handler(BaseHTTPRequestHandler):
                user_agent=user_agent,
                body=post_data,
                method="POST",
-                raw_request=self._build_raw_request(post_data)
+                raw_request=self._build_raw_request(post_data),
            )

            try:
@@ -443,8 +470,12 @@ class Handler(BaseHTTPRequestHandler):

        # send the post data (body) to the record_access function so the post data can be used to detect suspicious things.
        self.tracker.record_access(
-            client_ip, self.path, user_agent, post_data, method="POST",
-            raw_request=self._build_raw_request(post_data)
+            client_ip,
+            self.path,
+            user_agent,
+            post_data,
+            method="POST",
+            raw_request=self._build_raw_request(post_data),
        )

        time.sleep(1)
@@ -1074,12 +1105,17 @@ class Handler(BaseHTTPRequestHandler):
                    self.send_response(404)
                    self.send_header("Content-type", "application/json")
                    self.end_headers()
-                    self.wfile.write(json.dumps({"error": "Raw request not found"}).encode())
+                    self.wfile.write(
+                        json.dumps({"error": "Raw request not found"}).encode()
+                    )
                else:
                    self.send_response(200)
                    self.send_header("Content-type", "application/json")
                    self.send_header("Access-Control-Allow-Origin", "*")
-                    self.send_header("Cache-Control", "no-store, no-cache, must-revalidate, max-age=0")
+                    self.send_header(
+                        "Cache-Control",
+                        "no-store, no-cache, must-revalidate, max-age=0",
+                    )
                    self.end_headers()
                    self.wfile.write(json.dumps({"raw_request": raw_request}).encode())
            except (ValueError, IndexError):
@@ -1175,9 +1211,13 @@ class Handler(BaseHTTPRequestHandler):
                self.wfile.write(b"Internal server error")
            return

-        self.tracker.record_access(client_ip, self.path, user_agent, method="GET",
-                                          raw_request=self._build_raw_request())
-
+        self.tracker.record_access(
+            client_ip,
+            self.path,
+            user_agent,
+            method="GET",
+            raw_request=self._build_raw_request(),
+        )

        if self.tracker.is_suspicious_user_agent(user_agent):
            self.access_logger.warning(
--- a/src/migrations/add_performance_indexes.py
+++ b/src/migrations/add_performance_indexes.py
@@ -12,7 +12,9 @@ import os

 def index_exists(cursor, index_name: str) -> bool:
    """Check if an index exists."""
-    cursor.execute("SELECT name FROM sqlite_master WHERE type='index' AND name=?", (index_name,))
+    cursor.execute(
+        "SELECT name FROM sqlite_master WHERE type='index' AND name=?", (index_name,)
+    )
    return cursor.fetchone() is not None


@@ -52,7 +54,9 @@ def add_performance_indexes(db_path: str) -> bool:

        # Index 2: Composite index for attack_type + access_log_id
        if not index_exists(cursor, "ix_attack_detections_type_log"):
-            print("Adding composite index on attack_detections(attack_type, access_log_id)...")
+            print(
+                "Adding composite index on attack_detections(attack_type, access_log_id)..."
+            )
            cursor.execute("""
                CREATE INDEX ix_attack_detections_type_log 
                ON attack_detections(attack_type, access_log_id)
@@ -92,9 +96,7 @@ def main():
    """Main migration function."""
    # Default database path
    default_db_path = os.path.join(
-        os.path.dirname(os.path.dirname(__file__)), 
-        "data", 
-        "krawl.db"
+        os.path.dirname(os.path.dirname(__file__)), "data", "krawl.db"
    )

    # Allow custom path as command line argument
--- a/src/migrations/add_raw_request_column.py
+++ b/src/migrations/add_raw_request_column.py
@@ -69,9 +69,7 @@ def main():
    """Main migration function."""
    # Default database path
    default_db_path = os.path.join(
-        os.path.dirname(os.path.dirname(__file__)), 
-        "data", 
-        "krawl.db"
+        os.path.dirname(os.path.dirname(__file__)), "data", "krawl.db"
    )

    # Allow custom path as command line argument
--- a/src/models.py
+++ b/src/models.py
@@ -64,9 +64,7 @@ class AccessLog(Base):
        DateTime, nullable=False, default=datetime.utcnow, index=True
    )
    # Raw HTTP request for forensic analysis (nullable for backward compatibility)
-    raw_request: Mapped[Optional[str]] = mapped_column(
-        String, nullable=True
-    )
+    raw_request: Mapped[Optional[str]] = mapped_column(String, nullable=True)

    # Relationship to attack detections
    attack_detections: Mapped[List["AttackDetection"]] = relationship(
@@ -141,7 +139,9 @@ class AttackDetection(Base):
    )

    # Composite index for efficient aggregation queries
-    __table_args__ = (Index("ix_attack_detections_type_log", "attack_type", "access_log_id"),)
+    __table_args__ = (
+        Index("ix_attack_detections_type_log", "attack_type", "access_log_id"),
+    )

    def __repr__(self) -> str:
        return f"<AttackDetection(id={self.id}, type='{self.attack_type}')>"
--- a/src/tasks/analyze_ips.py
+++ b/src/tasks/analyze_ips.py
@@ -1,7 +1,4 @@
-from sqlalchemy import select
-from typing import Optional
-from database import get_database, DatabaseManager
-from zoneinfo import ZoneInfo
+from database import get_database
 from pathlib import Path
 from datetime import datetime, timedelta
 import re
@@ -9,8 +6,6 @@ import urllib.parse
 from wordlists import get_wordlists
 from config import get_config
 from logger import get_app_logger
-import requests
-from sanitizer import sanitize_for_storage, sanitize_dict

 # ----------------------
 # TASK CONFIG
--- a/src/tasks/db_dump.py
+++ b/src/tasks/db_dump.py
@@ -3,7 +3,7 @@
 from logger import get_app_logger
 from database import get_database
 from config import get_config
-from sqlalchemy import MetaData, inspect
+from sqlalchemy import MetaData
 from sqlalchemy.schema import CreateTable
 import os

@@ -36,18 +36,16 @@ def main():
        engine = db._engine

        metadata = MetaData()
-
-        # Reflect the database structure
        metadata.reflect(bind=engine)
+
+        # create backup directory
+        os.makedirs(config.backups_path, exist_ok=True)
        output_file = os.path.join(config.backups_path, "db_dump.sql")

        with open(output_file, "w") as f:
            # Write header
            app_logger.info(f"[Background Task] {task_name} started database dump")

-            # Get inspector for additional metadata
-            inspector = inspect(engine)
-
            # Dump schema (CREATE TABLE statements)
            f.write("-- Schema\n")
            f.write("-- " + "=" * 70 + "\n\n")
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -53,44 +53,56 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:

    # Add recent suspicious accesses (attacks)
    for log in stats.get("recent_suspicious", [])[-20:]:
-        suspicious_activities.append({
-            "type": "Attack",
-            "ip": log["ip"],
-            "path": log["path"],
-            "user_agent": log["user_agent"][:60],
-            "timestamp": log["timestamp"],
-            "details": ", ".join(log.get("attack_types", [])) if log.get("attack_types") else "Suspicious behavior"
-        })
+        suspicious_activities.append(
+            {
+                "type": "Attack",
+                "ip": log["ip"],
+                "path": log["path"],
+                "user_agent": log["user_agent"][:60],
+                "timestamp": log["timestamp"],
+                "details": (
+                    ", ".join(log.get("attack_types", []))
+                    if log.get("attack_types")
+                    else "Suspicious behavior"
+                ),
+            }
+        )

    # Add credential attempts
    for cred in stats.get("credential_attempts", [])[-20:]:
-        suspicious_activities.append({
-            "type": "Credentials",
-            "ip": cred["ip"],
-            "path": cred["path"],
-            "user_agent": "",
-            "timestamp": cred["timestamp"],
-            "details": f"User: {cred.get('username', 'N/A')}"
-        })
+        suspicious_activities.append(
+            {
+                "type": "Credentials",
+                "ip": cred["ip"],
+                "path": cred["path"],
+                "user_agent": "",
+                "timestamp": cred["timestamp"],
+                "details": f"User: {cred.get('username', 'N/A')}",
+            }
+        )

    # Add honeypot triggers
    for honeypot in stats.get("honeypot_triggered_ips", [])[-20:]:
        # honeypot is a tuple (ip, paths)
        ip = honeypot[0]
        paths = honeypot[1] if isinstance(honeypot[1], list) else []
-        suspicious_activities.append({
-            "type": "Honeypot",
-            "ip": ip,
-            "path": paths[0] if paths else "Multiple",
-            "user_agent": "",
-            "timestamp": "",  # Tuples don't have timestamp
-            "details": f"{len(paths)} trap(s) triggered"
-        })
+        suspicious_activities.append(
+            {
+                "type": "Honeypot",
+                "ip": ip,
+                "path": paths[0] if paths else "Multiple",
+                "user_agent": "",
+                "timestamp": "",  # Tuples don't have timestamp
+                "details": f"{len(paths)} trap(s) triggered",
+            }
+        )

    # Sort by timestamp (most recent first) and take last 20
    # Put entries with empty timestamps at the end
    try:
-        suspicious_activities.sort(key=lambda x: (x["timestamp"] == "", x["timestamp"]), reverse=True)
+        suspicious_activities.sort(
+            key=lambda x: (x["timestamp"] == "", x["timestamp"]), reverse=True
+        )
    except:
        pass
    suspicious_activities = suspicious_activities[:20]
--- a/src/tracker.py
+++ b/src/tracker.py
@@ -161,8 +161,19 @@ class AccessTracker:
        except Exception:
            # If parsing fails, try simple regex patterns
            wl = get_wordlists()
-            username_fields = wl.username_fields or ["username", "user", "login", "email", "log"]
-            password_fields = wl.password_fields or ["password", "pass", "passwd", "pwd"]
+            username_fields = wl.username_fields or [
+                "username",
+                "user",
+                "login",
+                "email",
+                "log",
+            ]
+            password_fields = wl.password_fields or [
+                "password",
+                "pass",
+                "passwd",
+                "pwd",
+            ]

            # Build regex pattern from wordlist fields
            username_pattern = "(?:" + "|".join(username_fields) + ")=([^&\\s]+)"