added raw request handling, enhanced attack detection for GET and POST requests, templatized suspicious activity patterns to be fetched from wordlists.json, aligned helm to load the new wordlist config, added migration scripts from 1.0.0 to new krawl versions, removed old and unused functions, added test scripts

2026-02-08 16:02:18 +01:00
parent 594eae7447
commit 771174c6a9
26 changed files with 2312 additions and 867 deletions
--- a/src/tracker.py
+++ b/src/tracker.py
@@ -49,32 +49,37 @@ class AccessTracker:
        # Track pages visited by each IP (for good crawler limiting)
        self.ip_page_visits: Dict[str, Dict[str, object]] = defaultdict(dict)

-        self.suspicious_patterns = [
-            "bot",
-            "crawler",
-            "spider",
-            "scraper",
-            "curl",
-            "wget",
-            "python-requests",
-            "scanner",
-            "nikto",
-            "sqlmap",
-            "nmap",
-            "masscan",
-            "nessus",
-            "acunetix",
-            "burp",
-            "zap",
-            "w3af",
-            "metasploit",
-            "nuclei",
-            "gobuster",
-            "dirbuster",
-        ]
+        # Load suspicious patterns from wordlists
+        wl = get_wordlists()
+        self.suspicious_patterns = wl.suspicious_patterns
+
+        # Fallback if wordlists not loaded
+        if not self.suspicious_patterns:
+            self.suspicious_patterns = [
+                "bot",
+                "crawler",
+                "spider",
+                "scraper",
+                "curl",
+                "wget",
+                "python-requests",
+                "scanner",
+                "nikto",
+                "sqlmap",
+                "nmap",
+                "masscan",
+                "nessus",
+                "acunetix",
+                "burp",
+                "zap",
+                "w3af",
+                "metasploit",
+                "nuclei",
+                "gobuster",
+                "dirbuster",
+            ]

        # Load attack patterns from wordlists
-        wl = get_wordlists()
        self.attack_types = wl.attack_patterns

        # Fallback if wordlists not loaded
@@ -84,7 +89,7 @@ class AccessTracker:
                "sql_injection": r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
                "xss_attempt": r"(<script|javascript:|onerror=|onload=)",
                "common_probes": r"(wp-admin|phpmyadmin|\.env|\.git|/admin|/config)",
-                "shell_injection": r"(\||;|`|\$\(|&&)",
+                "command_injection": r"(\||;|`|\$\(|&&)",
            }

        # Track IPs that accessed honeypot paths from robots.txt
@@ -124,23 +129,30 @@ class AccessTracker:
            # Parse URL-encoded form data
            parsed = urllib.parse.parse_qs(post_data)

-            # Common username field names
-            username_fields = [
-                "username",
-                "user",
-                "login",
-                "email",
-                "log",
-                "userid",
-                "account",
-            ]
+            # Get credential field names from wordlists
+            wl = get_wordlists()
+            username_fields = wl.username_fields
+            password_fields = wl.password_fields
+
+            # Fallback if wordlists not loaded
+            if not username_fields:
+                username_fields = [
+                    "username",
+                    "user",
+                    "login",
+                    "email",
+                    "log",
+                    "userid",
+                    "account",
+                ]
+            if not password_fields:
+                password_fields = ["password", "pass", "passwd", "pwd", "passphrase"]
+
            for field in username_fields:
                if field in parsed and parsed[field]:
                    username = parsed[field][0]
                    break

-            # Common password field names
-            password_fields = ["password", "pass", "passwd", "pwd", "passphrase"]
            for field in password_fields:
                if field in parsed and parsed[field]:
                    password = parsed[field][0]
@@ -148,12 +160,16 @@ class AccessTracker:

        except Exception:
            # If parsing fails, try simple regex patterns
-            username_match = re.search(
-                r"(?:username|user|login|email|log)=([^&\s]+)", post_data, re.IGNORECASE
-            )
-            password_match = re.search(
-                r"(?:password|pass|passwd|pwd)=([^&\s]+)", post_data, re.IGNORECASE
-            )
+            wl = get_wordlists()
+            username_fields = wl.username_fields or ["username", "user", "login", "email", "log"]
+            password_fields = wl.password_fields or ["password", "pass", "passwd", "pwd"]
+            
+            # Build regex pattern from wordlist fields
+            username_pattern = "(?:" + "|".join(username_fields) + ")=([^&\\s]+)"
+            password_pattern = "(?:" + "|".join(password_fields) + ")=([^&\\s]+)"
+            
+            username_match = re.search(username_pattern, post_data, re.IGNORECASE)
+            password_match = re.search(password_pattern, post_data, re.IGNORECASE)

            if username_match:
                username = urllib.parse.unquote_plus(username_match.group(1))
@@ -213,6 +229,7 @@ class AccessTracker:
        user_agent: str = "",
        body: str = "",
        method: str = "GET",
+        raw_request: str = "",
    ):
        """
        Record an access attempt.
@@ -226,6 +243,7 @@ class AccessTracker:
            user_agent: Client user agent string
            body: Request body (for POST/PUT)
            method: HTTP method
+            raw_request: Full raw HTTP request for forensic analysis
        """
        # Skip if this is the server's own IP
        from config import get_config
@@ -245,7 +263,9 @@ class AccessTracker:

        # POST/PUT body attack detection
        if len(body) > 0:
-            attack_findings.extend(self.detect_attack_type(body))
+            # Decode URL-encoded body so patterns can match (e.g., %3Cscript%3E -> <script>)
+            decoded_body = urllib.parse.unquote(body)
+            attack_findings.extend(self.detect_attack_type(decoded_body))

        is_suspicious = (
            self.is_suspicious_user_agent(user_agent)
@@ -286,6 +306,7 @@ class AccessTracker:
                    is_suspicious=is_suspicious,
                    is_honeypot_trigger=is_honeypot,
                    attack_types=attack_findings if attack_findings else None,
+                    raw_request=raw_request if raw_request else None,
                )
            except Exception:
                # Don't crash if database persistence fails