Merge branch 'feat/attack-classificiation'

2025-12-24 10:25:23 -06:00
parent fc72f9fb69 72f7293995
commit 9128d2759f
5 changed files with 90 additions and 221 deletions
--- a/src/dashboard_template.py
+++ b/src/dashboard_template.py
@@ -1,214 +0,0 @@
-#!/usr/bin/env python3
-
-"""
-Dashboard template for viewing honeypot statistics.
-Customize this template to change the dashboard appearance.
-"""
-
-
-def generate_dashboard(stats: dict) -> str:
-    """Generate dashboard HTML with access statistics"""
-    
-    top_ips_rows = '\n'.join([
-        f'<tr><td class="rank">{i+1}</td><td>{ip}</td><td>{count}</td></tr>'
-        for i, (ip, count) in enumerate(stats['top_ips'])
-    ]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
-
-    # Generate paths rows
-    top_paths_rows = '\n'.join([
-        f'<tr><td class="rank">{i+1}</td><td>{path}</td><td>{count}</td></tr>'
-        for i, (path, count) in enumerate(stats['top_paths'])
-    ]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
-
-    # Generate User-Agent rows
-    top_ua_rows = '\n'.join([
-        f'<tr><td class="rank">{i+1}</td><td style="word-break: break-all;">{ua[:80]}</td><td>{count}</td></tr>'
-        for i, (ua, count) in enumerate(stats['top_user_agents'])
-    ]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
-
-    # Generate suspicious accesses rows
-    suspicious_rows = '\n'.join([
-        f'<tr><td>{log["ip"]}</td><td>{log["path"]}</td><td style="word-break: break-all;">{log["user_agent"][:60]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
-        for log in stats['recent_suspicious'][-10:]
-    ]) or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
-
-    return f"""<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="UTF-8">
-    <title>Krawl Dashboard</title>
-    <style>
-        body {{
-            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-            background-color: #0d1117;
-            color: #c9d1d9;
-            margin: 0;
-            padding: 20px;
-        }}
-        .container {{
-            max-width: 1400px;
-            margin: 0 auto;
-        }}
-        h1 {{
-            color: #58a6ff;
-            text-align: center;
-            margin-bottom: 40px;
-        }}
-        .stats-grid {{
-            display: grid;
-            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
-            gap: 20px;
-            margin-bottom: 40px;
-        }}
-        .stat-card {{
-            background: #161b22;
-            border: 1px solid #30363d;
-            border-radius: 6px;
-            padding: 20px;
-            text-align: center;
-        }}
-        .stat-card.alert {{
-            border-color: #f85149;
-        }}
-        .stat-value {{
-            font-size: 36px;
-            font-weight: bold;
-            color: #58a6ff;
-        }}
-        .stat-value.alert {{
-            color: #f85149;
-        }}
-        .stat-label {{
-            font-size: 14px;
-            color: #8b949e;
-            margin-top: 5px;
-        }}
-        .table-container {{
-            background: #161b22;
-            border: 1px solid #30363d;
-            border-radius: 6px;
-            padding: 20px;
-            margin-bottom: 20px;
-        }}
-        h2 {{
-            color: #58a6ff;
-            margin-top: 0;
-        }}
-        table {{
-            width: 100%;
-            border-collapse: collapse;
-        }}
-        th, td {{
-            padding: 12px;
-            text-align: left;
-            border-bottom: 1px solid #30363d;
-        }}
-        th {{
-            background: #0d1117;
-            color: #58a6ff;
-            font-weight: 600;
-        }}
-        tr:hover {{
-            background: #1c2128;
-        }}
-        .rank {{
-            color: #8b949e;
-            font-weight: bold;
-        }}
-        .alert-section {{
-            background: #1c1917;
-            border-left: 4px solid #f85149;
-        }}
-    </style>
-</head>
-<body>
-    <div class="container">
-        <h1>&#128375;&#65039; Krawl Dashboard</h1>
-        
-        <div class="stats-grid">
-            <div class="stat-card">
-                <div class="stat-value">{stats['total_accesses']}</div>
-                <div class="stat-label">Total Accesses</div>
-            </div>
-            <div class="stat-card">
-                <div class="stat-value">{stats['unique_ips']}</div>
-                <div class="stat-label">Unique IPs</div>
-            </div>
-            <div class="stat-card">
-                <div class="stat-value">{stats['unique_paths']}</div>
-                <div class="stat-label">Unique Paths</div>
-            </div>
-            <div class="stat-card alert">
-                <div class="stat-value alert">{stats['suspicious_accesses']}</div>
-                <div class="stat-label">Suspicious Accesses</div>
-            </div>
-        </div>
-
-        <div class="table-container alert-section">
-            <h2>&#9888;&#65039; Recent Suspicious Activity</h2>
-            <table>
-                <thead>
-                    <tr>
-                        <th>IP Address</th>
-                        <th>Path</th>
-                        <th>User-Agent</th>
-                        <th>Time</th>
-                    </tr>
-                </thead>
-                <tbody>
-                    {suspicious_rows}
-                </tbody>
-            </table>
-        </div>
-
-        <div class="table-container">
-            <h2>Top IP Addresses</h2>
-            <table>
-                <thead>
-                    <tr>
-                        <th>#</th>
-                        <th>IP Address</th>
-                        <th>Access Count</th>
-                    </tr>
-                </thead>
-                <tbody>
-                    {top_ips_rows}
-                </tbody>
-            </table>
-        </div>
-
-        <div class="table-container">
-            <h2>Top Paths</h2>
-            <table>
-                <thead>
-                    <tr>
-                        <th>#</th>
-                        <th>Path</th>
-                        <th>Access Count</th>
-                    </tr>
-                </thead>
-                <tbody>
-                    {top_paths_rows}
-                </tbody>
-            </table>
-        </div>
-
-        <div class="table-container">
-            <h2>Top User-Agents</h2>
-            <table>
-                <thead>
-                    <tr>
-                        <th>#</th>
-                        <th>User-Agent</th>
-                        <th>Count</th>
-                    </tr>
-                </thead>
-                <tbody>
-                    {top_ua_rows}
-                </tbody>
-            </table>
-        </div>
-    </div>
-</body>
-</html>
-"""
--- a/src/handler.py
+++ b/src/handler.py
@@ -197,15 +197,18 @@ class Handler(BaseHTTPRequestHandler):
        """Handle POST requests (mainly login attempts)"""
        client_ip = self._get_client_ip()
        user_agent = self._get_user_agent()
-        
-        self.tracker.record_access(client_ip, self.path, user_agent)
-        
+        post_data = ""
+               
        print(f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}")
        
        content_length = int(self.headers.get('Content-Length', 0))
        if content_length > 0:
-            post_data = self.rfile.read(content_length).decode('utf-8')
+            post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
+            
            print(f"[POST DATA] {post_data[:200]}")
+
+        # Pass the request body to record_access so attack patterns in the POST data are checked as well as the path.
+        self.tracker.record_access(client_ip, self.path, user_agent, post_data)
        
        time.sleep(1)
        
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -39,6 +39,12 @@ def generate_dashboard(stats: dict) -> str:
        for ip, paths in stats.get('honeypot_triggered_ips', [])
    ]) or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>'

+    # Generate attack types rows (last 10); fallback row spans all 5 columns. NOTE(review): request fields are not HTML-escaped here.
+    attack_type_rows = '\n'.join([
+        f'<tr><td>{log["ip"]}</td><td>{log["path"]}</td><td>{", ".join(log["attack_types"])}</td><td style="word-break: break-all;">{log["user_agent"][:60]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
+        for log in stats.get('attack_types', [])[-10:]
+    ]) or '<tr><td colspan="5" style="text-align:center;">No attacks detected</td></tr>'
+
    return f"""<!DOCTYPE html>
 <html>
 <head>
@@ -188,6 +194,24 @@ def generate_dashboard(stats: dict) -> str:
            </table>
        </div>

+        <div class="table-container alert-section">
+            <h2>&#128520; Detected Attack Types</h2>
+            <table>
+                <thead>
+                    <tr>
+                        <th>IP Address</th>
+                        <th>Path</th>
+                        <th>Attack Types</th>
+                        <th>User-Agent</th>
+                        <th>Time</th>
+                    </tr>
+                </thead>
+                <tbody>
+                    {attack_type_rows}
+                </tbody>
+            </table>
+        </div>
+
        <div class="table-container">
            <h2>Top IP Addresses</h2>
            <table>
--- a/src/tracker.py
+++ b/src/tracker.py
@@ -3,6 +3,7 @@
 from typing import Dict, List, Tuple
 from collections import defaultdict
 from datetime import datetime
+import re


 class AccessTracker:
@@ -17,17 +18,35 @@ class AccessTracker:
            'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
            'burp', 'zap', 'w3af', 'metasploit', 'nuclei', 'gobuster', 'dirbuster'
        ]
+
+        # common attack types such as xss, shell injection, probes
+        self.attack_types = {
+            'path_traversal': r'\.\.',
+            'sql_injection': r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
+            'xss_attempt': r'(<script|javascript:|onerror=|onload=)',
+            'common_probes': r'(wp-admin|phpmyadmin|\.env|\.git|/admin|/config)',
+            'shell_injection': r'(\||;|`|\$\(|&&)',
+        }
+
        # Track IPs that accessed honeypot paths from robots.txt
        self.honeypot_triggered: Dict[str, List[str]] = defaultdict(list)

-    def record_access(self, ip: str, path: str, user_agent: str = ''):
+    def record_access(self, ip: str, path: str, user_agent: str = '', body: str = ''):
        """Record an access attempt"""
        self.ip_counts[ip] += 1
        self.path_counts[path] += 1
        if user_agent:
            self.user_agent_counts[user_agent] += 1
        
-        is_suspicious = self.is_suspicious_user_agent(user_agent) or self.is_honeypot_path(path)
+        # path attack type detection
+        attack_findings = self.detect_attack_type(path)
+
+        # post / put data
+        if len(body) > 0:
+            attack_findings.extend(self.detect_attack_type(body))
+
+        is_suspicious = self.is_suspicious_user_agent(user_agent) or self.is_honeypot_path(path) or len(attack_findings) > 0
+
        
        # Track if this IP accessed a honeypot path
        if self.is_honeypot_path(path):
@@ -39,9 +58,20 @@ class AccessTracker:
            'user_agent': user_agent,
            'suspicious': is_suspicious,
            'honeypot_triggered': self.is_honeypot_path(path),
+            'attack_types':attack_findings,
            'timestamp': datetime.now().isoformat()
        })

+    def detect_attack_type(self, data: str) -> List[str]:
+        """
+        Return the names of all attack types whose pattern matches `data`.
+
+        `data` is any request text (path or body); matching is case-insensitive.
+        Names are returned in the insertion order of self.attack_types.
+        """
+        patterns = self.attack_types.items()
+        return [name for name, pattern in patterns if re.search(pattern, data, re.IGNORECASE)]
+
    def is_honeypot_path(self, path: str) -> bool:
        """Check if path is one of the honeypot traps from robots.txt"""
        honeypot_paths = [
@@ -91,6 +121,11 @@ class AccessTracker:
        suspicious = [log for log in self.access_log if log.get('suspicious', False)]
        return suspicious[-limit:]

+    def get_attack_type_accesses(self, limit: int = 20) -> List[Dict]:
+        """Return up to `limit` of the most recent log entries that have a non-empty 'attack_types' list (oldest first)."""
+        attacks = [entry for entry in self.access_log if entry.get('attack_types')]
+        return attacks[-limit:] if limit > 0 else []  # guard: attacks[-0:] would return the whole list
+
    def get_honeypot_triggered_ips(self) -> List[Tuple[str, List[str]]]:
        """Get IPs that accessed honeypot paths"""
        return [(ip, paths) for ip, paths in self.honeypot_triggered.items()]
@@ -110,5 +145,6 @@ class AccessTracker:
            'top_paths': self.get_top_paths(10),
            'top_user_agents': self.get_top_user_agents(10),
            'recent_suspicious': self.get_suspicious_accesses(20),
-            'honeypot_triggered_ips': self.get_honeypot_triggered_ips()
+            'honeypot_triggered_ips': self.get_honeypot_triggered_ips(),
+            'attack_types': self.get_attack_type_accesses(20)
        }
--- a/tests/sim_attacks.sh
+++ b/tests/sim_attacks.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+TARGET="${1:-http://localhost:5000}"  # base URL to hit; override by passing it as the first argument
+# Fire sample requests for each attack category at $TARGET.
+echo "=== Testing Path Traversal ==="
+curl -s --path-as-is "$TARGET/../../etc/passwd"  # --path-as-is: stop curl from collapsing /../ before sending
+
+echo -e "\n=== Testing SQL Injection ==="
+curl -s -X POST "$TARGET/login" -d "user=' OR 1=1--"
+
+echo -e "\n=== Testing XSS ==="
+curl -s -X POST "$TARGET/comment" -d "msg=<script>alert(1)</script>"
+
+echo -e "\n=== Testing Common Probes ==="
+curl -s "$TARGET/.env"
+curl -s "$TARGET/wp-admin/"
+
+echo -e "\n=== Testing Shell Injection ==="
+curl -s -X POST "$TARGET/ping" -d "host=127.0.0.1; cat /etc/passwd"
+
+echo -e "\n=== Done ==="