Merge branch 'feat/attack-classificiation'

2025-12-24 10:25:23 -06:00
parent fc72f9fb69 72f7293995
commit 9128d2759f
5 changed files with 90 additions and 221 deletions
--- a/src/dashboard_template.py
+++ b/src/dashboard_template.py
@@ -1,214 +0,0 @@
 #!/usr/bin/env python3
 """
 Dashboard template for viewing honeypot statistics.
 Customize this template to change the dashboard appearance.
 """
 def generate_dashboard(stats: dict) -> str:
    """Render the honeypot statistics dashboard as a complete HTML page.

    IPs, paths, User-Agents and timestamps come straight from recorded
    requests, so every such value is HTML-escaped before it is placed in the
    page. Long values are truncated first and escaped afterwards so that an
    entity such as ``&lt;`` is never cut in half.

    Args:
        stats: Mapping with the keys read below: ``top_ips``, ``top_paths``
            and ``top_user_agents`` (iterables of ``(value, count)`` pairs),
            ``recent_suspicious`` (list of dicts with ``ip``, ``path``,
            ``user_agent`` and ``timestamp`` keys) and the counters
            ``total_accesses``, ``unique_ips``, ``unique_paths`` and
            ``suspicious_accesses``.

    Returns:
        The dashboard HTML document as a string.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    no_data_row = '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'

    def text(value) -> str:
        """Return ``value`` as HTML-safe text."""
        return escape(str(value), quote=True)

    def ranked_rows(pairs, value_td='<td>', max_len=None) -> str:
        """Build the numbered ``(value, count)`` rows shared by the top-N tables."""
        rows = [
            f'<tr><td class="rank">{rank}</td>{value_td}{text(str(value)[:max_len])}</td><td>{text(count)}</td></tr>'
            for rank, (value, count) in enumerate(pairs, start=1)
        ]
        return '\n'.join(rows) or no_data_row

    top_ips_rows = ranked_rows(stats['top_ips'])
    # Generate paths rows
    top_paths_rows = ranked_rows(stats['top_paths'])
    # Generate User-Agent rows (truncated to 80 characters)
    top_ua_rows = ranked_rows(
        stats['top_user_agents'],
        value_td='<td style="word-break: break-all;">',
        max_len=80,
    )
    # Generate suspicious accesses rows (only the last 10 entries are shown).
    # partition() yields an empty time cell instead of raising when the
    # timestamp has no "T" separator.
    suspicious_rows = '\n'.join([
        f'<tr><td>{text(log["ip"])}</td><td>{text(log["path"])}</td><td style="word-break: break-all;">{text(str(log["user_agent"])[:60])}</td><td>{text(str(log["timestamp"]).partition("T")[2][:8])}</td></tr>'
        for log in stats['recent_suspicious'][-10:]
    ]) or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
    return f"""<!DOCTYPE html>
 <html>
 <head>
    <meta charset="UTF-8">
    <title>Krawl Dashboard</title>
    <style>
        body {{
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            background-color: #0d1117;
            color: #c9d1d9;
            margin: 0;
            padding: 20px;
        }}
        .container {{
            max-width: 1400px;
            margin: 0 auto;
        }}
        h1 {{
            color: #58a6ff;
            text-align: center;
            margin-bottom: 40px;
        }}
        .stats-grid {{
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 20px;
            margin-bottom: 40px;
        }}
        .stat-card {{
            background: #161b22;
            border: 1px solid #30363d;
            border-radius: 6px;
            padding: 20px;
            text-align: center;
        }}
        .stat-card.alert {{
            border-color: #f85149;
        }}
        .stat-value {{
            font-size: 36px;
            font-weight: bold;
            color: #58a6ff;
        }}
        .stat-value.alert {{
            color: #f85149;
        }}
        .stat-label {{
            font-size: 14px;
            color: #8b949e;
            margin-top: 5px;
        }}
        .table-container {{
            background: #161b22;
            border: 1px solid #30363d;
            border-radius: 6px;
            padding: 20px;
            margin-bottom: 20px;
        }}
        h2 {{
            color: #58a6ff;
            margin-top: 0;
        }}
        table {{
            width: 100%;
            border-collapse: collapse;
        }}
        th, td {{
            padding: 12px;
            text-align: left;
            border-bottom: 1px solid #30363d;
        }}
        th {{
            background: #0d1117;
            color: #58a6ff;
            font-weight: 600;
        }}
        tr:hover {{
            background: #1c2128;
        }}
        .rank {{
            color: #8b949e;
            font-weight: bold;
        }}
        .alert-section {{
            background: #1c1917;
            border-left: 4px solid #f85149;
        }}
    </style>
 </head>
 <body>
    <div class="container">
        <h1>&#128375;&#65039; Krawl Dashboard</h1>
        <div class="stats-grid">
            <div class="stat-card">
                <div class="stat-value">{stats['total_accesses']}</div>
                <div class="stat-label">Total Accesses</div>
            </div>
            <div class="stat-card">
                <div class="stat-value">{stats['unique_ips']}</div>
                <div class="stat-label">Unique IPs</div>
            </div>
            <div class="stat-card">
                <div class="stat-value">{stats['unique_paths']}</div>
                <div class="stat-label">Unique Paths</div>
            </div>
            <div class="stat-card alert">
                <div class="stat-value alert">{stats['suspicious_accesses']}</div>
                <div class="stat-label">Suspicious Accesses</div>
            </div>
        </div>
        <div class="table-container alert-section">
            <h2>&#9888;&#65039; Recent Suspicious Activity</h2>
            <table>
                <thead>
                    <tr>
                        <th>IP Address</th>
                        <th>Path</th>
                        <th>User-Agent</th>
                        <th>Time</th>
                    </tr>
                </thead>
                <tbody>
                    {suspicious_rows}
                </tbody>
            </table>
        </div>
        <div class="table-container">
            <h2>Top IP Addresses</h2>
            <table>
                <thead>
                    <tr>
                        <th>#</th>
                        <th>IP Address</th>
                        <th>Access Count</th>
                    </tr>
                </thead>
                <tbody>
                    {top_ips_rows}
                </tbody>
            </table>
        </div>
        <div class="table-container">
            <h2>Top Paths</h2>
            <table>
                <thead>
                    <tr>
                        <th>#</th>
                        <th>Path</th>
                        <th>Access Count</th>
                    </tr>
                </thead>
                <tbody>
                    {top_paths_rows}
                </tbody>
            </table>
        </div>
        <div class="table-container">
            <h2>Top User-Agents</h2>
            <table>
                <thead>
                    <tr>
                        <th>#</th>
                        <th>User-Agent</th>
                        <th>Count</th>
                    </tr>
                </thead>
                <tbody>
                    {top_ua_rows}
                </tbody>
            </table>
        </div>
    </div>
 </body>
 </html>
 """
--- a/src/handler.py
+++ b/src/handler.py
@@ -197,15 +197,18 @@ class Handler(BaseHTTPRequestHandler):
        """Handle POST requests (mainly login attempts)"""
        client_ip = self._get_client_ip()
        user_agent = self._get_user_agent()
-        
+        post_data = ""
-        self.tracker.record_access(client_ip, self.path, user_agent)
+               
        print(f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}")
        content_length = int(self.headers.get('Content-Length', 0))
        if content_length > 0:
-            post_data = self.rfile.read(content_length).decode('utf-8')
+            post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
            print(f"[POST DATA] {post_data[:200]}")
        # send the post data (body) to the record_access function so the post data can be used to detect suspicious things.
        self.tracker.record_access(client_ip, self.path, user_agent, post_data)
        time.sleep(1)
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -39,6 +39,12 @@ def generate_dashboard(stats: dict) -> str:
        for ip, paths in stats.get('honeypot_triggered_ips', [])
    ]) or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>'
    # Generate attack types rows
    attack_type_rows = '\n'.join([
        f'<tr><td>{log["ip"]}</td><td>{log["path"]}</td><td>{", ".join(log["attack_types"])}</td><td style="word-break: break-all;">{log["user_agent"][:60]}</td><td>{log["timestamp"].split("T")[1][:8]}</td></tr>'
        for log in stats.get('attack_types', [])[-10:]
    ]) or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
    return f"""<!DOCTYPE html>
 <html>
 <head>
@@ -188,6 +194,24 @@ def generate_dashboard(stats: dict) -> str:
            </table>
        </div>
        <div class="table-container alert-section">
            <h2>&#128520; Detected Attack Types</h2>
            <table>
                <thead>
                    <tr>
                        <th>IP Address</th>
                        <th>Path</th>
                        <th>Attack Types</th>
                        <th>User-Agent</th>
                        <th>Time</th>
                    </tr>
                </thead>
                <tbody>
                    {attack_type_rows}
                </tbody>
            </table>
        </div>
        <div class="table-container">
            <h2>Top IP Addresses</h2>
            <table>
--- a/src/tracker.py
+++ b/src/tracker.py
@@ -3,6 +3,7 @@
 from typing import Dict, List, Tuple
 from collections import defaultdict
 from datetime import datetime
 import re
 class AccessTracker:
@@ -17,17 +18,35 @@ class AccessTracker:
            'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
            'burp', 'zap', 'w3af', 'metasploit', 'nuclei', 'gobuster', 'dirbuster'
        ]
        # common attack types such as xss, shell injection, probes
        self.attack_types = {
            'path_traversal': r'\.\.',
            'sql_injection': r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
            'xss_attempt': r'(<script|javascript:|onerror=|onload=)',
            'common_probes': r'(wp-admin|phpmyadmin|\.env|\.git|/admin|/config)',
            'shell_injection': r'(\||;|`|\$\(|&&)',
        }
        # Track IPs that accessed honeypot paths from robots.txt
        self.honeypot_triggered: Dict[str, List[str]] = defaultdict(list)
-    def record_access(self, ip: str, path: str, user_agent: str = ''):
+    def record_access(self, ip: str, path: str, user_agent: str = '', body: str = ''):
        """Record an access attempt"""
        self.ip_counts[ip] += 1
        self.path_counts[path] += 1
        if user_agent:
            self.user_agent_counts[user_agent] += 1
-        is_suspicious = self.is_suspicious_user_agent(user_agent) or self.is_honeypot_path(path)
+        # path attack type detection
        attack_findings = self.detect_attack_type(path)
        # post / put data
        if len(body) > 0:
            attack_findings.extend(self.detect_attack_type(body))
        is_suspicious = self.is_suspicious_user_agent(user_agent) or self.is_honeypot_path(path) or len(attack_findings) > 0
        # Track if this IP accessed a honeypot path
        if self.is_honeypot_path(path):
@@ -39,9 +58,20 @@ class AccessTracker:
            'user_agent': user_agent,
            'suspicious': is_suspicious,
            'honeypot_triggered': self.is_honeypot_path(path),
            'attack_types':attack_findings,
            'timestamp': datetime.now().isoformat()
        })
    def detect_attack_type(self, data:str) -> list[str]:
        """
        Match ``data`` against every pattern in ``self.attack_types``.

        Returns the names of all patterns found in ``data`` (matched
        case-insensitively), in the mapping's iteration order; the list is
        empty when nothing matches.
        """
        return [
            label
            for label, regex in self.attack_types.items()
            if re.search(regex, data, flags=re.IGNORECASE) is not None
        ]
    def is_honeypot_path(self, path: str) -> bool:
        """Check if path is one of the honeypot traps from robots.txt"""
        honeypot_paths = [
@@ -91,6 +121,11 @@ class AccessTracker:
        suspicious = [log for log in self.access_log if log.get('suspicious', False)]
        return suspicious[-limit:]
    def get_attack_type_accesses(self, limit: int = 20) -> List[Dict]:
        """Get the trailing accesses that have at least one detected attack type.

        Args:
            limit: Maximum number of entries to return. Zero or a negative
                value yields an empty list.

        Returns:
            At most ``limit`` entries of ``self.access_log`` whose
            ``attack_types`` value is truthy, taken from the end of the log
            and kept in log order.
        """
        if limit <= 0:
            # ``attacks[-0:]`` is ``attacks[0:]`` and would return every entry.
            return []
        attacks = [log for log in self.access_log if log.get('attack_types')]
        return attacks[-limit:]
    def get_honeypot_triggered_ips(self) -> List[Tuple[str, List[str]]]:
        """Return one ``(ip, paths)`` pair per entry in ``self.honeypot_triggered``."""
        return list(self.honeypot_triggered.items())
@@ -110,5 +145,6 @@ class AccessTracker:
            'top_paths': self.get_top_paths(10),
            'top_user_agents': self.get_top_user_agents(10),
            'recent_suspicious': self.get_suspicious_accesses(20),
-            'honeypot_triggered_ips': self.get_honeypot_triggered_ips()
+            'honeypot_triggered_ips': self.get_honeypot_triggered_ips(),
            'attack_types': self.get_attack_type_accesses(20)
        }
--- a/tests/sim_attacks.sh
+++ b/tests/sim_attacks.sh
@@ -0,0 +1,20 @@
 #!/bin/bash
 # Sends one sample request per attack category named in the banners below.
 # Usage: sim_attacks.sh [TARGET_URL]
 # The target can also be set through the TARGET environment variable;
 # it defaults to http://localhost:5000.
 TARGET="${1:-${TARGET:-http://localhost:5000}}"
 echo "=== Testing Path Traversal ==="
 # --path-as-is stops curl from collapsing "/../" segments before sending;
 # without it the request line is just /etc/passwd and no ".." reaches the server.
 curl -s --path-as-is "$TARGET/../../etc/passwd"
 echo -e "\n=== Testing SQL Injection ==="
 curl -s -X POST "$TARGET/login" -d "user=' OR 1=1--"
 echo -e "\n=== Testing XSS ==="
 curl -s -X POST "$TARGET/comment" -d "msg=<script>alert(1)</script>"
 echo -e "\n=== Testing Common Probes ==="
 curl -s "$TARGET/.env"
 curl -s "$TARGET/wp-admin/"
 echo -e "\n=== Testing Shell Injection ==="
 curl -s -X POST "$TARGET/ping" -d "host=127.0.0.1; cat /etc/passwd"
 echo -e "\n=== Done ==="