made ip analysis and ip rep info fetch a scheduled task
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -76,3 +76,6 @@ data/
|
|||||||
# Personal canary tokens or sensitive configs
|
# Personal canary tokens or sensitive configs
|
||||||
*canary*token*.yaml
|
*canary*token*.yaml
|
||||||
personal-values.yaml
|
personal-values.yaml
|
||||||
|
|
||||||
|
#exports dir (keeping .gitkeep so we have the dir)
|
||||||
|
/exports/*
|
||||||
@@ -14,9 +14,10 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|||||||
COPY src/ /app/src/
|
COPY src/ /app/src/
|
||||||
COPY wordlists.json /app/
|
COPY wordlists.json /app/
|
||||||
COPY entrypoint.sh /app/
|
COPY entrypoint.sh /app/
|
||||||
|
COPY config.yaml /app/
|
||||||
|
|
||||||
RUN useradd -m -u 1000 krawl && \
|
RUN useradd -m -u 1000 krawl && \
|
||||||
mkdir -p /app/logs /app/data && \
|
mkdir -p /app/logs /app/data /app/exports && \
|
||||||
chown -R krawl:krawl /app && \
|
chown -R krawl:krawl /app && \
|
||||||
chmod +x /app/entrypoint.sh
|
chmod +x /app/entrypoint.sh
|
||||||
|
|
||||||
|
|||||||
18
config.yaml
18
config.yaml
@@ -3,7 +3,7 @@
|
|||||||
server:
|
server:
|
||||||
port: 5000
|
port: 5000
|
||||||
delay: 100 # Response delay in milliseconds
|
delay: 100 # Response delay in milliseconds
|
||||||
timezone: null # e.g., "America/New_York" or null for system default
|
timezone: null # e.g., "America/New_York", "Europe/Paris" or null for system default
|
||||||
|
|
||||||
# manually set the server header, if null a random one will be used.
|
# manually set the server header, if null a random one will be used.
|
||||||
server_header: null
|
server_header: null
|
||||||
@@ -11,8 +11,8 @@ server:
|
|||||||
links:
|
links:
|
||||||
min_length: 5
|
min_length: 5
|
||||||
max_length: 15
|
max_length: 15
|
||||||
min_per_page: 10
|
min_per_page: 5
|
||||||
max_per_page: 15
|
max_per_page: 10
|
||||||
char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||||||
max_counter: 10
|
max_counter: 10
|
||||||
|
|
||||||
@@ -38,9 +38,9 @@ behavior:
|
|||||||
probability_error_codes: 0 # 0-100 percentage
|
probability_error_codes: 0 # 0-100 percentage
|
||||||
|
|
||||||
analyzer:
|
analyzer:
|
||||||
# http_risky_methods_threshold: 0.1
|
http_risky_methods_threshold: 0.1
|
||||||
# violated_robots_threshold: 0.1
|
violated_robots_threshold: 0.1
|
||||||
# uneven_request_timing_threshold: 5
|
uneven_request_timing_threshold: 2
|
||||||
# uneven_request_timing_time_window_seconds: 300
|
uneven_request_timing_time_window_seconds: 300
|
||||||
# user_agents_used_threshold: 2
|
user_agents_used_threshold: 2
|
||||||
# attack_urls_threshold: 1
|
attack_urls_threshold: 1
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ services:
|
|||||||
- ./wordlists.json:/app/wordlists.json:ro
|
- ./wordlists.json:/app/wordlists.json:ro
|
||||||
- ./config.yaml:/app/config.yaml:ro
|
- ./config.yaml:/app/config.yaml:ro
|
||||||
- ./logs:/app/logs
|
- ./logs:/app/logs
|
||||||
|
- ./exports:/app/exports
|
||||||
environment:
|
environment:
|
||||||
- CONFIG_LOCATION=config.yaml
|
- CONFIG_LOCATION=config.yaml
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
set -e
|
set -e
|
||||||
|
|
||||||
# Fix ownership of mounted directories
|
# Fix ownership of mounted directories
|
||||||
chown -R krawl:krawl /app/logs /app/data 2>/dev/null || true
|
chown -R krawl:krawl /app/logs /app/data /app/exports 2>/dev/null || true
|
||||||
|
|
||||||
# Drop to krawl user and run the application
|
# Drop to krawl user and run the application
|
||||||
exec gosu krawl "$@"
|
exec gosu krawl "$@"
|
||||||
|
|||||||
0
exports/.gitkeep
Normal file
0
exports/.gitkeep
Normal file
@@ -6,3 +6,6 @@ PyYAML>=6.0
|
|||||||
|
|
||||||
# Database ORM
|
# Database ORM
|
||||||
SQLAlchemy>=2.0.0,<3.0.0
|
SQLAlchemy>=2.0.0,<3.0.0
|
||||||
|
|
||||||
|
# Scheduling
|
||||||
|
APScheduler>=3.11.2
|
||||||
477
src/analyzer.py
477
src/analyzer.py
@@ -6,8 +6,10 @@ from zoneinfo import ZoneInfo
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
import re
|
import re
|
||||||
|
import urllib.parse
|
||||||
from wordlists import get_wordlists
|
from wordlists import get_wordlists
|
||||||
from config import get_config
|
from config import get_config
|
||||||
|
from logger import get_app_logger
|
||||||
import requests
|
import requests
|
||||||
from sanitizer import sanitize_for_storage, sanitize_dict
|
from sanitizer import sanitize_for_storage, sanitize_dict
|
||||||
|
|
||||||
@@ -15,6 +17,8 @@ from sanitizer import sanitize_for_storage, sanitize_dict
|
|||||||
Functions for user activity analysis
|
Functions for user activity analysis
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
app_logger = get_app_logger()
|
||||||
|
|
||||||
class Analyzer:
|
class Analyzer:
|
||||||
"""
|
"""
|
||||||
Analyzes users activity and produces aggregated insights
|
Analyzes users activity and produces aggregated insights
|
||||||
@@ -48,272 +52,299 @@ class Analyzer:
|
|||||||
pass
|
pass
|
||||||
return self._db_manager
|
return self._db_manager
|
||||||
|
|
||||||
def infer_user_category(self, ip: str) -> str:
|
# def infer_user_category(self, ip: str) -> str:
|
||||||
|
|
||||||
config = get_config()
|
# config = get_config()
|
||||||
|
|
||||||
http_risky_methods_threshold = config.http_risky_methods_threshold
|
# http_risky_methods_threshold = config.http_risky_methods_threshold
|
||||||
violated_robots_threshold = config.violated_robots_threshold
|
# violated_robots_threshold = config.violated_robots_threshold
|
||||||
uneven_request_timing_threshold = config.uneven_request_timing_threshold
|
# uneven_request_timing_threshold = config.uneven_request_timing_threshold
|
||||||
user_agents_used_threshold = config.user_agents_used_threshold
|
# user_agents_used_threshold = config.user_agents_used_threshold
|
||||||
attack_urls_threshold = config.attack_urls_threshold
|
# attack_urls_threshold = config.attack_urls_threshold
|
||||||
uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
|
# uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
|
||||||
|
|
||||||
print(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
|
# app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
|
||||||
|
|
||||||
score = {}
|
# score = {}
|
||||||
score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
# score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||||
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
# score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||||
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
# score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||||
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
# score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||||
|
|
||||||
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
|
# #1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
|
||||||
weights = {
|
# weights = {
|
||||||
"attacker": {
|
# "attacker": {
|
||||||
"risky_http_methods": 6,
|
# "risky_http_methods": 6,
|
||||||
"robots_violations": 4,
|
# "robots_violations": 4,
|
||||||
"uneven_request_timing": 3,
|
# "uneven_request_timing": 3,
|
||||||
"different_user_agents": 8,
|
# "different_user_agents": 8,
|
||||||
"attack_url": 15
|
# "attack_url": 15
|
||||||
},
|
# },
|
||||||
"good_crawler": {
|
# "good_crawler": {
|
||||||
"risky_http_methods": 1,
|
# "risky_http_methods": 1,
|
||||||
"robots_violations": 0,
|
# "robots_violations": 0,
|
||||||
"uneven_request_timing": 0,
|
# "uneven_request_timing": 0,
|
||||||
"different_user_agents": 0,
|
# "different_user_agents": 0,
|
||||||
"attack_url": 0
|
# "attack_url": 0
|
||||||
},
|
# },
|
||||||
"bad_crawler": {
|
# "bad_crawler": {
|
||||||
"risky_http_methods": 2,
|
# "risky_http_methods": 2,
|
||||||
"robots_violations": 7,
|
# "robots_violations": 7,
|
||||||
"uneven_request_timing": 0,
|
# "uneven_request_timing": 0,
|
||||||
"different_user_agents": 5,
|
# "different_user_agents": 5,
|
||||||
"attack_url": 5
|
# "attack_url": 5
|
||||||
},
|
# },
|
||||||
"regular_user": {
|
# "regular_user": {
|
||||||
"risky_http_methods": 0,
|
# "risky_http_methods": 0,
|
||||||
"robots_violations": 0,
|
# "robots_violations": 0,
|
||||||
"uneven_request_timing": 8,
|
# "uneven_request_timing": 8,
|
||||||
"different_user_agents": 3,
|
# "different_user_agents": 3,
|
||||||
"attack_url": 0
|
# "attack_url": 0
|
||||||
}
|
# }
|
||||||
}
|
# }
|
||||||
|
|
||||||
|
|
||||||
accesses = self.db.get_access_logs(ip_filter = ip, limit=1000)
|
# accesses = self.db.get_access_logs(ip_filter = ip, limit=1000)
|
||||||
total_accesses_count = len(accesses)
|
# total_accesses_count = len(accesses)
|
||||||
if total_accesses_count <= 0:
|
# if total_accesses_count <= 0:
|
||||||
return
|
# return
|
||||||
|
|
||||||
|
# # Set category as "unknown" for the first 5 requests
|
||||||
|
# if total_accesses_count < 3:
|
||||||
|
# category = "unknown"
|
||||||
|
# analyzed_metrics = {}
|
||||||
|
# category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
|
||||||
|
# last_analysis = datetime.now(tz=ZoneInfo('UTC'))
|
||||||
|
# self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
||||||
|
# return 0
|
||||||
|
|
||||||
#--------------------- HTTP Methods ---------------------
|
# #--------------------- HTTP Methods ---------------------
|
||||||
|
|
||||||
|
|
||||||
get_accesses_count = len([item for item in accesses if item["method"] == "GET"])
|
# get_accesses_count = len([item for item in accesses if item["method"] == "GET"])
|
||||||
post_accesses_count = len([item for item in accesses if item["method"] == "POST"])
|
# post_accesses_count = len([item for item in accesses if item["method"] == "POST"])
|
||||||
put_accesses_count = len([item for item in accesses if item["method"] == "PUT"])
|
# put_accesses_count = len([item for item in accesses if item["method"] == "PUT"])
|
||||||
delete_accesses_count = len([item for item in accesses if item["method"] == "DELETE"])
|
# delete_accesses_count = len([item for item in accesses if item["method"] == "DELETE"])
|
||||||
head_accesses_count = len([item for item in accesses if item["method"] == "HEAD"])
|
# head_accesses_count = len([item for item in accesses if item["method"] == "HEAD"])
|
||||||
options_accesses_count = len([item for item in accesses if item["method"] == "OPTIONS"])
|
# options_accesses_count = len([item for item in accesses if item["method"] == "OPTIONS"])
|
||||||
patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"])
|
# patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"])
|
||||||
|
|
||||||
if total_accesses_count > http_risky_methods_threshold:
|
# if total_accesses_count > http_risky_methods_threshold:
|
||||||
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
|
# http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
|
||||||
else:
|
# else:
|
||||||
http_method_attacker_score = 0
|
# http_method_attacker_score = 0
|
||||||
|
|
||||||
#print(f"HTTP Method attacker score: {http_method_attacker_score}")
|
# #print(f"HTTP Method attacker score: {http_method_attacker_score}")
|
||||||
if http_method_attacker_score >= http_risky_methods_threshold:
|
# if http_method_attacker_score >= http_risky_methods_threshold:
|
||||||
score["attacker"]["risky_http_methods"] = True
|
# score["attacker"]["risky_http_methods"] = True
|
||||||
score["good_crawler"]["risky_http_methods"] = False
|
# score["good_crawler"]["risky_http_methods"] = False
|
||||||
score["bad_crawler"]["risky_http_methods"] = True
|
# score["bad_crawler"]["risky_http_methods"] = True
|
||||||
score["regular_user"]["risky_http_methods"] = False
|
# score["regular_user"]["risky_http_methods"] = False
|
||||||
else:
|
# else:
|
||||||
score["attacker"]["risky_http_methods"] = False
|
# score["attacker"]["risky_http_methods"] = False
|
||||||
score["good_crawler"]["risky_http_methods"] = True
|
# score["good_crawler"]["risky_http_methods"] = True
|
||||||
score["bad_crawler"]["risky_http_methods"] = False
|
# score["bad_crawler"]["risky_http_methods"] = False
|
||||||
score["regular_user"]["risky_http_methods"] = False
|
# score["regular_user"]["risky_http_methods"] = False
|
||||||
|
|
||||||
#--------------------- Robots Violations ---------------------
|
# #--------------------- Robots Violations ---------------------
|
||||||
#respect robots.txt and login/config pages access frequency
|
# #respect robots.txt and login/config pages access frequency
|
||||||
robots_disallows = []
|
# robots_disallows = []
|
||||||
robots_path = Path(__file__).parent / "templates" / "html" / "robots.txt"
|
# robots_path = Path(__file__).parent / "templates" / "html" / "robots.txt"
|
||||||
with open(robots_path, "r") as f:
|
# with open(robots_path, "r") as f:
|
||||||
for line in f:
|
# for line in f:
|
||||||
line = line.strip()
|
# line = line.strip()
|
||||||
if not line:
|
# if not line:
|
||||||
continue
|
# continue
|
||||||
parts = line.split(":")
|
# parts = line.split(":")
|
||||||
|
|
||||||
if parts[0] == "Disallow":
|
# if parts[0] == "Disallow":
|
||||||
parts[1] = parts[1].rstrip("/")
|
# parts[1] = parts[1].rstrip("/")
|
||||||
#print(f"DISALLOW {parts[1]}")
|
# #print(f"DISALLOW {parts[1]}")
|
||||||
robots_disallows.append(parts[1].strip())
|
# robots_disallows.append(parts[1].strip())
|
||||||
|
|
||||||
#if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
|
# #if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
|
||||||
violated_robots_count = len([item for item in accesses if item["path"].rstrip("/") in tuple(robots_disallows)])
|
# violated_robots_count = len([item for item in accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
|
||||||
#print(f"Violated robots count: {violated_robots_count}")
|
# #print(f"Violated robots count: {violated_robots_count}")
|
||||||
if total_accesses_count > 0:
|
# if total_accesses_count > 0:
|
||||||
violated_robots_ratio = violated_robots_count / total_accesses_count
|
# violated_robots_ratio = violated_robots_count / total_accesses_count
|
||||||
else:
|
# else:
|
||||||
violated_robots_ratio = 0
|
# violated_robots_ratio = 0
|
||||||
|
|
||||||
if violated_robots_ratio >= violated_robots_threshold:
|
# if violated_robots_ratio >= violated_robots_threshold:
|
||||||
score["attacker"]["robots_violations"] = True
|
# score["attacker"]["robots_violations"] = True
|
||||||
score["good_crawler"]["robots_violations"] = False
|
# score["good_crawler"]["robots_violations"] = False
|
||||||
score["bad_crawler"]["robots_violations"] = True
|
# score["bad_crawler"]["robots_violations"] = True
|
||||||
score["regular_user"]["robots_violations"] = False
|
# score["regular_user"]["robots_violations"] = False
|
||||||
else:
|
# else:
|
||||||
score["attacker"]["robots_violations"] = False
|
# score["attacker"]["robots_violations"] = False
|
||||||
score["good_crawler"]["robots_violations"] = False
|
# score["good_crawler"]["robots_violations"] = False
|
||||||
score["bad_crawler"]["robots_violations"] = False
|
# score["bad_crawler"]["robots_violations"] = False
|
||||||
score["regular_user"]["robots_violations"] = False
|
# score["regular_user"]["robots_violations"] = False
|
||||||
|
|
||||||
#--------------------- Requests Timing ---------------------
|
# #--------------------- Requests Timing ---------------------
|
||||||
#Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
|
# #Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
|
||||||
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses]
|
# timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses]
|
||||||
timestamps = [ts for ts in timestamps if datetime.utcnow() - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
|
# now_utc = datetime.now(tz=ZoneInfo('UTC'))
|
||||||
timestamps = sorted(timestamps, reverse=True)
|
# timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
|
||||||
|
# timestamps = sorted(timestamps, reverse=True)
|
||||||
|
|
||||||
time_diffs = []
|
# time_diffs = []
|
||||||
for i in range(0, len(timestamps)-1):
|
# for i in range(0, len(timestamps)-1):
|
||||||
diff = (timestamps[i] - timestamps[i+1]).total_seconds()
|
# diff = (timestamps[i] - timestamps[i+1]).total_seconds()
|
||||||
time_diffs.append(diff)
|
# time_diffs.append(diff)
|
||||||
|
|
||||||
mean = 0
|
# mean = 0
|
||||||
variance = 0
|
# variance = 0
|
||||||
std = 0
|
# std = 0
|
||||||
cv = 0
|
# cv = 0
|
||||||
if time_diffs:
|
# if time_diffs:
|
||||||
mean = sum(time_diffs) / len(time_diffs)
|
# mean = sum(time_diffs) / len(time_diffs)
|
||||||
variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
|
# variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
|
||||||
std = variance ** 0.5
|
# std = variance ** 0.5
|
||||||
cv = std/mean
|
# cv = std/mean
|
||||||
print(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
|
# app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
|
||||||
|
|
||||||
if cv >= uneven_request_timing_threshold:
|
# if cv >= uneven_request_timing_threshold:
|
||||||
score["attacker"]["uneven_request_timing"] = True
|
# score["attacker"]["uneven_request_timing"] = True
|
||||||
score["good_crawler"]["uneven_request_timing"] = False
|
# score["good_crawler"]["uneven_request_timing"] = False
|
||||||
score["bad_crawler"]["uneven_request_timing"] = False
|
# score["bad_crawler"]["uneven_request_timing"] = False
|
||||||
score["regular_user"]["uneven_request_timing"] = True
|
# score["regular_user"]["uneven_request_timing"] = True
|
||||||
else:
|
# else:
|
||||||
score["attacker"]["uneven_request_timing"] = False
|
# score["attacker"]["uneven_request_timing"] = False
|
||||||
score["good_crawler"]["uneven_request_timing"] = False
|
# score["good_crawler"]["uneven_request_timing"] = False
|
||||||
score["bad_crawler"]["uneven_request_timing"] = False
|
# score["bad_crawler"]["uneven_request_timing"] = False
|
||||||
score["regular_user"]["uneven_request_timing"] = False
|
# score["regular_user"]["uneven_request_timing"] = False
|
||||||
|
|
||||||
|
|
||||||
#--------------------- Different User Agents ---------------------
|
# #--------------------- Different User Agents ---------------------
|
||||||
#Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
|
# #Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
|
||||||
user_agents_used = [item["user_agent"] for item in accesses]
|
# user_agents_used = [item["user_agent"] for item in accesses]
|
||||||
user_agents_used = list(dict.fromkeys(user_agents_used))
|
# user_agents_used = list(dict.fromkeys(user_agents_used))
|
||||||
#print(f"User agents used: {user_agents_used}")
|
# #print(f"User agents used: {user_agents_used}")
|
||||||
|
|
||||||
if len(user_agents_used) >= user_agents_used_threshold:
|
# if len(user_agents_used) >= user_agents_used_threshold:
|
||||||
score["attacker"]["different_user_agents"] = True
|
# score["attacker"]["different_user_agents"] = True
|
||||||
score["good_crawler"]["different_user_agents"] = False
|
# score["good_crawler"]["different_user_agents"] = False
|
||||||
score["bad_crawler"]["different_user_agentss"] = True
|
# score["bad_crawler"]["different_user_agentss"] = True
|
||||||
score["regular_user"]["different_user_agents"] = False
|
# score["regular_user"]["different_user_agents"] = False
|
||||||
else:
|
# else:
|
||||||
score["attacker"]["different_user_agents"] = False
|
# score["attacker"]["different_user_agents"] = False
|
||||||
score["good_crawler"]["different_user_agents"] = False
|
# score["good_crawler"]["different_user_agents"] = False
|
||||||
score["bad_crawler"]["different_user_agents"] = False
|
# score["bad_crawler"]["different_user_agents"] = False
|
||||||
score["regular_user"]["different_user_agents"] = False
|
# score["regular_user"]["different_user_agents"] = False
|
||||||
|
|
||||||
#--------------------- Attack URLs ---------------------
|
# #--------------------- Attack URLs ---------------------
|
||||||
|
|
||||||
attack_urls_found_list = []
|
# attack_urls_found_list = []
|
||||||
|
|
||||||
wl = get_wordlists()
|
# wl = get_wordlists()
|
||||||
if wl.attack_urls:
|
# if wl.attack_patterns:
|
||||||
queried_paths = [item["path"] for item in accesses]
|
# queried_paths = [item["path"] for item in accesses]
|
||||||
|
|
||||||
for queried_path in queried_paths:
|
# for queried_path in queried_paths:
|
||||||
for name, pattern in wl.attack_urls.items():
|
# # URL decode the path to catch encoded attacks
|
||||||
if re.search(pattern, queried_path, re.IGNORECASE):
|
# try:
|
||||||
attack_urls_found_list.append(pattern)
|
# decoded_path = urllib.parse.unquote(queried_path)
|
||||||
|
# # Double decode to catch double-encoded attacks
|
||||||
|
# decoded_path_twice = urllib.parse.unquote(decoded_path)
|
||||||
|
# except Exception:
|
||||||
|
# decoded_path = queried_path
|
||||||
|
# decoded_path_twice = queried_path
|
||||||
|
|
||||||
|
# for name, pattern in wl.attack_patterns.items():
|
||||||
|
# # Check original, decoded, and double-decoded paths
|
||||||
|
# if (re.search(pattern, queried_path, re.IGNORECASE) or
|
||||||
|
# re.search(pattern, decoded_path, re.IGNORECASE) or
|
||||||
|
# re.search(pattern, decoded_path_twice, re.IGNORECASE)):
|
||||||
|
# attack_urls_found_list.append(f"{name}: {pattern}")
|
||||||
|
|
||||||
#remove duplicates
|
# #remove duplicates
|
||||||
attack_urls_found_list = set(attack_urls_found_list)
|
# attack_urls_found_list = set(attack_urls_found_list)
|
||||||
attack_urls_found_list = list(attack_urls_found_list)
|
# attack_urls_found_list = list(attack_urls_found_list)
|
||||||
|
|
||||||
if len(attack_urls_found_list) > attack_urls_threshold:
|
# if len(attack_urls_found_list) > attack_urls_threshold:
|
||||||
score["attacker"]["attack_url"] = True
|
# score["attacker"]["attack_url"] = True
|
||||||
score["good_crawler"]["attack_url"] = False
|
# score["good_crawler"]["attack_url"] = False
|
||||||
score["bad_crawler"]["attack_url"] = False
|
# score["bad_crawler"]["attack_url"] = False
|
||||||
score["regular_user"]["attack_url"] = False
|
# score["regular_user"]["attack_url"] = False
|
||||||
else:
|
# else:
|
||||||
score["attacker"]["attack_url"] = False
|
# score["attacker"]["attack_url"] = False
|
||||||
score["good_crawler"]["attack_url"] = False
|
# score["good_crawler"]["attack_url"] = False
|
||||||
score["bad_crawler"]["attack_url"] = False
|
# score["bad_crawler"]["attack_url"] = False
|
||||||
score["regular_user"]["attack_url"] = False
|
# score["regular_user"]["attack_url"] = False
|
||||||
|
|
||||||
#--------------------- Calculate score ---------------------
|
# #--------------------- Calculate score ---------------------
|
||||||
|
|
||||||
attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
|
# attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
|
||||||
|
|
||||||
attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
|
# attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
|
||||||
attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
|
# attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
|
||||||
attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
|
# attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
|
||||||
attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
|
# attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
|
||||||
attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
|
# attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
|
||||||
|
|
||||||
good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
|
# good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
|
||||||
good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
|
# good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
|
||||||
good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
|
# good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
|
||||||
good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
|
# good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
|
||||||
good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
|
# good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
|
||||||
|
|
||||||
bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
|
# bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
|
||||||
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
|
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
|
||||||
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
|
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
|
||||||
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
|
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
|
||||||
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
|
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
|
||||||
|
|
||||||
regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
|
# regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
|
||||||
regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
|
# regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
|
||||||
regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
|
# regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
|
||||||
regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
|
# regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
|
||||||
regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
|
# regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
|
||||||
|
|
||||||
print(f"Attacker score: {attacker_score}")
|
# score_details = f"""
|
||||||
print(f"Good Crawler score: {good_crawler_score}")
|
# Attacker score: {attacker_score}
|
||||||
print(f"Bad Crawler score: {bad_crawler_score}")
|
# Good Crawler score: {good_crawler_score}
|
||||||
print(f"Regular User score: {regular_user_score}")
|
# Bad Crawler score: {bad_crawler_score}
|
||||||
|
# Regular User score: {regular_user_score}
|
||||||
|
# """
|
||||||
|
# app_logger.debug(score_details)
|
||||||
|
|
||||||
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
|
# analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
|
||||||
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
|
# category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
|
||||||
category = max(category_scores, key=category_scores.get)
|
# category = max(category_scores, key=category_scores.get)
|
||||||
last_analysis = datetime.utcnow()
|
# last_analysis = datetime.now(tz=ZoneInfo('UTC'))
|
||||||
|
|
||||||
self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
# self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
||||||
|
|
||||||
return 0
|
# return 0
|
||||||
|
|
||||||
def update_ip_rep_infos(self, ip: str) -> list[str]:
|
|
||||||
api_url = "https://iprep.lcrawl.com/api/iprep/"
|
|
||||||
params = {
|
|
||||||
"cidr": ip
|
|
||||||
}
|
|
||||||
headers = {
|
|
||||||
"Content-Type": "application/json"
|
|
||||||
}
|
|
||||||
|
|
||||||
response = requests.get(api_url, headers=headers, params=params)
|
|
||||||
payload = response.json()
|
|
||||||
|
|
||||||
if payload["results"]:
|
# def update_ip_rep_infos(self, ip: str) -> list[str]:
|
||||||
data = payload["results"][0]
|
# api_url = "https://iprep.lcrawl.com/api/iprep/"
|
||||||
|
# params = {
|
||||||
|
# "cidr": ip
|
||||||
|
# }
|
||||||
|
# headers = {
|
||||||
|
# "Content-Type": "application/json"
|
||||||
|
# }
|
||||||
|
|
||||||
country_iso_code = data["geoip_data"]["country_iso_code"]
|
# response = requests.get(api_url, headers=headers, params=params)
|
||||||
asn = data["geoip_data"]["asn_autonomous_system_number"]
|
# payload = response.json()
|
||||||
asn_org = data["geoip_data"]["asn_autonomous_system_organization"]
|
|
||||||
list_on = data["list_on"]
|
|
||||||
|
|
||||||
sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
|
# if payload["results"]:
|
||||||
sanitized_asn = sanitize_for_storage(asn, 100)
|
# data = payload["results"][0]
|
||||||
sanitized_asn_org = sanitize_for_storage(asn_org, 100)
|
|
||||||
sanitized_list_on = sanitize_dict(list_on, 100000)
|
# country_iso_code = data["geoip_data"]["country_iso_code"]
|
||||||
|
# asn = data["geoip_data"]["asn_autonomous_system_number"]
|
||||||
|
# asn_org = data["geoip_data"]["asn_autonomous_system_organization"]
|
||||||
|
# list_on = data["list_on"]
|
||||||
|
|
||||||
|
# sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
|
||||||
|
# sanitized_asn = sanitize_for_storage(asn, 100)
|
||||||
|
# sanitized_asn_org = sanitize_for_storage(asn_org, 100)
|
||||||
|
# sanitized_list_on = sanitize_dict(list_on, 100000)
|
||||||
|
|
||||||
self._db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on)
|
# self._db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on)
|
||||||
|
|
||||||
return
|
# return
|
||||||
154
src/database.py
154
src/database.py
@@ -9,11 +9,12 @@ import os
|
|||||||
import stat
|
import stat
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from typing import Optional, List, Dict, Any
|
from typing import Optional, List, Dict, Any
|
||||||
|
from zoneinfo import ZoneInfo
|
||||||
|
|
||||||
from sqlalchemy import create_engine, func, distinct, case
|
from sqlalchemy import create_engine, func, distinct, case
|
||||||
from sqlalchemy.orm import sessionmaker, scoped_session, Session
|
from sqlalchemy.orm import sessionmaker, scoped_session, Session
|
||||||
|
|
||||||
from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats
|
from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats, CategoryHistory
|
||||||
from sanitizer import (
|
from sanitizer import (
|
||||||
sanitize_ip,
|
sanitize_ip,
|
||||||
sanitize_path,
|
sanitize_path,
|
||||||
@@ -22,6 +23,9 @@ from sanitizer import (
|
|||||||
sanitize_attack_pattern,
|
sanitize_attack_pattern,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
from logger import get_app_logger
|
||||||
|
|
||||||
|
applogger = get_app_logger()
|
||||||
|
|
||||||
class DatabaseManager:
|
class DatabaseManager:
|
||||||
"""
|
"""
|
||||||
@@ -127,7 +131,7 @@ class DatabaseManager:
|
|||||||
method=method[:10],
|
method=method[:10],
|
||||||
is_suspicious=is_suspicious,
|
is_suspicious=is_suspicious,
|
||||||
is_honeypot_trigger=is_honeypot_trigger,
|
is_honeypot_trigger=is_honeypot_trigger,
|
||||||
timestamp=datetime.utcnow()
|
timestamp=datetime.now(tz=ZoneInfo('UTC'))
|
||||||
)
|
)
|
||||||
session.add(access_log)
|
session.add(access_log)
|
||||||
session.flush() # Get the ID before committing
|
session.flush() # Get the ID before committing
|
||||||
@@ -154,7 +158,7 @@ class DatabaseManager:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
session.rollback()
|
session.rollback()
|
||||||
# Log error but don't crash - database persistence is secondary to honeypot function
|
# Log error but don't crash - database persistence is secondary to honeypot function
|
||||||
print(f"Database error persisting access: {e}")
|
applogger.critical(f"Database error persisting access: {e}")
|
||||||
return None
|
return None
|
||||||
finally:
|
finally:
|
||||||
self.close_session()
|
self.close_session()
|
||||||
@@ -185,7 +189,7 @@ class DatabaseManager:
|
|||||||
path=sanitize_path(path),
|
path=sanitize_path(path),
|
||||||
username=sanitize_credential(username),
|
username=sanitize_credential(username),
|
||||||
password=sanitize_credential(password),
|
password=sanitize_credential(password),
|
||||||
timestamp=datetime.utcnow()
|
timestamp=datetime.now(tz=ZoneInfo('UTC'))
|
||||||
)
|
)
|
||||||
session.add(credential)
|
session.add(credential)
|
||||||
session.commit()
|
session.commit()
|
||||||
@@ -193,7 +197,7 @@ class DatabaseManager:
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
session.rollback()
|
session.rollback()
|
||||||
print(f"Database error persisting credential: {e}")
|
applogger.critical(f"Database error persisting credential: {e}")
|
||||||
return None
|
return None
|
||||||
finally:
|
finally:
|
||||||
self.close_session()
|
self.close_session()
|
||||||
@@ -207,7 +211,7 @@ class DatabaseManager:
|
|||||||
ip: IP address to update
|
ip: IP address to update
|
||||||
"""
|
"""
|
||||||
sanitized_ip = sanitize_ip(ip)
|
sanitized_ip = sanitize_ip(ip)
|
||||||
now = datetime.utcnow()
|
now = datetime.now(tz=ZoneInfo('UTC'))
|
||||||
|
|
||||||
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
||||||
|
|
||||||
@@ -226,6 +230,7 @@ class DatabaseManager:
|
|||||||
def update_ip_stats_analysis(self, ip: str, analyzed_metrics: Dict[str, object], category: str, category_scores: Dict[str, int], last_analysis: datetime) -> None:
|
def update_ip_stats_analysis(self, ip: str, analyzed_metrics: Dict[str, object], category: str, category_scores: Dict[str, int], last_analysis: datetime) -> None:
|
||||||
"""
|
"""
|
||||||
Update IP statistics (ip is already persisted).
|
Update IP statistics (ip is already persisted).
|
||||||
|
Records category change in history if category has changed.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
ip: IP address to update
|
ip: IP address to update
|
||||||
@@ -235,16 +240,28 @@ class DatabaseManager:
|
|||||||
last_analysis: timestamp of last analysis
|
last_analysis: timestamp of last analysis
|
||||||
|
|
||||||
"""
|
"""
|
||||||
print(f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}")
|
applogger.debug(f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}")
|
||||||
|
applogger.info(f"IP: {ip} category has been updated to {category}")
|
||||||
|
|
||||||
session = self.session
|
session = self.session
|
||||||
sanitized_ip = sanitize_ip(ip)
|
sanitized_ip = sanitize_ip(ip)
|
||||||
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
||||||
|
|
||||||
|
# Check if category has changed and record it
|
||||||
|
old_category = ip_stats.category
|
||||||
|
if old_category != category:
|
||||||
|
self._record_category_change(sanitized_ip, old_category, category, last_analysis)
|
||||||
|
|
||||||
ip_stats.analyzed_metrics = analyzed_metrics
|
ip_stats.analyzed_metrics = analyzed_metrics
|
||||||
ip_stats.category = category
|
ip_stats.category = category
|
||||||
ip_stats.category_scores = category_scores
|
ip_stats.category_scores = category_scores
|
||||||
ip_stats.last_analysis = last_analysis
|
ip_stats.last_analysis = last_analysis
|
||||||
|
|
||||||
|
try:
|
||||||
|
session.commit()
|
||||||
|
except Exception as e:
|
||||||
|
session.rollback()
|
||||||
|
print(f"Error updating IP stats analysis: {e}")
|
||||||
|
|
||||||
def manual_update_category(self, ip: str, category: str) -> None:
|
def manual_update_category(self, ip: str, category: str) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -256,13 +273,81 @@ class DatabaseManager:
|
|||||||
|
|
||||||
"""
|
"""
|
||||||
session = self.session
|
session = self.session
|
||||||
|
|
||||||
sanitized_ip = sanitize_ip(ip)
|
sanitized_ip = sanitize_ip(ip)
|
||||||
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
||||||
|
|
||||||
|
|
||||||
|
# Record the manual category change
|
||||||
|
old_category = ip_stats.category
|
||||||
|
if old_category != category:
|
||||||
|
self._record_category_change(sanitized_ip, old_category, category, datetime.now(tz=ZoneInfo('UTC')))
|
||||||
|
|
||||||
ip_stats.category = category
|
ip_stats.category = category
|
||||||
ip_stats.manual_category = True
|
ip_stats.manual_category = True
|
||||||
|
|
||||||
|
try:
|
||||||
|
session.commit()
|
||||||
|
except Exception as e:
|
||||||
|
session.rollback()
|
||||||
|
print(f"Error updating manual category: {e}")
|
||||||
|
|
||||||
|
def _record_category_change(self, ip: str, old_category: Optional[str], new_category: str, timestamp: datetime) -> None:
|
||||||
|
"""
|
||||||
|
Internal method to record category changes in history.
|
||||||
|
Only records if there's an actual change from a previous category.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ip: IP address
|
||||||
|
old_category: Previous category (None if first categorization)
|
||||||
|
new_category: New category
|
||||||
|
timestamp: When the change occurred
|
||||||
|
"""
|
||||||
|
# Don't record initial categorization (when old_category is None)
|
||||||
|
# Only record actual category changes
|
||||||
|
if old_category is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
session = self.session
|
||||||
|
try:
|
||||||
|
history_entry = CategoryHistory(
|
||||||
|
ip=ip,
|
||||||
|
old_category=old_category,
|
||||||
|
new_category=new_category,
|
||||||
|
timestamp=timestamp
|
||||||
|
)
|
||||||
|
session.add(history_entry)
|
||||||
|
session.commit()
|
||||||
|
except Exception as e:
|
||||||
|
session.rollback()
|
||||||
|
applogger.error(f"Error recording category change: {e}")
|
||||||
|
|
||||||
|
def get_category_history(self, ip: str) -> List[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Retrieve category change history for a specific IP.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ip: IP address to get history for
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of category change records ordered by timestamp
|
||||||
|
"""
|
||||||
|
session = self.session
|
||||||
|
try:
|
||||||
|
sanitized_ip = sanitize_ip(ip)
|
||||||
|
history = session.query(CategoryHistory).filter(
|
||||||
|
CategoryHistory.ip == sanitized_ip
|
||||||
|
).order_by(CategoryHistory.timestamp.asc()).all()
|
||||||
|
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
'old_category': h.old_category,
|
||||||
|
'new_category': h.new_category,
|
||||||
|
'timestamp': h.timestamp.isoformat() + '+00:00'
|
||||||
|
}
|
||||||
|
for h in history
|
||||||
|
]
|
||||||
|
finally:
|
||||||
|
self.close_session()
|
||||||
|
|
||||||
def update_ip_rep_infos(self, ip: str, country_code: str, asn: str, asn_org: str, list_on: Dict[str,str]) -> None:
|
def update_ip_rep_infos(self, ip: str, country_code: str, asn: str, asn_org: str, list_on: Dict[str,str]) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -326,7 +411,7 @@ class DatabaseManager:
|
|||||||
'method': log.method,
|
'method': log.method,
|
||||||
'is_suspicious': log.is_suspicious,
|
'is_suspicious': log.is_suspicious,
|
||||||
'is_honeypot_trigger': log.is_honeypot_trigger,
|
'is_honeypot_trigger': log.is_honeypot_trigger,
|
||||||
'timestamp': log.timestamp.isoformat(),
|
'timestamp': log.timestamp.isoformat() + '+00:00',
|
||||||
'attack_types': [d.attack_type for d in log.attack_detections]
|
'attack_types': [d.attack_type for d in log.attack_detections]
|
||||||
}
|
}
|
||||||
for log in logs
|
for log in logs
|
||||||
@@ -419,7 +504,7 @@ class DatabaseManager:
|
|||||||
'path': attempt.path,
|
'path': attempt.path,
|
||||||
'username': attempt.username,
|
'username': attempt.username,
|
||||||
'password': attempt.password,
|
'password': attempt.password,
|
||||||
'timestamp': attempt.timestamp.isoformat()
|
'timestamp': attempt.timestamp.isoformat() + '+00:00'
|
||||||
}
|
}
|
||||||
for attempt in attempts
|
for attempt in attempts
|
||||||
]
|
]
|
||||||
@@ -446,8 +531,8 @@ class DatabaseManager:
|
|||||||
{
|
{
|
||||||
'ip': s.ip,
|
'ip': s.ip,
|
||||||
'total_requests': s.total_requests,
|
'total_requests': s.total_requests,
|
||||||
'first_seen': s.first_seen.isoformat(),
|
'first_seen': s.first_seen.isoformat() + '+00:00',
|
||||||
'last_seen': s.last_seen.isoformat(),
|
'last_seen': s.last_seen.isoformat() + '+00:00',
|
||||||
'country_code': s.country_code,
|
'country_code': s.country_code,
|
||||||
'city': s.city,
|
'city': s.city,
|
||||||
'asn': s.asn,
|
'asn': s.asn,
|
||||||
@@ -464,6 +549,47 @@ class DatabaseManager:
|
|||||||
finally:
|
finally:
|
||||||
self.close_session()
|
self.close_session()
|
||||||
|
|
||||||
|
def get_ip_stats_by_ip(self, ip: str) -> Optional[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Retrieve IP statistics for a specific IP address.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ip: The IP address to look up
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with IP stats or None if not found
|
||||||
|
"""
|
||||||
|
session = self.session
|
||||||
|
try:
|
||||||
|
stat = session.query(IpStats).filter(IpStats.ip == ip).first()
|
||||||
|
|
||||||
|
if not stat:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Get category history for this IP
|
||||||
|
category_history = self.get_category_history(ip)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'ip': stat.ip,
|
||||||
|
'total_requests': stat.total_requests,
|
||||||
|
'first_seen': stat.first_seen.isoformat() + '+00:00' if stat.first_seen else None,
|
||||||
|
'last_seen': stat.last_seen.isoformat() + '+00:00' if stat.last_seen else None,
|
||||||
|
'country_code': stat.country_code,
|
||||||
|
'city': stat.city,
|
||||||
|
'asn': stat.asn,
|
||||||
|
'asn_org': stat.asn_org,
|
||||||
|
'reputation_score': stat.reputation_score,
|
||||||
|
'reputation_source': stat.reputation_source,
|
||||||
|
'analyzed_metrics': stat.analyzed_metrics or {},
|
||||||
|
'category': stat.category,
|
||||||
|
'category_scores': stat.category_scores or {},
|
||||||
|
'manual_category': stat.manual_category,
|
||||||
|
'last_analysis': stat.last_analysis.isoformat() + '+00:00' if stat.last_analysis else None,
|
||||||
|
'category_history': category_history
|
||||||
|
}
|
||||||
|
finally:
|
||||||
|
self.close_session()
|
||||||
|
|
||||||
def get_dashboard_counts(self) -> Dict[str, int]:
|
def get_dashboard_counts(self) -> Dict[str, int]:
|
||||||
"""
|
"""
|
||||||
Get aggregate statistics for the dashboard.
|
Get aggregate statistics for the dashboard.
|
||||||
@@ -592,7 +718,7 @@ class DatabaseManager:
|
|||||||
'ip': log.ip,
|
'ip': log.ip,
|
||||||
'path': log.path,
|
'path': log.path,
|
||||||
'user_agent': log.user_agent,
|
'user_agent': log.user_agent,
|
||||||
'timestamp': log.timestamp.isoformat()
|
'timestamp': log.timestamp.isoformat() + '+00:00'
|
||||||
}
|
}
|
||||||
for log in logs
|
for log in logs
|
||||||
]
|
]
|
||||||
@@ -650,7 +776,7 @@ class DatabaseManager:
|
|||||||
'ip': log.ip,
|
'ip': log.ip,
|
||||||
'path': log.path,
|
'path': log.path,
|
||||||
'user_agent': log.user_agent,
|
'user_agent': log.user_agent,
|
||||||
'timestamp': log.timestamp.isoformat(),
|
'timestamp': log.timestamp.isoformat() + '+00:00',
|
||||||
'attack_types': [d.attack_type for d in log.attack_detections]
|
'attack_types': [d.attack_type for d in log.attack_detections]
|
||||||
}
|
}
|
||||||
for log in logs
|
for log in logs
|
||||||
|
|||||||
1
src/exports/malicious_ips.txt
Normal file
1
src/exports/malicious_ips.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
127.0.0.1
|
||||||
@@ -407,17 +407,75 @@ class Handler(BaseHTTPRequestHandler):
|
|||||||
self.end_headers()
|
self.end_headers()
|
||||||
try:
|
try:
|
||||||
stats = self.tracker.get_stats()
|
stats = self.tracker.get_stats()
|
||||||
self.wfile.write(generate_dashboard(stats).encode())
|
timezone = str(self.config.timezone) if self.config.timezone else 'UTC'
|
||||||
|
dashboard_path = self.config.dashboard_secret_path
|
||||||
|
self.wfile.write(generate_dashboard(stats, timezone, dashboard_path).encode())
|
||||||
except BrokenPipeError:
|
except BrokenPipeError:
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.app_logger.error(f"Error generating dashboard: {e}")
|
self.app_logger.error(f"Error generating dashboard: {e}")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# API endpoint for fetching IP stats
|
||||||
|
if self.config.dashboard_secret_path and self.path.startswith(f"{self.config.dashboard_secret_path}/api/ip-stats/"):
|
||||||
|
ip_address = self.path.replace(f"{self.config.dashboard_secret_path}/api/ip-stats/", "")
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header('Content-type', 'application/json')
|
||||||
|
self.send_header('Access-Control-Allow-Origin', '*')
|
||||||
|
# Prevent browser caching - force fresh data from database every time
|
||||||
|
self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0')
|
||||||
|
self.send_header('Pragma', 'no-cache')
|
||||||
|
self.send_header('Expires', '0')
|
||||||
|
self.end_headers()
|
||||||
|
try:
|
||||||
|
from database import get_database
|
||||||
|
import json
|
||||||
|
db = get_database()
|
||||||
|
ip_stats = db.get_ip_stats_by_ip(ip_address)
|
||||||
|
if ip_stats:
|
||||||
|
self.wfile.write(json.dumps(ip_stats).encode())
|
||||||
|
else:
|
||||||
|
self.wfile.write(json.dumps({'error': 'IP not found'}).encode())
|
||||||
|
except BrokenPipeError:
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
self.app_logger.error(f"Error fetching IP stats: {e}")
|
||||||
|
self.wfile.write(json.dumps({'error': str(e)}).encode())
|
||||||
|
return
|
||||||
|
|
||||||
|
# API endpoint for downloading malicious IPs file
|
||||||
|
if self.config.dashboard_secret_path and self.path == f"{self.config.dashboard_secret_path}/api/download/malicious_ips.txt":
|
||||||
|
import os
|
||||||
|
file_path = os.path.join(os.path.dirname(__file__), 'exports', 'malicious_ips.txt')
|
||||||
|
try:
|
||||||
|
if os.path.exists(file_path):
|
||||||
|
with open(file_path, 'rb') as f:
|
||||||
|
content = f.read()
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header('Content-type', 'text/plain')
|
||||||
|
self.send_header('Content-Disposition', 'attachment; filename="malicious_ips.txt"')
|
||||||
|
self.send_header('Content-Length', str(len(content)))
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(content)
|
||||||
|
else:
|
||||||
|
self.send_response(404)
|
||||||
|
self.send_header('Content-type', 'text/plain')
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(b'File not found')
|
||||||
|
except BrokenPipeError:
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
self.app_logger.error(f"Error serving malicious IPs file: {e}")
|
||||||
|
self.send_response(500)
|
||||||
|
self.send_header('Content-type', 'text/plain')
|
||||||
|
self.end_headers()
|
||||||
|
self.wfile.write(b'Internal server error')
|
||||||
|
return
|
||||||
|
|
||||||
self.tracker.record_access(client_ip, self.path, user_agent, method='GET')
|
self.tracker.record_access(client_ip, self.path, user_agent, method='GET')
|
||||||
|
|
||||||
self.analyzer.infer_user_category(client_ip)
|
# self.analyzer.infer_user_category(client_ip)
|
||||||
self.analyzer.update_ip_rep_infos(client_ip)
|
# self.analyzer.update_ip_rep_infos(client_ip)
|
||||||
|
|
||||||
if self.tracker.is_suspicious_user_agent(user_agent):
|
if self.tracker.is_suspicious_user_agent(user_agent):
|
||||||
self.access_logger.warning(f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}")
|
self.access_logger.warning(f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}")
|
||||||
|
|||||||
40
src/migrations/add_category_history.py
Normal file
40
src/migrations/add_category_history.py
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Migration script to add CategoryHistory table to existing databases.
|
||||||
|
Run this once to upgrade your database schema.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add parent directory to path to import modules
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||||
|
|
||||||
|
from database import get_database, DatabaseManager
|
||||||
|
from models import Base, CategoryHistory
|
||||||
|
|
||||||
|
|
||||||
|
def migrate():
|
||||||
|
"""Create CategoryHistory table if it doesn't exist."""
|
||||||
|
print("Starting migration: Adding CategoryHistory table...")
|
||||||
|
|
||||||
|
try:
|
||||||
|
db = get_database()
|
||||||
|
|
||||||
|
# Initialize database if not already done
|
||||||
|
if not db._initialized:
|
||||||
|
db.initialize()
|
||||||
|
|
||||||
|
# Create only the CategoryHistory table
|
||||||
|
CategoryHistory.__table__.create(db._engine, checkfirst=True)
|
||||||
|
|
||||||
|
print("✓ Migration completed successfully!")
|
||||||
|
print(" - CategoryHistory table created")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
print(f"✗ Migration failed: {e}")
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
migrate()
|
||||||
@@ -150,4 +150,59 @@ class IpStats(Base):
|
|||||||
|
|
||||||
|
|
||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"<IpStats(ip='{self.ip}', total_requests={self.total_requests})>"
|
return f"<IpStats(ip='{self.ip}', total_requests={self.total_requests})>"
|
||||||
|
|
||||||
|
|
||||||
|
class CategoryHistory(Base):
|
||||||
|
"""
|
||||||
|
Records category changes for IP addresses over time.
|
||||||
|
|
||||||
|
Tracks when an IP's category changes, storing both the previous
|
||||||
|
and new category along with timestamp for timeline visualization.
|
||||||
|
"""
|
||||||
|
__tablename__ = 'category_history'
|
||||||
|
|
||||||
|
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
|
||||||
|
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
|
||||||
|
old_category: Mapped[Optional[str]] = mapped_column(String(50), nullable=True)
|
||||||
|
new_category: Mapped[str] = mapped_column(String(50), nullable=False)
|
||||||
|
timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
|
||||||
|
|
||||||
|
# Composite index for efficient IP-based timeline queries
|
||||||
|
__table_args__ = (
|
||||||
|
Index('ix_category_history_ip_timestamp', 'ip', 'timestamp'),
|
||||||
|
)
|
||||||
|
|
||||||
|
def __repr__(self) -> str:
|
||||||
|
return f"<CategoryHistory(ip='{self.ip}', {self.old_category} -> {self.new_category})>"
|
||||||
|
|
||||||
|
|
||||||
|
# class IpLog(Base):
|
||||||
|
# """
|
||||||
|
# Records all IPs that have accessed the honeypot, along with aggregated stats and inferred user category.
|
||||||
|
# """
|
||||||
|
# __tablename__ = 'ip_logs'
|
||||||
|
|
||||||
|
# id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
|
||||||
|
# ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
|
||||||
|
# stats: Mapped[List[str]] = mapped_column(String(MAX_PATH_LENGTH))
|
||||||
|
# category: Mapped[str] = mapped_column(String(15))
|
||||||
|
# manual_category: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||||
|
# last_analysis: Mapped[datetime] = mapped_column(DateTime, index=True),
|
||||||
|
|
||||||
|
# # Relationship to attack detections
|
||||||
|
# access_logs: Mapped[List["AccessLog"]] = relationship(
|
||||||
|
# "AccessLog",
|
||||||
|
# back_populates="ip",
|
||||||
|
# cascade="all, delete-orphan"
|
||||||
|
# )
|
||||||
|
|
||||||
|
# # Indexes for common queries
|
||||||
|
# __table_args__ = (
|
||||||
|
# Index('ix_access_logs_ip_timestamp', 'ip', 'timestamp'),
|
||||||
|
# Index('ix_access_logs_is_suspicious', 'is_suspicious'),
|
||||||
|
# Index('ix_access_logs_is_honeypot_trigger', 'is_honeypot_trigger'),
|
||||||
|
# )
|
||||||
|
|
||||||
|
# def __repr__(self) -> str:
|
||||||
|
# return f"<AccessLog(id={self.id}, ip='{self.ip}', path='{self.path[:50]}')>"
|
||||||
@@ -14,6 +14,7 @@ from analyzer import Analyzer
|
|||||||
from handler import Handler
|
from handler import Handler
|
||||||
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
|
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
|
||||||
from database import initialize_database
|
from database import initialize_database
|
||||||
|
from tasks_master import get_tasksmaster
|
||||||
|
|
||||||
|
|
||||||
def print_usage():
|
def print_usage():
|
||||||
@@ -92,6 +93,10 @@ def main():
|
|||||||
except IOError:
|
except IOError:
|
||||||
app_logger.warning("Can't read input file. Using randomly generated links.")
|
app_logger.warning("Can't read input file. Using randomly generated links.")
|
||||||
|
|
||||||
|
# tasks master init
|
||||||
|
tasks_master = get_tasksmaster()
|
||||||
|
tasks_master.run_scheduled_tasks()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
app_logger.info(f'Starting deception server on port {config.port}...')
|
app_logger.info(f'Starting deception server on port {config.port}...')
|
||||||
app_logger.info(f'Timezone configured: {tz.key}')
|
app_logger.info(f'Timezone configured: {tz.key}')
|
||||||
|
|||||||
265
src/tasks/analyze_ips.py
Normal file
265
src/tasks/analyze_ips.py
Normal file
@@ -0,0 +1,265 @@
|
|||||||
|
from sqlalchemy import select
|
||||||
|
from typing import Optional
|
||||||
|
from database import get_database, DatabaseManager
|
||||||
|
from zoneinfo import ZoneInfo
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
from wordlists import get_wordlists
|
||||||
|
from config import get_config
|
||||||
|
from logger import get_app_logger
|
||||||
|
import requests
|
||||||
|
from sanitizer import sanitize_for_storage, sanitize_dict
|
||||||
|
|
||||||
|
# ----------------------
|
||||||
|
# TASK CONFIG
|
||||||
|
# ----------------------
|
||||||
|
|
||||||
|
TASK_CONFIG = {
|
||||||
|
"name": "analyze-ips",
|
||||||
|
"cron": "*/1 * * * *",
|
||||||
|
"enabled": True,
|
||||||
|
"run_when_loaded": True
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
config = get_config()
|
||||||
|
db_manager = get_database()
|
||||||
|
app_logger = get_app_logger()
|
||||||
|
|
||||||
|
http_risky_methods_threshold = config.http_risky_methods_threshold
|
||||||
|
violated_robots_threshold = config.violated_robots_threshold
|
||||||
|
uneven_request_timing_threshold = config.uneven_request_timing_threshold
|
||||||
|
user_agents_used_threshold = config.user_agents_used_threshold
|
||||||
|
attack_urls_threshold = config.attack_urls_threshold
|
||||||
|
uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
|
||||||
|
app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
|
||||||
|
score = {}
|
||||||
|
score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||||
|
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||||
|
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||||
|
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||||
|
|
||||||
|
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
|
||||||
|
weights = {
|
||||||
|
"attacker": {
|
||||||
|
"risky_http_methods": 6,
|
||||||
|
"robots_violations": 4,
|
||||||
|
"uneven_request_timing": 3,
|
||||||
|
"different_user_agents": 8,
|
||||||
|
"attack_url": 15
|
||||||
|
},
|
||||||
|
"good_crawler": {
|
||||||
|
"risky_http_methods": 1,
|
||||||
|
"robots_violations": 0,
|
||||||
|
"uneven_request_timing": 0,
|
||||||
|
"different_user_agents": 0,
|
||||||
|
"attack_url": 0
|
||||||
|
},
|
||||||
|
"bad_crawler": {
|
||||||
|
"risky_http_methods": 2,
|
||||||
|
"robots_violations": 7,
|
||||||
|
"uneven_request_timing": 0,
|
||||||
|
"different_user_agents": 5,
|
||||||
|
"attack_url": 5
|
||||||
|
},
|
||||||
|
"regular_user": {
|
||||||
|
"risky_http_methods": 0,
|
||||||
|
"robots_violations": 0,
|
||||||
|
"uneven_request_timing": 8,
|
||||||
|
"different_user_agents": 3,
|
||||||
|
"attack_url": 0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
accesses = db_manager.get_access_logs(limit=999999999)
|
||||||
|
ips = {item['ip'] for item in accesses}
|
||||||
|
|
||||||
|
for ip in ips:
|
||||||
|
ip_accesses = [item for item in accesses if item["ip"] == ip]
|
||||||
|
total_accesses_count = len(accesses)
|
||||||
|
if total_accesses_count <= 0:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Set category as "unknown" for the first 3 requests
|
||||||
|
if total_accesses_count < 3:
|
||||||
|
category = "unknown"
|
||||||
|
analyzed_metrics = {}
|
||||||
|
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
|
||||||
|
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
|
||||||
|
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
||||||
|
return 0
|
||||||
|
#--------------------- HTTP Methods ---------------------
|
||||||
|
get_accesses_count = len([item for item in ip_accesses if item["method"] == "GET"])
|
||||||
|
post_accesses_count = len([item for item in ip_accesses if item["method"] == "POST"])
|
||||||
|
put_accesses_count = len([item for item in ip_accesses if item["method"] == "PUT"])
|
||||||
|
delete_accesses_count = len([item for item in ip_accesses if item["method"] == "DELETE"])
|
||||||
|
head_accesses_count = len([item for item in ip_accesses if item["method"] == "HEAD"])
|
||||||
|
options_accesses_count = len([item for item in ip_accesses if item["method"] == "OPTIONS"])
|
||||||
|
patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"])
|
||||||
|
if total_accesses_count > http_risky_methods_threshold:
|
||||||
|
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
|
||||||
|
else:
|
||||||
|
http_method_attacker_score = 0
|
||||||
|
#print(f"HTTP Method attacker score: {http_method_attacker_score}")
|
||||||
|
if http_method_attacker_score >= http_risky_methods_threshold:
|
||||||
|
score["attacker"]["risky_http_methods"] = True
|
||||||
|
score["good_crawler"]["risky_http_methods"] = False
|
||||||
|
score["bad_crawler"]["risky_http_methods"] = True
|
||||||
|
score["regular_user"]["risky_http_methods"] = False
|
||||||
|
else:
|
||||||
|
score["attacker"]["risky_http_methods"] = False
|
||||||
|
score["good_crawler"]["risky_http_methods"] = True
|
||||||
|
score["bad_crawler"]["risky_http_methods"] = False
|
||||||
|
score["regular_user"]["risky_http_methods"] = False
|
||||||
|
#--------------------- Robots Violations ---------------------
|
||||||
|
#respect robots.txt and login/config pages access frequency
|
||||||
|
robots_disallows = []
|
||||||
|
robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt"
|
||||||
|
with open(robots_path, "r") as f:
|
||||||
|
for line in f:
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
parts = line.split(":")
|
||||||
|
|
||||||
|
if parts[0] == "Disallow":
|
||||||
|
parts[1] = parts[1].rstrip("/")
|
||||||
|
#print(f"DISALLOW {parts[1]}")
|
||||||
|
robots_disallows.append(parts[1].strip())
|
||||||
|
#if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
|
||||||
|
violated_robots_count = len([item for item in ip_accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
|
||||||
|
#print(f"Violated robots count: {violated_robots_count}")
|
||||||
|
if total_accesses_count > 0:
|
||||||
|
violated_robots_ratio = violated_robots_count / total_accesses_count
|
||||||
|
else:
|
||||||
|
violated_robots_ratio = 0
|
||||||
|
if violated_robots_ratio >= violated_robots_threshold:
|
||||||
|
score["attacker"]["robots_violations"] = True
|
||||||
|
score["good_crawler"]["robots_violations"] = False
|
||||||
|
score["bad_crawler"]["robots_violations"] = True
|
||||||
|
score["regular_user"]["robots_violations"] = False
|
||||||
|
else:
|
||||||
|
score["attacker"]["robots_violations"] = False
|
||||||
|
score["good_crawler"]["robots_violations"] = False
|
||||||
|
score["bad_crawler"]["robots_violations"] = False
|
||||||
|
score["regular_user"]["robots_violations"] = False
|
||||||
|
|
||||||
|
#--------------------- Requests Timing ---------------------
|
||||||
|
#Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
|
||||||
|
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
|
||||||
|
now_utc = datetime.now(tz=ZoneInfo('UTC'))
|
||||||
|
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
|
||||||
|
timestamps = sorted(timestamps, reverse=True)
|
||||||
|
time_diffs = []
|
||||||
|
for i in range(0, len(timestamps)-1):
|
||||||
|
diff = (timestamps[i] - timestamps[i+1]).total_seconds()
|
||||||
|
time_diffs.append(diff)
|
||||||
|
|
||||||
|
mean = 0
|
||||||
|
variance = 0
|
||||||
|
std = 0
|
||||||
|
cv = 0
|
||||||
|
if time_diffs:
|
||||||
|
mean = sum(time_diffs) / len(time_diffs)
|
||||||
|
variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
|
||||||
|
std = variance ** 0.5
|
||||||
|
cv = std/mean
|
||||||
|
app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
|
||||||
|
if cv >= uneven_request_timing_threshold:
|
||||||
|
score["attacker"]["uneven_request_timing"] = True
|
||||||
|
score["good_crawler"]["uneven_request_timing"] = False
|
||||||
|
score["bad_crawler"]["uneven_request_timing"] = False
|
||||||
|
score["regular_user"]["uneven_request_timing"] = True
|
||||||
|
else:
|
||||||
|
score["attacker"]["uneven_request_timing"] = False
|
||||||
|
score["good_crawler"]["uneven_request_timing"] = False
|
||||||
|
score["bad_crawler"]["uneven_request_timing"] = False
|
||||||
|
score["regular_user"]["uneven_request_timing"] = False
|
||||||
|
#--------------------- Different User Agents ---------------------
|
||||||
|
#Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
|
||||||
|
user_agents_used = [item["user_agent"] for item in ip_accesses]
|
||||||
|
user_agents_used = list(dict.fromkeys(user_agents_used))
|
||||||
|
#print(f"User agents used: {user_agents_used}")
|
||||||
|
# Flag each category's "different_user_agents" signal based on how many
# distinct user agents this IP presented.
if len(user_agents_used) >= user_agents_used_threshold:
    score["attacker"]["different_user_agents"] = True
    score["good_crawler"]["different_user_agents"] = False
    # FIX: key was misspelled "different_user_agentss" here, so the real
    # "different_user_agents" key was never set on this branch and the
    # score summation below could KeyError (or read a stale value).
    score["bad_crawler"]["different_user_agents"] = True
    score["regular_user"]["different_user_agents"] = False
else:
    score["attacker"]["different_user_agents"] = False
    score["good_crawler"]["different_user_agents"] = False
    score["bad_crawler"]["different_user_agents"] = False
    score["regular_user"]["different_user_agents"] = False
|
||||||
|
#--------------------- Attack URLs ---------------------
|
||||||
|
attack_urls_found_list = []
|
||||||
|
wl = get_wordlists()
|
||||||
|
if wl.attack_patterns:
|
||||||
|
queried_paths = [item["path"] for item in ip_accesses]
|
||||||
|
for queried_path in queried_paths:
|
||||||
|
# URL decode the path to catch encoded attacks
|
||||||
|
try:
|
||||||
|
decoded_path = urllib.parse.unquote(queried_path)
|
||||||
|
# Double decode to catch double-encoded attacks
|
||||||
|
decoded_path_twice = urllib.parse.unquote(decoded_path)
|
||||||
|
except Exception:
|
||||||
|
decoded_path = queried_path
|
||||||
|
decoded_path_twice = queried_path
|
||||||
|
|
||||||
|
for name, pattern in wl.attack_patterns.items():
|
||||||
|
# Check original, decoded, and double-decoded paths
|
||||||
|
if (re.search(pattern, queried_path, re.IGNORECASE) or
|
||||||
|
re.search(pattern, decoded_path, re.IGNORECASE) or
|
||||||
|
re.search(pattern, decoded_path_twice, re.IGNORECASE)):
|
||||||
|
attack_urls_found_list.append(f"{name}: {pattern}")
|
||||||
|
|
||||||
|
#remove duplicates
|
||||||
|
attack_urls_found_list = set(attack_urls_found_list)
|
||||||
|
attack_urls_found_list = list(attack_urls_found_list)
|
||||||
|
|
||||||
|
if len(attack_urls_found_list) >= attack_urls_threshold:
|
||||||
|
score["attacker"]["attack_url"] = True
|
||||||
|
score["good_crawler"]["attack_url"] = False
|
||||||
|
score["bad_crawler"]["attack_url"] = False
|
||||||
|
score["regular_user"]["attack_url"] = False
|
||||||
|
else:
|
||||||
|
score["attacker"]["attack_url"] = False
|
||||||
|
score["good_crawler"]["attack_url"] = False
|
||||||
|
score["bad_crawler"]["attack_url"] = False
|
||||||
|
score["regular_user"]["attack_url"] = False
|
||||||
|
#--------------------- Calculate score ---------------------
|
||||||
|
attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
|
||||||
|
attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
|
||||||
|
attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
|
||||||
|
attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
|
||||||
|
attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
|
||||||
|
attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
|
||||||
|
good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
|
||||||
|
good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
|
||||||
|
good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
|
||||||
|
good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
|
||||||
|
good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
|
||||||
|
bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
|
||||||
|
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
|
||||||
|
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
|
||||||
|
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
|
||||||
|
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
|
||||||
|
regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
|
||||||
|
regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
|
||||||
|
regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
|
||||||
|
regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
|
||||||
|
regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
|
||||||
|
score_details = f"""
|
||||||
|
Attacker score: {attacker_score}
|
||||||
|
Good Crawler score: {good_crawler_score}
|
||||||
|
Bad Crawler score: {bad_crawler_score}
|
||||||
|
Regular User score: {regular_user_score}
|
||||||
|
"""
|
||||||
|
app_logger.debug(score_details)
|
||||||
|
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
|
||||||
|
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
|
||||||
|
category = max(category_scores, key=category_scores.get)
|
||||||
|
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
|
||||||
|
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
||||||
|
return
|
||||||
59
src/tasks/fetch_ip_rep.py
Normal file
59
src/tasks/fetch_ip_rep.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
from sqlalchemy import select
|
||||||
|
from typing import Optional
|
||||||
|
from database import get_database, DatabaseManager
|
||||||
|
from zoneinfo import ZoneInfo
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
import re
|
||||||
|
import urllib.parse
|
||||||
|
from wordlists import get_wordlists
|
||||||
|
from config import get_config
|
||||||
|
from logger import get_app_logger
|
||||||
|
import requests
|
||||||
|
from sanitizer import sanitize_for_storage, sanitize_dict
|
||||||
|
|
||||||
|
# ----------------------
|
||||||
|
# TASK CONFIG
|
||||||
|
# ----------------------
|
||||||
|
|
||||||
|
# Task metadata read by TasksMaster when this module is loaded from the
# tasks folder (see tasks_master.py: modules must expose TASK_CONFIG + main).
TASK_CONFIG = {
    "name": "fetch-ip-rep",    # job name shown in scheduler logs
    "cron": "*/1 * * * *",     # run every minute
    "enabled": True,           # disabled tasks are skipped at load time
    "run_when_loaded": True    # also fire immediately when the scheduler starts
}
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Fetch IP reputation info for every IP seen in the access logs.

    For each distinct IP in the access logs, queries the iprep API and, when
    a result comes back, sanitizes the geo/ASN/listing fields and stores them
    via ``db_manager.update_ip_rep_infos``.

    Failures for a single IP (network error, timeout, bad JSON, missing keys)
    are logged and skipped so one bad lookup cannot abort the whole task.
    """
    get_config()  # ensure the config singleton is initialized (value unused here)
    db_manager = get_database()
    app_logger = get_app_logger()

    # Distinct IPs observed in the access logs.
    accesses = db_manager.get_access_logs(limit=999999999)
    ips = {item['ip'] for item in accesses}

    # Loop-invariant request pieces, hoisted out of the per-IP loop.
    api_url = "https://iprep.lcrawl.com/api/iprep/"
    headers = {
        "Content-Type": "application/json"
    }

    for ip in ips:
        params = {
            "cidr": ip
        }
        try:
            # Timeout so a single unresponsive lookup cannot hang the
            # scheduled task forever (the original call had no timeout).
            response = requests.get(api_url, headers=headers, params=params, timeout=10)
            response.raise_for_status()
            payload = response.json()

            if payload["results"]:
                data = payload["results"][0]
                country_iso_code = data["geoip_data"]["country_iso_code"]
                asn = data["geoip_data"]["asn_autonomous_system_number"]
                asn_org = data["geoip_data"]["asn_autonomous_system_organization"]
                list_on = data["list_on"]

                # Sanitize before storage (length-capped values).
                sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
                sanitized_asn = sanitize_for_storage(asn, 100)
                sanitized_asn_org = sanitize_for_storage(asn_org, 100)
                sanitized_list_on = sanitize_dict(list_on, 100000)

                db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on)
        except Exception as e:
            # Isolate per-IP failures: log and move on to the next IP.
            app_logger.error(f"IP reputation fetch failed for {ip}: {e}")
            continue

    return
|
||||||
57
src/tasks/top_attacking_ips.py
Normal file
57
src/tasks/top_attacking_ips.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
# tasks/export_malicious_ips.py
|
||||||
|
|
||||||
|
import os
|
||||||
|
from logger import get_app_logger
|
||||||
|
from database import get_database
|
||||||
|
from models import AccessLog
|
||||||
|
from sqlalchemy import distinct
|
||||||
|
|
||||||
|
# Module-level logger shared by this task.
app_logger = get_app_logger()

# ----------------------
# TASK CONFIG
# ----------------------
# Task metadata read by TasksMaster when this module is loaded from the
# tasks folder (modules must expose TASK_CONFIG + main to be scheduled).
TASK_CONFIG = {
    "name": "export-malicious-ips",   # job name shown in scheduler logs
    "cron": "*/5 * * * *",            # run every 5 minutes
    "enabled": True,                  # disabled tasks are skipped at load time
    "run_when_loaded": True           # also fire immediately when the scheduler starts
}

# NOTE(review): relative path — resolved against the process working
# directory (presumably /app in the container); confirm against entrypoint.
EXPORTS_DIR = "exports"
# Destination file for the exported suspicious-IP list (one IP per line).
OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
|
||||||
|
|
||||||
|
# ----------------------
|
||||||
|
# TASK LOGIC
|
||||||
|
# ----------------------
|
||||||
|
def main():
    """
    Export all IPs flagged as suspicious to a text file.

    Queries distinct IPs from AccessLog rows where ``is_suspicious`` is true
    and writes them, one per line, to ``OUTPUT_FILE`` (creating the exports
    directory if needed). TasksMaster calls this on the cron schedule.
    """
    task_name = TASK_CONFIG.get("name")
    app_logger.info(f"[Background Task] {task_name} starting...")

    # Bind up-front so the finally block is safe even if get_database()
    # raises (the original code hit a NameError on `db` in that case).
    db = None
    try:
        db = get_database()
        session = db.session

        # Query distinct suspicious IPs
        results = session.query(distinct(AccessLog.ip)).filter(
            AccessLog.is_suspicious == True
        ).all()

        # Ensure exports directory exists
        os.makedirs(EXPORTS_DIR, exist_ok=True)

        # Write IPs to file (one per line); rows come back as 1-tuples.
        with open(OUTPUT_FILE, 'w') as f:
            for (ip,) in results:
                f.write(f"{ip}\n")

        app_logger.info(f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}")

    except Exception as e:
        app_logger.error(f"[Background Task] {task_name} failed: {e}")
    finally:
        if db is not None:
            db.close_session()
|
||||||
288
src/tasks_master.py
Normal file
288
src/tasks_master.py
Normal file
@@ -0,0 +1,288 @@
|
|||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import datetime
|
||||||
|
import functools
|
||||||
|
import threading
|
||||||
|
import importlib
|
||||||
|
import importlib.util
|
||||||
|
|
||||||
|
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
|
||||||
|
|
||||||
|
app_logger = get_app_logger()
|
||||||
|
|
||||||
|
try:
|
||||||
|
from apscheduler.schedulers.background import BackgroundScheduler
|
||||||
|
from apscheduler.triggers.cron import CronTrigger
|
||||||
|
from apscheduler.events import EVENT_JOB_EXECUTED, EVENT_JOB_ERROR
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
msg = (
|
||||||
|
"Required modules are not installed. "
|
||||||
|
"Can not continue with module / application loading.\n"
|
||||||
|
"Install it with: pip install -r requirements"
|
||||||
|
)
|
||||||
|
print(msg, file=sys.stderr)
|
||||||
|
app_logger.error(msg)
|
||||||
|
exit()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- TASKSMASTER CLASS ----------
|
||||||
|
class TasksMaster:
    """Discovers task modules in TASKS_FOLDER and schedules their ``main()``
    functions on an APScheduler BackgroundScheduler.

    A task module is eligible when it exposes both:
      - ``TASK_CONFIG`` (dict): at least 'name' and 'cron'; optional
        'enabled' and 'run_when_loaded' (both default False here).
      - ``main`` (callable): the task entry point, scheduled as the job.
    """

    # Fallback crontab used when a task does not declare its own schedule.
    TASK_DEFAULT_CRON = '*/15 * * * *'
    # Max random delay (seconds) APScheduler adds per run to spread out jobs.
    TASK_JITTER = 240
    # Folder scanned for task modules, relative to this file.
    TASKS_FOLDER = os.path.join(os.path.dirname(__file__), "tasks")

    def __init__(self, scheduler: BackgroundScheduler):
        """Load task definitions from disk and attach a job result listener.

        :param scheduler: APScheduler scheduler the jobs will be added to.
        """
        self.tasks = self._config_tasks()
        self.scheduler = scheduler
        # job_id -> datetime of the job's most recent completion (success or error)
        self.last_run_times = {}
        # Listener fires on both successful execution and job errors.
        self.scheduler.add_listener(self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)

    def _config_tasks(self):
        """
        Loads tasks from the TASKS_FOLDER and logs how many were found.
        """
        tasks_defined = self._load_tasks_from_folder(self.TASKS_FOLDER)
        app_logger.info(f"Scheduled Tasks Loaded from folder: {self.TASKS_FOLDER}")
        return tasks_defined

    def _load_tasks_from_folder(self, folder_path):
        """
        Loads and registers task modules from a specified folder.

        This function scans the given folder for Python (.py) files, dynamically
        imports each as a module, and looks for two attributes:
        - TASK_CONFIG: A dictionary containing task metadata, specifically the
          'name' and 'cron' (cron schedule string).
        - main: A callable function that represents the task's execution logic.

        Tasks with both attributes are added to a list with their configuration and
        execution function.

        Args:
            folder_path (str): Path to the folder containing task scripts.

        Returns:
            list[dict]: A list of task definitions with keys:
                - 'name' (str): The name of the task.
                - 'filename' (str): The file the task was loaded from.
                - 'cron' (str): The crontab string for scheduling.
                - 'enabled' (bool): Whether the task is enabled.
                - 'run_when_loaded' (bool): Whether to run the task immediately.
        """
        tasks = []

        if not os.path.exists(folder_path):
            app_logger.error(f"{folder_path} does not exist! Unable to load tasks!")
            return tasks

        # we sort the files so that we have a set order, which helps with debugging
        for filename in sorted(os.listdir(folder_path)):

            # skip any non python files, as well as any __pycache__ or .pyc files that might creep in there
            if not filename.endswith('.py') or filename.startswith("__"):
                continue

            path = os.path.join(folder_path, filename)
            module_name = filename[:-3]
            # Dynamic import under the "tasks." namespace so relative imports
            # and later importlib.import_module calls resolve consistently.
            spec = importlib.util.spec_from_file_location(f"tasks.{module_name}", path)
            module = importlib.util.module_from_spec(spec)
            try:
                spec.loader.exec_module(module)
                # Register in sys.modules so _schedule_task's import_module finds it.
                sys.modules[f"tasks.{module_name}"] = module
            except Exception as e:
                # NOTE(review): log message doesn't name the failing file —
                # consider including `filename` for easier debugging.
                app_logger.error(f"Failed to import (unknown): {e}")
                continue

            # if we have a tasks config and a main function, we attempt to schedule it
            if hasattr(module, 'TASK_CONFIG') and hasattr(module, 'main'):

                # ensure task_config is a dict
                if not isinstance(module.TASK_CONFIG, dict):
                    app_logger.error(f"TASK_CONFIG is not a dict in (unknown). Skipping task.")
                    continue

                # Fall back to the class default when 'cron' is absent or falsy.
                task_cron = module.TASK_CONFIG.get("cron") or self.TASK_DEFAULT_CRON
                task_name = module.TASK_CONFIG.get("name", module_name)

                # ensure the task_cron is a valid cron value
                try:
                    CronTrigger.from_crontab(task_cron)
                except ValueError as ve:
                    app_logger.error(f"Invalid cron format for task {task_name}: {ve} - Skipping this task")
                    continue

                task = {
                    'name': module.TASK_CONFIG.get('name', module_name),
                    'filename': filename,
                    'cron': task_cron,
                    "enabled": module.TASK_CONFIG.get("enabled", False),
                    "run_when_loaded": module.TASK_CONFIG.get("run_when_loaded", False)
                }

                tasks.append(task)

            # we are missing things, and we log what's missing
            else:
                if not hasattr(module, 'TASK_CONFIG'):
                    app_logger.warning(f"Missing TASK_CONFIG in (unknown)")
                elif not hasattr(module, 'main'):
                    app_logger.warning(f"Missing main() in (unknown)")

        return tasks

    def _add_jobs(self):
        """Register every enabled task from self.tasks with the scheduler."""
        # for each task in the tasks config file...
        for task_to_run in self.tasks:

            # remember, these tasks, are built from the "load_tasks_from_folder" function,
            # if you want to pass data from the TASKS_CONFIG dict, you need to pass it there to get it here.
            task_name = task_to_run.get("name")
            run_when_loaded = task_to_run.get("run_when_loaded")
            module_name = os.path.splitext(task_to_run.get("filename"))[0]
            task_enabled = task_to_run.get("enabled", False)

            # if no crontab set for this task, we use 15 as the default.
            task_cron = task_to_run.get("cron") or self.TASK_DEFAULT_CRON

            # if task is disabled, skip this one
            if not task_enabled:
                app_logger.info(f"{task_name} is disabled in client config. Skipping task")
                continue
            try:
                # Re-check the file still exists on disk before scheduling.
                if os.path.isfile(os.path.join(self.TASKS_FOLDER, task_to_run.get("filename"))):
                    # schedule the task now that everything has checked out above...
                    self._schedule_task(task_name, module_name, task_cron, run_when_loaded)
                    app_logger.info(f"Scheduled {module_name} cron is set to {task_cron}.", extra={"task": task_to_run})
                else:
                    app_logger.info(f"Skipping invalid or unsafe file: {task_to_run.get('filename')}", extra={"task": task_to_run})

            except Exception as e:
                app_logger.error(f"Error scheduling task: {e}", extra={"tasks": task_to_run})

    def _schedule_task(self, task_name, module_name, task_cron, run_when_loaded):
        """Import tasks.<module_name> and add its main() as a cron job.

        :param task_name: display name used for the job and in logs.
        :param module_name: module filename without the .py extension.
        :param task_cron: crontab string (validated earlier; defaulted if None).
        :param run_when_loaded: when truthy, the job also runs immediately.
        """
        try:
            # Dynamically import the module
            module = importlib.import_module(f"tasks.{module_name}")

            # Check if the module has a 'main' function
            if hasattr(module, 'main'):
                app_logger.info(f"Scheduling {task_name} - {module_name} Main Function")

                # unique_job_id
                job_identifier = f"{module_name}__{task_name}"

                # little insurance to make sure the cron is set to something and not none
                if task_cron is None:
                    task_cron = self.TASK_DEFAULT_CRON

                trigger = CronTrigger.from_crontab(task_cron)

                # schedule the task / job
                if run_when_loaded:
                    app_logger.info(f"Task: {task_name} is set to run instantly. Scheduling to run on scheduler start")

                    # next_run_time=now makes the first run fire immediately;
                    # max_instances=1 prevents overlapping runs of the same job.
                    self.scheduler.add_job(
                        module.main,
                        trigger,
                        id=job_identifier,
                        jitter=self.TASK_JITTER,
                        name=task_name,
                        next_run_time=datetime.datetime.now(),
                        max_instances=1
                    )
                else:
                    self.scheduler.add_job(
                        module.main,
                        trigger,
                        id=job_identifier,
                        jitter=self.TASK_JITTER,
                        name=task_name,
                        max_instances=1
                    )
            else:
                app_logger.error(f"{module_name} does not define a 'main' function.")

        except Exception as e:
            app_logger.error(f"Failed to load {module_name}: {e}")

    def job_listener(self, event):
        """APScheduler listener: record last run time and log the outcome.

        Fires for both EVENT_JOB_EXECUTED and EVENT_JOB_ERROR (see __init__).
        """
        job_id = event.job_id
        # Naive local time, consistent with next_run_time in _schedule_task.
        self.last_run_times[job_id] = datetime.datetime.now()

        if event.exception:
            app_logger.error(f"Job {event.job_id} failed: {event.exception}")
        else:
            app_logger.info(f"Job {event.job_id} completed successfully.")

    def list_jobs(self):
        """Return the currently scheduled jobs as a list of
        {'id', 'name', 'next_run'} dicts."""
        scheduled_jobs = self.scheduler.get_jobs()
        jobs_list = []

        for job in scheduled_jobs:
            jobs_list.append({
                "id": job.id,
                "name": job.name,
                "next_run": job.next_run_time,
            })
        return jobs_list

    def run_scheduled_tasks(self):
        """
        Runs and schedules enabled tasks using the background scheduler.

        This method performs the following:
        1. Retrieves the current task configurations and updates internal state.
        2. Adds new jobs to the scheduler based on the latest configuration.
        3. Starts the scheduler to begin executing tasks at their defined intervals.

        This ensures the scheduler is always running with the most up-to-date
        task definitions and enabled status.
        """

        # Add enabled tasks to the scheduler
        self._add_jobs()

        # Start the scheduler to begin executing the scheduled tasks (if not already running)
        if not self.scheduler.running:
            self.scheduler.start()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------- SINGLETON WRAPPER ----------
|
||||||
|
# NOTE: legacy module-level alias kept for backward compatibility; it is the
# builtin `type`, not a real TypeVar, and is no longer used below.
T = type


def singleton_loader(func):
    """Decorator that caches the first result of *func* and returns that same
    object on every subsequent call (a thread-safe, per-function singleton).

    Arguments supplied on later calls are ignored once an instance exists.

    FIX: the previous annotations (``cache: dict[str, T]``, ``-> T`` with
    ``T = type``) incorrectly claimed the wrapper returns a ``type``; the
    wrapper returns whatever *func* returns, so the bogus annotations are
    removed.
    """
    # Keyed by function name; one cached instance per decorated function.
    cache = {}
    lock = threading.Lock()

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Lock guards the check-then-create so concurrent first calls still
        # produce exactly one instance.
        with lock:
            if func.__name__ not in cache:
                cache[func.__name__] = func(*args, **kwargs)
            return cache[func.__name__]
    return wrapper
|
||||||
|
|
||||||
|
|
||||||
|
@singleton_loader
def get_tasksmaster(scheduler: BackgroundScheduler | None = None) -> TasksMaster:
    """Return the process-wide TasksMaster singleton.

    A BackgroundScheduler is created when none is supplied, and the scheduler
    is started as soon as the singleton is constructed. Because of
    @singleton_loader, a scheduler passed on any later call is ignored — the
    first instance is always returned.

    :param scheduler: Optional APScheduler instance. If None, a new BackgroundScheduler will be created.
    """
    active_scheduler = scheduler if scheduler is not None else BackgroundScheduler()

    instance = TasksMaster(active_scheduler)

    # Kick the scheduler off the moment the singleton comes into existence.
    if not active_scheduler.running:
        active_scheduler.start()
        app_logger.info("TasksMaster scheduler started automatically with singleton creation.")

    return instance
|
||||||
@@ -7,6 +7,7 @@ Customize this template to change the dashboard appearance.
|
|||||||
|
|
||||||
import html
|
import html
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
from zoneinfo import ZoneInfo
|
||||||
|
|
||||||
def _escape(value) -> str:
|
def _escape(value) -> str:
|
||||||
"""Escape HTML special characters to prevent XSS attacks."""
|
"""Escape HTML special characters to prevent XSS attacks."""
|
||||||
@@ -14,22 +15,52 @@ def _escape(value) -> str:
|
|||||||
return ""
|
return ""
|
||||||
return html.escape(str(value))
|
return html.escape(str(value))
|
||||||
|
|
||||||
def format_timestamp(iso_timestamp: str) -> str:
|
def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool = False) -> str:
|
||||||
"""Format ISO timestamp for display (YYYY-MM-DD HH:MM:SS)"""
|
"""Format ISO timestamp for display with timezone conversion
|
||||||
|
|
||||||
|
Args:
|
||||||
|
iso_timestamp: ISO format timestamp string (UTC)
|
||||||
|
timezone: IANA timezone string to convert to
|
||||||
|
time_only: If True, return only HH:MM:SS, otherwise full datetime
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
|
# Parse UTC timestamp
|
||||||
dt = datetime.fromisoformat(iso_timestamp)
|
dt = datetime.fromisoformat(iso_timestamp)
|
||||||
|
# Convert to target timezone
|
||||||
|
if dt.tzinfo is not None:
|
||||||
|
dt = dt.astimezone(ZoneInfo(timezone))
|
||||||
|
|
||||||
|
if time_only:
|
||||||
|
return dt.strftime("%H:%M:%S")
|
||||||
return dt.strftime("%Y-%m-%d %H:%M:%S")
|
return dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||||
except Exception:
|
except Exception:
|
||||||
# Fallback for old format
|
# Fallback for old format
|
||||||
return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
|
return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
|
||||||
|
|
||||||
|
|
||||||
def generate_dashboard(stats: dict) -> str:
|
def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str = '') -> str:
|
||||||
"""Generate dashboard HTML with access statistics"""
|
"""Generate dashboard HTML with access statistics
|
||||||
|
|
||||||
# Generate IP rows (IPs are generally safe but escape for consistency)
|
Args:
|
||||||
|
stats: Statistics dictionary
|
||||||
|
timezone: IANA timezone string (e.g., 'Europe/Paris', 'America/New_York')
|
||||||
|
dashboard_path: The secret dashboard path for generating API URLs
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Generate IP rows with clickable functionality for dropdown stats
|
||||||
top_ips_rows = '\n'.join([
|
top_ips_rows = '\n'.join([
|
||||||
f'<tr><td class="rank">{i+1}</td><td>{_escape(ip)}</td><td>{count}</td></tr>'
|
f'''<tr class="ip-row" data-ip="{_escape(ip)}">
|
||||||
|
<td class="rank">{i+1}</td>
|
||||||
|
<td class="ip-clickable">{_escape(ip)}</td>
|
||||||
|
<td>{count}</td>
|
||||||
|
</tr>
|
||||||
|
<tr class="ip-stats-row" id="stats-row-{_escape(ip).replace(".", "-")}" style="display: none;">
|
||||||
|
<td colspan="3" class="ip-stats-cell">
|
||||||
|
<div class="ip-stats-dropdown">
|
||||||
|
<div class="loading">Loading stats...</div>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>'''
|
||||||
for i, (ip, count) in enumerate(stats['top_ips'])
|
for i, (ip, count) in enumerate(stats['top_ips'])
|
||||||
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
|
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
|
||||||
|
|
||||||
@@ -45,27 +76,76 @@ def generate_dashboard(stats: dict) -> str:
|
|||||||
for i, (ua, count) in enumerate(stats['top_user_agents'])
|
for i, (ua, count) in enumerate(stats['top_user_agents'])
|
||||||
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
|
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
|
||||||
|
|
||||||
# Generate suspicious accesses rows (CRITICAL: multiple user-controlled fields)
|
# Generate suspicious accesses rows with clickable IPs
|
||||||
suspicious_rows = '\n'.join([
|
suspicious_rows = '\n'.join([
|
||||||
f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["path"])}</td><td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
|
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
|
||||||
|
<td class="ip-clickable">{_escape(log["ip"])}</td>
|
||||||
|
<td>{_escape(log["path"])}</td>
|
||||||
|
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
|
||||||
|
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
|
||||||
|
</tr>
|
||||||
|
<tr class="ip-stats-row" id="stats-row-suspicious-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
||||||
|
<td colspan="4" class="ip-stats-cell">
|
||||||
|
<div class="ip-stats-dropdown">
|
||||||
|
<div class="loading">Loading stats...</div>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>'''
|
||||||
for log in stats['recent_suspicious'][-10:]
|
for log in stats['recent_suspicious'][-10:]
|
||||||
]) or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
|
]) or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
|
||||||
|
|
||||||
# Generate honeypot triggered IPs rows
|
# Generate honeypot triggered IPs rows with clickable IPs
|
||||||
honeypot_rows = '\n'.join([
|
honeypot_rows = '\n'.join([
|
||||||
f'<tr><td>{_escape(ip)}</td><td style="word-break: break-all;">{_escape(", ".join(paths))}</td><td>{len(paths)}</td></tr>'
|
f'''<tr class="ip-row" data-ip="{_escape(ip)}">
|
||||||
|
<td class="ip-clickable">{_escape(ip)}</td>
|
||||||
|
<td style="word-break: break-all;">{_escape(", ".join(paths))}</td>
|
||||||
|
<td>{len(paths)}</td>
|
||||||
|
</tr>
|
||||||
|
<tr class="ip-stats-row" id="stats-row-honeypot-{_escape(ip).replace(".", "-")}" style="display: none;">
|
||||||
|
<td colspan="3" class="ip-stats-cell">
|
||||||
|
<div class="ip-stats-dropdown">
|
||||||
|
<div class="loading">Loading stats...</div>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>'''
|
||||||
for ip, paths in stats.get('honeypot_triggered_ips', [])
|
for ip, paths in stats.get('honeypot_triggered_ips', [])
|
||||||
]) or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>'
|
]) or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>'
|
||||||
|
|
||||||
# Generate attack types rows (CRITICAL: paths and user agents are user-controlled)
|
# Generate attack types rows with clickable IPs
|
||||||
attack_type_rows = '\n'.join([
|
attack_type_rows = '\n'.join([
|
||||||
f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["path"])}</td><td>{_escape(", ".join(log["attack_types"]))}</td><td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
|
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
|
||||||
|
<td class="ip-clickable">{_escape(log["ip"])}</td>
|
||||||
|
<td>{_escape(log["path"])}</td>
|
||||||
|
<td>{_escape(", ".join(log["attack_types"]))}</td>
|
||||||
|
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
|
||||||
|
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
|
||||||
|
</tr>
|
||||||
|
<tr class="ip-stats-row" id="stats-row-attack-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
||||||
|
<td colspan="5" class="ip-stats-cell">
|
||||||
|
<div class="ip-stats-dropdown">
|
||||||
|
<div class="loading">Loading stats...</div>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>'''
|
||||||
for log in stats.get('attack_types', [])[-10:]
|
for log in stats.get('attack_types', [])[-10:]
|
||||||
]) or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
|
]) or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
|
||||||
|
|
||||||
# Generate credential attempts rows (CRITICAL: usernames and passwords are user-controlled)
|
# Generate credential attempts rows with clickable IPs
|
||||||
credential_rows = '\n'.join([
|
credential_rows = '\n'.join([
|
||||||
f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["username"])}</td><td>{_escape(log["password"])}</td><td>{_escape(log["path"])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
|
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
|
||||||
|
<td class="ip-clickable">{_escape(log["ip"])}</td>
|
||||||
|
<td>{_escape(log["username"])}</td>
|
||||||
|
<td>{_escape(log["password"])}</td>
|
||||||
|
<td>{_escape(log["path"])}</td>
|
||||||
|
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
|
||||||
|
</tr>
|
||||||
|
<tr class="ip-stats-row" id="stats-row-cred-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
||||||
|
<td colspan="5" class="ip-stats-cell">
|
||||||
|
<div class="ip-stats-dropdown">
|
||||||
|
<div class="loading">Loading stats...</div>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>'''
|
||||||
for log in stats.get('credential_attempts', [])[-20:]
|
for log in stats.get('credential_attempts', [])[-20:]
|
||||||
]) or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'
|
]) or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'
|
||||||
|
|
||||||
@@ -85,12 +165,36 @@ def generate_dashboard(stats: dict) -> str:
|
|||||||
.container {{
|
.container {{
|
||||||
max-width: 1400px;
|
max-width: 1400px;
|
||||||
margin: 0 auto;
|
margin: 0 auto;
|
||||||
|
position: relative;
|
||||||
}}
|
}}
|
||||||
h1 {{
|
h1 {{
|
||||||
color: #58a6ff;
|
color: #58a6ff;
|
||||||
text-align: center;
|
text-align: center;
|
||||||
margin-bottom: 40px;
|
margin-bottom: 40px;
|
||||||
}}
|
}}
|
||||||
|
.download-section {{
|
||||||
|
position: absolute;
|
||||||
|
top: 0;
|
||||||
|
right: 0;
|
||||||
|
}}
|
||||||
|
.download-btn {{
|
||||||
|
display: inline-block;
|
||||||
|
padding: 8px 14px;
|
||||||
|
background: #238636;
|
||||||
|
color: #ffffff;
|
||||||
|
text-decoration: none;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-weight: 500;
|
||||||
|
font-size: 13px;
|
||||||
|
transition: background 0.2s;
|
||||||
|
border: 1px solid #2ea043;
|
||||||
|
}}
|
||||||
|
.download-btn:hover {{
|
||||||
|
background: #2ea043;
|
||||||
|
}}
|
||||||
|
.download-btn:active {{
|
||||||
|
background: #1f7a2f;
|
||||||
|
}}
|
||||||
.stats-grid {{
|
.stats-grid {{
|
||||||
display: grid;
|
display: grid;
|
||||||
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
||||||
@@ -180,10 +284,202 @@ def generate_dashboard(stats: dict) -> str:
|
|||||||
content: '▼';
|
content: '▼';
|
||||||
opacity: 1;
|
opacity: 1;
|
||||||
}}
|
}}
|
||||||
|
.ip-row {{
|
||||||
|
transition: background-color 0.2s;
|
||||||
|
}}
|
||||||
|
.ip-clickable {{
|
||||||
|
cursor: pointer;
|
||||||
|
color: #58a6ff !important;
|
||||||
|
font-weight: 500;
|
||||||
|
text-decoration: underline;
|
||||||
|
text-decoration-style: dotted;
|
||||||
|
text-underline-offset: 3px;
|
||||||
|
}}
|
||||||
|
.ip-clickable:hover {{
|
||||||
|
color: #79c0ff !important;
|
||||||
|
text-decoration-style: solid;
|
||||||
|
background: #1c2128;
|
||||||
|
}}
|
||||||
|
.ip-stats-row {{
|
||||||
|
background: #0d1117;
|
||||||
|
}}
|
||||||
|
.ip-stats-cell {{
|
||||||
|
padding: 0 !important;
|
||||||
|
}}
|
||||||
|
.ip-stats-dropdown {{
|
||||||
|
margin-top: 10px;
|
||||||
|
padding: 15px;
|
||||||
|
background: #0d1117;
|
||||||
|
border: 1px solid #30363d;
|
||||||
|
border-radius: 6px;
|
||||||
|
font-size: 13px;
|
||||||
|
display: flex;
|
||||||
|
gap: 20px;
|
||||||
|
}}
|
||||||
|
.stats-left {{
|
||||||
|
flex: 1;
|
||||||
|
}}
|
||||||
|
.stats-right {{
|
||||||
|
flex: 0 0 200px;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
}}
|
||||||
|
.radar-chart {{
|
||||||
|
position: relative;
|
||||||
|
width: 220px;
|
||||||
|
height: 220px;
|
||||||
|
overflow: visible;
|
||||||
|
}}
|
||||||
|
.radar-legend {{
|
||||||
|
margin-top: 10px;
|
||||||
|
font-size: 11px;
|
||||||
|
}}
|
||||||
|
.radar-legend-item {{
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 6px;
|
||||||
|
margin: 3px 0;
|
||||||
|
}}
|
||||||
|
.radar-legend-color {{
|
||||||
|
width: 12px;
|
||||||
|
height: 12px;
|
||||||
|
border-radius: 2px;
|
||||||
|
}}
|
||||||
|
.ip-stats-dropdown .loading {{
|
||||||
|
color: #8b949e;
|
||||||
|
font-style: italic;
|
||||||
|
}}
|
||||||
|
.stat-row {{
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
padding: 5px 0;
|
||||||
|
border-bottom: 1px solid #21262d;
|
||||||
|
}}
|
||||||
|
.stat-row:last-child {{
|
||||||
|
border-bottom: none;
|
||||||
|
}}
|
||||||
|
.stat-label-sm {{
|
||||||
|
color: #8b949e;
|
||||||
|
font-weight: 500;
|
||||||
|
}}
|
||||||
|
.stat-value-sm {{
|
||||||
|
color: #58a6ff;
|
||||||
|
font-weight: 600;
|
||||||
|
}}
|
||||||
|
.category-badge {{
|
||||||
|
display: inline-block;
|
||||||
|
padding: 4px 8px;
|
||||||
|
border-radius: 4px;
|
||||||
|
font-size: 12px;
|
||||||
|
font-weight: 600;
|
||||||
|
text-transform: uppercase;
|
||||||
|
}}
|
||||||
|
.category-attacker {{
|
||||||
|
background: #f851491a;
|
||||||
|
color: #f85149;
|
||||||
|
border: 1px solid #f85149;
|
||||||
|
}}
|
||||||
|
.category-good-crawler {{
|
||||||
|
background: #3fb9501a;
|
||||||
|
color: #3fb950;
|
||||||
|
border: 1px solid #3fb950;
|
||||||
|
}}
|
||||||
|
.category-bad-crawler {{
|
||||||
|
background: #f0883e1a;
|
||||||
|
color: #f0883e;
|
||||||
|
border: 1px solid #f0883e;
|
||||||
|
}}
|
||||||
|
.category-regular-user {{
|
||||||
|
background: #58a6ff1a;
|
||||||
|
color: #58a6ff;
|
||||||
|
border: 1px solid #58a6ff;
|
||||||
|
}}
|
||||||
|
.category-unknown {{
|
||||||
|
background: #8b949e1a;
|
||||||
|
color: #8b949e;
|
||||||
|
border: 1px solid #8b949e;
|
||||||
|
}}
|
||||||
|
.timeline-container {{
|
||||||
|
margin-top: 15px;
|
||||||
|
padding-top: 15px;
|
||||||
|
border-top: 1px solid #30363d;
|
||||||
|
}}
|
||||||
|
.timeline-title {{
|
||||||
|
color: #58a6ff;
|
||||||
|
font-size: 13px;
|
||||||
|
font-weight: 600;
|
||||||
|
margin-bottom: 10px;
|
||||||
|
}}
|
||||||
|
.timeline {{
|
||||||
|
position: relative;
|
||||||
|
padding-left: 30px;
|
||||||
|
}}
|
||||||
|
.timeline::before {{
|
||||||
|
content: '';
|
||||||
|
position: absolute;
|
||||||
|
left: 12px;
|
||||||
|
top: 5px;
|
||||||
|
bottom: 5px;
|
||||||
|
width: 3px;
|
||||||
|
background: #30363d;
|
||||||
|
}}
|
||||||
|
.timeline-item {{
|
||||||
|
position: relative;
|
||||||
|
padding-bottom: 15px;
|
||||||
|
}}
|
||||||
|
.timeline-item:last-child {{
|
||||||
|
padding-bottom: 0;
|
||||||
|
}}
|
||||||
|
.timeline-marker {{
|
||||||
|
position: absolute;
|
||||||
|
left: -26px;
|
||||||
|
width: 16px;
|
||||||
|
height: 16px;
|
||||||
|
border-radius: 50%;
|
||||||
|
border: 2px solid #0d1117;
|
||||||
|
}}
|
||||||
|
.timeline-marker.attacker {{
|
||||||
|
background: #f85149;
|
||||||
|
}}
|
||||||
|
.timeline-marker.good-crawler {{
|
||||||
|
background: #3fb950;
|
||||||
|
}}
|
||||||
|
.timeline-marker.bad-crawler {{
|
||||||
|
background: #f0883e;
|
||||||
|
}}
|
||||||
|
.timeline-marker.regular-user {{
|
||||||
|
background: #58a6ff;
|
||||||
|
}}
|
||||||
|
.timeline-marker.unknown {{
|
||||||
|
background: #8b949e;
|
||||||
|
}}
|
||||||
|
.timeline-content {{
|
||||||
|
font-size: 12px;
|
||||||
|
}}
|
||||||
|
.timeline-category {{
|
||||||
|
font-weight: 600;
|
||||||
|
}}
|
||||||
|
.timeline-timestamp {{
|
||||||
|
color: #8b949e;
|
||||||
|
font-size: 11px;
|
||||||
|
margin-top: 2px;
|
||||||
|
}}
|
||||||
|
.timeline-arrow {{
|
||||||
|
color: #8b949e;
|
||||||
|
margin: 0 7px;
|
||||||
|
}}
|
||||||
|
|
||||||
</style>
|
</style>
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body>
|
||||||
<div class="container">
|
<div class="container">
|
||||||
|
<div class="download-section">
|
||||||
|
<a href="{dashboard_path}/api/download/malicious_ips.txt" class="download-btn" download>
|
||||||
|
Export Malicious IPs
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
<h1>Krawl Dashboard</h1>
|
<h1>Krawl Dashboard</h1>
|
||||||
|
|
||||||
<div class="stats-grid">
|
<div class="stats-grid">
|
||||||
@@ -331,6 +627,31 @@ def generate_dashboard(stats: dict) -> str:
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<script>
|
<script>
|
||||||
|
// Server timezone configuration
|
||||||
|
const SERVER_TIMEZONE = '{timezone}';
|
||||||
|
const DASHBOARD_PATH = '{dashboard_path}';
|
||||||
|
|
||||||
|
// Convert UTC timestamp to configured timezone
|
||||||
|
function formatTimestamp(isoTimestamp) {{
|
||||||
|
if (!isoTimestamp) return 'N/A';
|
||||||
|
try {{
|
||||||
|
const date = new Date(isoTimestamp);
|
||||||
|
return date.toLocaleString('en-US', {{
|
||||||
|
timeZone: SERVER_TIMEZONE,
|
||||||
|
year: 'numeric',
|
||||||
|
month: '2-digit',
|
||||||
|
day: '2-digit',
|
||||||
|
hour: '2-digit',
|
||||||
|
minute: '2-digit',
|
||||||
|
second: '2-digit',
|
||||||
|
hour12: false
|
||||||
|
}});
|
||||||
|
}} catch (err) {{
|
||||||
|
console.error('Error formatting timestamp:', err);
|
||||||
|
return new Date(isoTimestamp).toLocaleString();
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
|
||||||
// Add sorting functionality to tables
|
// Add sorting functionality to tables
|
||||||
document.querySelectorAll('th.sortable').forEach(header => {{
|
document.querySelectorAll('th.sortable').forEach(header => {{
|
||||||
header.addEventListener('click', function() {{
|
header.addEventListener('click', function() {{
|
||||||
@@ -387,6 +708,248 @@ def generate_dashboard(stats: dict) -> str:
|
|||||||
rows.forEach(row => tbody.appendChild(row));
|
rows.forEach(row => tbody.appendChild(row));
|
||||||
}});
|
}});
|
||||||
}});
|
}});
|
||||||
|
|
||||||
|
// IP stats dropdown functionality
|
||||||
|
document.querySelectorAll('.ip-clickable').forEach(cell => {{
|
||||||
|
cell.addEventListener('click', async function(e) {{
|
||||||
|
const row = e.currentTarget.closest('.ip-row');
|
||||||
|
if (!row) return;
|
||||||
|
|
||||||
|
const ip = row.getAttribute('data-ip');
|
||||||
|
const statsRow = row.nextElementSibling;
|
||||||
|
if (!statsRow || !statsRow.classList.contains('ip-stats-row')) return;
|
||||||
|
|
||||||
|
const isVisible = getComputedStyle(statsRow).display !== 'none';
|
||||||
|
|
||||||
|
document.querySelectorAll('.ip-stats-row').forEach(r => {{
|
||||||
|
r.style.display = 'none';
|
||||||
|
}});
|
||||||
|
|
||||||
|
if (isVisible) return;
|
||||||
|
|
||||||
|
statsRow.style.display = 'table-row';
|
||||||
|
|
||||||
|
const dropdown = statsRow.querySelector('.ip-stats-dropdown');
|
||||||
|
|
||||||
|
// Always fetch fresh data from database
|
||||||
|
if (dropdown) {{
|
||||||
|
dropdown.innerHTML = '<div class="loading">Loading stats...</div>';
|
||||||
|
try {{
|
||||||
|
const response = await fetch(`${{DASHBOARD_PATH}}/api/ip-stats/${{ip}}`, {{
|
||||||
|
cache: 'no-store',
|
||||||
|
headers: {{
|
||||||
|
'Cache-Control': 'no-cache',
|
||||||
|
'Pragma': 'no-cache'
|
||||||
|
}}
|
||||||
|
}});
|
||||||
|
if (!response.ok) throw new Error(`HTTP ${{response.status}}`);
|
||||||
|
|
||||||
|
const data = await response.json();
|
||||||
|
dropdown.innerHTML = data.error
|
||||||
|
? `<div style="color:#f85149;">Error: ${{data.error}}</div>`
|
||||||
|
: formatIpStats(data);
|
||||||
|
}} catch (err) {{
|
||||||
|
dropdown.innerHTML = `<div style="color:#f85149;">Failed to load stats: ${{err.message}}</div>`;
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
}});
|
||||||
|
}});
|
||||||
|
|
||||||
|
function formatIpStats(stats) {{
|
||||||
|
let html = '<div class="stats-left">';
|
||||||
|
|
||||||
|
// Basic info
|
||||||
|
html += '<div class="stat-row">';
|
||||||
|
html += '<span class="stat-label-sm">Total Requests:</span>';
|
||||||
|
html += `<span class="stat-value-sm">${{stats.total_requests || 0}}</span>`;
|
||||||
|
html += '</div>';
|
||||||
|
|
||||||
|
html += '<div class="stat-row">';
|
||||||
|
html += '<span class="stat-label-sm">First Seen:</span>';
|
||||||
|
html += `<span class="stat-value-sm">${{formatTimestamp(stats.first_seen)}}</span>`;
|
||||||
|
html += '</div>';
|
||||||
|
|
||||||
|
html += '<div class="stat-row">';
|
||||||
|
html += '<span class="stat-label-sm">Last Seen:</span>';
|
||||||
|
html += `<span class="stat-value-sm">${{formatTimestamp(stats.last_seen)}}</span>`;
|
||||||
|
html += '</div>';
|
||||||
|
|
||||||
|
// Category
|
||||||
|
if (stats.category) {{
|
||||||
|
html += '<div class="stat-row">';
|
||||||
|
html += '<span class="stat-label-sm">Category:</span>';
|
||||||
|
const categoryClass = 'category-' + stats.category.toLowerCase().replace('_', '-');
|
||||||
|
html += `<span class="category-badge ${{categoryClass}}">${{stats.category}}</span>`;
|
||||||
|
html += '</div>';
|
||||||
|
}}
|
||||||
|
|
||||||
|
// GeoIP info if available
|
||||||
|
if (stats.country_code || stats.city) {{
|
||||||
|
html += '<div class="stat-row">';
|
||||||
|
html += '<span class="stat-label-sm">Location:</span>';
|
||||||
|
html += `<span class="stat-value-sm">${{stats.city || ''}}${{stats.city && stats.country_code ? ', ' : ''}}${{stats.country_code || 'Unknown'}}</span>`;
|
||||||
|
html += '</div>';
|
||||||
|
}}
|
||||||
|
|
||||||
|
if (stats.asn_org) {{
|
||||||
|
html += '<div class="stat-row">';
|
||||||
|
html += '<span class="stat-label-sm">ASN Org:</span>';
|
||||||
|
html += `<span class="stat-value-sm">${{stats.asn_org}}</span>`;
|
||||||
|
html += '</div>';
|
||||||
|
}}
|
||||||
|
|
||||||
|
// Reputation score if available
|
||||||
|
if (stats.reputation_score !== null && stats.reputation_score !== undefined) {{
|
||||||
|
html += '<div class="stat-row">';
|
||||||
|
html += '<span class="stat-label-sm">Reputation Score:</span>';
|
||||||
|
html += `<span class="stat-value-sm">${{stats.reputation_score}} ${{stats.reputation_source ? '(' + stats.reputation_source + ')' : ''}}</span>`;
|
||||||
|
html += '</div>';
|
||||||
|
}}
|
||||||
|
|
||||||
|
// Category History Timeline
|
||||||
|
if (stats.category_history && stats.category_history.length > 0) {{
|
||||||
|
html += '<div class="timeline-container">';
|
||||||
|
html += '<div class="timeline-title">Behavior Timeline</div>';
|
||||||
|
html += '<div class="timeline">';
|
||||||
|
|
||||||
|
stats.category_history.forEach((change, index) => {{
|
||||||
|
const categoryClass = change.new_category.toLowerCase().replace('_', '-');
|
||||||
|
const timestamp = formatTimestamp(change.timestamp);
|
||||||
|
|
||||||
|
html += '<div class="timeline-item">';
|
||||||
|
html += `<div class="timeline-marker ${{categoryClass}}"></div>`;
|
||||||
|
html += '<div class="timeline-content">';
|
||||||
|
|
||||||
|
if (change.old_category) {{
|
||||||
|
const oldCategoryBadge = 'category-' + change.old_category.toLowerCase().replace('_', '-');
|
||||||
|
html += `<span class="category-badge ${{oldCategoryBadge}}">${{change.old_category}}</span>`;
|
||||||
|
html += '<span class="timeline-arrow">→</span>';
|
||||||
|
}} else {{
|
||||||
|
html += '<span style="color: #8b949e;">Initial:</span> ';
|
||||||
|
}}
|
||||||
|
|
||||||
|
const newCategoryBadge = 'category-' + change.new_category.toLowerCase().replace('_', '-');
|
||||||
|
html += `<span class="category-badge ${{newCategoryBadge}}">${{change.new_category}}</span>`;
|
||||||
|
html += `<div class="timeline-timestamp">${{timestamp}}</div>`;
|
||||||
|
html += '</div>';
|
||||||
|
html += '</div>';
|
||||||
|
}});
|
||||||
|
|
||||||
|
html += '</div>';
|
||||||
|
html += '</div>';
|
||||||
|
}}
|
||||||
|
|
||||||
|
html += '</div>';
|
||||||
|
|
||||||
|
// Radar chart on the right
|
||||||
|
if (stats.category_scores && Object.keys(stats.category_scores).length > 0) {{
|
||||||
|
html += '<div class="stats-right">';
|
||||||
|
html += '<div style="font-size: 13px; font-weight: 600; color: #58a6ff; margin-bottom: 10px;">Category Score</div>';
|
||||||
|
html += '<svg class="radar-chart" viewBox="-30 -30 260 260" preserveAspectRatio="xMidYMid meet">';
|
||||||
|
|
||||||
|
const scores = {{
|
||||||
|
attacker: stats.category_scores.attacker || 0,
|
||||||
|
good_crawler: stats.category_scores.good_crawler || 0,
|
||||||
|
bad_crawler: stats.category_scores.bad_crawler || 0,
|
||||||
|
regular_user: stats.category_scores.regular_user || 0,
|
||||||
|
unknown: stats.category_scores.unknown || 0
|
||||||
|
}};
|
||||||
|
|
||||||
|
// Normalize scores for better visualization
|
||||||
|
const maxScore = Math.max(...Object.values(scores), 1);
|
||||||
|
const minVisibleRadius = 0.15; // Minimum 15% visibility even for 0 values
|
||||||
|
const normalizedScores = {{}};
|
||||||
|
|
||||||
|
Object.keys(scores).forEach(key => {{
|
||||||
|
// Scale values: ensure minimum visibility + proportional to max
|
||||||
|
normalizedScores[key] = minVisibleRadius + (scores[key] / maxScore) * (1 - minVisibleRadius);
|
||||||
|
}});
|
||||||
|
|
||||||
|
const colors = {{
|
||||||
|
attacker: '#f85149',
|
||||||
|
good_crawler: '#3fb950',
|
||||||
|
bad_crawler: '#f0883e',
|
||||||
|
regular_user: '#58a6ff',
|
||||||
|
unknown: '#8b949e'
|
||||||
|
}};
|
||||||
|
|
||||||
|
const labels = {{
|
||||||
|
attacker: 'Attacker',
|
||||||
|
good_crawler: 'Good Bot',
|
||||||
|
bad_crawler: 'Bad Bot',
|
||||||
|
regular_user: 'User',
|
||||||
|
unknown: 'Unknown'
|
||||||
|
}};
|
||||||
|
|
||||||
|
// Draw radar background grid
|
||||||
|
const cx = 100, cy = 100, maxRadius = 75;
|
||||||
|
for (let i = 1; i <= 5; i++) {{
|
||||||
|
const r = (maxRadius / 5) * i;
|
||||||
|
html += `<circle cx="${{cx}}" cy="${{cy}}" r="${{r}}" fill="none" stroke="#30363d" stroke-width="0.5"/>`;
|
||||||
|
}}
|
||||||
|
|
||||||
|
// Draw axes (now with 5 points for pentagon)
|
||||||
|
const angles = [0, 72, 144, 216, 288];
|
||||||
|
const keys = ['good_crawler', 'regular_user', 'unknown', 'bad_crawler', 'attacker'];
|
||||||
|
|
||||||
|
angles.forEach((angle, i) => {{
|
||||||
|
const rad = (angle - 90) * Math.PI / 180;
|
||||||
|
const x2 = cx + maxRadius * Math.cos(rad);
|
||||||
|
const y2 = cy + maxRadius * Math.sin(rad);
|
||||||
|
html += `<line x1="${{cx}}" y1="${{cy}}" x2="${{x2}}" y2="${{y2}}" stroke="#30363d" stroke-width="0.5"/>`;
|
||||||
|
|
||||||
|
// Add labels at consistent distance
|
||||||
|
const labelDist = maxRadius + 35;
|
||||||
|
const lx = cx + labelDist * Math.cos(rad);
|
||||||
|
const ly = cy + labelDist * Math.sin(rad);
|
||||||
|
html += `<text x="${{lx}}" y="${{ly}}" fill="#8b949e" font-size="12" text-anchor="middle" dominant-baseline="middle">${{labels[keys[i]]}}</text>`;
|
||||||
|
}});
|
||||||
|
|
||||||
|
// Draw filled polygon for scores
|
||||||
|
let points = [];
|
||||||
|
angles.forEach((angle, i) => {{
|
||||||
|
const normalizedScore = normalizedScores[keys[i]];
|
||||||
|
const rad = (angle - 90) * Math.PI / 180;
|
||||||
|
const r = normalizedScore * maxRadius;
|
||||||
|
const x = cx + r * Math.cos(rad);
|
||||||
|
const y = cy + r * Math.sin(rad);
|
||||||
|
points.push(`${{x}},${{y}}`);
|
||||||
|
}});
|
||||||
|
|
||||||
|
// Determine dominant category color
|
||||||
|
const dominantKey = Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? a : b);
|
||||||
|
const dominantColor = colors[dominantKey];
|
||||||
|
|
||||||
|
// Draw single colored area
|
||||||
|
html += `<polygon points="${{points.join(' ')}}" fill="${{dominantColor}}" fill-opacity="0.4" stroke="${{dominantColor}}" stroke-width="2.5"/>`;
|
||||||
|
|
||||||
|
// Draw points
|
||||||
|
angles.forEach((angle, i) => {{
|
||||||
|
const normalizedScore = normalizedScores[keys[i]];
|
||||||
|
const rad = (angle - 90) * Math.PI / 180;
|
||||||
|
const r = normalizedScore * maxRadius;
|
||||||
|
const x = cx + r * Math.cos(rad);
|
||||||
|
const y = cy + r * Math.sin(rad);
|
||||||
|
html += `<circle cx="${{x}}" cy="${{y}}" r="4.5" fill="${{colors[keys[i]]}}" stroke="#0d1117" stroke-width="2"/>`;
|
||||||
|
}});
|
||||||
|
|
||||||
|
html += '</svg>';
|
||||||
|
|
||||||
|
// Legend
|
||||||
|
html += '<div class="radar-legend">';
|
||||||
|
keys.forEach(key => {{
|
||||||
|
html += '<div class="radar-legend-item">';
|
||||||
|
html += `<div class="radar-legend-color" style="background: ${{colors[key]}};"></div>`;
|
||||||
|
html += `<span style="color: #8b949e;">${{labels[key]}}: ${{scores[key]}} pt</span>`;
|
||||||
|
html += '</div>';
|
||||||
|
}});
|
||||||
|
html += '</div>';
|
||||||
|
|
||||||
|
html += '</div>';
|
||||||
|
}}
|
||||||
|
|
||||||
|
return html;
|
||||||
|
}}
|
||||||
</script>
|
</script>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|||||||
@@ -46,21 +46,12 @@
|
|||||||
gap: 10px;
|
gap: 10px;
|
||||||
align-items: center;
|
align-items: center;
|
||||||
overflow-y: auto;
|
overflow-y: auto;
|
||||||
|
overflow-x: hidden;
|
||||||
flex: 1;
|
flex: 1;
|
||||||
padding-top: 10px;
|
padding-top: 10px;
|
||||||
}}
|
}}
|
||||||
.links-container::-webkit-scrollbar {{
|
.links-container::-webkit-scrollbar {{
|
||||||
width: 8px;
|
width: 0px;
|
||||||
}}
|
|
||||||
.links-container::-webkit-scrollbar-track {{
|
|
||||||
background: #0d1117;
|
|
||||||
}}
|
|
||||||
.links-container::-webkit-scrollbar-thumb {{
|
|
||||||
background: #30363d;
|
|
||||||
border-radius: 4px;
|
|
||||||
}}
|
|
||||||
.links-container::-webkit-scrollbar-thumb:hover {{
|
|
||||||
background: #484f58;
|
|
||||||
}}
|
}}
|
||||||
.link-box {{
|
.link-box {{
|
||||||
background: #161b22;
|
background: #161b22;
|
||||||
|
|||||||
@@ -131,7 +131,8 @@ class Wordlists:
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def attack_urls(self):
|
def attack_urls(self):
|
||||||
return self._data.get("attack_urls", [])
|
"""Deprecated: use attack_patterns instead. Returns attack_patterns for backward compatibility."""
|
||||||
|
return self._data.get("attack_patterns", {})
|
||||||
|
|
||||||
|
|
||||||
_wordlists_instance = None
|
_wordlists_instance = None
|
||||||
|
|||||||
@@ -353,11 +353,14 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"attack_patterns": {
|
"attack_patterns": {
|
||||||
"path_traversal": "\\.\\.",
|
"path_traversal": "(\\.\\.|%2e%2e|%252e%252e|\\.{2,}|%c0%ae|%c1%9c)",
|
||||||
"sql_injection": "('|\"|`|--|#|/\\*|\\*/|\\bunion\\b|\\bunion\\s+select\\b|\\bor\\b.*=.*|\\band\\b.*=.*|'.*or.*'.*=.*'|\\bsleep\\b|\\bwaitfor\\b|\\bdelay\\b|\\bbenchmark\\b|;.*select|;.*drop|;.*insert|;.*update|;.*delete|\\bexec\\b|\\bexecute\\b|\\bxp_cmdshell\\b|information_schema|table_schema|table_name)",
|
"sql_injection": "('|\"|`|--|#|/\\*|\\*/|\\bunion\\b|\\bunion\\s+select\\b|\\bor\\b.*=.*|\\band\\b.*=.*|'.*or.*'.*=.*'|\\bsleep\\b|\\bwaitfor\\b|\\bdelay\\b|\\bbenchmark\\b|;.*select|;.*drop|;.*insert|;.*update|;.*delete|\\bexec\\b|\\bexecute\\b|\\bxp_cmdshell\\b|information_schema|table_schema|table_name)",
|
||||||
"xss_attempt": "(<script|</script|javascript:|onerror=|onload=|onclick=|onmouseover=|onfocus=|onblur=|<iframe|<img|<svg|<embed|<object|<body|<input|eval\\(|alert\\(|prompt\\(|confirm\\(|document\\.|window\\.|<style|expression\\(|vbscript:|data:text/html)",
|
"xss_attempt": "(<script|</script|javascript:|onerror=|onload=|onclick=|onmouseover=|onfocus=|onblur=|<iframe|<img|<svg|<embed|<object|<body|<input|eval\\(|alert\\(|prompt\\(|confirm\\(|document\\.|window\\.|<style|expression\\(|vbscript:|data:text/html)",
|
||||||
"common_probes": "(wp-admin|phpmyadmin|\\.env|\\.git|/admin|/config)",
|
"shell_injection": "(\\||;|`|\\$\\(|&&|\\bnc\\b|\\bnetcat\\b|\\bwget\\b|\\bcurl\\b|/bin/bash|/bin/sh|cmd\\.exe)",
|
||||||
"shell_injection": "(\\||;|`|\\$\\(|&&)"
|
"lfi_rfi": "(file://|php://|expect://|data://|zip://|phar://|/etc/passwd|/etc/shadow|/proc/self|c:\\\\windows)",
|
||||||
|
"xxe_injection": "(<!ENTITY|<!DOCTYPE|SYSTEM|PUBLIC)",
|
||||||
|
"ldap_injection": "(\\*\\)|\\(\\||\\(&)",
|
||||||
|
"command_injection": "(&&|\\|\\||;|\\$\\{|\\$\\(|`)"
|
||||||
},
|
},
|
||||||
"server_headers": [
|
"server_headers": [
|
||||||
"Apache/2.4.41 (Ubuntu)",
|
"Apache/2.4.41 (Ubuntu)",
|
||||||
@@ -366,11 +369,5 @@
|
|||||||
"cloudflare",
|
"cloudflare",
|
||||||
"AmazonS3",
|
"AmazonS3",
|
||||||
"gunicorn/20.1.0"
|
"gunicorn/20.1.0"
|
||||||
],
|
]
|
||||||
"attack_urls": {
|
|
||||||
"path_traversal": "\\.\\.",
|
|
||||||
"sql_injection": "('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
|
|
||||||
"xss_attempt": "(<script|javascript:|onerror=|onload=)",
|
|
||||||
"shell_injection": "(\\||;|`|\\$\\(|&&)"
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user