Merge pull request #34 from BlessedRebuS/feat/scoring-alogorithm

Feat/scoring alogorithm
Phillip Tarrant
2026-01-10 13:30:18 -06:00
committed by GitHub
11 changed files with 726 additions and 277 deletions

.gitignore (vendored, 2 changes)

@@ -78,4 +78,4 @@ data/
personal-values.yaml
#exports dir (keeping .gitkeep so we have the dir)
/exports/*
/exports/*


@@ -23,7 +23,7 @@ canary:
dashboard:
# if set to null, a random path is auto-generated
# can be set to "/dashboard" or similar <-- note this MUST include a forward slash
secret_path: dashboard
secret_path: super-secret-dashboard-path
api:
server_url: null


@@ -16,9 +16,3 @@ services:
environment:
- CONFIG_LOCATION=config.yaml
restart: unless-stopped
healthcheck:
test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s
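
A side note on the removed healthcheck: `requests.get` raises only on connection failures, not on HTTP error statuses, so the probe above passed even when the app answered 500. If the check is ever restored, a stricter body might look like this sketch (not part of this PR):

```python
# Hypothetical stricter healthcheck body: exits non-zero on connection
# errors AND on 4xx/5xx responses, with a bounded timeout.
import requests

resp = requests.get("http://localhost:5000", timeout=5)
resp.raise_for_status()
```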


@@ -10,6 +10,9 @@ import urllib.parse
from wordlists import get_wordlists
from config import get_config
from logger import get_app_logger
import requests
from sanitizer import sanitize_for_storage, sanitize_dict
"""
Functions for user activity analysis
"""
@@ -49,264 +52,299 @@ class Analyzer:
pass
return self._db_manager
def infer_user_category(self, ip: str) -> str:
# def infer_user_category(self, ip: str) -> str:
config = get_config()
# config = get_config()
http_risky_methods_threshold = config.http_risky_methods_threshold
violated_robots_threshold = config.violated_robots_threshold
uneven_request_timing_threshold = config.uneven_request_timing_threshold
user_agents_used_threshold = config.user_agents_used_threshold
attack_urls_threshold = config.attack_urls_threshold
uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
# http_risky_methods_threshold = config.http_risky_methods_threshold
# violated_robots_threshold = config.violated_robots_threshold
# uneven_request_timing_threshold = config.uneven_request_timing_threshold
# user_agents_used_threshold = config.user_agents_used_threshold
# attack_urls_threshold = config.attack_urls_threshold
# uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
# app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
score = {}
score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
# score = {}
# score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
# score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
# score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
# score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
weights = {
"attacker": {
"risky_http_methods": 6,
"robots_violations": 4,
"uneven_request_timing": 3,
"different_user_agents": 8,
"attack_url": 15
},
"good_crawler": {
"risky_http_methods": 1,
"robots_violations": 0,
"uneven_request_timing": 0,
"different_user_agents": 0,
"attack_url": 0
},
"bad_crawler": {
"risky_http_methods": 2,
"robots_violations": 7,
"uneven_request_timing": 0,
"different_user_agents": 5,
"attack_url": 5
},
"regular_user": {
"risky_http_methods": 0,
"robots_violations": 0,
"uneven_request_timing": 8,
"different_user_agents": 3,
"attack_url": 0
}
}
# #1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
# weights = {
# "attacker": {
# "risky_http_methods": 6,
# "robots_violations": 4,
# "uneven_request_timing": 3,
# "different_user_agents": 8,
# "attack_url": 15
# },
# "good_crawler": {
# "risky_http_methods": 1,
# "robots_violations": 0,
# "uneven_request_timing": 0,
# "different_user_agents": 0,
# "attack_url": 0
# },
# "bad_crawler": {
# "risky_http_methods": 2,
# "robots_violations": 7,
# "uneven_request_timing": 0,
# "different_user_agents": 5,
# "attack_url": 5
# },
# "regular_user": {
# "risky_http_methods": 0,
# "robots_violations": 0,
# "uneven_request_timing": 8,
# "different_user_agents": 3,
# "attack_url": 0
# }
# }
accesses = self.db.get_access_logs(ip_filter = ip, limit=1000)
total_accesses_count = len(accesses)
if total_accesses_count <= 0:
return
# accesses = self.db.get_access_logs(ip_filter = ip, limit=1000)
# total_accesses_count = len(accesses)
# if total_accesses_count <= 0:
# return
# Set category as "unknown" for the first 5 requests
if total_accesses_count < 3:
category = "unknown"
analyzed_metrics = {}
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
return 0
# # Set category as "unknown" for the first 5 requests
# if total_accesses_count < 3:
# category = "unknown"
# analyzed_metrics = {}
# category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
# last_analysis = datetime.now(tz=ZoneInfo('UTC'))
# self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
# return 0
#--------------------- HTTP Methods ---------------------
# #--------------------- HTTP Methods ---------------------
get_accesses_count = len([item for item in accesses if item["method"] == "GET"])
post_accesses_count = len([item for item in accesses if item["method"] == "POST"])
put_accesses_count = len([item for item in accesses if item["method"] == "PUT"])
delete_accesses_count = len([item for item in accesses if item["method"] == "DELETE"])
head_accesses_count = len([item for item in accesses if item["method"] == "HEAD"])
options_accesses_count = len([item for item in accesses if item["method"] == "OPTIONS"])
patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"])
# get_accesses_count = len([item for item in accesses if item["method"] == "GET"])
# post_accesses_count = len([item for item in accesses if item["method"] == "POST"])
# put_accesses_count = len([item for item in accesses if item["method"] == "PUT"])
# delete_accesses_count = len([item for item in accesses if item["method"] == "DELETE"])
# head_accesses_count = len([item for item in accesses if item["method"] == "HEAD"])
# options_accesses_count = len([item for item in accesses if item["method"] == "OPTIONS"])
# patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"])
if total_accesses_count > http_risky_methods_threshold:
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
else:
http_method_attacker_score = 0
# if total_accesses_count > http_risky_methods_threshold:
# http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
# else:
# http_method_attacker_score = 0
#print(f"HTTP Method attacker score: {http_method_attacker_score}")
if http_method_attacker_score >= http_risky_methods_threshold:
score["attacker"]["risky_http_methods"] = True
score["good_crawler"]["risky_http_methods"] = False
score["bad_crawler"]["risky_http_methods"] = True
score["regular_user"]["risky_http_methods"] = False
else:
score["attacker"]["risky_http_methods"] = False
score["good_crawler"]["risky_http_methods"] = True
score["bad_crawler"]["risky_http_methods"] = False
score["regular_user"]["risky_http_methods"] = False
# #print(f"HTTP Method attacker score: {http_method_attacker_score}")
# if http_method_attacker_score >= http_risky_methods_threshold:
# score["attacker"]["risky_http_methods"] = True
# score["good_crawler"]["risky_http_methods"] = False
# score["bad_crawler"]["risky_http_methods"] = True
# score["regular_user"]["risky_http_methods"] = False
# else:
# score["attacker"]["risky_http_methods"] = False
# score["good_crawler"]["risky_http_methods"] = True
# score["bad_crawler"]["risky_http_methods"] = False
# score["regular_user"]["risky_http_methods"] = False
#--------------------- Robots Violations ---------------------
#respect robots.txt and login/config pages access frequency
robots_disallows = []
robots_path = Path(__file__).parent / "templates" / "html" / "robots.txt"
with open(robots_path, "r") as f:
for line in f:
line = line.strip()
if not line:
continue
parts = line.split(":")
# #--------------------- Robots Violations ---------------------
# #respect robots.txt and login/config pages access frequency
# robots_disallows = []
# robots_path = Path(__file__).parent / "templates" / "html" / "robots.txt"
# with open(robots_path, "r") as f:
# for line in f:
# line = line.strip()
# if not line:
# continue
# parts = line.split(":")
if parts[0] == "Disallow":
parts[1] = parts[1].rstrip("/")
#print(f"DISALLOW {parts[1]}")
robots_disallows.append(parts[1].strip())
# if parts[0] == "Disallow":
# parts[1] = parts[1].rstrip("/")
# #print(f"DISALLOW {parts[1]}")
# robots_disallows.append(parts[1].strip())
# if no robots.txt rules are violated, it is almost certainly a good crawler; if more than ~10% of requests violate them, likely a bad crawler or attacker
violated_robots_count = len([item for item in accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
#print(f"Violated robots count: {violated_robots_count}")
if total_accesses_count > 0:
violated_robots_ratio = violated_robots_count / total_accesses_count
else:
violated_robots_ratio = 0
# # if no robots.txt rules are violated, it is almost certainly a good crawler; if more than ~10% of requests violate them, likely a bad crawler or attacker
# violated_robots_count = len([item for item in accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
# #print(f"Violated robots count: {violated_robots_count}")
# if total_accesses_count > 0:
# violated_robots_ratio = violated_robots_count / total_accesses_count
# else:
# violated_robots_ratio = 0
if violated_robots_ratio >= violated_robots_threshold:
score["attacker"]["robots_violations"] = True
score["good_crawler"]["robots_violations"] = False
score["bad_crawler"]["robots_violations"] = True
score["regular_user"]["robots_violations"] = False
else:
score["attacker"]["robots_violations"] = False
score["good_crawler"]["robots_violations"] = False
score["bad_crawler"]["robots_violations"] = False
score["regular_user"]["robots_violations"] = False
# if violated_robots_ratio >= violated_robots_threshold:
# score["attacker"]["robots_violations"] = True
# score["good_crawler"]["robots_violations"] = False
# score["bad_crawler"]["robots_violations"] = True
# score["regular_user"]["robots_violations"] = False
# else:
# score["attacker"]["robots_violations"] = False
# score["good_crawler"]["robots_violations"] = False
# score["bad_crawler"]["robots_violations"] = False
# score["regular_user"]["robots_violations"] = False
#--------------------- Requests Timing ---------------------
#Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses]
now_utc = datetime.now(tz=ZoneInfo('UTC'))
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
timestamps = sorted(timestamps, reverse=True)
# #--------------------- Requests Timing ---------------------
# #Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
# timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses]
# now_utc = datetime.now(tz=ZoneInfo('UTC'))
# timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
# timestamps = sorted(timestamps, reverse=True)
time_diffs = []
for i in range(0, len(timestamps)-1):
diff = (timestamps[i] - timestamps[i+1]).total_seconds()
time_diffs.append(diff)
# time_diffs = []
# for i in range(0, len(timestamps)-1):
# diff = (timestamps[i] - timestamps[i+1]).total_seconds()
# time_diffs.append(diff)
mean = 0
variance = 0
std = 0
cv = 0
if time_diffs:
mean = sum(time_diffs) / len(time_diffs)
variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
std = variance ** 0.5
cv = std / mean if mean else 0
app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
# mean = 0
# variance = 0
# std = 0
# cv = 0
# if time_diffs:
# mean = sum(time_diffs) / len(time_diffs)
# variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
# std = variance ** 0.5
# cv = std / mean if mean else 0
# app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
if cv >= uneven_request_timing_threshold:
score["attacker"]["uneven_request_timing"] = True
score["good_crawler"]["uneven_request_timing"] = False
score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = True
else:
score["attacker"]["uneven_request_timing"] = False
score["good_crawler"]["uneven_request_timing"] = False
score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = False
# if cv >= uneven_request_timing_threshold:
# score["attacker"]["uneven_request_timing"] = True
# score["good_crawler"]["uneven_request_timing"] = False
# score["bad_crawler"]["uneven_request_timing"] = False
# score["regular_user"]["uneven_request_timing"] = True
# else:
# score["attacker"]["uneven_request_timing"] = False
# score["good_crawler"]["uneven_request_timing"] = False
# score["bad_crawler"]["uneven_request_timing"] = False
# score["regular_user"]["uneven_request_timing"] = False
#--------------------- Different User Agents ---------------------
#Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
user_agents_used = [item["user_agent"] for item in accesses]
user_agents_used = list(dict.fromkeys(user_agents_used))
#print(f"User agents used: {user_agents_used}")
# #--------------------- Different User Agents ---------------------
# #Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
# user_agents_used = [item["user_agent"] for item in accesses]
# user_agents_used = list(dict.fromkeys(user_agents_used))
# #print(f"User agents used: {user_agents_used}")
if len(user_agents_used) >= user_agents_used_threshold:
score["attacker"]["different_user_agents"] = True
score["good_crawler"]["different_user_agents"] = False
score["bad_crawler"]["different_user_agentss"] = True
score["regular_user"]["different_user_agents"] = False
else:
score["attacker"]["different_user_agents"] = False
score["good_crawler"]["different_user_agents"] = False
score["bad_crawler"]["different_user_agents"] = False
score["regular_user"]["different_user_agents"] = False
# if len(user_agents_used) >= user_agents_used_threshold:
# score["attacker"]["different_user_agents"] = True
# score["good_crawler"]["different_user_agents"] = False
# score["bad_crawler"]["different_user_agentss"] = True
# score["regular_user"]["different_user_agents"] = False
# else:
# score["attacker"]["different_user_agents"] = False
# score["good_crawler"]["different_user_agents"] = False
# score["bad_crawler"]["different_user_agents"] = False
# score["regular_user"]["different_user_agents"] = False
#--------------------- Attack URLs ---------------------
# #--------------------- Attack URLs ---------------------
attack_urls_found_list = []
# attack_urls_found_list = []
wl = get_wordlists()
if wl.attack_patterns:
queried_paths = [item["path"] for item in accesses]
# wl = get_wordlists()
# if wl.attack_patterns:
# queried_paths = [item["path"] for item in accesses]
for queried_path in queried_paths:
# URL decode the path to catch encoded attacks
try:
decoded_path = urllib.parse.unquote(queried_path)
# Double decode to catch double-encoded attacks
decoded_path_twice = urllib.parse.unquote(decoded_path)
except Exception:
decoded_path = queried_path
decoded_path_twice = queried_path
# for queried_path in queried_paths:
# # URL decode the path to catch encoded attacks
# try:
# decoded_path = urllib.parse.unquote(queried_path)
# # Double decode to catch double-encoded attacks
# decoded_path_twice = urllib.parse.unquote(decoded_path)
# except Exception:
# decoded_path = queried_path
# decoded_path_twice = queried_path
for name, pattern in wl.attack_patterns.items():
# Check original, decoded, and double-decoded paths
if (re.search(pattern, queried_path, re.IGNORECASE) or
re.search(pattern, decoded_path, re.IGNORECASE) or
re.search(pattern, decoded_path_twice, re.IGNORECASE)):
attack_urls_found_list.append(f"{name}: {pattern}")
# for name, pattern in wl.attack_patterns.items():
# # Check original, decoded, and double-decoded paths
# if (re.search(pattern, queried_path, re.IGNORECASE) or
# re.search(pattern, decoded_path, re.IGNORECASE) or
# re.search(pattern, decoded_path_twice, re.IGNORECASE)):
# attack_urls_found_list.append(f"{name}: {pattern}")
# #remove duplicates
# attack_urls_found_list = set(attack_urls_found_list)
# attack_urls_found_list = list(attack_urls_found_list)
if len(attack_urls_found_list) > attack_urls_threshold:
score["attacker"]["attack_url"] = True
score["good_crawler"]["attack_url"] = False
score["bad_crawler"]["attack_url"] = False
score["regular_user"]["attack_url"] = False
else:
score["attacker"]["attack_url"] = False
score["good_crawler"]["attack_url"] = False
score["bad_crawler"]["attack_url"] = False
score["regular_user"]["attack_url"] = False
# if len(attack_urls_found_list) > attack_urls_threshold:
# score["attacker"]["attack_url"] = True
# score["good_crawler"]["attack_url"] = False
# score["bad_crawler"]["attack_url"] = False
# score["regular_user"]["attack_url"] = False
# else:
# score["attacker"]["attack_url"] = False
# score["good_crawler"]["attack_url"] = False
# score["bad_crawler"]["attack_url"] = False
# score["regular_user"]["attack_url"] = False
#--------------------- Calculate score ---------------------
# #--------------------- Calculate score ---------------------
attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
# attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
# attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
# attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
# attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
# attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
# attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
# good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
# good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
# good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
# good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
# good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
# bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
# regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
# regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
# regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
# regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
# regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
score_details = f"""
Attacker score: {attacker_score}
Good Crawler score: {good_crawler_score}
Bad Crawler score: {bad_crawler_score}
Regular User score: {regular_user_score}
"""
app_logger.debug(score_details)
# score_details = f"""
# Attacker score: {attacker_score}
# Good Crawler score: {good_crawler_score}
# Bad Crawler score: {bad_crawler_score}
# Regular User score: {regular_user_score}
# """
# app_logger.debug(score_details)
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
category = max(category_scores, key=category_scores.get)
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
# analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
# category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
# category = max(category_scores, key=category_scores.get)
# last_analysis = datetime.now(tz=ZoneInfo('UTC'))
self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
# self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
return 0
# return 0
# def update_ip_rep_infos(self, ip: str) -> list[str]:
# api_url = "https://iprep.lcrawl.com/api/iprep/"
# params = {
# "cidr": ip
# }
# headers = {
# "Content-Type": "application/json"
# }
# response = requests.get(api_url, headers=headers, params=params)
# payload = response.json()
# if payload["results"]:
# data = payload["results"][0]
# country_iso_code = data["geoip_data"]["country_iso_code"]
# asn = data["geoip_data"]["asn_autonomous_system_number"]
# asn_org = data["geoip_data"]["asn_autonomous_system_organization"]
# list_on = data["list_on"]
# sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
# sanitized_asn = sanitize_for_storage(asn, 100)
# sanitized_asn_org = sanitize_for_storage(asn_org, 100)
# sanitized_list_on = sanitize_dict(list_on, 100000)
# self._db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on)
# return
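
The scoring above reduces to: evaluate five boolean signals per category, multiply each flag by that category's weight, sum, and take the arg-max. For example, an IP that trips both `attack_url` and `different_user_agents` scores 15 + 8 = 23 as attacker but only 5 as bad_crawler (whose `attack_url` flag stays False in that branch). A compact sketch equivalent to the repeated per-category lines, using toy data in the same shape as `score` and `weights`:

```python
# Toy flags/weights in the same shape as `score` and `weights` above.
weights = {"attacker":    {"attack_url": 15, "different_user_agents": 8},
           "bad_crawler": {"attack_url": 5,  "different_user_agents": 5}}
score   = {"attacker":    {"attack_url": True,  "different_user_agents": True},
           "bad_crawler": {"attack_url": False, "different_user_agents": True}}

# bools multiply cleanly with ints (True == 1, False == 0)
category_scores = {
    cat: sum(score[cat][sig] * w for sig, w in sig_weights.items())
    for cat, sig_weights in weights.items()
}
print(category_scores)                                # {'attacker': 23, 'bad_crawler': 5}
print(max(category_scores, key=category_scores.get))  # attacker
```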


@@ -263,7 +263,7 @@ class DatabaseManager:
session.rollback()
print(f"Error updating IP stats analysis: {e}")
def manual_update_category(self, ip: str, category: str) -> None:
def manual_update_category(self, ip: str, category: str) -> None:
"""
Update IP category as a result of a manual intervention by an admin
@@ -275,6 +275,7 @@ class DatabaseManager:
session = self.session
sanitized_ip = sanitize_ip(ip)
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
# Record the manual category change
old_category = ip_stats.category
@@ -348,6 +349,29 @@ class DatabaseManager:
finally:
self.close_session()
def update_ip_rep_infos(self, ip: str, country_code: str, asn: str, asn_org: str, list_on: Dict[str,str]) -> None:
"""
Update IP rep stats
Args:
ip: IP address
country_code: IP address country code
asn: IP address ASN
asn_org: IP address ASN ORG
list_on: public lists containing the IP address
"""
session = self.session
sanitized_ip = sanitize_ip(ip)
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
ip_stats.country_code = country_code
ip_stats.asn = asn
ip_stats.asn_org = asn_org
ip_stats.list_on = list_on
def get_access_logs(
self,
limit: int = 100,
@@ -554,6 +578,7 @@ class DatabaseManager:
'city': stat.city,
'asn': stat.asn,
'asn_org': stat.asn_org,
'list_on': stat.list_on or {},
'reputation_score': stat.reputation_score,
'reputation_source': stat.reputation_source,
'analyzed_metrics': stat.analyzed_metrics or {},
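
One review note on the new `update_ip_rep_infos`: unlike the neighboring methods, the excerpt shows no commit, rollback, or session cleanup, so the assignments may never be flushed. Assuming the surrounding `DatabaseManager` pattern applies (the guard and commit below are my additions and may simply be elided from this view), the intended shape would be roughly:

```python
def update_ip_rep_infos(self, ip: str, country_code: str, asn: str,
                        asn_org: str, list_on: Dict[str, str]) -> None:
    """Update IP rep stats (sketch with assumed commit/cleanup)."""
    session = self.session
    try:
        sanitized_ip = sanitize_ip(ip)
        ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
        if ip_stats is None:  # assumed guard: the IP may have no stats row yet
            return
        ip_stats.country_code = country_code
        ip_stats.asn = asn
        ip_stats.asn_org = asn_org
        ip_stats.list_on = list_on
        session.commit()  # assumed: persist, matching the other update methods
    except Exception as e:
        session.rollback()
        print(f"Error updating IP rep infos: {e}")
    finally:
        self.close_session()
```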


@@ -474,7 +474,8 @@ class Handler(BaseHTTPRequestHandler):
self.tracker.record_access(client_ip, self.path, user_agent, method='GET')
self.analyzer.infer_user_category(client_ip)
# self.analyzer.infer_user_category(client_ip)
# self.analyzer.update_ip_rep_infos(client_ip)
if self.tracker.is_suspicious_user_agent(user_agent):
self.access_logger.warning(f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}")


@@ -134,6 +134,7 @@ class IpStats(Base):
city: Mapped[Optional[str]] = mapped_column(String(MAX_CITY_LENGTH), nullable=True)
asn: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
asn_org: Mapped[Optional[str]] = mapped_column(String(MAX_ASN_ORG_LENGTH), nullable=True)
list_on: Mapped[Optional[Dict[str,str]]] = mapped_column(JSON, nullable=True)
# Reputation fields (populated by future enrichment)
reputation_score: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
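
The new `list_on` column relies on SQLAlchemy's `JSON` type, so the blacklist map round-trips as a plain dict. A minimal self-contained sketch of that pattern (table name and values hypothetical):

```python
from typing import Dict, Optional
from sqlalchemy import JSON, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column

class Base(DeclarativeBase):
    pass

class ExampleStats(Base):  # hypothetical stand-in for IpStats
    __tablename__ = "example_stats"
    id: Mapped[int] = mapped_column(primary_key=True)
    list_on: Mapped[Optional[Dict[str, str]]] = mapped_column(JSON, nullable=True)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add(ExampleStats(id=1, list_on={"blocklist.de": "https://example.org/entry"}))
    session.commit()
    print(session.get(ExampleStats, 1).list_on)  # {'blocklist.de': 'https://example.org/entry'}
```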


@@ -7,7 +7,7 @@ Protects against SQL injection payloads, XSS, and storage exhaustion attacks.
import html
import re
from typing import Optional
from typing import Optional, Dict
# Field length limits for database storage
@@ -111,3 +111,6 @@ def escape_html_truncated(value: Optional[str], max_display_length: int) -> str:
value_str = value_str[:max_display_length] + "..."
return html.escape(value_str)
def sanitize_dict(value: Optional[Dict[str, str]], max_display_length: int) -> Dict[str, str]:
if not value: # guard: the Optional hint allows None, which .items() would otherwise reject
return {}
return {k: sanitize_for_storage(v, max_display_length) for k, v in value.items()}
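
Quick usage sketch for `sanitize_dict`, assuming `sanitize_for_storage` truncates and escapes values as the module docstring implies (the payload below is hypothetical):

```python
from sanitizer import sanitize_dict

# hypothetical oversized blacklist payload
raw = {"spamhaus": "https://example.org/listing/" + "x" * 500}
clean = sanitize_dict(raw, max_display_length=100)
print(clean["spamhaus"])  # value truncated/escaped by sanitize_for_storage (assumed behavior)
```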

src/tasks/analyze_ips.py (new file, 265 lines)

@@ -0,0 +1,265 @@
from database import get_database
from zoneinfo import ZoneInfo
from pathlib import Path
from datetime import datetime, timedelta
import re
import urllib.parse
from wordlists import get_wordlists
from config import get_config
from logger import get_app_logger
# ----------------------
# TASK CONFIG
# ----------------------
TASK_CONFIG = {
"name": "analyze-ips",
"cron": "*/1 * * * *",
"enabled": True,
"run_when_loaded": True
}
def main():
config = get_config()
db_manager = get_database()
app_logger = get_app_logger()
http_risky_methods_threshold = config.http_risky_methods_threshold
violated_robots_threshold = config.violated_robots_threshold
uneven_request_timing_threshold = config.uneven_request_timing_threshold
user_agents_used_threshold = config.user_agents_used_threshold
attack_urls_threshold = config.attack_urls_threshold
uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
score = {}
score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
weights = {
"attacker": {
"risky_http_methods": 6,
"robots_violations": 4,
"uneven_request_timing": 3,
"different_user_agents": 8,
"attack_url": 15
},
"good_crawler": {
"risky_http_methods": 1,
"robots_violations": 0,
"uneven_request_timing": 0,
"different_user_agents": 0,
"attack_url": 0
},
"bad_crawler": {
"risky_http_methods": 2,
"robots_violations": 7,
"uneven_request_timing": 0,
"different_user_agents": 5,
"attack_url": 5
},
"regular_user": {
"risky_http_methods": 0,
"robots_violations": 0,
"uneven_request_timing": 8,
"different_user_agents": 3,
"attack_url": 0
}
}
accesses = db_manager.get_access_logs(limit=999999999)
ips = {item['ip'] for item in accesses}
for ip in ips:
ip_accesses = [item for item in accesses if item["ip"] == ip]
total_accesses_count = len(ip_accesses) # count this IP's requests, not all requests
if total_accesses_count <= 0:
continue # next IP; returning here would abort the whole task
# Set category as "unknown" for the first 3 requests
if total_accesses_count < 3:
category = "unknown"
analyzed_metrics = {}
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
continue # next IP; this used to return 0 and skip every remaining address
#--------------------- HTTP Methods ---------------------
get_accesses_count = len([item for item in ip_accesses if item["method"] == "GET"])
post_accesses_count = len([item for item in ip_accesses if item["method"] == "POST"])
put_accesses_count = len([item for item in ip_accesses if item["method"] == "PUT"])
delete_accesses_count = len([item for item in ip_accesses if item["method"] == "DELETE"])
head_accesses_count = len([item for item in ip_accesses if item["method"] == "HEAD"])
options_accesses_count = len([item for item in ip_accesses if item["method"] == "OPTIONS"])
patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"])
if total_accesses_count > http_risky_methods_threshold:
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
else:
http_method_attacker_score = 0
#print(f"HTTP Method attacker score: {http_method_attacker_score}")
if http_method_attacker_score >= http_risky_methods_threshold:
score["attacker"]["risky_http_methods"] = True
score["good_crawler"]["risky_http_methods"] = False
score["bad_crawler"]["risky_http_methods"] = True
score["regular_user"]["risky_http_methods"] = False
else:
score["attacker"]["risky_http_methods"] = False
score["good_crawler"]["risky_http_methods"] = True
score["bad_crawler"]["risky_http_methods"] = False
score["regular_user"]["risky_http_methods"] = False
#--------------------- Robots Violations ---------------------
#respect robots.txt and login/config pages access frequency
robots_disallows = []
robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt"
with open(robots_path, "r") as f:
for line in f:
line = line.strip()
if not line:
continue
parts = line.split(":", 1) # split on the first colon only, so values containing ':' stay intact
if parts[0] == "Disallow":
parts[1] = parts[1].rstrip("/")
#print(f"DISALLOW {parts[1]}")
robots_disallows.append(parts[1].strip())
# if no robots.txt rules are violated, it is almost certainly a good crawler; if more than ~10% of requests violate them, likely a bad crawler or attacker
violated_robots_count = len([item for item in ip_accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
#print(f"Violated robots count: {violated_robots_count}")
if total_accesses_count > 0:
violated_robots_ratio = violated_robots_count / total_accesses_count
else:
violated_robots_ratio = 0
if violated_robots_ratio >= violated_robots_threshold:
score["attacker"]["robots_violations"] = True
score["good_crawler"]["robots_violations"] = False
score["bad_crawler"]["robots_violations"] = True
score["regular_user"]["robots_violations"] = False
else:
score["attacker"]["robots_violations"] = False
score["good_crawler"]["robots_violations"] = False
score["bad_crawler"]["robots_violations"] = False
score["regular_user"]["robots_violations"] = False
#--------------------- Requests Timing ---------------------
#Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
now_utc = datetime.now(tz=ZoneInfo('UTC'))
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
timestamps = sorted(timestamps, reverse=True)
time_diffs = []
for i in range(0, len(timestamps)-1):
diff = (timestamps[i] - timestamps[i+1]).total_seconds()
time_diffs.append(diff)
mean = 0
variance = 0
std = 0
cv = 0
if time_diffs:
mean = sum(time_diffs) / len(time_diffs)
variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
std = variance ** 0.5
cv = std / mean if mean else 0
app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
if cv >= uneven_request_timing_threshold:
score["attacker"]["uneven_request_timing"] = True
score["good_crawler"]["uneven_request_timing"] = False
score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = True
else:
score["attacker"]["uneven_request_timing"] = False
score["good_crawler"]["uneven_request_timing"] = False
score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = False
#--------------------- Different User Agents ---------------------
#Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
user_agents_used = [item["user_agent"] for item in ip_accesses]
user_agents_used = list(dict.fromkeys(user_agents_used))
#print(f"User agents used: {user_agents_used}")
if len(user_agents_used) >= user_agents_used_threshold:
score["attacker"]["different_user_agents"] = True
score["good_crawler"]["different_user_agents"] = False
score["bad_crawler"]["different_user_agentss"] = True
score["regular_user"]["different_user_agents"] = False
else:
score["attacker"]["different_user_agents"] = False
score["good_crawler"]["different_user_agents"] = False
score["bad_crawler"]["different_user_agents"] = False
score["regular_user"]["different_user_agents"] = False
#--------------------- Attack URLs ---------------------
attack_urls_found_list = []
wl = get_wordlists()
if wl.attack_patterns:
queried_paths = [item["path"] for item in ip_accesses]
for queried_path in queried_paths:
# URL decode the path to catch encoded attacks
try:
decoded_path = urllib.parse.unquote(queried_path)
# Double decode to catch double-encoded attacks
decoded_path_twice = urllib.parse.unquote(decoded_path)
except Exception:
decoded_path = queried_path
decoded_path_twice = queried_path
for name, pattern in wl.attack_patterns.items():
# Check original, decoded, and double-decoded paths
if (re.search(pattern, queried_path, re.IGNORECASE) or
re.search(pattern, decoded_path, re.IGNORECASE) or
re.search(pattern, decoded_path_twice, re.IGNORECASE)):
attack_urls_found_list.append(f"{name}: {pattern}")
#remove duplicates
attack_urls_found_list = set(attack_urls_found_list)
attack_urls_found_list = list(attack_urls_found_list)
if len(attack_urls_found_list) >= attack_urls_threshold:
score["attacker"]["attack_url"] = True
score["good_crawler"]["attack_url"] = False
score["bad_crawler"]["attack_url"] = False
score["regular_user"]["attack_url"] = False
else:
score["attacker"]["attack_url"] = False
score["good_crawler"]["attack_url"] = False
score["bad_crawler"]["attack_url"] = False
score["regular_user"]["attack_url"] = False
#--------------------- Calculate score ---------------------
attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
score_details = f"""
Attacker score: {attacker_score}
Good Crawler score: {good_crawler_score}
Bad Crawler score: {bad_crawler_score}
Regular User score: {regular_user_score}
"""
app_logger.debug(score_details)
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
category = max(category_scores, key=category_scores.get)
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
return
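
The timing heuristic is a coefficient of variation (CV = std / mean) over the gaps between consecutive requests in the configured window: steady crawlers yield a low CV, bursty or oddly rhythmic traffic a high one. The same computation with the standard library (population standard deviation, matching the hand-rolled variance above):

```python
import statistics

def timing_cv(gaps: list[float]) -> float:
    """Coefficient of variation of inter-request gaps (0 when undefined)."""
    if len(gaps) < 2:
        return 0.0
    mean = statistics.fmean(gaps)
    if mean == 0:
        return 0.0
    return statistics.pstdev(gaps) / mean  # pstdev = population std, as above

print(timing_cv([1.0, 1.1, 0.9, 1.0]))    # steady crawler -> low CV
print(timing_cv([0.05, 9.0, 0.02, 8.5]))  # bursty traffic -> high CV
```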

src/tasks/fetch_ip_rep.py (new file, 59 lines)

@@ -0,0 +1,59 @@
from database import get_database
from config import get_config
from logger import get_app_logger
import requests
from sanitizer import sanitize_for_storage, sanitize_dict
# ----------------------
# TASK CONFIG
# ----------------------
TASK_CONFIG = {
"name": "fetch-ip-rep",
"cron": "*/1 * * * *",
"enabled": True,
"run_when_loaded": True
}
def main():
config = get_config()
db_manager = get_database()
app_logger = get_app_logger()
accesses = db_manager.get_access_logs(limit=999999999)
ips = {item['ip'] for item in accesses}
for ip in ips:
api_url = "https://iprep.lcrawl.com/api/iprep/"
params = {
"cidr": ip
}
headers = {
"Content-Type": "application/json"
}
response = requests.get(api_url, headers=headers, params=params)
payload = response.json()
if payload["results"]:
data = payload["results"][0]
country_iso_code = data["geoip_data"]["country_iso_code"]
asn = data["geoip_data"]["asn_autonomous_system_number"]
asn_org = data["geoip_data"]["asn_autonomous_system_organization"]
list_on = data["list_on"]
sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
sanitized_asn = sanitize_for_storage(asn, 100)
sanitized_asn_org = sanitize_for_storage(asn_org, 100)
sanitized_list_on = sanitize_dict(list_on, 100000)
db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on)
return
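
Review note: the fetch loop issues `requests.get` with no timeout and assumes a JSON body, so one slow or malformed upstream response stalls or crashes the whole cron task. A more defensive per-IP fetch might look like this sketch (API shape taken from the code above; `fetch_iprep` and the logger argument are mine):

```python
import requests
from typing import Optional

def fetch_iprep(ip: str, app_logger) -> Optional[dict]:
    """Fetch reputation data for one IP, tolerating network and JSON errors."""
    try:
        response = requests.get(
            "https://iprep.lcrawl.com/api/iprep/",
            headers={"Content-Type": "application/json"},
            params={"cidr": ip},
            timeout=10,  # avoid hanging the cron task on a slow upstream
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        app_logger.warning(f"iprep lookup failed for {ip}: {exc}")
        return None
    results = payload.get("results") or []
    return results[0] if results else None
```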


@@ -410,6 +410,12 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
color: #58a6ff;
font-size: 13px;
font-weight: 600;
}}
.timeline-header {{
display: flex;
justify-content: space-between;
align-items: center;
gap: 10px;
margin-bottom: 10px;
}}
.timeline {{
@@ -470,6 +476,56 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
color: #8b949e;
margin: 0 7px;
}}
.reputation-container {{
margin-top: 15px;
padding-top: 15px;
border-top: 1px solid #30363d;
}}
.reputation-title {{
color: #58a6ff;
font-size: 13px;
font-weight: 600;
}}
.reputation-badges {{
display: flex;
flex-wrap: wrap;
gap: 6px;
align-items: center;
}}
.reputation-badge {{
display: inline-flex;
align-items: center;
gap: 4px;
padding: 4px 8px;
background: #161b22;
border: 1px solid #f851494d;
border-radius: 4px;
font-size: 11px;
color: #f85149;
text-decoration: none;
transition: all 0.2s;
}}
.reputation-badge:hover {{
background: #1c2128;
border-color: #f85149;
}}
.reputation-badge-icon {{
font-size: 12px;
}}
.reputation-clean {{
display: inline-flex;
align-items: center;
gap: 6px;
padding: 4px 10px;
background: #161b22;
border: 1px solid #3fb9504d;
border-radius: 4px;
font-size: 11px;
color: #3fb950;
}}
.reputation-clean-icon {{
font-size: 13px;
}}
</style>
</head>
@@ -627,11 +683,9 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
</div>
</div>
<script>
// Server timezone configuration
const SERVER_TIMEZONE = '{timezone}';
const DASHBOARD_PATH = '{dashboard_path}';
// Convert UTC timestamp to configured timezone
function formatTimestamp(isoTimestamp) {{
if (!isoTimestamp) return 'N/A';
try {{
@@ -652,7 +706,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
}}
}}
// Add sorting functionality to tables
document.querySelectorAll('th.sortable').forEach(header => {{
header.addEventListener('click', function() {{
const table = this.closest('table');
@@ -661,30 +714,24 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
const sortType = this.getAttribute('data-sort');
const columnIndex = Array.from(this.parentElement.children).indexOf(this);
// Determine sort direction
const isAscending = this.classList.contains('asc');
// Remove sort classes from all headers in this table
table.querySelectorAll('th.sortable').forEach(th => {{
th.classList.remove('asc', 'desc');
}});
// Add appropriate class to clicked header
this.classList.add(isAscending ? 'desc' : 'asc');
// Sort rows
rows.sort((a, b) => {{
let aValue = a.cells[columnIndex].textContent.trim();
let bValue = b.cells[columnIndex].textContent.trim();
// Handle numeric sorting
if (sortType === 'count') {{
aValue = parseInt(aValue) || 0;
bValue = parseInt(bValue) || 0;
return isAscending ? bValue - aValue : aValue - bValue;
}}
// Handle IP address sorting
if (sortType === 'ip') {{
const ipToNum = ip => {{
const parts = ip.split('.');
@@ -696,7 +743,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
return isAscending ? bNum - aNum : aNum - bNum;
}}
// Default string sorting
if (isAscending) {{
return bValue.localeCompare(aValue);
}} else {{
@@ -704,12 +750,10 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
}}
}});
// Re-append sorted rows
rows.forEach(row => tbody.appendChild(row));
}});
}});
// IP stats dropdown functionality
document.querySelectorAll('.ip-clickable').forEach(cell => {{
cell.addEventListener('click', async function(e) {{
const row = e.currentTarget.closest('.ip-row');
@@ -731,7 +775,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
const dropdown = statsRow.querySelector('.ip-stats-dropdown');
// Always fetch fresh data from database
if (dropdown) {{
dropdown.innerHTML = '<div class="loading">Loading stats...</div>';
try {{
@@ -758,7 +801,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
function formatIpStats(stats) {{
let html = '<div class="stats-left">';
// Basic info
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Total Requests:</span>';
html += `<span class="stat-value-sm">${{stats.total_requests || 0}}</span>`;
@@ -774,16 +816,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
html += `<span class="stat-value-sm">${{formatTimestamp(stats.last_seen)}}</span>`;
html += '</div>';
// Category
if (stats.category) {{
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Category:</span>';
const categoryClass = 'category-' + stats.category.toLowerCase().replace('_', '-');
html += `<span class="category-badge ${{categoryClass}}">${{stats.category}}</span>`;
html += '</div>';
}}
// GeoIP info if available
if (stats.country_code || stats.city) {{
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Location:</span>';
@@ -798,28 +830,70 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
html += '</div>';
}}
// Reputation score if available
if (stats.reputation_score !== null && stats.reputation_score !== undefined) {{
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Reputation Score:</span>';
html += `<span class="stat-value-sm">${{stats.reputation_score}} ${{stats.reputation_source ? '(' + stats.reputation_source + ')' : ''}}</span>`;
html += '</div>';
}}
// Category History Timeline
if (stats.category) {{
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Category:</span>';
const categoryClass = 'category-' + stats.category.toLowerCase().replace('_', '-');
html += `<span class="category-badge ${{categoryClass}}">${{stats.category}}</span>`;
html += '</div>';
}}
if (stats.category_history && stats.category_history.length > 0) {{
html += '<div class="timeline-container">';
html += '<div class="timeline-title">Behavior Timeline</div>';
html += '<div class="timeline">';
html += '<div class="timeline-header">';
html += '<div class="timeline-title">Behavior Timeline</div>';
if (stats.list_on && Object.keys(stats.list_on).length > 0) {{
html += '<div class="reputation-badges">';
html += '<span class="reputation-title" style="margin-bottom:0; margin-right:4px;">Listed on</span>';
const sortedSources = Object.entries(stats.list_on).sort((a, b) => a[0].localeCompare(b[0]));
sortedSources.forEach(([source, url]) => {{
if (url && url !== 'N/A') {{
html += `<a href="${{url}}" target="_blank" rel="noopener noreferrer" class="reputation-badge" title="Listed on ${{source}}">`;
html += '<span class="reputation-badge-icon"></span>';
html += `<span>${{source}}</span>`;
html += '</a>';
}} else {{
html += '<span class="reputation-badge" style="cursor: default;" title="Listed on">';
html += '<span class="reputation-badge-icon"></span>';
html += `<span>${{source}}</span>`;
html += '</span>';
}}
}});
html += '</div>';
}} else if (stats.country_code || stats.asn) {{
html += '<div class="reputation-badges">';
html += '<span class="reputation-title" style="margin-bottom:0; margin-right:4px;">Reputation</span>';
html += '<span class="reputation-clean" title="Not found on public blacklists">';
html += '<span class="reputation-clean-icon">✓</span>';
html += '<span>Clean</span>';
html += '</span>';
html += '</div>';
}}
html += '</div>';
html += '<div class="timeline">';
stats.category_history.forEach((change, index) => {{
const categoryClass = change.new_category.toLowerCase().replace('_', '-');
const timestamp = formatTimestamp(change.timestamp);
html += '<div class="timeline-item">';
html += `<div class="timeline-marker ${{categoryClass}}"></div>`;
html += '<div class="timeline-content">';
if (change.old_category) {{
const oldCategoryBadge = 'category-' + change.old_category.toLowerCase().replace('_', '-');
html += `<span class="category-badge ${{oldCategoryBadge}}">${{change.old_category}}</span>`;
@@ -827,21 +901,20 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
}} else {{
html += '<span style="color: #8b949e;">Initial:</span> ';
}}
const newCategoryBadge = 'category-' + change.new_category.toLowerCase().replace('_', '-');
html += `<span class="category-badge ${{newCategoryBadge}}">${{change.new_category}}</span>`;
html += `<div class="timeline-timestamp">${{timestamp}}</div>`;
html += '</div>';
html += '</div>';
}});
html += '</div>';
html += '</div>';
}}
html += '</div>';
// Radar chart on the right
if (stats.category_scores && Object.keys(stats.category_scores).length > 0) {{
html += '<div class="stats-right">';
html += '<div style="font-size: 13px; font-weight: 600; color: #58a6ff; margin-bottom: 10px;">Category Score</div>';
@@ -855,13 +928,11 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
unknown: stats.category_scores.unknown || 0
}};
// Normalize scores for better visualization
const maxScore = Math.max(...Object.values(scores), 1);
const minVisibleRadius = 0.15; // Minimum 15% visibility even for 0 values
const minVisibleRadius = 0.15;
const normalizedScores = {{}};
Object.keys(scores).forEach(key => {{
// Scale values: ensure minimum visibility + proportional to max
normalizedScores[key] = minVisibleRadius + (scores[key] / maxScore) * (1 - minVisibleRadius);
}});
@@ -881,14 +952,12 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
unknown: 'Unknown'
}};
// Draw radar background grid
const cx = 100, cy = 100, maxRadius = 75;
for (let i = 1; i <= 5; i++) {{
const r = (maxRadius / 5) * i;
html += `<circle cx="${{cx}}" cy="${{cy}}" r="${{r}}" fill="none" stroke="#30363d" stroke-width="0.5"/>`;
}}
// Draw axes (now with 5 points for pentagon)
const angles = [0, 72, 144, 216, 288];
const keys = ['good_crawler', 'regular_user', 'unknown', 'bad_crawler', 'attacker'];
@@ -898,14 +967,12 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
const y2 = cy + maxRadius * Math.sin(rad);
html += `<line x1="${{cx}}" y1="${{cy}}" x2="${{x2}}" y2="${{y2}}" stroke="#30363d" stroke-width="0.5"/>`;
// Add labels at consistent distance
const labelDist = maxRadius + 35;
const lx = cx + labelDist * Math.cos(rad);
const ly = cy + labelDist * Math.sin(rad);
html += `<text x="${{lx}}" y="${{ly}}" fill="#8b949e" font-size="12" text-anchor="middle" dominant-baseline="middle">${{labels[keys[i]]}}</text>`;
}});
// Draw filled polygon for scores
let points = [];
angles.forEach((angle, i) => {{
const normalizedScore = normalizedScores[keys[i]];
@@ -916,14 +983,11 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
points.push(`${{x}},${{y}}`);
}});
// Determine dominant category color
const dominantKey = Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? a : b);
const dominantColor = colors[dominantKey];
// Draw single colored area
html += `<polygon points="${{points.join(' ')}}" fill="${{dominantColor}}" fill-opacity="0.4" stroke="${{dominantColor}}" stroke-width="2.5"/>`;
// Draw points
angles.forEach((angle, i) => {{
const normalizedScore = normalizedScores[keys[i]];
const rad = (angle - 90) * Math.PI / 180;
@@ -935,7 +999,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
html += '</svg>';
// Legend
html += '<div class="radar-legend">';
keys.forEach(key => {{
html += '<div class="radar-legend-item">';