Merge pull request #34 from BlessedRebuS/feat/scoring-alogorithm

Feat/scoring alogorithm
Phillip Tarrant
2026-01-10 13:30:18 -06:00
committed by GitHub
11 changed files with 726 additions and 277 deletions

.gitignore (vendored, 2 changes)

@@ -78,4 +78,4 @@ data/
personal-values.yaml
#exports dir (keeping .gitkeep so we have the dir)
/exports/*
/exports/*


@@ -23,7 +23,7 @@ canary:
dashboard:
# if set to null, a random path is auto-generated
# can be set to "/dashboard" or similar <-- note this MUST include a forward slash
secret_path: dashboard
secret_path: super-secret-dashboard-path
api:
server_url: null


@@ -16,9 +16,3 @@ services:
environment:
- CONFIG_LOCATION=config.yaml
restart: unless-stopped
healthcheck:
test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"]
interval: 30s
timeout: 5s
retries: 3
start_period: 10s
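
A side note on the removed healthcheck: `requests.get` raises only on connection failures, not on HTTP error statuses, so the probe above passed even when the app answered 500. If the check is ever restored, a stricter body might look like this sketch (not part of this PR):

```python
# Hypothetical stricter healthcheck body: exits non-zero on connection
# errors AND on 4xx/5xx responses, with a bounded timeout.
import requests

resp = requests.get("http://localhost:5000", timeout=5)
resp.raise_for_status()
```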


@@ -10,6 +10,9 @@ import urllib.parse
from wordlists import get_wordlists
from config import get_config
from logger import get_app_logger
import requests
from sanitizer import sanitize_for_storage, sanitize_dict
"""
Functions for user activity analysis
"""
@@ -49,264 +52,299 @@ class Analyzer:
pass
return self._db_manager
def infer_user_category(self, ip: str) -> str:
# def infer_user_category(self, ip: str) -> str:
config = get_config()
# config = get_config()
http_risky_methods_threshold = config.http_risky_methods_threshold
violated_robots_threshold = config.violated_robots_threshold
uneven_request_timing_threshold = config.uneven_request_timing_threshold
user_agents_used_threshold = config.user_agents_used_threshold
attack_urls_threshold = config.attack_urls_threshold
uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
# http_risky_methods_threshold = config.http_risky_methods_threshold
# violated_robots_threshold = config.violated_robots_threshold
# uneven_request_timing_threshold = config.uneven_request_timing_threshold
# user_agents_used_threshold = config.user_agents_used_threshold
# attack_urls_threshold = config.attack_urls_threshold
# uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
# app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
score = {}
score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
# score = {}
# score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
# score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
# score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
# score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
weights = {
"attacker": {
"risky_http_methods": 6,
"robots_violations": 4,
"uneven_request_timing": 3,
"different_user_agents": 8,
"attack_url": 15
},
"good_crawler": {
"risky_http_methods": 1,
"robots_violations": 0,
"uneven_request_timing": 0,
"different_user_agents": 0,
"attack_url": 0
},
"bad_crawler": {
"risky_http_methods": 2,
"robots_violations": 7,
"uneven_request_timing": 0,
"different_user_agents": 5,
"attack_url": 5
},
"regular_user": {
"risky_http_methods": 0,
"robots_violations": 0,
"uneven_request_timing": 8,
"different_user_agents": 3,
"attack_url": 0
}
}
# #1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
# weights = {
# "attacker": {
# "risky_http_methods": 6,
# "robots_violations": 4,
# "uneven_request_timing": 3,
# "different_user_agents": 8,
# "attack_url": 15
# },
# "good_crawler": {
# "risky_http_methods": 1,
# "robots_violations": 0,
# "uneven_request_timing": 0,
# "different_user_agents": 0,
# "attack_url": 0
# },
# "bad_crawler": {
# "risky_http_methods": 2,
# "robots_violations": 7,
# "uneven_request_timing": 0,
# "different_user_agents": 5,
# "attack_url": 5
# },
# "regular_user": {
# "risky_http_methods": 0,
# "robots_violations": 0,
# "uneven_request_timing": 8,
# "different_user_agents": 3,
# "attack_url": 0
# }
# }
accesses = self.db.get_access_logs(ip_filter = ip, limit=1000)
total_accesses_count = len(accesses)
if total_accesses_count <= 0:
return
# accesses = self.db.get_access_logs(ip_filter = ip, limit=1000)
# total_accesses_count = len(accesses)
# if total_accesses_count <= 0:
# return
# Set category as "unknown" for the first 5 requests
if total_accesses_count < 3:
category = "unknown"
analyzed_metrics = {}
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
return 0
# # Set category as "unknown" for the first 5 requests
# if total_accesses_count < 3:
# category = "unknown"
# analyzed_metrics = {}
# category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
# last_analysis = datetime.now(tz=ZoneInfo('UTC'))
# self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
# return 0
#--------------------- HTTP Methods ---------------------
# #--------------------- HTTP Methods ---------------------
get_accesses_count = len([item for item in accesses if item["method"] == "GET"])
post_accesses_count = len([item for item in accesses if item["method"] == "POST"])
put_accesses_count = len([item for item in accesses if item["method"] == "PUT"])
delete_accesses_count = len([item for item in accesses if item["method"] == "DELETE"])
head_accesses_count = len([item for item in accesses if item["method"] == "HEAD"])
options_accesses_count = len([item for item in accesses if item["method"] == "OPTIONS"])
patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"])
# get_accesses_count = len([item for item in accesses if item["method"] == "GET"])
# post_accesses_count = len([item for item in accesses if item["method"] == "POST"])
# put_accesses_count = len([item for item in accesses if item["method"] == "PUT"])
# delete_accesses_count = len([item for item in accesses if item["method"] == "DELETE"])
# head_accesses_count = len([item for item in accesses if item["method"] == "HEAD"])
# options_accesses_count = len([item for item in accesses if item["method"] == "OPTIONS"])
# patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"])
if total_accesses_count > http_risky_methods_threshold:
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
else:
http_method_attacker_score = 0
# if total_accesses_count > http_risky_methods_threshold:
# http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
# else:
# http_method_attacker_score = 0
#print(f"HTTP Method attacker score: {http_method_attacker_score}")
if http_method_attacker_score >= http_risky_methods_threshold:
score["attacker"]["risky_http_methods"] = True
score["good_crawler"]["risky_http_methods"] = False
score["bad_crawler"]["risky_http_methods"] = True
score["regular_user"]["risky_http_methods"] = False
else:
score["attacker"]["risky_http_methods"] = False
score["good_crawler"]["risky_http_methods"] = True
score["bad_crawler"]["risky_http_methods"] = False
score["regular_user"]["risky_http_methods"] = False
# #print(f"HTTP Method attacker score: {http_method_attacker_score}")
# if http_method_attacker_score >= http_risky_methods_threshold:
# score["attacker"]["risky_http_methods"] = True
# score["good_crawler"]["risky_http_methods"] = False
# score["bad_crawler"]["risky_http_methods"] = True
# score["regular_user"]["risky_http_methods"] = False
# else:
# score["attacker"]["risky_http_methods"] = False
# score["good_crawler"]["risky_http_methods"] = True
# score["bad_crawler"]["risky_http_methods"] = False
# score["regular_user"]["risky_http_methods"] = False
#--------------------- Robots Violations ---------------------
#respect robots.txt and login/config pages access frequency
robots_disallows = []
robots_path = Path(__file__).parent / "templates" / "html" / "robots.txt"
with open(robots_path, "r") as f:
for line in f:
line = line.strip()
if not line:
continue
parts = line.split(":")
# #--------------------- Robots Violations ---------------------
# #respect robots.txt and login/config pages access frequency
# robots_disallows = []
# robots_path = Path(__file__).parent / "templates" / "html" / "robots.txt"
# with open(robots_path, "r") as f:
# for line in f:
# line = line.strip()
# if not line:
# continue
# parts = line.split(":")
if parts[0] == "Disallow":
parts[1] = parts[1].rstrip("/")
#print(f"DISALLOW {parts[1]}")
robots_disallows.append(parts[1].strip())
# if parts[0] == "Disallow":
# parts[1] = parts[1].rstrip("/")
# #print(f"DISALLOW {parts[1]}")
# robots_disallows.append(parts[1].strip())
# if no robots.txt rules are violated, it is almost certainly a good crawler; if more than ~10% of requests violate them, likely a bad crawler or attacker
violated_robots_count = len([item for item in accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
#print(f"Violated robots count: {violated_robots_count}")
if total_accesses_count > 0:
violated_robots_ratio = violated_robots_count / total_accesses_count
else:
violated_robots_ratio = 0
# # if no robots.txt rules are violated, it is almost certainly a good crawler; if more than ~10% of requests violate them, likely a bad crawler or attacker
# violated_robots_count = len([item for item in accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
# #print(f"Violated robots count: {violated_robots_count}")
# if total_accesses_count > 0:
# violated_robots_ratio = violated_robots_count / total_accesses_count
# else:
# violated_robots_ratio = 0
if violated_robots_ratio >= violated_robots_threshold:
score["attacker"]["robots_violations"] = True
score["good_crawler"]["robots_violations"] = False
score["bad_crawler"]["robots_violations"] = True
score["regular_user"]["robots_violations"] = False
else:
score["attacker"]["robots_violations"] = False
score["good_crawler"]["robots_violations"] = False
score["bad_crawler"]["robots_violations"] = False
score["regular_user"]["robots_violations"] = False
# if violated_robots_ratio >= violated_robots_threshold:
# score["attacker"]["robots_violations"] = True
# score["good_crawler"]["robots_violations"] = False
# score["bad_crawler"]["robots_violations"] = True
# score["regular_user"]["robots_violations"] = False
# else:
# score["attacker"]["robots_violations"] = False
# score["good_crawler"]["robots_violations"] = False
# score["bad_crawler"]["robots_violations"] = False
# score["regular_user"]["robots_violations"] = False
#--------------------- Requests Timing ---------------------
#Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses]
now_utc = datetime.now(tz=ZoneInfo('UTC'))
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
timestamps = sorted(timestamps, reverse=True)
# #--------------------- Requests Timing ---------------------
# #Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
# timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses]
# now_utc = datetime.now(tz=ZoneInfo('UTC'))
# timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
# timestamps = sorted(timestamps, reverse=True)
time_diffs = []
for i in range(0, len(timestamps)-1):
diff = (timestamps[i] - timestamps[i+1]).total_seconds()
time_diffs.append(diff)
# time_diffs = []
# for i in range(0, len(timestamps)-1):
# diff = (timestamps[i] - timestamps[i+1]).total_seconds()
# time_diffs.append(diff)
mean = 0
variance = 0
std = 0
cv = 0
if time_diffs:
mean = sum(time_diffs) / len(time_diffs)
variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
std = variance ** 0.5
cv = std / mean if mean else 0
app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
# mean = 0
# variance = 0
# std = 0
# cv = 0
# if time_diffs:
# mean = sum(time_diffs) / len(time_diffs)
# variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
# std = variance ** 0.5
# cv = std / mean if mean else 0
# app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
if cv >= uneven_request_timing_threshold:
score["attacker"]["uneven_request_timing"] = True
score["good_crawler"]["uneven_request_timing"] = False
score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = True
else:
score["attacker"]["uneven_request_timing"] = False
score["good_crawler"]["uneven_request_timing"] = False
score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = False
# if cv >= uneven_request_timing_threshold:
# score["attacker"]["uneven_request_timing"] = True
# score["good_crawler"]["uneven_request_timing"] = False
# score["bad_crawler"]["uneven_request_timing"] = False
# score["regular_user"]["uneven_request_timing"] = True
# else:
# score["attacker"]["uneven_request_timing"] = False
# score["good_crawler"]["uneven_request_timing"] = False
# score["bad_crawler"]["uneven_request_timing"] = False
# score["regular_user"]["uneven_request_timing"] = False
#--------------------- Different User Agents ---------------------
#Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
user_agents_used = [item["user_agent"] for item in accesses]
user_agents_used = list(dict.fromkeys(user_agents_used))
#print(f"User agents used: {user_agents_used}")
# #--------------------- Different User Agents ---------------------
# #Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
# user_agents_used = [item["user_agent"] for item in accesses]
# user_agents_used = list(dict.fromkeys(user_agents_used))
# #print(f"User agents used: {user_agents_used}")
if len(user_agents_used) >= user_agents_used_threshold:
score["attacker"]["different_user_agents"] = True
score["good_crawler"]["different_user_agents"] = False
score["bad_crawler"]["different_user_agentss"] = True
score["regular_user"]["different_user_agents"] = False
else:
score["attacker"]["different_user_agents"] = False
score["good_crawler"]["different_user_agents"] = False
score["bad_crawler"]["different_user_agents"] = False
score["regular_user"]["different_user_agents"] = False
# if len(user_agents_used) >= user_agents_used_threshold:
# score["attacker"]["different_user_agents"] = True
# score["good_crawler"]["different_user_agents"] = False
# score["bad_crawler"]["different_user_agentss"] = True
# score["regular_user"]["different_user_agents"] = False
# else:
# score["attacker"]["different_user_agents"] = False
# score["good_crawler"]["different_user_agents"] = False
# score["bad_crawler"]["different_user_agents"] = False
# score["regular_user"]["different_user_agents"] = False
#--------------------- Attack URLs ---------------------
# #--------------------- Attack URLs ---------------------
attack_urls_found_list = []
# attack_urls_found_list = []
wl = get_wordlists()
if wl.attack_patterns:
queried_paths = [item["path"] for item in accesses]
# wl = get_wordlists()
# if wl.attack_patterns:
# queried_paths = [item["path"] for item in accesses]
for queried_path in queried_paths:
# URL decode the path to catch encoded attacks
try:
decoded_path = urllib.parse.unquote(queried_path)
# Double decode to catch double-encoded attacks
decoded_path_twice = urllib.parse.unquote(decoded_path)
except Exception:
decoded_path = queried_path
decoded_path_twice = queried_path
# for queried_path in queried_paths:
# # URL decode the path to catch encoded attacks
# try:
# decoded_path = urllib.parse.unquote(queried_path)
# # Double decode to catch double-encoded attacks
# decoded_path_twice = urllib.parse.unquote(decoded_path)
# except Exception:
# decoded_path = queried_path
# decoded_path_twice = queried_path
for name, pattern in wl.attack_patterns.items():
# Check original, decoded, and double-decoded paths
if (re.search(pattern, queried_path, re.IGNORECASE) or
re.search(pattern, decoded_path, re.IGNORECASE) or
re.search(pattern, decoded_path_twice, re.IGNORECASE)):
attack_urls_found_list.append(f"{name}: {pattern}")
# for name, pattern in wl.attack_patterns.items():
# # Check original, decoded, and double-decoded paths
# if (re.search(pattern, queried_path, re.IGNORECASE) or
# re.search(pattern, decoded_path, re.IGNORECASE) or
# re.search(pattern, decoded_path_twice, re.IGNORECASE)):
# attack_urls_found_list.append(f"{name}: {pattern}")
# #remove duplicates
# attack_urls_found_list = set(attack_urls_found_list)
# attack_urls_found_list = list(attack_urls_found_list)
if len(attack_urls_found_list) > attack_urls_threshold:
score["attacker"]["attack_url"] = True
score["good_crawler"]["attack_url"] = False
score["bad_crawler"]["attack_url"] = False
score["regular_user"]["attack_url"] = False
else:
score["attacker"]["attack_url"] = False
score["good_crawler"]["attack_url"] = False
score["bad_crawler"]["attack_url"] = False
score["regular_user"]["attack_url"] = False
# if len(attack_urls_found_list) > attack_urls_threshold:
# score["attacker"]["attack_url"] = True
# score["good_crawler"]["attack_url"] = False
# score["bad_crawler"]["attack_url"] = False
# score["regular_user"]["attack_url"] = False
# else:
# score["attacker"]["attack_url"] = False
# score["good_crawler"]["attack_url"] = False
# score["bad_crawler"]["attack_url"] = False
# score["regular_user"]["attack_url"] = False
#--------------------- Calculate score ---------------------
# #--------------------- Calculate score ---------------------
attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
# attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
# attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
# attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
# attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
# attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
# attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
# good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
# good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
# good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
# good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
# good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
# bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
# regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
# regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
# regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
# regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
# regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
score_details = f"""
Attacker score: {attacker_score}
Good Crawler score: {good_crawler_score}
Bad Crawler score: {bad_crawler_score}
Regular User score: {regular_user_score}
"""
app_logger.debug(score_details)
# score_details = f"""
# Attacker score: {attacker_score}
# Good Crawler score: {good_crawler_score}
# Bad Crawler score: {bad_crawler_score}
# Regular User score: {regular_user_score}
# """
# app_logger.debug(score_details)
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
category = max(category_scores, key=category_scores.get)
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
# analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
# category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
# category = max(category_scores, key=category_scores.get)
# last_analysis = datetime.now(tz=ZoneInfo('UTC'))
self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
# self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
return 0
# return 0
# def update_ip_rep_infos(self, ip: str) -> list[str]:
# api_url = "https://iprep.lcrawl.com/api/iprep/"
# params = {
# "cidr": ip
# }
# headers = {
# "Content-Type": "application/json"
# }
# response = requests.get(api_url, headers=headers, params=params)
# payload = response.json()
# if payload["results"]:
# data = payload["results"][0]
# country_iso_code = data["geoip_data"]["country_iso_code"]
# asn = data["geoip_data"]["asn_autonomous_system_number"]
# asn_org = data["geoip_data"]["asn_autonomous_system_organization"]
# list_on = data["list_on"]
# sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
# sanitized_asn = sanitize_for_storage(asn, 100)
# sanitized_asn_org = sanitize_for_storage(asn_org, 100)
# sanitized_list_on = sanitize_dict(list_on, 100000)
# self._db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on)
# return
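
The scoring above reduces to: evaluate five boolean signals per category, multiply each flag by that category's weight, sum, and take the arg-max. For example, an IP that trips both `attack_url` and `different_user_agents` scores 15 + 8 = 23 as attacker but only 5 as bad_crawler (whose `attack_url` flag stays False in that branch). A compact sketch equivalent to the repeated per-category lines, using toy data in the same shape as `score` and `weights`:

```python
# Toy flags/weights in the same shape as `score` and `weights` above.
weights = {"attacker":    {"attack_url": 15, "different_user_agents": 8},
           "bad_crawler": {"attack_url": 5,  "different_user_agents": 5}}
score   = {"attacker":    {"attack_url": True,  "different_user_agents": True},
           "bad_crawler": {"attack_url": False, "different_user_agents": True}}

# bools multiply cleanly with ints (True == 1, False == 0)
category_scores = {
    cat: sum(score[cat][sig] * w for sig, w in sig_weights.items())
    for cat, sig_weights in weights.items()
}
print(category_scores)                                # {'attacker': 23, 'bad_crawler': 5}
print(max(category_scores, key=category_scores.get))  # attacker
```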


@@ -263,7 +263,7 @@ class DatabaseManager:
session.rollback()
print(f"Error updating IP stats analysis: {e}")
def manual_update_category(self, ip: str, category: str) -> None:
def manual_update_category(self, ip: str, category: str) -> None:
"""
Update IP category as a result of a manual intervention by an admin
@@ -275,6 +275,7 @@ class DatabaseManager:
session = self.session
sanitized_ip = sanitize_ip(ip)
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
# Record the manual category change
old_category = ip_stats.category
@@ -348,6 +349,29 @@ class DatabaseManager:
finally:
self.close_session()
def update_ip_rep_infos(self, ip: str, country_code: str, asn: str, asn_org: str, list_on: Dict[str,str]) -> None:
"""
Update IP rep stats
Args:
ip: IP address
country_code: IP address country code
asn: IP address ASN
asn_org: IP address ASN ORG
list_on: public lists containing the IP address
"""
session = self.session
sanitized_ip = sanitize_ip(ip)
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
ip_stats.country_code = country_code
ip_stats.asn = asn
ip_stats.asn_org = asn_org
ip_stats.list_on = list_on
def get_access_logs(
self,
limit: int = 100,
@@ -554,6 +578,7 @@ class DatabaseManager:
'city': stat.city,
'asn': stat.asn,
'asn_org': stat.asn_org,
'list_on': stat.list_on or {},
'reputation_score': stat.reputation_score,
'reputation_source': stat.reputation_source,
'analyzed_metrics': stat.analyzed_metrics or {},
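
One review note on the new `update_ip_rep_infos`: unlike the neighboring methods, the excerpt shows no commit, rollback, or session cleanup, so the assignments may never be flushed. Assuming the surrounding `DatabaseManager` pattern applies (the guard and commit below are my additions and may simply be elided from this view), the intended shape would be roughly:

```python
def update_ip_rep_infos(self, ip: str, country_code: str, asn: str,
                        asn_org: str, list_on: Dict[str, str]) -> None:
    """Update IP rep stats (sketch with assumed commit/cleanup)."""
    session = self.session
    try:
        sanitized_ip = sanitize_ip(ip)
        ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
        if ip_stats is None:  # assumed guard: the IP may have no stats row yet
            return
        ip_stats.country_code = country_code
        ip_stats.asn = asn
        ip_stats.asn_org = asn_org
        ip_stats.list_on = list_on
        session.commit()  # assumed: persist, matching the other update methods
    except Exception as e:
        session.rollback()
        print(f"Error updating IP rep infos: {e}")
    finally:
        self.close_session()
```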


@@ -474,7 +474,8 @@ class Handler(BaseHTTPRequestHandler):
self.tracker.record_access(client_ip, self.path, user_agent, method='GET')
self.analyzer.infer_user_category(client_ip)
# self.analyzer.infer_user_category(client_ip)
# self.analyzer.update_ip_rep_infos(client_ip)
if self.tracker.is_suspicious_user_agent(user_agent):
self.access_logger.warning(f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}")


@@ -134,6 +134,7 @@ class IpStats(Base):
city: Mapped[Optional[str]] = mapped_column(String(MAX_CITY_LENGTH), nullable=True)
asn: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
asn_org: Mapped[Optional[str]] = mapped_column(String(MAX_ASN_ORG_LENGTH), nullable=True)
list_on: Mapped[Optional[Dict[str,str]]] = mapped_column(JSON, nullable=True)
# Reputation fields (populated by future enrichment)
reputation_score: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
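
The new `list_on` column relies on SQLAlchemy's `JSON` type, so the blacklist map round-trips as a plain dict. A minimal self-contained sketch of that pattern (table name and values hypothetical):

```python
from typing import Dict, Optional
from sqlalchemy import JSON, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, Session, mapped_column

class Base(DeclarativeBase):
    pass

class ExampleStats(Base):  # hypothetical stand-in for IpStats
    __tablename__ = "example_stats"
    id: Mapped[int] = mapped_column(primary_key=True)
    list_on: Mapped[Optional[Dict[str, str]]] = mapped_column(JSON, nullable=True)

engine = create_engine("sqlite://")
Base.metadata.create_all(engine)
with Session(engine) as session:
    session.add(ExampleStats(id=1, list_on={"blocklist.de": "https://example.org/entry"}))
    session.commit()
    print(session.get(ExampleStats, 1).list_on)  # {'blocklist.de': 'https://example.org/entry'}
```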


@@ -7,7 +7,7 @@ Protects against SQL injection payloads, XSS, and storage exhaustion attacks.
import html
import re
from typing import Optional
from typing import Optional, Dict
# Field length limits for database storage
@@ -111,3 +111,6 @@ def escape_html_truncated(value: Optional[str], max_display_length: int) -> str:
value_str = value_str[:max_display_length] + "..."
return html.escape(value_str)
def sanitize_dict(value: Optional[Dict[str, str]], max_display_length: int) -> Dict[str, str]:
if not value: # guard: the Optional hint allows None, which .items() would otherwise reject
return {}
return {k: sanitize_for_storage(v, max_display_length) for k, v in value.items()}
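
Quick usage sketch for `sanitize_dict`, assuming `sanitize_for_storage` truncates and escapes values as the module docstring implies (the payload below is hypothetical):

```python
from sanitizer import sanitize_dict

# hypothetical oversized blacklist payload
raw = {"spamhaus": "https://example.org/listing/" + "x" * 500}
clean = sanitize_dict(raw, max_display_length=100)
print(clean["spamhaus"])  # value truncated/escaped by sanitize_for_storage (assumed behavior)
```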

src/tasks/analyze_ips.py (new file, 265 lines)

@@ -0,0 +1,265 @@
from database import get_database
from zoneinfo import ZoneInfo
from pathlib import Path
from datetime import datetime, timedelta
import re
import urllib.parse
from wordlists import get_wordlists
from config import get_config
from logger import get_app_logger
# ----------------------
# TASK CONFIG
# ----------------------
TASK_CONFIG = {
"name": "analyze-ips",
"cron": "*/1 * * * *",
"enabled": True,
"run_when_loaded": True
}
def main():
config = get_config()
db_manager = get_database()
app_logger = get_app_logger()
http_risky_methods_threshold = config.http_risky_methods_threshold
violated_robots_threshold = config.violated_robots_threshold
uneven_request_timing_threshold = config.uneven_request_timing_threshold
user_agents_used_threshold = config.user_agents_used_threshold
attack_urls_threshold = config.attack_urls_threshold
uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
score = {}
score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
weights = {
"attacker": {
"risky_http_methods": 6,
"robots_violations": 4,
"uneven_request_timing": 3,
"different_user_agents": 8,
"attack_url": 15
},
"good_crawler": {
"risky_http_methods": 1,
"robots_violations": 0,
"uneven_request_timing": 0,
"different_user_agents": 0,
"attack_url": 0
},
"bad_crawler": {
"risky_http_methods": 2,
"robots_violations": 7,
"uneven_request_timing": 0,
"different_user_agents": 5,
"attack_url": 5
},
"regular_user": {
"risky_http_methods": 0,
"robots_violations": 0,
"uneven_request_timing": 8,
"different_user_agents": 3,
"attack_url": 0
}
}
accesses = db_manager.get_access_logs(limit=999999999)
ips = {item['ip'] for item in accesses}
for ip in ips:
ip_accesses = [item for item in accesses if item["ip"] == ip]
total_accesses_count = len(ip_accesses) # count this IP's requests, not all requests
if total_accesses_count <= 0:
continue # next IP; returning here would abort the whole task
# Set category as "unknown" for the first 3 requests
if total_accesses_count < 3:
category = "unknown"
analyzed_metrics = {}
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
continue # next IP; this used to return 0 and skip every remaining address
#--------------------- HTTP Methods ---------------------
get_accesses_count = len([item for item in ip_accesses if item["method"] == "GET"])
post_accesses_count = len([item for item in ip_accesses if item["method"] == "POST"])
put_accesses_count = len([item for item in ip_accesses if item["method"] == "PUT"])
delete_accesses_count = len([item for item in ip_accesses if item["method"] == "DELETE"])
head_accesses_count = len([item for item in ip_accesses if item["method"] == "HEAD"])
options_accesses_count = len([item for item in ip_accesses if item["method"] == "OPTIONS"])
patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"])
if total_accesses_count > http_risky_methods_threshold:
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
else:
http_method_attacker_score = 0
#print(f"HTTP Method attacker score: {http_method_attacker_score}")
if http_method_attacker_score >= http_risky_methods_threshold:
score["attacker"]["risky_http_methods"] = True
score["good_crawler"]["risky_http_methods"] = False
score["bad_crawler"]["risky_http_methods"] = True
score["regular_user"]["risky_http_methods"] = False
else:
score["attacker"]["risky_http_methods"] = False
score["good_crawler"]["risky_http_methods"] = True
score["bad_crawler"]["risky_http_methods"] = False
score["regular_user"]["risky_http_methods"] = False
#--------------------- Robots Violations ---------------------
#respect robots.txt and login/config pages access frequency
robots_disallows = []
robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt"
with open(robots_path, "r") as f:
for line in f:
line = line.strip()
if not line:
continue
parts = line.split(":", 1) # split on the first colon only, so values containing ':' stay intact
if parts[0] == "Disallow":
parts[1] = parts[1].rstrip("/")
#print(f"DISALLOW {parts[1]}")
robots_disallows.append(parts[1].strip())
# if no robots.txt rules are violated, it is almost certainly a good crawler; if more than ~10% of requests violate them, likely a bad crawler or attacker
violated_robots_count = len([item for item in ip_accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
#print(f"Violated robots count: {violated_robots_count}")
if total_accesses_count > 0:
violated_robots_ratio = violated_robots_count / total_accesses_count
else:
violated_robots_ratio = 0
if violated_robots_ratio >= violated_robots_threshold:
score["attacker"]["robots_violations"] = True
score["good_crawler"]["robots_violations"] = False
score["bad_crawler"]["robots_violations"] = True
score["regular_user"]["robots_violations"] = False
else:
score["attacker"]["robots_violations"] = False
score["good_crawler"]["robots_violations"] = False
score["bad_crawler"]["robots_violations"] = False
score["regular_user"]["robots_violations"] = False
#--------------------- Requests Timing ---------------------
#Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
now_utc = datetime.now(tz=ZoneInfo('UTC'))
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
timestamps = sorted(timestamps, reverse=True)
time_diffs = []
for i in range(0, len(timestamps)-1):
diff = (timestamps[i] - timestamps[i+1]).total_seconds()
time_diffs.append(diff)
mean = 0
variance = 0
std = 0
cv = 0
if time_diffs:
mean = sum(time_diffs) / len(time_diffs)
variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
std = variance ** 0.5
cv = std / mean if mean else 0
app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
if cv >= uneven_request_timing_threshold:
score["attacker"]["uneven_request_timing"] = True
score["good_crawler"]["uneven_request_timing"] = False
score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = True
else:
score["attacker"]["uneven_request_timing"] = False
score["good_crawler"]["uneven_request_timing"] = False
score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = False
#--------------------- Different User Agents ---------------------
#Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
user_agents_used = [item["user_agent"] for item in ip_accesses]
user_agents_used = list(dict.fromkeys(user_agents_used))
#print(f"User agents used: {user_agents_used}")
if len(user_agents_used) >= user_agents_used_threshold:
score["attacker"]["different_user_agents"] = True
score["good_crawler"]["different_user_agents"] = False
score["bad_crawler"]["different_user_agentss"] = True
score["regular_user"]["different_user_agents"] = False
else:
score["attacker"]["different_user_agents"] = False
score["good_crawler"]["different_user_agents"] = False
score["bad_crawler"]["different_user_agents"] = False
score["regular_user"]["different_user_agents"] = False
#--------------------- Attack URLs ---------------------
attack_urls_found_list = []
wl = get_wordlists()
if wl.attack_patterns:
queried_paths = [item["path"] for item in ip_accesses]
for queried_path in queried_paths:
# URL decode the path to catch encoded attacks
try:
decoded_path = urllib.parse.unquote(queried_path)
# Double decode to catch double-encoded attacks
decoded_path_twice = urllib.parse.unquote(decoded_path)
except Exception:
decoded_path = queried_path
decoded_path_twice = queried_path
for name, pattern in wl.attack_patterns.items():
# Check original, decoded, and double-decoded paths
if (re.search(pattern, queried_path, re.IGNORECASE) or
re.search(pattern, decoded_path, re.IGNORECASE) or
re.search(pattern, decoded_path_twice, re.IGNORECASE)):
attack_urls_found_list.append(f"{name}: {pattern}")
#remove duplicates
attack_urls_found_list = set(attack_urls_found_list)
attack_urls_found_list = list(attack_urls_found_list)
if len(attack_urls_found_list) >= attack_urls_threshold:
score["attacker"]["attack_url"] = True
score["good_crawler"]["attack_url"] = False
score["bad_crawler"]["attack_url"] = False
score["regular_user"]["attack_url"] = False
else:
score["attacker"]["attack_url"] = False
score["good_crawler"]["attack_url"] = False
score["bad_crawler"]["attack_url"] = False
score["regular_user"]["attack_url"] = False
#--------------------- Calculate score ---------------------
attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
score_details = f"""
Attacker score: {attacker_score}
Good Crawler score: {good_crawler_score}
Bad Crawler score: {bad_crawler_score}
Regular User score: {regular_user_score}
"""
app_logger.debug(score_details)
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
category = max(category_scores, key=category_scores.get)
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
return
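
The timing heuristic is a coefficient of variation (CV = std / mean) over the gaps between consecutive requests in the configured window: steady crawlers yield a low CV, bursty or oddly rhythmic traffic a high one. The same computation with the standard library (population standard deviation, matching the hand-rolled variance above):

```python
import statistics

def timing_cv(gaps: list[float]) -> float:
    """Coefficient of variation of inter-request gaps (0 when undefined)."""
    if len(gaps) < 2:
        return 0.0
    mean = statistics.fmean(gaps)
    if mean == 0:
        return 0.0
    return statistics.pstdev(gaps) / mean  # pstdev = population std, as above

print(timing_cv([1.0, 1.1, 0.9, 1.0]))    # steady crawler -> low CV
print(timing_cv([0.05, 9.0, 0.02, 8.5]))  # bursty traffic -> high CV
```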

src/tasks/fetch_ip_rep.py (new file, 59 lines)

@@ -0,0 +1,59 @@
from database import get_database
from config import get_config
from logger import get_app_logger
import requests
from sanitizer import sanitize_for_storage, sanitize_dict
# ----------------------
# TASK CONFIG
# ----------------------
TASK_CONFIG = {
"name": "fetch-ip-rep",
"cron": "*/1 * * * *",
"enabled": True,
"run_when_loaded": True
}
def main():
config = get_config()
db_manager = get_database()
app_logger = get_app_logger()
accesses = db_manager.get_access_logs(limit=999999999)
ips = {item['ip'] for item in accesses}
for ip in ips:
api_url = "https://iprep.lcrawl.com/api/iprep/"
params = {
"cidr": ip
}
headers = {
"Content-Type": "application/json"
}
response = requests.get(api_url, headers=headers, params=params)
payload = response.json()
if payload["results"]:
data = payload["results"][0]
country_iso_code = data["geoip_data"]["country_iso_code"]
asn = data["geoip_data"]["asn_autonomous_system_number"]
asn_org = data["geoip_data"]["asn_autonomous_system_organization"]
list_on = data["list_on"]
sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
sanitized_asn = sanitize_for_storage(asn, 100)
sanitized_asn_org = sanitize_for_storage(asn_org, 100)
sanitized_list_on = sanitize_dict(list_on, 100000)
db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on)
return
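
Review note: the fetch loop issues `requests.get` with no timeout and assumes a JSON body, so one slow or malformed upstream response stalls or crashes the whole cron task. A more defensive per-IP fetch might look like this sketch (API shape taken from the code above; `fetch_iprep` and the logger argument are mine):

```python
import requests
from typing import Optional

def fetch_iprep(ip: str, app_logger) -> Optional[dict]:
    """Fetch reputation data for one IP, tolerating network and JSON errors."""
    try:
        response = requests.get(
            "https://iprep.lcrawl.com/api/iprep/",
            headers={"Content-Type": "application/json"},
            params={"cidr": ip},
            timeout=10,  # avoid hanging the cron task on a slow upstream
        )
        response.raise_for_status()
        payload = response.json()
    except (requests.RequestException, ValueError) as exc:
        app_logger.warning(f"iprep lookup failed for {ip}: {exc}")
        return None
    results = payload.get("results") or []
    return results[0] if results else None
```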


@@ -410,6 +410,12 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
color: #58a6ff;
font-size: 13px;
font-weight: 600;
}}
.timeline-header {{
display: flex;
justify-content: space-between;
align-items: center;
gap: 10px;
margin-bottom: 10px;
}}
.timeline {{
@@ -470,6 +476,56 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
color: #8b949e;
margin: 0 7px;
}}
.reputation-container {{
margin-top: 15px;
padding-top: 15px;
border-top: 1px solid #30363d;
}}
.reputation-title {{
color: #58a6ff;
font-size: 13px;
font-weight: 600;
}}
.reputation-badges {{
display: flex;
flex-wrap: wrap;
gap: 6px;
align-items: center;
}}
.reputation-badge {{
display: inline-flex;
align-items: center;
gap: 4px;
padding: 4px 8px;
background: #161b22;
border: 1px solid #f851494d;
border-radius: 4px;
font-size: 11px;
color: #f85149;
text-decoration: none;
transition: all 0.2s;
}}
.reputation-badge:hover {{
background: #1c2128;
border-color: #f85149;
}}
.reputation-badge-icon {{
font-size: 12px;
}}
.reputation-clean {{
display: inline-flex;
align-items: center;
gap: 6px;
padding: 4px 10px;
background: #161b22;
border: 1px solid #3fb9504d;
border-radius: 4px;
font-size: 11px;
color: #3fb950;
}}
.reputation-clean-icon {{
font-size: 13px;
}}
</style>
</head>
@@ -627,11 +683,9 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
</div>
</div>
<script>
// Server timezone configuration
const SERVER_TIMEZONE = '{timezone}';
const DASHBOARD_PATH = '{dashboard_path}';
// Convert UTC timestamp to configured timezone
function formatTimestamp(isoTimestamp) {{
if (!isoTimestamp) return 'N/A';
try {{
@@ -652,7 +706,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
}}
}}
// Add sorting functionality to tables
document.querySelectorAll('th.sortable').forEach(header => {{
header.addEventListener('click', function() {{
const table = this.closest('table');
@@ -661,30 +714,24 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
const sortType = this.getAttribute('data-sort');
const columnIndex = Array.from(this.parentElement.children).indexOf(this);
// Determine sort direction
const isAscending = this.classList.contains('asc');
// Remove sort classes from all headers in this table
table.querySelectorAll('th.sortable').forEach(th => {{
th.classList.remove('asc', 'desc');
}});
// Add appropriate class to clicked header
this.classList.add(isAscending ? 'desc' : 'asc');
// Sort rows
rows.sort((a, b) => {{
let aValue = a.cells[columnIndex].textContent.trim();
let bValue = b.cells[columnIndex].textContent.trim();
// Handle numeric sorting
if (sortType === 'count') {{
aValue = parseInt(aValue) || 0;
bValue = parseInt(bValue) || 0;
return isAscending ? bValue - aValue : aValue - bValue;
}}
// Handle IP address sorting
if (sortType === 'ip') {{
const ipToNum = ip => {{
const parts = ip.split('.');
@@ -696,7 +743,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
return isAscending ? bNum - aNum : aNum - bNum;
}}
// Default string sorting
if (isAscending) {{
return bValue.localeCompare(aValue);
}} else {{
@@ -704,12 +750,10 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
}}
}});
// Re-append sorted rows
rows.forEach(row => tbody.appendChild(row));
}});
}});
// IP stats dropdown functionality
document.querySelectorAll('.ip-clickable').forEach(cell => {{
cell.addEventListener('click', async function(e) {{
const row = e.currentTarget.closest('.ip-row');
@@ -731,7 +775,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
const dropdown = statsRow.querySelector('.ip-stats-dropdown');
// Always fetch fresh data from database
if (dropdown) {{
dropdown.innerHTML = '<div class="loading">Loading stats...</div>';
try {{
@@ -758,7 +801,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
function formatIpStats(stats) {{
let html = '<div class="stats-left">';
// Basic info
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Total Requests:</span>';
html += `<span class="stat-value-sm">${{stats.total_requests || 0}}</span>`;
@@ -774,16 +816,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
html += `<span class="stat-value-sm">${{formatTimestamp(stats.last_seen)}}</span>`;
html += '</div>';
// Category
if (stats.category) {{
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Category:</span>';
const categoryClass = 'category-' + stats.category.toLowerCase().replace('_', '-');
html += `<span class="category-badge ${{categoryClass}}">${{stats.category}}</span>`;
html += '</div>';
}}
// GeoIP info if available
if (stats.country_code || stats.city) {{
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Location:</span>';
@@ -798,28 +830,70 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
html += '</div>';
}}
// Reputation score if available
if (stats.reputation_score !== null && stats.reputation_score !== undefined) {{
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Reputation Score:</span>';
html += `<span class="stat-value-sm">${{stats.reputation_score}} ${{stats.reputation_source ? '(' + stats.reputation_source + ')' : ''}}</span>`;
html += '</div>';
}}
// Category History Timeline
if (stats.category) {{
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Category:</span>';
const categoryClass = 'category-' + stats.category.toLowerCase().replace('_', '-');
html += `<span class="category-badge ${{categoryClass}}">${{stats.category}}</span>`;
html += '</div>';
}}
if (stats.category_history && stats.category_history.length > 0) {{
html += '<div class="timeline-container">';
html += '<div class="timeline-title">Behavior Timeline</div>';
html += '<div class="timeline">';
html += '<div class="timeline-header">';
html += '<div class="timeline-title">Behavior Timeline</div>';
if (stats.list_on && Object.keys(stats.list_on).length > 0) {{
html += '<div class="reputation-badges">';
html += '<span class="reputation-title" style="margin-bottom:0; margin-right:4px;">Listed on</span>';
const sortedSources = Object.entries(stats.list_on).sort((a, b) => a[0].localeCompare(b[0]));
sortedSources.forEach(([source, url]) => {{
if (url && url !== 'N/A') {{
html += `<a href="${{url}}" target="_blank" rel="noopener noreferrer" class="reputation-badge" title="Listed on ${{source}}">`;
html += '<span class="reputation-badge-icon"></span>';
html += `<span>${{source}}</span>`;
html += '</a>';
}} else {{
html += '<span class="reputation-badge" style="cursor: default;" title="Listed on">';
html += '<span class="reputation-badge-icon"></span>';
html += `<span>${{source}}</span>`;
html += '</span>';
}}
}});
html += '</div>';
}} else if (stats.country_code || stats.asn) {{
html += '<div class="reputation-badges">';
html += '<span class="reputation-title" style="margin-bottom:0; margin-right:4px;">Reputation</span>';
html += '<span class="reputation-clean" title="Not found on public blacklists">';
html += '<span class="reputation-clean-icon">✓</span>';
html += '<span>Clean</span>';
html += '</span>';
html += '</div>';
}}
html += '</div>';
html += '<div class="timeline">';
stats.category_history.forEach((change, index) => {{
const categoryClass = change.new_category.toLowerCase().replace('_', '-');
const timestamp = formatTimestamp(change.timestamp);
html += '<div class="timeline-item">';
html += `<div class="timeline-marker ${{categoryClass}}"></div>`;
html += '<div class="timeline-content">';
if (change.old_category) {{
const oldCategoryBadge = 'category-' + change.old_category.toLowerCase().replace('_', '-');
html += `<span class="category-badge ${{oldCategoryBadge}}">${{change.old_category}}</span>`;
@@ -827,21 +901,20 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
}} else {{
html += '<span style="color: #8b949e;">Initial:</span> ';
}}
const newCategoryBadge = 'category-' + change.new_category.toLowerCase().replace('_', '-');
html += `<span class="category-badge ${{newCategoryBadge}}">${{change.new_category}}</span>`;
html += `<div class="timeline-timestamp">${{timestamp}}</div>`;
html += '</div>';
html += '</div>';
}});
html += '</div>';
html += '</div>';
}}
html += '</div>';
// Radar chart on the right
if (stats.category_scores && Object.keys(stats.category_scores).length > 0) {{
html += '<div class="stats-right">';
html += '<div style="font-size: 13px; font-weight: 600; color: #58a6ff; margin-bottom: 10px;">Category Score</div>';
@@ -855,13 +928,11 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
unknown: stats.category_scores.unknown || 0
}};
// Normalize scores for better visualization
const maxScore = Math.max(...Object.values(scores), 1);
const minVisibleRadius = 0.15; // Minimum 15% visibility even for 0 values
const minVisibleRadius = 0.15;
const normalizedScores = {{}};
Object.keys(scores).forEach(key => {{
// Scale values: ensure minimum visibility + proportional to max
normalizedScores[key] = minVisibleRadius + (scores[key] / maxScore) * (1 - minVisibleRadius);
}});
@@ -881,14 +952,12 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
unknown: 'Unknown'
}};
// Draw radar background grid
const cx = 100, cy = 100, maxRadius = 75;
for (let i = 1; i <= 5; i++) {{
const r = (maxRadius / 5) * i;
html += `<circle cx="${{cx}}" cy="${{cy}}" r="${{r}}" fill="none" stroke="#30363d" stroke-width="0.5"/>`;
}}
// Draw axes (now with 5 points for pentagon)
const angles = [0, 72, 144, 216, 288];
const keys = ['good_crawler', 'regular_user', 'unknown', 'bad_crawler', 'attacker'];
@@ -898,14 +967,12 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
const y2 = cy + maxRadius * Math.sin(rad);
html += `<line x1="${{cx}}" y1="${{cy}}" x2="${{x2}}" y2="${{y2}}" stroke="#30363d" stroke-width="0.5"/>`;
// Add labels at consistent distance
const labelDist = maxRadius + 35;
const lx = cx + labelDist * Math.cos(rad);
const ly = cy + labelDist * Math.sin(rad);
html += `<text x="${{lx}}" y="${{ly}}" fill="#8b949e" font-size="12" text-anchor="middle" dominant-baseline="middle">${{labels[keys[i]]}}</text>`;
}});
// Draw filled polygon for scores
let points = [];
angles.forEach((angle, i) => {{
const normalizedScore = normalizedScores[keys[i]];
@@ -916,14 +983,11 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
points.push(`${{x}},${{y}}`);
}});
// Determine dominant category color
const dominantKey = Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? a : b);
const dominantColor = colors[dominantKey];
// Draw single colored area
html += `<polygon points="${{points.join(' ')}}" fill="${{dominantColor}}" fill-opacity="0.4" stroke="${{dominantColor}}" stroke-width="2.5"/>`;
// Draw points
angles.forEach((angle, i) => {{
const normalizedScore = normalizedScores[keys[i]];
const rad = (angle - 90) * Math.PI / 180;
@@ -935,7 +999,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
html += '</svg>';
// Legend
html += '<div class="radar-legend">';
keys.forEach(key => {{
html += '<div class="radar-legend-item">';