made ip analysis and ip rep info fetch a scheduled task
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -76,3 +76,6 @@ data/
|
||||
# Personal canary tokens or sensitive configs
|
||||
*canary*token*.yaml
|
||||
personal-values.yaml
|
||||
|
||||
#exports dir (keeping .gitkeep so we have the dir)
|
||||
/exports/*
|
||||
@@ -14,9 +14,10 @@ RUN pip install --no-cache-dir -r requirements.txt
|
||||
COPY src/ /app/src/
|
||||
COPY wordlists.json /app/
|
||||
COPY entrypoint.sh /app/
|
||||
COPY config.yaml /app/
|
||||
|
||||
RUN useradd -m -u 1000 krawl && \
|
||||
mkdir -p /app/logs /app/data && \
|
||||
mkdir -p /app/logs /app/data /app/exports && \
|
||||
chown -R krawl:krawl /app && \
|
||||
chmod +x /app/entrypoint.sh
|
||||
|
||||
|
||||
18
config.yaml
18
config.yaml
@@ -3,7 +3,7 @@
|
||||
server:
|
||||
port: 5000
|
||||
delay: 100 # Response delay in milliseconds
|
||||
timezone: null # e.g., "America/New_York" or null for system default
|
||||
timezone: null # e.g., "America/New_York", "Europe/Paris" or null for system default
|
||||
|
||||
# manually set the server header, if null a random one will be used.
|
||||
server_header: null
|
||||
@@ -11,8 +11,8 @@ server:
|
||||
links:
|
||||
min_length: 5
|
||||
max_length: 15
|
||||
min_per_page: 10
|
||||
max_per_page: 15
|
||||
min_per_page: 5
|
||||
max_per_page: 10
|
||||
char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
|
||||
max_counter: 10
|
||||
|
||||
@@ -38,9 +38,9 @@ behavior:
|
||||
probability_error_codes: 0 # 0-100 percentage
|
||||
|
||||
analyzer:
|
||||
# http_risky_methods_threshold: 0.1
|
||||
# violated_robots_threshold: 0.1
|
||||
# uneven_request_timing_threshold: 5
|
||||
# uneven_request_timing_time_window_seconds: 300
|
||||
# user_agents_used_threshold: 2
|
||||
# attack_urls_threshold: 1
|
||||
http_risky_methods_threshold: 0.1
|
||||
violated_robots_threshold: 0.1
|
||||
uneven_request_timing_threshold: 2
|
||||
uneven_request_timing_time_window_seconds: 300
|
||||
user_agents_used_threshold: 2
|
||||
attack_urls_threshold: 1
|
||||
|
||||
@@ -12,6 +12,7 @@ services:
|
||||
- ./wordlists.json:/app/wordlists.json:ro
|
||||
- ./config.yaml:/app/config.yaml:ro
|
||||
- ./logs:/app/logs
|
||||
- ./exports:/app/exports
|
||||
environment:
|
||||
- CONFIG_LOCATION=config.yaml
|
||||
restart: unless-stopped
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
set -e
|
||||
|
||||
# Fix ownership of mounted directories
|
||||
chown -R krawl:krawl /app/logs /app/data 2>/dev/null || true
|
||||
chown -R krawl:krawl /app/logs /app/data /app/exports 2>/dev/null || true
|
||||
|
||||
# Drop to krawl user and run the application
|
||||
exec gosu krawl "$@"
|
||||
|
||||
0
exports/.gitkeep
Normal file
0
exports/.gitkeep
Normal file
@@ -6,3 +6,6 @@ PyYAML>=6.0
|
||||
|
||||
# Database ORM
|
||||
SQLAlchemy>=2.0.0,<3.0.0
|
||||
|
||||
# Scheduling
|
||||
APScheduler>=3.11.2
|
||||
477
src/analyzer.py
477
src/analyzer.py
@@ -6,8 +6,10 @@ from zoneinfo import ZoneInfo
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
import re
|
||||
import urllib.parse
|
||||
from wordlists import get_wordlists
|
||||
from config import get_config
|
||||
from logger import get_app_logger
|
||||
import requests
|
||||
from sanitizer import sanitize_for_storage, sanitize_dict
|
||||
|
||||
@@ -15,6 +17,8 @@ from sanitizer import sanitize_for_storage, sanitize_dict
|
||||
Functions for user activity analysis
|
||||
"""
|
||||
|
||||
app_logger = get_app_logger()
|
||||
|
||||
class Analyzer:
|
||||
"""
|
||||
Analyzes users activity and produces aggregated insights
|
||||
@@ -48,272 +52,299 @@ class Analyzer:
|
||||
pass
|
||||
return self._db_manager
|
||||
|
||||
def infer_user_category(self, ip: str) -> str:
|
||||
# def infer_user_category(self, ip: str) -> str:
|
||||
|
||||
config = get_config()
|
||||
# config = get_config()
|
||||
|
||||
http_risky_methods_threshold = config.http_risky_methods_threshold
|
||||
violated_robots_threshold = config.violated_robots_threshold
|
||||
uneven_request_timing_threshold = config.uneven_request_timing_threshold
|
||||
user_agents_used_threshold = config.user_agents_used_threshold
|
||||
attack_urls_threshold = config.attack_urls_threshold
|
||||
uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
|
||||
# http_risky_methods_threshold = config.http_risky_methods_threshold
|
||||
# violated_robots_threshold = config.violated_robots_threshold
|
||||
# uneven_request_timing_threshold = config.uneven_request_timing_threshold
|
||||
# user_agents_used_threshold = config.user_agents_used_threshold
|
||||
# attack_urls_threshold = config.attack_urls_threshold
|
||||
# uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
|
||||
|
||||
print(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
|
||||
# app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
|
||||
|
||||
score = {}
|
||||
score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
# score = {}
|
||||
# score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
# score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
# score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
# score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
|
||||
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
|
||||
weights = {
|
||||
"attacker": {
|
||||
"risky_http_methods": 6,
|
||||
"robots_violations": 4,
|
||||
"uneven_request_timing": 3,
|
||||
"different_user_agents": 8,
|
||||
"attack_url": 15
|
||||
},
|
||||
"good_crawler": {
|
||||
"risky_http_methods": 1,
|
||||
"robots_violations": 0,
|
||||
"uneven_request_timing": 0,
|
||||
"different_user_agents": 0,
|
||||
"attack_url": 0
|
||||
},
|
||||
"bad_crawler": {
|
||||
"risky_http_methods": 2,
|
||||
"robots_violations": 7,
|
||||
"uneven_request_timing": 0,
|
||||
"different_user_agents": 5,
|
||||
"attack_url": 5
|
||||
},
|
||||
"regular_user": {
|
||||
"risky_http_methods": 0,
|
||||
"robots_violations": 0,
|
||||
"uneven_request_timing": 8,
|
||||
"different_user_agents": 3,
|
||||
"attack_url": 0
|
||||
}
|
||||
}
|
||||
# #1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
|
||||
# weights = {
|
||||
# "attacker": {
|
||||
# "risky_http_methods": 6,
|
||||
# "robots_violations": 4,
|
||||
# "uneven_request_timing": 3,
|
||||
# "different_user_agents": 8,
|
||||
# "attack_url": 15
|
||||
# },
|
||||
# "good_crawler": {
|
||||
# "risky_http_methods": 1,
|
||||
# "robots_violations": 0,
|
||||
# "uneven_request_timing": 0,
|
||||
# "different_user_agents": 0,
|
||||
# "attack_url": 0
|
||||
# },
|
||||
# "bad_crawler": {
|
||||
# "risky_http_methods": 2,
|
||||
# "robots_violations": 7,
|
||||
# "uneven_request_timing": 0,
|
||||
# "different_user_agents": 5,
|
||||
# "attack_url": 5
|
||||
# },
|
||||
# "regular_user": {
|
||||
# "risky_http_methods": 0,
|
||||
# "robots_violations": 0,
|
||||
# "uneven_request_timing": 8,
|
||||
# "different_user_agents": 3,
|
||||
# "attack_url": 0
|
||||
# }
|
||||
# }
|
||||
|
||||
|
||||
accesses = self.db.get_access_logs(ip_filter = ip, limit=1000)
|
||||
total_accesses_count = len(accesses)
|
||||
if total_accesses_count <= 0:
|
||||
return
|
||||
# accesses = self.db.get_access_logs(ip_filter = ip, limit=1000)
|
||||
# total_accesses_count = len(accesses)
|
||||
# if total_accesses_count <= 0:
|
||||
# return
|
||||
|
||||
# # Set category as "unknown" for the first 5 requests
|
||||
# if total_accesses_count < 3:
|
||||
# category = "unknown"
|
||||
# analyzed_metrics = {}
|
||||
# category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
|
||||
# last_analysis = datetime.now(tz=ZoneInfo('UTC'))
|
||||
# self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
||||
# return 0
|
||||
|
||||
#--------------------- HTTP Methods ---------------------
|
||||
# #--------------------- HTTP Methods ---------------------
|
||||
|
||||
|
||||
get_accesses_count = len([item for item in accesses if item["method"] == "GET"])
|
||||
post_accesses_count = len([item for item in accesses if item["method"] == "POST"])
|
||||
put_accesses_count = len([item for item in accesses if item["method"] == "PUT"])
|
||||
delete_accesses_count = len([item for item in accesses if item["method"] == "DELETE"])
|
||||
head_accesses_count = len([item for item in accesses if item["method"] == "HEAD"])
|
||||
options_accesses_count = len([item for item in accesses if item["method"] == "OPTIONS"])
|
||||
patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"])
|
||||
# get_accesses_count = len([item for item in accesses if item["method"] == "GET"])
|
||||
# post_accesses_count = len([item for item in accesses if item["method"] == "POST"])
|
||||
# put_accesses_count = len([item for item in accesses if item["method"] == "PUT"])
|
||||
# delete_accesses_count = len([item for item in accesses if item["method"] == "DELETE"])
|
||||
# head_accesses_count = len([item for item in accesses if item["method"] == "HEAD"])
|
||||
# options_accesses_count = len([item for item in accesses if item["method"] == "OPTIONS"])
|
||||
# patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"])
|
||||
|
||||
if total_accesses_count > http_risky_methods_threshold:
|
||||
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
|
||||
else:
|
||||
http_method_attacker_score = 0
|
||||
# if total_accesses_count > http_risky_methods_threshold:
|
||||
# http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
|
||||
# else:
|
||||
# http_method_attacker_score = 0
|
||||
|
||||
#print(f"HTTP Method attacker score: {http_method_attacker_score}")
|
||||
if http_method_attacker_score >= http_risky_methods_threshold:
|
||||
score["attacker"]["risky_http_methods"] = True
|
||||
score["good_crawler"]["risky_http_methods"] = False
|
||||
score["bad_crawler"]["risky_http_methods"] = True
|
||||
score["regular_user"]["risky_http_methods"] = False
|
||||
else:
|
||||
score["attacker"]["risky_http_methods"] = False
|
||||
score["good_crawler"]["risky_http_methods"] = True
|
||||
score["bad_crawler"]["risky_http_methods"] = False
|
||||
score["regular_user"]["risky_http_methods"] = False
|
||||
# #print(f"HTTP Method attacker score: {http_method_attacker_score}")
|
||||
# if http_method_attacker_score >= http_risky_methods_threshold:
|
||||
# score["attacker"]["risky_http_methods"] = True
|
||||
# score["good_crawler"]["risky_http_methods"] = False
|
||||
# score["bad_crawler"]["risky_http_methods"] = True
|
||||
# score["regular_user"]["risky_http_methods"] = False
|
||||
# else:
|
||||
# score["attacker"]["risky_http_methods"] = False
|
||||
# score["good_crawler"]["risky_http_methods"] = True
|
||||
# score["bad_crawler"]["risky_http_methods"] = False
|
||||
# score["regular_user"]["risky_http_methods"] = False
|
||||
|
||||
#--------------------- Robots Violations ---------------------
|
||||
#respect robots.txt and login/config pages access frequency
|
||||
robots_disallows = []
|
||||
robots_path = Path(__file__).parent / "templates" / "html" / "robots.txt"
|
||||
with open(robots_path, "r") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
parts = line.split(":")
|
||||
# #--------------------- Robots Violations ---------------------
|
||||
# #respect robots.txt and login/config pages access frequency
|
||||
# robots_disallows = []
|
||||
# robots_path = Path(__file__).parent / "templates" / "html" / "robots.txt"
|
||||
# with open(robots_path, "r") as f:
|
||||
# for line in f:
|
||||
# line = line.strip()
|
||||
# if not line:
|
||||
# continue
|
||||
# parts = line.split(":")
|
||||
|
||||
if parts[0] == "Disallow":
|
||||
parts[1] = parts[1].rstrip("/")
|
||||
#print(f"DISALLOW {parts[1]}")
|
||||
robots_disallows.append(parts[1].strip())
|
||||
# if parts[0] == "Disallow":
|
||||
# parts[1] = parts[1].rstrip("/")
|
||||
# #print(f"DISALLOW {parts[1]}")
|
||||
# robots_disallows.append(parts[1].strip())
|
||||
|
||||
#if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
|
||||
violated_robots_count = len([item for item in accesses if item["path"].rstrip("/") in tuple(robots_disallows)])
|
||||
#print(f"Violated robots count: {violated_robots_count}")
|
||||
if total_accesses_count > 0:
|
||||
violated_robots_ratio = violated_robots_count / total_accesses_count
|
||||
else:
|
||||
violated_robots_ratio = 0
|
||||
# #if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
|
||||
# violated_robots_count = len([item for item in accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
|
||||
# #print(f"Violated robots count: {violated_robots_count}")
|
||||
# if total_accesses_count > 0:
|
||||
# violated_robots_ratio = violated_robots_count / total_accesses_count
|
||||
# else:
|
||||
# violated_robots_ratio = 0
|
||||
|
||||
if violated_robots_ratio >= violated_robots_threshold:
|
||||
score["attacker"]["robots_violations"] = True
|
||||
score["good_crawler"]["robots_violations"] = False
|
||||
score["bad_crawler"]["robots_violations"] = True
|
||||
score["regular_user"]["robots_violations"] = False
|
||||
else:
|
||||
score["attacker"]["robots_violations"] = False
|
||||
score["good_crawler"]["robots_violations"] = False
|
||||
score["bad_crawler"]["robots_violations"] = False
|
||||
score["regular_user"]["robots_violations"] = False
|
||||
# if violated_robots_ratio >= violated_robots_threshold:
|
||||
# score["attacker"]["robots_violations"] = True
|
||||
# score["good_crawler"]["robots_violations"] = False
|
||||
# score["bad_crawler"]["robots_violations"] = True
|
||||
# score["regular_user"]["robots_violations"] = False
|
||||
# else:
|
||||
# score["attacker"]["robots_violations"] = False
|
||||
# score["good_crawler"]["robots_violations"] = False
|
||||
# score["bad_crawler"]["robots_violations"] = False
|
||||
# score["regular_user"]["robots_violations"] = False
|
||||
|
||||
#--------------------- Requests Timing ---------------------
|
||||
#Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
|
||||
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses]
|
||||
timestamps = [ts for ts in timestamps if datetime.utcnow() - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
|
||||
timestamps = sorted(timestamps, reverse=True)
|
||||
# #--------------------- Requests Timing ---------------------
|
||||
# #Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
|
||||
# timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses]
|
||||
# now_utc = datetime.now(tz=ZoneInfo('UTC'))
|
||||
# timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
|
||||
# timestamps = sorted(timestamps, reverse=True)
|
||||
|
||||
time_diffs = []
|
||||
for i in range(0, len(timestamps)-1):
|
||||
diff = (timestamps[i] - timestamps[i+1]).total_seconds()
|
||||
time_diffs.append(diff)
|
||||
# time_diffs = []
|
||||
# for i in range(0, len(timestamps)-1):
|
||||
# diff = (timestamps[i] - timestamps[i+1]).total_seconds()
|
||||
# time_diffs.append(diff)
|
||||
|
||||
mean = 0
|
||||
variance = 0
|
||||
std = 0
|
||||
cv = 0
|
||||
if time_diffs:
|
||||
mean = sum(time_diffs) / len(time_diffs)
|
||||
variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
|
||||
std = variance ** 0.5
|
||||
cv = std/mean
|
||||
print(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
|
||||
# mean = 0
|
||||
# variance = 0
|
||||
# std = 0
|
||||
# cv = 0
|
||||
# if time_diffs:
|
||||
# mean = sum(time_diffs) / len(time_diffs)
|
||||
# variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
|
||||
# std = variance ** 0.5
|
||||
# cv = std/mean
|
||||
# app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
|
||||
|
||||
if cv >= uneven_request_timing_threshold:
|
||||
score["attacker"]["uneven_request_timing"] = True
|
||||
score["good_crawler"]["uneven_request_timing"] = False
|
||||
score["bad_crawler"]["uneven_request_timing"] = False
|
||||
score["regular_user"]["uneven_request_timing"] = True
|
||||
else:
|
||||
score["attacker"]["uneven_request_timing"] = False
|
||||
score["good_crawler"]["uneven_request_timing"] = False
|
||||
score["bad_crawler"]["uneven_request_timing"] = False
|
||||
score["regular_user"]["uneven_request_timing"] = False
|
||||
# if cv >= uneven_request_timing_threshold:
|
||||
# score["attacker"]["uneven_request_timing"] = True
|
||||
# score["good_crawler"]["uneven_request_timing"] = False
|
||||
# score["bad_crawler"]["uneven_request_timing"] = False
|
||||
# score["regular_user"]["uneven_request_timing"] = True
|
||||
# else:
|
||||
# score["attacker"]["uneven_request_timing"] = False
|
||||
# score["good_crawler"]["uneven_request_timing"] = False
|
||||
# score["bad_crawler"]["uneven_request_timing"] = False
|
||||
# score["regular_user"]["uneven_request_timing"] = False
|
||||
|
||||
|
||||
#--------------------- Different User Agents ---------------------
|
||||
#Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
|
||||
user_agents_used = [item["user_agent"] for item in accesses]
|
||||
user_agents_used = list(dict.fromkeys(user_agents_used))
|
||||
#print(f"User agents used: {user_agents_used}")
|
||||
# #--------------------- Different User Agents ---------------------
|
||||
# #Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
|
||||
# user_agents_used = [item["user_agent"] for item in accesses]
|
||||
# user_agents_used = list(dict.fromkeys(user_agents_used))
|
||||
# #print(f"User agents used: {user_agents_used}")
|
||||
|
||||
if len(user_agents_used) >= user_agents_used_threshold:
|
||||
score["attacker"]["different_user_agents"] = True
|
||||
score["good_crawler"]["different_user_agents"] = False
|
||||
score["bad_crawler"]["different_user_agentss"] = True
|
||||
score["regular_user"]["different_user_agents"] = False
|
||||
else:
|
||||
score["attacker"]["different_user_agents"] = False
|
||||
score["good_crawler"]["different_user_agents"] = False
|
||||
score["bad_crawler"]["different_user_agents"] = False
|
||||
score["regular_user"]["different_user_agents"] = False
|
||||
# if len(user_agents_used) >= user_agents_used_threshold:
|
||||
# score["attacker"]["different_user_agents"] = True
|
||||
# score["good_crawler"]["different_user_agents"] = False
|
||||
# score["bad_crawler"]["different_user_agentss"] = True
|
||||
# score["regular_user"]["different_user_agents"] = False
|
||||
# else:
|
||||
# score["attacker"]["different_user_agents"] = False
|
||||
# score["good_crawler"]["different_user_agents"] = False
|
||||
# score["bad_crawler"]["different_user_agents"] = False
|
||||
# score["regular_user"]["different_user_agents"] = False
|
||||
|
||||
#--------------------- Attack URLs ---------------------
|
||||
# #--------------------- Attack URLs ---------------------
|
||||
|
||||
attack_urls_found_list = []
|
||||
# attack_urls_found_list = []
|
||||
|
||||
wl = get_wordlists()
|
||||
if wl.attack_urls:
|
||||
queried_paths = [item["path"] for item in accesses]
|
||||
# wl = get_wordlists()
|
||||
# if wl.attack_patterns:
|
||||
# queried_paths = [item["path"] for item in accesses]
|
||||
|
||||
for queried_path in queried_paths:
|
||||
for name, pattern in wl.attack_urls.items():
|
||||
if re.search(pattern, queried_path, re.IGNORECASE):
|
||||
attack_urls_found_list.append(pattern)
|
||||
# for queried_path in queried_paths:
|
||||
# # URL decode the path to catch encoded attacks
|
||||
# try:
|
||||
# decoded_path = urllib.parse.unquote(queried_path)
|
||||
# # Double decode to catch double-encoded attacks
|
||||
# decoded_path_twice = urllib.parse.unquote(decoded_path)
|
||||
# except Exception:
|
||||
# decoded_path = queried_path
|
||||
# decoded_path_twice = queried_path
|
||||
|
||||
# for name, pattern in wl.attack_patterns.items():
|
||||
# # Check original, decoded, and double-decoded paths
|
||||
# if (re.search(pattern, queried_path, re.IGNORECASE) or
|
||||
# re.search(pattern, decoded_path, re.IGNORECASE) or
|
||||
# re.search(pattern, decoded_path_twice, re.IGNORECASE)):
|
||||
# attack_urls_found_list.append(f"{name}: {pattern}")
|
||||
|
||||
#remove duplicates
|
||||
attack_urls_found_list = set(attack_urls_found_list)
|
||||
attack_urls_found_list = list(attack_urls_found_list)
|
||||
# #remove duplicates
|
||||
# attack_urls_found_list = set(attack_urls_found_list)
|
||||
# attack_urls_found_list = list(attack_urls_found_list)
|
||||
|
||||
if len(attack_urls_found_list) > attack_urls_threshold:
|
||||
score["attacker"]["attack_url"] = True
|
||||
score["good_crawler"]["attack_url"] = False
|
||||
score["bad_crawler"]["attack_url"] = False
|
||||
score["regular_user"]["attack_url"] = False
|
||||
else:
|
||||
score["attacker"]["attack_url"] = False
|
||||
score["good_crawler"]["attack_url"] = False
|
||||
score["bad_crawler"]["attack_url"] = False
|
||||
score["regular_user"]["attack_url"] = False
|
||||
# if len(attack_urls_found_list) > attack_urls_threshold:
|
||||
# score["attacker"]["attack_url"] = True
|
||||
# score["good_crawler"]["attack_url"] = False
|
||||
# score["bad_crawler"]["attack_url"] = False
|
||||
# score["regular_user"]["attack_url"] = False
|
||||
# else:
|
||||
# score["attacker"]["attack_url"] = False
|
||||
# score["good_crawler"]["attack_url"] = False
|
||||
# score["bad_crawler"]["attack_url"] = False
|
||||
# score["regular_user"]["attack_url"] = False
|
||||
|
||||
#--------------------- Calculate score ---------------------
|
||||
# #--------------------- Calculate score ---------------------
|
||||
|
||||
attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
|
||||
# attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
|
||||
|
||||
attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
|
||||
attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
|
||||
attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
|
||||
attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
|
||||
attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
|
||||
# attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
|
||||
# attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
|
||||
# attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
|
||||
# attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
|
||||
# attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
|
||||
|
||||
good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
|
||||
good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
|
||||
good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
|
||||
good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
|
||||
good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
|
||||
# good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
|
||||
# good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
|
||||
# good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
|
||||
# good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
|
||||
# good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
|
||||
|
||||
bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
|
||||
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
|
||||
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
|
||||
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
|
||||
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
|
||||
# bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
|
||||
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
|
||||
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
|
||||
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
|
||||
# bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
|
||||
|
||||
regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
|
||||
regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
|
||||
regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
|
||||
regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
|
||||
regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
|
||||
# regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
|
||||
# regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
|
||||
# regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
|
||||
# regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
|
||||
# regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
|
||||
|
||||
print(f"Attacker score: {attacker_score}")
|
||||
print(f"Good Crawler score: {good_crawler_score}")
|
||||
print(f"Bad Crawler score: {bad_crawler_score}")
|
||||
print(f"Regular User score: {regular_user_score}")
|
||||
# score_details = f"""
|
||||
# Attacker score: {attacker_score}
|
||||
# Good Crawler score: {good_crawler_score}
|
||||
# Bad Crawler score: {bad_crawler_score}
|
||||
# Regular User score: {regular_user_score}
|
||||
# """
|
||||
# app_logger.debug(score_details)
|
||||
|
||||
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
|
||||
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
|
||||
category = max(category_scores, key=category_scores.get)
|
||||
last_analysis = datetime.utcnow()
|
||||
# analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
|
||||
# category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
|
||||
# category = max(category_scores, key=category_scores.get)
|
||||
# last_analysis = datetime.now(tz=ZoneInfo('UTC'))
|
||||
|
||||
self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
||||
# self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
||||
|
||||
return 0
|
||||
# return 0
|
||||
|
||||
def update_ip_rep_infos(self, ip: str) -> list[str]:
|
||||
api_url = "https://iprep.lcrawl.com/api/iprep/"
|
||||
params = {
|
||||
"cidr": ip
|
||||
}
|
||||
headers = {
|
||||
"Content-Type": "application/json"
|
||||
}
|
||||
|
||||
response = requests.get(api_url, headers=headers, params=params)
|
||||
payload = response.json()
|
||||
|
||||
if payload["results"]:
|
||||
data = payload["results"][0]
|
||||
# def update_ip_rep_infos(self, ip: str) -> list[str]:
|
||||
# api_url = "https://iprep.lcrawl.com/api/iprep/"
|
||||
# params = {
|
||||
# "cidr": ip
|
||||
# }
|
||||
# headers = {
|
||||
# "Content-Type": "application/json"
|
||||
# }
|
||||
|
||||
country_iso_code = data["geoip_data"]["country_iso_code"]
|
||||
asn = data["geoip_data"]["asn_autonomous_system_number"]
|
||||
asn_org = data["geoip_data"]["asn_autonomous_system_organization"]
|
||||
list_on = data["list_on"]
|
||||
# response = requests.get(api_url, headers=headers, params=params)
|
||||
# payload = response.json()
|
||||
|
||||
sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
|
||||
sanitized_asn = sanitize_for_storage(asn, 100)
|
||||
sanitized_asn_org = sanitize_for_storage(asn_org, 100)
|
||||
sanitized_list_on = sanitize_dict(list_on, 100000)
|
||||
# if payload["results"]:
|
||||
# data = payload["results"][0]
|
||||
|
||||
# country_iso_code = data["geoip_data"]["country_iso_code"]
|
||||
# asn = data["geoip_data"]["asn_autonomous_system_number"]
|
||||
# asn_org = data["geoip_data"]["asn_autonomous_system_organization"]
|
||||
# list_on = data["list_on"]
|
||||
|
||||
# sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
|
||||
# sanitized_asn = sanitize_for_storage(asn, 100)
|
||||
# sanitized_asn_org = sanitize_for_storage(asn_org, 100)
|
||||
# sanitized_list_on = sanitize_dict(list_on, 100000)
|
||||
|
||||
self._db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on)
|
||||
# self._db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on)
|
||||
|
||||
return
|
||||
# return
|
||||
154
src/database.py
154
src/database.py
@@ -9,11 +9,12 @@ import os
|
||||
import stat
|
||||
from datetime import datetime
|
||||
from typing import Optional, List, Dict, Any
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
from sqlalchemy import create_engine, func, distinct, case
|
||||
from sqlalchemy.orm import sessionmaker, scoped_session, Session
|
||||
|
||||
from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats
|
||||
from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats, CategoryHistory
|
||||
from sanitizer import (
|
||||
sanitize_ip,
|
||||
sanitize_path,
|
||||
@@ -22,6 +23,9 @@ from sanitizer import (
|
||||
sanitize_attack_pattern,
|
||||
)
|
||||
|
||||
from logger import get_app_logger
|
||||
|
||||
applogger = get_app_logger()
|
||||
|
||||
class DatabaseManager:
|
||||
"""
|
||||
@@ -127,7 +131,7 @@ class DatabaseManager:
|
||||
method=method[:10],
|
||||
is_suspicious=is_suspicious,
|
||||
is_honeypot_trigger=is_honeypot_trigger,
|
||||
timestamp=datetime.utcnow()
|
||||
timestamp=datetime.now(tz=ZoneInfo('UTC'))
|
||||
)
|
||||
session.add(access_log)
|
||||
session.flush() # Get the ID before committing
|
||||
@@ -154,7 +158,7 @@ class DatabaseManager:
|
||||
except Exception as e:
|
||||
session.rollback()
|
||||
# Log error but don't crash - database persistence is secondary to honeypot function
|
||||
print(f"Database error persisting access: {e}")
|
||||
applogger.critical(f"Database error persisting access: {e}")
|
||||
return None
|
||||
finally:
|
||||
self.close_session()
|
||||
@@ -185,7 +189,7 @@ class DatabaseManager:
|
||||
path=sanitize_path(path),
|
||||
username=sanitize_credential(username),
|
||||
password=sanitize_credential(password),
|
||||
timestamp=datetime.utcnow()
|
||||
timestamp=datetime.now(tz=ZoneInfo('UTC'))
|
||||
)
|
||||
session.add(credential)
|
||||
session.commit()
|
||||
@@ -193,7 +197,7 @@ class DatabaseManager:
|
||||
|
||||
except Exception as e:
|
||||
session.rollback()
|
||||
print(f"Database error persisting credential: {e}")
|
||||
applogger.critical(f"Database error persisting credential: {e}")
|
||||
return None
|
||||
finally:
|
||||
self.close_session()
|
||||
@@ -207,7 +211,7 @@ class DatabaseManager:
|
||||
ip: IP address to update
|
||||
"""
|
||||
sanitized_ip = sanitize_ip(ip)
|
||||
now = datetime.utcnow()
|
||||
now = datetime.now(tz=ZoneInfo('UTC'))
|
||||
|
||||
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
||||
|
||||
@@ -226,6 +230,7 @@ class DatabaseManager:
|
||||
def update_ip_stats_analysis(self, ip: str, analyzed_metrics: Dict[str, object], category: str, category_scores: Dict[str, int], last_analysis: datetime) -> None:
|
||||
"""
|
||||
Update IP statistics (ip is already persisted).
|
||||
Records category change in history if category has changed.
|
||||
|
||||
Args:
|
||||
ip: IP address to update
|
||||
@@ -235,16 +240,28 @@ class DatabaseManager:
|
||||
last_analysis: timestamp of last analysis
|
||||
|
||||
"""
|
||||
print(f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}")
|
||||
applogger.debug(f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}")
|
||||
applogger.info(f"IP: {ip} category has been updated to {category}")
|
||||
|
||||
session = self.session
|
||||
sanitized_ip = sanitize_ip(ip)
|
||||
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
||||
|
||||
# Check if category has changed and record it
|
||||
old_category = ip_stats.category
|
||||
if old_category != category:
|
||||
self._record_category_change(sanitized_ip, old_category, category, last_analysis)
|
||||
|
||||
ip_stats.analyzed_metrics = analyzed_metrics
|
||||
ip_stats.category = category
|
||||
ip_stats.category_scores = category_scores
|
||||
ip_stats.last_analysis = last_analysis
|
||||
|
||||
try:
|
||||
session.commit()
|
||||
except Exception as e:
|
||||
session.rollback()
|
||||
print(f"Error updating IP stats analysis: {e}")
|
||||
|
||||
def manual_update_category(self, ip: str, category: str) -> None:
|
||||
"""
|
||||
@@ -256,13 +273,81 @@ class DatabaseManager:
|
||||
|
||||
"""
|
||||
session = self.session
|
||||
|
||||
sanitized_ip = sanitize_ip(ip)
|
||||
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
||||
|
||||
|
||||
# Record the manual category change
|
||||
old_category = ip_stats.category
|
||||
if old_category != category:
|
||||
self._record_category_change(sanitized_ip, old_category, category, datetime.now(tz=ZoneInfo('UTC')))
|
||||
|
||||
ip_stats.category = category
|
||||
ip_stats.manual_category = True
|
||||
|
||||
try:
|
||||
session.commit()
|
||||
except Exception as e:
|
||||
session.rollback()
|
||||
print(f"Error updating manual category: {e}")
|
||||
|
||||
def _record_category_change(self, ip: str, old_category: Optional[str], new_category: str, timestamp: datetime) -> None:
|
||||
"""
|
||||
Internal method to record category changes in history.
|
||||
Only records if there's an actual change from a previous category.
|
||||
|
||||
Args:
|
||||
ip: IP address
|
||||
old_category: Previous category (None if first categorization)
|
||||
new_category: New category
|
||||
timestamp: When the change occurred
|
||||
"""
|
||||
# Don't record initial categorization (when old_category is None)
|
||||
# Only record actual category changes
|
||||
if old_category is None:
|
||||
return
|
||||
|
||||
session = self.session
|
||||
try:
|
||||
history_entry = CategoryHistory(
|
||||
ip=ip,
|
||||
old_category=old_category,
|
||||
new_category=new_category,
|
||||
timestamp=timestamp
|
||||
)
|
||||
session.add(history_entry)
|
||||
session.commit()
|
||||
except Exception as e:
|
||||
session.rollback()
|
||||
applogger.error(f"Error recording category change: {e}")
|
||||
|
||||
def get_category_history(self, ip: str) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Retrieve category change history for a specific IP.
|
||||
|
||||
Args:
|
||||
ip: IP address to get history for
|
||||
|
||||
Returns:
|
||||
List of category change records ordered by timestamp
|
||||
"""
|
||||
session = self.session
|
||||
try:
|
||||
sanitized_ip = sanitize_ip(ip)
|
||||
history = session.query(CategoryHistory).filter(
|
||||
CategoryHistory.ip == sanitized_ip
|
||||
).order_by(CategoryHistory.timestamp.asc()).all()
|
||||
|
||||
return [
|
||||
{
|
||||
'old_category': h.old_category,
|
||||
'new_category': h.new_category,
|
||||
'timestamp': h.timestamp.isoformat() + '+00:00'
|
||||
}
|
||||
for h in history
|
||||
]
|
||||
finally:
|
||||
self.close_session()
|
||||
|
||||
def update_ip_rep_infos(self, ip: str, country_code: str, asn: str, asn_org: str, list_on: Dict[str,str]) -> None:
|
||||
"""
|
||||
@@ -326,7 +411,7 @@ class DatabaseManager:
|
||||
'method': log.method,
|
||||
'is_suspicious': log.is_suspicious,
|
||||
'is_honeypot_trigger': log.is_honeypot_trigger,
|
||||
'timestamp': log.timestamp.isoformat(),
|
||||
'timestamp': log.timestamp.isoformat() + '+00:00',
|
||||
'attack_types': [d.attack_type for d in log.attack_detections]
|
||||
}
|
||||
for log in logs
|
||||
@@ -419,7 +504,7 @@ class DatabaseManager:
|
||||
'path': attempt.path,
|
||||
'username': attempt.username,
|
||||
'password': attempt.password,
|
||||
'timestamp': attempt.timestamp.isoformat()
|
||||
'timestamp': attempt.timestamp.isoformat() + '+00:00'
|
||||
}
|
||||
for attempt in attempts
|
||||
]
|
||||
@@ -446,8 +531,8 @@ class DatabaseManager:
|
||||
{
|
||||
'ip': s.ip,
|
||||
'total_requests': s.total_requests,
|
||||
'first_seen': s.first_seen.isoformat(),
|
||||
'last_seen': s.last_seen.isoformat(),
|
||||
'first_seen': s.first_seen.isoformat() + '+00:00',
|
||||
'last_seen': s.last_seen.isoformat() + '+00:00',
|
||||
'country_code': s.country_code,
|
||||
'city': s.city,
|
||||
'asn': s.asn,
|
||||
@@ -464,6 +549,47 @@ class DatabaseManager:
|
||||
finally:
|
||||
self.close_session()
|
||||
|
||||
def get_ip_stats_by_ip(self, ip: str) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Retrieve IP statistics for a specific IP address.
|
||||
|
||||
Args:
|
||||
ip: The IP address to look up
|
||||
|
||||
Returns:
|
||||
Dictionary with IP stats or None if not found
|
||||
"""
|
||||
session = self.session
|
||||
try:
|
||||
stat = session.query(IpStats).filter(IpStats.ip == ip).first()
|
||||
|
||||
if not stat:
|
||||
return None
|
||||
|
||||
# Get category history for this IP
|
||||
category_history = self.get_category_history(ip)
|
||||
|
||||
return {
|
||||
'ip': stat.ip,
|
||||
'total_requests': stat.total_requests,
|
||||
'first_seen': stat.first_seen.isoformat() + '+00:00' if stat.first_seen else None,
|
||||
'last_seen': stat.last_seen.isoformat() + '+00:00' if stat.last_seen else None,
|
||||
'country_code': stat.country_code,
|
||||
'city': stat.city,
|
||||
'asn': stat.asn,
|
||||
'asn_org': stat.asn_org,
|
||||
'reputation_score': stat.reputation_score,
|
||||
'reputation_source': stat.reputation_source,
|
||||
'analyzed_metrics': stat.analyzed_metrics or {},
|
||||
'category': stat.category,
|
||||
'category_scores': stat.category_scores or {},
|
||||
'manual_category': stat.manual_category,
|
||||
'last_analysis': stat.last_analysis.isoformat() + '+00:00' if stat.last_analysis else None,
|
||||
'category_history': category_history
|
||||
}
|
||||
finally:
|
||||
self.close_session()
|
||||
|
||||
def get_dashboard_counts(self) -> Dict[str, int]:
|
||||
"""
|
||||
Get aggregate statistics for the dashboard.
|
||||
@@ -592,7 +718,7 @@ class DatabaseManager:
|
||||
'ip': log.ip,
|
||||
'path': log.path,
|
||||
'user_agent': log.user_agent,
|
||||
'timestamp': log.timestamp.isoformat()
|
||||
'timestamp': log.timestamp.isoformat() + '+00:00'
|
||||
}
|
||||
for log in logs
|
||||
]
|
||||
@@ -650,7 +776,7 @@ class DatabaseManager:
|
||||
'ip': log.ip,
|
||||
'path': log.path,
|
||||
'user_agent': log.user_agent,
|
||||
'timestamp': log.timestamp.isoformat(),
|
||||
'timestamp': log.timestamp.isoformat() + '+00:00',
|
||||
'attack_types': [d.attack_type for d in log.attack_detections]
|
||||
}
|
||||
for log in logs
|
||||
|
||||
1
src/exports/malicious_ips.txt
Normal file
1
src/exports/malicious_ips.txt
Normal file
@@ -0,0 +1 @@
|
||||
127.0.0.1
|
||||
@@ -407,17 +407,75 @@ class Handler(BaseHTTPRequestHandler):
|
||||
self.end_headers()
|
||||
try:
|
||||
stats = self.tracker.get_stats()
|
||||
self.wfile.write(generate_dashboard(stats).encode())
|
||||
timezone = str(self.config.timezone) if self.config.timezone else 'UTC'
|
||||
dashboard_path = self.config.dashboard_secret_path
|
||||
self.wfile.write(generate_dashboard(stats, timezone, dashboard_path).encode())
|
||||
except BrokenPipeError:
|
||||
pass
|
||||
except Exception as e:
|
||||
self.app_logger.error(f"Error generating dashboard: {e}")
|
||||
return
|
||||
|
||||
# API endpoint for fetching IP stats
|
||||
if self.config.dashboard_secret_path and self.path.startswith(f"{self.config.dashboard_secret_path}/api/ip-stats/"):
|
||||
ip_address = self.path.replace(f"{self.config.dashboard_secret_path}/api/ip-stats/", "")
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'application/json')
|
||||
self.send_header('Access-Control-Allow-Origin', '*')
|
||||
# Prevent browser caching - force fresh data from database every time
|
||||
self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0')
|
||||
self.send_header('Pragma', 'no-cache')
|
||||
self.send_header('Expires', '0')
|
||||
self.end_headers()
|
||||
try:
|
||||
from database import get_database
|
||||
import json
|
||||
db = get_database()
|
||||
ip_stats = db.get_ip_stats_by_ip(ip_address)
|
||||
if ip_stats:
|
||||
self.wfile.write(json.dumps(ip_stats).encode())
|
||||
else:
|
||||
self.wfile.write(json.dumps({'error': 'IP not found'}).encode())
|
||||
except BrokenPipeError:
|
||||
pass
|
||||
except Exception as e:
|
||||
self.app_logger.error(f"Error fetching IP stats: {e}")
|
||||
self.wfile.write(json.dumps({'error': str(e)}).encode())
|
||||
return
|
||||
|
||||
# API endpoint for downloading malicious IPs file
|
||||
if self.config.dashboard_secret_path and self.path == f"{self.config.dashboard_secret_path}/api/download/malicious_ips.txt":
|
||||
import os
|
||||
file_path = os.path.join(os.path.dirname(__file__), 'exports', 'malicious_ips.txt')
|
||||
try:
|
||||
if os.path.exists(file_path):
|
||||
with open(file_path, 'rb') as f:
|
||||
content = f.read()
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'text/plain')
|
||||
self.send_header('Content-Disposition', 'attachment; filename="malicious_ips.txt"')
|
||||
self.send_header('Content-Length', str(len(content)))
|
||||
self.end_headers()
|
||||
self.wfile.write(content)
|
||||
else:
|
||||
self.send_response(404)
|
||||
self.send_header('Content-type', 'text/plain')
|
||||
self.end_headers()
|
||||
self.wfile.write(b'File not found')
|
||||
except BrokenPipeError:
|
||||
pass
|
||||
except Exception as e:
|
||||
self.app_logger.error(f"Error serving malicious IPs file: {e}")
|
||||
self.send_response(500)
|
||||
self.send_header('Content-type', 'text/plain')
|
||||
self.end_headers()
|
||||
self.wfile.write(b'Internal server error')
|
||||
return
|
||||
|
||||
self.tracker.record_access(client_ip, self.path, user_agent, method='GET')
|
||||
|
||||
self.analyzer.infer_user_category(client_ip)
|
||||
self.analyzer.update_ip_rep_infos(client_ip)
|
||||
# self.analyzer.infer_user_category(client_ip)
|
||||
# self.analyzer.update_ip_rep_infos(client_ip)
|
||||
|
||||
if self.tracker.is_suspicious_user_agent(user_agent):
|
||||
self.access_logger.warning(f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}")
|
||||
|
||||
40
src/migrations/add_category_history.py
Normal file
40
src/migrations/add_category_history.py
Normal file
@@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Migration script to add CategoryHistory table to existing databases.
|
||||
Run this once to upgrade your database schema.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Add parent directory to path to import modules
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from database import get_database, DatabaseManager
|
||||
from models import Base, CategoryHistory
|
||||
|
||||
|
||||
def migrate():
|
||||
"""Create CategoryHistory table if it doesn't exist."""
|
||||
print("Starting migration: Adding CategoryHistory table...")
|
||||
|
||||
try:
|
||||
db = get_database()
|
||||
|
||||
# Initialize database if not already done
|
||||
if not db._initialized:
|
||||
db.initialize()
|
||||
|
||||
# Create only the CategoryHistory table
|
||||
CategoryHistory.__table__.create(db._engine, checkfirst=True)
|
||||
|
||||
print("✓ Migration completed successfully!")
|
||||
print(" - CategoryHistory table created")
|
||||
|
||||
except Exception as e:
|
||||
print(f"✗ Migration failed: {e}")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
migrate()
|
||||
@@ -150,4 +150,59 @@ class IpStats(Base):
|
||||
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<IpStats(ip='{self.ip}', total_requests={self.total_requests})>"
|
||||
return f"<IpStats(ip='{self.ip}', total_requests={self.total_requests})>"
|
||||
|
||||
|
||||
class CategoryHistory(Base):
|
||||
"""
|
||||
Records category changes for IP addresses over time.
|
||||
|
||||
Tracks when an IP's category changes, storing both the previous
|
||||
and new category along with timestamp for timeline visualization.
|
||||
"""
|
||||
__tablename__ = 'category_history'
|
||||
|
||||
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
|
||||
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
|
||||
old_category: Mapped[Optional[str]] = mapped_column(String(50), nullable=True)
|
||||
new_category: Mapped[str] = mapped_column(String(50), nullable=False)
|
||||
timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
|
||||
|
||||
# Composite index for efficient IP-based timeline queries
|
||||
__table_args__ = (
|
||||
Index('ix_category_history_ip_timestamp', 'ip', 'timestamp'),
|
||||
)
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return f"<CategoryHistory(ip='{self.ip}', {self.old_category} -> {self.new_category})>"
|
||||
|
||||
|
||||
# class IpLog(Base):
|
||||
# """
|
||||
# Records all IPs that have accessed the honeypot, along with aggregated stats and inferred user category.
|
||||
# """
|
||||
# __tablename__ = 'ip_logs'
|
||||
|
||||
# id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
|
||||
# ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
|
||||
# stats: Mapped[List[str]] = mapped_column(String(MAX_PATH_LENGTH))
|
||||
# category: Mapped[str] = mapped_column(String(15))
|
||||
# manual_category: Mapped[bool] = mapped_column(Boolean, default=False)
|
||||
# last_analysis: Mapped[datetime] = mapped_column(DateTime, index=True),
|
||||
|
||||
# # Relationship to attack detections
|
||||
# access_logs: Mapped[List["AccessLog"]] = relationship(
|
||||
# "AccessLog",
|
||||
# back_populates="ip",
|
||||
# cascade="all, delete-orphan"
|
||||
# )
|
||||
|
||||
# # Indexes for common queries
|
||||
# __table_args__ = (
|
||||
# Index('ix_access_logs_ip_timestamp', 'ip', 'timestamp'),
|
||||
# Index('ix_access_logs_is_suspicious', 'is_suspicious'),
|
||||
# Index('ix_access_logs_is_honeypot_trigger', 'is_honeypot_trigger'),
|
||||
# )
|
||||
|
||||
# def __repr__(self) -> str:
|
||||
# return f"<AccessLog(id={self.id}, ip='{self.ip}', path='{self.path[:50]}')>"
|
||||
@@ -14,6 +14,7 @@ from analyzer import Analyzer
|
||||
from handler import Handler
|
||||
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
|
||||
from database import initialize_database
|
||||
from tasks_master import get_tasksmaster
|
||||
|
||||
|
||||
def print_usage():
|
||||
@@ -92,6 +93,10 @@ def main():
|
||||
except IOError:
|
||||
app_logger.warning("Can't read input file. Using randomly generated links.")
|
||||
|
||||
# tasks master init
|
||||
tasks_master = get_tasksmaster()
|
||||
tasks_master.run_scheduled_tasks()
|
||||
|
||||
try:
|
||||
app_logger.info(f'Starting deception server on port {config.port}...')
|
||||
app_logger.info(f'Timezone configured: {tz.key}')
|
||||
|
||||
265
src/tasks/analyze_ips.py
Normal file
265
src/tasks/analyze_ips.py
Normal file
@@ -0,0 +1,265 @@
|
||||
from sqlalchemy import select
|
||||
from typing import Optional
|
||||
from database import get_database, DatabaseManager
|
||||
from zoneinfo import ZoneInfo
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
import re
|
||||
import urllib.parse
|
||||
from wordlists import get_wordlists
|
||||
from config import get_config
|
||||
from logger import get_app_logger
|
||||
import requests
|
||||
from sanitizer import sanitize_for_storage, sanitize_dict
|
||||
|
||||
# ----------------------
|
||||
# TASK CONFIG
|
||||
# ----------------------
|
||||
|
||||
TASK_CONFIG = {
|
||||
"name": "analyze-ips",
|
||||
"cron": "*/1 * * * *",
|
||||
"enabled": True,
|
||||
"run_when_loaded": True
|
||||
}
|
||||
|
||||
|
||||
def main():
|
||||
config = get_config()
|
||||
db_manager = get_database()
|
||||
app_logger = get_app_logger()
|
||||
|
||||
http_risky_methods_threshold = config.http_risky_methods_threshold
|
||||
violated_robots_threshold = config.violated_robots_threshold
|
||||
uneven_request_timing_threshold = config.uneven_request_timing_threshold
|
||||
user_agents_used_threshold = config.user_agents_used_threshold
|
||||
attack_urls_threshold = config.attack_urls_threshold
|
||||
uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
|
||||
app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
|
||||
score = {}
|
||||
score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
|
||||
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
|
||||
weights = {
|
||||
"attacker": {
|
||||
"risky_http_methods": 6,
|
||||
"robots_violations": 4,
|
||||
"uneven_request_timing": 3,
|
||||
"different_user_agents": 8,
|
||||
"attack_url": 15
|
||||
},
|
||||
"good_crawler": {
|
||||
"risky_http_methods": 1,
|
||||
"robots_violations": 0,
|
||||
"uneven_request_timing": 0,
|
||||
"different_user_agents": 0,
|
||||
"attack_url": 0
|
||||
},
|
||||
"bad_crawler": {
|
||||
"risky_http_methods": 2,
|
||||
"robots_violations": 7,
|
||||
"uneven_request_timing": 0,
|
||||
"different_user_agents": 5,
|
||||
"attack_url": 5
|
||||
},
|
||||
"regular_user": {
|
||||
"risky_http_methods": 0,
|
||||
"robots_violations": 0,
|
||||
"uneven_request_timing": 8,
|
||||
"different_user_agents": 3,
|
||||
"attack_url": 0
|
||||
}
|
||||
}
|
||||
accesses = db_manager.get_access_logs(limit=999999999)
|
||||
ips = {item['ip'] for item in accesses}
|
||||
|
||||
for ip in ips:
|
||||
ip_accesses = [item for item in accesses if item["ip"] == ip]
|
||||
total_accesses_count = len(accesses)
|
||||
if total_accesses_count <= 0:
|
||||
return
|
||||
|
||||
# Set category as "unknown" for the first 3 requests
|
||||
if total_accesses_count < 3:
|
||||
category = "unknown"
|
||||
analyzed_metrics = {}
|
||||
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
|
||||
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
|
||||
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
||||
return 0
|
||||
#--------------------- HTTP Methods ---------------------
|
||||
get_accesses_count = len([item for item in ip_accesses if item["method"] == "GET"])
|
||||
post_accesses_count = len([item for item in ip_accesses if item["method"] == "POST"])
|
||||
put_accesses_count = len([item for item in ip_accesses if item["method"] == "PUT"])
|
||||
delete_accesses_count = len([item for item in ip_accesses if item["method"] == "DELETE"])
|
||||
head_accesses_count = len([item for item in ip_accesses if item["method"] == "HEAD"])
|
||||
options_accesses_count = len([item for item in ip_accesses if item["method"] == "OPTIONS"])
|
||||
patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"])
|
||||
if total_accesses_count > http_risky_methods_threshold:
|
||||
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
|
||||
else:
|
||||
http_method_attacker_score = 0
|
||||
#print(f"HTTP Method attacker score: {http_method_attacker_score}")
|
||||
if http_method_attacker_score >= http_risky_methods_threshold:
|
||||
score["attacker"]["risky_http_methods"] = True
|
||||
score["good_crawler"]["risky_http_methods"] = False
|
||||
score["bad_crawler"]["risky_http_methods"] = True
|
||||
score["regular_user"]["risky_http_methods"] = False
|
||||
else:
|
||||
score["attacker"]["risky_http_methods"] = False
|
||||
score["good_crawler"]["risky_http_methods"] = True
|
||||
score["bad_crawler"]["risky_http_methods"] = False
|
||||
score["regular_user"]["risky_http_methods"] = False
|
||||
#--------------------- Robots Violations ---------------------
|
||||
#respect robots.txt and login/config pages access frequency
|
||||
robots_disallows = []
|
||||
robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt"
|
||||
with open(robots_path, "r") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
parts = line.split(":")
|
||||
|
||||
if parts[0] == "Disallow":
|
||||
parts[1] = parts[1].rstrip("/")
|
||||
#print(f"DISALLOW {parts[1]}")
|
||||
robots_disallows.append(parts[1].strip())
|
||||
#if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
|
||||
violated_robots_count = len([item for item in ip_accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
|
||||
#print(f"Violated robots count: {violated_robots_count}")
|
||||
if total_accesses_count > 0:
|
||||
violated_robots_ratio = violated_robots_count / total_accesses_count
|
||||
else:
|
||||
violated_robots_ratio = 0
|
||||
if violated_robots_ratio >= violated_robots_threshold:
|
||||
score["attacker"]["robots_violations"] = True
|
||||
score["good_crawler"]["robots_violations"] = False
|
||||
score["bad_crawler"]["robots_violations"] = True
|
||||
score["regular_user"]["robots_violations"] = False
|
||||
else:
|
||||
score["attacker"]["robots_violations"] = False
|
||||
score["good_crawler"]["robots_violations"] = False
|
||||
score["bad_crawler"]["robots_violations"] = False
|
||||
score["regular_user"]["robots_violations"] = False
|
||||
|
||||
#--------------------- Requests Timing ---------------------
|
||||
#Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
|
||||
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
|
||||
now_utc = datetime.now(tz=ZoneInfo('UTC'))
|
||||
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
|
||||
timestamps = sorted(timestamps, reverse=True)
|
||||
time_diffs = []
|
||||
for i in range(0, len(timestamps)-1):
|
||||
diff = (timestamps[i] - timestamps[i+1]).total_seconds()
|
||||
time_diffs.append(diff)
|
||||
|
||||
mean = 0
|
||||
variance = 0
|
||||
std = 0
|
||||
cv = 0
|
||||
if time_diffs:
|
||||
mean = sum(time_diffs) / len(time_diffs)
|
||||
variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
|
||||
std = variance ** 0.5
|
||||
cv = std/mean
|
||||
app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
|
||||
if cv >= uneven_request_timing_threshold:
|
||||
score["attacker"]["uneven_request_timing"] = True
|
||||
score["good_crawler"]["uneven_request_timing"] = False
|
||||
score["bad_crawler"]["uneven_request_timing"] = False
|
||||
score["regular_user"]["uneven_request_timing"] = True
|
||||
else:
|
||||
score["attacker"]["uneven_request_timing"] = False
|
||||
score["good_crawler"]["uneven_request_timing"] = False
|
||||
score["bad_crawler"]["uneven_request_timing"] = False
|
||||
score["regular_user"]["uneven_request_timing"] = False
|
||||
#--------------------- Different User Agents ---------------------
|
||||
#Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
|
||||
user_agents_used = [item["user_agent"] for item in ip_accesses]
|
||||
user_agents_used = list(dict.fromkeys(user_agents_used))
|
||||
#print(f"User agents used: {user_agents_used}")
|
||||
if len(user_agents_used) >= user_agents_used_threshold:
|
||||
score["attacker"]["different_user_agents"] = True
|
||||
score["good_crawler"]["different_user_agents"] = False
|
||||
score["bad_crawler"]["different_user_agentss"] = True
|
||||
score["regular_user"]["different_user_agents"] = False
|
||||
else:
|
||||
score["attacker"]["different_user_agents"] = False
|
||||
score["good_crawler"]["different_user_agents"] = False
|
||||
score["bad_crawler"]["different_user_agents"] = False
|
||||
score["regular_user"]["different_user_agents"] = False
|
||||
#--------------------- Attack URLs ---------------------
|
||||
attack_urls_found_list = []
|
||||
wl = get_wordlists()
|
||||
if wl.attack_patterns:
|
||||
queried_paths = [item["path"] for item in ip_accesses]
|
||||
for queried_path in queried_paths:
|
||||
# URL decode the path to catch encoded attacks
|
||||
try:
|
||||
decoded_path = urllib.parse.unquote(queried_path)
|
||||
# Double decode to catch double-encoded attacks
|
||||
decoded_path_twice = urllib.parse.unquote(decoded_path)
|
||||
except Exception:
|
||||
decoded_path = queried_path
|
||||
decoded_path_twice = queried_path
|
||||
|
||||
for name, pattern in wl.attack_patterns.items():
|
||||
# Check original, decoded, and double-decoded paths
|
||||
if (re.search(pattern, queried_path, re.IGNORECASE) or
|
||||
re.search(pattern, decoded_path, re.IGNORECASE) or
|
||||
re.search(pattern, decoded_path_twice, re.IGNORECASE)):
|
||||
attack_urls_found_list.append(f"{name}: {pattern}")
|
||||
|
||||
#remove duplicates
|
||||
attack_urls_found_list = set(attack_urls_found_list)
|
||||
attack_urls_found_list = list(attack_urls_found_list)
|
||||
|
||||
if len(attack_urls_found_list) >= attack_urls_threshold:
|
||||
score["attacker"]["attack_url"] = True
|
||||
score["good_crawler"]["attack_url"] = False
|
||||
score["bad_crawler"]["attack_url"] = False
|
||||
score["regular_user"]["attack_url"] = False
|
||||
else:
|
||||
score["attacker"]["attack_url"] = False
|
||||
score["good_crawler"]["attack_url"] = False
|
||||
score["bad_crawler"]["attack_url"] = False
|
||||
score["regular_user"]["attack_url"] = False
|
||||
#--------------------- Calculate score ---------------------
|
||||
attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
|
||||
attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
|
||||
attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
|
||||
attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
|
||||
attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
|
||||
attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
|
||||
good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
|
||||
good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
|
||||
good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
|
||||
good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
|
||||
good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
|
||||
bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
|
||||
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
|
||||
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
|
||||
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
|
||||
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
|
||||
regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
|
||||
regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
|
||||
regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
|
||||
regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
|
||||
regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
|
||||
score_details = f"""
|
||||
Attacker score: {attacker_score}
|
||||
Good Crawler score: {good_crawler_score}
|
||||
Bad Crawler score: {bad_crawler_score}
|
||||
Regular User score: {regular_user_score}
|
||||
"""
|
||||
app_logger.debug(score_details)
|
||||
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
|
||||
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
|
||||
category = max(category_scores, key=category_scores.get)
|
||||
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
|
||||
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
||||
return
|
||||
59
src/tasks/fetch_ip_rep.py
Normal file
59
src/tasks/fetch_ip_rep.py
Normal file
@@ -0,0 +1,59 @@
|
||||
from sqlalchemy import select
|
||||
from typing import Optional
|
||||
from database import get_database, DatabaseManager
|
||||
from zoneinfo import ZoneInfo
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
import re
|
||||
import urllib.parse
|
||||
from wordlists import get_wordlists
|
||||
from config import get_config
|
||||
from logger import get_app_logger
|
||||
import requests
|
||||
from sanitizer import sanitize_for_storage, sanitize_dict
|
||||
|
||||
# ----------------------
# TASK CONFIG
# ----------------------

# Scheduling metadata consumed by TasksMaster: runs every minute and once
# immediately when the scheduler loads.
TASK_CONFIG = dict(
    name="fetch-ip-rep",
    cron="*/1 * * * *",
    enabled=True,
    run_when_loaded=True,
)
|
||||
|
||||
|
||||
def main():
    """
    Fetch IP reputation data for every IP seen in the access logs.

    For each distinct IP, queries the iprep API and persists the sanitized
    country / ASN / blocklist information through the database manager.
    TasksMaster calls this function on the cron schedule in TASK_CONFIG.
    """
    db_manager = get_database()
    app_logger = get_app_logger()

    # Build the distinct set of IPs seen so far.
    accesses = db_manager.get_access_logs(limit=999999999)
    ips = {item['ip'] for item in accesses}

    api_url = "https://iprep.lcrawl.com/api/iprep/"
    headers = {
        "Content-Type": "application/json"
    }

    for ip in ips:
        params = {
            "cidr": ip
        }
        try:
            # timeout so a stalled API call cannot hang the scheduler thread
            response = requests.get(api_url, headers=headers, params=params, timeout=10)
            response.raise_for_status()
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            # one failed lookup must not abort the whole batch
            app_logger.error(f"[Background Task] fetch-ip-rep: lookup failed for {ip}: {e}")
            continue

        if not payload.get("results"):
            continue

        data = payload["results"][0]
        try:
            geoip = data["geoip_data"]
            country_iso_code = geoip["country_iso_code"]
            asn = geoip["asn_autonomous_system_number"]
            asn_org = geoip["asn_autonomous_system_organization"]
            list_on = data["list_on"]
        except (KeyError, TypeError) as e:
            app_logger.error(f"[Background Task] fetch-ip-rep: unexpected payload for {ip}: {e}")
            continue

        # sanitize everything before it touches the database
        sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
        sanitized_asn = sanitize_for_storage(asn, 100)
        sanitized_asn_org = sanitize_for_storage(asn_org, 100)
        sanitized_list_on = sanitize_dict(list_on, 100000)

        db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on)

    return
|
||||
57
src/tasks/top_attacking_ips.py
Normal file
57
src/tasks/top_attacking_ips.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# tasks/export_malicious_ips.py
|
||||
|
||||
import os
|
||||
from logger import get_app_logger
|
||||
from database import get_database
|
||||
from models import AccessLog
|
||||
from sqlalchemy import distinct
|
||||
|
||||
app_logger = get_app_logger()
|
||||
|
||||
# ----------------------
# TASK CONFIG
# ----------------------

# Scheduling metadata consumed by TasksMaster: runs every five minutes and
# once immediately when the scheduler loads.
TASK_CONFIG = dict(
    name="export-malicious-ips",
    cron="*/5 * * * *",
    enabled=True,
    run_when_loaded=True,
)

# All exports land in this directory; the output file holds one IP per line.
EXPORTS_DIR = "exports"
OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
|
||||
|
||||
# ----------------------
|
||||
# TASK LOGIC
|
||||
# ----------------------
|
||||
def main():
    """
    Export all IPs flagged as suspicious to a text file.

    TasksMaster will call this function based on the cron schedule.
    Writes one IP per line to OUTPUT_FILE, creating EXPORTS_DIR if needed.
    """
    task_name = TASK_CONFIG.get("name")
    app_logger.info(f"[Background Task] {task_name} starting...")

    # Initialize before the try so the finally block never hits an unbound
    # name if get_database() itself raises.
    db = None
    try:
        db = get_database()
        session = db.session

        # Query distinct suspicious IPs
        results = session.query(distinct(AccessLog.ip)).filter(
            AccessLog.is_suspicious == True  # noqa: E712 - SQLAlchemy needs the comparison expression
        ).all()

        # Ensure exports directory exists
        os.makedirs(EXPORTS_DIR, exist_ok=True)

        # Write IPs to file (one per line)
        with open(OUTPUT_FILE, 'w') as f:
            for (ip,) in results:
                f.write(f"{ip}\n")

        app_logger.info(f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}")

    except Exception as e:
        app_logger.error(f"[Background Task] {task_name} failed: {e}")
    finally:
        if db is not None:
            db.close_session()
|
||||
288
src/tasks_master.py
Normal file
288
src/tasks_master.py
Normal file
@@ -0,0 +1,288 @@
|
||||
import os
import sys
import datetime
import functools
import threading
import importlib
import importlib.util

from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger

app_logger = get_app_logger()

try:
    from apscheduler.schedulers.background import BackgroundScheduler
    from apscheduler.triggers.cron import CronTrigger
    from apscheduler.events import EVENT_JOB_EXECUTED, EVENT_JOB_ERROR
except ModuleNotFoundError:
    msg = (
        "Required modules are not installed. "
        "Can not continue with module / application loading.\n"
        "Install it with: pip install -r requirements.txt"
    )
    print(msg, file=sys.stderr)
    app_logger.error(msg)
    # sys.exit with a non-zero code so supervisors see the failure;
    # bare exit() is a site builtin and not guaranteed to exist.
    sys.exit(1)
|
||||
|
||||
|
||||
# ---------- TASKSMASTER CLASS ----------
class TasksMaster:
    """Discovers task modules in TASKS_FOLDER and schedules them with APScheduler."""

    # Fallback crontab used when a task does not declare one.
    TASK_DEFAULT_CRON = '*/15 * * * *'
    # Max random delay (seconds) added to each run to spread load.
    TASK_JITTER = 240
    TASKS_FOLDER = os.path.join(os.path.dirname(__file__), "tasks")

    def __init__(self, scheduler: BackgroundScheduler):
        self.tasks = self._config_tasks()
        self.scheduler = scheduler
        # job_id -> datetime of the most recent completion (success or failure)
        self.last_run_times = {}
        self.scheduler.add_listener(self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)

    def _config_tasks(self):
        """
        Loads tasks from the TASKS_FOLDER and logs where they came from.
        """
        tasks_defined = self._load_tasks_from_folder(self.TASKS_FOLDER)
        app_logger.info(f"Scheduled Tasks Loaded from folder: {self.TASKS_FOLDER}")
        return tasks_defined

    def _load_tasks_from_folder(self, folder_path):
        """
        Loads and registers task modules from a specified folder.

        Scans the given folder for Python (.py) files, dynamically imports
        each as a module, and looks for two attributes:
          - TASK_CONFIG: dict with task metadata ('name', 'cron', 'enabled',
            'run_when_loaded').
          - main: a callable implementing the task's execution logic.

        Only modules providing both attributes are registered.

        Args:
            folder_path (str): Path to the folder containing task scripts.

        Returns:
            list[dict]: task definitions with keys 'name', 'filename',
            'cron', 'enabled' and 'run_when_loaded'.
        """
        tasks = []

        if not os.path.exists(folder_path):
            app_logger.error(f"{folder_path} does not exist! Unable to load tasks!")
            return tasks

        # sorted so discovery order is deterministic, which helps debugging
        for filename in sorted(os.listdir(folder_path)):

            # skip non-python files and __pycache__ / dunder artifacts
            if not filename.endswith('.py') or filename.startswith("__"):
                continue

            path = os.path.join(folder_path, filename)
            module_name = filename[:-3]
            spec = importlib.util.spec_from_file_location(f"tasks.{module_name}", path)
            module = importlib.util.module_from_spec(spec)
            # Register in sys.modules BEFORE exec_module (importlib recipe)
            # so the module is importable while its top-level code runs.
            sys.modules[f"tasks.{module_name}"] = module
            try:
                spec.loader.exec_module(module)
            except Exception as e:
                app_logger.error(f"Failed to import {filename}: {e}")
                # roll back the half-initialized registration
                sys.modules.pop(f"tasks.{module_name}", None)
                continue

            # a task is schedulable only with both a TASK_CONFIG and a main()
            if hasattr(module, 'TASK_CONFIG') and hasattr(module, 'main'):

                # ensure TASK_CONFIG is a dict
                if not isinstance(module.TASK_CONFIG, dict):
                    app_logger.error(f"TASK_CONFIG is not a dict in {filename}. Skipping task.")
                    continue

                task_cron = module.TASK_CONFIG.get("cron") or self.TASK_DEFAULT_CRON
                task_name = module.TASK_CONFIG.get("name", module_name)

                # validate the crontab string up front
                try:
                    CronTrigger.from_crontab(task_cron)
                except ValueError as ve:
                    app_logger.error(f"Invalid cron format for task {task_name}: {ve} - Skipping this task")
                    continue

                tasks.append({
                    'name': task_name,
                    'filename': filename,
                    'cron': task_cron,
                    "enabled": module.TASK_CONFIG.get("enabled", False),
                    "run_when_loaded": module.TASK_CONFIG.get("run_when_loaded", False)
                })

            # something is missing; log exactly what
            else:
                if not hasattr(module, 'TASK_CONFIG'):
                    app_logger.warning(f"Missing TASK_CONFIG in {filename}")
                elif not hasattr(module, 'main'):
                    app_logger.warning(f"Missing main() in {filename}")

        return tasks

    def _add_jobs(self):
        """Register every enabled task definition with the scheduler."""
        for task_to_run in self.tasks:

            # these tasks are built by _load_tasks_from_folder; anything you
            # want forwarded from TASK_CONFIG must be passed through there.
            task_name = task_to_run.get("name")
            run_when_loaded = task_to_run.get("run_when_loaded")
            module_name = os.path.splitext(task_to_run.get("filename"))[0]
            task_enabled = task_to_run.get("enabled", False)

            # if no crontab set for this task, fall back to the default
            task_cron = task_to_run.get("cron") or self.TASK_DEFAULT_CRON

            # if task is disabled, skip this one
            if not task_enabled:
                app_logger.info(f"{task_name} is disabled in client config. Skipping task")
                continue
            try:
                if os.path.isfile(os.path.join(self.TASKS_FOLDER, task_to_run.get("filename"))):
                    # schedule the task now that everything has checked out
                    self._schedule_task(task_name, module_name, task_cron, run_when_loaded)
                    app_logger.info(f"Scheduled {module_name} cron is set to {task_cron}.", extra={"task": task_to_run})
                else:
                    app_logger.info(f"Skipping invalid or unsafe file: {task_to_run.get('filename')}", extra={"task": task_to_run})

            except Exception as e:
                app_logger.error(f"Error scheduling task: {e}", extra={"tasks": task_to_run})

    def _schedule_task(self, task_name, module_name, task_cron, run_when_loaded):
        """Import tasks.<module_name> and register its main() with the scheduler."""
        try:
            # the module was placed in sys.modules during discovery, so this
            # is a cache lookup rather than a second file execution
            module = importlib.import_module(f"tasks.{module_name}")

            if not hasattr(module, 'main'):
                app_logger.error(f"{module_name} does not define a 'main' function.")
                return

            app_logger.info(f"Scheduling {task_name} - {module_name} Main Function")

            # unique job id
            job_identifier = f"{module_name}__{task_name}"

            # little insurance to make sure the cron is set and not None
            if task_cron is None:
                task_cron = self.TASK_DEFAULT_CRON

            trigger = CronTrigger.from_crontab(task_cron)

            job_kwargs = {
                "id": job_identifier,
                "jitter": self.TASK_JITTER,
                "name": task_name,
                "max_instances": 1,
            }
            if run_when_loaded:
                app_logger.info(f"Task: {task_name} is set to run instantly. Scheduling to run on scheduler start")
                # next_run_time=now forces an immediate first run in addition
                # to the regular cron schedule
                job_kwargs["next_run_time"] = datetime.datetime.now()

            self.scheduler.add_job(module.main, trigger, **job_kwargs)

        except Exception as e:
            app_logger.error(f"Failed to load {module_name}: {e}")

    def job_listener(self, event):
        """APScheduler listener: record run time and log the job outcome."""
        job_id = event.job_id
        self.last_run_times[job_id] = datetime.datetime.now()

        if event.exception:
            app_logger.error(f"Job {event.job_id} failed: {event.exception}")
        else:
            app_logger.info(f"Job {event.job_id} completed successfully.")

    def list_jobs(self):
        """Return [{'id', 'name', 'next_run'}] for every scheduled job."""
        scheduled_jobs = self.scheduler.get_jobs()
        jobs_list = []

        for job in scheduled_jobs:
            jobs_list.append({
                "id": job.id,
                "name": job.name,
                "next_run": job.next_run_time,
            })
        return jobs_list

    def run_scheduled_tasks(self):
        """
        Runs and schedules enabled tasks using the background scheduler.

        This method performs the following:
        1. Adds new jobs to the scheduler based on the loaded configuration.
        2. Starts the scheduler (if not already running) so tasks execute at
           their defined intervals.

        This ensures the scheduler is always running with the most up-to-date
        task definitions and enabled status.
        """

        # Add enabled tasks to the scheduler
        self._add_jobs()

        # Start the scheduler (if not already running)
        if not self.scheduler.running:
            self.scheduler.start()
|
||||
|
||||
|
||||
# ---------- SINGLETON WRAPPER ----------

# NOTE(review): legacy alias kept for backward compatibility; it previously
# stood in for a type variable, which made the decorator's annotations claim
# every wrapped function returns the builtin `type` — that was wrong, so the
# annotations below no longer use it.
T = type


def singleton_loader(func):
    """Decorator ensuring func runs at most once; later calls return the cached result.

    Thread-safe: a lock guards the first (and only) invocation, so concurrent
    callers all receive the same instance.
    """
    # one cache entry per decorated function name
    cache: dict[str, object] = {}
    lock = threading.Lock()

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        with lock:
            if func.__name__ not in cache:
                cache[func.__name__] = func(*args, **kwargs)
            return cache[func.__name__]
    return wrapper
|
||||
|
||||
|
||||
@singleton_loader
def get_tasksmaster(scheduler: BackgroundScheduler | None = None) -> TasksMaster:
    """
    Return the singleton TasksMaster instance.

    - Automatically creates a BackgroundScheduler if none is provided.
    - Automatically starts the scheduler when the singleton is created.

    :param scheduler: Optional APScheduler instance. If None, a new BackgroundScheduler will be created.
    """
    active_scheduler = scheduler if scheduler is not None else BackgroundScheduler()

    instance = TasksMaster(active_scheduler)

    # Auto-start the scheduler on first creation if it is not running yet.
    if not active_scheduler.running:
        active_scheduler.start()
        app_logger.info("TasksMaster scheduler started automatically with singleton creation.")

    return instance
|
||||
@@ -7,6 +7,7 @@ Customize this template to change the dashboard appearance.
|
||||
|
||||
import html
|
||||
from datetime import datetime
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
def _escape(value) -> str:
|
||||
"""Escape HTML special characters to prevent XSS attacks."""
|
||||
@@ -14,22 +15,52 @@ def _escape(value) -> str:
|
||||
return ""
|
||||
return html.escape(str(value))
|
||||
|
||||
def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool = False) -> str:
    """Format ISO timestamp for display with timezone conversion.

    Args:
        iso_timestamp: ISO format timestamp string (stored in UTC)
        timezone: IANA timezone string to convert to
        time_only: If True, return only HH:MM:SS, otherwise full datetime
    """
    try:
        # Parse UTC timestamp
        dt = datetime.fromisoformat(iso_timestamp)
        # Stored timestamps are UTC; attach UTC when the string carries no
        # offset so naive values also convert correctly to the target zone.
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=ZoneInfo('UTC'))
        # Convert to target timezone
        dt = dt.astimezone(ZoneInfo(timezone))

        if time_only:
            return dt.strftime("%H:%M:%S")
        return dt.strftime("%Y-%m-%d %H:%M:%S")
    except Exception:
        # Fallback for legacy / non-ISO values: best-effort time extraction
        return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
|
||||
|
||||
|
||||
def generate_dashboard(stats: dict) -> str:
|
||||
"""Generate dashboard HTML with access statistics"""
|
||||
def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str = '') -> str:
|
||||
"""Generate dashboard HTML with access statistics
|
||||
|
||||
# Generate IP rows (IPs are generally safe but escape for consistency)
|
||||
Args:
|
||||
stats: Statistics dictionary
|
||||
timezone: IANA timezone string (e.g., 'Europe/Paris', 'America/New_York')
|
||||
dashboard_path: The secret dashboard path for generating API URLs
|
||||
"""
|
||||
|
||||
# Generate IP rows with clickable functionality for dropdown stats
|
||||
top_ips_rows = '\n'.join([
|
||||
f'<tr><td class="rank">{i+1}</td><td>{_escape(ip)}</td><td>{count}</td></tr>'
|
||||
f'''<tr class="ip-row" data-ip="{_escape(ip)}">
|
||||
<td class="rank">{i+1}</td>
|
||||
<td class="ip-clickable">{_escape(ip)}</td>
|
||||
<td>{count}</td>
|
||||
</tr>
|
||||
<tr class="ip-stats-row" id="stats-row-{_escape(ip).replace(".", "-")}" style="display: none;">
|
||||
<td colspan="3" class="ip-stats-cell">
|
||||
<div class="ip-stats-dropdown">
|
||||
<div class="loading">Loading stats...</div>
|
||||
</div>
|
||||
</td>
|
||||
</tr>'''
|
||||
for i, (ip, count) in enumerate(stats['top_ips'])
|
||||
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
|
||||
|
||||
@@ -45,27 +76,76 @@ def generate_dashboard(stats: dict) -> str:
|
||||
for i, (ua, count) in enumerate(stats['top_user_agents'])
|
||||
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
|
||||
|
||||
# Generate suspicious accesses rows (CRITICAL: multiple user-controlled fields)
|
||||
# Generate suspicious accesses rows with clickable IPs
|
||||
suspicious_rows = '\n'.join([
|
||||
f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["path"])}</td><td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
|
||||
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
|
||||
<td class="ip-clickable">{_escape(log["ip"])}</td>
|
||||
<td>{_escape(log["path"])}</td>
|
||||
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
|
||||
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
|
||||
</tr>
|
||||
<tr class="ip-stats-row" id="stats-row-suspicious-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
||||
<td colspan="4" class="ip-stats-cell">
|
||||
<div class="ip-stats-dropdown">
|
||||
<div class="loading">Loading stats...</div>
|
||||
</div>
|
||||
</td>
|
||||
</tr>'''
|
||||
for log in stats['recent_suspicious'][-10:]
|
||||
]) or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
|
||||
|
||||
# Generate honeypot triggered IPs rows
|
||||
# Generate honeypot triggered IPs rows with clickable IPs
|
||||
honeypot_rows = '\n'.join([
|
||||
f'<tr><td>{_escape(ip)}</td><td style="word-break: break-all;">{_escape(", ".join(paths))}</td><td>{len(paths)}</td></tr>'
|
||||
f'''<tr class="ip-row" data-ip="{_escape(ip)}">
|
||||
<td class="ip-clickable">{_escape(ip)}</td>
|
||||
<td style="word-break: break-all;">{_escape(", ".join(paths))}</td>
|
||||
<td>{len(paths)}</td>
|
||||
</tr>
|
||||
<tr class="ip-stats-row" id="stats-row-honeypot-{_escape(ip).replace(".", "-")}" style="display: none;">
|
||||
<td colspan="3" class="ip-stats-cell">
|
||||
<div class="ip-stats-dropdown">
|
||||
<div class="loading">Loading stats...</div>
|
||||
</div>
|
||||
</td>
|
||||
</tr>'''
|
||||
for ip, paths in stats.get('honeypot_triggered_ips', [])
|
||||
]) or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>'
|
||||
|
||||
# Generate attack types rows (CRITICAL: paths and user agents are user-controlled)
|
||||
# Generate attack types rows with clickable IPs
|
||||
attack_type_rows = '\n'.join([
|
||||
f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["path"])}</td><td>{_escape(", ".join(log["attack_types"]))}</td><td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
|
||||
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
|
||||
<td class="ip-clickable">{_escape(log["ip"])}</td>
|
||||
<td>{_escape(log["path"])}</td>
|
||||
<td>{_escape(", ".join(log["attack_types"]))}</td>
|
||||
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
|
||||
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
|
||||
</tr>
|
||||
<tr class="ip-stats-row" id="stats-row-attack-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
||||
<td colspan="5" class="ip-stats-cell">
|
||||
<div class="ip-stats-dropdown">
|
||||
<div class="loading">Loading stats...</div>
|
||||
</div>
|
||||
</td>
|
||||
</tr>'''
|
||||
for log in stats.get('attack_types', [])[-10:]
|
||||
]) or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
|
||||
|
||||
# Generate credential attempts rows (CRITICAL: usernames and passwords are user-controlled)
|
||||
# Generate credential attempts rows with clickable IPs
|
||||
credential_rows = '\n'.join([
|
||||
f'<tr><td>{_escape(log["ip"])}</td><td>{_escape(log["username"])}</td><td>{_escape(log["password"])}</td><td>{_escape(log["path"])}</td><td>{_escape(log["timestamp"].split("T")[1][:8])}</td></tr>'
|
||||
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
|
||||
<td class="ip-clickable">{_escape(log["ip"])}</td>
|
||||
<td>{_escape(log["username"])}</td>
|
||||
<td>{_escape(log["password"])}</td>
|
||||
<td>{_escape(log["path"])}</td>
|
||||
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
|
||||
</tr>
|
||||
<tr class="ip-stats-row" id="stats-row-cred-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
||||
<td colspan="5" class="ip-stats-cell">
|
||||
<div class="ip-stats-dropdown">
|
||||
<div class="loading">Loading stats...</div>
|
||||
</div>
|
||||
</td>
|
||||
</tr>'''
|
||||
for log in stats.get('credential_attempts', [])[-20:]
|
||||
]) or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'
|
||||
|
||||
@@ -85,12 +165,36 @@ def generate_dashboard(stats: dict) -> str:
|
||||
.container {{
|
||||
max-width: 1400px;
|
||||
margin: 0 auto;
|
||||
position: relative;
|
||||
}}
|
||||
h1 {{
|
||||
color: #58a6ff;
|
||||
text-align: center;
|
||||
margin-bottom: 40px;
|
||||
}}
|
||||
.download-section {{
|
||||
position: absolute;
|
||||
top: 0;
|
||||
right: 0;
|
||||
}}
|
||||
.download-btn {{
|
||||
display: inline-block;
|
||||
padding: 8px 14px;
|
||||
background: #238636;
|
||||
color: #ffffff;
|
||||
text-decoration: none;
|
||||
border-radius: 6px;
|
||||
font-weight: 500;
|
||||
font-size: 13px;
|
||||
transition: background 0.2s;
|
||||
border: 1px solid #2ea043;
|
||||
}}
|
||||
.download-btn:hover {{
|
||||
background: #2ea043;
|
||||
}}
|
||||
.download-btn:active {{
|
||||
background: #1f7a2f;
|
||||
}}
|
||||
.stats-grid {{
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
||||
@@ -180,10 +284,202 @@ def generate_dashboard(stats: dict) -> str:
|
||||
content: '▼';
|
||||
opacity: 1;
|
||||
}}
|
||||
.ip-row {{
|
||||
transition: background-color 0.2s;
|
||||
}}
|
||||
.ip-clickable {{
|
||||
cursor: pointer;
|
||||
color: #58a6ff !important;
|
||||
font-weight: 500;
|
||||
text-decoration: underline;
|
||||
text-decoration-style: dotted;
|
||||
text-underline-offset: 3px;
|
||||
}}
|
||||
.ip-clickable:hover {{
|
||||
color: #79c0ff !important;
|
||||
text-decoration-style: solid;
|
||||
background: #1c2128;
|
||||
}}
|
||||
.ip-stats-row {{
|
||||
background: #0d1117;
|
||||
}}
|
||||
.ip-stats-cell {{
|
||||
padding: 0 !important;
|
||||
}}
|
||||
.ip-stats-dropdown {{
|
||||
margin-top: 10px;
|
||||
padding: 15px;
|
||||
background: #0d1117;
|
||||
border: 1px solid #30363d;
|
||||
border-radius: 6px;
|
||||
font-size: 13px;
|
||||
display: flex;
|
||||
gap: 20px;
|
||||
}}
|
||||
.stats-left {{
|
||||
flex: 1;
|
||||
}}
|
||||
.stats-right {{
|
||||
flex: 0 0 200px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
}}
|
||||
.radar-chart {{
|
||||
position: relative;
|
||||
width: 220px;
|
||||
height: 220px;
|
||||
overflow: visible;
|
||||
}}
|
||||
.radar-legend {{
|
||||
margin-top: 10px;
|
||||
font-size: 11px;
|
||||
}}
|
||||
.radar-legend-item {{
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 6px;
|
||||
margin: 3px 0;
|
||||
}}
|
||||
.radar-legend-color {{
|
||||
width: 12px;
|
||||
height: 12px;
|
||||
border-radius: 2px;
|
||||
}}
|
||||
.ip-stats-dropdown .loading {{
|
||||
color: #8b949e;
|
||||
font-style: italic;
|
||||
}}
|
||||
.stat-row {{
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
padding: 5px 0;
|
||||
border-bottom: 1px solid #21262d;
|
||||
}}
|
||||
.stat-row:last-child {{
|
||||
border-bottom: none;
|
||||
}}
|
||||
.stat-label-sm {{
|
||||
color: #8b949e;
|
||||
font-weight: 500;
|
||||
}}
|
||||
.stat-value-sm {{
|
||||
color: #58a6ff;
|
||||
font-weight: 600;
|
||||
}}
|
||||
.category-badge {{
|
||||
display: inline-block;
|
||||
padding: 4px 8px;
|
||||
border-radius: 4px;
|
||||
font-size: 12px;
|
||||
font-weight: 600;
|
||||
text-transform: uppercase;
|
||||
}}
|
||||
.category-attacker {{
|
||||
background: #f851491a;
|
||||
color: #f85149;
|
||||
border: 1px solid #f85149;
|
||||
}}
|
||||
.category-good-crawler {{
|
||||
background: #3fb9501a;
|
||||
color: #3fb950;
|
||||
border: 1px solid #3fb950;
|
||||
}}
|
||||
.category-bad-crawler {{
|
||||
background: #f0883e1a;
|
||||
color: #f0883e;
|
||||
border: 1px solid #f0883e;
|
||||
}}
|
||||
.category-regular-user {{
|
||||
background: #58a6ff1a;
|
||||
color: #58a6ff;
|
||||
border: 1px solid #58a6ff;
|
||||
}}
|
||||
.category-unknown {{
|
||||
background: #8b949e1a;
|
||||
color: #8b949e;
|
||||
border: 1px solid #8b949e;
|
||||
}}
|
||||
.timeline-container {{
|
||||
margin-top: 15px;
|
||||
padding-top: 15px;
|
||||
border-top: 1px solid #30363d;
|
||||
}}
|
||||
.timeline-title {{
|
||||
color: #58a6ff;
|
||||
font-size: 13px;
|
||||
font-weight: 600;
|
||||
margin-bottom: 10px;
|
||||
}}
|
||||
.timeline {{
|
||||
position: relative;
|
||||
padding-left: 30px;
|
||||
}}
|
||||
.timeline::before {{
|
||||
content: '';
|
||||
position: absolute;
|
||||
left: 12px;
|
||||
top: 5px;
|
||||
bottom: 5px;
|
||||
width: 3px;
|
||||
background: #30363d;
|
||||
}}
|
||||
.timeline-item {{
|
||||
position: relative;
|
||||
padding-bottom: 15px;
|
||||
}}
|
||||
.timeline-item:last-child {{
|
||||
padding-bottom: 0;
|
||||
}}
|
||||
.timeline-marker {{
|
||||
position: absolute;
|
||||
left: -26px;
|
||||
width: 16px;
|
||||
height: 16px;
|
||||
border-radius: 50%;
|
||||
border: 2px solid #0d1117;
|
||||
}}
|
||||
.timeline-marker.attacker {{
|
||||
background: #f85149;
|
||||
}}
|
||||
.timeline-marker.good-crawler {{
|
||||
background: #3fb950;
|
||||
}}
|
||||
.timeline-marker.bad-crawler {{
|
||||
background: #f0883e;
|
||||
}}
|
||||
.timeline-marker.regular-user {{
|
||||
background: #58a6ff;
|
||||
}}
|
||||
.timeline-marker.unknown {{
|
||||
background: #8b949e;
|
||||
}}
|
||||
.timeline-content {{
|
||||
font-size: 12px;
|
||||
}}
|
||||
.timeline-category {{
|
||||
font-weight: 600;
|
||||
}}
|
||||
.timeline-timestamp {{
|
||||
color: #8b949e;
|
||||
font-size: 11px;
|
||||
margin-top: 2px;
|
||||
}}
|
||||
.timeline-arrow {{
|
||||
color: #8b949e;
|
||||
margin: 0 7px;
|
||||
}}
|
||||
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<div class="download-section">
|
||||
<a href="{dashboard_path}/api/download/malicious_ips.txt" class="download-btn" download>
|
||||
Export Malicious IPs
|
||||
</a>
|
||||
</div>
|
||||
<h1>Krawl Dashboard</h1>
|
||||
|
||||
<div class="stats-grid">
|
||||
@@ -331,6 +627,31 @@ def generate_dashboard(stats: dict) -> str:
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
// Server timezone configuration
|
||||
const SERVER_TIMEZONE = '{timezone}';
|
||||
const DASHBOARD_PATH = '{dashboard_path}';
|
||||
|
||||
// Convert UTC timestamp to configured timezone
|
||||
function formatTimestamp(isoTimestamp) {{
|
||||
if (!isoTimestamp) return 'N/A';
|
||||
try {{
|
||||
const date = new Date(isoTimestamp);
|
||||
return date.toLocaleString('en-US', {{
|
||||
timeZone: SERVER_TIMEZONE,
|
||||
year: 'numeric',
|
||||
month: '2-digit',
|
||||
day: '2-digit',
|
||||
hour: '2-digit',
|
||||
minute: '2-digit',
|
||||
second: '2-digit',
|
||||
hour12: false
|
||||
}});
|
||||
}} catch (err) {{
|
||||
console.error('Error formatting timestamp:', err);
|
||||
return new Date(isoTimestamp).toLocaleString();
|
||||
}}
|
||||
}}
|
||||
|
||||
// Add sorting functionality to tables
|
||||
document.querySelectorAll('th.sortable').forEach(header => {{
|
||||
header.addEventListener('click', function() {{
|
||||
@@ -387,6 +708,248 @@ def generate_dashboard(stats: dict) -> str:
|
||||
rows.forEach(row => tbody.appendChild(row));
|
||||
}});
|
||||
}});
|
||||
|
||||
// IP stats dropdown functionality
|
||||
document.querySelectorAll('.ip-clickable').forEach(cell => {{
|
||||
cell.addEventListener('click', async function(e) {{
|
||||
const row = e.currentTarget.closest('.ip-row');
|
||||
if (!row) return;
|
||||
|
||||
const ip = row.getAttribute('data-ip');
|
||||
const statsRow = row.nextElementSibling;
|
||||
if (!statsRow || !statsRow.classList.contains('ip-stats-row')) return;
|
||||
|
||||
const isVisible = getComputedStyle(statsRow).display !== 'none';
|
||||
|
||||
document.querySelectorAll('.ip-stats-row').forEach(r => {{
|
||||
r.style.display = 'none';
|
||||
}});
|
||||
|
||||
if (isVisible) return;
|
||||
|
||||
statsRow.style.display = 'table-row';
|
||||
|
||||
const dropdown = statsRow.querySelector('.ip-stats-dropdown');
|
||||
|
||||
// Always fetch fresh data from database
|
||||
if (dropdown) {{
|
||||
dropdown.innerHTML = '<div class="loading">Loading stats...</div>';
|
||||
try {{
|
||||
const response = await fetch(`${{DASHBOARD_PATH}}/api/ip-stats/${{ip}}`, {{
|
||||
cache: 'no-store',
|
||||
headers: {{
|
||||
'Cache-Control': 'no-cache',
|
||||
'Pragma': 'no-cache'
|
||||
}}
|
||||
}});
|
||||
if (!response.ok) throw new Error(`HTTP ${{response.status}}`);
|
||||
|
||||
const data = await response.json();
|
||||
dropdown.innerHTML = data.error
|
||||
? `<div style="color:#f85149;">Error: ${{data.error}}</div>`
|
||||
: formatIpStats(data);
|
||||
}} catch (err) {{
|
||||
dropdown.innerHTML = `<div style="color:#f85149;">Failed to load stats: ${{err.message}}</div>`;
|
||||
}}
|
||||
}}
|
||||
}});
|
||||
}});
|
||||
|
||||
function formatIpStats(stats) {{
|
||||
let html = '<div class="stats-left">';
|
||||
|
||||
// Basic info
|
||||
html += '<div class="stat-row">';
|
||||
html += '<span class="stat-label-sm">Total Requests:</span>';
|
||||
html += `<span class="stat-value-sm">${{stats.total_requests || 0}}</span>`;
|
||||
html += '</div>';
|
||||
|
||||
html += '<div class="stat-row">';
|
||||
html += '<span class="stat-label-sm">First Seen:</span>';
|
||||
html += `<span class="stat-value-sm">${{formatTimestamp(stats.first_seen)}}</span>`;
|
||||
html += '</div>';
|
||||
|
||||
html += '<div class="stat-row">';
|
||||
html += '<span class="stat-label-sm">Last Seen:</span>';
|
||||
html += `<span class="stat-value-sm">${{formatTimestamp(stats.last_seen)}}</span>`;
|
||||
html += '</div>';
|
||||
|
||||
// Category
|
||||
if (stats.category) {{
|
||||
html += '<div class="stat-row">';
|
||||
html += '<span class="stat-label-sm">Category:</span>';
|
||||
const categoryClass = 'category-' + stats.category.toLowerCase().replace('_', '-');
|
||||
html += `<span class="category-badge ${{categoryClass}}">${{stats.category}}</span>`;
|
||||
html += '</div>';
|
||||
}}
|
||||
|
||||
// GeoIP info if available
|
||||
if (stats.country_code || stats.city) {{
|
||||
html += '<div class="stat-row">';
|
||||
html += '<span class="stat-label-sm">Location:</span>';
|
||||
html += `<span class="stat-value-sm">${{stats.city || ''}}${{stats.city && stats.country_code ? ', ' : ''}}${{stats.country_code || 'Unknown'}}</span>`;
|
||||
html += '</div>';
|
||||
}}
|
||||
|
||||
if (stats.asn_org) {{
|
||||
html += '<div class="stat-row">';
|
||||
html += '<span class="stat-label-sm">ASN Org:</span>';
|
||||
html += `<span class="stat-value-sm">${{stats.asn_org}}</span>`;
|
||||
html += '</div>';
|
||||
}}
|
||||
|
||||
// Reputation score if available
|
||||
if (stats.reputation_score !== null && stats.reputation_score !== undefined) {{
|
||||
html += '<div class="stat-row">';
|
||||
html += '<span class="stat-label-sm">Reputation Score:</span>';
|
||||
html += `<span class="stat-value-sm">${{stats.reputation_score}} ${{stats.reputation_source ? '(' + stats.reputation_source + ')' : ''}}</span>`;
|
||||
html += '</div>';
|
||||
}}
|
||||
|
||||
// Category History Timeline
|
||||
if (stats.category_history && stats.category_history.length > 0) {{
|
||||
html += '<div class="timeline-container">';
|
||||
html += '<div class="timeline-title">Behavior Timeline</div>';
|
||||
html += '<div class="timeline">';
|
||||
|
||||
stats.category_history.forEach((change, index) => {{
|
||||
const categoryClass = change.new_category.toLowerCase().replace('_', '-');
|
||||
const timestamp = formatTimestamp(change.timestamp);
|
||||
|
||||
html += '<div class="timeline-item">';
|
||||
html += `<div class="timeline-marker ${{categoryClass}}"></div>`;
|
||||
html += '<div class="timeline-content">';
|
||||
|
||||
if (change.old_category) {{
|
||||
const oldCategoryBadge = 'category-' + change.old_category.toLowerCase().replace('_', '-');
|
||||
html += `<span class="category-badge ${{oldCategoryBadge}}">${{change.old_category}}</span>`;
|
||||
html += '<span class="timeline-arrow">→</span>';
|
||||
}} else {{
|
||||
html += '<span style="color: #8b949e;">Initial:</span> ';
|
||||
}}
|
||||
|
||||
const newCategoryBadge = 'category-' + change.new_category.toLowerCase().replace('_', '-');
|
||||
html += `<span class="category-badge ${{newCategoryBadge}}">${{change.new_category}}</span>`;
|
||||
html += `<div class="timeline-timestamp">${{timestamp}}</div>`;
|
||||
html += '</div>';
|
||||
html += '</div>';
|
||||
}});
|
||||
|
||||
html += '</div>';
|
||||
html += '</div>';
|
||||
}}
|
||||
|
||||
html += '</div>';
|
||||
|
||||
// Radar chart on the right
|
||||
if (stats.category_scores && Object.keys(stats.category_scores).length > 0) {{
|
||||
html += '<div class="stats-right">';
|
||||
html += '<div style="font-size: 13px; font-weight: 600; color: #58a6ff; margin-bottom: 10px;">Category Score</div>';
|
||||
html += '<svg class="radar-chart" viewBox="-30 -30 260 260" preserveAspectRatio="xMidYMid meet">';
|
||||
|
||||
const scores = {{
|
||||
attacker: stats.category_scores.attacker || 0,
|
||||
good_crawler: stats.category_scores.good_crawler || 0,
|
||||
bad_crawler: stats.category_scores.bad_crawler || 0,
|
||||
regular_user: stats.category_scores.regular_user || 0,
|
||||
unknown: stats.category_scores.unknown || 0
|
||||
}};
|
||||
|
||||
// Normalize scores for better visualization
|
||||
const maxScore = Math.max(...Object.values(scores), 1);
|
||||
const minVisibleRadius = 0.15; // Minimum 15% visibility even for 0 values
|
||||
const normalizedScores = {{}};
|
||||
|
||||
Object.keys(scores).forEach(key => {{
|
||||
// Scale values: ensure minimum visibility + proportional to max
|
||||
normalizedScores[key] = minVisibleRadius + (scores[key] / maxScore) * (1 - minVisibleRadius);
|
||||
}});
|
||||
|
||||
const colors = {{
|
||||
attacker: '#f85149',
|
||||
good_crawler: '#3fb950',
|
||||
bad_crawler: '#f0883e',
|
||||
regular_user: '#58a6ff',
|
||||
unknown: '#8b949e'
|
||||
}};
|
||||
|
||||
const labels = {{
|
||||
attacker: 'Attacker',
|
||||
good_crawler: 'Good Bot',
|
||||
bad_crawler: 'Bad Bot',
|
||||
regular_user: 'User',
|
||||
unknown: 'Unknown'
|
||||
}};
|
||||
|
||||
// Draw radar background grid
|
||||
const cx = 100, cy = 100, maxRadius = 75;
|
||||
for (let i = 1; i <= 5; i++) {{
|
||||
const r = (maxRadius / 5) * i;
|
||||
html += `<circle cx="${{cx}}" cy="${{cy}}" r="${{r}}" fill="none" stroke="#30363d" stroke-width="0.5"/>`;
|
||||
}}
|
||||
|
||||
// Draw axes (now with 5 points for pentagon)
|
||||
const angles = [0, 72, 144, 216, 288];
|
||||
const keys = ['good_crawler', 'regular_user', 'unknown', 'bad_crawler', 'attacker'];
|
||||
|
||||
angles.forEach((angle, i) => {{
|
||||
const rad = (angle - 90) * Math.PI / 180;
|
||||
const x2 = cx + maxRadius * Math.cos(rad);
|
||||
const y2 = cy + maxRadius * Math.sin(rad);
|
||||
html += `<line x1="${{cx}}" y1="${{cy}}" x2="${{x2}}" y2="${{y2}}" stroke="#30363d" stroke-width="0.5"/>`;
|
||||
|
||||
// Add labels at consistent distance
|
||||
const labelDist = maxRadius + 35;
|
||||
const lx = cx + labelDist * Math.cos(rad);
|
||||
const ly = cy + labelDist * Math.sin(rad);
|
||||
html += `<text x="${{lx}}" y="${{ly}}" fill="#8b949e" font-size="12" text-anchor="middle" dominant-baseline="middle">${{labels[keys[i]]}}</text>`;
|
||||
}});
|
||||
|
||||
// Draw filled polygon for scores
|
||||
let points = [];
|
||||
angles.forEach((angle, i) => {{
|
||||
const normalizedScore = normalizedScores[keys[i]];
|
||||
const rad = (angle - 90) * Math.PI / 180;
|
||||
const r = normalizedScore * maxRadius;
|
||||
const x = cx + r * Math.cos(rad);
|
||||
const y = cy + r * Math.sin(rad);
|
||||
points.push(`${{x}},${{y}}`);
|
||||
}});
|
||||
|
||||
// Determine dominant category color
|
||||
const dominantKey = Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? a : b);
|
||||
const dominantColor = colors[dominantKey];
|
||||
|
||||
// Draw single colored area
|
||||
html += `<polygon points="${{points.join(' ')}}" fill="${{dominantColor}}" fill-opacity="0.4" stroke="${{dominantColor}}" stroke-width="2.5"/>`;
|
||||
|
||||
// Draw points
|
||||
angles.forEach((angle, i) => {{
|
||||
const normalizedScore = normalizedScores[keys[i]];
|
||||
const rad = (angle - 90) * Math.PI / 180;
|
||||
const r = normalizedScore * maxRadius;
|
||||
const x = cx + r * Math.cos(rad);
|
||||
const y = cy + r * Math.sin(rad);
|
||||
html += `<circle cx="${{x}}" cy="${{y}}" r="4.5" fill="${{colors[keys[i]]}}" stroke="#0d1117" stroke-width="2"/>`;
|
||||
}});
|
||||
|
||||
html += '</svg>';
|
||||
|
||||
// Legend
|
||||
html += '<div class="radar-legend">';
|
||||
keys.forEach(key => {{
|
||||
html += '<div class="radar-legend-item">';
|
||||
html += `<div class="radar-legend-color" style="background: ${{colors[key]}};"></div>`;
|
||||
html += `<span style="color: #8b949e;">${{labels[key]}}: ${{scores[key]}} pt</span>`;
|
||||
html += '</div>';
|
||||
}});
|
||||
html += '</div>';
|
||||
|
||||
html += '</div>';
|
||||
}}
|
||||
|
||||
return html;
|
||||
}}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
||||
@@ -46,21 +46,12 @@
|
||||
gap: 10px;
|
||||
align-items: center;
|
||||
overflow-y: auto;
|
||||
overflow-x: hidden;
|
||||
flex: 1;
|
||||
padding-top: 10px;
|
||||
}}
|
||||
.links-container::-webkit-scrollbar {{
|
||||
width: 8px;
|
||||
}}
|
||||
.links-container::-webkit-scrollbar-track {{
|
||||
background: #0d1117;
|
||||
}}
|
||||
.links-container::-webkit-scrollbar-thumb {{
|
||||
background: #30363d;
|
||||
border-radius: 4px;
|
||||
}}
|
||||
.links-container::-webkit-scrollbar-thumb:hover {{
|
||||
background: #484f58;
|
||||
width: 0px;
|
||||
}}
|
||||
.link-box {{
|
||||
background: #161b22;
|
||||
|
||||
@@ -131,7 +131,8 @@ class Wordlists:
|
||||
|
||||
@property
|
||||
def attack_urls(self):
|
||||
return self._data.get("attack_urls", [])
|
||||
"""Deprecated: use attack_patterns instead. Returns attack_patterns for backward compatibility."""
|
||||
return self._data.get("attack_patterns", {})
|
||||
|
||||
|
||||
_wordlists_instance = None
|
||||
|
||||
@@ -353,11 +353,14 @@
|
||||
}
|
||||
},
|
||||
"attack_patterns": {
|
||||
"path_traversal": "\\.\\.",
|
||||
"path_traversal": "(\\.\\.|%2e%2e|%252e%252e|\\.{2,}|%c0%ae|%c1%9c)",
|
||||
"sql_injection": "('|\"|`|--|#|/\\*|\\*/|\\bunion\\b|\\bunion\\s+select\\b|\\bor\\b.*=.*|\\band\\b.*=.*|'.*or.*'.*=.*'|\\bsleep\\b|\\bwaitfor\\b|\\bdelay\\b|\\bbenchmark\\b|;.*select|;.*drop|;.*insert|;.*update|;.*delete|\\bexec\\b|\\bexecute\\b|\\bxp_cmdshell\\b|information_schema|table_schema|table_name)",
|
||||
"xss_attempt": "(<script|</script|javascript:|onerror=|onload=|onclick=|onmouseover=|onfocus=|onblur=|<iframe|<img|<svg|<embed|<object|<body|<input|eval\\(|alert\\(|prompt\\(|confirm\\(|document\\.|window\\.|<style|expression\\(|vbscript:|data:text/html)",
|
||||
"common_probes": "(wp-admin|phpmyadmin|\\.env|\\.git|/admin|/config)",
|
||||
"shell_injection": "(\\||;|`|\\$\\(|&&)"
|
||||
"shell_injection": "(\\||;|`|\\$\\(|&&|\\bnc\\b|\\bnetcat\\b|\\bwget\\b|\\bcurl\\b|/bin/bash|/bin/sh|cmd\\.exe)",
|
||||
"lfi_rfi": "(file://|php://|expect://|data://|zip://|phar://|/etc/passwd|/etc/shadow|/proc/self|c:\\\\windows)",
|
||||
"xxe_injection": "(<!ENTITY|<!DOCTYPE|SYSTEM|PUBLIC)",
|
||||
"ldap_injection": "(\\*\\)|\\(\\||\\(&)",
|
||||
"command_injection": "(&&|\\|\\||;|\\$\\{|\\$\\(|`)"
|
||||
},
|
||||
"server_headers": [
|
||||
"Apache/2.4.41 (Ubuntu)",
|
||||
@@ -366,11 +369,5 @@
|
||||
"cloudflare",
|
||||
"AmazonS3",
|
||||
"gunicorn/20.1.0"
|
||||
],
|
||||
"attack_urls": {
|
||||
"path_traversal": "\\.\\.",
|
||||
"sql_injection": "('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
|
||||
"xss_attempt": "(<script|javascript:|onerror=|onload=)",
|
||||
"shell_injection": "(\\||;|`|\\$\\(|&&)"
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user