Linted code with the Black tool

Lorenzo Venerandi
2026-01-23 22:00:21 +01:00
parent 25384585d9
commit 4450d3a4e3
22 changed files with 1387 additions and 868 deletions

View File

@@ -20,7 +20,7 @@ TASK_CONFIG = {
"name": "analyze-ips",
"cron": "*/1 * * * *",
"enabled": True,
"run_when_loaded": True
"run_when_loaded": True,
}
@@ -34,48 +34,74 @@ def main():
uneven_request_timing_threshold = config.uneven_request_timing_threshold
user_agents_used_threshold = config.user_agents_used_threshold
attack_urls_threshold = config.attack_urls_threshold
uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
uneven_request_timing_time_window_seconds = (
config.uneven_request_timing_time_window_seconds
)
app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
score = {}
score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["attacker"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
score["good_crawler"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
score["bad_crawler"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
score["regular_user"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
# 1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
weights = {
"attacker": {
"risky_http_methods": 6,
"robots_violations": 4,
"uneven_request_timing": 3,
"different_user_agents": 8,
"attack_url": 15
"attack_url": 15,
},
"good_crawler": {
"risky_http_methods": 1,
"robots_violations": 0,
"uneven_request_timing": 0,
"different_user_agents": 0,
"attack_url": 0
"attack_url": 0,
},
"bad_crawler": {
"risky_http_methods": 2,
"robots_violations": 7,
"uneven_request_timing": 0,
"different_user_agents": 5,
"attack_url": 5
"attack_url": 5,
},
"regular_user": {
"risky_http_methods": 0,
"robots_violations": 0,
"uneven_request_timing": 8,
"different_user_agents": 3,
"attack_url": 0
}
"attack_url": 0,
},
}
# Get IPs with recent activity (last minute to match cron schedule)
recent_accesses = db_manager.get_access_logs(limit=999999999, since_minutes=1)
ips_to_analyze = {item['ip'] for item in recent_accesses}
ips_to_analyze = {item["ip"] for item in recent_accesses}
if not ips_to_analyze:
app_logger.debug("[Background Task] analyze-ips: No recent activity, skipping")
@@ -92,23 +118,51 @@ def main():
if total_accesses_count < 3:
category = "unknown"
analyzed_metrics = {}
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
category_scores = {
"attacker": 0,
"good_crawler": 0,
"bad_crawler": 0,
"regular_user": 0,
"unknown": 0,
}
last_analysis = datetime.now()
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
db_manager.update_ip_stats_analysis(
ip, analyzed_metrics, category, category_scores, last_analysis
)
return 0
#--------------------- HTTP Methods ---------------------
get_accesses_count = len([item for item in ip_accesses if item["method"] == "GET"])
post_accesses_count = len([item for item in ip_accesses if item["method"] == "POST"])
put_accesses_count = len([item for item in ip_accesses if item["method"] == "PUT"])
delete_accesses_count = len([item for item in ip_accesses if item["method"] == "DELETE"])
head_accesses_count = len([item for item in ip_accesses if item["method"] == "HEAD"])
options_accesses_count = len([item for item in ip_accesses if item["method"] == "OPTIONS"])
patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"])
# --------------------- HTTP Methods ---------------------
get_accesses_count = len(
[item for item in ip_accesses if item["method"] == "GET"]
)
post_accesses_count = len(
[item for item in ip_accesses if item["method"] == "POST"]
)
put_accesses_count = len(
[item for item in ip_accesses if item["method"] == "PUT"]
)
delete_accesses_count = len(
[item for item in ip_accesses if item["method"] == "DELETE"]
)
head_accesses_count = len(
[item for item in ip_accesses if item["method"] == "HEAD"]
)
options_accesses_count = len(
[item for item in ip_accesses if item["method"] == "OPTIONS"]
)
patch_accesses_count = len(
[item for item in ip_accesses if item["method"] == "PATCH"]
)
if total_accesses_count > http_risky_methods_threshold:
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
http_method_attacker_score = (
post_accesses_count
+ put_accesses_count
+ delete_accesses_count
+ options_accesses_count
+ patch_accesses_count
) / total_accesses_count
else:
http_method_attacker_score = 0
#print(f"HTTP Method attacker score: {http_method_attacker_score}")
# print(f"HTTP Method attacker score: {http_method_attacker_score}")
if http_method_attacker_score >= http_risky_methods_threshold:
score["attacker"]["risky_http_methods"] = True
score["good_crawler"]["risky_http_methods"] = False
@@ -119,8 +173,8 @@ def main():
score["good_crawler"]["risky_http_methods"] = True
score["bad_crawler"]["risky_http_methods"] = False
score["regular_user"]["risky_http_methods"] = False
#--------------------- Robots Violations ---------------------
#respect robots.txt and login/config pages access frequency
# --------------------- Robots Violations ---------------------
# respect robots.txt and login/config pages access frequency
robots_disallows = []
robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt"
with open(robots_path, "r") as f:
@@ -132,11 +186,20 @@ def main():
if parts[0] == "Disallow":
parts[1] = parts[1].rstrip("/")
#print(f"DISALLOW {parts[1]}")
# print(f"DISALLOW {parts[1]}")
robots_disallows.append(parts[1].strip())
#if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
violated_robots_count = len([item for item in ip_accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
#print(f"Violated robots count: {violated_robots_count}")
# if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
violated_robots_count = len(
[
item
for item in ip_accesses
if any(
item["path"].rstrip("/").startswith(disallow)
for disallow in robots_disallows
)
]
)
# print(f"Violated robots count: {violated_robots_count}")
if total_accesses_count > 0:
violated_robots_ratio = violated_robots_count / total_accesses_count
else:
@@ -152,15 +215,20 @@ def main():
score["bad_crawler"]["robots_violations"] = False
score["regular_user"]["robots_violations"] = False
#--------------------- Requests Timing ---------------------
# --------------------- Requests Timing ---------------------
# Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
now_utc = datetime.now()
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
timestamps = [
ts
for ts in timestamps
if now_utc - ts
<= timedelta(seconds=uneven_request_timing_time_window_seconds)
]
timestamps = sorted(timestamps, reverse=True)
time_diffs = []
for i in range(0, len(timestamps)-1):
diff = (timestamps[i] - timestamps[i+1]).total_seconds()
for i in range(0, len(timestamps) - 1):
diff = (timestamps[i] - timestamps[i + 1]).total_seconds()
time_diffs.append(diff)
mean = 0
@@ -170,9 +238,11 @@ def main():
if time_diffs:
mean = sum(time_diffs) / len(time_diffs)
variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
std = variance ** 0.5
cv = std/mean
app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
std = variance**0.5
cv = std / mean
app_logger.debug(
f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}"
)
if cv >= uneven_request_timing_threshold:
score["attacker"]["uneven_request_timing"] = True
score["good_crawler"]["uneven_request_timing"] = False
@@ -183,11 +253,11 @@ def main():
score["good_crawler"]["uneven_request_timing"] = False
score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = False
#--------------------- Different User Agents ---------------------
#Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
# --------------------- Different User Agents ---------------------
# Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
user_agents_used = [item["user_agent"] for item in ip_accesses]
user_agents_used = list(dict.fromkeys(user_agents_used))
#print(f"User agents used: {user_agents_used}")
# print(f"User agents used: {user_agents_used}")
if len(user_agents_used) >= user_agents_used_threshold:
score["attacker"]["different_user_agents"] = True
score["good_crawler"]["different_user_agents"] = False
@@ -198,7 +268,7 @@ def main():
score["good_crawler"]["different_user_agents"] = False
score["bad_crawler"]["different_user_agents"] = False
score["regular_user"]["different_user_agents"] = False
#--------------------- Attack URLs ---------------------
# --------------------- Attack URLs ---------------------
attack_urls_found_list = []
wl = get_wordlists()
if wl.attack_patterns:
@@ -215,12 +285,14 @@ def main():
for name, pattern in wl.attack_patterns.items():
# Check original, decoded, and double-decoded paths
if (re.search(pattern, queried_path, re.IGNORECASE) or
re.search(pattern, decoded_path, re.IGNORECASE) or
re.search(pattern, decoded_path_twice, re.IGNORECASE)):
if (
re.search(pattern, queried_path, re.IGNORECASE)
or re.search(pattern, decoded_path, re.IGNORECASE)
or re.search(pattern, decoded_path_twice, re.IGNORECASE)
):
attack_urls_found_list.append(f"{name}: {pattern}")
#remove duplicates
# remove duplicates
attack_urls_found_list = set(attack_urls_found_list)
attack_urls_found_list = list(attack_urls_found_list)
@@ -234,28 +306,102 @@ def main():
score["good_crawler"]["attack_url"] = False
score["bad_crawler"]["attack_url"] = False
score["regular_user"]["attack_url"] = False
#--------------------- Calculate score ---------------------
# --------------------- Calculate score ---------------------
attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
attacker_score = (
score["attacker"]["risky_http_methods"]
* weights["attacker"]["risky_http_methods"]
)
attacker_score = (
attacker_score
+ score["attacker"]["robots_violations"]
* weights["attacker"]["robots_violations"]
)
attacker_score = (
attacker_score
+ score["attacker"]["uneven_request_timing"]
* weights["attacker"]["uneven_request_timing"]
)
attacker_score = (
attacker_score
+ score["attacker"]["different_user_agents"]
* weights["attacker"]["different_user_agents"]
)
attacker_score = (
attacker_score
+ score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
)
good_crawler_score = (
score["good_crawler"]["risky_http_methods"]
* weights["good_crawler"]["risky_http_methods"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["robots_violations"]
* weights["good_crawler"]["robots_violations"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["uneven_request_timing"]
* weights["good_crawler"]["uneven_request_timing"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["different_user_agents"]
* weights["good_crawler"]["different_user_agents"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["attack_url"]
* weights["good_crawler"]["attack_url"]
)
bad_crawler_score = (
score["bad_crawler"]["risky_http_methods"]
* weights["bad_crawler"]["risky_http_methods"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["robots_violations"]
* weights["bad_crawler"]["robots_violations"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["uneven_request_timing"]
* weights["bad_crawler"]["uneven_request_timing"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["different_user_agents"]
* weights["bad_crawler"]["different_user_agents"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
)
regular_user_score = (
score["regular_user"]["risky_http_methods"]
* weights["regular_user"]["risky_http_methods"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["robots_violations"]
* weights["regular_user"]["robots_violations"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["uneven_request_timing"]
* weights["regular_user"]["uneven_request_timing"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["different_user_agents"]
* weights["regular_user"]["different_user_agents"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["attack_url"]
* weights["regular_user"]["attack_url"]
)
score_details = f"""
Attacker score: {attacker_score}
Good Crawler score: {good_crawler_score}
@@ -263,9 +409,22 @@ def main():
Regular User score: {regular_user_score}
"""
app_logger.debug(score_details)
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
analyzed_metrics = {
"risky_http_methods": http_method_attacker_score,
"robots_violations": violated_robots_ratio,
"uneven_request_timing": mean,
"different_user_agents": user_agents_used,
"attack_url": attack_urls_found_list,
}
category_scores = {
"attacker": attacker_score,
"good_crawler": good_crawler_score,
"bad_crawler": bad_crawler_score,
"regular_user": regular_user_score,
}
category = max(category_scores, key=category_scores.get)
last_analysis = datetime.now()
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
db_manager.update_ip_stats_analysis(
ip, analyzed_metrics, category, category_scores, last_analysis
)
return
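
Note on the scoring hunk above: the four *_score totals are each a weighted sum of boolean signals against the weights table. A minimal standalone sketch of that computation, assuming score and weights dicts shaped like the ones in this diff (the weighted_category_scores helper is hypothetical, not part of the project):

# Hedged sketch, not the project's implementation: compute each category's
# total as sum(signal_triggered * signal_weight) over its signals.
def weighted_category_scores(score: dict, weights: dict) -> dict:
    """Return {category: weighted sum of triggered signals} for every category."""
    return {
        category: sum(
            int(triggered) * weights[category][signal]
            for signal, triggered in signals.items()
        )
        for category, signals in score.items()
    }

# Example with a reduced signal set: an IP that hit an attack URL and rotated user agents.
score = {
    "attacker": {"attack_url": True, "different_user_agents": True},
    "regular_user": {"attack_url": False, "different_user_agents": True},
}
weights = {
    "attacker": {"attack_url": 15, "different_user_agents": 8},
    "regular_user": {"attack_url": 0, "different_user_agents": 3},
}
print(weighted_category_scores(score, weights))
# {'attacker': 23, 'regular_user': 3}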

View File

@@ -11,7 +11,7 @@ TASK_CONFIG = {
"name": "fetch-ip-rep",
"cron": "*/5 * * * *",
"enabled": True,
"run_when_loaded": True
"run_when_loaded": True,
}
@@ -21,7 +21,9 @@ def main():
# Only get IPs that haven't been enriched yet
unenriched_ips = db_manager.get_unenriched_ips(limit=50)
app_logger.info(f"{len(unenriched_ips)} IP's need to be have reputation enrichment.")
app_logger.info(
f"{len(unenriched_ips)} IP's need to be have reputation enrichment."
)
for ip in unenriched_ips:
try:
api_url = "https://iprep.lcrawl.com/api/iprep/"
@@ -43,8 +45,11 @@ def main():
sanitized_list_on = sanitize_dict(list_on, 100000)
db_manager.update_ip_rep_infos(
ip, sanitized_country_iso_code, sanitized_asn,
sanitized_asn_org, sanitized_list_on
ip,
sanitized_country_iso_code,
sanitized_asn,
sanitized_asn_org,
sanitized_list_on,
)
except requests.RequestException as e:
app_logger.warning(f"Failed to fetch IP rep for {ip}: {e}")

View File

@@ -17,24 +17,29 @@ TASK_CONFIG = {
"name": "export-malicious-ips",
"cron": "*/5 * * * *",
"enabled": True,
"run_when_loaded": True
"run_when_loaded": True,
}
EXPORTS_DIR = "exports"
OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
# ----------------------
# TASK LOGIC
# ----------------------
def has_recent_honeypot_access(session, minutes: int = 5) -> bool:
"""Check if honeypot was accessed in the last N minutes."""
cutoff_time = datetime.now() - timedelta(minutes=minutes)
count = session.query(AccessLog).filter(
AccessLog.is_honeypot_trigger == True,
AccessLog.timestamp >= cutoff_time
).count()
count = (
session.query(AccessLog)
.filter(
AccessLog.is_honeypot_trigger == True, AccessLog.timestamp >= cutoff_time
)
.count()
)
return count > 0
def main():
"""
Export all IPs flagged as suspicious to a text file.
@@ -49,23 +54,29 @@ def main():
# Check for recent honeypot activity
if not has_recent_honeypot_access(session):
app_logger.info(f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes")
app_logger.info(
f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes"
)
return
# Query distinct suspicious IPs
results = session.query(distinct(AccessLog.ip)).filter(
AccessLog.is_suspicious == True
).all()
results = (
session.query(distinct(AccessLog.ip))
.filter(AccessLog.is_suspicious == True)
.all()
)
# Ensure exports directory exists
os.makedirs(EXPORTS_DIR, exist_ok=True)
# Write IPs to file (one per line)
with open(OUTPUT_FILE, 'w') as f:
with open(OUTPUT_FILE, "w") as f:
for (ip,) in results:
f.write(f"{ip}\n")
app_logger.info(f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}")
app_logger.info(
f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}"
)
except Exception as e:
app_logger.error(f"[Background Task] {task_name} failed: {e}")