Merge pull request #94 from BlessedRebuS/fix/new-ip-reevaluation
Fix/new ip reevaluation
This commit is contained in:
@@ -2,8 +2,8 @@ apiVersion: v2
|
|||||||
name: krawl-chart
|
name: krawl-chart
|
||||||
description: A Helm chart for Krawl honeypot server
|
description: A Helm chart for Krawl honeypot server
|
||||||
type: application
|
type: application
|
||||||
version: 1.0.7
|
version: 1.0.8
|
||||||
appVersion: 1.0.7
|
appVersion: 1.0.8
|
||||||
keywords:
|
keywords:
|
||||||
- honeypot
|
- honeypot
|
||||||
- security
|
- security
|
||||||
|
|||||||
@@ -790,22 +790,69 @@ class DatabaseManager:
|
|||||||
|
|
||||||
def get_ips_needing_reevaluation(self) -> List[str]:
    """
    Get all IP addresses that need evaluation.

    An IP is selected when either condition holds:
      - its need_reevaluation flag is True
      - it has never been analyzed (last_analysis is NULL)

    Returns:
        List of IP addresses matching the conditions above
    """
    session = self.session
    try:
        rows = (
            session.query(IpStats.ip)
            .filter(
                or_(
                    # Column comparison builds a SQL expression; it is not a
                    # Python truth test, so "== True" is intentional here.
                    IpStats.need_reevaluation == True,  # noqa: E712
                    IpStats.last_analysis.is_(None),
                )
            )
            .all()
        )
        # Only one column is queried, so unwrap it from each result row.
        return [row[0] for row in rows]
    finally:
        # Runs whether or not the query raised.
        self.close_session()
|
||||||
|
|
||||||
|
def flag_stale_ips_for_reevaluation(self) -> int:
    """
    Flag stale IPs for reevaluation with a single bulk UPDATE.

    A row is flagged (need_reevaluation set to True) when all of these hold:
      - last_seen is between 30 and 15 days ago (both bounds inclusive)
      - last_analysis is at least 10 days old, or is NULL (never analyzed)
      - need_reevaluation is currently False
      - manual_category is False

    Returns:
        Number of IPs flagged for reevaluation

    Raises:
        Exception: any error raised by the update or commit is re-raised
            after the session has been rolled back.
    """
    session = self.session
    try:
        # All cutoffs derive from one timestamp so the windows are consistent.
        now = datetime.now()
        last_seen_lower = now - timedelta(days=30)
        last_seen_upper = now - timedelta(days=15)
        last_analysis_cutoff = now - timedelta(days=10)

        count = (
            session.query(IpStats)
            .filter(
                IpStats.last_seen >= last_seen_lower,
                IpStats.last_seen <= last_seen_upper,
                or_(
                    IpStats.last_analysis <= last_analysis_cutoff,
                    IpStats.last_analysis.is_(None),
                ),
                # Column comparisons build SQL expressions, so "== False"
                # is intentional (not a Python truth test).
                IpStats.need_reevaluation == False,  # noqa: E712
                IpStats.manual_category == False,  # noqa: E712
            )
            .update(
                {IpStats.need_reevaluation: True},
                synchronize_session=False,
            )
        )
        session.commit()
        return count
    except Exception:
        # Undo the partial transaction, then let the caller see the error.
        session.rollback()
        raise
    finally:
        # Runs on both the success and the failure path.
        self.close_session()
|
||||||
|
|
||||||
def get_access_logs(
|
def get_access_logs(
|
||||||
self,
|
self,
|
||||||
limit: int = 100,
|
limit: int = 100,
|
||||||
|
|||||||
@@ -398,6 +398,8 @@ async def trap_page(request: Request, path: str):
|
|||||||
access_logger.warning(
|
access_logger.warning(
|
||||||
f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {full_path}"
|
f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {full_path}"
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
access_logger.info(f"[REQUEST] {client_ip} - {full_path}")
|
||||||
|
|
||||||
# Record access unless the router dependency already handled it
|
# Record access unless the router dependency already handled it
|
||||||
# (attack pattern or honeypot path → already recorded by _track_honeypot_request)
|
# (attack pattern or honeypot path → already recorded by _track_honeypot_request)
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
from collections import Counter
|
||||||
from database import get_database
|
from database import get_database
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
@@ -94,6 +95,19 @@ def main():
|
|||||||
"attack_url": 0,
|
"attack_url": 0,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
# Parse robots.txt once before the loop (it never changes during a run)
|
||||||
|
robots_disallows = []
|
||||||
|
robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt"
|
||||||
|
with open(robots_path, "r") as f:
|
||||||
|
for line in f:
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
parts = line.split(":")
|
||||||
|
if parts[0] == "Disallow":
|
||||||
|
parts[1] = parts[1].rstrip("/")
|
||||||
|
robots_disallows.append(parts[1].strip())
|
||||||
|
|
||||||
# Get IPs flagged for reevaluation (set when a suspicious request arrives)
|
# Get IPs flagged for reevaluation (set when a suspicious request arrives)
|
||||||
ips_to_analyze = set(db_manager.get_ips_needing_reevaluation())
|
ips_to_analyze = set(db_manager.get_ips_needing_reevaluation())
|
||||||
|
|
||||||
@@ -105,41 +119,21 @@ def main():
|
|||||||
|
|
||||||
for ip in ips_to_analyze:
|
for ip in ips_to_analyze:
|
||||||
# Get full history for this IP to perform accurate analysis
|
# Get full history for this IP to perform accurate analysis
|
||||||
ip_accesses = db_manager.get_access_logs(limit=999999999, ip_filter=ip)
|
ip_accesses = db_manager.get_access_logs(
|
||||||
|
limit=10000, ip_filter=ip, since_minutes=1440 * 30
|
||||||
|
) # look back up to 30 days of history for better accuracy
|
||||||
total_accesses_count = len(ip_accesses)
|
total_accesses_count = len(ip_accesses)
|
||||||
if total_accesses_count <= 0:
|
if total_accesses_count <= 0:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# --------------------- HTTP Methods ---------------------
|
# --------------------- HTTP Methods ---------------------
|
||||||
get_accesses_count = len(
|
method_counts = Counter(item["method"] for item in ip_accesses)
|
||||||
[item for item in ip_accesses if item["method"] == "GET"]
|
|
||||||
)
|
|
||||||
post_accesses_count = len(
|
|
||||||
[item for item in ip_accesses if item["method"] == "POST"]
|
|
||||||
)
|
|
||||||
put_accesses_count = len(
|
|
||||||
[item for item in ip_accesses if item["method"] == "PUT"]
|
|
||||||
)
|
|
||||||
delete_accesses_count = len(
|
|
||||||
[item for item in ip_accesses if item["method"] == "DELETE"]
|
|
||||||
)
|
|
||||||
head_accesses_count = len(
|
|
||||||
[item for item in ip_accesses if item["method"] == "HEAD"]
|
|
||||||
)
|
|
||||||
options_accesses_count = len(
|
|
||||||
[item for item in ip_accesses if item["method"] == "OPTIONS"]
|
|
||||||
)
|
|
||||||
patch_accesses_count = len(
|
|
||||||
[item for item in ip_accesses if item["method"] == "PATCH"]
|
|
||||||
)
|
|
||||||
if total_accesses_count > http_risky_methods_threshold:
|
if total_accesses_count > http_risky_methods_threshold:
|
||||||
http_method_attacker_score = (
|
risky_count = sum(
|
||||||
post_accesses_count
|
method_counts.get(m, 0)
|
||||||
+ put_accesses_count
|
for m in ("POST", "PUT", "DELETE", "OPTIONS", "PATCH")
|
||||||
+ delete_accesses_count
|
)
|
||||||
+ options_accesses_count
|
http_method_attacker_score = risky_count / total_accesses_count
|
||||||
+ patch_accesses_count
|
|
||||||
) / total_accesses_count
|
|
||||||
else:
|
else:
|
||||||
http_method_attacker_score = 0
|
http_method_attacker_score = 0
|
||||||
# print(f"HTTP Method attacker score: {http_method_attacker_score}")
|
# print(f"HTTP Method attacker score: {http_method_attacker_score}")
|
||||||
@@ -154,21 +148,6 @@ def main():
|
|||||||
score["bad_crawler"]["risky_http_methods"] = False
|
score["bad_crawler"]["risky_http_methods"] = False
|
||||||
score["regular_user"]["risky_http_methods"] = False
|
score["regular_user"]["risky_http_methods"] = False
|
||||||
# --------------------- Robots Violations ---------------------
|
# --------------------- Robots Violations ---------------------
|
||||||
# respect robots.txt and login/config pages access frequency
|
|
||||||
robots_disallows = []
|
|
||||||
robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt"
|
|
||||||
with open(robots_path, "r") as f:
|
|
||||||
for line in f:
|
|
||||||
line = line.strip()
|
|
||||||
if not line:
|
|
||||||
continue
|
|
||||||
parts = line.split(":")
|
|
||||||
|
|
||||||
if parts[0] == "Disallow":
|
|
||||||
parts[1] = parts[1].rstrip("/")
|
|
||||||
# print(f"DISALLOW {parts[1]}")
|
|
||||||
robots_disallows.append(parts[1].strip())
|
|
||||||
# if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
|
|
||||||
violated_robots_count = len(
|
violated_robots_count = len(
|
||||||
[
|
[
|
||||||
item
|
item
|
||||||
|
|||||||
33
src/tasks/flag_stale_ips.py
Normal file
33
src/tasks/flag_stale_ips.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
from database import get_database
|
||||||
|
from logger import get_app_logger
|
||||||
|
|
||||||
|
# ----------------------
|
||||||
|
# TASK CONFIG
|
||||||
|
# ----------------------
|
||||||
|
|
||||||
|
TASK_CONFIG = {
    "name": "flag-stale-ips",
    "cron": "0 2 * * *",  # Run daily at 2 AM
    "enabled": True,
    "run_when_loaded": False,
}
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """
    Run the flag-stale-ips task.

    Calls flag_stale_ips_for_reevaluation() on the database object and logs
    the outcome: info when at least one IP was flagged, debug when none were.
    Any exception is logged at error level and not re-raised.
    """
    app_logger = get_app_logger()
    db = get_database()

    try:
        count = db.flag_stale_ips_for_reevaluation()
        if count > 0:
            app_logger.info(
                f"[Background Task] flag-stale-ips: Flagged {count} stale IPs for reevaluation"
            )
        else:
            app_logger.debug(
                "[Background Task] flag-stale-ips: No stale IPs found to flag"
            )
    except Exception as e:
        # Log the failure instead of propagating it to the caller.
        app_logger.error(
            f"[Background Task] flag-stale-ips: Error flagging stale IPs: {e}"
        )
|
||||||
Reference in New Issue
Block a user