diff --git a/helm/Chart.yaml b/helm/Chart.yaml index 8f1d13b..fbfd6ea 100644 --- a/helm/Chart.yaml +++ b/helm/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: krawl-chart description: A Helm chart for Krawl honeypot server type: application -version: 1.0.7 -appVersion: 1.0.7 +version: 1.0.8 +appVersion: 1.0.8 keywords: - honeypot - security diff --git a/src/database.py b/src/database.py index 789aa29..9daca49 100644 --- a/src/database.py +++ b/src/database.py @@ -790,22 +790,69 @@ class DatabaseManager: def get_ips_needing_reevaluation(self) -> List[str]: """ - Get all IP addresses that have been flagged for reevaluation. + Get all IP addresses that need evaluation. Returns: List of IP addresses where need_reevaluation is True + or that have never been analyzed (last_analysis is NULL) """ session = self.session try: ips = ( session.query(IpStats.ip) - .filter(IpStats.need_reevaluation == True) + .filter( + or_( + IpStats.need_reevaluation == True, + IpStats.last_analysis.is_(None), + ) + ) .all() ) return [ip[0] for ip in ips] finally: self.close_session() + def flag_stale_ips_for_reevaluation(self) -> int: + """ + Flag IPs for reevaluation where: + - last_seen is between 15 and 30 days ago + - last_analysis is more than 10 days ago (or never analyzed) + + Returns: + Number of IPs flagged for reevaluation + """ + session = self.session + try: + now = datetime.now() + last_seen_lower = now - timedelta(days=30) + last_seen_upper = now - timedelta(days=15) + last_analysis_cutoff = now - timedelta(days=10) + + count = ( + session.query(IpStats) + .filter( + IpStats.last_seen >= last_seen_lower, + IpStats.last_seen <= last_seen_upper, + or_( + IpStats.last_analysis <= last_analysis_cutoff, + IpStats.last_analysis.is_(None), + ), + IpStats.need_reevaluation == False, + IpStats.manual_category == False, + ) + .update( + {IpStats.need_reevaluation: True}, + synchronize_session=False, + ) + ) + session.commit() + return count + except Exception as e: + session.rollback() + 
raise + finally: + self.close_session() + def get_access_logs( self, limit: int = 100, diff --git a/src/routes/honeypot.py b/src/routes/honeypot.py index 6db1c65..e4b384c 100644 --- a/src/routes/honeypot.py +++ b/src/routes/honeypot.py @@ -398,6 +398,8 @@ async def trap_page(request: Request, path: str): access_logger.warning( f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {full_path}" ) + else: + access_logger.info(f"[REQUEST] {client_ip} - {full_path}") # Record access unless the router dependency already handled it # (attack pattern or honeypot path → already recorded by _track_honeypot_request) diff --git a/src/tasks/analyze_ips.py b/src/tasks/analyze_ips.py index 12ef5e4..295cd92 100644 --- a/src/tasks/analyze_ips.py +++ b/src/tasks/analyze_ips.py @@ -1,3 +1,4 @@ +from collections import Counter from database import get_database from pathlib import Path from datetime import datetime, timedelta @@ -94,6 +95,19 @@ def main(): "attack_url": 0, }, } + # Parse robots.txt once before the loop (it never changes during a run) + robots_disallows = [] + robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt" + with open(robots_path, "r") as f: + for line in f: + line = line.strip() + if not line: + continue + parts = line.split(":", 1) + if parts[0] == "Disallow": + parts[1] = parts[1].rstrip("/") + robots_disallows.append(parts[1].strip()) + # Get IPs flagged for reevaluation (set when a suspicious request arrives) ips_to_analyze = set(db_manager.get_ips_needing_reevaluation()) @@ -105,41 +119,21 @@ def main(): for ip in ips_to_analyze: # Get full history for this IP to perform accurate analysis - ip_accesses = db_manager.get_access_logs(limit=999999999, ip_filter=ip) + ip_accesses = db_manager.get_access_logs( + limit=10000, ip_filter=ip, since_minutes=1440 * 30 + ) # look back up to 30 days of history for better accuracy total_accesses_count = len(ip_accesses) if total_accesses_count <= 0: continue # --------------------- HTTP Methods 
--------------------- - get_accesses_count = len( - [item for item in ip_accesses if item["method"] == "GET"] - ) - post_accesses_count = len( - [item for item in ip_accesses if item["method"] == "POST"] - ) - put_accesses_count = len( - [item for item in ip_accesses if item["method"] == "PUT"] - ) - delete_accesses_count = len( - [item for item in ip_accesses if item["method"] == "DELETE"] - ) - head_accesses_count = len( - [item for item in ip_accesses if item["method"] == "HEAD"] - ) - options_accesses_count = len( - [item for item in ip_accesses if item["method"] == "OPTIONS"] - ) - patch_accesses_count = len( - [item for item in ip_accesses if item["method"] == "PATCH"] - ) + method_counts = Counter(item["method"] for item in ip_accesses) if total_accesses_count > http_risky_methods_threshold: - http_method_attacker_score = ( - post_accesses_count - + put_accesses_count - + delete_accesses_count - + options_accesses_count - + patch_accesses_count - ) / total_accesses_count + risky_count = sum( + method_counts.get(m, 0) + for m in ("POST", "PUT", "DELETE", "OPTIONS", "PATCH") + ) + http_method_attacker_score = risky_count / total_accesses_count else: http_method_attacker_score = 0 # print(f"HTTP Method attacker score: {http_method_attacker_score}") @@ -154,21 +148,6 @@ def main(): score["bad_crawler"]["risky_http_methods"] = False score["regular_user"]["risky_http_methods"] = False # --------------------- Robots Violations --------------------- - # respect robots.txt and login/config pages access frequency - robots_disallows = [] - robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt" - with open(robots_path, "r") as f: - for line in f: - line = line.strip() - if not line: - continue - parts = line.split(":") - - if parts[0] == "Disallow": - parts[1] = parts[1].rstrip("/") - # print(f"DISALLOW {parts[1]}") - robots_disallows.append(parts[1].strip()) - # if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or 
attacker violated_robots_count = len( [ item diff --git a/src/tasks/flag_stale_ips.py b/src/tasks/flag_stale_ips.py new file mode 100644 index 0000000..a9e8e01 --- /dev/null +++ b/src/tasks/flag_stale_ips.py @@ -0,0 +1,33 @@ +from database import get_database +from logger import get_app_logger + +# ---------------------- +# TASK CONFIG +# ---------------------- + +TASK_CONFIG = { + "name": "flag-stale-ips", + "cron": "0 2 * * *", # Run daily at 2 AM + "enabled": True, + "run_when_loaded": False, +} + + +def main(): + app_logger = get_app_logger() + db = get_database() + + try: + count = db.flag_stale_ips_for_reevaluation() + if count > 0: + app_logger.info( + f"[Background Task] flag-stale-ips: Flagged {count} stale IPs for reevaluation" + ) + else: + app_logger.debug( + "[Background Task] flag-stale-ips: No stale IPs found to flag" + ) + except Exception as e: + app_logger.error( + f"[Background Task] flag-stale-ips: Error flagging stale IPs: {e}" + )