diff --git a/src/database.py b/src/database.py
index 5d41e2c..d7501ca 100644
--- a/src/database.py
+++ b/src/database.py
@@ -7,7 +7,7 @@ Provides SQLAlchemy session management and database initialization.
 
 import os
 import stat
-from datetime import datetime
+from datetime import datetime, timedelta
 from typing import Optional, List, Dict, Any
 from zoneinfo import ZoneInfo
 
@@ -390,6 +390,7 @@ class DatabaseManager:
     def get_unenriched_ips(self, limit: int = 100) -> List[str]:
         """
         Get IPs that don't have reputation data yet.
+        Excludes RFC1918 private addresses and other non-routable IPs.
 
         Args:
             limit: Maximum number of IPs to return
@@ -400,7 +401,18 @@ class DatabaseManager:
         session = self.session
         try:
             ips = session.query(IpStats.ip).filter(
-                IpStats.country_code.is_(None)
+                IpStats.country_code.is_(None),
+                ~IpStats.ip.like('10.%'),
+                ~IpStats.ip.like('172.16.%'),
+                ~IpStats.ip.like('172.17.%'),
+                ~IpStats.ip.like('172.18.%'),
+                ~IpStats.ip.like('172.19.%'),
+                ~IpStats.ip.like('172.2_.%'),
+                ~IpStats.ip.like('172.30.%'),
+                ~IpStats.ip.like('172.31.%'),
+                ~IpStats.ip.like('192.168.%'),
+                ~IpStats.ip.like('127.%'),
+                ~IpStats.ip.like('169.254.%')
             ).limit(limit).all()
             return [ip[0] for ip in ips]
         finally:
@@ -411,7 +423,8 @@ class DatabaseManager:
         limit: int = 100,
         offset: int = 0,
         ip_filter: Optional[str] = None,
-        suspicious_only: bool = False
+        suspicious_only: bool = False,
+        since_minutes: Optional[int] = None
     ) -> List[Dict[str, Any]]:
         """
         Retrieve access logs with optional filtering.
@@ -421,6 +434,7 @@ class DatabaseManager:
             offset: Number of records to skip
             ip_filter: Filter by IP address
             suspicious_only: Only return suspicious requests
+            since_minutes: Only return logs from the last N minutes
 
         Returns:
             List of access log dictionaries
@@ -433,6 +447,9 @@ class DatabaseManager:
                 query = query.filter(AccessLog.ip == sanitize_ip(ip_filter))
             if suspicious_only:
                 query = query.filter(AccessLog.is_suspicious == True)
+            if since_minutes is not None:
+                cutoff_time = datetime.now(tz=ZoneInfo('UTC')) - timedelta(minutes=since_minutes)
+                query = query.filter(AccessLog.timestamp >= cutoff_time)
 
             logs = query.offset(offset).limit(limit).all()
 
diff --git a/src/tasks/analyze_ips.py b/src/tasks/analyze_ips.py
index e4fda84..7ca0d62 100644
--- a/src/tasks/analyze_ips.py
+++ b/src/tasks/analyze_ips.py
@@ -73,12 +73,18 @@ def main():
             "attack_url": 0
         }
     }
-    accesses = db_manager.get_access_logs(limit=999999999)
-    ips = {item['ip'] for item in accesses}
+    # Get IPs with recent activity (last minute to match cron schedule)
+    recent_accesses = db_manager.get_access_logs(limit=999999999, since_minutes=1)
+    ips_to_analyze = {item['ip'] for item in recent_accesses}
 
-    for ip in ips:
-        ip_accesses = [item for item in accesses if item["ip"] == ip]
-        total_accesses_count = len(accesses)
+    if not ips_to_analyze:
+        app_logger.debug("[Background Task] analyze-ips: No recent activity, skipping")
+        return
+
+    for ip in ips_to_analyze:
+        # Get full history for this IP to perform accurate analysis
+        ip_accesses = db_manager.get_access_logs(limit=999999999, ip_filter=ip)
+        total_accesses_count = len(ip_accesses)
         if total_accesses_count <= 0:
             return
 
diff --git a/src/tasks/fetch_ip_rep.py b/src/tasks/fetch_ip_rep.py
index 9a78ee6..efddaea 100644
--- a/src/tasks/fetch_ip_rep.py
+++ b/src/tasks/fetch_ip_rep.py
@@ -21,7 +21,7 @@ def main():
 
     # Only get IPs that haven't been enriched yet
     unenriched_ips = db_manager.get_unenriched_ips(limit=50)
-
+    app_logger.info(f"{len(unenriched_ips)} IPs need reputation enrichment.")
     for ip in unenriched_ips:
         try:
             api_url = "https://iprep.lcrawl.com/api/iprep/"
diff --git a/src/tasks/top_attacking_ips.py b/src/tasks/top_attacking_ips.py
index d9e18d3..af81e54 100644
--- a/src/tasks/top_attacking_ips.py
+++ b/src/tasks/top_attacking_ips.py
@@ -1,6 +1,8 @@
 # tasks/export_malicious_ips.py
 
 import os
+from datetime import datetime, timedelta
+from zoneinfo import ZoneInfo
 from logger import get_app_logger
 from database import get_database
 from models import AccessLog
@@ -24,6 +26,25 @@ OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
 # ----------------------
 # TASK LOGIC
 # ----------------------
+def has_recent_honeypot_access(session, minutes: int = 5) -> bool:
+    """
+    Check whether the honeypot was triggered in the last N minutes.
+
+    Args:
+        session: SQLAlchemy session used to query AccessLog
+        minutes: Size of the look-back window, in minutes
+
+    Returns:
+        True if at least one honeypot-trigger access log falls in the window
+    """
+    cutoff_time = datetime.now(tz=ZoneInfo('UTC')) - timedelta(minutes=minutes)
+    # Existence check: fetch at most one matching row instead of counting them all
+    recent_hit = session.query(AccessLog.ip).filter(
+        AccessLog.is_honeypot_trigger == True,
+        AccessLog.timestamp >= cutoff_time
+    ).first()
+    return recent_hit is not None
+
 def main():
     """
     Export all IPs flagged as suspicious to a text file.
@@ -36,6 +57,11 @@ def main():
         db = get_database()
         session = db.session
 
+        # Check for recent honeypot activity
+        if not has_recent_honeypot_access(session):
+            app_logger.info(f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes")
+            return
+
         # Query distinct suspicious IPs
         results = session.query(distinct(AccessLog.ip)).filter(
             AccessLog.is_suspicious == True