refactor: streamline memory management by removing unused variables and enhancing cleanup logic

This commit is contained in:
Lorenzo Venerandi
2026-02-17 18:09:35 +01:00
parent b94cd38b61
commit 846fba631f
3 changed files with 8 additions and 100 deletions

View File

@@ -2,10 +2,9 @@
""" """
Memory cleanup task for Krawl honeypot. Memory cleanup task for Krawl honeypot.
Periodically trims unbounded in-memory structures to prevent OOM. Periodically cleans expired bans and stale entries from ip_page_visits.
""" """
from database import get_database
from logger import get_app_logger from logger import get_app_logger
# ---------------------- # ----------------------
@@ -35,35 +34,24 @@ def main():
app_logger.warning("Tracker not initialized, skipping memory cleanup") app_logger.warning("Tracker not initialized, skipping memory cleanup")
return return
# Get memory stats before cleanup
stats_before = tracker.get_memory_stats() stats_before = tracker.get_memory_stats()
# Run cleanup
tracker.cleanup_memory() tracker.cleanup_memory()
# Get memory stats after cleanup
stats_after = tracker.get_memory_stats() stats_after = tracker.get_memory_stats()
# Log changes visits_reduced = (
access_log_reduced = ( stats_before["ip_page_visits"] - stats_after["ip_page_visits"]
stats_before["access_log_size"] - stats_after["access_log_size"]
)
cred_reduced = (
stats_before["credential_attempts_size"]
- stats_after["credential_attempts_size"]
) )
if access_log_reduced > 0 or cred_reduced > 0: if visits_reduced > 0:
app_logger.info( app_logger.info(
f"Memory cleanup: Trimmed {access_log_reduced} access logs, " f"Memory cleanup: Removed {visits_reduced} stale ip_page_visits entries"
f"{cred_reduced} credential attempts"
) )
# Log current memory state for monitoring
app_logger.debug( app_logger.debug(
f"Memory stats after cleanup: " f"Memory stats after cleanup: "
f"access_logs={stats_after['access_log_size']}, " f"ip_page_visits={stats_after['ip_page_visits']}"
f"credentials={stats_after['credential_attempts_size']}"
) )
except Exception as e: except Exception as e:

View File

@@ -40,7 +40,6 @@ class TasksMaster:
def __init__(self, scheduler: BackgroundScheduler): def __init__(self, scheduler: BackgroundScheduler):
self.tasks = self._config_tasks() self.tasks = self._config_tasks()
self.scheduler = scheduler self.scheduler = scheduler
self.last_run_times = {}
self.scheduler.add_listener( self.scheduler.add_listener(
self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR
) )
@@ -234,9 +233,6 @@ class TasksMaster:
app_logger.error(f"Failed to load {module_name}: {e}") app_logger.error(f"Failed to load {module_name}: {e}")
def job_listener(self, event): def job_listener(self, event):
job_id = event.job_id
self.last_run_times[job_id] = datetime.datetime.now()
if event.exception: if event.exception:
app_logger.error(f"Job {event.job_id} failed: {event.exception}") app_logger.error(f"Job {event.job_id} failed: {event.exception}")
else: else:

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from typing import Dict, List, Tuple, Optional from typing import Dict, Tuple, Optional
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
from zoneinfo import ZoneInfo from zoneinfo import ZoneInfo
@@ -9,7 +9,6 @@ import urllib.parse
from wordlists import get_wordlists from wordlists import get_wordlists
from database import get_database, DatabaseManager from database import get_database, DatabaseManager
from ip_utils import is_local_or_private_ip, is_valid_public_ip
# Module-level singleton for background task access # Module-level singleton for background task access
_tracker_instance: "AccessTracker | None" = None _tracker_instance: "AccessTracker | None" = None
@@ -49,12 +48,6 @@ class AccessTracker:
""" """
self.max_pages_limit = max_pages_limit self.max_pages_limit = max_pages_limit
self.ban_duration_seconds = ban_duration_seconds self.ban_duration_seconds = ban_duration_seconds
self.access_log: List[Dict] = []
self.credential_attempts: List[Dict] = []
# Memory limits for in-memory lists (prevents unbounded growth)
self.max_access_log_size = 10_000 # Keep only recent 10k accesses
self.max_credential_log_size = 5_000 # Keep only recent 5k attempts
# Track pages visited by each IP (for good crawler limiting) # Track pages visited by each IP (for good crawler limiting)
self.ip_page_visits: Dict[str, Dict[str, object]] = defaultdict(dict) self.ip_page_visits: Dict[str, Dict[str, object]] = defaultdict(dict)
@@ -213,23 +206,6 @@ class AccessTracker:
if server_ip and ip == server_ip: if server_ip and ip == server_ip:
return return
# In-memory storage for dashboard
self.credential_attempts.append(
{
"ip": ip,
"path": path,
"username": username,
"password": password,
"timestamp": datetime.now().isoformat(),
}
)
# Trim if exceeding max size (prevent unbounded growth)
if len(self.credential_attempts) > self.max_credential_log_size:
self.credential_attempts = self.credential_attempts[
-self.max_credential_log_size :
]
# Persist to database # Persist to database
if self.db: if self.db:
try: try:
@@ -288,22 +264,6 @@ class AccessTracker:
is_honeypot = self.is_honeypot_path(path) is_honeypot = self.is_honeypot_path(path)
# In-memory storage for dashboard # In-memory storage for dashboard
self.access_log.append(
{
"ip": ip,
"path": path,
"user_agent": user_agent,
"suspicious": is_suspicious,
"honeypot_triggered": self.is_honeypot_path(path),
"attack_types": attack_findings,
"timestamp": datetime.now().isoformat(),
}
)
# Trim if exceeding max size (prevent unbounded growth)
if len(self.access_log) > self.max_access_log_size:
self.access_log = self.access_log[-self.max_access_log_size :]
# Persist to database # Persist to database
if self.db: if self.db:
try: try:
@@ -581,25 +541,6 @@ class AccessTracker:
except Exception: except Exception:
return 0 return 0
def get_suspicious_accesses(self, limit: int = 20) -> List[Dict]:
    """Return the most recent suspicious accesses from the in-memory log.

    Walks ``self.access_log`` and keeps the entries whose ``"suspicious"``
    value is truthy and whose ``"ip"`` is not reported as local/private by
    ``is_local_or_private_ip``.

    Args:
        limit: Maximum number of entries to return. Zero or a negative
            value yields an empty list.

    Returns:
        At most ``limit`` matching entries, in log order, taken from the
        tail of the filtered list.
    """
    # A bare ``[-limit:]`` slice hands back the whole list for limit == 0
    # and skips the first entries for a negative limit, so reject
    # non-positive limits before slicing.
    if limit <= 0:
        return []

    suspicious = [
        log
        for log in self.access_log
        if log.get("suspicious", False)
        and not is_local_or_private_ip(log.get("ip", ""))
    ]
    return suspicious[-limit:]
def get_attack_type_accesses(self, limit: int = 20) -> List[Dict]:
    """Return the most recent accesses that carry detected attack types.

    Walks ``self.access_log`` and keeps the entries whose ``"attack_types"``
    value is truthy and whose ``"ip"`` is not reported as local/private by
    ``is_local_or_private_ip``.

    Args:
        limit: Maximum number of entries to return. Zero or a negative
            value yields an empty list.

    Returns:
        At most ``limit`` matching entries, in log order, taken from the
        tail of the filtered list.
    """
    # ``attacks[-0:]`` would return every entry and a negative limit would
    # drop entries from the front, so handle non-positive limits explicitly.
    if limit <= 0:
        return []

    attacks = [
        log
        for log in self.access_log
        if log.get("attack_types") and not is_local_or_private_ip(log.get("ip", ""))
    ]
    return attacks[-limit:]
def get_stats(self) -> Dict: def get_stats(self) -> Dict:
"""Get statistics summary from database.""" """Get statistics summary from database."""
if not self.db: if not self.db:
@@ -623,22 +564,7 @@ class AccessTracker:
""" """
Clean up in-memory structures to prevent unbounded growth. Clean up in-memory structures to prevent unbounded growth.
Should be called periodically (e.g., every 5 minutes). Should be called periodically (e.g., every 5 minutes).
Trimming strategy:
- Keep most recent N entries in logs
- Remove oldest entries when limit exceeded
- Clean expired ban entries from ip_page_visits
""" """
# Trim access_log to max size (keep most recent)
if len(self.access_log) > self.max_access_log_size:
self.access_log = self.access_log[-self.max_access_log_size :]
# Trim credential_attempts to max size (keep most recent)
if len(self.credential_attempts) > self.max_credential_log_size:
self.credential_attempts = self.credential_attempts[
-self.max_credential_log_size :
]
# Clean expired ban entries from ip_page_visits # Clean expired ban entries from ip_page_visits
current_time = datetime.now() current_time = datetime.now()
for ip, data in self.ip_page_visits.items(): for ip, data in self.ip_page_visits.items():
@@ -671,7 +597,5 @@ class AccessTracker:
Dictionary with counts of in-memory items Dictionary with counts of in-memory items
""" """
return { return {
"access_log_size": len(self.access_log), "ip_page_visits": len(self.ip_page_visits),
"credential_attempts_size": len(self.credential_attempts),
"unique_ip_page_visits": len(self.ip_page_visits),
} }