diff --git a/helm/Chart.yaml b/helm/Chart.yaml index 92d530c..812ff4f 100644 --- a/helm/Chart.yaml +++ b/helm/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: krawl-chart description: A Helm chart for Krawl honeypot server type: application -version: 1.1.0 -appVersion: 1.1.0 +version: 1.1.1 +appVersion: 1.1.1 keywords: - honeypot - security diff --git a/src/database.py b/src/database.py index cbee4a0..a44ad3e 100644 --- a/src/database.py +++ b/src/database.py @@ -815,24 +815,25 @@ class DatabaseManager: def flag_stale_ips_for_reevaluation(self) -> int: """ Flag IPs for reevaluation where: - - last_seen is between 5 and 30 days ago + - last_seen is older than the configured retention period - last_analysis is more than 5 days ago Returns: Number of IPs flagged for reevaluation """ + from config import get_config + session = self.session try: now = datetime.now() - last_seen_lower = now - timedelta(days=30) - last_seen_upper = now - timedelta(days=5) + retention_days = get_config().database_retention_days + last_seen_cutoff = now - timedelta(days=retention_days) last_analysis_cutoff = now - timedelta(days=5) count = ( session.query(IpStats) .filter( - IpStats.last_seen >= last_seen_lower, - IpStats.last_seen <= last_seen_upper, + IpStats.last_seen <= last_seen_cutoff, IpStats.last_analysis <= last_analysis_cutoff, IpStats.need_reevaluation == False, IpStats.manual_category == False, diff --git a/src/tasks/db_retention.py b/src/tasks/db_retention.py index af803c6..ab4af86 100644 --- a/src/tasks/db_retention.py +++ b/src/tasks/db_retention.py @@ -7,6 +7,8 @@ Periodically deletes old records based on configured retention_days. from datetime import datetime, timedelta +from sqlalchemy import or_ + from database import get_database from logger import get_app_logger @@ -26,12 +28,18 @@ app_logger = get_app_logger() def main(): """ - Delete access logs, credential attempts, and attack detections - older than the configured retention period. + Delete old records based on the configured retention period. + Keeps suspicious access logs, their attack detections, linked IPs, + category history, and all credential attempts. """ try: from config import get_config - from models import AccessLog, CredentialAttempt, AttackDetection + from models import ( + AccessLog, + AttackDetection, + IpStats, + CategoryHistory, + ) config = get_config() retention_days = config.database_retention_days @@ -41,35 +49,71 @@ def main(): cutoff = datetime.now() - timedelta(days=retention_days) - # Delete attack detections linked to old access logs first (FK constraint) - old_log_ids = session.query(AccessLog.id).filter(AccessLog.timestamp < cutoff) + # Delete attack detections linked to old NON-suspicious access logs (FK constraint) + old_nonsuspicious_log_ids = session.query(AccessLog.id).filter( + AccessLog.timestamp < cutoff, + AccessLog.is_suspicious == False, + AccessLog.is_honeypot_trigger == False, + ) detections_deleted = ( session.query(AttackDetection) - .filter(AttackDetection.access_log_id.in_(old_log_ids)) + .filter(AttackDetection.access_log_id.in_(old_nonsuspicious_log_ids)) .delete(synchronize_session=False) ) - # Delete old access logs + # Delete old non-suspicious access logs (keep suspicious ones) logs_deleted = ( session.query(AccessLog) - .filter(AccessLog.timestamp < cutoff) + .filter( + AccessLog.timestamp < cutoff, + AccessLog.is_suspicious == False, + AccessLog.is_honeypot_trigger == False, + ) .delete(synchronize_session=False) ) - # Delete old credential attempts - creds_deleted = ( - session.query(CredentialAttempt) - .filter(CredentialAttempt.timestamp < cutoff) + # IPs to preserve: those with any suspicious access logs + preserved_ips = ( + session.query(AccessLog.ip) + .filter( + or_( + AccessLog.is_suspicious == True, + AccessLog.is_honeypot_trigger == True, + ) + ) + .distinct() + ) + + # Delete stale IPs, but keep those linked to suspicious logs + ips_deleted = ( + session.query(IpStats) + .filter( + IpStats.last_seen < cutoff, + ~IpStats.ip.in_(preserved_ips), + ) + .delete(synchronize_session=False) + ) + + # Delete old category history, but keep records for preserved IPs + history_deleted = ( + session.query(CategoryHistory) + .filter( + CategoryHistory.timestamp < cutoff, + ~CategoryHistory.ip.in_(preserved_ips), + ) .delete(synchronize_session=False) ) session.commit() - if logs_deleted or creds_deleted or detections_deleted: + total = logs_deleted + detections_deleted + ips_deleted + history_deleted + if total: app_logger.info( f"DB retention: Deleted {logs_deleted} access logs, " f"{detections_deleted} attack detections, " - f"{creds_deleted} credential attempts older than {retention_days} days" + f"{ips_deleted} stale IPs, " + f"{history_deleted} category history records " + f"older than {retention_days} days" ) except Exception as e: