From f3ec4f84201b23825dd1c37cd22c955d01080394 Mon Sep 17 00:00:00 2001 From: Lorenzo Venerandi Date: Tue, 3 Mar 2026 18:58:06 +0100 Subject: [PATCH 1/4] fix: update stale IP reevaluation logic to flag IPs last seen over 30 days ago --- src/database.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/database.py b/src/database.py index cbee4a0..32a893e 100644 --- a/src/database.py +++ b/src/database.py @@ -815,7 +815,7 @@ class DatabaseManager: def flag_stale_ips_for_reevaluation(self) -> int: """ Flag IPs for reevaluation where: - - last_seen is between 5 and 30 days ago + - last_seen is more than 30 days ago - last_analysis is more than 5 days ago Returns: @@ -824,15 +824,13 @@ class DatabaseManager: session = self.session try: now = datetime.now() - last_seen_lower = now - timedelta(days=30) - last_seen_upper = now - timedelta(days=5) + last_seen_cutoff = now - timedelta(days=30) last_analysis_cutoff = now - timedelta(days=5) count = ( session.query(IpStats) .filter( - IpStats.last_seen >= last_seen_lower, - IpStats.last_seen <= last_seen_upper, + IpStats.last_seen <= last_seen_cutoff, IpStats.last_analysis <= last_analysis_cutoff, IpStats.need_reevaluation == False, IpStats.manual_category == False, From 36df289a5e213b0f485d206eb15beb3b4a3eeace Mon Sep 17 00:00:00 2001 From: Lorenzo Venerandi Date: Tue, 3 Mar 2026 19:03:40 +0100 Subject: [PATCH 2/4] feat: enhance database retention logic to use configurable retention period and delete stale IPs --- src/database.py | 7 +++++-- src/tasks/db_retention.py | 40 ++++++++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/database.py b/src/database.py index 32a893e..a44ad3e 100644 --- a/src/database.py +++ b/src/database.py @@ -815,16 +815,19 @@ class DatabaseManager: def flag_stale_ips_for_reevaluation(self) -> int: """ Flag IPs for reevaluation where: - - last_seen is more than 30 days ago + - last_seen is older than the configured retention period - last_analysis is more than 5 days ago Returns: Number of IPs flagged for reevaluation """ + from config import get_config + session = self.session try: now = datetime.now() - last_seen_cutoff = now - timedelta(days=30) + retention_days = get_config().database_retention_days + last_seen_cutoff = now - timedelta(days=retention_days) last_analysis_cutoff = now - timedelta(days=5) count = ( diff --git a/src/tasks/db_retention.py b/src/tasks/db_retention.py index af803c6..eb76637 100644 --- a/src/tasks/db_retention.py +++ b/src/tasks/db_retention.py @@ -26,12 +26,18 @@ app_logger = get_app_logger() def main(): """ - Delete access logs, credential attempts, and attack detections - older than the configured retention period. + Delete all records older than the configured retention period. + Covers: AccessLog, AttackDetection, CredentialAttempt, IpStats, CategoryHistory. """ try: from config import get_config - from models import AccessLog, CredentialAttempt, AttackDetection + from models import ( + AccessLog, + CredentialAttempt, + AttackDetection, + IpStats, + CategoryHistory, + ) config = get_config() retention_days = config.database_retention_days @@ -63,13 +69,37 @@ def main(): .delete(synchronize_session=False) ) + # Delete IPs not seen within the retention period + ips_deleted = ( + session.query(IpStats) + .filter(IpStats.last_seen < cutoff) + .delete(synchronize_session=False) + ) + + # Delete old category history records + history_deleted = ( + session.query(CategoryHistory) + .filter(CategoryHistory.timestamp < cutoff) + .delete(synchronize_session=False) + ) + session.commit() - if logs_deleted or creds_deleted or detections_deleted: + total = ( + logs_deleted + + detections_deleted + + creds_deleted + + ips_deleted + + history_deleted + ) + if total: app_logger.info( f"DB retention: Deleted {logs_deleted} access logs, " f"{detections_deleted} attack detections, " - f"{creds_deleted} credential attempts older than {retention_days} days" + f"{creds_deleted} credential attempts, " + f"{ips_deleted} stale IPs, " + f"{history_deleted} category history records " + f"older than {retention_days} days" ) except Exception as e: From 31fb1e8d9faed2a02ba4115860b9677ca037bde6 Mon Sep 17 00:00:00 2001 From: Lorenzo Venerandi Date: Tue, 3 Mar 2026 19:06:09 +0100 Subject: [PATCH 3/4] chore: update chart version to 1.1.1 for consistency with app version --- helm/Chart.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/helm/Chart.yaml b/helm/Chart.yaml index 92d530c..812ff4f 100644 --- a/helm/Chart.yaml +++ b/helm/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: krawl-chart description: A Helm chart for Krawl honeypot server type: application -version: 1.1.0 -appVersion: 1.1.0 +version: 1.1.1 +appVersion: 1.1.1 keywords: - honeypot - security From 6dc9cfe502ab6a3ff667997c35b1d38e760801de Mon Sep 17 00:00:00 2001 From: Lorenzo Venerandi Date: Tue, 3 Mar 2026 19:16:27 +0100 Subject: [PATCH 4/4] feat: enhance database retention logic to preserve suspicious access logs and linked IPs --- src/tasks/db_retention.py | 64 ++++++++++++++++++++++++--------------- 1 file changed, 39 insertions(+), 25 deletions(-) diff --git a/src/tasks/db_retention.py b/src/tasks/db_retention.py index eb76637..ab4af86 100644 --- a/src/tasks/db_retention.py +++ b/src/tasks/db_retention.py @@ -7,6 +7,8 @@ Periodically deletes old records based on configured retention_days. from datetime import datetime, timedelta +from sqlalchemy import or_ + from database import get_database from logger import get_app_logger @@ -26,14 +28,14 @@ app_logger = get_app_logger() def main(): """ - Delete all records older than the configured retention period. - Covers: AccessLog, AttackDetection, CredentialAttempt, IpStats, CategoryHistory. + Delete old records based on the configured retention period. + Keeps suspicious access logs, their attack detections, linked IPs, + category history, and all credential attempts. """ try: from config import get_config from models import ( AccessLog, - CredentialAttempt, AttackDetection, IpStats, CategoryHistory, @@ -47,56 +49,68 @@ def main(): cutoff = datetime.now() - timedelta(days=retention_days) - # Delete attack detections linked to old access logs first (FK constraint) - old_log_ids = session.query(AccessLog.id).filter(AccessLog.timestamp < cutoff) + # Delete attack detections linked to old NON-suspicious access logs (FK constraint) + old_nonsuspicious_log_ids = session.query(AccessLog.id).filter( + AccessLog.timestamp < cutoff, + AccessLog.is_suspicious == False, + AccessLog.is_honeypot_trigger == False, + ) detections_deleted = ( session.query(AttackDetection) - .filter(AttackDetection.access_log_id.in_(old_log_ids)) + .filter(AttackDetection.access_log_id.in_(old_nonsuspicious_log_ids)) .delete(synchronize_session=False) ) - # Delete old access logs + # Delete old non-suspicious access logs (keep suspicious ones) logs_deleted = ( session.query(AccessLog) - .filter(AccessLog.timestamp < cutoff) + .filter( + AccessLog.timestamp < cutoff, + AccessLog.is_suspicious == False, + AccessLog.is_honeypot_trigger == False, + ) .delete(synchronize_session=False) ) - # Delete old credential attempts - creds_deleted = ( - session.query(CredentialAttempt) - .filter(CredentialAttempt.timestamp < cutoff) - .delete(synchronize_session=False) + # IPs to preserve: those with any suspicious access logs + preserved_ips = ( + session.query(AccessLog.ip) + .filter( + or_( + AccessLog.is_suspicious == True, + AccessLog.is_honeypot_trigger == True, + ) + ) + .distinct() ) - # Delete IPs not seen within the retention period + # Delete stale IPs, but keep those linked to suspicious logs ips_deleted = ( session.query(IpStats) - .filter(IpStats.last_seen < cutoff) + .filter( + IpStats.last_seen < cutoff, + ~IpStats.ip.in_(preserved_ips), + ) .delete(synchronize_session=False) ) - # Delete old category history records + # Delete old category history, but keep records for preserved IPs history_deleted = ( session.query(CategoryHistory) - .filter(CategoryHistory.timestamp < cutoff) + .filter( + CategoryHistory.timestamp < cutoff, + ~CategoryHistory.ip.in_(preserved_ips), + ) .delete(synchronize_session=False) ) session.commit() - total = ( - logs_deleted - + detections_deleted - + creds_deleted - + ips_deleted - + history_deleted - ) + total = logs_deleted + detections_deleted + ips_deleted + history_deleted if total: app_logger.info( f"DB retention: Deleted {logs_deleted} access logs, " f"{detections_deleted} attack detections, " - f"{creds_deleted} credential attempts, " f"{ips_deleted} stale IPs, " f"{history_deleted} category history records " f"older than {retention_days} days"