feat: enhance database retention logic to preserve suspicious access logs and linked IPs

This commit is contained in:
Lorenzo Venerandi
2026-03-03 19:16:27 +01:00
parent 31fb1e8d9f
commit 6dc9cfe502

View File

@@ -7,6 +7,8 @@ Periodically deletes old records based on configured retention_days.
from datetime import datetime, timedelta
from sqlalchemy import or_
from database import get_database
from logger import get_app_logger
@@ -26,14 +28,14 @@ app_logger = get_app_logger()
def main():
"""
Delete all records older than the configured retention period.
Covers: AccessLog, AttackDetection, CredentialAttempt, IpStats, CategoryHistory.
Delete old records based on the configured retention period.
Keeps suspicious access logs, their attack detections, linked IPs,
category history, and all credential attempts.
"""
try:
from config import get_config
from models import (
AccessLog,
CredentialAttempt,
AttackDetection,
IpStats,
CategoryHistory,
@@ -47,56 +49,68 @@ def main():
cutoff = datetime.now() - timedelta(days=retention_days)
# Delete attack detections linked to old access logs first (FK constraint)
old_log_ids = session.query(AccessLog.id).filter(AccessLog.timestamp < cutoff)
# Delete attack detections linked to old access logs that are neither suspicious nor honeypot triggers (must precede the log delete: FK constraint)
old_nonsuspicious_log_ids = session.query(AccessLog.id).filter(
AccessLog.timestamp < cutoff,
AccessLog.is_suspicious == False,
AccessLog.is_honeypot_trigger == False,
)
detections_deleted = (
session.query(AttackDetection)
.filter(AttackDetection.access_log_id.in_(old_log_ids))
.filter(AttackDetection.access_log_id.in_(old_nonsuspicious_log_ids))
.delete(synchronize_session=False)
)
# Delete old access logs
# Delete old access logs that are neither suspicious nor honeypot triggers (keep the flagged ones)
logs_deleted = (
session.query(AccessLog)
.filter(AccessLog.timestamp < cutoff)
.filter(
AccessLog.timestamp < cutoff,
AccessLog.is_suspicious == False,
AccessLog.is_honeypot_trigger == False,
)
.delete(synchronize_session=False)
)
# Delete old credential attempts
creds_deleted = (
session.query(CredentialAttempt)
.filter(CredentialAttempt.timestamp < cutoff)
.delete(synchronize_session=False)
# IPs to preserve: those with any suspicious or honeypot-trigger access logs
preserved_ips = (
session.query(AccessLog.ip)
.filter(
or_(
AccessLog.is_suspicious == True,
AccessLog.is_honeypot_trigger == True,
)
)
.distinct()
)
# Delete IPs not seen within the retention period
# Delete stale IPs, but keep those linked to suspicious logs
ips_deleted = (
session.query(IpStats)
.filter(IpStats.last_seen < cutoff)
.filter(
IpStats.last_seen < cutoff,
~IpStats.ip.in_(preserved_ips),
)
.delete(synchronize_session=False)
)
# Delete old category history records
# Delete old category history, but keep records for preserved IPs
history_deleted = (
session.query(CategoryHistory)
.filter(CategoryHistory.timestamp < cutoff)
.filter(
CategoryHistory.timestamp < cutoff,
~CategoryHistory.ip.in_(preserved_ips),
)
.delete(synchronize_session=False)
)
session.commit()
total = (
logs_deleted
+ detections_deleted
+ creds_deleted
+ ips_deleted
+ history_deleted
)
total = logs_deleted + detections_deleted + ips_deleted + history_deleted
if total:
app_logger.info(
f"DB retention: Deleted {logs_deleted} access logs, "
f"{detections_deleted} attack detections, "
f"{creds_deleted} credential attempts, "
f"{ips_deleted} stale IPs, "
f"{history_deleted} category history records "
f"older than {retention_days} days"