Merge pull request #112 from BlessedRebuS/feat/improve-tasks

Feat/improve tasks
This commit is contained in:
Patrick Di Fazio
2026-03-03 19:19:58 +01:00
committed by GitHub
3 changed files with 66 additions and 21 deletions

View File

@@ -2,8 +2,8 @@ apiVersion: v2
name: krawl-chart name: krawl-chart
description: A Helm chart for Krawl honeypot server description: A Helm chart for Krawl honeypot server
type: application type: application
version: 1.1.0 version: 1.1.1
appVersion: 1.1.0 appVersion: 1.1.1
keywords: keywords:
- honeypot - honeypot
- security - security

View File

@@ -815,24 +815,25 @@ class DatabaseManager:
def flag_stale_ips_for_reevaluation(self) -> int: def flag_stale_ips_for_reevaluation(self) -> int:
""" """
Flag IPs for reevaluation where: Flag IPs for reevaluation where:
- last_seen is between 5 and 30 days ago - last_seen is older than the configured retention period
- last_analysis is more than 5 days ago - last_analysis is more than 5 days ago
Returns: Returns:
Number of IPs flagged for reevaluation Number of IPs flagged for reevaluation
""" """
from config import get_config
session = self.session session = self.session
try: try:
now = datetime.now() now = datetime.now()
last_seen_lower = now - timedelta(days=30) retention_days = get_config().database_retention_days
last_seen_upper = now - timedelta(days=5) last_seen_cutoff = now - timedelta(days=retention_days)
last_analysis_cutoff = now - timedelta(days=5) last_analysis_cutoff = now - timedelta(days=5)
count = ( count = (
session.query(IpStats) session.query(IpStats)
.filter( .filter(
IpStats.last_seen >= last_seen_lower, IpStats.last_seen <= last_seen_cutoff,
IpStats.last_seen <= last_seen_upper,
IpStats.last_analysis <= last_analysis_cutoff, IpStats.last_analysis <= last_analysis_cutoff,
IpStats.need_reevaluation == False, IpStats.need_reevaluation == False,
IpStats.manual_category == False, IpStats.manual_category == False,

View File

@@ -7,6 +7,8 @@ Periodically deletes old records based on configured retention_days.
from datetime import datetime, timedelta from datetime import datetime, timedelta
from sqlalchemy import or_
from database import get_database from database import get_database
from logger import get_app_logger from logger import get_app_logger
@@ -26,12 +28,18 @@ app_logger = get_app_logger()
def main(): def main():
""" """
Delete access logs, credential attempts, and attack detections Delete old records based on the configured retention period.
older than the configured retention period. Keeps suspicious access logs, their attack detections, linked IPs,
category history, and all credential attempts.
""" """
try: try:
from config import get_config from config import get_config
from models import AccessLog, CredentialAttempt, AttackDetection from models import (
AccessLog,
AttackDetection,
IpStats,
CategoryHistory,
)
config = get_config() config = get_config()
retention_days = config.database_retention_days retention_days = config.database_retention_days
@@ -41,35 +49,71 @@ def main():
cutoff = datetime.now() - timedelta(days=retention_days) cutoff = datetime.now() - timedelta(days=retention_days)
# Delete attack detections linked to old access logs first (FK constraint) # Delete attack detections linked to old NON-suspicious access logs (FK constraint)
old_log_ids = session.query(AccessLog.id).filter(AccessLog.timestamp < cutoff) old_nonsuspicious_log_ids = session.query(AccessLog.id).filter(
AccessLog.timestamp < cutoff,
AccessLog.is_suspicious == False,
AccessLog.is_honeypot_trigger == False,
)
detections_deleted = ( detections_deleted = (
session.query(AttackDetection) session.query(AttackDetection)
.filter(AttackDetection.access_log_id.in_(old_log_ids)) .filter(AttackDetection.access_log_id.in_(old_nonsuspicious_log_ids))
.delete(synchronize_session=False) .delete(synchronize_session=False)
) )
# Delete old access logs # Delete old non-suspicious access logs (keep suspicious ones)
logs_deleted = ( logs_deleted = (
session.query(AccessLog) session.query(AccessLog)
.filter(AccessLog.timestamp < cutoff) .filter(
AccessLog.timestamp < cutoff,
AccessLog.is_suspicious == False,
AccessLog.is_honeypot_trigger == False,
)
.delete(synchronize_session=False) .delete(synchronize_session=False)
) )
# Delete old credential attempts # IPs to preserve: those with any suspicious access logs
creds_deleted = ( preserved_ips = (
session.query(CredentialAttempt) session.query(AccessLog.ip)
.filter(CredentialAttempt.timestamp < cutoff) .filter(
or_(
AccessLog.is_suspicious == True,
AccessLog.is_honeypot_trigger == True,
)
)
.distinct()
)
# Delete stale IPs, but keep those linked to suspicious logs
ips_deleted = (
session.query(IpStats)
.filter(
IpStats.last_seen < cutoff,
~IpStats.ip.in_(preserved_ips),
)
.delete(synchronize_session=False)
)
# Delete old category history, but keep records for preserved IPs
history_deleted = (
session.query(CategoryHistory)
.filter(
CategoryHistory.timestamp < cutoff,
~CategoryHistory.ip.in_(preserved_ips),
)
.delete(synchronize_session=False) .delete(synchronize_session=False)
) )
session.commit() session.commit()
if logs_deleted or creds_deleted or detections_deleted: total = logs_deleted + detections_deleted + ips_deleted + history_deleted
if total:
app_logger.info( app_logger.info(
f"DB retention: Deleted {logs_deleted} access logs, " f"DB retention: Deleted {logs_deleted} access logs, "
f"{detections_deleted} attack detections, " f"{detections_deleted} attack detections, "
f"{creds_deleted} credential attempts older than {retention_days} days" f"{ips_deleted} stale IPs, "
f"{history_deleted} category history records "
f"older than {retention_days} days"
) )
except Exception as e: except Exception as e: