Merge pull request #94 from BlessedRebuS/fix/new-ip-reevaluation

Fix/new ip reevaluation
This commit is contained in:
Lorenzo Venerandi
2026-02-22 18:46:56 +01:00
committed by GitHub
5 changed files with 109 additions and 48 deletions

View File

@@ -2,8 +2,8 @@ apiVersion: v2
name: krawl-chart
description: A Helm chart for Krawl honeypot server
type: application
version: 1.0.7
appVersion: 1.0.7
version: 1.0.8
appVersion: 1.0.8
keywords:
- honeypot
- security

View File

@@ -790,22 +790,69 @@ class DatabaseManager:
def get_ips_needing_reevaluation(self) -> List[str]:
"""
Get all IP addresses that have been flagged for reevaluation.
Get all IP addresses that need evaluation.
Returns:
List of IP addresses where need_reevaluation is True
or that have never been analyzed (last_analysis is NULL)
"""
session = self.session
try:
ips = (
session.query(IpStats.ip)
.filter(IpStats.need_reevaluation == True)
.filter(
or_(
IpStats.need_reevaluation == True,
IpStats.last_analysis.is_(None),
)
)
.all()
)
return [ip[0] for ip in ips]
finally:
self.close_session()
def flag_stale_ips_for_reevaluation(self) -> int:
    """
    Flag IPs for reevaluation where:
    - last_seen is between 15 and 30 days ago
    - last_analysis is more than 10 days ago (or never analyzed)

    IPs already flagged (need_reevaluation is True) or pinned by a manual
    category are left untouched.

    Returns:
        Number of IPs flagged for reevaluation.

    Raises:
        Exception: re-raised after rolling back if the bulk update fails.
    """
    session = self.session
    try:
        # NOTE(review): naive local time — assumes last_seen/last_analysis
        # are stored naive-local as well; confirm against the writers.
        now = datetime.now()
        last_seen_lower = now - timedelta(days=30)
        last_seen_upper = now - timedelta(days=15)
        last_analysis_cutoff = now - timedelta(days=10)
        count = (
            session.query(IpStats)
            .filter(
                IpStats.last_seen >= last_seen_lower,
                IpStats.last_seen <= last_seen_upper,
                or_(
                    # Stale analysis, or never analyzed at all.
                    IpStats.last_analysis <= last_analysis_cutoff,
                    IpStats.last_analysis.is_(None),
                ),
                # Skip rows already queued and manually-categorized rows.
                # (== False is the SQLAlchemy column-comparison idiom.)
                IpStats.need_reevaluation == False,
                IpStats.manual_category == False,
            )
            .update(
                {IpStats.need_reevaluation: True},
                # Bulk UPDATE; no loaded objects need syncing.
                synchronize_session=False,
            )
        )
        session.commit()
        return count
    except Exception:
        # Roll back the failed transaction, then propagate to the caller.
        session.rollback()
        raise
    finally:
        self.close_session()
def get_access_logs(
self,
limit: int = 100,

View File

@@ -398,6 +398,8 @@ async def trap_page(request: Request, path: str):
access_logger.warning(
f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {full_path}"
)
else:
access_logger.info(f"[REQUEST] {client_ip} - {full_path}")
# Record access unless the router dependency already handled it
# (attack pattern or honeypot path → already recorded by _track_honeypot_request)

View File

@@ -1,3 +1,4 @@
from collections import Counter
from database import get_database
from pathlib import Path
from datetime import datetime, timedelta
@@ -94,6 +95,19 @@ def main():
"attack_url": 0,
},
}
# Parse robots.txt once before the loop (it never changes during a run)
robots_disallows = []
robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt"
with open(robots_path, "r") as f:
for line in f:
line = line.strip()
if not line:
continue
parts = line.split(":")
if parts[0] == "Disallow":
parts[1] = parts[1].rstrip("/")
robots_disallows.append(parts[1].strip())
# Get IPs flagged for reevaluation (set when a suspicious request arrives)
ips_to_analyze = set(db_manager.get_ips_needing_reevaluation())
@@ -105,41 +119,21 @@ def main():
for ip in ips_to_analyze:
# Get full history for this IP to perform accurate analysis
ip_accesses = db_manager.get_access_logs(limit=999999999, ip_filter=ip)
ip_accesses = db_manager.get_access_logs(
limit=10000, ip_filter=ip, since_minutes=1440 * 30
) # look back up to 30 days of history for better accuracy
total_accesses_count = len(ip_accesses)
if total_accesses_count <= 0:
continue
# --------------------- HTTP Methods ---------------------
get_accesses_count = len(
[item for item in ip_accesses if item["method"] == "GET"]
)
post_accesses_count = len(
[item for item in ip_accesses if item["method"] == "POST"]
)
put_accesses_count = len(
[item for item in ip_accesses if item["method"] == "PUT"]
)
delete_accesses_count = len(
[item for item in ip_accesses if item["method"] == "DELETE"]
)
head_accesses_count = len(
[item for item in ip_accesses if item["method"] == "HEAD"]
)
options_accesses_count = len(
[item for item in ip_accesses if item["method"] == "OPTIONS"]
)
patch_accesses_count = len(
[item for item in ip_accesses if item["method"] == "PATCH"]
)
method_counts = Counter(item["method"] for item in ip_accesses)
if total_accesses_count > http_risky_methods_threshold:
http_method_attacker_score = (
post_accesses_count
+ put_accesses_count
+ delete_accesses_count
+ options_accesses_count
+ patch_accesses_count
) / total_accesses_count
risky_count = sum(
method_counts.get(m, 0)
for m in ("POST", "PUT", "DELETE", "OPTIONS", "PATCH")
)
http_method_attacker_score = risky_count / total_accesses_count
else:
http_method_attacker_score = 0
# print(f"HTTP Method attacker score: {http_method_attacker_score}")
@@ -154,21 +148,6 @@ def main():
score["bad_crawler"]["risky_http_methods"] = False
score["regular_user"]["risky_http_methods"] = False
# --------------------- Robots Violations ---------------------
# respect robots.txt and login/config pages access frequency
robots_disallows = []
robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt"
with open(robots_path, "r") as f:
for line in f:
line = line.strip()
if not line:
continue
parts = line.split(":")
if parts[0] == "Disallow":
parts[1] = parts[1].rstrip("/")
# print(f"DISALLOW {parts[1]}")
robots_disallows.append(parts[1].strip())
# if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
violated_robots_count = len(
[
item

View File

@@ -0,0 +1,33 @@
from database import get_database
from logger import get_app_logger
# ----------------------
# TASK CONFIG
# ----------------------
# Scheduler metadata consumed by the background-task loader.
TASK_CONFIG = {
    "name": "flag-stale-ips",  # task identifier used in log messages
    "cron": "0 2 * * *",  # Run daily at 2 AM
    "enabled": True,  # set False to disable without deleting the file
    "run_when_loaded": False,  # do not run immediately at startup; wait for cron
}
def main():
    """
    Background task entry point: flag stale IPs for reevaluation.

    Delegates to DatabaseManager.flag_stale_ips_for_reevaluation() and logs
    the outcome. Errors are logged (with traceback) and swallowed so a
    failing run does not take down the scheduler.
    """
    app_logger = get_app_logger()
    db = get_database()
    try:
        count = db.flag_stale_ips_for_reevaluation()
        if count > 0:
            app_logger.info(
                f"[Background Task] flag-stale-ips: Flagged {count} stale IPs for reevaluation"
            )
        else:
            app_logger.debug(
                "[Background Task] flag-stale-ips: No stale IPs found to flag"
            )
    except Exception as e:
        # logger.exception records the full traceback, not just str(e).
        app_logger.exception(
            f"[Background Task] flag-stale-ips: Error flagging stale IPs: {e}"
        )