Merge pull request #53 from BlessedRebuS/feat/ip-log-improvements
added ip logging memory improvements, added local ip and public ip ex…
This commit is contained in:
@@ -11,7 +11,6 @@ from wordlists import get_wordlists
|
|||||||
from config import get_config
|
from config import get_config
|
||||||
from logger import get_app_logger
|
from logger import get_app_logger
|
||||||
import requests
|
import requests
|
||||||
from sanitizer import sanitize_for_storage, sanitize_dict
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Functions for user activity analysis
|
Functions for user activity analysis
|
||||||
@@ -27,14 +26,12 @@ class Analyzer:
|
|||||||
|
|
||||||
def __init__(self, db_manager: Optional[DatabaseManager] = None):
|
def __init__(self, db_manager: Optional[DatabaseManager] = None):
|
||||||
"""
|
"""
|
||||||
Initialize the access tracker.
|
Initialize the analyzer.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
db_manager: Optional DatabaseManager for persistence.
|
db_manager: Optional DatabaseManager for persistence.
|
||||||
If None, will use the global singleton.
|
If None, will use the global singleton.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Database manager for persistence (lazily initialized)
|
|
||||||
self._db_manager = db_manager
|
self._db_manager = db_manager
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -49,7 +46,6 @@ class Analyzer:
|
|||||||
try:
|
try:
|
||||||
self._db_manager = get_database()
|
self._db_manager = get_database()
|
||||||
except Exception:
|
except Exception:
|
||||||
# Database not initialized, persistence disabled
|
|
||||||
pass
|
pass
|
||||||
return self._db_manager
|
return self._db_manager
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ from typing import Optional, Tuple
|
|||||||
from zoneinfo import ZoneInfo
|
from zoneinfo import ZoneInfo
|
||||||
import time
|
import time
|
||||||
from logger import get_app_logger
|
from logger import get_app_logger
|
||||||
|
import socket
|
||||||
|
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
@@ -50,6 +51,67 @@ class Config:
|
|||||||
user_agents_used_threshold: float = None
|
user_agents_used_threshold: float = None
|
||||||
attack_urls_threshold: float = None
|
attack_urls_threshold: float = None
|
||||||
|
|
||||||
|
_server_ip: Optional[str] = None
|
||||||
|
_server_ip_cache_time: float = 0
|
||||||
|
_ip_cache_ttl: int = 300
|
||||||
|
|
||||||
|
def get_server_ip(self, refresh: bool = False) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Get the server's own public IP address.
|
||||||
|
Excludes requests from the server itself from being tracked.
|
||||||
|
|
||||||
|
Caches the IP for 5 minutes to avoid repeated lookups.
|
||||||
|
Automatically refreshes if cache is stale.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
refresh: Force refresh the IP cache (bypass TTL)
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Server IP address or None if unable to determine
|
||||||
|
"""
|
||||||
|
import time
|
||||||
|
|
||||||
|
current_time = time.time()
|
||||||
|
|
||||||
|
# Check if cache is valid and not forced refresh
|
||||||
|
if (
|
||||||
|
self._server_ip is not None
|
||||||
|
and not refresh
|
||||||
|
and (current_time - self._server_ip_cache_time) < self._ip_cache_ttl
|
||||||
|
):
|
||||||
|
return self._server_ip
|
||||||
|
|
||||||
|
try:
|
||||||
|
hostname = socket.gethostname()
|
||||||
|
|
||||||
|
# Try to get public IP by connecting to an external server
|
||||||
|
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||||
|
s.connect(("8.8.8.8", 80))
|
||||||
|
ip = s.getsockname()[0]
|
||||||
|
s.close()
|
||||||
|
|
||||||
|
self._server_ip = ip
|
||||||
|
self._server_ip_cache_time = current_time
|
||||||
|
|
||||||
|
return ip
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
get_app_logger().warning(
|
||||||
|
f"Could not determine server IP address: {e}. "
|
||||||
|
"All IPs will be tracked (including potential server IP)."
|
||||||
|
)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def refresh_server_ip(self) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Force refresh the cached server IP.
|
||||||
|
Use this if you suspect the IP has changed.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
New server IP address or None if unable to determine
|
||||||
|
"""
|
||||||
|
return self.get_server_ip(refresh=True)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_yaml(cls) -> "Config":
|
def from_yaml(cls) -> "Config":
|
||||||
"""Create configuration from YAML file"""
|
"""Create configuration from YAML file"""
|
||||||
@@ -139,8 +201,8 @@ class Config:
|
|||||||
infinite_pages_for_malicious=crawl.get(
|
infinite_pages_for_malicious=crawl.get(
|
||||||
"infinite_pages_for_malicious", True
|
"infinite_pages_for_malicious", True
|
||||||
),
|
),
|
||||||
max_pages_limit=crawl.get("max_pages_limit", 200),
|
max_pages_limit=crawl.get("max_pages_limit", 500),
|
||||||
ban_duration_seconds=crawl.get("ban_duration_seconds", 60),
|
ban_duration_seconds=crawl.get("ban_duration_seconds", 10),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
100
src/database.py
100
src/database.py
@@ -15,6 +15,8 @@ from sqlalchemy import create_engine, func, distinct, case, event
|
|||||||
from sqlalchemy.orm import sessionmaker, scoped_session, Session
|
from sqlalchemy.orm import sessionmaker, scoped_session, Session
|
||||||
from sqlalchemy.engine import Engine
|
from sqlalchemy.engine import Engine
|
||||||
|
|
||||||
|
from ip_utils import is_local_or_private_ip, is_valid_public_ip
|
||||||
|
|
||||||
|
|
||||||
@event.listens_for(Engine, "connect")
|
@event.listens_for(Engine, "connect")
|
||||||
def set_sqlite_pragma(dbapi_connection, connection_record):
|
def set_sqlite_pragma(dbapi_connection, connection_record):
|
||||||
@@ -688,7 +690,7 @@ class DatabaseManager:
|
|||||||
|
|
||||||
def get_dashboard_counts(self) -> Dict[str, int]:
|
def get_dashboard_counts(self) -> Dict[str, int]:
|
||||||
"""
|
"""
|
||||||
Get aggregate statistics for the dashboard.
|
Get aggregate statistics for the dashboard (excludes local/private IPs and server IP).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dictionary with total_accesses, unique_ips, unique_paths,
|
Dictionary with total_accesses, unique_ips, unique_paths,
|
||||||
@@ -696,33 +698,34 @@ class DatabaseManager:
|
|||||||
"""
|
"""
|
||||||
session = self.session
|
session = self.session
|
||||||
try:
|
try:
|
||||||
# Get main aggregate counts in one query
|
# Get server IP to filter it out
|
||||||
result = session.query(
|
from config import get_config
|
||||||
func.count(AccessLog.id).label("total_accesses"),
|
config = get_config()
|
||||||
func.count(distinct(AccessLog.ip)).label("unique_ips"),
|
server_ip = config.get_server_ip()
|
||||||
func.count(distinct(AccessLog.path)).label("unique_paths"),
|
|
||||||
func.sum(case((AccessLog.is_suspicious == True, 1), else_=0)).label(
|
# Get all accesses first, then filter out local IPs and server IP
|
||||||
"suspicious_accesses"
|
all_accesses = session.query(AccessLog).all()
|
||||||
),
|
|
||||||
func.sum(
|
# Filter out local/private IPs and server IP
|
||||||
case((AccessLog.is_honeypot_trigger == True, 1), else_=0)
|
public_accesses = [
|
||||||
).label("honeypot_triggered"),
|
log for log in all_accesses
|
||||||
).first()
|
if is_valid_public_ip(log.ip, server_ip)
|
||||||
|
]
|
||||||
# Get unique IPs that triggered honeypots
|
|
||||||
honeypot_ips = (
|
# Calculate counts from filtered data
|
||||||
session.query(func.count(distinct(AccessLog.ip)))
|
total_accesses = len(public_accesses)
|
||||||
.filter(AccessLog.is_honeypot_trigger == True)
|
unique_ips = len(set(log.ip for log in public_accesses))
|
||||||
.scalar()
|
unique_paths = len(set(log.path for log in public_accesses))
|
||||||
or 0
|
suspicious_accesses = sum(1 for log in public_accesses if log.is_suspicious)
|
||||||
)
|
honeypot_triggered = sum(1 for log in public_accesses if log.is_honeypot_trigger)
|
||||||
|
honeypot_ips = len(set(log.ip for log in public_accesses if log.is_honeypot_trigger))
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"total_accesses": result.total_accesses or 0,
|
"total_accesses": total_accesses,
|
||||||
"unique_ips": result.unique_ips or 0,
|
"unique_ips": unique_ips,
|
||||||
"unique_paths": result.unique_paths or 0,
|
"unique_paths": unique_paths,
|
||||||
"suspicious_accesses": int(result.suspicious_accesses or 0),
|
"suspicious_accesses": suspicious_accesses,
|
||||||
"honeypot_triggered": int(result.honeypot_triggered or 0),
|
"honeypot_triggered": honeypot_triggered,
|
||||||
"honeypot_ips": honeypot_ips,
|
"honeypot_ips": honeypot_ips,
|
||||||
}
|
}
|
||||||
finally:
|
finally:
|
||||||
@@ -730,7 +733,7 @@ class DatabaseManager:
|
|||||||
|
|
||||||
def get_top_ips(self, limit: int = 10) -> List[tuple]:
|
def get_top_ips(self, limit: int = 10) -> List[tuple]:
|
||||||
"""
|
"""
|
||||||
Get top IP addresses by access count.
|
Get top IP addresses by access count (excludes local/private IPs and server IP).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
limit: Maximum number of results
|
limit: Maximum number of results
|
||||||
@@ -740,15 +743,25 @@ class DatabaseManager:
|
|||||||
"""
|
"""
|
||||||
session = self.session
|
session = self.session
|
||||||
try:
|
try:
|
||||||
|
# Get server IP to filter it out
|
||||||
|
from config import get_config
|
||||||
|
config = get_config()
|
||||||
|
server_ip = config.get_server_ip()
|
||||||
|
|
||||||
results = (
|
results = (
|
||||||
session.query(AccessLog.ip, func.count(AccessLog.id).label("count"))
|
session.query(AccessLog.ip, func.count(AccessLog.id).label("count"))
|
||||||
.group_by(AccessLog.ip)
|
.group_by(AccessLog.ip)
|
||||||
.order_by(func.count(AccessLog.id).desc())
|
.order_by(func.count(AccessLog.id).desc())
|
||||||
.limit(limit)
|
|
||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
|
|
||||||
return [(row.ip, row.count) for row in results]
|
# Filter out local/private IPs and server IP, then limit results
|
||||||
|
filtered = [
|
||||||
|
(row.ip, row.count)
|
||||||
|
for row in results
|
||||||
|
if is_valid_public_ip(row.ip, server_ip)
|
||||||
|
]
|
||||||
|
return filtered[:limit]
|
||||||
finally:
|
finally:
|
||||||
self.close_session()
|
self.close_session()
|
||||||
|
|
||||||
@@ -805,7 +818,7 @@ class DatabaseManager:
|
|||||||
|
|
||||||
def get_recent_suspicious(self, limit: int = 20) -> List[Dict[str, Any]]:
|
def get_recent_suspicious(self, limit: int = 20) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Get recent suspicious access attempts.
|
Get recent suspicious access attempts (excludes local/private IPs and server IP).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
limit: Maximum number of results
|
limit: Maximum number of results
|
||||||
@@ -815,14 +828,24 @@ class DatabaseManager:
|
|||||||
"""
|
"""
|
||||||
session = self.session
|
session = self.session
|
||||||
try:
|
try:
|
||||||
|
# Get server IP to filter it out
|
||||||
|
from config import get_config
|
||||||
|
config = get_config()
|
||||||
|
server_ip = config.get_server_ip()
|
||||||
|
|
||||||
logs = (
|
logs = (
|
||||||
session.query(AccessLog)
|
session.query(AccessLog)
|
||||||
.filter(AccessLog.is_suspicious == True)
|
.filter(AccessLog.is_suspicious == True)
|
||||||
.order_by(AccessLog.timestamp.desc())
|
.order_by(AccessLog.timestamp.desc())
|
||||||
.limit(limit)
|
|
||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Filter out local/private IPs and server IP
|
||||||
|
filtered_logs = [
|
||||||
|
log for log in logs
|
||||||
|
if is_valid_public_ip(log.ip, server_ip)
|
||||||
|
]
|
||||||
|
|
||||||
return [
|
return [
|
||||||
{
|
{
|
||||||
"ip": log.ip,
|
"ip": log.ip,
|
||||||
@@ -830,20 +853,26 @@ class DatabaseManager:
|
|||||||
"user_agent": log.user_agent,
|
"user_agent": log.user_agent,
|
||||||
"timestamp": log.timestamp.isoformat(),
|
"timestamp": log.timestamp.isoformat(),
|
||||||
}
|
}
|
||||||
for log in logs
|
for log in filtered_logs[:limit]
|
||||||
]
|
]
|
||||||
finally:
|
finally:
|
||||||
self.close_session()
|
self.close_session()
|
||||||
|
|
||||||
def get_honeypot_triggered_ips(self) -> List[tuple]:
|
def get_honeypot_triggered_ips(self) -> List[tuple]:
|
||||||
"""
|
"""
|
||||||
Get IPs that triggered honeypot paths with the paths they accessed.
|
Get IPs that triggered honeypot paths with the paths they accessed
|
||||||
|
(excludes local/private IPs and server IP).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of (ip, [paths]) tuples
|
List of (ip, [paths]) tuples
|
||||||
"""
|
"""
|
||||||
session = self.session
|
session = self.session
|
||||||
try:
|
try:
|
||||||
|
# Get server IP to filter it out
|
||||||
|
from config import get_config
|
||||||
|
config = get_config()
|
||||||
|
server_ip = config.get_server_ip()
|
||||||
|
|
||||||
# Get all honeypot triggers grouped by IP
|
# Get all honeypot triggers grouped by IP
|
||||||
results = (
|
results = (
|
||||||
session.query(AccessLog.ip, AccessLog.path)
|
session.query(AccessLog.ip, AccessLog.path)
|
||||||
@@ -851,9 +880,12 @@ class DatabaseManager:
|
|||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
|
|
||||||
# Group paths by IP
|
# Group paths by IP, filtering out local/private IPs and server IP
|
||||||
ip_paths: Dict[str, List[str]] = {}
|
ip_paths: Dict[str, List[str]] = {}
|
||||||
for row in results:
|
for row in results:
|
||||||
|
# Skip invalid IPs
|
||||||
|
if not is_valid_public_ip(row.ip, server_ip):
|
||||||
|
continue
|
||||||
if row.ip not in ip_paths:
|
if row.ip not in ip_paths:
|
||||||
ip_paths[row.ip] = []
|
ip_paths[row.ip] = []
|
||||||
if row.path not in ip_paths[row.ip]:
|
if row.path not in ip_paths[row.ip]:
|
||||||
|
|||||||
@@ -1,6 +0,0 @@
|
|||||||
127.0.0.1
|
|
||||||
175.23.45.67
|
|
||||||
205.32.180.65
|
|
||||||
198.51.100.89
|
|
||||||
210.45.67.89
|
|
||||||
203.0.113.45
|
|
||||||
61
src/ip_utils.py
Normal file
61
src/ip_utils.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
"""
|
||||||
|
IP utility functions for filtering and validating IP addresses.
|
||||||
|
Provides common IP filtering logic used across the Krawl honeypot.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import ipaddress
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
|
||||||
|
def is_local_or_private_ip(ip_str: str) -> bool:
|
||||||
|
"""
|
||||||
|
Check if an IP address is local, private, or reserved.
|
||||||
|
|
||||||
|
Filters out:
|
||||||
|
- 127.0.0.1 (localhost)
|
||||||
|
- 127.0.0.0/8 (loopback)
|
||||||
|
- 10.0.0.0/8 (private network)
|
||||||
|
- 172.16.0.0/12 (private network)
|
||||||
|
- 192.168.0.0/16 (private network)
|
||||||
|
- 0.0.0.0/8 (this network)
|
||||||
|
- ::1 (IPv6 localhost)
|
||||||
|
- ::ffff:127.0.0.0/104 (IPv6-mapped IPv4 loopback)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ip_str: IP address string
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if IP is local/private/reserved, False if it's public
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
ip = ipaddress.ip_address(ip_str)
|
||||||
|
return (
|
||||||
|
ip.is_private
|
||||||
|
or ip.is_loopback
|
||||||
|
or ip.is_reserved
|
||||||
|
or ip.is_link_local
|
||||||
|
or str(ip) in ("0.0.0.0", "::1")
|
||||||
|
)
|
||||||
|
except ValueError:
|
||||||
|
# Invalid IP address
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_public_ip(ip: str, server_ip: Optional[str] = None) -> bool:
|
||||||
|
"""
|
||||||
|
Check if an IP is public and not the server's own IP.
|
||||||
|
|
||||||
|
Returns True only if:
|
||||||
|
- IP is not in local/private ranges AND
|
||||||
|
- IP is not the server's own public IP (if server_ip provided)
|
||||||
|
|
||||||
|
Args:
|
||||||
|
ip: IP address string to check
|
||||||
|
server_ip: Server's public IP (optional). If provided, filters out this IP too.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
True if IP is a valid public IP to track, False otherwise
|
||||||
|
"""
|
||||||
|
return not is_local_or_private_ip(ip) and (server_ip is None or ip != server_ip)
|
||||||
66
src/tasks/memory_cleanup.py
Normal file
66
src/tasks/memory_cleanup.py
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
"""
|
||||||
|
Memory cleanup task for Krawl honeypot.
|
||||||
|
Periodically trims unbounded in-memory structures to prevent OOM.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from database import get_database
|
||||||
|
from logger import get_app_logger
|
||||||
|
|
||||||
|
# ----------------------
|
||||||
|
# TASK CONFIG
|
||||||
|
# ----------------------
|
||||||
|
|
||||||
|
TASK_CONFIG = {
|
||||||
|
"name": "memory-cleanup",
|
||||||
|
"cron": "*/5 * * * *", # Run every 5 minutes
|
||||||
|
"enabled": True,
|
||||||
|
"run_when_loaded": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
app_logger = get_app_logger()
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
"""
|
||||||
|
Clean up in-memory structures in the tracker.
|
||||||
|
Called periodically to prevent unbounded memory growth.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Import here to avoid circular imports
|
||||||
|
from handler import Handler
|
||||||
|
|
||||||
|
if not Handler.tracker:
|
||||||
|
app_logger.warning("Tracker not initialized, skipping memory cleanup")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Get memory stats before cleanup
|
||||||
|
stats_before = Handler.tracker.get_memory_stats()
|
||||||
|
|
||||||
|
# Run cleanup
|
||||||
|
Handler.tracker.cleanup_memory()
|
||||||
|
|
||||||
|
# Get memory stats after cleanup
|
||||||
|
stats_after = Handler.tracker.get_memory_stats()
|
||||||
|
|
||||||
|
# Log changes
|
||||||
|
access_log_reduced = stats_before["access_log_size"] - stats_after["access_log_size"]
|
||||||
|
cred_reduced = stats_before["credential_attempts_size"] - stats_after["credential_attempts_size"]
|
||||||
|
|
||||||
|
if access_log_reduced > 0 or cred_reduced > 0:
|
||||||
|
app_logger.info(
|
||||||
|
f"Memory cleanup: Trimmed {access_log_reduced} access logs, "
|
||||||
|
f"{cred_reduced} credential attempts"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Log current memory state for monitoring
|
||||||
|
app_logger.debug(
|
||||||
|
f"Memory stats after cleanup: "
|
||||||
|
f"access_logs={stats_after['access_log_size']}, "
|
||||||
|
f"credentials={stats_after['credential_attempts_size']}, "
|
||||||
|
f"unique_ips={stats_after['unique_ips_tracked']}"
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
app_logger.error(f"Error during memory cleanup: {e}")
|
||||||
@@ -5,7 +5,9 @@ from datetime import datetime, timedelta
|
|||||||
from zoneinfo import ZoneInfo
|
from zoneinfo import ZoneInfo
|
||||||
from logger import get_app_logger
|
from logger import get_app_logger
|
||||||
from database import get_database
|
from database import get_database
|
||||||
|
from config import get_config
|
||||||
from models import AccessLog
|
from models import AccessLog
|
||||||
|
from ip_utils import is_local_or_private_ip, is_valid_public_ip
|
||||||
from sqlalchemy import distinct
|
from sqlalchemy import distinct
|
||||||
|
|
||||||
app_logger = get_app_logger()
|
app_logger = get_app_logger()
|
||||||
@@ -66,16 +68,26 @@ def main():
|
|||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Filter out local/private IPs and the server's own IP
|
||||||
|
config = get_config()
|
||||||
|
server_ip = config.get_server_ip()
|
||||||
|
|
||||||
|
public_ips = [
|
||||||
|
ip for (ip,) in results
|
||||||
|
if is_valid_public_ip(ip, server_ip)
|
||||||
|
]
|
||||||
|
|
||||||
# Ensure exports directory exists
|
# Ensure exports directory exists
|
||||||
os.makedirs(EXPORTS_DIR, exist_ok=True)
|
os.makedirs(EXPORTS_DIR, exist_ok=True)
|
||||||
|
|
||||||
# Write IPs to file (one per line)
|
# Write IPs to file (one per line)
|
||||||
with open(OUTPUT_FILE, "w") as f:
|
with open(OUTPUT_FILE, "w") as f:
|
||||||
for (ip,) in results:
|
for ip in public_ips:
|
||||||
f.write(f"{ip}\n")
|
f.write(f"{ip}\n")
|
||||||
|
|
||||||
app_logger.info(
|
app_logger.info(
|
||||||
f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}"
|
f"[Background Task] {task_name} exported {len(public_ips)} public IPs "
|
||||||
|
f"(filtered {len(results) - len(public_ips)} local/private IPs) to {OUTPUT_FILE}"
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
258
src/tracker.py
258
src/tracker.py
@@ -6,8 +6,10 @@ from datetime import datetime
|
|||||||
from zoneinfo import ZoneInfo
|
from zoneinfo import ZoneInfo
|
||||||
import re
|
import re
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
|
||||||
from wordlists import get_wordlists
|
from wordlists import get_wordlists
|
||||||
from database import get_database, DatabaseManager
|
from database import get_database, DatabaseManager
|
||||||
|
from ip_utils import is_local_or_private_ip, is_valid_public_ip
|
||||||
|
|
||||||
|
|
||||||
class AccessTracker:
|
class AccessTracker:
|
||||||
@@ -39,6 +41,11 @@ class AccessTracker:
|
|||||||
self.access_log: List[Dict] = []
|
self.access_log: List[Dict] = []
|
||||||
self.credential_attempts: List[Dict] = []
|
self.credential_attempts: List[Dict] = []
|
||||||
|
|
||||||
|
# Memory limits for in-memory lists (prevents unbounded growth)
|
||||||
|
self.max_access_log_size = 10_000 # Keep only recent 10k accesses
|
||||||
|
self.max_credential_log_size = 5_000 # Keep only recent 5k attempts
|
||||||
|
self.max_counter_keys = 100_000 # Max unique IPs/paths/user agents
|
||||||
|
|
||||||
# Track pages visited by each IP (for good crawler limiting)
|
# Track pages visited by each IP (for good crawler limiting)
|
||||||
self.ip_page_visits: Dict[str, Dict[str, object]] = defaultdict(dict)
|
self.ip_page_visits: Dict[str, Dict[str, object]] = defaultdict(dict)
|
||||||
|
|
||||||
@@ -162,7 +169,15 @@ class AccessTracker:
|
|||||||
Record a credential login attempt.
|
Record a credential login attempt.
|
||||||
|
|
||||||
Stores in both in-memory list and SQLite database.
|
Stores in both in-memory list and SQLite database.
|
||||||
|
Skips recording if the IP is the server's own public IP.
|
||||||
"""
|
"""
|
||||||
|
# Skip if this is the server's own IP
|
||||||
|
from config import get_config
|
||||||
|
config = get_config()
|
||||||
|
server_ip = config.get_server_ip()
|
||||||
|
if server_ip and ip == server_ip:
|
||||||
|
return
|
||||||
|
|
||||||
# In-memory storage for dashboard
|
# In-memory storage for dashboard
|
||||||
self.credential_attempts.append(
|
self.credential_attempts.append(
|
||||||
{
|
{
|
||||||
@@ -174,6 +189,12 @@ class AccessTracker:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Trim if exceeding max size (prevent unbounded growth)
|
||||||
|
if len(self.credential_attempts) > self.max_credential_log_size:
|
||||||
|
self.credential_attempts = self.credential_attempts[
|
||||||
|
-self.max_credential_log_size :
|
||||||
|
]
|
||||||
|
|
||||||
# Persist to database
|
# Persist to database
|
||||||
if self.db:
|
if self.db:
|
||||||
try:
|
try:
|
||||||
@@ -196,6 +217,7 @@ class AccessTracker:
|
|||||||
Record an access attempt.
|
Record an access attempt.
|
||||||
|
|
||||||
Stores in both in-memory structures and SQLite database.
|
Stores in both in-memory structures and SQLite database.
|
||||||
|
Skips recording if the IP is the server's own public IP.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
ip: Client IP address
|
ip: Client IP address
|
||||||
@@ -204,6 +226,13 @@ class AccessTracker:
|
|||||||
body: Request body (for POST/PUT)
|
body: Request body (for POST/PUT)
|
||||||
method: HTTP method
|
method: HTTP method
|
||||||
"""
|
"""
|
||||||
|
# Skip if this is the server's own IP
|
||||||
|
from config import get_config
|
||||||
|
config = get_config()
|
||||||
|
server_ip = config.get_server_ip()
|
||||||
|
if server_ip and ip == server_ip:
|
||||||
|
return
|
||||||
|
|
||||||
self.ip_counts[ip] += 1
|
self.ip_counts[ip] += 1
|
||||||
self.path_counts[path] += 1
|
self.path_counts[path] += 1
|
||||||
if user_agent:
|
if user_agent:
|
||||||
@@ -240,6 +269,10 @@ class AccessTracker:
|
|||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Trim if exceeding max size (prevent unbounded growth)
|
||||||
|
if len(self.access_log) > self.max_access_log_size:
|
||||||
|
self.access_log = self.access_log[-self.max_access_log_size :]
|
||||||
|
|
||||||
# Persist to database
|
# Persist to database
|
||||||
if self.db:
|
if self.db:
|
||||||
try:
|
try:
|
||||||
@@ -348,7 +381,13 @@ class AccessTracker:
|
|||||||
def increment_page_visit(self, client_ip: str) -> int:
|
def increment_page_visit(self, client_ip: str) -> int:
|
||||||
"""
|
"""
|
||||||
Increment page visit counter for an IP and return the new count.
|
Increment page visit counter for an IP and return the new count.
|
||||||
If ban timestamp exists and 60+ seconds have passed, reset the counter.
|
Implements incremental bans: each violation increases ban duration exponentially.
|
||||||
|
|
||||||
|
Ban duration formula: base_duration * (2 ^ violation_count)
|
||||||
|
- 1st violation: base_duration (e.g., 60 seconds)
|
||||||
|
- 2nd violation: base_duration * 2 (120 seconds)
|
||||||
|
- 3rd violation: base_duration * 4 (240 seconds)
|
||||||
|
- Nth violation: base_duration * 2^(N-1)
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
client_ip: The client IP address
|
client_ip: The client IP address
|
||||||
@@ -356,19 +395,41 @@ class AccessTracker:
|
|||||||
Returns:
|
Returns:
|
||||||
The updated page visit count for this IP
|
The updated page visit count for this IP
|
||||||
"""
|
"""
|
||||||
|
# Skip if this is the server's own IP
|
||||||
|
from config import get_config
|
||||||
|
config = get_config()
|
||||||
|
server_ip = config.get_server_ip()
|
||||||
|
if server_ip and client_ip == server_ip:
|
||||||
|
return 0
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Initialize if not exists
|
# Initialize if not exists
|
||||||
if client_ip not in self.ip_page_visits:
|
if client_ip not in self.ip_page_visits:
|
||||||
self.ip_page_visits[client_ip] = {"count": 0, "ban_timestamp": None}
|
self.ip_page_visits[client_ip] = {
|
||||||
|
"count": 0,
|
||||||
|
"ban_timestamp": None,
|
||||||
|
"total_violations": 0,
|
||||||
|
"ban_multiplier": 1,
|
||||||
|
}
|
||||||
|
|
||||||
# Increment count
|
# Increment count
|
||||||
self.ip_page_visits[client_ip]["count"] += 1
|
self.ip_page_visits[client_ip]["count"] += 1
|
||||||
|
|
||||||
# Set ban if reached limit
|
# Set ban if reached limit
|
||||||
if self.ip_page_visits[client_ip]["count"] >= self.max_pages_limit:
|
if self.ip_page_visits[client_ip]["count"] >= self.max_pages_limit:
|
||||||
self.ip_page_visits[client_ip][
|
# Increment violation counter
|
||||||
"ban_timestamp"
|
self.ip_page_visits[client_ip]["total_violations"] += 1
|
||||||
] = datetime.now().isoformat()
|
violations = self.ip_page_visits[client_ip]["total_violations"]
|
||||||
|
|
||||||
|
# Calculate exponential ban multiplier: 2^(violations - 1)
|
||||||
|
# Violation 1: 2^0 = 1x
|
||||||
|
# Violation 2: 2^1 = 2x
|
||||||
|
# Violation 3: 2^2 = 4x
|
||||||
|
# Violation 4: 2^3 = 8x, etc.
|
||||||
|
self.ip_page_visits[client_ip]["ban_multiplier"] = 2 ** (violations - 1)
|
||||||
|
|
||||||
|
# Set ban timestamp
|
||||||
|
self.ip_page_visits[client_ip]["ban_timestamp"] = datetime.now().isoformat()
|
||||||
|
|
||||||
return self.ip_page_visits[client_ip]["count"]
|
return self.ip_page_visits[client_ip]["count"]
|
||||||
|
|
||||||
@@ -378,6 +439,10 @@ class AccessTracker:
|
|||||||
def is_banned_ip(self, client_ip: str) -> bool:
|
def is_banned_ip(self, client_ip: str) -> bool:
|
||||||
"""
|
"""
|
||||||
Check if an IP is currently banned due to exceeding page visit limits.
|
Check if an IP is currently banned due to exceeding page visit limits.
|
||||||
|
Uses incremental ban duration based on violation count.
|
||||||
|
|
||||||
|
Ban duration = base_duration * (2 ^ (violations - 1))
|
||||||
|
Each time an IP is banned again, duration doubles.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
client_ip: The client IP address
|
client_ip: The client IP address
|
||||||
@@ -386,26 +451,87 @@ class AccessTracker:
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
if client_ip in self.ip_page_visits:
|
if client_ip in self.ip_page_visits:
|
||||||
ban_timestamp = self.ip_page_visits[client_ip]["ban_timestamp"]
|
ban_timestamp = self.ip_page_visits[client_ip].get("ban_timestamp")
|
||||||
if ban_timestamp is not None:
|
if ban_timestamp is not None:
|
||||||
banned = True
|
# Get the ban multiplier for this violation
|
||||||
|
ban_multiplier = self.ip_page_visits[client_ip].get(
|
||||||
|
"ban_multiplier", 1
|
||||||
|
)
|
||||||
|
|
||||||
# Check if ban period has expired (> 60 seconds)
|
# Calculate effective ban duration based on violations
|
||||||
ban_time = datetime.fromisoformat(
|
effective_ban_duration = self.ban_duration_seconds * ban_multiplier
|
||||||
self.ip_page_visits[client_ip]["ban_timestamp"]
|
|
||||||
)
|
|
||||||
time_diff = datetime.now() - ban_time
|
|
||||||
if time_diff.total_seconds() > self.ban_duration_seconds:
|
|
||||||
self.ip_page_visits[client_ip]["count"] = 0
|
|
||||||
self.ip_page_visits[client_ip]["ban_timestamp"] = None
|
|
||||||
banned = False
|
|
||||||
|
|
||||||
return banned
|
# Check if ban period has expired
|
||||||
|
ban_time = datetime.fromisoformat(ban_timestamp)
|
||||||
|
time_diff = datetime.now() - ban_time
|
||||||
|
|
||||||
|
if time_diff.total_seconds() > effective_ban_duration:
|
||||||
|
# Ban expired, reset for next cycle
|
||||||
|
# Keep violation count for next offense
|
||||||
|
self.ip_page_visits[client_ip]["count"] = 0
|
||||||
|
self.ip_page_visits[client_ip]["ban_timestamp"] = None
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
# Still banned
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
except Exception:
|
except Exception:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def get_page_visit_count(self, client_ip: str) -> int:
|
def get_ban_info(self, client_ip: str) -> dict:
|
||||||
|
"""
|
||||||
|
Get detailed ban information for an IP.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dictionary with ban status, violations, and remaining ban time
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if client_ip not in self.ip_page_visits:
|
||||||
|
return {
|
||||||
|
"is_banned": False,
|
||||||
|
"violations": 0,
|
||||||
|
"ban_multiplier": 1,
|
||||||
|
"remaining_ban_seconds": 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
ip_data = self.ip_page_visits[client_ip]
|
||||||
|
ban_timestamp = ip_data.get("ban_timestamp")
|
||||||
|
|
||||||
|
if ban_timestamp is None:
|
||||||
|
return {
|
||||||
|
"is_banned": False,
|
||||||
|
"violations": ip_data.get("total_violations", 0),
|
||||||
|
"ban_multiplier": ip_data.get("ban_multiplier", 1),
|
||||||
|
"remaining_ban_seconds": 0,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Ban is active, calculate remaining time
|
||||||
|
ban_multiplier = ip_data.get("ban_multiplier", 1)
|
||||||
|
effective_ban_duration = self.ban_duration_seconds * ban_multiplier
|
||||||
|
|
||||||
|
ban_time = datetime.fromisoformat(ban_timestamp)
|
||||||
|
time_diff = datetime.now() - ban_time
|
||||||
|
remaining_seconds = max(
|
||||||
|
0, effective_ban_duration - time_diff.total_seconds()
|
||||||
|
)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"is_banned": remaining_seconds > 0,
|
||||||
|
"violations": ip_data.get("total_violations", 0),
|
||||||
|
"ban_multiplier": ban_multiplier,
|
||||||
|
"effective_ban_duration_seconds": effective_ban_duration,
|
||||||
|
"remaining_ban_seconds": remaining_seconds,
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
return {
|
||||||
|
"is_banned": False,
|
||||||
|
"violations": 0,
|
||||||
|
"ban_multiplier": 1,
|
||||||
|
"remaining_ban_seconds": 0,
|
||||||
|
}
|
||||||
"""
|
"""
|
||||||
Get the current page visit count for an IP.
|
Get the current page visit count for an IP.
|
||||||
|
|
||||||
@@ -421,8 +547,13 @@ class AccessTracker:
|
|||||||
return 0
|
return 0
|
||||||
|
|
||||||
def get_top_ips(self, limit: int = 10) -> List[Tuple[str, int]]:
    """Return the top *limit* public IPs ranked by access count.

    Local/private addresses are excluded before ranking.
    """
    public_hits = []
    for addr, hits in self.ip_counts.items():
        if is_local_or_private_ip(addr):
            continue
        public_hits.append((addr, hits))
    # Stable descending sort by count, matching sorted(..., reverse=True).
    public_hits.sort(key=lambda pair: pair[1], reverse=True)
    return public_hits[:limit]
|
||||||
def get_top_paths(self, limit: int = 10) -> List[Tuple[str, int]]:
|
def get_top_paths(self, limit: int = 10) -> List[Tuple[str, int]]:
|
||||||
"""Get top N paths by access count"""
|
"""Get top N paths by access count"""
|
||||||
@@ -437,18 +568,30 @@ class AccessTracker:
|
|||||||
]
|
]
|
||||||
|
|
||||||
def get_suspicious_accesses(self, limit: int = 20) -> List[Dict]:
    """Return up to *limit* most recent suspicious access entries.

    Entries originating from local/private IPs are skipped.
    """
    flagged = []
    for entry in self.access_log:
        if not entry.get("suspicious", False):
            continue
        if is_local_or_private_ip(entry.get("ip", "")):
            continue
        flagged.append(entry)
    # Most recent entries are at the tail of access_log.
    return flagged[-limit:]
|
||||||
|
|
||||||
def get_attack_type_accesses(self, limit: int = 20) -> List[Dict]:
    """Return up to *limit* most recent accesses with detected attack types.

    Entries originating from local/private IPs are skipped.
    """
    hits = []
    for entry in self.access_log:
        if entry.get("attack_types") and not is_local_or_private_ip(
            entry.get("ip", "")
        ):
            hits.append(entry)
    # Most recent entries are at the tail of access_log.
    return hits[-limit:]
|
||||||
|
|
||||||
def get_honeypot_triggered_ips(self) -> List[Tuple[str, List[str]]]:
    """Return (ip, paths) pairs for IPs that hit honeypot paths.

    Local/private IPs are excluded from the result.
    """
    triggered = []
    for addr, hit_paths in self.honeypot_triggered.items():
        if not is_local_or_private_ip(addr):
            triggered.append((addr, hit_paths))
    return triggered
|
||||||
|
|
||||||
def get_stats(self) -> Dict:
|
def get_stats(self) -> Dict:
|
||||||
"""Get statistics summary from database."""
|
"""Get statistics summary from database."""
|
||||||
@@ -468,3 +611,66 @@ class AccessTracker:
|
|||||||
stats["credential_attempts"] = self.db.get_credential_attempts(limit=50)
|
stats["credential_attempts"] = self.db.get_credential_attempts(limit=50)
|
||||||
|
|
||||||
return stats
|
return stats
|
||||||
|
|
||||||
|
def cleanup_memory(self) -> None:
    """
    Clean up in-memory structures to prevent unbounded growth.
    Should be called periodically (e.g., every 5 minutes).

    Trimming strategy:
    - Keep only the most recent N entries in the access/credential logs
    - Reset ban bookkeeping for IPs whose ban has expired
    """
    # Trim access_log to max size (keep most recent entries).
    if len(self.access_log) > self.max_access_log_size:
        self.access_log = self.access_log[-self.max_access_log_size:]

    # Trim credential_attempts to max size (keep most recent entries).
    if len(self.credential_attempts) > self.max_credential_log_size:
        self.credential_attempts = self.credential_attempts[
            -self.max_credential_log_size:
        ]

    # Reset entries whose ban has expired. The expiry check must scale the
    # base duration by the per-IP ban_multiplier so it agrees with
    # get_ban_info(); otherwise escalated (multiplied) bans would be
    # cleared too early.
    current_time = datetime.now()
    for data in self.ip_page_visits.values():
        ban_timestamp = data.get("ban_timestamp")
        if ban_timestamp is None:
            continue
        try:
            ban_time = datetime.fromisoformat(ban_timestamp)
        except (ValueError, TypeError):
            # Malformed timestamp: leave the entry untouched.
            continue
        elapsed = (current_time - ban_time).total_seconds()
        effective_duration = self.ban_duration_seconds * data.get(
            "ban_multiplier", 1
        )
        if elapsed > effective_duration:
            # Ban expired, reset the entry.
            data["count"] = 0
            data["ban_timestamp"] = None
||||||
|
def get_memory_stats(self) -> Dict[str, int]:
    """
    Get current memory usage statistics for monitoring.

    Returns:
        Dictionary with counts of in-memory items
    """
    tracked = {
        "access_log_size": self.access_log,
        "credential_attempts_size": self.credential_attempts,
        "unique_ips_tracked": self.ip_counts,
        "unique_paths_tracked": self.path_counts,
        "unique_user_agents": self.user_agent_counts,
        "unique_ip_page_visits": self.ip_page_visits,
        "honeypot_triggered_ips": self.honeypot_triggered,
    }
    return {name: len(container) for name, container in tracked.items()}
|
||||||
|
|||||||
Reference in New Issue
Block a user