Merge pull request #42 from BlessedRebuS/feat/task-optimization

Optimize scheduled tasks to reduce unnecessary processing
This commit is contained in:
Patrick Di Fazio
2026-01-17 23:06:30 +01:00
committed by GitHub
14 changed files with 238 additions and 243 deletions

1
.gitignore vendored
View File

@@ -56,6 +56,7 @@ secrets/
.env .env
.env.local .env.local
.env.*.local .env.*.local
.envrc
# Logs # Logs
*.log *.log

View File

@@ -3,7 +3,6 @@
server: server:
port: 5000 port: 5000
delay: 100 # Response delay in milliseconds delay: 100 # Response delay in milliseconds
timezone: null # e.g., "America/New_York", "Europe/Paris" or null for system default
# manually set the server header, if null a random one will be used. # manually set the server header, if null a random one will be used.
server_header: null server_header: null

View File

@@ -1,5 +1,4 @@
version: '3.8' ---
services: services:
krawl: krawl:
build: build:
@@ -8,11 +7,26 @@ services:
container_name: krawl-server container_name: krawl-server
ports: ports:
- "5000:5000" - "5000:5000"
environment:
- CONFIG_LOCATION=config.yaml
# Set TZ to change the container timezone; alternatively, mount /etc/timezone or /etc/localtime from the host, depending on how the host manages its system time.
# - TZ=${TZ}
volumes: volumes:
- ./wordlists.json:/app/wordlists.json:ro - ./wordlists.json:/app/wordlists.json:ro
- ./config.yaml:/app/config.yaml:ro - ./config.yaml:/app/config.yaml:ro
- ./logs:/app/logs - ./logs:/app/logs
- ./exports:/app/exports - ./exports:/app/exports
environment: - data:/app/data
- CONFIG_LOCATION=config.yaml
restart: unless-stopped restart: unless-stopped
develop:
watch:
- path: ./Dockerfile
action: rebuild
- path: ./src/
action: sync+restart
target: /app/src
- path: ./docker-compose.yaml
action: rebuild
volumes:
data:

View File

@@ -23,7 +23,7 @@ class Analyzer:
""" """
Analyzes users activity and produces aggregated insights Analyzes users activity and produces aggregated insights
""" """
def __init__(self, db_manager: Optional[DatabaseManager] = None, timezone: Optional[ZoneInfo] = None): def __init__(self, db_manager: Optional[DatabaseManager] = None):
""" """
Initialize the access tracker. Initialize the access tracker.
@@ -31,7 +31,6 @@ class Analyzer:
db_manager: Optional DatabaseManager for persistence. db_manager: Optional DatabaseManager for persistence.
If None, will use the global singleton. If None, will use the global singleton.
""" """
self.timezone = timezone or ZoneInfo('UTC')
# Database manager for persistence (lazily initialized) # Database manager for persistence (lazily initialized)
self._db_manager = db_manager self._db_manager = db_manager

View File

@@ -32,7 +32,6 @@ class Config:
# Database settings # Database settings
database_path: str = "data/krawl.db" database_path: str = "data/krawl.db"
database_retention_days: int = 30 database_retention_days: int = 30
timezone: str = None # IANA timezone (e.g., 'America/New_York', 'Europe/Rome')
# Analyzer settings # Analyzer settings
http_risky_methods_threshold: float = None http_risky_methods_threshold: float = None
@@ -42,39 +41,6 @@ class Config:
user_agents_used_threshold: float = None user_agents_used_threshold: float = None
attack_urls_threshold: float = None attack_urls_threshold: float = None
@staticmethod
# Try to fetch timezone before if not set
def get_system_timezone() -> str:
"""Get the system's default timezone"""
try:
if os.path.islink('/etc/localtime'):
tz_path = os.readlink('/etc/localtime')
if 'zoneinfo/' in tz_path:
return tz_path.split('zoneinfo/')[-1]
local_tz = time.tzname[time.daylight]
if local_tz and local_tz != 'UTC':
return local_tz
except Exception:
pass
# Default fallback to UTC
return 'UTC'
def get_timezone(self) -> ZoneInfo:
    """Resolve the effective timezone as a ZoneInfo object.

    The configured ``self.timezone`` is tried first; if it is unset or
    cannot be loaded, the system timezone is tried, and finally UTC.
    """
    if self.timezone:
        try:
            return ZoneInfo(self.timezone)
        except Exception:
            # Configured name could not be loaded; fall back to the system zone.
            pass

    fallback_name = self.get_system_timezone()
    try:
        zone = ZoneInfo(fallback_name)
    except Exception:
        zone = ZoneInfo('UTC')
    return zone
@classmethod @classmethod
def from_yaml(cls) -> 'Config': def from_yaml(cls) -> 'Config':
"""Create configuration from YAML file""" """Create configuration from YAML file"""
@@ -118,7 +84,6 @@ class Config:
port=server.get('port', 5000), port=server.get('port', 5000),
delay=server.get('delay', 100), delay=server.get('delay', 100),
server_header=server.get('server_header',""), server_header=server.get('server_header',""),
timezone=server.get('timezone'),
links_length_range=( links_length_range=(
links.get('min_length', 5), links.get('min_length', 5),
links.get('max_length', 15) links.get('max_length', 15)

View File

@@ -7,7 +7,7 @@ Provides SQLAlchemy session management and database initialization.
import os import os
import stat import stat
from datetime import datetime from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any from typing import Optional, List, Dict, Any
from zoneinfo import ZoneInfo from zoneinfo import ZoneInfo
@@ -141,7 +141,7 @@ class DatabaseManager:
method=method[:10], method=method[:10],
is_suspicious=is_suspicious, is_suspicious=is_suspicious,
is_honeypot_trigger=is_honeypot_trigger, is_honeypot_trigger=is_honeypot_trigger,
timestamp=datetime.now(tz=ZoneInfo('UTC')) timestamp=datetime.now()
) )
session.add(access_log) session.add(access_log)
session.flush() # Get the ID before committing session.flush() # Get the ID before committing
@@ -199,7 +199,7 @@ class DatabaseManager:
path=sanitize_path(path), path=sanitize_path(path),
username=sanitize_credential(username), username=sanitize_credential(username),
password=sanitize_credential(password), password=sanitize_credential(password),
timestamp=datetime.now(tz=ZoneInfo('UTC')) timestamp=datetime.now()
) )
session.add(credential) session.add(credential)
session.commit() session.commit()
@@ -221,7 +221,7 @@ class DatabaseManager:
ip: IP address to update ip: IP address to update
""" """
sanitized_ip = sanitize_ip(ip) sanitized_ip = sanitize_ip(ip)
now = datetime.now(tz=ZoneInfo('UTC')) now = datetime.now()
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first() ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
@@ -290,7 +290,7 @@ class DatabaseManager:
# Record the manual category change # Record the manual category change
old_category = ip_stats.category old_category = ip_stats.category
if old_category != category: if old_category != category:
self._record_category_change(sanitized_ip, old_category, category, datetime.now(tz=ZoneInfo('UTC'))) self._record_category_change(sanitized_ip, old_category, category, datetime.now())
ip_stats.category = category ip_stats.category = category
ip_stats.manual_category = True ip_stats.manual_category = True
@@ -352,7 +352,7 @@ class DatabaseManager:
{ {
'old_category': h.old_category, 'old_category': h.old_category,
'new_category': h.new_category, 'new_category': h.new_category,
'timestamp': h.timestamp.isoformat() + '+00:00' 'timestamp': h.timestamp.isoformat()
} }
for h in history for h in history
] ]
@@ -390,6 +390,7 @@ class DatabaseManager:
def get_unenriched_ips(self, limit: int = 100) -> List[str]: def get_unenriched_ips(self, limit: int = 100) -> List[str]:
""" """
Get IPs that don't have reputation data yet. Get IPs that don't have reputation data yet.
Excludes RFC1918 private addresses and other non-routable IPs.
Args: Args:
limit: Maximum number of IPs to return limit: Maximum number of IPs to return
@@ -400,7 +401,18 @@ class DatabaseManager:
session = self.session session = self.session
try: try:
ips = session.query(IpStats.ip).filter( ips = session.query(IpStats.ip).filter(
IpStats.country_code.is_(None) IpStats.country_code.is_(None),
~IpStats.ip.like('10.%'),
~IpStats.ip.like('172.16.%'),
~IpStats.ip.like('172.17.%'),
~IpStats.ip.like('172.18.%'),
~IpStats.ip.like('172.19.%'),
~IpStats.ip.like('172.2_.%'),
~IpStats.ip.like('172.30.%'),
~IpStats.ip.like('172.31.%'),
~IpStats.ip.like('192.168.%'),
~IpStats.ip.like('127.%'),
~IpStats.ip.like('169.254.%')
).limit(limit).all() ).limit(limit).all()
return [ip[0] for ip in ips] return [ip[0] for ip in ips]
finally: finally:
@@ -411,7 +423,8 @@ class DatabaseManager:
limit: int = 100, limit: int = 100,
offset: int = 0, offset: int = 0,
ip_filter: Optional[str] = None, ip_filter: Optional[str] = None,
suspicious_only: bool = False suspicious_only: bool = False,
since_minutes: Optional[int] = None
) -> List[Dict[str, Any]]: ) -> List[Dict[str, Any]]:
""" """
Retrieve access logs with optional filtering. Retrieve access logs with optional filtering.
@@ -421,6 +434,7 @@ class DatabaseManager:
offset: Number of records to skip offset: Number of records to skip
ip_filter: Filter by IP address ip_filter: Filter by IP address
suspicious_only: Only return suspicious requests suspicious_only: Only return suspicious requests
since_minutes: Only return logs from the last N minutes
Returns: Returns:
List of access log dictionaries List of access log dictionaries
@@ -433,6 +447,9 @@ class DatabaseManager:
query = query.filter(AccessLog.ip == sanitize_ip(ip_filter)) query = query.filter(AccessLog.ip == sanitize_ip(ip_filter))
if suspicious_only: if suspicious_only:
query = query.filter(AccessLog.is_suspicious == True) query = query.filter(AccessLog.is_suspicious == True)
if since_minutes is not None:
cutoff_time = datetime.now() - timedelta(minutes=since_minutes)
query = query.filter(AccessLog.timestamp >= cutoff_time)
logs = query.offset(offset).limit(limit).all() logs = query.offset(offset).limit(limit).all()
@@ -445,7 +462,7 @@ class DatabaseManager:
'method': log.method, 'method': log.method,
'is_suspicious': log.is_suspicious, 'is_suspicious': log.is_suspicious,
'is_honeypot_trigger': log.is_honeypot_trigger, 'is_honeypot_trigger': log.is_honeypot_trigger,
'timestamp': log.timestamp.isoformat() + '+00:00', 'timestamp': log.timestamp.isoformat(),
'attack_types': [d.attack_type for d in log.attack_detections] 'attack_types': [d.attack_type for d in log.attack_detections]
} }
for log in logs for log in logs
@@ -538,7 +555,7 @@ class DatabaseManager:
'path': attempt.path, 'path': attempt.path,
'username': attempt.username, 'username': attempt.username,
'password': attempt.password, 'password': attempt.password,
'timestamp': attempt.timestamp.isoformat() + '+00:00' 'timestamp': attempt.timestamp.isoformat()
} }
for attempt in attempts for attempt in attempts
] ]
@@ -565,8 +582,8 @@ class DatabaseManager:
{ {
'ip': s.ip, 'ip': s.ip,
'total_requests': s.total_requests, 'total_requests': s.total_requests,
'first_seen': s.first_seen.isoformat() + '+00:00', 'first_seen': s.first_seen.isoformat(),
'last_seen': s.last_seen.isoformat() + '+00:00', 'last_seen': s.last_seen.isoformat(),
'country_code': s.country_code, 'country_code': s.country_code,
'city': s.city, 'city': s.city,
'asn': s.asn, 'asn': s.asn,
@@ -606,8 +623,8 @@ class DatabaseManager:
return { return {
'ip': stat.ip, 'ip': stat.ip,
'total_requests': stat.total_requests, 'total_requests': stat.total_requests,
'first_seen': stat.first_seen.isoformat() + '+00:00' if stat.first_seen else None, 'first_seen': stat.first_seen.isoformat() if stat.first_seen else None,
'last_seen': stat.last_seen.isoformat() + '+00:00' if stat.last_seen else None, 'last_seen': stat.last_seen.isoformat() if stat.last_seen else None,
'country_code': stat.country_code, 'country_code': stat.country_code,
'city': stat.city, 'city': stat.city,
'asn': stat.asn, 'asn': stat.asn,
@@ -619,7 +636,7 @@ class DatabaseManager:
'category': stat.category, 'category': stat.category,
'category_scores': stat.category_scores or {}, 'category_scores': stat.category_scores or {},
'manual_category': stat.manual_category, 'manual_category': stat.manual_category,
'last_analysis': stat.last_analysis.isoformat() + '+00:00' if stat.last_analysis else None, 'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None,
'category_history': category_history 'category_history': category_history
} }
finally: finally:
@@ -690,7 +707,7 @@ class DatabaseManager:
Args: Args:
limit: Maximum number of results limit: Maximum number of results
Returns: Returns:
List of (path, count) tuples ordered by count descending List of (path, count) tuples ordered by count descending
""" """
session = self.session session = self.session
@@ -753,7 +770,7 @@ class DatabaseManager:
'ip': log.ip, 'ip': log.ip,
'path': log.path, 'path': log.path,
'user_agent': log.user_agent, 'user_agent': log.user_agent,
'timestamp': log.timestamp.isoformat() + '+00:00' 'timestamp': log.timestamp.isoformat()
} }
for log in logs for log in logs
] ]
@@ -811,7 +828,7 @@ class DatabaseManager:
'ip': log.ip, 'ip': log.ip,
'path': log.path, 'path': log.path,
'user_agent': log.user_agent, 'user_agent': log.user_agent,
'timestamp': log.timestamp.isoformat() + '+00:00', 'timestamp': log.timestamp.isoformat(),
'attack_types': [d.attack_type for d in log.attack_detections] 'attack_types': [d.attack_type for d in log.attack_detections]
} }
for log in logs for log in logs

View File

@@ -407,9 +407,8 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers() self.end_headers()
try: try:
stats = self.tracker.get_stats() stats = self.tracker.get_stats()
timezone = str(self.config.timezone) if self.config.timezone else 'UTC'
dashboard_path = self.config.dashboard_secret_path dashboard_path = self.config.dashboard_secret_path
self.wfile.write(generate_dashboard(stats, timezone, dashboard_path).encode()) self.wfile.write(generate_dashboard(stats, dashboard_path).encode())
except BrokenPipeError: except BrokenPipeError:
pass pass
except Exception as e: except Exception as e:

View File

@@ -8,20 +8,17 @@ Provides two loggers: app (application) and access (HTTP access logs).
import logging import logging
import os import os
from logging.handlers import RotatingFileHandler from logging.handlers import RotatingFileHandler
from typing import Optional
from zoneinfo import ZoneInfo
from datetime import datetime from datetime import datetime
class TimezoneFormatter(logging.Formatter): class TimezoneFormatter(logging.Formatter):
"""Custom formatter that respects configured timezone""" """Custom formatter that respects configured timezone"""
def __init__(self, fmt=None, datefmt=None, timezone: Optional[ZoneInfo] = None): def __init__(self, fmt=None, datefmt=None):
super().__init__(fmt, datefmt) super().__init__(fmt, datefmt)
self.timezone = timezone or ZoneInfo('UTC')
def formatTime(self, record, datefmt=None): def formatTime(self, record, datefmt=None):
"""Override formatTime to use configured timezone""" """Override formatTime to use configured timezone"""
dt = datetime.fromtimestamp(record.created, tz=self.timezone) dt = datetime.fromtimestamp(record.created)
if datefmt: if datefmt:
return dt.strftime(datefmt) return dt.strftime(datefmt)
return dt.isoformat() return dt.isoformat()
@@ -37,19 +34,16 @@ class LoggerManager:
cls._instance._initialized = False cls._instance._initialized = False
return cls._instance return cls._instance
def initialize(self, log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None: def initialize(self, log_dir: str = "logs") -> None:
""" """
Initialize the logging system with rotating file handlers. Initialize the logging system with rotating file handlers.
Args: Args:
log_dir: Directory for log files (created if not exists) log_dir: Directory for log files (created if not exists)
timezone: ZoneInfo timezone for log timestamps (defaults to UTC)
""" """
if self._initialized: if self._initialized:
return return
self.timezone = timezone or ZoneInfo('UTC')
# Create log directory if it doesn't exist # Create log directory if it doesn't exist
os.makedirs(log_dir, exist_ok=True) os.makedirs(log_dir, exist_ok=True)
@@ -57,7 +51,6 @@ class LoggerManager:
log_format = TimezoneFormatter( log_format = TimezoneFormatter(
"[%(asctime)s] %(levelname)s - %(message)s", "[%(asctime)s] %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S", datefmt="%Y-%m-%d %H:%M:%S",
timezone=self.timezone
) )
# Rotation settings: 1MB max, 5 backups # Rotation settings: 1MB max, 5 backups
@@ -104,7 +97,7 @@ class LoggerManager:
self._credential_logger.handlers.clear() self._credential_logger.handlers.clear()
# Credential logger uses a simple format: timestamp|ip|username|password|path # Credential logger uses a simple format: timestamp|ip|username|password|path
credential_format = TimezoneFormatter("%(message)s", timezone=self.timezone) credential_format = TimezoneFormatter("%(message)s")
credential_file_handler = RotatingFileHandler( credential_file_handler = RotatingFileHandler(
os.path.join(log_dir, "credentials.log"), os.path.join(log_dir, "credentials.log"),
@@ -157,6 +150,6 @@ def get_credential_logger() -> logging.Logger:
return _logger_manager.credentials return _logger_manager.credentials
def initialize_logging(log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None: def initialize_logging(log_dir: str = "logs") -> None:
"""Initialize the logging system.""" """Initialize the logging system."""
_logger_manager.initialize(log_dir, timezone) _logger_manager.initialize(log_dir)

View File

@@ -29,7 +29,6 @@ def print_usage():
print(' server:') print(' server:')
print(' port: 5000') print(' port: 5000')
print(' delay: 100') print(' delay: 100')
print(' timezone: null # or "America/New_York"')
print(' links:') print(' links:')
print(' min_length: 5') print(' min_length: 5')
print(' max_length: 15') print(' max_length: 15')
@@ -55,11 +54,8 @@ def main():
config = get_config() config = get_config()
# Get timezone configuration
tz = config.get_timezone()
# Initialize logging with timezone # Initialize logging with timezone
initialize_logging(timezone=tz) initialize_logging()
app_logger = get_app_logger() app_logger = get_app_logger()
access_logger = get_access_logger() access_logger = get_access_logger()
credential_logger = get_credential_logger() credential_logger = get_credential_logger()
@@ -71,8 +67,8 @@ def main():
except Exception as e: except Exception as e:
app_logger.warning(f'Database initialization failed: {e}. Continuing with in-memory only.') app_logger.warning(f'Database initialization failed: {e}. Continuing with in-memory only.')
tracker = AccessTracker(timezone=tz) tracker = AccessTracker()
analyzer = Analyzer(timezone=tz) analyzer = Analyzer()
Handler.config = config Handler.config = config
Handler.tracker = tracker Handler.tracker = tracker
@@ -99,7 +95,6 @@ def main():
try: try:
app_logger.info(f'Starting deception server on port {config.port}...') app_logger.info(f'Starting deception server on port {config.port}...')
app_logger.info(f'Timezone configured: {tz.key}')
app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}') app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}')
if config.canary_token_url: if config.canary_token_url:
app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries') app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries')

View File

@@ -73,12 +73,18 @@ def main():
"attack_url": 0 "attack_url": 0
} }
} }
accesses = db_manager.get_access_logs(limit=999999999) # Get IPs with recent activity (last minute to match cron schedule)
ips = {item['ip'] for item in accesses} recent_accesses = db_manager.get_access_logs(limit=999999999, since_minutes=1)
ips_to_analyze = {item['ip'] for item in recent_accesses}
for ip in ips: if not ips_to_analyze:
ip_accesses = [item for item in accesses if item["ip"] == ip] app_logger.debug("[Background Task] analyze-ips: No recent activity, skipping")
total_accesses_count = len(accesses) return
for ip in ips_to_analyze:
# Get full history for this IP to perform accurate analysis
ip_accesses = db_manager.get_access_logs(limit=999999999, ip_filter=ip)
total_accesses_count = len(ip_accesses)
if total_accesses_count <= 0: if total_accesses_count <= 0:
return return
@@ -87,7 +93,7 @@ def main():
category = "unknown" category = "unknown"
analyzed_metrics = {} analyzed_metrics = {}
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0} category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
last_analysis = datetime.now(tz=ZoneInfo('UTC')) last_analysis = datetime.now()
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis) db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
return 0 return 0
#--------------------- HTTP Methods --------------------- #--------------------- HTTP Methods ---------------------
@@ -147,9 +153,9 @@ def main():
score["regular_user"]["robots_violations"] = False score["regular_user"]["robots_violations"] = False
#--------------------- Requests Timing --------------------- #--------------------- Requests Timing ---------------------
#Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior # Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses] timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
now_utc = datetime.now(tz=ZoneInfo('UTC')) now_utc = datetime.now()
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)] timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
timestamps = sorted(timestamps, reverse=True) timestamps = sorted(timestamps, reverse=True)
time_diffs = [] time_diffs = []
@@ -260,6 +266,6 @@ def main():
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list} analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score} category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
category = max(category_scores, key=category_scores.get) category = max(category_scores, key=category_scores.get)
last_analysis = datetime.now(tz=ZoneInfo('UTC')) last_analysis = datetime.now()
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis) db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
return return

View File

@@ -21,7 +21,7 @@ def main():
# Only get IPs that haven't been enriched yet # Only get IPs that haven't been enriched yet
unenriched_ips = db_manager.get_unenriched_ips(limit=50) unenriched_ips = db_manager.get_unenriched_ips(limit=50)
app_logger.info(f"{len(unenriched_ips)} IP's need to be have reputation enrichment.")
for ip in unenriched_ips: for ip in unenriched_ips:
try: try:
api_url = "https://iprep.lcrawl.com/api/iprep/" api_url = "https://iprep.lcrawl.com/api/iprep/"

View File

@@ -1,6 +1,8 @@
# tasks/export_malicious_ips.py # tasks/export_malicious_ips.py
import os import os
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
from logger import get_app_logger from logger import get_app_logger
from database import get_database from database import get_database
from models import AccessLog from models import AccessLog
@@ -24,6 +26,15 @@ OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
# ---------------------- # ----------------------
# TASK LOGIC # TASK LOGIC
# ---------------------- # ----------------------
def has_recent_honeypot_access(session, minutes: int = 5) -> bool:
    """Check if the honeypot was accessed in the last N minutes.

    Args:
        session: SQLAlchemy session used to query ``AccessLog``.
        minutes: Size of the look-back window, in minutes.

    Returns:
        True if at least one ``AccessLog`` row flagged as a honeypot trigger
        has a timestamp at or after the cutoff, otherwise False.
    """
    # The cutoff is a naive datetime taken from datetime.now().
    cutoff_time = datetime.now() - timedelta(minutes=minutes)
    recent_triggers = session.query(AccessLog).filter(
        AccessLog.is_honeypot_trigger == True,  # noqa: E712 - SQLAlchemy column expression
        AccessLog.timestamp >= cutoff_time,
    )
    # EXISTS lets the database stop at the first match instead of counting
    # every matching row just to compare the total against zero.
    return bool(session.query(recent_triggers.exists()).scalar())
def main(): def main():
""" """
Export all IPs flagged as suspicious to a text file. Export all IPs flagged as suspicious to a text file.
@@ -36,6 +47,11 @@ def main():
db = get_database() db = get_database()
session = db.session session = db.session
# Check for recent honeypot activity
if not has_recent_honeypot_access(session):
app_logger.info(f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes")
return
# Query distinct suspicious IPs # Query distinct suspicious IPs
results = session.query(distinct(AccessLog.ip)).filter( results = session.query(distinct(AccessLog.ip)).filter(
AccessLog.is_suspicious == True AccessLog.is_suspicious == True

View File

@@ -15,21 +15,16 @@ def _escape(value) -> str:
return "" return ""
return html.escape(str(value)) return html.escape(str(value))
def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool = False) -> str: def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str:
"""Format ISO timestamp for display with timezone conversion """Format ISO timestamp for display with timezone conversion
Args: Args:
iso_timestamp: ISO format timestamp string (UTC) iso_timestamp: ISO format timestamp string (UTC)
timezone: IANA timezone string to convert to
time_only: If True, return only HH:MM:SS, otherwise full datetime time_only: If True, return only HH:MM:SS, otherwise full datetime
""" """
try: try:
# Parse UTC timestamp # Parse UTC timestamp
dt = datetime.fromisoformat(iso_timestamp) dt = datetime.fromisoformat(iso_timestamp)
# Convert to target timezone
if dt.tzinfo is not None:
dt = dt.astimezone(ZoneInfo(timezone))
if time_only: if time_only:
return dt.strftime("%H:%M:%S") return dt.strftime("%H:%M:%S")
return dt.strftime("%Y-%m-%d %H:%M:%S") return dt.strftime("%Y-%m-%d %H:%M:%S")
@@ -38,12 +33,11 @@ def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool
return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str = '') -> str: def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
"""Generate dashboard HTML with access statistics """Generate dashboard HTML with access statistics
Args: Args:
stats: Statistics dictionary stats: Statistics dictionary
timezone: IANA timezone string (e.g., 'Europe/Paris', 'America/New_York')
dashboard_path: The secret dashboard path for generating API URLs dashboard_path: The secret dashboard path for generating API URLs
""" """
@@ -82,7 +76,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
<td class="ip-clickable">{_escape(log["ip"])}</td> <td class="ip-clickable">{_escape(log["ip"])}</td>
<td>{_escape(log["path"])}</td> <td>{_escape(log["path"])}</td>
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td> <td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td> <td>{format_timestamp(log["timestamp"], time_only=True)}</td>
</tr> </tr>
<tr class="ip-stats-row" id="stats-row-suspicious-{_escape(log["ip"]).replace(".", "-")}" style="display: none;"> <tr class="ip-stats-row" id="stats-row-suspicious-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
<td colspan="4" class="ip-stats-cell"> <td colspan="4" class="ip-stats-cell">
@@ -118,7 +112,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
<td>{_escape(log["path"])}</td> <td>{_escape(log["path"])}</td>
<td>{_escape(", ".join(log["attack_types"]))}</td> <td>{_escape(", ".join(log["attack_types"]))}</td>
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td> <td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td> <td>{format_timestamp(log["timestamp"],time_only=True)}</td>
</tr> </tr>
<tr class="ip-stats-row" id="stats-row-attack-{_escape(log["ip"]).replace(".", "-")}" style="display: none;"> <tr class="ip-stats-row" id="stats-row-attack-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
<td colspan="5" class="ip-stats-cell"> <td colspan="5" class="ip-stats-cell">
@@ -137,7 +131,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
<td>{_escape(log["username"])}</td> <td>{_escape(log["username"])}</td>
<td>{_escape(log["password"])}</td> <td>{_escape(log["password"])}</td>
<td>{_escape(log["path"])}</td> <td>{_escape(log["path"])}</td>
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td> <td>{format_timestamp(log["timestamp"], time_only=True)}</td>
</tr> </tr>
<tr class="ip-stats-row" id="stats-row-cred-{_escape(log["ip"]).replace(".", "-")}" style="display: none;"> <tr class="ip-stats-row" id="stats-row-cred-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
<td colspan="5" class="ip-stats-cell"> <td colspan="5" class="ip-stats-cell">
@@ -683,7 +677,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
</div> </div>
</div> </div>
<script> <script>
const SERVER_TIMEZONE = '{timezone}';
const DASHBOARD_PATH = '{dashboard_path}'; const DASHBOARD_PATH = '{dashboard_path}';
function formatTimestamp(isoTimestamp) {{ function formatTimestamp(isoTimestamp) {{
@@ -691,7 +684,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
try {{ try {{
const date = new Date(isoTimestamp); const date = new Date(isoTimestamp);
return date.toLocaleString('en-US', {{ return date.toLocaleString('en-US', {{
timeZone: SERVER_TIMEZONE,
year: 'numeric', year: 'numeric',
month: '2-digit', month: '2-digit',
day: '2-digit', day: '2-digit',

View File

@@ -17,7 +17,7 @@ class AccessTracker:
Maintains in-memory structures for fast dashboard access and Maintains in-memory structures for fast dashboard access and
persists data to SQLite for long-term storage and analysis. persists data to SQLite for long-term storage and analysis.
""" """
def __init__(self, db_manager: Optional[DatabaseManager] = None, timezone: Optional[ZoneInfo] = None): def __init__(self, db_manager: Optional[DatabaseManager] = None):
""" """
Initialize the access tracker. Initialize the access tracker.
@@ -30,7 +30,6 @@ class AccessTracker:
self.user_agent_counts: Dict[str, int] = defaultdict(int) self.user_agent_counts: Dict[str, int] = defaultdict(int)
self.access_log: List[Dict] = [] self.access_log: List[Dict] = []
self.credential_attempts: List[Dict] = [] self.credential_attempts: List[Dict] = []
self.timezone = timezone or ZoneInfo('UTC')
self.suspicious_patterns = [ self.suspicious_patterns = [
'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests', 'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests',
'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix', 'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
@@ -126,7 +125,7 @@ class AccessTracker:
'path': path, 'path': path,
'username': username, 'username': username,
'password': password, 'password': password,
'timestamp': datetime.now(self.timezone).isoformat() 'timestamp': datetime.now().isoformat()
}) })
# Persist to database # Persist to database
@@ -193,7 +192,7 @@ class AccessTracker:
'suspicious': is_suspicious, 'suspicious': is_suspicious,
'honeypot_triggered': self.is_honeypot_path(path), 'honeypot_triggered': self.is_honeypot_path(path),
'attack_types':attack_findings, 'attack_types':attack_findings,
'timestamp': datetime.now(self.timezone).isoformat() 'timestamp': datetime.now().isoformat()
}) })
# Persist to database # Persist to database