Merge pull request #42 from BlessedRebuS/feat/task-optimization
Optimize scheduled tasks to reduce unnecessary processing
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -56,6 +56,7 @@ secrets/
|
|||||||
.env
|
.env
|
||||||
.env.local
|
.env.local
|
||||||
.env.*.local
|
.env.*.local
|
||||||
|
.envrc
|
||||||
|
|
||||||
# Logs
|
# Logs
|
||||||
*.log
|
*.log
|
||||||
|
|||||||
@@ -3,7 +3,6 @@
|
|||||||
server:
|
server:
|
||||||
port: 5000
|
port: 5000
|
||||||
delay: 100 # Response delay in milliseconds
|
delay: 100 # Response delay in milliseconds
|
||||||
timezone: null # e.g., "America/New_York", "Europe/Paris" or null for system default
|
|
||||||
|
|
||||||
# manually set the server header, if null a random one will be used.
|
# manually set the server header, if null a random one will be used.
|
||||||
server_header: null
|
server_header: null
|
||||||
|
|||||||
@@ -1,5 +1,4 @@
|
|||||||
version: '3.8'
|
---
|
||||||
|
|
||||||
services:
|
services:
|
||||||
krawl:
|
krawl:
|
||||||
build:
|
build:
|
||||||
@@ -8,11 +7,26 @@ services:
|
|||||||
container_name: krawl-server
|
container_name: krawl-server
|
||||||
ports:
|
ports:
|
||||||
- "5000:5000"
|
- "5000:5000"
|
||||||
|
environment:
|
||||||
|
- CONFIG_LOCATION=config.yaml
|
||||||
|
# Set this to change the timezone; alternatively, mount /etc/timezone or /etc/localtime, depending on how the host environment manages its system time
|
||||||
|
# - TZ=${TZ}
|
||||||
volumes:
|
volumes:
|
||||||
- ./wordlists.json:/app/wordlists.json:ro
|
- ./wordlists.json:/app/wordlists.json:ro
|
||||||
- ./config.yaml:/app/config.yaml:ro
|
- ./config.yaml:/app/config.yaml:ro
|
||||||
- ./logs:/app/logs
|
- ./logs:/app/logs
|
||||||
- ./exports:/app/exports
|
- ./exports:/app/exports
|
||||||
environment:
|
- data:/app/data
|
||||||
- CONFIG_LOCATION=config.yaml
|
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
|
develop:
|
||||||
|
watch:
|
||||||
|
- path: ./Dockerfile
|
||||||
|
action: rebuild
|
||||||
|
- path: ./src/
|
||||||
|
action: sync+restart
|
||||||
|
target: /app/src
|
||||||
|
- path: ./docker-compose.yaml
|
||||||
|
action: rebuild
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
data:
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ class Analyzer:
|
|||||||
"""
|
"""
|
||||||
Analyzes users activity and produces aggregated insights
|
Analyzes users activity and produces aggregated insights
|
||||||
"""
|
"""
|
||||||
def __init__(self, db_manager: Optional[DatabaseManager] = None, timezone: Optional[ZoneInfo] = None):
|
def __init__(self, db_manager: Optional[DatabaseManager] = None):
|
||||||
"""
|
"""
|
||||||
Initialize the access tracker.
|
Initialize the access tracker.
|
||||||
|
|
||||||
@@ -31,7 +31,6 @@ class Analyzer:
|
|||||||
db_manager: Optional DatabaseManager for persistence.
|
db_manager: Optional DatabaseManager for persistence.
|
||||||
If None, will use the global singleton.
|
If None, will use the global singleton.
|
||||||
"""
|
"""
|
||||||
self.timezone = timezone or ZoneInfo('UTC')
|
|
||||||
|
|
||||||
# Database manager for persistence (lazily initialized)
|
# Database manager for persistence (lazily initialized)
|
||||||
self._db_manager = db_manager
|
self._db_manager = db_manager
|
||||||
|
|||||||
@@ -32,7 +32,6 @@ class Config:
|
|||||||
# Database settings
|
# Database settings
|
||||||
database_path: str = "data/krawl.db"
|
database_path: str = "data/krawl.db"
|
||||||
database_retention_days: int = 30
|
database_retention_days: int = 30
|
||||||
timezone: str = None # IANA timezone (e.g., 'America/New_York', 'Europe/Rome')
|
|
||||||
|
|
||||||
# Analyzer settings
|
# Analyzer settings
|
||||||
http_risky_methods_threshold: float = None
|
http_risky_methods_threshold: float = None
|
||||||
@@ -42,39 +41,6 @@ class Config:
|
|||||||
user_agents_used_threshold: float = None
|
user_agents_used_threshold: float = None
|
||||||
attack_urls_threshold: float = None
|
attack_urls_threshold: float = None
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
# Try to fetch timezone before if not set
|
|
||||||
def get_system_timezone() -> str:
|
|
||||||
"""Get the system's default timezone"""
|
|
||||||
try:
|
|
||||||
if os.path.islink('/etc/localtime'):
|
|
||||||
tz_path = os.readlink('/etc/localtime')
|
|
||||||
if 'zoneinfo/' in tz_path:
|
|
||||||
return tz_path.split('zoneinfo/')[-1]
|
|
||||||
|
|
||||||
local_tz = time.tzname[time.daylight]
|
|
||||||
if local_tz and local_tz != 'UTC':
|
|
||||||
return local_tz
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Default fallback to UTC
|
|
||||||
return 'UTC'
|
|
||||||
|
|
||||||
def get_timezone(self) -> ZoneInfo:
|
|
||||||
"""Get configured timezone as ZoneInfo object"""
|
|
||||||
if self.timezone:
|
|
||||||
try:
|
|
||||||
return ZoneInfo(self.timezone)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
system_tz = self.get_system_timezone()
|
|
||||||
try:
|
|
||||||
return ZoneInfo(system_tz)
|
|
||||||
except Exception:
|
|
||||||
return ZoneInfo('UTC')
|
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_yaml(cls) -> 'Config':
|
def from_yaml(cls) -> 'Config':
|
||||||
"""Create configuration from YAML file"""
|
"""Create configuration from YAML file"""
|
||||||
@@ -118,7 +84,6 @@ class Config:
|
|||||||
port=server.get('port', 5000),
|
port=server.get('port', 5000),
|
||||||
delay=server.get('delay', 100),
|
delay=server.get('delay', 100),
|
||||||
server_header=server.get('server_header',""),
|
server_header=server.get('server_header',""),
|
||||||
timezone=server.get('timezone'),
|
|
||||||
links_length_range=(
|
links_length_range=(
|
||||||
links.get('min_length', 5),
|
links.get('min_length', 5),
|
||||||
links.get('max_length', 15)
|
links.get('max_length', 15)
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ Provides SQLAlchemy session management and database initialization.
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
import stat
|
import stat
|
||||||
from datetime import datetime
|
from datetime import datetime, timedelta
|
||||||
from typing import Optional, List, Dict, Any
|
from typing import Optional, List, Dict, Any
|
||||||
from zoneinfo import ZoneInfo
|
from zoneinfo import ZoneInfo
|
||||||
|
|
||||||
@@ -141,7 +141,7 @@ class DatabaseManager:
|
|||||||
method=method[:10],
|
method=method[:10],
|
||||||
is_suspicious=is_suspicious,
|
is_suspicious=is_suspicious,
|
||||||
is_honeypot_trigger=is_honeypot_trigger,
|
is_honeypot_trigger=is_honeypot_trigger,
|
||||||
timestamp=datetime.now(tz=ZoneInfo('UTC'))
|
timestamp=datetime.now()
|
||||||
)
|
)
|
||||||
session.add(access_log)
|
session.add(access_log)
|
||||||
session.flush() # Get the ID before committing
|
session.flush() # Get the ID before committing
|
||||||
@@ -199,7 +199,7 @@ class DatabaseManager:
|
|||||||
path=sanitize_path(path),
|
path=sanitize_path(path),
|
||||||
username=sanitize_credential(username),
|
username=sanitize_credential(username),
|
||||||
password=sanitize_credential(password),
|
password=sanitize_credential(password),
|
||||||
timestamp=datetime.now(tz=ZoneInfo('UTC'))
|
timestamp=datetime.now()
|
||||||
)
|
)
|
||||||
session.add(credential)
|
session.add(credential)
|
||||||
session.commit()
|
session.commit()
|
||||||
@@ -221,7 +221,7 @@ class DatabaseManager:
|
|||||||
ip: IP address to update
|
ip: IP address to update
|
||||||
"""
|
"""
|
||||||
sanitized_ip = sanitize_ip(ip)
|
sanitized_ip = sanitize_ip(ip)
|
||||||
now = datetime.now(tz=ZoneInfo('UTC'))
|
now = datetime.now()
|
||||||
|
|
||||||
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
||||||
|
|
||||||
@@ -290,7 +290,7 @@ class DatabaseManager:
|
|||||||
# Record the manual category change
|
# Record the manual category change
|
||||||
old_category = ip_stats.category
|
old_category = ip_stats.category
|
||||||
if old_category != category:
|
if old_category != category:
|
||||||
self._record_category_change(sanitized_ip, old_category, category, datetime.now(tz=ZoneInfo('UTC')))
|
self._record_category_change(sanitized_ip, old_category, category, datetime.now())
|
||||||
|
|
||||||
ip_stats.category = category
|
ip_stats.category = category
|
||||||
ip_stats.manual_category = True
|
ip_stats.manual_category = True
|
||||||
@@ -352,7 +352,7 @@ class DatabaseManager:
|
|||||||
{
|
{
|
||||||
'old_category': h.old_category,
|
'old_category': h.old_category,
|
||||||
'new_category': h.new_category,
|
'new_category': h.new_category,
|
||||||
'timestamp': h.timestamp.isoformat() + '+00:00'
|
'timestamp': h.timestamp.isoformat()
|
||||||
}
|
}
|
||||||
for h in history
|
for h in history
|
||||||
]
|
]
|
||||||
@@ -390,6 +390,7 @@ class DatabaseManager:
|
|||||||
def get_unenriched_ips(self, limit: int = 100) -> List[str]:
|
def get_unenriched_ips(self, limit: int = 100) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Get IPs that don't have reputation data yet.
|
Get IPs that don't have reputation data yet.
|
||||||
|
Excludes RFC1918 private addresses and other non-routable IPs.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
limit: Maximum number of IPs to return
|
limit: Maximum number of IPs to return
|
||||||
@@ -400,7 +401,18 @@ class DatabaseManager:
|
|||||||
session = self.session
|
session = self.session
|
||||||
try:
|
try:
|
||||||
ips = session.query(IpStats.ip).filter(
|
ips = session.query(IpStats.ip).filter(
|
||||||
IpStats.country_code.is_(None)
|
IpStats.country_code.is_(None),
|
||||||
|
~IpStats.ip.like('10.%'),
|
||||||
|
~IpStats.ip.like('172.16.%'),
|
||||||
|
~IpStats.ip.like('172.17.%'),
|
||||||
|
~IpStats.ip.like('172.18.%'),
|
||||||
|
~IpStats.ip.like('172.19.%'),
|
||||||
|
~IpStats.ip.like('172.2_.%'),
|
||||||
|
~IpStats.ip.like('172.30.%'),
|
||||||
|
~IpStats.ip.like('172.31.%'),
|
||||||
|
~IpStats.ip.like('192.168.%'),
|
||||||
|
~IpStats.ip.like('127.%'),
|
||||||
|
~IpStats.ip.like('169.254.%')
|
||||||
).limit(limit).all()
|
).limit(limit).all()
|
||||||
return [ip[0] for ip in ips]
|
return [ip[0] for ip in ips]
|
||||||
finally:
|
finally:
|
||||||
@@ -411,7 +423,8 @@ class DatabaseManager:
|
|||||||
limit: int = 100,
|
limit: int = 100,
|
||||||
offset: int = 0,
|
offset: int = 0,
|
||||||
ip_filter: Optional[str] = None,
|
ip_filter: Optional[str] = None,
|
||||||
suspicious_only: bool = False
|
suspicious_only: bool = False,
|
||||||
|
since_minutes: Optional[int] = None
|
||||||
) -> List[Dict[str, Any]]:
|
) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Retrieve access logs with optional filtering.
|
Retrieve access logs with optional filtering.
|
||||||
@@ -421,6 +434,7 @@ class DatabaseManager:
|
|||||||
offset: Number of records to skip
|
offset: Number of records to skip
|
||||||
ip_filter: Filter by IP address
|
ip_filter: Filter by IP address
|
||||||
suspicious_only: Only return suspicious requests
|
suspicious_only: Only return suspicious requests
|
||||||
|
since_minutes: Only return logs from the last N minutes
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of access log dictionaries
|
List of access log dictionaries
|
||||||
@@ -433,6 +447,9 @@ class DatabaseManager:
|
|||||||
query = query.filter(AccessLog.ip == sanitize_ip(ip_filter))
|
query = query.filter(AccessLog.ip == sanitize_ip(ip_filter))
|
||||||
if suspicious_only:
|
if suspicious_only:
|
||||||
query = query.filter(AccessLog.is_suspicious == True)
|
query = query.filter(AccessLog.is_suspicious == True)
|
||||||
|
if since_minutes is not None:
|
||||||
|
cutoff_time = datetime.now() - timedelta(minutes=since_minutes)
|
||||||
|
query = query.filter(AccessLog.timestamp >= cutoff_time)
|
||||||
|
|
||||||
logs = query.offset(offset).limit(limit).all()
|
logs = query.offset(offset).limit(limit).all()
|
||||||
|
|
||||||
@@ -445,7 +462,7 @@ class DatabaseManager:
|
|||||||
'method': log.method,
|
'method': log.method,
|
||||||
'is_suspicious': log.is_suspicious,
|
'is_suspicious': log.is_suspicious,
|
||||||
'is_honeypot_trigger': log.is_honeypot_trigger,
|
'is_honeypot_trigger': log.is_honeypot_trigger,
|
||||||
'timestamp': log.timestamp.isoformat() + '+00:00',
|
'timestamp': log.timestamp.isoformat(),
|
||||||
'attack_types': [d.attack_type for d in log.attack_detections]
|
'attack_types': [d.attack_type for d in log.attack_detections]
|
||||||
}
|
}
|
||||||
for log in logs
|
for log in logs
|
||||||
@@ -538,7 +555,7 @@ class DatabaseManager:
|
|||||||
'path': attempt.path,
|
'path': attempt.path,
|
||||||
'username': attempt.username,
|
'username': attempt.username,
|
||||||
'password': attempt.password,
|
'password': attempt.password,
|
||||||
'timestamp': attempt.timestamp.isoformat() + '+00:00'
|
'timestamp': attempt.timestamp.isoformat()
|
||||||
}
|
}
|
||||||
for attempt in attempts
|
for attempt in attempts
|
||||||
]
|
]
|
||||||
@@ -565,8 +582,8 @@ class DatabaseManager:
|
|||||||
{
|
{
|
||||||
'ip': s.ip,
|
'ip': s.ip,
|
||||||
'total_requests': s.total_requests,
|
'total_requests': s.total_requests,
|
||||||
'first_seen': s.first_seen.isoformat() + '+00:00',
|
'first_seen': s.first_seen.isoformat(),
|
||||||
'last_seen': s.last_seen.isoformat() + '+00:00',
|
'last_seen': s.last_seen.isoformat(),
|
||||||
'country_code': s.country_code,
|
'country_code': s.country_code,
|
||||||
'city': s.city,
|
'city': s.city,
|
||||||
'asn': s.asn,
|
'asn': s.asn,
|
||||||
@@ -606,8 +623,8 @@ class DatabaseManager:
|
|||||||
return {
|
return {
|
||||||
'ip': stat.ip,
|
'ip': stat.ip,
|
||||||
'total_requests': stat.total_requests,
|
'total_requests': stat.total_requests,
|
||||||
'first_seen': stat.first_seen.isoformat() + '+00:00' if stat.first_seen else None,
|
'first_seen': stat.first_seen.isoformat() if stat.first_seen else None,
|
||||||
'last_seen': stat.last_seen.isoformat() + '+00:00' if stat.last_seen else None,
|
'last_seen': stat.last_seen.isoformat() if stat.last_seen else None,
|
||||||
'country_code': stat.country_code,
|
'country_code': stat.country_code,
|
||||||
'city': stat.city,
|
'city': stat.city,
|
||||||
'asn': stat.asn,
|
'asn': stat.asn,
|
||||||
@@ -619,7 +636,7 @@ class DatabaseManager:
|
|||||||
'category': stat.category,
|
'category': stat.category,
|
||||||
'category_scores': stat.category_scores or {},
|
'category_scores': stat.category_scores or {},
|
||||||
'manual_category': stat.manual_category,
|
'manual_category': stat.manual_category,
|
||||||
'last_analysis': stat.last_analysis.isoformat() + '+00:00' if stat.last_analysis else None,
|
'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None,
|
||||||
'category_history': category_history
|
'category_history': category_history
|
||||||
}
|
}
|
||||||
finally:
|
finally:
|
||||||
@@ -690,7 +707,7 @@ class DatabaseManager:
|
|||||||
Args:
|
Args:
|
||||||
limit: Maximum number of results
|
limit: Maximum number of results
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of (path, count) tuples ordered by count descending
|
List of (path, count) tuples ordered by count descending
|
||||||
"""
|
"""
|
||||||
session = self.session
|
session = self.session
|
||||||
@@ -753,7 +770,7 @@ class DatabaseManager:
|
|||||||
'ip': log.ip,
|
'ip': log.ip,
|
||||||
'path': log.path,
|
'path': log.path,
|
||||||
'user_agent': log.user_agent,
|
'user_agent': log.user_agent,
|
||||||
'timestamp': log.timestamp.isoformat() + '+00:00'
|
'timestamp': log.timestamp.isoformat()
|
||||||
}
|
}
|
||||||
for log in logs
|
for log in logs
|
||||||
]
|
]
|
||||||
@@ -811,7 +828,7 @@ class DatabaseManager:
|
|||||||
'ip': log.ip,
|
'ip': log.ip,
|
||||||
'path': log.path,
|
'path': log.path,
|
||||||
'user_agent': log.user_agent,
|
'user_agent': log.user_agent,
|
||||||
'timestamp': log.timestamp.isoformat() + '+00:00',
|
'timestamp': log.timestamp.isoformat(),
|
||||||
'attack_types': [d.attack_type for d in log.attack_detections]
|
'attack_types': [d.attack_type for d in log.attack_detections]
|
||||||
}
|
}
|
||||||
for log in logs
|
for log in logs
|
||||||
|
|||||||
@@ -407,9 +407,8 @@ class Handler(BaseHTTPRequestHandler):
|
|||||||
self.end_headers()
|
self.end_headers()
|
||||||
try:
|
try:
|
||||||
stats = self.tracker.get_stats()
|
stats = self.tracker.get_stats()
|
||||||
timezone = str(self.config.timezone) if self.config.timezone else 'UTC'
|
|
||||||
dashboard_path = self.config.dashboard_secret_path
|
dashboard_path = self.config.dashboard_secret_path
|
||||||
self.wfile.write(generate_dashboard(stats, timezone, dashboard_path).encode())
|
self.wfile.write(generate_dashboard(stats, dashboard_path).encode())
|
||||||
except BrokenPipeError:
|
except BrokenPipeError:
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -8,20 +8,17 @@ Provides two loggers: app (application) and access (HTTP access logs).
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
from logging.handlers import RotatingFileHandler
|
from logging.handlers import RotatingFileHandler
|
||||||
from typing import Optional
|
|
||||||
from zoneinfo import ZoneInfo
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
class TimezoneFormatter(logging.Formatter):
|
class TimezoneFormatter(logging.Formatter):
|
||||||
"""Custom formatter that respects configured timezone"""
|
"""Custom formatter that respects configured timezone"""
|
||||||
def __init__(self, fmt=None, datefmt=None, timezone: Optional[ZoneInfo] = None):
|
def __init__(self, fmt=None, datefmt=None):
|
||||||
super().__init__(fmt, datefmt)
|
super().__init__(fmt, datefmt)
|
||||||
self.timezone = timezone or ZoneInfo('UTC')
|
|
||||||
|
|
||||||
def formatTime(self, record, datefmt=None):
|
def formatTime(self, record, datefmt=None):
|
||||||
"""Override formatTime to use configured timezone"""
|
"""Override formatTime to use configured timezone"""
|
||||||
dt = datetime.fromtimestamp(record.created, tz=self.timezone)
|
dt = datetime.fromtimestamp(record.created)
|
||||||
if datefmt:
|
if datefmt:
|
||||||
return dt.strftime(datefmt)
|
return dt.strftime(datefmt)
|
||||||
return dt.isoformat()
|
return dt.isoformat()
|
||||||
@@ -37,19 +34,16 @@ class LoggerManager:
|
|||||||
cls._instance._initialized = False
|
cls._instance._initialized = False
|
||||||
return cls._instance
|
return cls._instance
|
||||||
|
|
||||||
def initialize(self, log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None:
|
def initialize(self, log_dir: str = "logs") -> None:
|
||||||
"""
|
"""
|
||||||
Initialize the logging system with rotating file handlers.
|
Initialize the logging system with rotating file handlers.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
log_dir: Directory for log files (created if not exists)
|
log_dir: Directory for log files (created if not exists)
|
||||||
timezone: ZoneInfo timezone for log timestamps (defaults to UTC)
|
|
||||||
"""
|
"""
|
||||||
if self._initialized:
|
if self._initialized:
|
||||||
return
|
return
|
||||||
|
|
||||||
self.timezone = timezone or ZoneInfo('UTC')
|
|
||||||
|
|
||||||
# Create log directory if it doesn't exist
|
# Create log directory if it doesn't exist
|
||||||
os.makedirs(log_dir, exist_ok=True)
|
os.makedirs(log_dir, exist_ok=True)
|
||||||
|
|
||||||
@@ -57,7 +51,6 @@ class LoggerManager:
|
|||||||
log_format = TimezoneFormatter(
|
log_format = TimezoneFormatter(
|
||||||
"[%(asctime)s] %(levelname)s - %(message)s",
|
"[%(asctime)s] %(levelname)s - %(message)s",
|
||||||
datefmt="%Y-%m-%d %H:%M:%S",
|
datefmt="%Y-%m-%d %H:%M:%S",
|
||||||
timezone=self.timezone
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Rotation settings: 1MB max, 5 backups
|
# Rotation settings: 1MB max, 5 backups
|
||||||
@@ -104,7 +97,7 @@ class LoggerManager:
|
|||||||
self._credential_logger.handlers.clear()
|
self._credential_logger.handlers.clear()
|
||||||
|
|
||||||
# Credential logger uses a simple format: timestamp|ip|username|password|path
|
# Credential logger uses a simple format: timestamp|ip|username|password|path
|
||||||
credential_format = TimezoneFormatter("%(message)s", timezone=self.timezone)
|
credential_format = TimezoneFormatter("%(message)s")
|
||||||
|
|
||||||
credential_file_handler = RotatingFileHandler(
|
credential_file_handler = RotatingFileHandler(
|
||||||
os.path.join(log_dir, "credentials.log"),
|
os.path.join(log_dir, "credentials.log"),
|
||||||
@@ -157,6 +150,6 @@ def get_credential_logger() -> logging.Logger:
|
|||||||
return _logger_manager.credentials
|
return _logger_manager.credentials
|
||||||
|
|
||||||
|
|
||||||
def initialize_logging(log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None:
|
def initialize_logging(log_dir: str = "logs") -> None:
|
||||||
"""Initialize the logging system."""
|
"""Initialize the logging system."""
|
||||||
_logger_manager.initialize(log_dir, timezone)
|
_logger_manager.initialize(log_dir)
|
||||||
|
|||||||
@@ -29,7 +29,6 @@ def print_usage():
|
|||||||
print(' server:')
|
print(' server:')
|
||||||
print(' port: 5000')
|
print(' port: 5000')
|
||||||
print(' delay: 100')
|
print(' delay: 100')
|
||||||
print(' timezone: null # or "America/New_York"')
|
|
||||||
print(' links:')
|
print(' links:')
|
||||||
print(' min_length: 5')
|
print(' min_length: 5')
|
||||||
print(' max_length: 15')
|
print(' max_length: 15')
|
||||||
@@ -55,11 +54,8 @@ def main():
|
|||||||
|
|
||||||
config = get_config()
|
config = get_config()
|
||||||
|
|
||||||
# Get timezone configuration
|
|
||||||
tz = config.get_timezone()
|
|
||||||
|
|
||||||
# Initialize logging with timezone
|
# Initialize logging
|
||||||
initialize_logging(timezone=tz)
|
initialize_logging()
|
||||||
app_logger = get_app_logger()
|
app_logger = get_app_logger()
|
||||||
access_logger = get_access_logger()
|
access_logger = get_access_logger()
|
||||||
credential_logger = get_credential_logger()
|
credential_logger = get_credential_logger()
|
||||||
@@ -71,8 +67,8 @@ def main():
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
app_logger.warning(f'Database initialization failed: {e}. Continuing with in-memory only.')
|
app_logger.warning(f'Database initialization failed: {e}. Continuing with in-memory only.')
|
||||||
|
|
||||||
tracker = AccessTracker(timezone=tz)
|
tracker = AccessTracker()
|
||||||
analyzer = Analyzer(timezone=tz)
|
analyzer = Analyzer()
|
||||||
|
|
||||||
Handler.config = config
|
Handler.config = config
|
||||||
Handler.tracker = tracker
|
Handler.tracker = tracker
|
||||||
@@ -99,7 +95,6 @@ def main():
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
app_logger.info(f'Starting deception server on port {config.port}...')
|
app_logger.info(f'Starting deception server on port {config.port}...')
|
||||||
app_logger.info(f'Timezone configured: {tz.key}')
|
|
||||||
app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}')
|
app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}')
|
||||||
if config.canary_token_url:
|
if config.canary_token_url:
|
||||||
app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries')
|
app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries')
|
||||||
|
|||||||
@@ -73,12 +73,18 @@ def main():
|
|||||||
"attack_url": 0
|
"attack_url": 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
accesses = db_manager.get_access_logs(limit=999999999)
|
# Get IPs with recent activity (last minute to match cron schedule)
|
||||||
ips = {item['ip'] for item in accesses}
|
recent_accesses = db_manager.get_access_logs(limit=999999999, since_minutes=1)
|
||||||
|
ips_to_analyze = {item['ip'] for item in recent_accesses}
|
||||||
|
|
||||||
for ip in ips:
|
if not ips_to_analyze:
|
||||||
ip_accesses = [item for item in accesses if item["ip"] == ip]
|
app_logger.debug("[Background Task] analyze-ips: No recent activity, skipping")
|
||||||
total_accesses_count = len(accesses)
|
return
|
||||||
|
|
||||||
|
for ip in ips_to_analyze:
|
||||||
|
# Get full history for this IP to perform accurate analysis
|
||||||
|
ip_accesses = db_manager.get_access_logs(limit=999999999, ip_filter=ip)
|
||||||
|
total_accesses_count = len(ip_accesses)
|
||||||
if total_accesses_count <= 0:
|
if total_accesses_count <= 0:
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -87,7 +93,7 @@ def main():
|
|||||||
category = "unknown"
|
category = "unknown"
|
||||||
analyzed_metrics = {}
|
analyzed_metrics = {}
|
||||||
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
|
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
|
||||||
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
|
last_analysis = datetime.now()
|
||||||
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
||||||
return 0
|
return 0
|
||||||
#--------------------- HTTP Methods ---------------------
|
#--------------------- HTTP Methods ---------------------
|
||||||
@@ -147,9 +153,9 @@ def main():
|
|||||||
score["regular_user"]["robots_violations"] = False
|
score["regular_user"]["robots_violations"] = False
|
||||||
|
|
||||||
#--------------------- Requests Timing ---------------------
|
#--------------------- Requests Timing ---------------------
|
||||||
#Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
|
# Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
|
||||||
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
|
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
|
||||||
now_utc = datetime.now(tz=ZoneInfo('UTC'))
|
now_utc = datetime.now()
|
||||||
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
|
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
|
||||||
timestamps = sorted(timestamps, reverse=True)
|
timestamps = sorted(timestamps, reverse=True)
|
||||||
time_diffs = []
|
time_diffs = []
|
||||||
@@ -260,6 +266,6 @@ def main():
|
|||||||
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
|
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
|
||||||
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
|
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
|
||||||
category = max(category_scores, key=category_scores.get)
|
category = max(category_scores, key=category_scores.get)
|
||||||
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
|
last_analysis = datetime.now()
|
||||||
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
||||||
return
|
return
|
||||||
@@ -21,7 +21,7 @@ def main():
|
|||||||
|
|
||||||
# Only get IPs that haven't been enriched yet
|
# Only get IPs that haven't been enriched yet
|
||||||
unenriched_ips = db_manager.get_unenriched_ips(limit=50)
|
unenriched_ips = db_manager.get_unenriched_ips(limit=50)
|
||||||
|
app_logger.info(f"{len(unenriched_ips)} IP's need to be have reputation enrichment.")
|
||||||
for ip in unenriched_ips:
|
for ip in unenriched_ips:
|
||||||
try:
|
try:
|
||||||
api_url = "https://iprep.lcrawl.com/api/iprep/"
|
api_url = "https://iprep.lcrawl.com/api/iprep/"
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
# tasks/export_malicious_ips.py
|
# tasks/export_malicious_ips.py
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from zoneinfo import ZoneInfo
|
||||||
from logger import get_app_logger
|
from logger import get_app_logger
|
||||||
from database import get_database
|
from database import get_database
|
||||||
from models import AccessLog
|
from models import AccessLog
|
||||||
@@ -24,6 +26,15 @@ OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
|
|||||||
# ----------------------
|
# ----------------------
|
||||||
# TASK LOGIC
|
# TASK LOGIC
|
||||||
# ----------------------
|
# ----------------------
|
||||||
|
def has_recent_honeypot_access(session, minutes: int = 5) -> bool:
|
||||||
|
"""Check if honeypot was accessed in the last N minutes."""
|
||||||
|
cutoff_time = datetime.now() - timedelta(minutes=minutes)
|
||||||
|
count = session.query(AccessLog).filter(
|
||||||
|
AccessLog.is_honeypot_trigger == True,
|
||||||
|
AccessLog.timestamp >= cutoff_time
|
||||||
|
).count()
|
||||||
|
return count > 0
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""
|
"""
|
||||||
Export all IPs flagged as suspicious to a text file.
|
Export all IPs flagged as suspicious to a text file.
|
||||||
@@ -36,6 +47,11 @@ def main():
|
|||||||
db = get_database()
|
db = get_database()
|
||||||
session = db.session
|
session = db.session
|
||||||
|
|
||||||
|
# Check for recent honeypot activity
|
||||||
|
if not has_recent_honeypot_access(session):
|
||||||
|
app_logger.info(f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes")
|
||||||
|
return
|
||||||
|
|
||||||
# Query distinct suspicious IPs
|
# Query distinct suspicious IPs
|
||||||
results = session.query(distinct(AccessLog.ip)).filter(
|
results = session.query(distinct(AccessLog.ip)).filter(
|
||||||
AccessLog.is_suspicious == True
|
AccessLog.is_suspicious == True
|
||||||
|
|||||||
@@ -15,21 +15,16 @@ def _escape(value) -> str:
|
|||||||
return ""
|
return ""
|
||||||
return html.escape(str(value))
|
return html.escape(str(value))
|
||||||
|
|
||||||
def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool = False) -> str:
|
def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str:
|
||||||
"""Format ISO timestamp for display with timezone conversion
|
"""Format ISO timestamp for display with timezone conversion
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
iso_timestamp: ISO format timestamp string (UTC)
|
iso_timestamp: ISO format timestamp string (UTC)
|
||||||
timezone: IANA timezone string to convert to
|
|
||||||
time_only: If True, return only HH:MM:SS, otherwise full datetime
|
time_only: If True, return only HH:MM:SS, otherwise full datetime
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Parse UTC timestamp
|
# Parse UTC timestamp
|
||||||
dt = datetime.fromisoformat(iso_timestamp)
|
dt = datetime.fromisoformat(iso_timestamp)
|
||||||
# Convert to target timezone
|
|
||||||
if dt.tzinfo is not None:
|
|
||||||
dt = dt.astimezone(ZoneInfo(timezone))
|
|
||||||
|
|
||||||
if time_only:
|
if time_only:
|
||||||
return dt.strftime("%H:%M:%S")
|
return dt.strftime("%H:%M:%S")
|
||||||
return dt.strftime("%Y-%m-%d %H:%M:%S")
|
return dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||||
@@ -38,12 +33,11 @@ def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool
|
|||||||
return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
|
return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
|
||||||
|
|
||||||
|
|
||||||
def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str = '') -> str:
|
def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
|
||||||
"""Generate dashboard HTML with access statistics
|
"""Generate dashboard HTML with access statistics
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
stats: Statistics dictionary
|
stats: Statistics dictionary
|
||||||
timezone: IANA timezone string (e.g., 'Europe/Paris', 'America/New_York')
|
|
||||||
dashboard_path: The secret dashboard path for generating API URLs
|
dashboard_path: The secret dashboard path for generating API URLs
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -82,7 +76,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
|||||||
<td class="ip-clickable">{_escape(log["ip"])}</td>
|
<td class="ip-clickable">{_escape(log["ip"])}</td>
|
||||||
<td>{_escape(log["path"])}</td>
|
<td>{_escape(log["path"])}</td>
|
||||||
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
|
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
|
||||||
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
|
<td>{format_timestamp(log["timestamp"], time_only=True)}</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr class="ip-stats-row" id="stats-row-suspicious-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
<tr class="ip-stats-row" id="stats-row-suspicious-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
||||||
<td colspan="4" class="ip-stats-cell">
|
<td colspan="4" class="ip-stats-cell">
|
||||||
@@ -118,7 +112,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
|||||||
<td>{_escape(log["path"])}</td>
|
<td>{_escape(log["path"])}</td>
|
||||||
<td>{_escape(", ".join(log["attack_types"]))}</td>
|
<td>{_escape(", ".join(log["attack_types"]))}</td>
|
||||||
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
|
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
|
||||||
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
|
<td>{format_timestamp(log["timestamp"],time_only=True)}</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr class="ip-stats-row" id="stats-row-attack-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
<tr class="ip-stats-row" id="stats-row-attack-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
||||||
<td colspan="5" class="ip-stats-cell">
|
<td colspan="5" class="ip-stats-cell">
|
||||||
@@ -137,7 +131,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
|||||||
<td>{_escape(log["username"])}</td>
|
<td>{_escape(log["username"])}</td>
|
||||||
<td>{_escape(log["password"])}</td>
|
<td>{_escape(log["password"])}</td>
|
||||||
<td>{_escape(log["path"])}</td>
|
<td>{_escape(log["path"])}</td>
|
||||||
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
|
<td>{format_timestamp(log["timestamp"], time_only=True)}</td>
|
||||||
</tr>
|
</tr>
|
||||||
<tr class="ip-stats-row" id="stats-row-cred-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
<tr class="ip-stats-row" id="stats-row-cred-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
||||||
<td colspan="5" class="ip-stats-cell">
|
<td colspan="5" class="ip-stats-cell">
|
||||||
@@ -683,7 +677,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<script>
|
<script>
|
||||||
const SERVER_TIMEZONE = '{timezone}';
|
|
||||||
const DASHBOARD_PATH = '{dashboard_path}';
|
const DASHBOARD_PATH = '{dashboard_path}';
|
||||||
|
|
||||||
function formatTimestamp(isoTimestamp) {{
|
function formatTimestamp(isoTimestamp) {{
|
||||||
@@ -691,7 +684,6 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
|||||||
try {{
|
try {{
|
||||||
const date = new Date(isoTimestamp);
|
const date = new Date(isoTimestamp);
|
||||||
return date.toLocaleString('en-US', {{
|
return date.toLocaleString('en-US', {{
|
||||||
timeZone: SERVER_TIMEZONE,
|
|
||||||
year: 'numeric',
|
year: 'numeric',
|
||||||
month: '2-digit',
|
month: '2-digit',
|
||||||
day: '2-digit',
|
day: '2-digit',
|
||||||
|
|||||||
@@ -17,7 +17,7 @@ class AccessTracker:
|
|||||||
Maintains in-memory structures for fast dashboard access and
|
Maintains in-memory structures for fast dashboard access and
|
||||||
persists data to SQLite for long-term storage and analysis.
|
persists data to SQLite for long-term storage and analysis.
|
||||||
"""
|
"""
|
||||||
def __init__(self, db_manager: Optional[DatabaseManager] = None, timezone: Optional[ZoneInfo] = None):
|
def __init__(self, db_manager: Optional[DatabaseManager] = None):
|
||||||
"""
|
"""
|
||||||
Initialize the access tracker.
|
Initialize the access tracker.
|
||||||
|
|
||||||
@@ -30,7 +30,6 @@ class AccessTracker:
|
|||||||
self.user_agent_counts: Dict[str, int] = defaultdict(int)
|
self.user_agent_counts: Dict[str, int] = defaultdict(int)
|
||||||
self.access_log: List[Dict] = []
|
self.access_log: List[Dict] = []
|
||||||
self.credential_attempts: List[Dict] = []
|
self.credential_attempts: List[Dict] = []
|
||||||
self.timezone = timezone or ZoneInfo('UTC')
|
|
||||||
self.suspicious_patterns = [
|
self.suspicious_patterns = [
|
||||||
'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests',
|
'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests',
|
||||||
'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
|
'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
|
||||||
@@ -126,7 +125,7 @@ class AccessTracker:
|
|||||||
'path': path,
|
'path': path,
|
||||||
'username': username,
|
'username': username,
|
||||||
'password': password,
|
'password': password,
|
||||||
'timestamp': datetime.now(self.timezone).isoformat()
|
'timestamp': datetime.now().isoformat()
|
||||||
})
|
})
|
||||||
|
|
||||||
# Persist to database
|
# Persist to database
|
||||||
@@ -193,7 +192,7 @@ class AccessTracker:
|
|||||||
'suspicious': is_suspicious,
|
'suspicious': is_suspicious,
|
||||||
'honeypot_triggered': self.is_honeypot_path(path),
|
'honeypot_triggered': self.is_honeypot_path(path),
|
||||||
'attack_types':attack_findings,
|
'attack_types':attack_findings,
|
||||||
'timestamp': datetime.now(self.timezone).isoformat()
|
'timestamp': datetime.now().isoformat()
|
||||||
})
|
})
|
||||||
|
|
||||||
# Persist to database
|
# Persist to database
|
||||||
|
|||||||
Reference in New Issue
Block a user