Merge pull request #42 from BlessedRebuS/feat/task-optimization
Optimize scheduled tasks to reduce unnecessary processing
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -56,6 +56,7 @@ secrets/
|
||||
.env
|
||||
.env.local
|
||||
.env.*.local
|
||||
.envrc
|
||||
|
||||
# Logs
|
||||
*.log
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
server:
|
||||
port: 5000
|
||||
delay: 100 # Response delay in milliseconds
|
||||
timezone: null # e.g., "America/New_York", "Europe/Paris" or null for system default
|
||||
|
||||
# manually set the server header, if null a random one will be used.
|
||||
server_header: null
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
version: '3.8'
|
||||
|
||||
---
|
||||
services:
|
||||
krawl:
|
||||
build:
|
||||
@@ -8,11 +7,26 @@ services:
|
||||
container_name: krawl-server
|
||||
ports:
|
||||
- "5000:5000"
|
||||
environment:
|
||||
- CONFIG_LOCATION=config.yaml
|
||||
# Set this to change the timezone; alternatively, mount /etc/timezone or /etc/localtime, depending on how the host environment manages its system time
|
||||
# - TZ=${TZ}
|
||||
volumes:
|
||||
- ./wordlists.json:/app/wordlists.json:ro
|
||||
- ./config.yaml:/app/config.yaml:ro
|
||||
- ./logs:/app/logs
|
||||
- ./exports:/app/exports
|
||||
environment:
|
||||
- CONFIG_LOCATION=config.yaml
|
||||
- data:/app/data
|
||||
restart: unless-stopped
|
||||
develop:
|
||||
watch:
|
||||
- path: ./Dockerfile
|
||||
action: rebuild
|
||||
- path: ./src/
|
||||
action: sync+restart
|
||||
target: /app/src
|
||||
- path: ./docker-compose.yaml
|
||||
action: rebuild
|
||||
|
||||
volumes:
|
||||
data:
|
||||
|
||||
@@ -23,7 +23,7 @@ class Analyzer:
|
||||
"""
|
||||
Analyzes users activity and produces aggregated insights
|
||||
"""
|
||||
def __init__(self, db_manager: Optional[DatabaseManager] = None, timezone: Optional[ZoneInfo] = None):
|
||||
def __init__(self, db_manager: Optional[DatabaseManager] = None):
|
||||
"""
|
||||
Initialize the access tracker.
|
||||
|
||||
@@ -31,11 +31,10 @@ class Analyzer:
|
||||
db_manager: Optional DatabaseManager for persistence.
|
||||
If None, will use the global singleton.
|
||||
"""
|
||||
self.timezone = timezone or ZoneInfo('UTC')
|
||||
|
||||
# Database manager for persistence (lazily initialized)
|
||||
self._db_manager = db_manager
|
||||
|
||||
|
||||
@property
|
||||
def db(self) -> Optional[DatabaseManager]:
|
||||
"""
|
||||
@@ -51,11 +50,11 @@ class Analyzer:
|
||||
# Database not initialized, persistence disabled
|
||||
pass
|
||||
return self._db_manager
|
||||
|
||||
|
||||
# def infer_user_category(self, ip: str) -> str:
|
||||
|
||||
# config = get_config()
|
||||
|
||||
|
||||
# http_risky_methods_threshold = config.http_risky_methods_threshold
|
||||
# violated_robots_threshold = config.violated_robots_threshold
|
||||
# uneven_request_timing_threshold = config.uneven_request_timing_threshold
|
||||
@@ -70,7 +69,7 @@ class Analyzer:
|
||||
# score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
# score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
# score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
|
||||
|
||||
# #1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
|
||||
# weights = {
|
||||
# "attacker": {
|
||||
@@ -108,7 +107,7 @@ class Analyzer:
|
||||
# total_accesses_count = len(accesses)
|
||||
# if total_accesses_count <= 0:
|
||||
# return
|
||||
|
||||
|
||||
# # Set category as "unknown" while fewer than 3 requests have been recorded
|
||||
# if total_accesses_count < 3:
|
||||
# category = "unknown"
|
||||
@@ -127,7 +126,7 @@ class Analyzer:
|
||||
# delete_accesses_count = len([item for item in accesses if item["method"] == "DELETE"])
|
||||
# head_accesses_count = len([item for item in accesses if item["method"] == "HEAD"])
|
||||
# options_accesses_count = len([item for item in accesses if item["method"] == "OPTIONS"])
|
||||
# patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"])
|
||||
# patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"])
|
||||
|
||||
# if total_accesses_count > http_risky_methods_threshold:
|
||||
# http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
|
||||
@@ -156,7 +155,7 @@ class Analyzer:
|
||||
# if not line:
|
||||
# continue
|
||||
# parts = line.split(":")
|
||||
|
||||
|
||||
# if parts[0] == "Disallow":
|
||||
# parts[1] = parts[1].rstrip("/")
|
||||
# #print(f"DISALLOW {parts[1]}")
|
||||
@@ -180,7 +179,7 @@ class Analyzer:
|
||||
# score["good_crawler"]["robots_violations"] = False
|
||||
# score["bad_crawler"]["robots_violations"] = False
|
||||
# score["regular_user"]["robots_violations"] = False
|
||||
|
||||
|
||||
# #--------------------- Requests Timing ---------------------
|
||||
# #Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
|
||||
# timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses]
|
||||
@@ -192,7 +191,7 @@ class Analyzer:
|
||||
# for i in range(0, len(timestamps)-1):
|
||||
# diff = (timestamps[i] - timestamps[i+1]).total_seconds()
|
||||
# time_diffs.append(diff)
|
||||
|
||||
|
||||
# mean = 0
|
||||
# variance = 0
|
||||
# std = 0
|
||||
@@ -250,10 +249,10 @@ class Analyzer:
|
||||
# except Exception:
|
||||
# decoded_path = queried_path
|
||||
# decoded_path_twice = queried_path
|
||||
|
||||
|
||||
# for name, pattern in wl.attack_patterns.items():
|
||||
# # Check original, decoded, and double-decoded paths
|
||||
# if (re.search(pattern, queried_path, re.IGNORECASE) or
|
||||
# if (re.search(pattern, queried_path, re.IGNORECASE) or
|
||||
# re.search(pattern, decoded_path, re.IGNORECASE) or
|
||||
# re.search(pattern, decoded_path_twice, re.IGNORECASE)):
|
||||
# attack_urls_found_list.append(f"{name}: {pattern}")
|
||||
@@ -261,7 +260,7 @@ class Analyzer:
|
||||
# #remove duplicates
|
||||
# attack_urls_found_list = set(attack_urls_found_list)
|
||||
# attack_urls_found_list = list(attack_urls_found_list)
|
||||
|
||||
|
||||
# if len(attack_urls_found_list) > attack_urls_threshold:
|
||||
# score["attacker"]["attack_url"] = True
|
||||
# score["good_crawler"]["attack_url"] = False
|
||||
@@ -344,7 +343,7 @@ class Analyzer:
|
||||
# sanitized_asn = sanitize_for_storage(asn, 100)
|
||||
# sanitized_asn_org = sanitize_for_storage(asn_org, 100)
|
||||
# sanitized_list_on = sanitize_dict(list_on, 100000)
|
||||
|
||||
|
||||
# self._db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on)
|
||||
|
||||
# return
|
||||
|
||||
# return
|
||||
|
||||
@@ -32,7 +32,6 @@ class Config:
|
||||
# Database settings
|
||||
database_path: str = "data/krawl.db"
|
||||
database_retention_days: int = 30
|
||||
timezone: str = None # IANA timezone (e.g., 'America/New_York', 'Europe/Rome')
|
||||
|
||||
# Analyzer settings
|
||||
http_risky_methods_threshold: float = None
|
||||
@@ -42,39 +41,6 @@ class Config:
|
||||
user_agents_used_threshold: float = None
|
||||
attack_urls_threshold: float = None
|
||||
|
||||
@staticmethod
|
||||
# Try to fetch timezone before if not set
|
||||
def get_system_timezone() -> str:
|
||||
"""Get the system's default timezone"""
|
||||
try:
|
||||
if os.path.islink('/etc/localtime'):
|
||||
tz_path = os.readlink('/etc/localtime')
|
||||
if 'zoneinfo/' in tz_path:
|
||||
return tz_path.split('zoneinfo/')[-1]
|
||||
|
||||
local_tz = time.tzname[time.daylight]
|
||||
if local_tz and local_tz != 'UTC':
|
||||
return local_tz
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Default fallback to UTC
|
||||
return 'UTC'
|
||||
|
||||
def get_timezone(self) -> ZoneInfo:
|
||||
"""Get configured timezone as ZoneInfo object"""
|
||||
if self.timezone:
|
||||
try:
|
||||
return ZoneInfo(self.timezone)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
system_tz = self.get_system_timezone()
|
||||
try:
|
||||
return ZoneInfo(system_tz)
|
||||
except Exception:
|
||||
return ZoneInfo('UTC')
|
||||
|
||||
@classmethod
|
||||
def from_yaml(cls) -> 'Config':
|
||||
"""Create configuration from YAML file"""
|
||||
@@ -113,12 +79,11 @@ class Config:
|
||||
# ensure the dashboard path starts with a /
|
||||
if dashboard_path[:1] != "/":
|
||||
dashboard_path = f"/{dashboard_path}"
|
||||
|
||||
|
||||
return cls(
|
||||
port=server.get('port', 5000),
|
||||
delay=server.get('delay', 100),
|
||||
server_header=server.get('server_header',""),
|
||||
timezone=server.get('timezone'),
|
||||
links_length_range=(
|
||||
links.get('min_length', 5),
|
||||
links.get('max_length', 15)
|
||||
@@ -140,7 +105,7 @@ class Config:
|
||||
database_retention_days=database.get('retention_days', 30),
|
||||
http_risky_methods_threshold=analyzer.get('http_risky_methods_threshold', 0.1),
|
||||
violated_robots_threshold=analyzer.get('violated_robots_threshold', 0.1),
|
||||
uneven_request_timing_threshold=analyzer.get('uneven_request_timing_threshold', 0.5), # coefficient of variation
|
||||
uneven_request_timing_threshold=analyzer.get('uneven_request_timing_threshold', 0.5), # coefficient of variation
|
||||
uneven_request_timing_time_window_seconds=analyzer.get('uneven_request_timing_time_window_seconds', 300),
|
||||
user_agents_used_threshold=analyzer.get('user_agents_used_threshold', 2),
|
||||
attack_urls_threshold=analyzer.get('attack_urls_threshold', 1)
|
||||
|
||||
@@ -7,7 +7,7 @@ Provides SQLAlchemy session management and database initialization.
|
||||
|
||||
import os
|
||||
import stat
|
||||
from datetime import datetime
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Optional, List, Dict, Any
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
@@ -141,7 +141,7 @@ class DatabaseManager:
|
||||
method=method[:10],
|
||||
is_suspicious=is_suspicious,
|
||||
is_honeypot_trigger=is_honeypot_trigger,
|
||||
timestamp=datetime.now(tz=ZoneInfo('UTC'))
|
||||
timestamp=datetime.now()
|
||||
)
|
||||
session.add(access_log)
|
||||
session.flush() # Get the ID before committing
|
||||
@@ -199,7 +199,7 @@ class DatabaseManager:
|
||||
path=sanitize_path(path),
|
||||
username=sanitize_credential(username),
|
||||
password=sanitize_credential(password),
|
||||
timestamp=datetime.now(tz=ZoneInfo('UTC'))
|
||||
timestamp=datetime.now()
|
||||
)
|
||||
session.add(credential)
|
||||
session.commit()
|
||||
@@ -221,7 +221,7 @@ class DatabaseManager:
|
||||
ip: IP address to update
|
||||
"""
|
||||
sanitized_ip = sanitize_ip(ip)
|
||||
now = datetime.now(tz=ZoneInfo('UTC'))
|
||||
now = datetime.now()
|
||||
|
||||
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
||||
|
||||
@@ -266,7 +266,7 @@ class DatabaseManager:
|
||||
ip_stats.category = category
|
||||
ip_stats.category_scores = category_scores
|
||||
ip_stats.last_analysis = last_analysis
|
||||
|
||||
|
||||
try:
|
||||
session.commit()
|
||||
except Exception as e:
|
||||
@@ -280,21 +280,21 @@ class DatabaseManager:
|
||||
Args:
|
||||
ip: IP address to update
|
||||
category: selected category
|
||||
|
||||
|
||||
"""
|
||||
session = self.session
|
||||
sanitized_ip = sanitize_ip(ip)
|
||||
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
||||
|
||||
|
||||
|
||||
# Record the manual category change
|
||||
old_category = ip_stats.category
|
||||
if old_category != category:
|
||||
self._record_category_change(sanitized_ip, old_category, category, datetime.now(tz=ZoneInfo('UTC')))
|
||||
self._record_category_change(sanitized_ip, old_category, category, datetime.now())
|
||||
|
||||
ip_stats.category = category
|
||||
ip_stats.manual_category = True
|
||||
|
||||
|
||||
try:
|
||||
session.commit()
|
||||
except Exception as e:
|
||||
@@ -316,7 +316,7 @@ class DatabaseManager:
|
||||
# Only record actual category changes
|
||||
if old_category is None:
|
||||
return
|
||||
|
||||
|
||||
session = self.session
|
||||
try:
|
||||
history_entry = CategoryHistory(
|
||||
@@ -352,7 +352,7 @@ class DatabaseManager:
|
||||
{
|
||||
'old_category': h.old_category,
|
||||
'new_category': h.new_category,
|
||||
'timestamp': h.timestamp.isoformat() + '+00:00'
|
||||
'timestamp': h.timestamp.isoformat()
|
||||
}
|
||||
for h in history
|
||||
]
|
||||
@@ -390,6 +390,7 @@ class DatabaseManager:
|
||||
def get_unenriched_ips(self, limit: int = 100) -> List[str]:
|
||||
"""
|
||||
Get IPs that don't have reputation data yet.
|
||||
Excludes RFC1918 private addresses and other non-routable IPs.
|
||||
|
||||
Args:
|
||||
limit: Maximum number of IPs to return
|
||||
@@ -400,7 +401,18 @@ class DatabaseManager:
|
||||
session = self.session
|
||||
try:
|
||||
ips = session.query(IpStats.ip).filter(
|
||||
IpStats.country_code.is_(None)
|
||||
IpStats.country_code.is_(None),
|
||||
~IpStats.ip.like('10.%'),
|
||||
~IpStats.ip.like('172.16.%'),
|
||||
~IpStats.ip.like('172.17.%'),
|
||||
~IpStats.ip.like('172.18.%'),
|
||||
~IpStats.ip.like('172.19.%'),
|
||||
~IpStats.ip.like('172.2_.%'),
|
||||
~IpStats.ip.like('172.30.%'),
|
||||
~IpStats.ip.like('172.31.%'),
|
||||
~IpStats.ip.like('192.168.%'),
|
||||
~IpStats.ip.like('127.%'),
|
||||
~IpStats.ip.like('169.254.%')
|
||||
).limit(limit).all()
|
||||
return [ip[0] for ip in ips]
|
||||
finally:
|
||||
@@ -411,7 +423,8 @@ class DatabaseManager:
|
||||
limit: int = 100,
|
||||
offset: int = 0,
|
||||
ip_filter: Optional[str] = None,
|
||||
suspicious_only: bool = False
|
||||
suspicious_only: bool = False,
|
||||
since_minutes: Optional[int] = None
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Retrieve access logs with optional filtering.
|
||||
@@ -421,6 +434,7 @@ class DatabaseManager:
|
||||
offset: Number of records to skip
|
||||
ip_filter: Filter by IP address
|
||||
suspicious_only: Only return suspicious requests
|
||||
since_minutes: Only return logs from the last N minutes
|
||||
|
||||
Returns:
|
||||
List of access log dictionaries
|
||||
@@ -433,6 +447,9 @@ class DatabaseManager:
|
||||
query = query.filter(AccessLog.ip == sanitize_ip(ip_filter))
|
||||
if suspicious_only:
|
||||
query = query.filter(AccessLog.is_suspicious == True)
|
||||
if since_minutes is not None:
|
||||
cutoff_time = datetime.now() - timedelta(minutes=since_minutes)
|
||||
query = query.filter(AccessLog.timestamp >= cutoff_time)
|
||||
|
||||
logs = query.offset(offset).limit(limit).all()
|
||||
|
||||
@@ -445,7 +462,7 @@ class DatabaseManager:
|
||||
'method': log.method,
|
||||
'is_suspicious': log.is_suspicious,
|
||||
'is_honeypot_trigger': log.is_honeypot_trigger,
|
||||
'timestamp': log.timestamp.isoformat() + '+00:00',
|
||||
'timestamp': log.timestamp.isoformat(),
|
||||
'attack_types': [d.attack_type for d in log.attack_detections]
|
||||
}
|
||||
for log in logs
|
||||
@@ -501,7 +518,7 @@ class DatabaseManager:
|
||||
# print(f"Database error persisting access: {e}")
|
||||
# return None
|
||||
# finally:
|
||||
# self.close_session()
|
||||
# self.close_session()
|
||||
|
||||
def get_credential_attempts(
|
||||
self,
|
||||
@@ -538,7 +555,7 @@ class DatabaseManager:
|
||||
'path': attempt.path,
|
||||
'username': attempt.username,
|
||||
'password': attempt.password,
|
||||
'timestamp': attempt.timestamp.isoformat() + '+00:00'
|
||||
'timestamp': attempt.timestamp.isoformat()
|
||||
}
|
||||
for attempt in attempts
|
||||
]
|
||||
@@ -565,8 +582,8 @@ class DatabaseManager:
|
||||
{
|
||||
'ip': s.ip,
|
||||
'total_requests': s.total_requests,
|
||||
'first_seen': s.first_seen.isoformat() + '+00:00',
|
||||
'last_seen': s.last_seen.isoformat() + '+00:00',
|
||||
'first_seen': s.first_seen.isoformat(),
|
||||
'last_seen': s.last_seen.isoformat(),
|
||||
'country_code': s.country_code,
|
||||
'city': s.city,
|
||||
'asn': s.asn,
|
||||
@@ -596,18 +613,18 @@ class DatabaseManager:
|
||||
session = self.session
|
||||
try:
|
||||
stat = session.query(IpStats).filter(IpStats.ip == ip).first()
|
||||
|
||||
|
||||
if not stat:
|
||||
return None
|
||||
|
||||
|
||||
# Get category history for this IP
|
||||
category_history = self.get_category_history(ip)
|
||||
|
||||
|
||||
return {
|
||||
'ip': stat.ip,
|
||||
'total_requests': stat.total_requests,
|
||||
'first_seen': stat.first_seen.isoformat() + '+00:00' if stat.first_seen else None,
|
||||
'last_seen': stat.last_seen.isoformat() + '+00:00' if stat.last_seen else None,
|
||||
'first_seen': stat.first_seen.isoformat() if stat.first_seen else None,
|
||||
'last_seen': stat.last_seen.isoformat() if stat.last_seen else None,
|
||||
'country_code': stat.country_code,
|
||||
'city': stat.city,
|
||||
'asn': stat.asn,
|
||||
@@ -619,7 +636,7 @@ class DatabaseManager:
|
||||
'category': stat.category,
|
||||
'category_scores': stat.category_scores or {},
|
||||
'manual_category': stat.manual_category,
|
||||
'last_analysis': stat.last_analysis.isoformat() + '+00:00' if stat.last_analysis else None,
|
||||
'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None,
|
||||
'category_history': category_history
|
||||
}
|
||||
finally:
|
||||
@@ -690,7 +707,7 @@ class DatabaseManager:
|
||||
Args:
|
||||
limit: Maximum number of results
|
||||
|
||||
Returns:
|
||||
Returns:
|
||||
List of (path, count) tuples ordered by count descending
|
||||
"""
|
||||
session = self.session
|
||||
@@ -753,7 +770,7 @@ class DatabaseManager:
|
||||
'ip': log.ip,
|
||||
'path': log.path,
|
||||
'user_agent': log.user_agent,
|
||||
'timestamp': log.timestamp.isoformat() + '+00:00'
|
||||
'timestamp': log.timestamp.isoformat()
|
||||
}
|
||||
for log in logs
|
||||
]
|
||||
@@ -811,7 +828,7 @@ class DatabaseManager:
|
||||
'ip': log.ip,
|
||||
'path': log.path,
|
||||
'user_agent': log.user_agent,
|
||||
'timestamp': log.timestamp.isoformat() + '+00:00',
|
||||
'timestamp': log.timestamp.isoformat(),
|
||||
'attack_types': [d.attack_type for d in log.attack_detections]
|
||||
}
|
||||
for log in logs
|
||||
|
||||
@@ -43,12 +43,12 @@ class Handler(BaseHTTPRequestHandler):
|
||||
if forwarded_for:
|
||||
# X-Forwarded-For can contain multiple IPs, get the first (original client)
|
||||
return forwarded_for.split(',')[0].strip()
|
||||
|
||||
|
||||
# Check X-Real-IP header (set by nginx and other proxies)
|
||||
real_ip = self.headers.get('X-Real-IP')
|
||||
if real_ip:
|
||||
return real_ip.strip()
|
||||
|
||||
|
||||
# Fallback to direct connection IP
|
||||
return self.client_address[0]
|
||||
|
||||
@@ -73,12 +73,12 @@ class Handler(BaseHTTPRequestHandler):
|
||||
if not error_codes:
|
||||
error_codes = [400, 401, 403, 404, 500, 502, 503]
|
||||
return random.choice(error_codes)
|
||||
|
||||
|
||||
def _parse_query_string(self) -> str:
|
||||
"""Extract query string from the request path"""
|
||||
parsed = urlparse(self.path)
|
||||
return parsed.query
|
||||
|
||||
|
||||
def _handle_sql_endpoint(self, path: str) -> bool:
|
||||
"""
|
||||
Handle SQL injection honeypot endpoints.
|
||||
@@ -86,22 +86,22 @@ class Handler(BaseHTTPRequestHandler):
|
||||
"""
|
||||
# SQL-vulnerable endpoints
|
||||
sql_endpoints = ['/api/search', '/api/sql', '/api/database']
|
||||
|
||||
|
||||
base_path = urlparse(path).path
|
||||
if base_path not in sql_endpoints:
|
||||
return False
|
||||
|
||||
|
||||
try:
|
||||
# Get query parameters
|
||||
query_string = self._parse_query_string()
|
||||
|
||||
|
||||
# Log SQL injection attempt
|
||||
client_ip = self._get_client_ip()
|
||||
user_agent = self._get_user_agent()
|
||||
|
||||
|
||||
# Always check for SQL injection patterns
|
||||
error_msg, content_type, status_code = generate_sql_error_response(query_string or "")
|
||||
|
||||
|
||||
if error_msg:
|
||||
# SQL injection detected - log and return error
|
||||
self.access_logger.warning(f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}")
|
||||
@@ -117,9 +117,9 @@ class Handler(BaseHTTPRequestHandler):
|
||||
self.end_headers()
|
||||
response_data = get_sql_response_with_data(base_path, query_string or "")
|
||||
self.wfile.write(response_data.encode())
|
||||
|
||||
|
||||
return True
|
||||
|
||||
|
||||
except BrokenPipeError:
|
||||
# Client disconnected
|
||||
return True
|
||||
@@ -142,7 +142,7 @@ class Handler(BaseHTTPRequestHandler):
|
||||
|
||||
# Build the content HTML
|
||||
content = ""
|
||||
|
||||
|
||||
# Add canary token if needed
|
||||
if Handler.counter <= 0 and self.config.canary_token_url:
|
||||
content += f"""
|
||||
@@ -189,16 +189,16 @@ class Handler(BaseHTTPRequestHandler):
|
||||
|
||||
from urllib.parse import urlparse
|
||||
base_path = urlparse(self.path).path
|
||||
|
||||
|
||||
if base_path in ['/api/search', '/api/sql', '/api/database']:
|
||||
content_length = int(self.headers.get('Content-Length', 0))
|
||||
if content_length > 0:
|
||||
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
|
||||
|
||||
|
||||
self.access_logger.info(f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}")
|
||||
|
||||
|
||||
error_msg, content_type, status_code = generate_sql_error_response(post_data)
|
||||
|
||||
|
||||
try:
|
||||
if error_msg:
|
||||
self.access_logger.warning(f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}")
|
||||
@@ -217,26 +217,26 @@ class Handler(BaseHTTPRequestHandler):
|
||||
except Exception as e:
|
||||
self.app_logger.error(f"Error in SQL POST handler: {str(e)}")
|
||||
return
|
||||
|
||||
|
||||
if base_path == '/api/contact':
|
||||
content_length = int(self.headers.get('Content-Length', 0))
|
||||
if content_length > 0:
|
||||
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
|
||||
|
||||
|
||||
parsed_data = {}
|
||||
for pair in post_data.split('&'):
|
||||
if '=' in pair:
|
||||
key, value = pair.split('=', 1)
|
||||
from urllib.parse import unquote_plus
|
||||
parsed_data[unquote_plus(key)] = unquote_plus(value)
|
||||
|
||||
|
||||
xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values())
|
||||
|
||||
|
||||
if xss_detected:
|
||||
self.access_logger.warning(f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}")
|
||||
else:
|
||||
self.access_logger.info(f"[XSS ENDPOINT POST] {client_ip} - {base_path}")
|
||||
|
||||
|
||||
try:
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'text/html')
|
||||
@@ -264,17 +264,17 @@ class Handler(BaseHTTPRequestHandler):
|
||||
timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||
credential_line = f"{timestamp}|{client_ip}|{username or 'N/A'}|{password or 'N/A'}|{self.path}"
|
||||
self.credential_logger.info(credential_line)
|
||||
|
||||
|
||||
# Also record in tracker for dashboard
|
||||
self.tracker.record_credential_attempt(client_ip, self.path, username or 'N/A', password or 'N/A')
|
||||
|
||||
|
||||
self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}")
|
||||
|
||||
# Pass the POST body to record_access so it can also be inspected for suspicious content.
|
||||
self.tracker.record_access(client_ip, self.path, user_agent, post_data, method='POST')
|
||||
|
||||
|
||||
time.sleep(1)
|
||||
|
||||
|
||||
try:
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'text/html')
|
||||
@@ -289,11 +289,11 @@ class Handler(BaseHTTPRequestHandler):
|
||||
|
||||
def serve_special_path(self, path: str) -> bool:
|
||||
"""Serve special paths like robots.txt, API endpoints, etc."""
|
||||
|
||||
|
||||
# Check SQL injection honeypot endpoints first
|
||||
if self._handle_sql_endpoint(path):
|
||||
return True
|
||||
|
||||
|
||||
try:
|
||||
if path == '/robots.txt':
|
||||
self.send_response(200)
|
||||
@@ -301,7 +301,7 @@ class Handler(BaseHTTPRequestHandler):
|
||||
self.end_headers()
|
||||
self.wfile.write(html_templates.robots_txt().encode())
|
||||
return True
|
||||
|
||||
|
||||
if path in ['/credentials.txt', '/passwords.txt', '/admin_notes.txt']:
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'text/plain')
|
||||
@@ -311,7 +311,7 @@ class Handler(BaseHTTPRequestHandler):
|
||||
else:
|
||||
self.wfile.write(passwords_txt().encode())
|
||||
return True
|
||||
|
||||
|
||||
if path in ['/users.json', '/api_keys.json', '/config.json']:
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'application/json')
|
||||
@@ -323,28 +323,28 @@ class Handler(BaseHTTPRequestHandler):
|
||||
else:
|
||||
self.wfile.write(api_response('/api/config').encode())
|
||||
return True
|
||||
|
||||
|
||||
if path in ['/admin', '/admin/', '/admin/login', '/login']:
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'text/html')
|
||||
self.end_headers()
|
||||
self.wfile.write(html_templates.login_form().encode())
|
||||
return True
|
||||
|
||||
|
||||
if path in ['/users', '/user', '/database', '/db', '/search']:
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'text/html')
|
||||
self.end_headers()
|
||||
self.wfile.write(html_templates.product_search().encode())
|
||||
return True
|
||||
|
||||
|
||||
if path in ['/info', '/input', '/contact', '/feedback', '/comment']:
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'text/html')
|
||||
self.end_headers()
|
||||
self.wfile.write(html_templates.input_form().encode())
|
||||
return True
|
||||
|
||||
|
||||
if path == '/server':
|
||||
error_html, content_type = generate_server_error()
|
||||
self.send_response(500)
|
||||
@@ -352,35 +352,35 @@ class Handler(BaseHTTPRequestHandler):
|
||||
self.end_headers()
|
||||
self.wfile.write(error_html.encode())
|
||||
return True
|
||||
|
||||
|
||||
if path in ['/wp-login.php', '/wp-login', '/wp-admin', '/wp-admin/']:
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'text/html')
|
||||
self.end_headers()
|
||||
self.wfile.write(html_templates.wp_login().encode())
|
||||
return True
|
||||
|
||||
|
||||
if path in ['/wp-content/', '/wp-includes/'] or 'wordpress' in path.lower():
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'text/html')
|
||||
self.end_headers()
|
||||
self.wfile.write(html_templates.wordpress().encode())
|
||||
return True
|
||||
|
||||
|
||||
if 'phpmyadmin' in path.lower() or path in ['/pma/', '/phpMyAdmin/']:
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'text/html')
|
||||
self.end_headers()
|
||||
self.wfile.write(html_templates.phpmyadmin().encode())
|
||||
return True
|
||||
|
||||
|
||||
if path.startswith('/api/') or path.startswith('/api') or path in ['/.env']:
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'application/json')
|
||||
self.end_headers()
|
||||
self.wfile.write(api_response(path).encode())
|
||||
return True
|
||||
|
||||
|
||||
if path in ['/backup/', '/uploads/', '/private/', '/admin/', '/config/', '/database/']:
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'text/html')
|
||||
@@ -400,22 +400,21 @@ class Handler(BaseHTTPRequestHandler):
|
||||
"""Responds to webpage requests"""
|
||||
client_ip = self._get_client_ip()
|
||||
user_agent = self._get_user_agent()
|
||||
|
||||
|
||||
if self.config.dashboard_secret_path and self.path == self.config.dashboard_secret_path:
|
||||
self.send_response(200)
|
||||
self.send_header('Content-type', 'text/html')
|
||||
self.end_headers()
|
||||
try:
|
||||
stats = self.tracker.get_stats()
|
||||
timezone = str(self.config.timezone) if self.config.timezone else 'UTC'
|
||||
dashboard_path = self.config.dashboard_secret_path
|
||||
self.wfile.write(generate_dashboard(stats, timezone, dashboard_path).encode())
|
||||
self.wfile.write(generate_dashboard(stats, dashboard_path).encode())
|
||||
except BrokenPipeError:
|
||||
pass
|
||||
except Exception as e:
|
||||
self.app_logger.error(f"Error generating dashboard: {e}")
|
||||
return
|
||||
|
||||
|
||||
# API endpoint for fetching IP stats
|
||||
if self.config.dashboard_secret_path and self.path.startswith(f"{self.config.dashboard_secret_path}/api/ip-stats/"):
|
||||
ip_address = self.path.replace(f"{self.config.dashboard_secret_path}/api/ip-stats/", "")
|
||||
@@ -473,7 +472,7 @@ class Handler(BaseHTTPRequestHandler):
|
||||
return
|
||||
|
||||
self.tracker.record_access(client_ip, self.path, user_agent, method='GET')
|
||||
|
||||
|
||||
# self.analyzer.infer_user_category(client_ip)
|
||||
# self.analyzer.update_ip_rep_infos(client_ip)
|
||||
|
||||
@@ -497,9 +496,9 @@ class Handler(BaseHTTPRequestHandler):
|
||||
|
||||
try:
|
||||
self.wfile.write(self.generate_page(self.path).encode())
|
||||
|
||||
|
||||
Handler.counter -= 1
|
||||
|
||||
|
||||
if Handler.counter < 0:
|
||||
Handler.counter = self.config.canary_token_tries
|
||||
except BrokenPipeError:
|
||||
|
||||
@@ -8,20 +8,17 @@ Provides two loggers: app (application) and access (HTTP access logs).
|
||||
import logging
|
||||
import os
|
||||
from logging.handlers import RotatingFileHandler
|
||||
from typing import Optional
|
||||
from zoneinfo import ZoneInfo
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class TimezoneFormatter(logging.Formatter):
|
||||
"""Custom formatter that respects configured timezone"""
|
||||
def __init__(self, fmt=None, datefmt=None, timezone: Optional[ZoneInfo] = None):
|
||||
def __init__(self, fmt=None, datefmt=None):
|
||||
super().__init__(fmt, datefmt)
|
||||
self.timezone = timezone or ZoneInfo('UTC')
|
||||
|
||||
|
||||
def formatTime(self, record, datefmt=None):
|
||||
"""Override formatTime to use configured timezone"""
|
||||
dt = datetime.fromtimestamp(record.created, tz=self.timezone)
|
||||
dt = datetime.fromtimestamp(record.created)
|
||||
if datefmt:
|
||||
return dt.strftime(datefmt)
|
||||
return dt.isoformat()
|
||||
@@ -37,19 +34,16 @@ class LoggerManager:
|
||||
cls._instance._initialized = False
|
||||
return cls._instance
|
||||
|
||||
def initialize(self, log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None:
|
||||
def initialize(self, log_dir: str = "logs") -> None:
|
||||
"""
|
||||
Initialize the logging system with rotating file handlers.
|
||||
Initialize the logging system with rotating file handlers.
|
||||
|
||||
Args:
|
||||
log_dir: Directory for log files (created if not exists)
|
||||
timezone: ZoneInfo timezone for log timestamps (defaults to UTC)
|
||||
"""
|
||||
if self._initialized:
|
||||
return
|
||||
|
||||
self.timezone = timezone or ZoneInfo('UTC')
|
||||
|
||||
# Create log directory if it doesn't exist
|
||||
os.makedirs(log_dir, exist_ok=True)
|
||||
|
||||
@@ -57,7 +51,6 @@ class LoggerManager:
|
||||
log_format = TimezoneFormatter(
|
||||
"[%(asctime)s] %(levelname)s - %(message)s",
|
||||
datefmt="%Y-%m-%d %H:%M:%S",
|
||||
timezone=self.timezone
|
||||
)
|
||||
|
||||
# Rotation settings: 1MB max, 5 backups
|
||||
@@ -104,8 +97,8 @@ class LoggerManager:
|
||||
self._credential_logger.handlers.clear()
|
||||
|
||||
# Credential logger uses a simple format: timestamp|ip|username|password|path
|
||||
credential_format = TimezoneFormatter("%(message)s", timezone=self.timezone)
|
||||
|
||||
credential_format = TimezoneFormatter("%(message)s")
|
||||
|
||||
credential_file_handler = RotatingFileHandler(
|
||||
os.path.join(log_dir, "credentials.log"),
|
||||
maxBytes=max_bytes,
|
||||
@@ -157,6 +150,6 @@ def get_credential_logger() -> logging.Logger:
|
||||
return _logger_manager.credentials
|
||||
|
||||
|
||||
def initialize_logging(log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None:
|
||||
def initialize_logging(log_dir: str = "logs") -> None:
|
||||
"""Initialize the logging system."""
|
||||
_logger_manager.initialize(log_dir, timezone)
|
||||
_logger_manager.initialize(log_dir)
|
||||
|
||||
@@ -29,7 +29,6 @@ def print_usage():
|
||||
print(' server:')
|
||||
print(' port: 5000')
|
||||
print(' delay: 100')
|
||||
print(' timezone: null # or "America/New_York"')
|
||||
print(' links:')
|
||||
print(' min_length: 5')
|
||||
print(' max_length: 15')
|
||||
@@ -55,11 +54,8 @@ def main():
|
||||
|
||||
config = get_config()
|
||||
|
||||
# Get timezone configuration
|
||||
tz = config.get_timezone()
|
||||
|
||||
# Initialize logging with timezone
|
||||
initialize_logging(timezone=tz)
|
||||
initialize_logging()
|
||||
app_logger = get_app_logger()
|
||||
access_logger = get_access_logger()
|
||||
credential_logger = get_credential_logger()
|
||||
@@ -71,8 +67,8 @@ def main():
|
||||
except Exception as e:
|
||||
app_logger.warning(f'Database initialization failed: {e}. Continuing with in-memory only.')
|
||||
|
||||
tracker = AccessTracker(timezone=tz)
|
||||
analyzer = Analyzer(timezone=tz)
|
||||
tracker = AccessTracker()
|
||||
analyzer = Analyzer()
|
||||
|
||||
Handler.config = config
|
||||
Handler.tracker = tracker
|
||||
@@ -99,7 +95,6 @@ def main():
|
||||
|
||||
try:
|
||||
app_logger.info(f'Starting deception server on port {config.port}...')
|
||||
app_logger.info(f'Timezone configured: {tz.key}')
|
||||
app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}')
|
||||
if config.canary_token_url:
|
||||
app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries')
|
||||
|
||||
@@ -28,7 +28,7 @@ def main():
|
||||
config = get_config()
|
||||
db_manager = get_database()
|
||||
app_logger = get_app_logger()
|
||||
|
||||
|
||||
http_risky_methods_threshold = config.http_risky_methods_threshold
|
||||
violated_robots_threshold = config.violated_robots_threshold
|
||||
uneven_request_timing_threshold = config.uneven_request_timing_threshold
|
||||
@@ -41,7 +41,7 @@ def main():
|
||||
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
|
||||
|
||||
|
||||
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
|
||||
weights = {
|
||||
"attacker": {
|
||||
@@ -73,21 +73,27 @@ def main():
|
||||
"attack_url": 0
|
||||
}
|
||||
}
|
||||
accesses = db_manager.get_access_logs(limit=999999999)
|
||||
ips = {item['ip'] for item in accesses}
|
||||
# Get IPs with recent activity (last minute to match cron schedule)
|
||||
recent_accesses = db_manager.get_access_logs(limit=999999999, since_minutes=1)
|
||||
ips_to_analyze = {item['ip'] for item in recent_accesses}
|
||||
|
||||
for ip in ips:
|
||||
ip_accesses = [item for item in accesses if item["ip"] == ip]
|
||||
total_accesses_count = len(accesses)
|
||||
if not ips_to_analyze:
|
||||
app_logger.debug("[Background Task] analyze-ips: No recent activity, skipping")
|
||||
return
|
||||
|
||||
for ip in ips_to_analyze:
|
||||
# Get full history for this IP to perform accurate analysis
|
||||
ip_accesses = db_manager.get_access_logs(limit=999999999, ip_filter=ip)
|
||||
total_accesses_count = len(ip_accesses)
|
||||
if total_accesses_count <= 0:
|
||||
return
|
||||
|
||||
|
||||
# Set category as "unknown" for the first 3 requests
|
||||
if total_accesses_count < 3:
|
||||
category = "unknown"
|
||||
analyzed_metrics = {}
|
||||
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
|
||||
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
|
||||
last_analysis = datetime.now()
|
||||
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
||||
return 0
|
||||
#--------------------- HTTP Methods ---------------------
|
||||
@@ -97,7 +103,7 @@ def main():
|
||||
delete_accesses_count = len([item for item in ip_accesses if item["method"] == "DELETE"])
|
||||
head_accesses_count = len([item for item in ip_accesses if item["method"] == "HEAD"])
|
||||
options_accesses_count = len([item for item in ip_accesses if item["method"] == "OPTIONS"])
|
||||
patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"])
|
||||
patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"])
|
||||
if total_accesses_count > http_risky_methods_threshold:
|
||||
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
|
||||
else:
|
||||
@@ -123,7 +129,7 @@ def main():
|
||||
if not line:
|
||||
continue
|
||||
parts = line.split(":")
|
||||
|
||||
|
||||
if parts[0] == "Disallow":
|
||||
parts[1] = parts[1].rstrip("/")
|
||||
#print(f"DISALLOW {parts[1]}")
|
||||
@@ -145,18 +151,18 @@ def main():
|
||||
score["good_crawler"]["robots_violations"] = False
|
||||
score["bad_crawler"]["robots_violations"] = False
|
||||
score["regular_user"]["robots_violations"] = False
|
||||
|
||||
|
||||
#--------------------- Requests Timing ---------------------
|
||||
#Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
|
||||
# Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
|
||||
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
|
||||
now_utc = datetime.now(tz=ZoneInfo('UTC'))
|
||||
now_utc = datetime.now()
|
||||
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
|
||||
timestamps = sorted(timestamps, reverse=True)
|
||||
time_diffs = []
|
||||
for i in range(0, len(timestamps)-1):
|
||||
diff = (timestamps[i] - timestamps[i+1]).total_seconds()
|
||||
time_diffs.append(diff)
|
||||
|
||||
|
||||
mean = 0
|
||||
variance = 0
|
||||
std = 0
|
||||
@@ -206,14 +212,14 @@ def main():
|
||||
except Exception:
|
||||
decoded_path = queried_path
|
||||
decoded_path_twice = queried_path
|
||||
|
||||
|
||||
for name, pattern in wl.attack_patterns.items():
|
||||
# Check original, decoded, and double-decoded paths
|
||||
if (re.search(pattern, queried_path, re.IGNORECASE) or
|
||||
if (re.search(pattern, queried_path, re.IGNORECASE) or
|
||||
re.search(pattern, decoded_path, re.IGNORECASE) or
|
||||
re.search(pattern, decoded_path_twice, re.IGNORECASE)):
|
||||
attack_urls_found_list.append(f"{name}: {pattern}")
|
||||
|
||||
|
||||
#remove duplicates
|
||||
attack_urls_found_list = set(attack_urls_found_list)
|
||||
attack_urls_found_list = list(attack_urls_found_list)
|
||||
@@ -260,6 +266,6 @@ def main():
|
||||
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
|
||||
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
|
||||
category = max(category_scores, key=category_scores.get)
|
||||
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
|
||||
last_analysis = datetime.now()
|
||||
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
|
||||
return
|
||||
return
|
||||
|
||||
@@ -21,7 +21,7 @@ def main():
|
||||
|
||||
# Only get IPs that haven't been enriched yet
|
||||
unenriched_ips = db_manager.get_unenriched_ips(limit=50)
|
||||
|
||||
app_logger.info(f"{len(unenriched_ips)} IP's need to be have reputation enrichment.")
|
||||
for ip in unenriched_ips:
|
||||
try:
|
||||
api_url = "https://iprep.lcrawl.com/api/iprep/"
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
# tasks/export_malicious_ips.py
|
||||
|
||||
import os
|
||||
from datetime import datetime, timedelta
|
||||
from zoneinfo import ZoneInfo
|
||||
from logger import get_app_logger
|
||||
from database import get_database
|
||||
from models import AccessLog
|
||||
@@ -24,6 +26,15 @@ OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
|
||||
# ----------------------
|
||||
# TASK LOGIC
|
||||
# ----------------------
|
||||
def has_recent_honeypot_access(session, minutes: int = 5) -> bool:
    """Check if the honeypot was accessed in the last N minutes.

    Args:
        session: Database session exposing ``query()``, used to look up
            ``AccessLog`` rows.
        minutes: Size of the look-back window, in minutes.

    Returns:
        True if at least one ``AccessLog`` row flagged as a honeypot trigger
        has a timestamp at or after the cutoff, False otherwise.
    """
    # NOTE(review): the cutoff is naive local time; this presumes
    # AccessLog.timestamp is stored the same way -- confirm against the model.
    cutoff_time = datetime.now() - timedelta(minutes=minutes)

    # Only existence matters, so fetch at most one matching row instead of
    # asking the database to count every match.
    recent_hit = session.query(AccessLog).filter(
        AccessLog.is_honeypot_trigger == True,  # noqa: E712 (SQL expression)
        AccessLog.timestamp >= cutoff_time,
    ).first()
    return recent_hit is not None
|
||||
|
||||
def main():
|
||||
"""
|
||||
Export all IPs flagged as suspicious to a text file.
|
||||
@@ -36,6 +47,11 @@ def main():
|
||||
db = get_database()
|
||||
session = db.session
|
||||
|
||||
# Check for recent honeypot activity
|
||||
if not has_recent_honeypot_access(session):
|
||||
app_logger.info(f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes")
|
||||
return
|
||||
|
||||
# Query distinct suspicious IPs
|
||||
results = session.query(distinct(AccessLog.ip)).filter(
|
||||
AccessLog.is_suspicious == True
|
||||
|
||||
@@ -15,21 +15,16 @@ def _escape(value) -> str:
|
||||
return ""
|
||||
return html.escape(str(value))
|
||||
|
||||
def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool = False) -> str:
|
||||
def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str:
|
||||
"""Format ISO timestamp for display
|
||||
|
||||
|
||||
Args:
|
||||
iso_timestamp: ISO format timestamp string (UTC)
|
||||
timezone: IANA timezone string to convert to
|
||||
time_only: If True, return only HH:MM:SS, otherwise full datetime
|
||||
"""
|
||||
try:
|
||||
# Parse UTC timestamp
|
||||
dt = datetime.fromisoformat(iso_timestamp)
|
||||
# Convert to target timezone
|
||||
if dt.tzinfo is not None:
|
||||
dt = dt.astimezone(ZoneInfo(timezone))
|
||||
|
||||
if time_only:
|
||||
return dt.strftime("%H:%M:%S")
|
||||
return dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
@@ -38,15 +33,14 @@ def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool
|
||||
return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
|
||||
|
||||
|
||||
def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str = '') -> str:
|
||||
def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
|
||||
"""Generate dashboard HTML with access statistics
|
||||
|
||||
|
||||
Args:
|
||||
stats: Statistics dictionary
|
||||
timezone: IANA timezone string (e.g., 'Europe/Paris', 'America/New_York')
|
||||
dashboard_path: The secret dashboard path for generating API URLs
|
||||
"""
|
||||
|
||||
|
||||
# Generate IP rows with clickable functionality for dropdown stats
|
||||
top_ips_rows = '\n'.join([
|
||||
f'''<tr class="ip-row" data-ip="{_escape(ip)}">
|
||||
@@ -82,7 +76,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
<td class="ip-clickable">{_escape(log["ip"])}</td>
|
||||
<td>{_escape(log["path"])}</td>
|
||||
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
|
||||
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
|
||||
<td>{format_timestamp(log["timestamp"], time_only=True)}</td>
|
||||
</tr>
|
||||
<tr class="ip-stats-row" id="stats-row-suspicious-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
||||
<td colspan="4" class="ip-stats-cell">
|
||||
@@ -118,7 +112,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
<td>{_escape(log["path"])}</td>
|
||||
<td>{_escape(", ".join(log["attack_types"]))}</td>
|
||||
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
|
||||
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
|
||||
<td>{format_timestamp(log["timestamp"],time_only=True)}</td>
|
||||
</tr>
|
||||
<tr class="ip-stats-row" id="stats-row-attack-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
||||
<td colspan="5" class="ip-stats-cell">
|
||||
@@ -137,7 +131,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
<td>{_escape(log["username"])}</td>
|
||||
<td>{_escape(log["password"])}</td>
|
||||
<td>{_escape(log["path"])}</td>
|
||||
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
|
||||
<td>{format_timestamp(log["timestamp"], time_only=True)}</td>
|
||||
</tr>
|
||||
<tr class="ip-stats-row" id="stats-row-cred-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
|
||||
<td colspan="5" class="ip-stats-cell">
|
||||
@@ -537,7 +531,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
</a>
|
||||
</div>
|
||||
<h1>Krawl Dashboard</h1>
|
||||
|
||||
|
||||
<div class="stats-grid">
|
||||
<div class="stat-card">
|
||||
<div class="stat-value">{stats['total_accesses']}</div>
|
||||
@@ -683,15 +677,13 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
</div>
|
||||
</div>
|
||||
<script>
|
||||
const SERVER_TIMEZONE = '{timezone}';
|
||||
const DASHBOARD_PATH = '{dashboard_path}';
|
||||
|
||||
|
||||
function formatTimestamp(isoTimestamp) {{
|
||||
if (!isoTimestamp) return 'N/A';
|
||||
try {{
|
||||
const date = new Date(isoTimestamp);
|
||||
return date.toLocaleString('en-US', {{
|
||||
timeZone: SERVER_TIMEZONE,
|
||||
return date.toLocaleString('en-US', {{
|
||||
year: 'numeric',
|
||||
month: '2-digit',
|
||||
day: '2-digit',
|
||||
@@ -705,7 +697,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
return new Date(isoTimestamp).toLocaleString();
|
||||
}}
|
||||
}}
|
||||
|
||||
|
||||
document.querySelectorAll('th.sortable').forEach(header => {{
|
||||
header.addEventListener('click', function() {{
|
||||
const table = this.closest('table');
|
||||
@@ -713,25 +705,25 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
const rows = Array.from(tbody.querySelectorAll('tr'));
|
||||
const sortType = this.getAttribute('data-sort');
|
||||
const columnIndex = Array.from(this.parentElement.children).indexOf(this);
|
||||
|
||||
|
||||
const isAscending = this.classList.contains('asc');
|
||||
|
||||
|
||||
table.querySelectorAll('th.sortable').forEach(th => {{
|
||||
th.classList.remove('asc', 'desc');
|
||||
}});
|
||||
|
||||
|
||||
this.classList.add(isAscending ? 'desc' : 'asc');
|
||||
|
||||
|
||||
rows.sort((a, b) => {{
|
||||
let aValue = a.cells[columnIndex].textContent.trim();
|
||||
let bValue = b.cells[columnIndex].textContent.trim();
|
||||
|
||||
|
||||
if (sortType === 'count') {{
|
||||
aValue = parseInt(aValue) || 0;
|
||||
bValue = parseInt(bValue) || 0;
|
||||
return isAscending ? bValue - aValue : aValue - bValue;
|
||||
}}
|
||||
|
||||
|
||||
if (sortType === 'ip') {{
|
||||
const ipToNum = ip => {{
|
||||
const parts = ip.split('.');
|
||||
@@ -742,14 +734,14 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
const bNum = ipToNum(bValue);
|
||||
return isAscending ? bNum - aNum : aNum - bNum;
|
||||
}}
|
||||
|
||||
|
||||
if (isAscending) {{
|
||||
return bValue.localeCompare(aValue);
|
||||
}} else {{
|
||||
return aValue.localeCompare(bValue);
|
||||
}}
|
||||
}});
|
||||
|
||||
|
||||
rows.forEach(row => tbody.appendChild(row));
|
||||
}});
|
||||
}});
|
||||
@@ -797,39 +789,39 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
}}
|
||||
}});
|
||||
}});
|
||||
|
||||
|
||||
function formatIpStats(stats) {{
|
||||
let html = '<div class="stats-left">';
|
||||
|
||||
|
||||
html += '<div class="stat-row">';
|
||||
html += '<span class="stat-label-sm">Total Requests:</span>';
|
||||
html += `<span class="stat-value-sm">${{stats.total_requests || 0}}</span>`;
|
||||
html += '</div>';
|
||||
|
||||
|
||||
html += '<div class="stat-row">';
|
||||
html += '<span class="stat-label-sm">First Seen:</span>';
|
||||
html += `<span class="stat-value-sm">${{formatTimestamp(stats.first_seen)}}</span>`;
|
||||
html += '</div>';
|
||||
|
||||
|
||||
html += '<div class="stat-row">';
|
||||
html += '<span class="stat-label-sm">Last Seen:</span>';
|
||||
html += `<span class="stat-value-sm">${{formatTimestamp(stats.last_seen)}}</span>`;
|
||||
html += '</div>';
|
||||
|
||||
|
||||
if (stats.country_code || stats.city) {{
|
||||
html += '<div class="stat-row">';
|
||||
html += '<span class="stat-label-sm">Location:</span>';
|
||||
html += `<span class="stat-value-sm">${{stats.city || ''}}${{stats.city && stats.country_code ? ', ' : ''}}${{stats.country_code || 'Unknown'}}</span>`;
|
||||
html += '</div>';
|
||||
}}
|
||||
|
||||
|
||||
if (stats.asn_org) {{
|
||||
html += '<div class="stat-row">';
|
||||
html += '<span class="stat-label-sm">ASN Org:</span>';
|
||||
html += `<span class="stat-value-sm">${{stats.asn_org}}</span>`;
|
||||
html += '</div>';
|
||||
}}
|
||||
|
||||
|
||||
if (stats.reputation_score !== null && stats.reputation_score !== undefined) {{
|
||||
html += '<div class="stat-row">';
|
||||
html += '<span class="stat-label-sm">Reputation Score:</span>';
|
||||
@@ -847,7 +839,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
|
||||
if (stats.category_history && stats.category_history.length > 0) {{
|
||||
html += '<div class="timeline-container">';
|
||||
|
||||
|
||||
html += '<div class="timeline-header">';
|
||||
html += '<div class="timeline-title">Behavior Timeline</div>';
|
||||
|
||||
@@ -912,14 +904,14 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
html += '</div>';
|
||||
html += '</div>';
|
||||
}}
|
||||
|
||||
|
||||
html += '</div>';
|
||||
|
||||
|
||||
if (stats.category_scores && Object.keys(stats.category_scores).length > 0) {{
|
||||
html += '<div class="stats-right">';
|
||||
html += '<div style="font-size: 13px; font-weight: 600; color: #58a6ff; margin-bottom: 10px;">Category Score</div>';
|
||||
html += '<svg class="radar-chart" viewBox="-30 -30 260 260" preserveAspectRatio="xMidYMid meet">';
|
||||
|
||||
|
||||
const scores = {{
|
||||
attacker: stats.category_scores.attacker || 0,
|
||||
good_crawler: stats.category_scores.good_crawler || 0,
|
||||
@@ -927,15 +919,15 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
regular_user: stats.category_scores.regular_user || 0,
|
||||
unknown: stats.category_scores.unknown || 0
|
||||
}};
|
||||
|
||||
|
||||
const maxScore = Math.max(...Object.values(scores), 1);
|
||||
const minVisibleRadius = 0.15;
|
||||
const normalizedScores = {{}};
|
||||
|
||||
|
||||
Object.keys(scores).forEach(key => {{
|
||||
normalizedScores[key] = minVisibleRadius + (scores[key] / maxScore) * (1 - minVisibleRadius);
|
||||
}});
|
||||
|
||||
|
||||
const colors = {{
|
||||
attacker: '#f85149',
|
||||
good_crawler: '#3fb950',
|
||||
@@ -943,7 +935,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
regular_user: '#58a6ff',
|
||||
unknown: '#8b949e'
|
||||
}};
|
||||
|
||||
|
||||
const labels = {{
|
||||
attacker: 'Attacker',
|
||||
good_crawler: 'Good Bot',
|
||||
@@ -951,28 +943,28 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
regular_user: 'User',
|
||||
unknown: 'Unknown'
|
||||
}};
|
||||
|
||||
|
||||
const cx = 100, cy = 100, maxRadius = 75;
|
||||
for (let i = 1; i <= 5; i++) {{
|
||||
const r = (maxRadius / 5) * i;
|
||||
html += `<circle cx="${{cx}}" cy="${{cy}}" r="${{r}}" fill="none" stroke="#30363d" stroke-width="0.5"/>`;
|
||||
}}
|
||||
|
||||
|
||||
const angles = [0, 72, 144, 216, 288];
|
||||
const keys = ['good_crawler', 'regular_user', 'unknown', 'bad_crawler', 'attacker'];
|
||||
|
||||
|
||||
angles.forEach((angle, i) => {{
|
||||
const rad = (angle - 90) * Math.PI / 180;
|
||||
const x2 = cx + maxRadius * Math.cos(rad);
|
||||
const y2 = cy + maxRadius * Math.sin(rad);
|
||||
html += `<line x1="${{cx}}" y1="${{cy}}" x2="${{x2}}" y2="${{y2}}" stroke="#30363d" stroke-width="0.5"/>`;
|
||||
|
||||
|
||||
const labelDist = maxRadius + 35;
|
||||
const lx = cx + labelDist * Math.cos(rad);
|
||||
const ly = cy + labelDist * Math.sin(rad);
|
||||
html += `<text x="${{lx}}" y="${{ly}}" fill="#8b949e" font-size="12" text-anchor="middle" dominant-baseline="middle">${{labels[keys[i]]}}</text>`;
|
||||
}});
|
||||
|
||||
|
||||
let points = [];
|
||||
angles.forEach((angle, i) => {{
|
||||
const normalizedScore = normalizedScores[keys[i]];
|
||||
@@ -982,12 +974,12 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
const y = cy + r * Math.sin(rad);
|
||||
points.push(`${{x}},${{y}}`);
|
||||
}});
|
||||
|
||||
|
||||
const dominantKey = Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? a : b);
|
||||
const dominantColor = colors[dominantKey];
|
||||
|
||||
|
||||
html += `<polygon points="${{points.join(' ')}}" fill="${{dominantColor}}" fill-opacity="0.4" stroke="${{dominantColor}}" stroke-width="2.5"/>`;
|
||||
|
||||
|
||||
angles.forEach((angle, i) => {{
|
||||
const normalizedScore = normalizedScores[keys[i]];
|
||||
const rad = (angle - 90) * Math.PI / 180;
|
||||
@@ -996,9 +988,9 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
const y = cy + r * Math.sin(rad);
|
||||
html += `<circle cx="${{x}}" cy="${{y}}" r="4.5" fill="${{colors[keys[i]]}}" stroke="#0d1117" stroke-width="2"/>`;
|
||||
}});
|
||||
|
||||
|
||||
html += '</svg>';
|
||||
|
||||
|
||||
html += '<div class="radar-legend">';
|
||||
keys.forEach(key => {{
|
||||
html += '<div class="radar-legend-item">';
|
||||
@@ -1007,10 +999,10 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
|
||||
html += '</div>';
|
||||
}});
|
||||
html += '</div>';
|
||||
|
||||
|
||||
html += '</div>';
|
||||
}}
|
||||
|
||||
|
||||
return html;
|
||||
}}
|
||||
</script>
|
||||
|
||||
@@ -17,7 +17,7 @@ class AccessTracker:
|
||||
Maintains in-memory structures for fast dashboard access and
|
||||
persists data to SQLite for long-term storage and analysis.
|
||||
"""
|
||||
def __init__(self, db_manager: Optional[DatabaseManager] = None, timezone: Optional[ZoneInfo] = None):
|
||||
def __init__(self, db_manager: Optional[DatabaseManager] = None):
|
||||
"""
|
||||
Initialize the access tracker.
|
||||
|
||||
@@ -30,7 +30,6 @@ class AccessTracker:
|
||||
self.user_agent_counts: Dict[str, int] = defaultdict(int)
|
||||
self.access_log: List[Dict] = []
|
||||
self.credential_attempts: List[Dict] = []
|
||||
self.timezone = timezone or ZoneInfo('UTC')
|
||||
self.suspicious_patterns = [
|
||||
'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests',
|
||||
'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
|
||||
@@ -40,7 +39,7 @@ class AccessTracker:
|
||||
# Load attack patterns from wordlists
|
||||
wl = get_wordlists()
|
||||
self.attack_types = wl.attack_patterns
|
||||
|
||||
|
||||
# Fallback if wordlists not loaded
|
||||
if not self.attack_types:
|
||||
self.attack_types = {
|
||||
@@ -80,38 +79,38 @@ class AccessTracker:
|
||||
"""
|
||||
if not post_data:
|
||||
return None, None
|
||||
|
||||
|
||||
username = None
|
||||
password = None
|
||||
|
||||
|
||||
try:
|
||||
# Parse URL-encoded form data
|
||||
parsed = urllib.parse.parse_qs(post_data)
|
||||
|
||||
|
||||
# Common username field names
|
||||
username_fields = ['username', 'user', 'login', 'email', 'log', 'userid', 'account']
|
||||
for field in username_fields:
|
||||
if field in parsed and parsed[field]:
|
||||
username = parsed[field][0]
|
||||
break
|
||||
|
||||
|
||||
# Common password field names
|
||||
password_fields = ['password', 'pass', 'passwd', 'pwd', 'passphrase']
|
||||
for field in password_fields:
|
||||
if field in parsed and parsed[field]:
|
||||
password = parsed[field][0]
|
||||
break
|
||||
|
||||
|
||||
except Exception:
|
||||
# If parsing fails, try simple regex patterns
|
||||
username_match = re.search(r'(?:username|user|login|email|log)=([^&\s]+)', post_data, re.IGNORECASE)
|
||||
password_match = re.search(r'(?:password|pass|passwd|pwd)=([^&\s]+)', post_data, re.IGNORECASE)
|
||||
|
||||
|
||||
if username_match:
|
||||
username = urllib.parse.unquote_plus(username_match.group(1))
|
||||
if password_match:
|
||||
password = urllib.parse.unquote_plus(password_match.group(1))
|
||||
|
||||
|
||||
return username, password
|
||||
|
||||
def record_credential_attempt(self, ip: str, path: str, username: str, password: str):
|
||||
@@ -126,7 +125,7 @@ class AccessTracker:
|
||||
'path': path,
|
||||
'username': username,
|
||||
'password': password,
|
||||
'timestamp': datetime.now(self.timezone).isoformat()
|
||||
'timestamp': datetime.now().isoformat()
|
||||
})
|
||||
|
||||
# Persist to database
|
||||
@@ -193,7 +192,7 @@ class AccessTracker:
|
||||
'suspicious': is_suspicious,
|
||||
'honeypot_triggered': self.is_honeypot_path(path),
|
||||
'attack_types':attack_findings,
|
||||
'timestamp': datetime.now(self.timezone).isoformat()
|
||||
'timestamp': datetime.now().isoformat()
|
||||
})
|
||||
|
||||
# Persist to database
|
||||
|
||||
Reference in New Issue
Block a user