Merge branch 'dev' into fix/dashboard

This commit is contained in:
Patrick Di Fazio
2026-01-17 23:09:14 +01:00
committed by GitHub
14 changed files with 237 additions and 242 deletions

1
.gitignore vendored
View File

@@ -56,6 +56,7 @@ secrets/
.env
.env.local
.env.*.local
.envrc
# Logs
*.log

View File

@@ -3,7 +3,6 @@
server:
port: 5000
delay: 100 # Response delay in milliseconds
timezone: null # e.g., "America/New_York", "Europe/Paris" or null for system default
# manually set the server header, if null a random one will be used.
server_header: null

View File

@@ -1,5 +1,4 @@
version: '3.8'
---
services:
krawl:
build:
@@ -8,11 +7,26 @@ services:
container_name: krawl-server
ports:
- "5000:5000"
environment:
- CONFIG_LOCATION=config.yaml
# set this to change timezone, alternatively mount /etc/timezone or /etc/localtime based on the time system management of the host environment
# - TZ=${TZ}
volumes:
- ./wordlists.json:/app/wordlists.json:ro
- ./config.yaml:/app/config.yaml:ro
- ./logs:/app/logs
- ./exports:/app/exports
environment:
- CONFIG_LOCATION=config.yaml
- data:/app/data
restart: unless-stopped
develop:
watch:
- path: ./Dockerfile
action: rebuild
- path: ./src/
action: sync+restart
target: /app/src
- path: ./docker-compose.yaml
action: rebuild
volumes:
data:

View File

@@ -23,7 +23,7 @@ class Analyzer:
"""
Analyzes users activity and produces aggregated insights
"""
def __init__(self, db_manager: Optional[DatabaseManager] = None, timezone: Optional[ZoneInfo] = None):
def __init__(self, db_manager: Optional[DatabaseManager] = None):
"""
Initialize the access tracker.
@@ -31,11 +31,10 @@ class Analyzer:
db_manager: Optional DatabaseManager for persistence.
If None, will use the global singleton.
"""
self.timezone = timezone or ZoneInfo('UTC')
# Database manager for persistence (lazily initialized)
self._db_manager = db_manager
@property
def db(self) -> Optional[DatabaseManager]:
"""
@@ -51,11 +50,11 @@ class Analyzer:
# Database not initialized, persistence disabled
pass
return self._db_manager
# def infer_user_category(self, ip: str) -> str:
# config = get_config()
# http_risky_methods_threshold = config.http_risky_methods_threshold
# violated_robots_threshold = config.violated_robots_threshold
# uneven_request_timing_threshold = config.uneven_request_timing_threshold
@@ -70,7 +69,7 @@ class Analyzer:
# score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
# score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
# score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
# #1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
# weights = {
# "attacker": {
@@ -108,7 +107,7 @@ class Analyzer:
# total_accesses_count = len(accesses)
# if total_accesses_count <= 0:
# return
# # Set category as "unknown" for the first 3 requests
# if total_accesses_count < 3:
# category = "unknown"
@@ -127,7 +126,7 @@ class Analyzer:
# delete_accesses_count = len([item for item in accesses if item["method"] == "DELETE"])
# head_accesses_count = len([item for item in accesses if item["method"] == "HEAD"])
# options_accesses_count = len([item for item in accesses if item["method"] == "OPTIONS"])
# patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"])
# patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"])
# if total_accesses_count > http_risky_methods_threshold:
# http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
@@ -156,7 +155,7 @@ class Analyzer:
# if not line:
# continue
# parts = line.split(":")
# if parts[0] == "Disallow":
# parts[1] = parts[1].rstrip("/")
# #print(f"DISALLOW {parts[1]}")
@@ -180,7 +179,7 @@ class Analyzer:
# score["good_crawler"]["robots_violations"] = False
# score["bad_crawler"]["robots_violations"] = False
# score["regular_user"]["robots_violations"] = False
# #--------------------- Requests Timing ---------------------
# #Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
# timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses]
@@ -192,7 +191,7 @@ class Analyzer:
# for i in range(0, len(timestamps)-1):
# diff = (timestamps[i] - timestamps[i+1]).total_seconds()
# time_diffs.append(diff)
# mean = 0
# variance = 0
# std = 0
@@ -250,10 +249,10 @@ class Analyzer:
# except Exception:
# decoded_path = queried_path
# decoded_path_twice = queried_path
# for name, pattern in wl.attack_patterns.items():
# # Check original, decoded, and double-decoded paths
# if (re.search(pattern, queried_path, re.IGNORECASE) or
# if (re.search(pattern, queried_path, re.IGNORECASE) or
# re.search(pattern, decoded_path, re.IGNORECASE) or
# re.search(pattern, decoded_path_twice, re.IGNORECASE)):
# attack_urls_found_list.append(f"{name}: {pattern}")
@@ -261,7 +260,7 @@ class Analyzer:
# #remove duplicates
# attack_urls_found_list = set(attack_urls_found_list)
# attack_urls_found_list = list(attack_urls_found_list)
# if len(attack_urls_found_list) > attack_urls_threshold:
# score["attacker"]["attack_url"] = True
# score["good_crawler"]["attack_url"] = False
@@ -344,7 +343,7 @@ class Analyzer:
# sanitized_asn = sanitize_for_storage(asn, 100)
# sanitized_asn_org = sanitize_for_storage(asn_org, 100)
# sanitized_list_on = sanitize_dict(list_on, 100000)
# self._db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on)
# return
# return

View File

@@ -32,7 +32,6 @@ class Config:
# Database settings
database_path: str = "data/krawl.db"
database_retention_days: int = 30
timezone: str = None # IANA timezone (e.g., 'America/New_York', 'Europe/Rome')
# Analyzer settings
http_risky_methods_threshold: float = None
@@ -42,39 +41,6 @@ class Config:
user_agents_used_threshold: float = None
attack_urls_threshold: float = None
@staticmethod
# Try to fetch timezone before if not set
def get_system_timezone() -> str:
"""Get the system's default timezone"""
try:
if os.path.islink('/etc/localtime'):
tz_path = os.readlink('/etc/localtime')
if 'zoneinfo/' in tz_path:
return tz_path.split('zoneinfo/')[-1]
local_tz = time.tzname[time.daylight]
if local_tz and local_tz != 'UTC':
return local_tz
except Exception:
pass
# Default fallback to UTC
return 'UTC'
def get_timezone(self) -> ZoneInfo:
"""Get configured timezone as ZoneInfo object"""
if self.timezone:
try:
return ZoneInfo(self.timezone)
except Exception:
pass
system_tz = self.get_system_timezone()
try:
return ZoneInfo(system_tz)
except Exception:
return ZoneInfo('UTC')
@classmethod
def from_yaml(cls) -> 'Config':
"""Create configuration from YAML file"""
@@ -113,12 +79,11 @@ class Config:
# ensure the dashboard path starts with a /
if dashboard_path[:1] != "/":
dashboard_path = f"/{dashboard_path}"
return cls(
port=server.get('port', 5000),
delay=server.get('delay', 100),
server_header=server.get('server_header',""),
timezone=server.get('timezone'),
links_length_range=(
links.get('min_length', 5),
links.get('max_length', 15)
@@ -140,7 +105,7 @@ class Config:
database_retention_days=database.get('retention_days', 30),
http_risky_methods_threshold=analyzer.get('http_risky_methods_threshold', 0.1),
violated_robots_threshold=analyzer.get('violated_robots_threshold', 0.1),
uneven_request_timing_threshold=analyzer.get('uneven_request_timing_threshold', 0.5), # coefficient of variation
uneven_request_timing_threshold=analyzer.get('uneven_request_timing_threshold', 0.5), # coefficient of variation
uneven_request_timing_time_window_seconds=analyzer.get('uneven_request_timing_time_window_seconds', 300),
user_agents_used_threshold=analyzer.get('user_agents_used_threshold', 2),
attack_urls_threshold=analyzer.get('attack_urls_threshold', 1)

View File

@@ -7,7 +7,7 @@ Provides SQLAlchemy session management and database initialization.
import os
import stat
from datetime import datetime
from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
from zoneinfo import ZoneInfo
@@ -141,7 +141,7 @@ class DatabaseManager:
method=method[:10],
is_suspicious=is_suspicious,
is_honeypot_trigger=is_honeypot_trigger,
timestamp=datetime.now(tz=ZoneInfo('UTC'))
timestamp=datetime.now()
)
session.add(access_log)
session.flush() # Get the ID before committing
@@ -199,7 +199,7 @@ class DatabaseManager:
path=sanitize_path(path),
username=sanitize_credential(username),
password=sanitize_credential(password),
timestamp=datetime.now(tz=ZoneInfo('UTC'))
timestamp=datetime.now()
)
session.add(credential)
session.commit()
@@ -221,7 +221,7 @@ class DatabaseManager:
ip: IP address to update
"""
sanitized_ip = sanitize_ip(ip)
now = datetime.now(tz=ZoneInfo('UTC'))
now = datetime.now()
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
@@ -266,7 +266,7 @@ class DatabaseManager:
ip_stats.category = category
ip_stats.category_scores = category_scores
ip_stats.last_analysis = last_analysis
try:
session.commit()
except Exception as e:
@@ -280,21 +280,21 @@ class DatabaseManager:
Args:
ip: IP address to update
category: selected category
"""
session = self.session
sanitized_ip = sanitize_ip(ip)
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
# Record the manual category change
old_category = ip_stats.category
if old_category != category:
self._record_category_change(sanitized_ip, old_category, category, datetime.now(tz=ZoneInfo('UTC')))
self._record_category_change(sanitized_ip, old_category, category, datetime.now())
ip_stats.category = category
ip_stats.manual_category = True
try:
session.commit()
except Exception as e:
@@ -316,7 +316,7 @@ class DatabaseManager:
# Only record actual category changes
if old_category is None:
return
session = self.session
try:
history_entry = CategoryHistory(
@@ -352,7 +352,7 @@ class DatabaseManager:
{
'old_category': h.old_category,
'new_category': h.new_category,
'timestamp': h.timestamp.isoformat() + '+00:00'
'timestamp': h.timestamp.isoformat()
}
for h in history
]
@@ -390,6 +390,7 @@ class DatabaseManager:
def get_unenriched_ips(self, limit: int = 100) -> List[str]:
"""
Get IPs that don't have reputation data yet.
Excludes RFC1918 private addresses and other non-routable IPs.
Args:
limit: Maximum number of IPs to return
@@ -400,7 +401,18 @@ class DatabaseManager:
session = self.session
try:
ips = session.query(IpStats.ip).filter(
IpStats.country_code.is_(None)
IpStats.country_code.is_(None),
~IpStats.ip.like('10.%'),
~IpStats.ip.like('172.16.%'),
~IpStats.ip.like('172.17.%'),
~IpStats.ip.like('172.18.%'),
~IpStats.ip.like('172.19.%'),
~IpStats.ip.like('172.2_.%'),
~IpStats.ip.like('172.30.%'),
~IpStats.ip.like('172.31.%'),
~IpStats.ip.like('192.168.%'),
~IpStats.ip.like('127.%'),
~IpStats.ip.like('169.254.%')
).limit(limit).all()
return [ip[0] for ip in ips]
finally:
@@ -411,7 +423,8 @@ class DatabaseManager:
limit: int = 100,
offset: int = 0,
ip_filter: Optional[str] = None,
suspicious_only: bool = False
suspicious_only: bool = False,
since_minutes: Optional[int] = None
) -> List[Dict[str, Any]]:
"""
Retrieve access logs with optional filtering.
@@ -421,6 +434,7 @@ class DatabaseManager:
offset: Number of records to skip
ip_filter: Filter by IP address
suspicious_only: Only return suspicious requests
since_minutes: Only return logs from the last N minutes
Returns:
List of access log dictionaries
@@ -433,6 +447,9 @@ class DatabaseManager:
query = query.filter(AccessLog.ip == sanitize_ip(ip_filter))
if suspicious_only:
query = query.filter(AccessLog.is_suspicious == True)
if since_minutes is not None:
cutoff_time = datetime.now() - timedelta(minutes=since_minutes)
query = query.filter(AccessLog.timestamp >= cutoff_time)
logs = query.offset(offset).limit(limit).all()
@@ -445,7 +462,7 @@ class DatabaseManager:
'method': log.method,
'is_suspicious': log.is_suspicious,
'is_honeypot_trigger': log.is_honeypot_trigger,
'timestamp': log.timestamp.isoformat() + '+00:00',
'timestamp': log.timestamp.isoformat(),
'attack_types': [d.attack_type for d in log.attack_detections]
}
for log in logs
@@ -501,7 +518,7 @@ class DatabaseManager:
# print(f"Database error persisting access: {e}")
# return None
# finally:
# self.close_session()
# self.close_session()
def get_credential_attempts(
self,
@@ -538,7 +555,7 @@ class DatabaseManager:
'path': attempt.path,
'username': attempt.username,
'password': attempt.password,
'timestamp': attempt.timestamp.isoformat() + '+00:00'
'timestamp': attempt.timestamp.isoformat()
}
for attempt in attempts
]
@@ -565,8 +582,8 @@ class DatabaseManager:
{
'ip': s.ip,
'total_requests': s.total_requests,
'first_seen': s.first_seen.isoformat() + '+00:00',
'last_seen': s.last_seen.isoformat() + '+00:00',
'first_seen': s.first_seen.isoformat(),
'last_seen': s.last_seen.isoformat(),
'country_code': s.country_code,
'city': s.city,
'asn': s.asn,
@@ -596,18 +613,18 @@ class DatabaseManager:
session = self.session
try:
stat = session.query(IpStats).filter(IpStats.ip == ip).first()
if not stat:
return None
# Get category history for this IP
category_history = self.get_category_history(ip)
return {
'ip': stat.ip,
'total_requests': stat.total_requests,
'first_seen': stat.first_seen.isoformat() + '+00:00' if stat.first_seen else None,
'last_seen': stat.last_seen.isoformat() + '+00:00' if stat.last_seen else None,
'first_seen': stat.first_seen.isoformat() if stat.first_seen else None,
'last_seen': stat.last_seen.isoformat() if stat.last_seen else None,
'country_code': stat.country_code,
'city': stat.city,
'asn': stat.asn,
@@ -619,7 +636,7 @@ class DatabaseManager:
'category': stat.category,
'category_scores': stat.category_scores or {},
'manual_category': stat.manual_category,
'last_analysis': stat.last_analysis.isoformat() + '+00:00' if stat.last_analysis else None,
'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None,
'category_history': category_history
}
finally:
@@ -690,7 +707,7 @@ class DatabaseManager:
Args:
limit: Maximum number of results
Returns:
Returns:
List of (path, count) tuples ordered by count descending
"""
session = self.session
@@ -753,7 +770,7 @@ class DatabaseManager:
'ip': log.ip,
'path': log.path,
'user_agent': log.user_agent,
'timestamp': log.timestamp.isoformat() + '+00:00'
'timestamp': log.timestamp.isoformat()
}
for log in logs
]
@@ -811,7 +828,7 @@ class DatabaseManager:
'ip': log.ip,
'path': log.path,
'user_agent': log.user_agent,
'timestamp': log.timestamp.isoformat() + '+00:00',
'timestamp': log.timestamp.isoformat(),
'attack_types': [d.attack_type for d in log.attack_detections]
}
for log in logs

View File

@@ -43,12 +43,12 @@ class Handler(BaseHTTPRequestHandler):
if forwarded_for:
# X-Forwarded-For can contain multiple IPs, get the first (original client)
return forwarded_for.split(',')[0].strip()
# Check X-Real-IP header (set by nginx and other proxies)
real_ip = self.headers.get('X-Real-IP')
if real_ip:
return real_ip.strip()
# Fallback to direct connection IP
return self.client_address[0]
@@ -73,12 +73,12 @@ class Handler(BaseHTTPRequestHandler):
if not error_codes:
error_codes = [400, 401, 403, 404, 500, 502, 503]
return random.choice(error_codes)
def _parse_query_string(self) -> str:
"""Extract query string from the request path"""
parsed = urlparse(self.path)
return parsed.query
def _handle_sql_endpoint(self, path: str) -> bool:
"""
Handle SQL injection honeypot endpoints.
@@ -86,22 +86,22 @@ class Handler(BaseHTTPRequestHandler):
"""
# SQL-vulnerable endpoints
sql_endpoints = ['/api/search', '/api/sql', '/api/database']
base_path = urlparse(path).path
if base_path not in sql_endpoints:
return False
try:
# Get query parameters
query_string = self._parse_query_string()
# Log SQL injection attempt
client_ip = self._get_client_ip()
user_agent = self._get_user_agent()
# Always check for SQL injection patterns
error_msg, content_type, status_code = generate_sql_error_response(query_string or "")
if error_msg:
# SQL injection detected - log and return error
self.access_logger.warning(f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}")
@@ -117,9 +117,9 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers()
response_data = get_sql_response_with_data(base_path, query_string or "")
self.wfile.write(response_data.encode())
return True
except BrokenPipeError:
# Client disconnected
return True
@@ -142,7 +142,7 @@ class Handler(BaseHTTPRequestHandler):
# Build the content HTML
content = ""
# Add canary token if needed
if Handler.counter <= 0 and self.config.canary_token_url:
content += f"""
@@ -189,16 +189,16 @@ class Handler(BaseHTTPRequestHandler):
from urllib.parse import urlparse
base_path = urlparse(self.path).path
if base_path in ['/api/search', '/api/sql', '/api/database']:
content_length = int(self.headers.get('Content-Length', 0))
if content_length > 0:
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
self.access_logger.info(f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}")
error_msg, content_type, status_code = generate_sql_error_response(post_data)
try:
if error_msg:
self.access_logger.warning(f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}")
@@ -217,26 +217,26 @@ class Handler(BaseHTTPRequestHandler):
except Exception as e:
self.app_logger.error(f"Error in SQL POST handler: {str(e)}")
return
if base_path == '/api/contact':
content_length = int(self.headers.get('Content-Length', 0))
if content_length > 0:
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
parsed_data = {}
for pair in post_data.split('&'):
if '=' in pair:
key, value = pair.split('=', 1)
from urllib.parse import unquote_plus
parsed_data[unquote_plus(key)] = unquote_plus(value)
xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values())
if xss_detected:
self.access_logger.warning(f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}")
else:
self.access_logger.info(f"[XSS ENDPOINT POST] {client_ip} - {base_path}")
try:
self.send_response(200)
self.send_header('Content-type', 'text/html')
@@ -264,17 +264,17 @@ class Handler(BaseHTTPRequestHandler):
timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
credential_line = f"{timestamp}|{client_ip}|{username or 'N/A'}|{password or 'N/A'}|{self.path}"
self.credential_logger.info(credential_line)
# Also record in tracker for dashboard
self.tracker.record_credential_attempt(client_ip, self.path, username or 'N/A', password or 'N/A')
self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}")
# send the post data (body) to the record_access function so the post data can be used to detect suspicious things.
self.tracker.record_access(client_ip, self.path, user_agent, post_data, method='POST')
time.sleep(1)
try:
self.send_response(200)
self.send_header('Content-type', 'text/html')
@@ -289,11 +289,11 @@ class Handler(BaseHTTPRequestHandler):
def serve_special_path(self, path: str) -> bool:
"""Serve special paths like robots.txt, API endpoints, etc."""
# Check SQL injection honeypot endpoints first
if self._handle_sql_endpoint(path):
return True
try:
if path == '/robots.txt':
self.send_response(200)
@@ -301,7 +301,7 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers()
self.wfile.write(html_templates.robots_txt().encode())
return True
if path in ['/credentials.txt', '/passwords.txt', '/admin_notes.txt']:
self.send_response(200)
self.send_header('Content-type', 'text/plain')
@@ -311,7 +311,7 @@ class Handler(BaseHTTPRequestHandler):
else:
self.wfile.write(passwords_txt().encode())
return True
if path in ['/users.json', '/api_keys.json', '/config.json']:
self.send_response(200)
self.send_header('Content-type', 'application/json')
@@ -323,28 +323,28 @@ class Handler(BaseHTTPRequestHandler):
else:
self.wfile.write(api_response('/api/config').encode())
return True
if path in ['/admin', '/admin/', '/admin/login', '/login']:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.end_headers()
self.wfile.write(html_templates.login_form().encode())
return True
if path in ['/users', '/user', '/database', '/db', '/search']:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.end_headers()
self.wfile.write(html_templates.product_search().encode())
return True
if path in ['/info', '/input', '/contact', '/feedback', '/comment']:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.end_headers()
self.wfile.write(html_templates.input_form().encode())
return True
if path == '/server':
error_html, content_type = generate_server_error()
self.send_response(500)
@@ -352,35 +352,35 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers()
self.wfile.write(error_html.encode())
return True
if path in ['/wp-login.php', '/wp-login', '/wp-admin', '/wp-admin/']:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.end_headers()
self.wfile.write(html_templates.wp_login().encode())
return True
if path in ['/wp-content/', '/wp-includes/'] or 'wordpress' in path.lower():
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.end_headers()
self.wfile.write(html_templates.wordpress().encode())
return True
if 'phpmyadmin' in path.lower() or path in ['/pma/', '/phpMyAdmin/']:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.end_headers()
self.wfile.write(html_templates.phpmyadmin().encode())
return True
if path.startswith('/api/') or path.startswith('/api') or path in ['/.env']:
self.send_response(200)
self.send_header('Content-type', 'application/json')
self.end_headers()
self.wfile.write(api_response(path).encode())
return True
if path in ['/backup/', '/uploads/', '/private/', '/admin/', '/config/', '/database/']:
self.send_response(200)
self.send_header('Content-type', 'text/html')
@@ -400,22 +400,21 @@ class Handler(BaseHTTPRequestHandler):
"""Responds to webpage requests"""
client_ip = self._get_client_ip()
user_agent = self._get_user_agent()
if self.config.dashboard_secret_path and self.path == self.config.dashboard_secret_path:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.end_headers()
try:
stats = self.tracker.get_stats()
timezone = str(self.config.timezone) if self.config.timezone else 'UTC'
dashboard_path = self.config.dashboard_secret_path
self.wfile.write(generate_dashboard(stats, timezone, dashboard_path).encode())
self.wfile.write(generate_dashboard(stats, dashboard_path).encode())
except BrokenPipeError:
pass
except Exception as e:
self.app_logger.error(f"Error generating dashboard: {e}")
return
# API endpoint for fetching IP stats
if self.config.dashboard_secret_path and self.path.startswith(f"{self.config.dashboard_secret_path}/api/ip-stats/"):
ip_address = self.path.replace(f"{self.config.dashboard_secret_path}/api/ip-stats/", "")
@@ -473,7 +472,7 @@ class Handler(BaseHTTPRequestHandler):
return
self.tracker.record_access(client_ip, self.path, user_agent, method='GET')
# self.analyzer.infer_user_category(client_ip)
# self.analyzer.update_ip_rep_infos(client_ip)
@@ -497,9 +496,9 @@ class Handler(BaseHTTPRequestHandler):
try:
self.wfile.write(self.generate_page(self.path).encode())
Handler.counter -= 1
if Handler.counter < 0:
Handler.counter = self.config.canary_token_tries
except BrokenPipeError:

View File

@@ -8,20 +8,17 @@ Provides two loggers: app (application) and access (HTTP access logs).
import logging
import os
from logging.handlers import RotatingFileHandler
from typing import Optional
from zoneinfo import ZoneInfo
from datetime import datetime
class TimezoneFormatter(logging.Formatter):
"""Custom formatter that respects configured timezone"""
def __init__(self, fmt=None, datefmt=None, timezone: Optional[ZoneInfo] = None):
def __init__(self, fmt=None, datefmt=None):
super().__init__(fmt, datefmt)
self.timezone = timezone or ZoneInfo('UTC')
def formatTime(self, record, datefmt=None):
"""Override formatTime to use configured timezone"""
dt = datetime.fromtimestamp(record.created, tz=self.timezone)
dt = datetime.fromtimestamp(record.created)
if datefmt:
return dt.strftime(datefmt)
return dt.isoformat()
@@ -37,19 +34,16 @@ class LoggerManager:
cls._instance._initialized = False
return cls._instance
def initialize(self, log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None:
def initialize(self, log_dir: str = "logs") -> None:
"""
Initialize the logging system with rotating file handlers.
Initialize the logging system with rotating file handlers.
Args:
log_dir: Directory for log files (created if not exists)
timezone: ZoneInfo timezone for log timestamps (defaults to UTC)
"""
if self._initialized:
return
self.timezone = timezone or ZoneInfo('UTC')
# Create log directory if it doesn't exist
os.makedirs(log_dir, exist_ok=True)
@@ -57,7 +51,6 @@ class LoggerManager:
log_format = TimezoneFormatter(
"[%(asctime)s] %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
timezone=self.timezone
)
# Rotation settings: 1MB max, 5 backups
@@ -104,8 +97,8 @@ class LoggerManager:
self._credential_logger.handlers.clear()
# Credential logger uses a simple format: timestamp|ip|username|password|path
credential_format = TimezoneFormatter("%(message)s", timezone=self.timezone)
credential_format = TimezoneFormatter("%(message)s")
credential_file_handler = RotatingFileHandler(
os.path.join(log_dir, "credentials.log"),
maxBytes=max_bytes,
@@ -157,6 +150,6 @@ def get_credential_logger() -> logging.Logger:
return _logger_manager.credentials
def initialize_logging(log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None:
def initialize_logging(log_dir: str = "logs") -> None:
"""Initialize the logging system."""
_logger_manager.initialize(log_dir, timezone)
_logger_manager.initialize(log_dir)

View File

@@ -29,7 +29,6 @@ def print_usage():
print(' server:')
print(' port: 5000')
print(' delay: 100')
print(' timezone: null # or "America/New_York"')
print(' links:')
print(' min_length: 5')
print(' max_length: 15')
@@ -55,11 +54,8 @@ def main():
config = get_config()
# Get timezone configuration
tz = config.get_timezone()
# Initialize logging with timezone
initialize_logging(timezone=tz)
initialize_logging()
app_logger = get_app_logger()
access_logger = get_access_logger()
credential_logger = get_credential_logger()
@@ -71,8 +67,8 @@ def main():
except Exception as e:
app_logger.warning(f'Database initialization failed: {e}. Continuing with in-memory only.')
tracker = AccessTracker(timezone=tz)
analyzer = Analyzer(timezone=tz)
tracker = AccessTracker()
analyzer = Analyzer()
Handler.config = config
Handler.tracker = tracker
@@ -99,7 +95,6 @@ def main():
try:
app_logger.info(f'Starting deception server on port {config.port}...')
app_logger.info(f'Timezone configured: {tz.key}')
app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}')
if config.canary_token_url:
app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries')

View File

@@ -28,7 +28,7 @@ def main():
config = get_config()
db_manager = get_database()
app_logger = get_app_logger()
http_risky_methods_threshold = config.http_risky_methods_threshold
violated_robots_threshold = config.violated_robots_threshold
uneven_request_timing_threshold = config.uneven_request_timing_threshold
@@ -41,7 +41,7 @@ def main():
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
weights = {
"attacker": {
@@ -73,21 +73,27 @@ def main():
"attack_url": 0
}
}
accesses = db_manager.get_access_logs(limit=999999999)
ips = {item['ip'] for item in accesses}
# Get IPs with recent activity (last minute to match cron schedule)
recent_accesses = db_manager.get_access_logs(limit=999999999, since_minutes=1)
ips_to_analyze = {item['ip'] for item in recent_accesses}
for ip in ips:
ip_accesses = [item for item in accesses if item["ip"] == ip]
total_accesses_count = len(accesses)
if not ips_to_analyze:
app_logger.debug("[Background Task] analyze-ips: No recent activity, skipping")
return
for ip in ips_to_analyze:
# Get full history for this IP to perform accurate analysis
ip_accesses = db_manager.get_access_logs(limit=999999999, ip_filter=ip)
total_accesses_count = len(ip_accesses)
if total_accesses_count <= 0:
return
# Set category as "unknown" for the first 3 requests
if total_accesses_count < 3:
category = "unknown"
analyzed_metrics = {}
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
last_analysis = datetime.now()
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
return 0
#--------------------- HTTP Methods ---------------------
@@ -97,7 +103,7 @@ def main():
delete_accesses_count = len([item for item in ip_accesses if item["method"] == "DELETE"])
head_accesses_count = len([item for item in ip_accesses if item["method"] == "HEAD"])
options_accesses_count = len([item for item in ip_accesses if item["method"] == "OPTIONS"])
patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"])
patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"])
if total_accesses_count > http_risky_methods_threshold:
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
else:
@@ -123,7 +129,7 @@ def main():
if not line:
continue
parts = line.split(":")
if parts[0] == "Disallow":
parts[1] = parts[1].rstrip("/")
#print(f"DISALLOW {parts[1]}")
@@ -145,18 +151,18 @@ def main():
score["good_crawler"]["robots_violations"] = False
score["bad_crawler"]["robots_violations"] = False
score["regular_user"]["robots_violations"] = False
#--------------------- Requests Timing ---------------------
#Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
# Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
now_utc = datetime.now(tz=ZoneInfo('UTC'))
now_utc = datetime.now()
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
timestamps = sorted(timestamps, reverse=True)
time_diffs = []
for i in range(0, len(timestamps)-1):
diff = (timestamps[i] - timestamps[i+1]).total_seconds()
time_diffs.append(diff)
mean = 0
variance = 0
std = 0
@@ -206,14 +212,14 @@ def main():
except Exception:
decoded_path = queried_path
decoded_path_twice = queried_path
for name, pattern in wl.attack_patterns.items():
# Check original, decoded, and double-decoded paths
if (re.search(pattern, queried_path, re.IGNORECASE) or
if (re.search(pattern, queried_path, re.IGNORECASE) or
re.search(pattern, decoded_path, re.IGNORECASE) or
re.search(pattern, decoded_path_twice, re.IGNORECASE)):
attack_urls_found_list.append(f"{name}: {pattern}")
#remove duplicates
attack_urls_found_list = set(attack_urls_found_list)
attack_urls_found_list = list(attack_urls_found_list)
@@ -260,6 +266,6 @@ def main():
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
category = max(category_scores, key=category_scores.get)
last_analysis = datetime.now(tz=ZoneInfo('UTC'))
last_analysis = datetime.now()
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
return
return

View File

@@ -21,7 +21,7 @@ def main():
# Only get IPs that haven't been enriched yet
unenriched_ips = db_manager.get_unenriched_ips(limit=50)
app_logger.info(f"{len(unenriched_ips)} IP's need to be have reputation enrichment.")
for ip in unenriched_ips:
try:
api_url = "https://iprep.lcrawl.com/api/iprep/"

View File

@@ -1,6 +1,8 @@
# tasks/export_malicious_ips.py
import os
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
from logger import get_app_logger
from database import get_database
from models import AccessLog
@@ -24,6 +26,15 @@ OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
# ----------------------
# TASK LOGIC
# ----------------------
def has_recent_honeypot_access(session, minutes: int = 5) -> bool:
    """Return True when at least one honeypot-triggering access was logged
    within the last *minutes* minutes.

    Args:
        session: SQLAlchemy session used to query ``AccessLog`` rows.
        minutes: Size of the look-back window, in minutes (default 5).

    Returns:
        bool: True if any matching row exists, False otherwise.
    """
    window_start = datetime.now() - timedelta(minutes=minutes)
    # NOTE(review): naive local time — assumes AccessLog.timestamp is written
    # with the same naive clock (the tracker uses datetime.now().isoformat());
    # confirm both sides stay consistent.
    recent_hits = (
        session.query(AccessLog)
        .filter(
            # SQLAlchemy needs the ``== True`` comparison to build SQL; do not
            # replace with bare truthiness.
            AccessLog.is_honeypot_trigger == True,
            AccessLog.timestamp >= window_start,
        )
        .count()
    )
    return recent_hits > 0
def main():
"""
Export all IPs flagged as suspicious to a text file.
@@ -36,6 +47,11 @@ def main():
db = get_database()
session = db.session
# Check for recent honeypot activity
if not has_recent_honeypot_access(session):
app_logger.info(f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes")
return
# Query distinct suspicious IPs
results = session.query(distinct(AccessLog.ip)).filter(
AccessLog.is_suspicious == True

View File

@@ -15,21 +15,16 @@ def _escape(value) -> str:
return ""
return html.escape(str(value))
def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool = False) -> str:
def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str:
"""Format ISO timestamp for display with timezone conversion
Args:
iso_timestamp: ISO format timestamp string (UTC)
timezone: IANA timezone string to convert to
time_only: If True, return only HH:MM:SS, otherwise full datetime
"""
try:
# Parse UTC timestamp
dt = datetime.fromisoformat(iso_timestamp)
# Convert to target timezone
if dt.tzinfo is not None:
dt = dt.astimezone(ZoneInfo(timezone))
if time_only:
return dt.strftime("%H:%M:%S")
return dt.strftime("%Y-%m-%d %H:%M:%S")
@@ -38,15 +33,14 @@ def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool
return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str = '') -> str:
def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
"""Generate dashboard HTML with access statistics
Args:
stats: Statistics dictionary
timezone: IANA timezone string (e.g., 'Europe/Paris', 'America/New_York')
dashboard_path: The secret dashboard path for generating API URLs
"""
# Generate IP rows with clickable functionality for dropdown stats
top_ips_rows = '\n'.join([
f'''<tr class="ip-row" data-ip="{_escape(ip)}">
@@ -82,7 +76,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
<td class="ip-clickable">{_escape(log["ip"])}</td>
<td>{_escape(log["path"])}</td>
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
<td>{format_timestamp(log["timestamp"], time_only=True)}</td>
</tr>
<tr class="ip-stats-row" id="stats-row-suspicious-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
<td colspan="4" class="ip-stats-cell">
@@ -118,7 +112,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
<td>{_escape(log["path"])}</td>
<td>{_escape(", ".join(log["attack_types"]))}</td>
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
<td>{format_timestamp(log["timestamp"],time_only=True)}</td>
</tr>
<tr class="ip-stats-row" id="stats-row-attack-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
<td colspan="5" class="ip-stats-cell">
@@ -137,7 +131,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
<td>{_escape(log["username"])}</td>
<td>{_escape(log["password"])}</td>
<td>{_escape(log["path"])}</td>
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td>
<td>{format_timestamp(log["timestamp"], time_only=True)}</td>
</tr>
<tr class="ip-stats-row" id="stats-row-cred-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
<td colspan="5" class="ip-stats-cell">
@@ -515,7 +509,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
</a>
</div>
<h1>Krawl Dashboard</h1>
<div class="stats-grid">
<div class="stat-card">
<div class="stat-value">{stats['total_accesses']}</div>
@@ -661,15 +655,13 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
</div>
</div>
<script>
const SERVER_TIMEZONE = '{timezone}';
const DASHBOARD_PATH = '{dashboard_path}';
function formatTimestamp(isoTimestamp) {{
if (!isoTimestamp) return 'N/A';
try {{
const date = new Date(isoTimestamp);
return date.toLocaleString('en-US', {{
timeZone: SERVER_TIMEZONE,
return date.toLocaleString('en-US', {{
year: 'numeric',
month: '2-digit',
day: '2-digit',
@@ -683,7 +675,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
return new Date(isoTimestamp).toLocaleString();
}}
}}
document.querySelectorAll('th.sortable').forEach(header => {{
header.addEventListener('click', function() {{
const table = this.closest('table');
@@ -691,25 +683,25 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
const rows = Array.from(tbody.querySelectorAll('tr'));
const sortType = this.getAttribute('data-sort');
const columnIndex = Array.from(this.parentElement.children).indexOf(this);
const isAscending = this.classList.contains('asc');
table.querySelectorAll('th.sortable').forEach(th => {{
th.classList.remove('asc', 'desc');
}});
this.classList.add(isAscending ? 'desc' : 'asc');
rows.sort((a, b) => {{
let aValue = a.cells[columnIndex].textContent.trim();
let bValue = b.cells[columnIndex].textContent.trim();
if (sortType === 'count') {{
aValue = parseInt(aValue) || 0;
bValue = parseInt(bValue) || 0;
return isAscending ? bValue - aValue : aValue - bValue;
}}
if (sortType === 'ip') {{
const ipToNum = ip => {{
const parts = ip.split('.');
@@ -720,14 +712,14 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
const bNum = ipToNum(bValue);
return isAscending ? bNum - aNum : aNum - bNum;
}}
if (isAscending) {{
return bValue.localeCompare(aValue);
}} else {{
return aValue.localeCompare(bValue);
}}
}});
rows.forEach(row => tbody.appendChild(row));
}});
}});
@@ -775,39 +767,39 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
}}
}});
}});
function formatIpStats(stats) {{
let html = '<div class="stats-left">';
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Total Requests:</span>';
html += `<span class="stat-value-sm">${{stats.total_requests || 0}}</span>`;
html += '</div>';
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">First Seen:</span>';
html += `<span class="stat-value-sm">${{formatTimestamp(stats.first_seen)}}</span>`;
html += '</div>';
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Last Seen:</span>';
html += `<span class="stat-value-sm">${{formatTimestamp(stats.last_seen)}}</span>`;
html += '</div>';
if (stats.country_code || stats.city) {{
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Location:</span>';
html += `<span class="stat-value-sm">${{stats.city || ''}}${{stats.city && stats.country_code ? ', ' : ''}}${{stats.country_code || 'Unknown'}}</span>`;
html += '</div>';
}}
if (stats.asn_org) {{
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">ASN Org:</span>';
html += `<span class="stat-value-sm">${{stats.asn_org}}</span>`;
html += '</div>';
}}
if (stats.reputation_score !== null && stats.reputation_score !== undefined) {{
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Reputation Score:</span>';
@@ -881,14 +873,14 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
html += '</div>';
html += '</div>';
}}
html += '</div>';
if (stats.category_scores && Object.keys(stats.category_scores).length > 0) {{
html += '<div class="stats-right">';
html += '<div style="font-size: 13px; font-weight: 600; color: #58a6ff; margin-bottom: 10px;">Category Score</div>';
html += '<svg class="radar-chart" viewBox="-30 -30 260 260" preserveAspectRatio="xMidYMid meet">';
const scores = {{
attacker: stats.category_scores.attacker || 0,
good_crawler: stats.category_scores.good_crawler || 0,
@@ -896,15 +888,15 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
regular_user: stats.category_scores.regular_user || 0,
unknown: stats.category_scores.unknown || 0
}};
const maxScore = Math.max(...Object.values(scores), 1);
const minVisibleRadius = 0.15;
const normalizedScores = {{}};
Object.keys(scores).forEach(key => {{
normalizedScores[key] = minVisibleRadius + (scores[key] / maxScore) * (1 - minVisibleRadius);
}});
const colors = {{
attacker: '#f85149',
good_crawler: '#3fb950',
@@ -912,7 +904,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
regular_user: '#58a6ff',
unknown: '#8b949e'
}};
const labels = {{
attacker: 'Attacker',
good_crawler: 'Good Bot',
@@ -920,28 +912,28 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
regular_user: 'User',
unknown: 'Unknown'
}};
const cx = 100, cy = 100, maxRadius = 75;
for (let i = 1; i <= 5; i++) {{
const r = (maxRadius / 5) * i;
html += `<circle cx="${{cx}}" cy="${{cy}}" r="${{r}}" fill="none" stroke="#30363d" stroke-width="0.5"/>`;
}}
const angles = [0, 72, 144, 216, 288];
const keys = ['good_crawler', 'regular_user', 'unknown', 'bad_crawler', 'attacker'];
angles.forEach((angle, i) => {{
const rad = (angle - 90) * Math.PI / 180;
const x2 = cx + maxRadius * Math.cos(rad);
const y2 = cy + maxRadius * Math.sin(rad);
html += `<line x1="${{cx}}" y1="${{cy}}" x2="${{x2}}" y2="${{y2}}" stroke="#30363d" stroke-width="0.5"/>`;
const labelDist = maxRadius + 35;
const lx = cx + labelDist * Math.cos(rad);
const ly = cy + labelDist * Math.sin(rad);
html += `<text x="${{lx}}" y="${{ly}}" fill="#8b949e" font-size="12" text-anchor="middle" dominant-baseline="middle">${{labels[keys[i]]}}</text>`;
}});
let points = [];
angles.forEach((angle, i) => {{
const normalizedScore = normalizedScores[keys[i]];
@@ -951,12 +943,12 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
const y = cy + r * Math.sin(rad);
points.push(`${{x}},${{y}}`);
}});
const dominantKey = Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? a : b);
const dominantColor = colors[dominantKey];
html += `<polygon points="${{points.join(' ')}}" fill="${{dominantColor}}" fill-opacity="0.4" stroke="${{dominantColor}}" stroke-width="2.5"/>`;
angles.forEach((angle, i) => {{
const normalizedScore = normalizedScores[keys[i]];
const rad = (angle - 90) * Math.PI / 180;
@@ -965,9 +957,9 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
const y = cy + r * Math.sin(rad);
html += `<circle cx="${{x}}" cy="${{y}}" r="4.5" fill="${{colors[keys[i]]}}" stroke="#0d1117" stroke-width="2"/>`;
}});
html += '</svg>';
html += '<div class="radar-legend">';
keys.forEach(key => {{
html += '<div class="radar-legend-item">';
@@ -976,10 +968,10 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
html += '</div>';
}});
html += '</div>';
html += '</div>';
}}
return html;
}}
</script>

View File

@@ -17,7 +17,7 @@ class AccessTracker:
Maintains in-memory structures for fast dashboard access and
persists data to SQLite for long-term storage and analysis.
"""
def __init__(self, db_manager: Optional[DatabaseManager] = None, timezone: Optional[ZoneInfo] = None):
def __init__(self, db_manager: Optional[DatabaseManager] = None):
"""
Initialize the access tracker.
@@ -30,7 +30,6 @@ class AccessTracker:
self.user_agent_counts: Dict[str, int] = defaultdict(int)
self.access_log: List[Dict] = []
self.credential_attempts: List[Dict] = []
self.timezone = timezone or ZoneInfo('UTC')
self.suspicious_patterns = [
'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests',
'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
@@ -40,7 +39,7 @@ class AccessTracker:
# Load attack patterns from wordlists
wl = get_wordlists()
self.attack_types = wl.attack_patterns
# Fallback if wordlists not loaded
if not self.attack_types:
self.attack_types = {
@@ -80,38 +79,38 @@ class AccessTracker:
"""
if not post_data:
return None, None
username = None
password = None
try:
# Parse URL-encoded form data
parsed = urllib.parse.parse_qs(post_data)
# Common username field names
username_fields = ['username', 'user', 'login', 'email', 'log', 'userid', 'account']
for field in username_fields:
if field in parsed and parsed[field]:
username = parsed[field][0]
break
# Common password field names
password_fields = ['password', 'pass', 'passwd', 'pwd', 'passphrase']
for field in password_fields:
if field in parsed and parsed[field]:
password = parsed[field][0]
break
except Exception:
# If parsing fails, try simple regex patterns
username_match = re.search(r'(?:username|user|login|email|log)=([^&\s]+)', post_data, re.IGNORECASE)
password_match = re.search(r'(?:password|pass|passwd|pwd)=([^&\s]+)', post_data, re.IGNORECASE)
if username_match:
username = urllib.parse.unquote_plus(username_match.group(1))
if password_match:
password = urllib.parse.unquote_plus(password_match.group(1))
return username, password
def record_credential_attempt(self, ip: str, path: str, username: str, password: str):
@@ -126,7 +125,7 @@ class AccessTracker:
'path': path,
'username': username,
'password': password,
'timestamp': datetime.now(self.timezone).isoformat()
'timestamp': datetime.now().isoformat()
})
# Persist to database
@@ -193,7 +192,7 @@ class AccessTracker:
'suspicious': is_suspicious,
'honeypot_triggered': self.is_honeypot_path(path),
'attack_types':attack_findings,
'timestamp': datetime.now(self.timezone).isoformat()
'timestamp': datetime.now().isoformat()
})
# Persist to database