feat: removed manual timezone management, delegated timezone configuration to the execution environment

Removed the code that managed timezone setup from the config file; krawl now
obeys the environment configuration.
This commit is contained in:
carnivuth
2026-01-17 18:06:09 +01:00
parent 541b5d0f1b
commit 9d9a718aec
11 changed files with 173 additions and 232 deletions

View File

@@ -3,7 +3,6 @@
server: server:
port: 5000 port: 5000
delay: 100 # Response delay in milliseconds delay: 100 # Response delay in milliseconds
timezone: null # e.g., "America/New_York", "Europe/Paris" or null for system default
# manually set the server header, if null a random one will be used. # manually set the server header, if null a random one will be used.
server_header: null server_header: null

View File

@@ -23,7 +23,7 @@ class Analyzer:
""" """
Analyzes users activity and produces aggregated insights Analyzes users activity and produces aggregated insights
""" """
def __init__(self, db_manager: Optional[DatabaseManager] = None, timezone: Optional[ZoneInfo] = None): def __init__(self, db_manager: Optional[DatabaseManager] = None):
""" """
Initialize the access tracker. Initialize the access tracker.
@@ -31,11 +31,10 @@ class Analyzer:
db_manager: Optional DatabaseManager for persistence. db_manager: Optional DatabaseManager for persistence.
If None, will use the global singleton. If None, will use the global singleton.
""" """
self.timezone = timezone or ZoneInfo('UTC')
# Database manager for persistence (lazily initialized) # Database manager for persistence (lazily initialized)
self._db_manager = db_manager self._db_manager = db_manager
@property @property
def db(self) -> Optional[DatabaseManager]: def db(self) -> Optional[DatabaseManager]:
""" """
@@ -51,11 +50,11 @@ class Analyzer:
# Database not initialized, persistence disabled # Database not initialized, persistence disabled
pass pass
return self._db_manager return self._db_manager
# def infer_user_category(self, ip: str) -> str: # def infer_user_category(self, ip: str) -> str:
# config = get_config() # config = get_config()
# http_risky_methods_threshold = config.http_risky_methods_threshold # http_risky_methods_threshold = config.http_risky_methods_threshold
# violated_robots_threshold = config.violated_robots_threshold # violated_robots_threshold = config.violated_robots_threshold
# uneven_request_timing_threshold = config.uneven_request_timing_threshold # uneven_request_timing_threshold = config.uneven_request_timing_threshold
@@ -70,7 +69,7 @@ class Analyzer:
# score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} # score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
# score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} # score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
# score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} # score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
# #1-3 low, 4-6 mid, 7-9 high, 10-20 extreme # #1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
# weights = { # weights = {
# "attacker": { # "attacker": {
@@ -108,7 +107,7 @@ class Analyzer:
# total_accesses_count = len(accesses) # total_accesses_count = len(accesses)
# if total_accesses_count <= 0: # if total_accesses_count <= 0:
# return # return
# # Set category as "unknown" for the first 5 requests # # Set category as "unknown" for the first 5 requests
# if total_accesses_count < 3: # if total_accesses_count < 3:
# category = "unknown" # category = "unknown"
@@ -127,7 +126,7 @@ class Analyzer:
# delete_accesses_count = len([item for item in accesses if item["method"] == "DELETE"]) # delete_accesses_count = len([item for item in accesses if item["method"] == "DELETE"])
# head_accesses_count = len([item for item in accesses if item["method"] == "HEAD"]) # head_accesses_count = len([item for item in accesses if item["method"] == "HEAD"])
# options_accesses_count = len([item for item in accesses if item["method"] == "OPTIONS"]) # options_accesses_count = len([item for item in accesses if item["method"] == "OPTIONS"])
# patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"]) # patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"])
# if total_accesses_count > http_risky_methods_threshold: # if total_accesses_count > http_risky_methods_threshold:
# http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count # http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
@@ -156,7 +155,7 @@ class Analyzer:
# if not line: # if not line:
# continue # continue
# parts = line.split(":") # parts = line.split(":")
# if parts[0] == "Disallow": # if parts[0] == "Disallow":
# parts[1] = parts[1].rstrip("/") # parts[1] = parts[1].rstrip("/")
# #print(f"DISALLOW {parts[1]}") # #print(f"DISALLOW {parts[1]}")
@@ -180,7 +179,7 @@ class Analyzer:
# score["good_crawler"]["robots_violations"] = False # score["good_crawler"]["robots_violations"] = False
# score["bad_crawler"]["robots_violations"] = False # score["bad_crawler"]["robots_violations"] = False
# score["regular_user"]["robots_violations"] = False # score["regular_user"]["robots_violations"] = False
# #--------------------- Requests Timing --------------------- # #--------------------- Requests Timing ---------------------
# #Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior # #Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
# timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses] # timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses]
@@ -192,7 +191,7 @@ class Analyzer:
# for i in range(0, len(timestamps)-1): # for i in range(0, len(timestamps)-1):
# diff = (timestamps[i] - timestamps[i+1]).total_seconds() # diff = (timestamps[i] - timestamps[i+1]).total_seconds()
# time_diffs.append(diff) # time_diffs.append(diff)
# mean = 0 # mean = 0
# variance = 0 # variance = 0
# std = 0 # std = 0
@@ -250,10 +249,10 @@ class Analyzer:
# except Exception: # except Exception:
# decoded_path = queried_path # decoded_path = queried_path
# decoded_path_twice = queried_path # decoded_path_twice = queried_path
# for name, pattern in wl.attack_patterns.items(): # for name, pattern in wl.attack_patterns.items():
# # Check original, decoded, and double-decoded paths # # Check original, decoded, and double-decoded paths
# if (re.search(pattern, queried_path, re.IGNORECASE) or # if (re.search(pattern, queried_path, re.IGNORECASE) or
# re.search(pattern, decoded_path, re.IGNORECASE) or # re.search(pattern, decoded_path, re.IGNORECASE) or
# re.search(pattern, decoded_path_twice, re.IGNORECASE)): # re.search(pattern, decoded_path_twice, re.IGNORECASE)):
# attack_urls_found_list.append(f"{name}: {pattern}") # attack_urls_found_list.append(f"{name}: {pattern}")
@@ -261,7 +260,7 @@ class Analyzer:
# #remove duplicates # #remove duplicates
# attack_urls_found_list = set(attack_urls_found_list) # attack_urls_found_list = set(attack_urls_found_list)
# attack_urls_found_list = list(attack_urls_found_list) # attack_urls_found_list = list(attack_urls_found_list)
# if len(attack_urls_found_list) > attack_urls_threshold: # if len(attack_urls_found_list) > attack_urls_threshold:
# score["attacker"]["attack_url"] = True # score["attacker"]["attack_url"] = True
# score["good_crawler"]["attack_url"] = False # score["good_crawler"]["attack_url"] = False
@@ -344,7 +343,7 @@ class Analyzer:
# sanitized_asn = sanitize_for_storage(asn, 100) # sanitized_asn = sanitize_for_storage(asn, 100)
# sanitized_asn_org = sanitize_for_storage(asn_org, 100) # sanitized_asn_org = sanitize_for_storage(asn_org, 100)
# sanitized_list_on = sanitize_dict(list_on, 100000) # sanitized_list_on = sanitize_dict(list_on, 100000)
# self._db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on) # self._db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on)
# return # return

View File

@@ -32,7 +32,6 @@ class Config:
# Database settings # Database settings
database_path: str = "data/krawl.db" database_path: str = "data/krawl.db"
database_retention_days: int = 30 database_retention_days: int = 30
timezone: str = None # IANA timezone (e.g., 'America/New_York', 'Europe/Rome')
# Analyzer settings # Analyzer settings
http_risky_methods_threshold: float = None http_risky_methods_threshold: float = None
@@ -42,39 +41,6 @@ class Config:
user_agents_used_threshold: float = None user_agents_used_threshold: float = None
attack_urls_threshold: float = None attack_urls_threshold: float = None
@staticmethod
# Try to fetch timezone before if not set
def get_system_timezone() -> str:
"""Get the system's default timezone"""
try:
if os.path.islink('/etc/localtime'):
tz_path = os.readlink('/etc/localtime')
if 'zoneinfo/' in tz_path:
return tz_path.split('zoneinfo/')[-1]
local_tz = time.tzname[time.daylight]
if local_tz and local_tz != 'UTC':
return local_tz
except Exception:
pass
# Default fallback to UTC
return 'UTC'
def get_timezone(self) -> ZoneInfo:
"""Get configured timezone as ZoneInfo object"""
if self.timezone:
try:
return ZoneInfo(self.timezone)
except Exception:
pass
system_tz = self.get_system_timezone()
try:
return ZoneInfo(system_tz)
except Exception:
return ZoneInfo('UTC')
@classmethod @classmethod
def from_yaml(cls) -> 'Config': def from_yaml(cls) -> 'Config':
"""Create configuration from YAML file""" """Create configuration from YAML file"""
@@ -113,12 +79,11 @@ class Config:
# ensure the dashboard path starts with a / # ensure the dashboard path starts with a /
if dashboard_path[:1] != "/": if dashboard_path[:1] != "/":
dashboard_path = f"/{dashboard_path}" dashboard_path = f"/{dashboard_path}"
return cls( return cls(
port=server.get('port', 5000), port=server.get('port', 5000),
delay=server.get('delay', 100), delay=server.get('delay', 100),
server_header=server.get('server_header',""), server_header=server.get('server_header',""),
timezone=server.get('timezone'),
links_length_range=( links_length_range=(
links.get('min_length', 5), links.get('min_length', 5),
links.get('max_length', 15) links.get('max_length', 15)
@@ -140,7 +105,7 @@ class Config:
database_retention_days=database.get('retention_days', 30), database_retention_days=database.get('retention_days', 30),
http_risky_methods_threshold=analyzer.get('http_risky_methods_threshold', 0.1), http_risky_methods_threshold=analyzer.get('http_risky_methods_threshold', 0.1),
violated_robots_threshold=analyzer.get('violated_robots_threshold', 0.1), violated_robots_threshold=analyzer.get('violated_robots_threshold', 0.1),
uneven_request_timing_threshold=analyzer.get('uneven_request_timing_threshold', 0.5), # coefficient of variation uneven_request_timing_threshold=analyzer.get('uneven_request_timing_threshold', 0.5), # coefficient of variation
uneven_request_timing_time_window_seconds=analyzer.get('uneven_request_timing_time_window_seconds', 300), uneven_request_timing_time_window_seconds=analyzer.get('uneven_request_timing_time_window_seconds', 300),
user_agents_used_threshold=analyzer.get('user_agents_used_threshold', 2), user_agents_used_threshold=analyzer.get('user_agents_used_threshold', 2),
attack_urls_threshold=analyzer.get('attack_urls_threshold', 1) attack_urls_threshold=analyzer.get('attack_urls_threshold', 1)

View File

@@ -141,7 +141,7 @@ class DatabaseManager:
method=method[:10], method=method[:10],
is_suspicious=is_suspicious, is_suspicious=is_suspicious,
is_honeypot_trigger=is_honeypot_trigger, is_honeypot_trigger=is_honeypot_trigger,
timestamp=datetime.now(tz=ZoneInfo('UTC')) timestamp=datetime.now()
) )
session.add(access_log) session.add(access_log)
session.flush() # Get the ID before committing session.flush() # Get the ID before committing
@@ -199,7 +199,7 @@ class DatabaseManager:
path=sanitize_path(path), path=sanitize_path(path),
username=sanitize_credential(username), username=sanitize_credential(username),
password=sanitize_credential(password), password=sanitize_credential(password),
timestamp=datetime.now(tz=ZoneInfo('UTC')) timestamp=datetime.now()
) )
session.add(credential) session.add(credential)
session.commit() session.commit()
@@ -221,7 +221,7 @@ class DatabaseManager:
ip: IP address to update ip: IP address to update
""" """
sanitized_ip = sanitize_ip(ip) sanitized_ip = sanitize_ip(ip)
now = datetime.now(tz=ZoneInfo('UTC')) now = datetime.now()
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first() ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
@@ -266,7 +266,7 @@ class DatabaseManager:
ip_stats.category = category ip_stats.category = category
ip_stats.category_scores = category_scores ip_stats.category_scores = category_scores
ip_stats.last_analysis = last_analysis ip_stats.last_analysis = last_analysis
try: try:
session.commit() session.commit()
except Exception as e: except Exception as e:
@@ -280,21 +280,21 @@ class DatabaseManager:
Args: Args:
ip: IP address to update ip: IP address to update
category: selected category category: selected category
""" """
session = self.session session = self.session
sanitized_ip = sanitize_ip(ip) sanitized_ip = sanitize_ip(ip)
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first() ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
# Record the manual category change # Record the manual category change
old_category = ip_stats.category old_category = ip_stats.category
if old_category != category: if old_category != category:
self._record_category_change(sanitized_ip, old_category, category, datetime.now(tz=ZoneInfo('UTC'))) self._record_category_change(sanitized_ip, old_category, category, datetime.now())
ip_stats.category = category ip_stats.category = category
ip_stats.manual_category = True ip_stats.manual_category = True
try: try:
session.commit() session.commit()
except Exception as e: except Exception as e:
@@ -316,7 +316,7 @@ class DatabaseManager:
# Only record actual category changes # Only record actual category changes
if old_category is None: if old_category is None:
return return
session = self.session session = self.session
try: try:
history_entry = CategoryHistory( history_entry = CategoryHistory(
@@ -352,7 +352,7 @@ class DatabaseManager:
{ {
'old_category': h.old_category, 'old_category': h.old_category,
'new_category': h.new_category, 'new_category': h.new_category,
'timestamp': h.timestamp.isoformat() + '+00:00' 'timestamp': h.timestamp.isoformat()
} }
for h in history for h in history
] ]
@@ -448,7 +448,7 @@ class DatabaseManager:
if suspicious_only: if suspicious_only:
query = query.filter(AccessLog.is_suspicious == True) query = query.filter(AccessLog.is_suspicious == True)
if since_minutes is not None: if since_minutes is not None:
cutoff_time = datetime.now(tz=ZoneInfo('UTC')) - timedelta(minutes=since_minutes) cutoff_time = datetime.now() - timedelta(minutes=since_minutes)
query = query.filter(AccessLog.timestamp >= cutoff_time) query = query.filter(AccessLog.timestamp >= cutoff_time)
logs = query.offset(offset).limit(limit).all() logs = query.offset(offset).limit(limit).all()
@@ -462,7 +462,7 @@ class DatabaseManager:
'method': log.method, 'method': log.method,
'is_suspicious': log.is_suspicious, 'is_suspicious': log.is_suspicious,
'is_honeypot_trigger': log.is_honeypot_trigger, 'is_honeypot_trigger': log.is_honeypot_trigger,
'timestamp': log.timestamp.isoformat() + '+00:00', 'timestamp': log.timestamp.isoformat(),
'attack_types': [d.attack_type for d in log.attack_detections] 'attack_types': [d.attack_type for d in log.attack_detections]
} }
for log in logs for log in logs
@@ -518,7 +518,7 @@ class DatabaseManager:
# print(f"Database error persisting access: {e}") # print(f"Database error persisting access: {e}")
# return None # return None
# finally: # finally:
# self.close_session() # self.close_session()
def get_credential_attempts( def get_credential_attempts(
self, self,
@@ -555,7 +555,7 @@ class DatabaseManager:
'path': attempt.path, 'path': attempt.path,
'username': attempt.username, 'username': attempt.username,
'password': attempt.password, 'password': attempt.password,
'timestamp': attempt.timestamp.isoformat() + '+00:00' 'timestamp': attempt.timestamp.isoformat()
} }
for attempt in attempts for attempt in attempts
] ]
@@ -582,8 +582,8 @@ class DatabaseManager:
{ {
'ip': s.ip, 'ip': s.ip,
'total_requests': s.total_requests, 'total_requests': s.total_requests,
'first_seen': s.first_seen.isoformat() + '+00:00', 'first_seen': s.first_seen.isoformat(),
'last_seen': s.last_seen.isoformat() + '+00:00', 'last_seen': s.last_seen.isoformat(),
'country_code': s.country_code, 'country_code': s.country_code,
'city': s.city, 'city': s.city,
'asn': s.asn, 'asn': s.asn,
@@ -613,18 +613,18 @@ class DatabaseManager:
session = self.session session = self.session
try: try:
stat = session.query(IpStats).filter(IpStats.ip == ip).first() stat = session.query(IpStats).filter(IpStats.ip == ip).first()
if not stat: if not stat:
return None return None
# Get category history for this IP # Get category history for this IP
category_history = self.get_category_history(ip) category_history = self.get_category_history(ip)
return { return {
'ip': stat.ip, 'ip': stat.ip,
'total_requests': stat.total_requests, 'total_requests': stat.total_requests,
'first_seen': stat.first_seen.isoformat() + '+00:00' if stat.first_seen else None, 'first_seen': stat.first_seen.isoformat() if stat.first_seen else None,
'last_seen': stat.last_seen.isoformat() + '+00:00' if stat.last_seen else None, 'last_seen': stat.last_seen.isoformat() if stat.last_seen else None,
'country_code': stat.country_code, 'country_code': stat.country_code,
'city': stat.city, 'city': stat.city,
'asn': stat.asn, 'asn': stat.asn,
@@ -636,7 +636,7 @@ class DatabaseManager:
'category': stat.category, 'category': stat.category,
'category_scores': stat.category_scores or {}, 'category_scores': stat.category_scores or {},
'manual_category': stat.manual_category, 'manual_category': stat.manual_category,
'last_analysis': stat.last_analysis.isoformat() + '+00:00' if stat.last_analysis else None, 'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None,
'category_history': category_history 'category_history': category_history
} }
finally: finally:
@@ -707,7 +707,7 @@ class DatabaseManager:
Args: Args:
limit: Maximum number of results limit: Maximum number of results
Returns: Returns:data
List of (path, count) tuples ordered by count descending List of (path, count) tuples ordered by count descending
""" """
session = self.session session = self.session
@@ -770,7 +770,7 @@ class DatabaseManager:
'ip': log.ip, 'ip': log.ip,
'path': log.path, 'path': log.path,
'user_agent': log.user_agent, 'user_agent': log.user_agent,
'timestamp': log.timestamp.isoformat() + '+00:00' 'timestamp': log.timestamp.isoformat()
} }
for log in logs for log in logs
] ]
@@ -828,7 +828,7 @@ class DatabaseManager:
'ip': log.ip, 'ip': log.ip,
'path': log.path, 'path': log.path,
'user_agent': log.user_agent, 'user_agent': log.user_agent,
'timestamp': log.timestamp.isoformat() + '+00:00', 'timestamp': log.timestamp.isoformat(),
'attack_types': [d.attack_type for d in log.attack_detections] 'attack_types': [d.attack_type for d in log.attack_detections]
} }
for log in logs for log in logs

View File

@@ -43,12 +43,12 @@ class Handler(BaseHTTPRequestHandler):
if forwarded_for: if forwarded_for:
# X-Forwarded-For can contain multiple IPs, get the first (original client) # X-Forwarded-For can contain multiple IPs, get the first (original client)
return forwarded_for.split(',')[0].strip() return forwarded_for.split(',')[0].strip()
# Check X-Real-IP header (set by nginx and other proxies) # Check X-Real-IP header (set by nginx and other proxies)
real_ip = self.headers.get('X-Real-IP') real_ip = self.headers.get('X-Real-IP')
if real_ip: if real_ip:
return real_ip.strip() return real_ip.strip()
# Fallback to direct connection IP # Fallback to direct connection IP
return self.client_address[0] return self.client_address[0]
@@ -73,12 +73,12 @@ class Handler(BaseHTTPRequestHandler):
if not error_codes: if not error_codes:
error_codes = [400, 401, 403, 404, 500, 502, 503] error_codes = [400, 401, 403, 404, 500, 502, 503]
return random.choice(error_codes) return random.choice(error_codes)
def _parse_query_string(self) -> str: def _parse_query_string(self) -> str:
"""Extract query string from the request path""" """Extract query string from the request path"""
parsed = urlparse(self.path) parsed = urlparse(self.path)
return parsed.query return parsed.query
def _handle_sql_endpoint(self, path: str) -> bool: def _handle_sql_endpoint(self, path: str) -> bool:
""" """
Handle SQL injection honeypot endpoints. Handle SQL injection honeypot endpoints.
@@ -86,22 +86,22 @@ class Handler(BaseHTTPRequestHandler):
""" """
# SQL-vulnerable endpoints # SQL-vulnerable endpoints
sql_endpoints = ['/api/search', '/api/sql', '/api/database'] sql_endpoints = ['/api/search', '/api/sql', '/api/database']
base_path = urlparse(path).path base_path = urlparse(path).path
if base_path not in sql_endpoints: if base_path not in sql_endpoints:
return False return False
try: try:
# Get query parameters # Get query parameters
query_string = self._parse_query_string() query_string = self._parse_query_string()
# Log SQL injection attempt # Log SQL injection attempt
client_ip = self._get_client_ip() client_ip = self._get_client_ip()
user_agent = self._get_user_agent() user_agent = self._get_user_agent()
# Always check for SQL injection patterns # Always check for SQL injection patterns
error_msg, content_type, status_code = generate_sql_error_response(query_string or "") error_msg, content_type, status_code = generate_sql_error_response(query_string or "")
if error_msg: if error_msg:
# SQL injection detected - log and return error # SQL injection detected - log and return error
self.access_logger.warning(f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}") self.access_logger.warning(f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}")
@@ -117,9 +117,9 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers() self.end_headers()
response_data = get_sql_response_with_data(base_path, query_string or "") response_data = get_sql_response_with_data(base_path, query_string or "")
self.wfile.write(response_data.encode()) self.wfile.write(response_data.encode())
return True return True
except BrokenPipeError: except BrokenPipeError:
# Client disconnected # Client disconnected
return True return True
@@ -142,7 +142,7 @@ class Handler(BaseHTTPRequestHandler):
# Build the content HTML # Build the content HTML
content = "" content = ""
# Add canary token if needed # Add canary token if needed
if Handler.counter <= 0 and self.config.canary_token_url: if Handler.counter <= 0 and self.config.canary_token_url:
content += f""" content += f"""
@@ -189,16 +189,16 @@ class Handler(BaseHTTPRequestHandler):
from urllib.parse import urlparse from urllib.parse import urlparse
base_path = urlparse(self.path).path base_path = urlparse(self.path).path
if base_path in ['/api/search', '/api/sql', '/api/database']: if base_path in ['/api/search', '/api/sql', '/api/database']:
content_length = int(self.headers.get('Content-Length', 0)) content_length = int(self.headers.get('Content-Length', 0))
if content_length > 0: if content_length > 0:
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace") post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
self.access_logger.info(f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}") self.access_logger.info(f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}")
error_msg, content_type, status_code = generate_sql_error_response(post_data) error_msg, content_type, status_code = generate_sql_error_response(post_data)
try: try:
if error_msg: if error_msg:
self.access_logger.warning(f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}") self.access_logger.warning(f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}")
@@ -217,26 +217,26 @@ class Handler(BaseHTTPRequestHandler):
except Exception as e: except Exception as e:
self.app_logger.error(f"Error in SQL POST handler: {str(e)}") self.app_logger.error(f"Error in SQL POST handler: {str(e)}")
return return
if base_path == '/api/contact': if base_path == '/api/contact':
content_length = int(self.headers.get('Content-Length', 0)) content_length = int(self.headers.get('Content-Length', 0))
if content_length > 0: if content_length > 0:
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace") post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
parsed_data = {} parsed_data = {}
for pair in post_data.split('&'): for pair in post_data.split('&'):
if '=' in pair: if '=' in pair:
key, value = pair.split('=', 1) key, value = pair.split('=', 1)
from urllib.parse import unquote_plus from urllib.parse import unquote_plus
parsed_data[unquote_plus(key)] = unquote_plus(value) parsed_data[unquote_plus(key)] = unquote_plus(value)
xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values()) xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values())
if xss_detected: if xss_detected:
self.access_logger.warning(f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}") self.access_logger.warning(f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}")
else: else:
self.access_logger.info(f"[XSS ENDPOINT POST] {client_ip} - {base_path}") self.access_logger.info(f"[XSS ENDPOINT POST] {client_ip} - {base_path}")
try: try:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header('Content-type', 'text/html')
@@ -264,17 +264,17 @@ class Handler(BaseHTTPRequestHandler):
timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
credential_line = f"{timestamp}|{client_ip}|{username or 'N/A'}|{password or 'N/A'}|{self.path}" credential_line = f"{timestamp}|{client_ip}|{username or 'N/A'}|{password or 'N/A'}|{self.path}"
self.credential_logger.info(credential_line) self.credential_logger.info(credential_line)
# Also record in tracker for dashboard # Also record in tracker for dashboard
self.tracker.record_credential_attempt(client_ip, self.path, username or 'N/A', password or 'N/A') self.tracker.record_credential_attempt(client_ip, self.path, username or 'N/A', password or 'N/A')
self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}") self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}")
# send the post data (body) to the record_access function so the post data can be used to detect suspicious things. # send the post data (body) to the record_access function so the post data can be used to detect suspicious things.
self.tracker.record_access(client_ip, self.path, user_agent, post_data, method='POST') self.tracker.record_access(client_ip, self.path, user_agent, post_data, method='POST')
time.sleep(1) time.sleep(1)
try: try:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header('Content-type', 'text/html')
@@ -289,11 +289,11 @@ class Handler(BaseHTTPRequestHandler):
def serve_special_path(self, path: str) -> bool: def serve_special_path(self, path: str) -> bool:
"""Serve special paths like robots.txt, API endpoints, etc.""" """Serve special paths like robots.txt, API endpoints, etc."""
# Check SQL injection honeypot endpoints first # Check SQL injection honeypot endpoints first
if self._handle_sql_endpoint(path): if self._handle_sql_endpoint(path):
return True return True
try: try:
if path == '/robots.txt': if path == '/robots.txt':
self.send_response(200) self.send_response(200)
@@ -301,7 +301,7 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers() self.end_headers()
self.wfile.write(html_templates.robots_txt().encode()) self.wfile.write(html_templates.robots_txt().encode())
return True return True
if path in ['/credentials.txt', '/passwords.txt', '/admin_notes.txt']: if path in ['/credentials.txt', '/passwords.txt', '/admin_notes.txt']:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/plain') self.send_header('Content-type', 'text/plain')
@@ -311,7 +311,7 @@ class Handler(BaseHTTPRequestHandler):
else: else:
self.wfile.write(passwords_txt().encode()) self.wfile.write(passwords_txt().encode())
return True return True
if path in ['/users.json', '/api_keys.json', '/config.json']: if path in ['/users.json', '/api_keys.json', '/config.json']:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'application/json') self.send_header('Content-type', 'application/json')
@@ -323,28 +323,28 @@ class Handler(BaseHTTPRequestHandler):
else: else:
self.wfile.write(api_response('/api/config').encode()) self.wfile.write(api_response('/api/config').encode())
return True return True
if path in ['/admin', '/admin/', '/admin/login', '/login']: if path in ['/admin', '/admin/', '/admin/login', '/login']:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header('Content-type', 'text/html')
self.end_headers() self.end_headers()
self.wfile.write(html_templates.login_form().encode()) self.wfile.write(html_templates.login_form().encode())
return True return True
if path in ['/users', '/user', '/database', '/db', '/search']: if path in ['/users', '/user', '/database', '/db', '/search']:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header('Content-type', 'text/html')
self.end_headers() self.end_headers()
self.wfile.write(html_templates.product_search().encode()) self.wfile.write(html_templates.product_search().encode())
return True return True
if path in ['/info', '/input', '/contact', '/feedback', '/comment']: if path in ['/info', '/input', '/contact', '/feedback', '/comment']:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header('Content-type', 'text/html')
self.end_headers() self.end_headers()
self.wfile.write(html_templates.input_form().encode()) self.wfile.write(html_templates.input_form().encode())
return True return True
if path == '/server': if path == '/server':
error_html, content_type = generate_server_error() error_html, content_type = generate_server_error()
self.send_response(500) self.send_response(500)
@@ -352,35 +352,35 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers() self.end_headers()
self.wfile.write(error_html.encode()) self.wfile.write(error_html.encode())
return True return True
if path in ['/wp-login.php', '/wp-login', '/wp-admin', '/wp-admin/']: if path in ['/wp-login.php', '/wp-login', '/wp-admin', '/wp-admin/']:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header('Content-type', 'text/html')
self.end_headers() self.end_headers()
self.wfile.write(html_templates.wp_login().encode()) self.wfile.write(html_templates.wp_login().encode())
return True return True
if path in ['/wp-content/', '/wp-includes/'] or 'wordpress' in path.lower(): if path in ['/wp-content/', '/wp-includes/'] or 'wordpress' in path.lower():
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header('Content-type', 'text/html')
self.end_headers() self.end_headers()
self.wfile.write(html_templates.wordpress().encode()) self.wfile.write(html_templates.wordpress().encode())
return True return True
if 'phpmyadmin' in path.lower() or path in ['/pma/', '/phpMyAdmin/']: if 'phpmyadmin' in path.lower() or path in ['/pma/', '/phpMyAdmin/']:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header('Content-type', 'text/html')
self.end_headers() self.end_headers()
self.wfile.write(html_templates.phpmyadmin().encode()) self.wfile.write(html_templates.phpmyadmin().encode())
return True return True
if path.startswith('/api/') or path.startswith('/api') or path in ['/.env']: if path.startswith('/api/') or path.startswith('/api') or path in ['/.env']:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'application/json') self.send_header('Content-type', 'application/json')
self.end_headers() self.end_headers()
self.wfile.write(api_response(path).encode()) self.wfile.write(api_response(path).encode())
return True return True
if path in ['/backup/', '/uploads/', '/private/', '/admin/', '/config/', '/database/']: if path in ['/backup/', '/uploads/', '/private/', '/admin/', '/config/', '/database/']:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header('Content-type', 'text/html')
@@ -400,22 +400,21 @@ class Handler(BaseHTTPRequestHandler):
"""Responds to webpage requests""" """Responds to webpage requests"""
client_ip = self._get_client_ip() client_ip = self._get_client_ip()
user_agent = self._get_user_agent() user_agent = self._get_user_agent()
if self.config.dashboard_secret_path and self.path == self.config.dashboard_secret_path: if self.config.dashboard_secret_path and self.path == self.config.dashboard_secret_path:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header('Content-type', 'text/html')
self.end_headers() self.end_headers()
try: try:
stats = self.tracker.get_stats() stats = self.tracker.get_stats()
timezone = str(self.config.timezone) if self.config.timezone else 'UTC'
dashboard_path = self.config.dashboard_secret_path dashboard_path = self.config.dashboard_secret_path
self.wfile.write(generate_dashboard(stats, timezone, dashboard_path).encode()) self.wfile.write(generate_dashboard(stats, dashboard_path).encode())
except BrokenPipeError: except BrokenPipeError:
pass pass
except Exception as e: except Exception as e:
self.app_logger.error(f"Error generating dashboard: {e}") self.app_logger.error(f"Error generating dashboard: {e}")
return return
# API endpoint for fetching IP stats # API endpoint for fetching IP stats
if self.config.dashboard_secret_path and self.path.startswith(f"{self.config.dashboard_secret_path}/api/ip-stats/"): if self.config.dashboard_secret_path and self.path.startswith(f"{self.config.dashboard_secret_path}/api/ip-stats/"):
ip_address = self.path.replace(f"{self.config.dashboard_secret_path}/api/ip-stats/", "") ip_address = self.path.replace(f"{self.config.dashboard_secret_path}/api/ip-stats/", "")
@@ -473,7 +472,7 @@ class Handler(BaseHTTPRequestHandler):
return return
self.tracker.record_access(client_ip, self.path, user_agent, method='GET') self.tracker.record_access(client_ip, self.path, user_agent, method='GET')
# self.analyzer.infer_user_category(client_ip) # self.analyzer.infer_user_category(client_ip)
# self.analyzer.update_ip_rep_infos(client_ip) # self.analyzer.update_ip_rep_infos(client_ip)
@@ -497,9 +496,9 @@ class Handler(BaseHTTPRequestHandler):
try: try:
self.wfile.write(self.generate_page(self.path).encode()) self.wfile.write(self.generate_page(self.path).encode())
Handler.counter -= 1 Handler.counter -= 1
if Handler.counter < 0: if Handler.counter < 0:
Handler.counter = self.config.canary_token_tries Handler.counter = self.config.canary_token_tries
except BrokenPipeError: except BrokenPipeError:

View File

@@ -8,20 +8,17 @@ Provides two loggers: app (application) and access (HTTP access logs).
import logging import logging
import os import os
from logging.handlers import RotatingFileHandler from logging.handlers import RotatingFileHandler
from typing import Optional
from zoneinfo import ZoneInfo
from datetime import datetime from datetime import datetime
class TimezoneFormatter(logging.Formatter): class TimezoneFormatter(logging.Formatter):
"""Custom formatter that respects configured timezone""" """Custom formatter that respects configured timezone"""
def __init__(self, fmt=None, datefmt=None, timezone: Optional[ZoneInfo] = None): def __init__(self, fmt=None, datefmt=None):
super().__init__(fmt, datefmt) super().__init__(fmt, datefmt)
self.timezone = timezone or ZoneInfo('UTC')
def formatTime(self, record, datefmt=None): def formatTime(self, record, datefmt=None):
"""Override formatTime to use configured timezone""" """Override formatTime to use configured timezone"""
dt = datetime.fromtimestamp(record.created, tz=self.timezone) dt = datetime.fromtimestamp(record.created)
if datefmt: if datefmt:
return dt.strftime(datefmt) return dt.strftime(datefmt)
return dt.isoformat() return dt.isoformat()
@@ -37,19 +34,16 @@ class LoggerManager:
cls._instance._initialized = False cls._instance._initialized = False
return cls._instance return cls._instance
def initialize(self, log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None: def initialize(self, log_dir: str = "logs") -> None:
""" """
Initialize the logging system with rotating file handlers. Initialize the logging system with rotating file handlers.
Args: Args:
log_dir: Directory for log files (created if not exists) log_dir: Directory for log files (created if not exists)
timezone: ZoneInfo timezone for log timestamps (defaults to UTC)
""" """
if self._initialized: if self._initialized:
return return
self.timezone = timezone or ZoneInfo('UTC')
# Create log directory if it doesn't exist # Create log directory if it doesn't exist
os.makedirs(log_dir, exist_ok=True) os.makedirs(log_dir, exist_ok=True)
@@ -57,7 +51,6 @@ class LoggerManager:
log_format = TimezoneFormatter( log_format = TimezoneFormatter(
"[%(asctime)s] %(levelname)s - %(message)s", "[%(asctime)s] %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S", datefmt="%Y-%m-%d %H:%M:%S",
timezone=self.timezone
) )
# Rotation settings: 1MB max, 5 backups # Rotation settings: 1MB max, 5 backups
@@ -104,8 +97,8 @@ class LoggerManager:
self._credential_logger.handlers.clear() self._credential_logger.handlers.clear()
# Credential logger uses a simple format: timestamp|ip|username|password|path # Credential logger uses a simple format: timestamp|ip|username|password|path
credential_format = TimezoneFormatter("%(message)s", timezone=self.timezone) credential_format = TimezoneFormatter("%(message)s")
credential_file_handler = RotatingFileHandler( credential_file_handler = RotatingFileHandler(
os.path.join(log_dir, "credentials.log"), os.path.join(log_dir, "credentials.log"),
maxBytes=max_bytes, maxBytes=max_bytes,
@@ -157,6 +150,6 @@ def get_credential_logger() -> logging.Logger:
return _logger_manager.credentials return _logger_manager.credentials
def initialize_logging(log_dir: str = "logs", timezone: Optional[ZoneInfo] = None) -> None: def initialize_logging(log_dir: str = "logs") -> None:
"""Initialize the logging system.""" """Initialize the logging system."""
_logger_manager.initialize(log_dir, timezone) _logger_manager.initialize(log_dir)

View File

@@ -29,7 +29,6 @@ def print_usage():
print(' server:') print(' server:')
print(' port: 5000') print(' port: 5000')
print(' delay: 100') print(' delay: 100')
print(' timezone: null # or "America/New_York"')
print(' links:') print(' links:')
print(' min_length: 5') print(' min_length: 5')
print(' max_length: 15') print(' max_length: 15')
@@ -55,11 +54,8 @@ def main():
config = get_config() config = get_config()
# Get timezone configuration
tz = config.get_timezone()
# Initialize logging with timezone # Initialize logging (timestamps follow the environment's local timezone)
initialize_logging(timezone=tz) initialize_logging()
app_logger = get_app_logger() app_logger = get_app_logger()
access_logger = get_access_logger() access_logger = get_access_logger()
credential_logger = get_credential_logger() credential_logger = get_credential_logger()
@@ -71,8 +67,8 @@ def main():
except Exception as e: except Exception as e:
app_logger.warning(f'Database initialization failed: {e}. Continuing with in-memory only.') app_logger.warning(f'Database initialization failed: {e}. Continuing with in-memory only.')
tracker = AccessTracker(timezone=tz) tracker = AccessTracker()
analyzer = Analyzer(timezone=tz) analyzer = Analyzer()
Handler.config = config Handler.config = config
Handler.tracker = tracker Handler.tracker = tracker
@@ -99,7 +95,6 @@ def main():
try: try:
app_logger.info(f'Starting deception server on port {config.port}...') app_logger.info(f'Starting deception server on port {config.port}...')
app_logger.info(f'Timezone configured: {tz.key}')
app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}') app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}')
if config.canary_token_url: if config.canary_token_url:
app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries') app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries')

View File

@@ -28,7 +28,7 @@ def main():
config = get_config() config = get_config()
db_manager = get_database() db_manager = get_database()
app_logger = get_app_logger() app_logger = get_app_logger()
http_risky_methods_threshold = config.http_risky_methods_threshold http_risky_methods_threshold = config.http_risky_methods_threshold
violated_robots_threshold = config.violated_robots_threshold violated_robots_threshold = config.violated_robots_threshold
uneven_request_timing_threshold = config.uneven_request_timing_threshold uneven_request_timing_threshold = config.uneven_request_timing_threshold
@@ -41,7 +41,7 @@ def main():
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme #1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
weights = { weights = {
"attacker": { "attacker": {
@@ -87,13 +87,13 @@ def main():
total_accesses_count = len(ip_accesses) total_accesses_count = len(ip_accesses)
if total_accesses_count <= 0: if total_accesses_count <= 0:
return return
# Set category as "unknown" for the first 3 requests # Set category as "unknown" for the first 3 requests
if total_accesses_count < 3: if total_accesses_count < 3:
category = "unknown" category = "unknown"
analyzed_metrics = {} analyzed_metrics = {}
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0} category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
last_analysis = datetime.now(tz=ZoneInfo('UTC')) last_analysis = datetime.now()
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis) db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
return 0 return 0
#--------------------- HTTP Methods --------------------- #--------------------- HTTP Methods ---------------------
@@ -103,7 +103,7 @@ def main():
delete_accesses_count = len([item for item in ip_accesses if item["method"] == "DELETE"]) delete_accesses_count = len([item for item in ip_accesses if item["method"] == "DELETE"])
head_accesses_count = len([item for item in ip_accesses if item["method"] == "HEAD"]) head_accesses_count = len([item for item in ip_accesses if item["method"] == "HEAD"])
options_accesses_count = len([item for item in ip_accesses if item["method"] == "OPTIONS"]) options_accesses_count = len([item for item in ip_accesses if item["method"] == "OPTIONS"])
patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"]) patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"])
if total_accesses_count > http_risky_methods_threshold: if total_accesses_count > http_risky_methods_threshold:
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
else: else:
@@ -129,7 +129,7 @@ def main():
if not line: if not line:
continue continue
parts = line.split(":") parts = line.split(":")
if parts[0] == "Disallow": if parts[0] == "Disallow":
parts[1] = parts[1].rstrip("/") parts[1] = parts[1].rstrip("/")
#print(f"DISALLOW {parts[1]}") #print(f"DISALLOW {parts[1]}")
@@ -151,18 +151,18 @@ def main():
score["good_crawler"]["robots_violations"] = False score["good_crawler"]["robots_violations"] = False
score["bad_crawler"]["robots_violations"] = False score["bad_crawler"]["robots_violations"] = False
score["regular_user"]["robots_violations"] = False score["regular_user"]["robots_violations"] = False
#--------------------- Requests Timing --------------------- #--------------------- Requests Timing ---------------------
#Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior # Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses] timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
now_utc = datetime.now(tz=ZoneInfo('UTC')) now_utc = datetime.now()
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)] timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
timestamps = sorted(timestamps, reverse=True) timestamps = sorted(timestamps, reverse=True)
time_diffs = [] time_diffs = []
for i in range(0, len(timestamps)-1): for i in range(0, len(timestamps)-1):
diff = (timestamps[i] - timestamps[i+1]).total_seconds() diff = (timestamps[i] - timestamps[i+1]).total_seconds()
time_diffs.append(diff) time_diffs.append(diff)
mean = 0 mean = 0
variance = 0 variance = 0
std = 0 std = 0
@@ -212,14 +212,14 @@ def main():
except Exception: except Exception:
decoded_path = queried_path decoded_path = queried_path
decoded_path_twice = queried_path decoded_path_twice = queried_path
for name, pattern in wl.attack_patterns.items(): for name, pattern in wl.attack_patterns.items():
# Check original, decoded, and double-decoded paths # Check original, decoded, and double-decoded paths
if (re.search(pattern, queried_path, re.IGNORECASE) or if (re.search(pattern, queried_path, re.IGNORECASE) or
re.search(pattern, decoded_path, re.IGNORECASE) or re.search(pattern, decoded_path, re.IGNORECASE) or
re.search(pattern, decoded_path_twice, re.IGNORECASE)): re.search(pattern, decoded_path_twice, re.IGNORECASE)):
attack_urls_found_list.append(f"{name}: {pattern}") attack_urls_found_list.append(f"{name}: {pattern}")
#remove duplicates #remove duplicates
attack_urls_found_list = set(attack_urls_found_list) attack_urls_found_list = set(attack_urls_found_list)
attack_urls_found_list = list(attack_urls_found_list) attack_urls_found_list = list(attack_urls_found_list)
@@ -266,6 +266,6 @@ def main():
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list} analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score} category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
category = max(category_scores, key=category_scores.get) category = max(category_scores, key=category_scores.get)
last_analysis = datetime.now(tz=ZoneInfo('UTC')) last_analysis = datetime.now()
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis) db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
return return

View File

@@ -28,7 +28,7 @@ OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
# ---------------------- # ----------------------
def has_recent_honeypot_access(session, minutes: int = 5) -> bool: def has_recent_honeypot_access(session, minutes: int = 5) -> bool:
"""Check if honeypot was accessed in the last N minutes.""" """Check if honeypot was accessed in the last N minutes."""
cutoff_time = datetime.now(tz=ZoneInfo('UTC')) - timedelta(minutes=minutes) cutoff_time = datetime.now() - timedelta(minutes=minutes)
count = session.query(AccessLog).filter( count = session.query(AccessLog).filter(
AccessLog.is_honeypot_trigger == True, AccessLog.is_honeypot_trigger == True,
AccessLog.timestamp >= cutoff_time AccessLog.timestamp >= cutoff_time

View File

@@ -15,21 +15,16 @@ def _escape(value) -> str:
return "" return ""
return html.escape(str(value)) return html.escape(str(value))
def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool = False) -> str: def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str:
"""Format ISO timestamp for display with timezone conversion """Format ISO timestamp for display with timezone conversion
Args: Args:
iso_timestamp: ISO format timestamp string (UTC) iso_timestamp: ISO format timestamp string (UTC)
timezone: IANA timezone string to convert to
time_only: If True, return only HH:MM:SS, otherwise full datetime time_only: If True, return only HH:MM:SS, otherwise full datetime
""" """
try: try:
# Parse UTC timestamp # Parse UTC timestamp
dt = datetime.fromisoformat(iso_timestamp) dt = datetime.fromisoformat(iso_timestamp)
# Convert to target timezone
if dt.tzinfo is not None:
dt = dt.astimezone(ZoneInfo(timezone))
if time_only: if time_only:
return dt.strftime("%H:%M:%S") return dt.strftime("%H:%M:%S")
return dt.strftime("%Y-%m-%d %H:%M:%S") return dt.strftime("%Y-%m-%d %H:%M:%S")
@@ -38,15 +33,14 @@ def format_timestamp(iso_timestamp: str, timezone: str = 'UTC', time_only: bool
return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str = '') -> str: def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
"""Generate dashboard HTML with access statistics """Generate dashboard HTML with access statistics
Args: Args:
stats: Statistics dictionary stats: Statistics dictionary
timezone: IANA timezone string (e.g., 'Europe/Paris', 'America/New_York')
dashboard_path: The secret dashboard path for generating API URLs dashboard_path: The secret dashboard path for generating API URLs
""" """
# Generate IP rows with clickable functionality for dropdown stats # Generate IP rows with clickable functionality for dropdown stats
top_ips_rows = '\n'.join([ top_ips_rows = '\n'.join([
f'''<tr class="ip-row" data-ip="{_escape(ip)}"> f'''<tr class="ip-row" data-ip="{_escape(ip)}">
@@ -82,7 +76,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
<td class="ip-clickable">{_escape(log["ip"])}</td> <td class="ip-clickable">{_escape(log["ip"])}</td>
<td>{_escape(log["path"])}</td> <td>{_escape(log["path"])}</td>
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td> <td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td> <td>{format_timestamp(log["timestamp"], time_only=True)}</td>
</tr> </tr>
<tr class="ip-stats-row" id="stats-row-suspicious-{_escape(log["ip"]).replace(".", "-")}" style="display: none;"> <tr class="ip-stats-row" id="stats-row-suspicious-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
<td colspan="4" class="ip-stats-cell"> <td colspan="4" class="ip-stats-cell">
@@ -118,7 +112,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
<td>{_escape(log["path"])}</td> <td>{_escape(log["path"])}</td>
<td>{_escape(", ".join(log["attack_types"]))}</td> <td>{_escape(", ".join(log["attack_types"]))}</td>
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td> <td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td> <td>{format_timestamp(log["timestamp"],time_only=True)}</td>
</tr> </tr>
<tr class="ip-stats-row" id="stats-row-attack-{_escape(log["ip"]).replace(".", "-")}" style="display: none;"> <tr class="ip-stats-row" id="stats-row-attack-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
<td colspan="5" class="ip-stats-cell"> <td colspan="5" class="ip-stats-cell">
@@ -137,7 +131,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
<td>{_escape(log["username"])}</td> <td>{_escape(log["username"])}</td>
<td>{_escape(log["password"])}</td> <td>{_escape(log["password"])}</td>
<td>{_escape(log["path"])}</td> <td>{_escape(log["path"])}</td>
<td>{format_timestamp(log["timestamp"], timezone, time_only=True)}</td> <td>{format_timestamp(log["timestamp"], time_only=True)}</td>
</tr> </tr>
<tr class="ip-stats-row" id="stats-row-cred-{_escape(log["ip"]).replace(".", "-")}" style="display: none;"> <tr class="ip-stats-row" id="stats-row-cred-{_escape(log["ip"]).replace(".", "-")}" style="display: none;">
<td colspan="5" class="ip-stats-cell"> <td colspan="5" class="ip-stats-cell">
@@ -537,7 +531,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
</a> </a>
</div> </div>
<h1>Krawl Dashboard</h1> <h1>Krawl Dashboard</h1>
<div class="stats-grid"> <div class="stats-grid">
<div class="stat-card"> <div class="stat-card">
<div class="stat-value">{stats['total_accesses']}</div> <div class="stat-value">{stats['total_accesses']}</div>
@@ -683,15 +677,13 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
</div> </div>
</div> </div>
<script> <script>
const SERVER_TIMEZONE = '{timezone}';
const DASHBOARD_PATH = '{dashboard_path}'; const DASHBOARD_PATH = '{dashboard_path}';
function formatTimestamp(isoTimestamp) {{ function formatTimestamp(isoTimestamp) {{
if (!isoTimestamp) return 'N/A'; if (!isoTimestamp) return 'N/A';
try {{ try {{
const date = new Date(isoTimestamp); const date = new Date(isoTimestamp);
return date.toLocaleString('en-US', {{ return date.toLocaleString('en-US', {{
timeZone: SERVER_TIMEZONE,
year: 'numeric', year: 'numeric',
month: '2-digit', month: '2-digit',
day: '2-digit', day: '2-digit',
@@ -705,7 +697,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
return new Date(isoTimestamp).toLocaleString(); return new Date(isoTimestamp).toLocaleString();
}} }}
}} }}
document.querySelectorAll('th.sortable').forEach(header => {{ document.querySelectorAll('th.sortable').forEach(header => {{
header.addEventListener('click', function() {{ header.addEventListener('click', function() {{
const table = this.closest('table'); const table = this.closest('table');
@@ -713,25 +705,25 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
const rows = Array.from(tbody.querySelectorAll('tr')); const rows = Array.from(tbody.querySelectorAll('tr'));
const sortType = this.getAttribute('data-sort'); const sortType = this.getAttribute('data-sort');
const columnIndex = Array.from(this.parentElement.children).indexOf(this); const columnIndex = Array.from(this.parentElement.children).indexOf(this);
const isAscending = this.classList.contains('asc'); const isAscending = this.classList.contains('asc');
table.querySelectorAll('th.sortable').forEach(th => {{ table.querySelectorAll('th.sortable').forEach(th => {{
th.classList.remove('asc', 'desc'); th.classList.remove('asc', 'desc');
}}); }});
this.classList.add(isAscending ? 'desc' : 'asc'); this.classList.add(isAscending ? 'desc' : 'asc');
rows.sort((a, b) => {{ rows.sort((a, b) => {{
let aValue = a.cells[columnIndex].textContent.trim(); let aValue = a.cells[columnIndex].textContent.trim();
let bValue = b.cells[columnIndex].textContent.trim(); let bValue = b.cells[columnIndex].textContent.trim();
if (sortType === 'count') {{ if (sortType === 'count') {{
aValue = parseInt(aValue) || 0; aValue = parseInt(aValue) || 0;
bValue = parseInt(bValue) || 0; bValue = parseInt(bValue) || 0;
return isAscending ? bValue - aValue : aValue - bValue; return isAscending ? bValue - aValue : aValue - bValue;
}} }}
if (sortType === 'ip') {{ if (sortType === 'ip') {{
const ipToNum = ip => {{ const ipToNum = ip => {{
const parts = ip.split('.'); const parts = ip.split('.');
@@ -742,14 +734,14 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
const bNum = ipToNum(bValue); const bNum = ipToNum(bValue);
return isAscending ? bNum - aNum : aNum - bNum; return isAscending ? bNum - aNum : aNum - bNum;
}} }}
if (isAscending) {{ if (isAscending) {{
return bValue.localeCompare(aValue); return bValue.localeCompare(aValue);
}} else {{ }} else {{
return aValue.localeCompare(bValue); return aValue.localeCompare(bValue);
}} }}
}}); }});
rows.forEach(row => tbody.appendChild(row)); rows.forEach(row => tbody.appendChild(row));
}}); }});
}}); }});
@@ -797,39 +789,39 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
}} }}
}}); }});
}}); }});
function formatIpStats(stats) {{ function formatIpStats(stats) {{
let html = '<div class="stats-left">'; let html = '<div class="stats-left">';
html += '<div class="stat-row">'; html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Total Requests:</span>'; html += '<span class="stat-label-sm">Total Requests:</span>';
html += `<span class="stat-value-sm">${{stats.total_requests || 0}}</span>`; html += `<span class="stat-value-sm">${{stats.total_requests || 0}}</span>`;
html += '</div>'; html += '</div>';
html += '<div class="stat-row">'; html += '<div class="stat-row">';
html += '<span class="stat-label-sm">First Seen:</span>'; html += '<span class="stat-label-sm">First Seen:</span>';
html += `<span class="stat-value-sm">${{formatTimestamp(stats.first_seen)}}</span>`; html += `<span class="stat-value-sm">${{formatTimestamp(stats.first_seen)}}</span>`;
html += '</div>'; html += '</div>';
html += '<div class="stat-row">'; html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Last Seen:</span>'; html += '<span class="stat-label-sm">Last Seen:</span>';
html += `<span class="stat-value-sm">${{formatTimestamp(stats.last_seen)}}</span>`; html += `<span class="stat-value-sm">${{formatTimestamp(stats.last_seen)}}</span>`;
html += '</div>'; html += '</div>';
if (stats.country_code || stats.city) {{ if (stats.country_code || stats.city) {{
html += '<div class="stat-row">'; html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Location:</span>'; html += '<span class="stat-label-sm">Location:</span>';
html += `<span class="stat-value-sm">${{stats.city || ''}}${{stats.city && stats.country_code ? ', ' : ''}}${{stats.country_code || 'Unknown'}}</span>`; html += `<span class="stat-value-sm">${{stats.city || ''}}${{stats.city && stats.country_code ? ', ' : ''}}${{stats.country_code || 'Unknown'}}</span>`;
html += '</div>'; html += '</div>';
}} }}
if (stats.asn_org) {{ if (stats.asn_org) {{
html += '<div class="stat-row">'; html += '<div class="stat-row">';
html += '<span class="stat-label-sm">ASN Org:</span>'; html += '<span class="stat-label-sm">ASN Org:</span>';
html += `<span class="stat-value-sm">${{stats.asn_org}}</span>`; html += `<span class="stat-value-sm">${{stats.asn_org}}</span>`;
html += '</div>'; html += '</div>';
}} }}
if (stats.reputation_score !== null && stats.reputation_score !== undefined) {{ if (stats.reputation_score !== null && stats.reputation_score !== undefined) {{
html += '<div class="stat-row">'; html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Reputation Score:</span>'; html += '<span class="stat-label-sm">Reputation Score:</span>';
@@ -847,7 +839,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
if (stats.category_history && stats.category_history.length > 0) {{ if (stats.category_history && stats.category_history.length > 0) {{
html += '<div class="timeline-container">'; html += '<div class="timeline-container">';
html += '<div class="timeline-header">'; html += '<div class="timeline-header">';
html += '<div class="timeline-title">Behavior Timeline</div>'; html += '<div class="timeline-title">Behavior Timeline</div>';
@@ -912,14 +904,14 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
html += '</div>'; html += '</div>';
html += '</div>'; html += '</div>';
}} }}
html += '</div>'; html += '</div>';
if (stats.category_scores && Object.keys(stats.category_scores).length > 0) {{ if (stats.category_scores && Object.keys(stats.category_scores).length > 0) {{
html += '<div class="stats-right">'; html += '<div class="stats-right">';
html += '<div style="font-size: 13px; font-weight: 600; color: #58a6ff; margin-bottom: 10px;">Category Score</div>'; html += '<div style="font-size: 13px; font-weight: 600; color: #58a6ff; margin-bottom: 10px;">Category Score</div>';
html += '<svg class="radar-chart" viewBox="-30 -30 260 260" preserveAspectRatio="xMidYMid meet">'; html += '<svg class="radar-chart" viewBox="-30 -30 260 260" preserveAspectRatio="xMidYMid meet">';
const scores = {{ const scores = {{
attacker: stats.category_scores.attacker || 0, attacker: stats.category_scores.attacker || 0,
good_crawler: stats.category_scores.good_crawler || 0, good_crawler: stats.category_scores.good_crawler || 0,
@@ -927,15 +919,15 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
regular_user: stats.category_scores.regular_user || 0, regular_user: stats.category_scores.regular_user || 0,
unknown: stats.category_scores.unknown || 0 unknown: stats.category_scores.unknown || 0
}}; }};
const maxScore = Math.max(...Object.values(scores), 1); const maxScore = Math.max(...Object.values(scores), 1);
const minVisibleRadius = 0.15; const minVisibleRadius = 0.15;
const normalizedScores = {{}}; const normalizedScores = {{}};
Object.keys(scores).forEach(key => {{ Object.keys(scores).forEach(key => {{
normalizedScores[key] = minVisibleRadius + (scores[key] / maxScore) * (1 - minVisibleRadius); normalizedScores[key] = minVisibleRadius + (scores[key] / maxScore) * (1 - minVisibleRadius);
}}); }});
const colors = {{ const colors = {{
attacker: '#f85149', attacker: '#f85149',
good_crawler: '#3fb950', good_crawler: '#3fb950',
@@ -943,7 +935,7 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
regular_user: '#58a6ff', regular_user: '#58a6ff',
unknown: '#8b949e' unknown: '#8b949e'
}}; }};
const labels = {{ const labels = {{
attacker: 'Attacker', attacker: 'Attacker',
good_crawler: 'Good Bot', good_crawler: 'Good Bot',
@@ -951,28 +943,28 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
regular_user: 'User', regular_user: 'User',
unknown: 'Unknown' unknown: 'Unknown'
}}; }};
const cx = 100, cy = 100, maxRadius = 75; const cx = 100, cy = 100, maxRadius = 75;
for (let i = 1; i <= 5; i++) {{ for (let i = 1; i <= 5; i++) {{
const r = (maxRadius / 5) * i; const r = (maxRadius / 5) * i;
html += `<circle cx="${{cx}}" cy="${{cy}}" r="${{r}}" fill="none" stroke="#30363d" stroke-width="0.5"/>`; html += `<circle cx="${{cx}}" cy="${{cy}}" r="${{r}}" fill="none" stroke="#30363d" stroke-width="0.5"/>`;
}} }}
const angles = [0, 72, 144, 216, 288]; const angles = [0, 72, 144, 216, 288];
const keys = ['good_crawler', 'regular_user', 'unknown', 'bad_crawler', 'attacker']; const keys = ['good_crawler', 'regular_user', 'unknown', 'bad_crawler', 'attacker'];
angles.forEach((angle, i) => {{ angles.forEach((angle, i) => {{
const rad = (angle - 90) * Math.PI / 180; const rad = (angle - 90) * Math.PI / 180;
const x2 = cx + maxRadius * Math.cos(rad); const x2 = cx + maxRadius * Math.cos(rad);
const y2 = cy + maxRadius * Math.sin(rad); const y2 = cy + maxRadius * Math.sin(rad);
html += `<line x1="${{cx}}" y1="${{cy}}" x2="${{x2}}" y2="${{y2}}" stroke="#30363d" stroke-width="0.5"/>`; html += `<line x1="${{cx}}" y1="${{cy}}" x2="${{x2}}" y2="${{y2}}" stroke="#30363d" stroke-width="0.5"/>`;
const labelDist = maxRadius + 35; const labelDist = maxRadius + 35;
const lx = cx + labelDist * Math.cos(rad); const lx = cx + labelDist * Math.cos(rad);
const ly = cy + labelDist * Math.sin(rad); const ly = cy + labelDist * Math.sin(rad);
html += `<text x="${{lx}}" y="${{ly}}" fill="#8b949e" font-size="12" text-anchor="middle" dominant-baseline="middle">${{labels[keys[i]]}}</text>`; html += `<text x="${{lx}}" y="${{ly}}" fill="#8b949e" font-size="12" text-anchor="middle" dominant-baseline="middle">${{labels[keys[i]]}}</text>`;
}}); }});
let points = []; let points = [];
angles.forEach((angle, i) => {{ angles.forEach((angle, i) => {{
const normalizedScore = normalizedScores[keys[i]]; const normalizedScore = normalizedScores[keys[i]];
@@ -982,12 +974,12 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
const y = cy + r * Math.sin(rad); const y = cy + r * Math.sin(rad);
points.push(`${{x}},${{y}}`); points.push(`${{x}},${{y}}`);
}}); }});
const dominantKey = Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? a : b); const dominantKey = Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? a : b);
const dominantColor = colors[dominantKey]; const dominantColor = colors[dominantKey];
html += `<polygon points="${{points.join(' ')}}" fill="${{dominantColor}}" fill-opacity="0.4" stroke="${{dominantColor}}" stroke-width="2.5"/>`; html += `<polygon points="${{points.join(' ')}}" fill="${{dominantColor}}" fill-opacity="0.4" stroke="${{dominantColor}}" stroke-width="2.5"/>`;
angles.forEach((angle, i) => {{ angles.forEach((angle, i) => {{
const normalizedScore = normalizedScores[keys[i]]; const normalizedScore = normalizedScores[keys[i]];
const rad = (angle - 90) * Math.PI / 180; const rad = (angle - 90) * Math.PI / 180;
@@ -996,9 +988,9 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
const y = cy + r * Math.sin(rad); const y = cy + r * Math.sin(rad);
html += `<circle cx="${{x}}" cy="${{y}}" r="4.5" fill="${{colors[keys[i]]}}" stroke="#0d1117" stroke-width="2"/>`; html += `<circle cx="${{x}}" cy="${{y}}" r="4.5" fill="${{colors[keys[i]]}}" stroke="#0d1117" stroke-width="2"/>`;
}}); }});
html += '</svg>'; html += '</svg>';
html += '<div class="radar-legend">'; html += '<div class="radar-legend">';
keys.forEach(key => {{ keys.forEach(key => {{
html += '<div class="radar-legend-item">'; html += '<div class="radar-legend-item">';
@@ -1007,10 +999,10 @@ def generate_dashboard(stats: dict, timezone: str = 'UTC', dashboard_path: str =
html += '</div>'; html += '</div>';
}}); }});
html += '</div>'; html += '</div>';
html += '</div>'; html += '</div>';
}} }}
return html; return html;
}} }}
</script> </script>

View File

@@ -17,7 +17,7 @@ class AccessTracker:
Maintains in-memory structures for fast dashboard access and Maintains in-memory structures for fast dashboard access and
persists data to SQLite for long-term storage and analysis. persists data to SQLite for long-term storage and analysis.
""" """
def __init__(self, db_manager: Optional[DatabaseManager] = None, timezone: Optional[ZoneInfo] = None): def __init__(self, db_manager: Optional[DatabaseManager] = None):
""" """
Initialize the access tracker. Initialize the access tracker.
@@ -30,7 +30,6 @@ class AccessTracker:
self.user_agent_counts: Dict[str, int] = defaultdict(int) self.user_agent_counts: Dict[str, int] = defaultdict(int)
self.access_log: List[Dict] = [] self.access_log: List[Dict] = []
self.credential_attempts: List[Dict] = [] self.credential_attempts: List[Dict] = []
self.timezone = timezone or ZoneInfo('UTC')
self.suspicious_patterns = [ self.suspicious_patterns = [
'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests', 'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests',
'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix', 'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
@@ -40,7 +39,7 @@ class AccessTracker:
# Load attack patterns from wordlists # Load attack patterns from wordlists
wl = get_wordlists() wl = get_wordlists()
self.attack_types = wl.attack_patterns self.attack_types = wl.attack_patterns
# Fallback if wordlists not loaded # Fallback if wordlists not loaded
if not self.attack_types: if not self.attack_types:
self.attack_types = { self.attack_types = {
@@ -80,38 +79,38 @@ class AccessTracker:
""" """
if not post_data: if not post_data:
return None, None return None, None
username = None username = None
password = None password = None
try: try:
# Parse URL-encoded form data # Parse URL-encoded form data
parsed = urllib.parse.parse_qs(post_data) parsed = urllib.parse.parse_qs(post_data)
# Common username field names # Common username field names
username_fields = ['username', 'user', 'login', 'email', 'log', 'userid', 'account'] username_fields = ['username', 'user', 'login', 'email', 'log', 'userid', 'account']
for field in username_fields: for field in username_fields:
if field in parsed and parsed[field]: if field in parsed and parsed[field]:
username = parsed[field][0] username = parsed[field][0]
break break
# Common password field names # Common password field names
password_fields = ['password', 'pass', 'passwd', 'pwd', 'passphrase'] password_fields = ['password', 'pass', 'passwd', 'pwd', 'passphrase']
for field in password_fields: for field in password_fields:
if field in parsed and parsed[field]: if field in parsed and parsed[field]:
password = parsed[field][0] password = parsed[field][0]
break break
except Exception: except Exception:
# If parsing fails, try simple regex patterns # If parsing fails, try simple regex patterns
username_match = re.search(r'(?:username|user|login|email|log)=([^&\s]+)', post_data, re.IGNORECASE) username_match = re.search(r'(?:username|user|login|email|log)=([^&\s]+)', post_data, re.IGNORECASE)
password_match = re.search(r'(?:password|pass|passwd|pwd)=([^&\s]+)', post_data, re.IGNORECASE) password_match = re.search(r'(?:password|pass|passwd|pwd)=([^&\s]+)', post_data, re.IGNORECASE)
if username_match: if username_match:
username = urllib.parse.unquote_plus(username_match.group(1)) username = urllib.parse.unquote_plus(username_match.group(1))
if password_match: if password_match:
password = urllib.parse.unquote_plus(password_match.group(1)) password = urllib.parse.unquote_plus(password_match.group(1))
return username, password return username, password
def record_credential_attempt(self, ip: str, path: str, username: str, password: str): def record_credential_attempt(self, ip: str, path: str, username: str, password: str):
@@ -126,7 +125,7 @@ class AccessTracker:
'path': path, 'path': path,
'username': username, 'username': username,
'password': password, 'password': password,
'timestamp': datetime.now(self.timezone).isoformat() 'timestamp': datetime.now().isoformat()
}) })
# Persist to database # Persist to database
@@ -193,7 +192,7 @@ class AccessTracker:
'suspicious': is_suspicious, 'suspicious': is_suspicious,
'honeypot_triggered': self.is_honeypot_path(path), 'honeypot_triggered': self.is_honeypot_path(path),
'attack_types':attack_findings, 'attack_types':attack_findings,
'timestamp': datetime.now(self.timezone).isoformat() 'timestamp': datetime.now().isoformat()
}) })
# Persist to database # Persist to database