Linted code with black tool

This commit is contained in:
Lorenzo Venerandi
2026-01-23 22:00:21 +01:00
parent 25384585d9
commit 4450d3a4e3
22 changed files with 1387 additions and 868 deletions

View File

@@ -19,10 +19,12 @@ Functions for user activity analysis
app_logger = get_app_logger()
class Analyzer:
"""
Analyzes users activity and produces aggregated insights
"""
def __init__(self, db_manager: Optional[DatabaseManager] = None):
"""
Initialize the access tracker.
@@ -102,7 +104,6 @@ class Analyzer:
# }
# }
# accesses = self.db.get_access_logs(ip_filter = ip, limit=1000)
# total_accesses_count = len(accesses)
# if total_accesses_count <= 0:
@@ -119,7 +120,6 @@ class Analyzer:
# #--------------------- HTTP Methods ---------------------
# get_accesses_count = len([item for item in accesses if item["method"] == "GET"])
# post_accesses_count = len([item for item in accesses if item["method"] == "POST"])
# put_accesses_count = len([item for item in accesses if item["method"] == "PUT"])
@@ -214,7 +214,6 @@ class Analyzer:
# score["bad_crawler"]["uneven_request_timing"] = False
# score["regular_user"]["uneven_request_timing"] = False
# #--------------------- Different User Agents ---------------------
# #Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
# user_agents_used = [item["user_agent"] for item in accesses]
@@ -317,8 +316,6 @@ class Analyzer:
# return 0
# def update_ip_rep_infos(self, ip: str) -> list[str]:
# api_url = "https://iprep.lcrawl.com/api/iprep/"
# params = {

View File

@@ -14,12 +14,13 @@ import yaml
@dataclass
class Config:
"""Configuration class for the deception server"""
port: int = 5000
delay: int = 100 # milliseconds
server_header: str = ""
links_length_range: Tuple[int, int] = (5, 15)
links_per_page_range: Tuple[int, int] = (10, 15)
char_space: str = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
char_space: str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
max_counter: int = 10
canary_token_url: Optional[str] = None
canary_token_tries: int = 10
@@ -30,7 +31,9 @@ class Config:
probability_error_codes: int = 0 # Percentage (0-100)
# Crawl limiting settings - for legitimate vs malicious crawlers
max_pages_limit: int = 100 # Max pages limit for good crawlers and regular users (and bad crawlers/attackers if infinite_pages_for_malicious is False)
max_pages_limit: int = (
100 # Max pages limit for good crawlers and regular users (and bad crawlers/attackers if infinite_pages_for_malicious is False)
)
infinite_pages_for_malicious: bool = True # Infinite pages for malicious crawlers
ban_duration_seconds: int = 600 # Ban duration in seconds for IPs exceeding limits
@@ -47,84 +50,105 @@ class Config:
attack_urls_threshold: float = None
@classmethod
def from_yaml(cls) -> 'Config':
def from_yaml(cls) -> "Config":
"""Create configuration from YAML file"""
config_location = os.getenv('CONFIG_LOCATION', 'config.yaml')
config_location = os.getenv("CONFIG_LOCATION", "config.yaml")
config_path = Path(__file__).parent.parent / config_location
try:
with open(config_path, 'r') as f:
with open(config_path, "r") as f:
data = yaml.safe_load(f)
except FileNotFoundError:
print(f"Error: Configuration file '{config_path}' not found.", file=sys.stderr)
print(f"Please create a config.yaml file or set CONFIG_LOCATION environment variable.", file=sys.stderr)
print(
f"Error: Configuration file '{config_path}' not found.", file=sys.stderr
)
print(
f"Please create a config.yaml file or set CONFIG_LOCATION environment variable.",
file=sys.stderr,
)
sys.exit(1)
except yaml.YAMLError as e:
print(f"Error: Invalid YAML in configuration file '{config_path}': {e}", file=sys.stderr)
print(
f"Error: Invalid YAML in configuration file '{config_path}': {e}",
file=sys.stderr,
)
sys.exit(1)
if data is None:
data = {}
# Extract nested values with defaults
server = data.get('server', {})
links = data.get('links', {})
canary = data.get('canary', {})
dashboard = data.get('dashboard', {})
api = data.get('api', {})
database = data.get('database', {})
behavior = data.get('behavior', {})
analyzer = data.get('analyzer') or {}
crawl = data.get('crawl', {})
server = data.get("server", {})
links = data.get("links", {})
canary = data.get("canary", {})
dashboard = data.get("dashboard", {})
api = data.get("api", {})
database = data.get("database", {})
behavior = data.get("behavior", {})
analyzer = data.get("analyzer") or {}
crawl = data.get("crawl", {})
# Handle dashboard_secret_path - auto-generate if null/not set
dashboard_path = dashboard.get('secret_path')
dashboard_path = dashboard.get("secret_path")
if dashboard_path is None:
dashboard_path = f'/{os.urandom(16).hex()}'
dashboard_path = f"/{os.urandom(16).hex()}"
else:
# ensure the dashboard path starts with a /
if dashboard_path[:1] != "/":
dashboard_path = f"/{dashboard_path}"
return cls(
port=server.get('port', 5000),
delay=server.get('delay', 100),
server_header=server.get('server_header',""),
port=server.get("port", 5000),
delay=server.get("delay", 100),
server_header=server.get("server_header", ""),
links_length_range=(
links.get('min_length', 5),
links.get('max_length', 15)
links.get("min_length", 5),
links.get("max_length", 15),
),
links_per_page_range=(
links.get('min_per_page', 10),
links.get('max_per_page', 15)
links.get("min_per_page", 10),
links.get("max_per_page", 15),
),
char_space=links.get('char_space', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
max_counter=links.get('max_counter', 10),
canary_token_url=canary.get('token_url'),
canary_token_tries=canary.get('token_tries', 10),
char_space=links.get(
"char_space",
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
),
max_counter=links.get("max_counter", 10),
canary_token_url=canary.get("token_url"),
canary_token_tries=canary.get("token_tries", 10),
dashboard_secret_path=dashboard_path,
api_server_url=api.get('server_url'),
api_server_port=api.get('server_port', 8080),
api_server_path=api.get('server_path', '/api/v2/users'),
probability_error_codes=behavior.get('probability_error_codes', 0),
database_path=database.get('path', 'data/krawl.db'),
database_retention_days=database.get('retention_days', 30),
http_risky_methods_threshold=analyzer.get('http_risky_methods_threshold', 0.1),
violated_robots_threshold=analyzer.get('violated_robots_threshold', 0.1),
uneven_request_timing_threshold=analyzer.get('uneven_request_timing_threshold', 0.5), # coefficient of variation
uneven_request_timing_time_window_seconds=analyzer.get('uneven_request_timing_time_window_seconds', 300),
user_agents_used_threshold=analyzer.get('user_agents_used_threshold', 2),
attack_urls_threshold=analyzer.get('attack_urls_threshold', 1),
infinite_pages_for_malicious=crawl.get('infinite_pages_for_malicious', True),
max_pages_limit=crawl.get('max_pages_limit', 200),
ban_duration_seconds=crawl.get('ban_duration_seconds', 60)
api_server_url=api.get("server_url"),
api_server_port=api.get("server_port", 8080),
api_server_path=api.get("server_path", "/api/v2/users"),
probability_error_codes=behavior.get("probability_error_codes", 0),
database_path=database.get("path", "data/krawl.db"),
database_retention_days=database.get("retention_days", 30),
http_risky_methods_threshold=analyzer.get(
"http_risky_methods_threshold", 0.1
),
violated_robots_threshold=analyzer.get("violated_robots_threshold", 0.1),
uneven_request_timing_threshold=analyzer.get(
"uneven_request_timing_threshold", 0.5
), # coefficient of variation
uneven_request_timing_time_window_seconds=analyzer.get(
"uneven_request_timing_time_window_seconds", 300
),
user_agents_used_threshold=analyzer.get("user_agents_used_threshold", 2),
attack_urls_threshold=analyzer.get("attack_urls_threshold", 1),
infinite_pages_for_malicious=crawl.get(
"infinite_pages_for_malicious", True
),
max_pages_limit=crawl.get("max_pages_limit", 200),
ban_duration_seconds=crawl.get("ban_duration_seconds", 60),
)
def __get_env_from_config(config: str) -> str:
env = config.upper().replace('.', '_').replace('-', '__').replace(' ', '_')
env = config.upper().replace(".", "_").replace("-", "__").replace(" ", "_")
return f"KRAWL_{env}"
return f'KRAWL_{env}'
def override_config_from_env(config: Config = None):
"""Initialize configuration from environment variables"""
@@ -140,14 +164,16 @@ def override_config_from_env(config: Config = None):
elif field_type == float:
setattr(config, field, float(env_value))
elif field_type == Tuple[int, int]:
parts = env_value.split(',')
parts = env_value.split(",")
if len(parts) == 2:
setattr(config, field, (int(parts[0]), int(parts[1])))
else:
setattr(config, field, env_value)
_config_instance = None
def get_config() -> Config:
"""Get the singleton Config instance"""
global _config_instance

View File

@@ -24,7 +24,15 @@ def set_sqlite_pragma(dbapi_connection, connection_record):
cursor.execute("PRAGMA busy_timeout=30000")
cursor.close()
from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats, CategoryHistory
from models import (
Base,
AccessLog,
CredentialAttempt,
AttackDetection,
IpStats,
CategoryHistory,
)
from sanitizer import (
sanitize_ip,
sanitize_path,
@@ -37,6 +45,7 @@ from logger import get_app_logger
applogger = get_app_logger()
class DatabaseManager:
"""
Singleton database manager for the Krawl honeypot.
@@ -44,6 +53,7 @@ class DatabaseManager:
Handles database initialization, session management, and provides
methods for persisting access logs, credentials, and attack detections.
"""
_instance: Optional["DatabaseManager"] = None
def __new__(cls) -> "DatabaseManager":
@@ -72,7 +82,7 @@ class DatabaseManager:
self._engine = create_engine(
database_url,
connect_args={"check_same_thread": False},
echo=False # Set to True for SQL debugging
echo=False, # Set to True for SQL debugging
)
# Create session factory with scoped_session for thread safety
@@ -96,7 +106,9 @@ class DatabaseManager:
def session(self) -> Session:
"""Get a thread-local database session."""
if not self._initialized:
raise RuntimeError("DatabaseManager not initialized. Call initialize() first.")
raise RuntimeError(
"DatabaseManager not initialized. Call initialize() first."
)
return self._Session()
def close_session(self) -> None:
@@ -113,7 +125,7 @@ class DatabaseManager:
is_suspicious: bool = False,
is_honeypot_trigger: bool = False,
attack_types: Optional[List[str]] = None,
matched_patterns: Optional[Dict[str, str]] = None
matched_patterns: Optional[Dict[str, str]] = None,
) -> Optional[int]:
"""
Persist an access log entry to the database.
@@ -141,7 +153,7 @@ class DatabaseManager:
method=method[:10],
is_suspicious=is_suspicious,
is_honeypot_trigger=is_honeypot_trigger,
timestamp=datetime.now()
timestamp=datetime.now(),
)
session.add(access_log)
session.flush() # Get the ID before committing
@@ -155,7 +167,7 @@ class DatabaseManager:
attack_type=attack_type[:50],
matched_pattern=sanitize_attack_pattern(
matched_patterns.get(attack_type, "")
)
),
)
session.add(detection)
@@ -178,7 +190,7 @@ class DatabaseManager:
ip: str,
path: str,
username: Optional[str] = None,
password: Optional[str] = None
password: Optional[str] = None,
) -> Optional[int]:
"""
Persist a credential attempt to the database.
@@ -199,7 +211,7 @@ class DatabaseManager:
path=sanitize_path(path),
username=sanitize_credential(username),
password=sanitize_credential(password),
timestamp=datetime.now()
timestamp=datetime.now(),
)
session.add(credential)
session.commit()
@@ -230,14 +242,18 @@ class DatabaseManager:
ip_stats.last_seen = now
else:
ip_stats = IpStats(
ip=sanitized_ip,
total_requests=1,
first_seen=now,
last_seen=now
ip=sanitized_ip, total_requests=1, first_seen=now, last_seen=now
)
session.add(ip_stats)
def update_ip_stats_analysis(self, ip: str, analyzed_metrics: Dict[str, object], category: str, category_scores: Dict[str, int], last_analysis: datetime) -> None:
def update_ip_stats_analysis(
self,
ip: str,
analyzed_metrics: Dict[str, object],
category: str,
category_scores: Dict[str, int],
last_analysis: datetime,
) -> None:
"""
Update IP statistics (ip is already persisted).
Records category change in history if category has changed.
@@ -250,7 +266,9 @@ class DatabaseManager:
last_analysis: timestamp of last analysis
"""
applogger.debug(f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}")
applogger.debug(
f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}"
)
applogger.info(f"IP: {ip} category has been updated to {category}")
session = self.session
@@ -260,7 +278,9 @@ class DatabaseManager:
# Check if category has changed and record it
old_category = ip_stats.category
if old_category != category:
self._record_category_change(sanitized_ip, old_category, category, last_analysis)
self._record_category_change(
sanitized_ip, old_category, category, last_analysis
)
ip_stats.analyzed_metrics = analyzed_metrics
ip_stats.category = category
@@ -286,11 +306,12 @@ class DatabaseManager:
sanitized_ip = sanitize_ip(ip)
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
# Record the manual category change
old_category = ip_stats.category
if old_category != category:
self._record_category_change(sanitized_ip, old_category, category, datetime.now())
self._record_category_change(
sanitized_ip, old_category, category, datetime.now()
)
ip_stats.category = category
ip_stats.manual_category = True
@@ -301,7 +322,13 @@ class DatabaseManager:
session.rollback()
print(f"Error updating manual category: {e}")
def _record_category_change(self, ip: str, old_category: Optional[str], new_category: str, timestamp: datetime) -> None:
def _record_category_change(
self,
ip: str,
old_category: Optional[str],
new_category: str,
timestamp: datetime,
) -> None:
"""
Internal method to record category changes in history.
Only records if there's an actual change from a previous category.
@@ -323,7 +350,7 @@ class DatabaseManager:
ip=ip,
old_category=old_category,
new_category=new_category,
timestamp=timestamp
timestamp=timestamp,
)
session.add(history_entry)
session.commit()
@@ -344,22 +371,32 @@ class DatabaseManager:
session = self.session
try:
sanitized_ip = sanitize_ip(ip)
history = session.query(CategoryHistory).filter(
CategoryHistory.ip == sanitized_ip
).order_by(CategoryHistory.timestamp.asc()).all()
history = (
session.query(CategoryHistory)
.filter(CategoryHistory.ip == sanitized_ip)
.order_by(CategoryHistory.timestamp.asc())
.all()
)
return [
{
'old_category': h.old_category,
'new_category': h.new_category,
'timestamp': h.timestamp.isoformat()
"old_category": h.old_category,
"new_category": h.new_category,
"timestamp": h.timestamp.isoformat(),
}
for h in history
]
finally:
self.close_session()
def update_ip_rep_infos(self, ip: str, country_code: str, asn: str, asn_org: str, list_on: Dict[str,str]) -> None:
def update_ip_rep_infos(
self,
ip: str,
country_code: str,
asn: str,
asn_org: str,
list_on: Dict[str, str],
) -> None:
"""
Update IP rep stats
@@ -400,20 +437,25 @@ class DatabaseManager:
"""
session = self.session
try:
ips = session.query(IpStats.ip).filter(
IpStats.country_code.is_(None),
~IpStats.ip.like('10.%'),
~IpStats.ip.like('172.16.%'),
~IpStats.ip.like('172.17.%'),
~IpStats.ip.like('172.18.%'),
~IpStats.ip.like('172.19.%'),
~IpStats.ip.like('172.2_.%'),
~IpStats.ip.like('172.30.%'),
~IpStats.ip.like('172.31.%'),
~IpStats.ip.like('192.168.%'),
~IpStats.ip.like('127.%'),
~IpStats.ip.like('169.254.%')
).limit(limit).all()
ips = (
session.query(IpStats.ip)
.filter(
IpStats.country_code.is_(None),
~IpStats.ip.like("10.%"),
~IpStats.ip.like("172.16.%"),
~IpStats.ip.like("172.17.%"),
~IpStats.ip.like("172.18.%"),
~IpStats.ip.like("172.19.%"),
~IpStats.ip.like("172.2_.%"),
~IpStats.ip.like("172.30.%"),
~IpStats.ip.like("172.31.%"),
~IpStats.ip.like("192.168.%"),
~IpStats.ip.like("127.%"),
~IpStats.ip.like("169.254.%"),
)
.limit(limit)
.all()
)
return [ip[0] for ip in ips]
finally:
self.close_session()
@@ -424,7 +466,7 @@ class DatabaseManager:
offset: int = 0,
ip_filter: Optional[str] = None,
suspicious_only: bool = False,
since_minutes: Optional[int] = None
since_minutes: Optional[int] = None,
) -> List[Dict[str, Any]]:
"""
Retrieve access logs with optional filtering.
@@ -455,15 +497,15 @@ class DatabaseManager:
return [
{
'id': log.id,
'ip': log.ip,
'path': log.path,
'user_agent': log.user_agent,
'method': log.method,
'is_suspicious': log.is_suspicious,
'is_honeypot_trigger': log.is_honeypot_trigger,
'timestamp': log.timestamp.isoformat(),
'attack_types': [d.attack_type for d in log.attack_detections]
"id": log.id,
"ip": log.ip,
"path": log.path,
"user_agent": log.user_agent,
"method": log.method,
"is_suspicious": log.is_suspicious,
"is_honeypot_trigger": log.is_honeypot_trigger,
"timestamp": log.timestamp.isoformat(),
"attack_types": [d.attack_type for d in log.attack_detections],
}
for log in logs
]
@@ -521,10 +563,7 @@ class DatabaseManager:
# self.close_session()
def get_credential_attempts(
self,
limit: int = 100,
offset: int = 0,
ip_filter: Optional[str] = None
self, limit: int = 100, offset: int = 0, ip_filter: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
Retrieve credential attempts with optional filtering.
@@ -550,12 +589,12 @@ class DatabaseManager:
return [
{
'id': attempt.id,
'ip': attempt.ip,
'path': attempt.path,
'username': attempt.username,
'password': attempt.password,
'timestamp': attempt.timestamp.isoformat()
"id": attempt.id,
"ip": attempt.ip,
"path": attempt.path,
"username": attempt.username,
"password": attempt.password,
"timestamp": attempt.timestamp.isoformat(),
}
for attempt in attempts
]
@@ -574,26 +613,29 @@ class DatabaseManager:
"""
session = self.session
try:
stats = session.query(IpStats).order_by(
IpStats.total_requests.desc()
).limit(limit).all()
stats = (
session.query(IpStats)
.order_by(IpStats.total_requests.desc())
.limit(limit)
.all()
)
return [
{
'ip': s.ip,
'total_requests': s.total_requests,
'first_seen': s.first_seen.isoformat(),
'last_seen': s.last_seen.isoformat(),
'country_code': s.country_code,
'city': s.city,
'asn': s.asn,
'asn_org': s.asn_org,
'reputation_score': s.reputation_score,
'reputation_source': s.reputation_source,
'analyzed_metrics': s.analyzed_metrics,
'category': s.category,
'manual_category': s.manual_category,
'last_analysis': s.last_analysis
"ip": s.ip,
"total_requests": s.total_requests,
"first_seen": s.first_seen.isoformat(),
"last_seen": s.last_seen.isoformat(),
"country_code": s.country_code,
"city": s.city,
"asn": s.asn,
"asn_org": s.asn_org,
"reputation_score": s.reputation_score,
"reputation_source": s.reputation_source,
"analyzed_metrics": s.analyzed_metrics,
"category": s.category,
"manual_category": s.manual_category,
"last_analysis": s.last_analysis,
}
for s in stats
]
@@ -621,23 +663,25 @@ class DatabaseManager:
category_history = self.get_category_history(ip)
return {
'ip': stat.ip,
'total_requests': stat.total_requests,
'first_seen': stat.first_seen.isoformat() if stat.first_seen else None,
'last_seen': stat.last_seen.isoformat() if stat.last_seen else None,
'country_code': stat.country_code,
'city': stat.city,
'asn': stat.asn,
'asn_org': stat.asn_org,
'list_on': stat.list_on or {},
'reputation_score': stat.reputation_score,
'reputation_source': stat.reputation_source,
'analyzed_metrics': stat.analyzed_metrics or {},
'category': stat.category,
'category_scores': stat.category_scores or {},
'manual_category': stat.manual_category,
'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None,
'category_history': category_history
"ip": stat.ip,
"total_requests": stat.total_requests,
"first_seen": stat.first_seen.isoformat() if stat.first_seen else None,
"last_seen": stat.last_seen.isoformat() if stat.last_seen else None,
"country_code": stat.country_code,
"city": stat.city,
"asn": stat.asn,
"asn_org": stat.asn_org,
"list_on": stat.list_on or {},
"reputation_score": stat.reputation_score,
"reputation_source": stat.reputation_source,
"analyzed_metrics": stat.analyzed_metrics or {},
"category": stat.category,
"category_scores": stat.category_scores or {},
"manual_category": stat.manual_category,
"last_analysis": (
stat.last_analysis.isoformat() if stat.last_analysis else None
),
"category_history": category_history,
}
finally:
self.close_session()
@@ -654,25 +698,32 @@ class DatabaseManager:
try:
# Get main aggregate counts in one query
result = session.query(
func.count(AccessLog.id).label('total_accesses'),
func.count(distinct(AccessLog.ip)).label('unique_ips'),
func.count(distinct(AccessLog.path)).label('unique_paths'),
func.sum(case((AccessLog.is_suspicious == True, 1), else_=0)).label('suspicious_accesses'),
func.sum(case((AccessLog.is_honeypot_trigger == True, 1), else_=0)).label('honeypot_triggered')
func.count(AccessLog.id).label("total_accesses"),
func.count(distinct(AccessLog.ip)).label("unique_ips"),
func.count(distinct(AccessLog.path)).label("unique_paths"),
func.sum(case((AccessLog.is_suspicious == True, 1), else_=0)).label(
"suspicious_accesses"
),
func.sum(
case((AccessLog.is_honeypot_trigger == True, 1), else_=0)
).label("honeypot_triggered"),
).first()
# Get unique IPs that triggered honeypots
honeypot_ips = session.query(
func.count(distinct(AccessLog.ip))
).filter(AccessLog.is_honeypot_trigger == True).scalar() or 0
honeypot_ips = (
session.query(func.count(distinct(AccessLog.ip)))
.filter(AccessLog.is_honeypot_trigger == True)
.scalar()
or 0
)
return {
'total_accesses': result.total_accesses or 0,
'unique_ips': result.unique_ips or 0,
'unique_paths': result.unique_paths or 0,
'suspicious_accesses': int(result.suspicious_accesses or 0),
'honeypot_triggered': int(result.honeypot_triggered or 0),
'honeypot_ips': honeypot_ips
"total_accesses": result.total_accesses or 0,
"unique_ips": result.unique_ips or 0,
"unique_paths": result.unique_paths or 0,
"suspicious_accesses": int(result.suspicious_accesses or 0),
"honeypot_triggered": int(result.honeypot_triggered or 0),
"honeypot_ips": honeypot_ips,
}
finally:
self.close_session()
@@ -689,12 +740,13 @@ class DatabaseManager:
"""
session = self.session
try:
results = session.query(
AccessLog.ip,
func.count(AccessLog.id).label('count')
).group_by(AccessLog.ip).order_by(
func.count(AccessLog.id).desc()
).limit(limit).all()
results = (
session.query(AccessLog.ip, func.count(AccessLog.id).label("count"))
.group_by(AccessLog.ip)
.order_by(func.count(AccessLog.id).desc())
.limit(limit)
.all()
)
return [(row.ip, row.count) for row in results]
finally:
@@ -712,12 +764,13 @@ class DatabaseManager:
"""
session = self.session
try:
results = session.query(
AccessLog.path,
func.count(AccessLog.id).label('count')
).group_by(AccessLog.path).order_by(
func.count(AccessLog.id).desc()
).limit(limit).all()
results = (
session.query(AccessLog.path, func.count(AccessLog.id).label("count"))
.group_by(AccessLog.path)
.order_by(func.count(AccessLog.id).desc())
.limit(limit)
.all()
)
return [(row.path, row.count) for row in results]
finally:
@@ -735,15 +788,16 @@ class DatabaseManager:
"""
session = self.session
try:
results = session.query(
AccessLog.user_agent,
func.count(AccessLog.id).label('count')
).filter(
AccessLog.user_agent.isnot(None),
AccessLog.user_agent != ''
).group_by(AccessLog.user_agent).order_by(
func.count(AccessLog.id).desc()
).limit(limit).all()
results = (
session.query(
AccessLog.user_agent, func.count(AccessLog.id).label("count")
)
.filter(AccessLog.user_agent.isnot(None), AccessLog.user_agent != "")
.group_by(AccessLog.user_agent)
.order_by(func.count(AccessLog.id).desc())
.limit(limit)
.all()
)
return [(row.user_agent, row.count) for row in results]
finally:
@@ -761,16 +815,20 @@ class DatabaseManager:
"""
session = self.session
try:
logs = session.query(AccessLog).filter(
AccessLog.is_suspicious == True
).order_by(AccessLog.timestamp.desc()).limit(limit).all()
logs = (
session.query(AccessLog)
.filter(AccessLog.is_suspicious == True)
.order_by(AccessLog.timestamp.desc())
.limit(limit)
.all()
)
return [
{
'ip': log.ip,
'path': log.path,
'user_agent': log.user_agent,
'timestamp': log.timestamp.isoformat()
"ip": log.ip,
"path": log.path,
"user_agent": log.user_agent,
"timestamp": log.timestamp.isoformat(),
}
for log in logs
]
@@ -787,12 +845,11 @@ class DatabaseManager:
session = self.session
try:
# Get all honeypot triggers grouped by IP
results = session.query(
AccessLog.ip,
AccessLog.path
).filter(
AccessLog.is_honeypot_trigger == True
).all()
results = (
session.query(AccessLog.ip, AccessLog.path)
.filter(AccessLog.is_honeypot_trigger == True)
.all()
)
# Group paths by IP
ip_paths: Dict[str, List[str]] = {}
@@ -819,17 +876,21 @@ class DatabaseManager:
session = self.session
try:
# Get access logs that have attack detections
logs = session.query(AccessLog).join(
AttackDetection
).order_by(AccessLog.timestamp.desc()).limit(limit).all()
logs = (
session.query(AccessLog)
.join(AttackDetection)
.order_by(AccessLog.timestamp.desc())
.limit(limit)
.all()
)
return [
{
'ip': log.ip,
'path': log.path,
'user_agent': log.user_agent,
'timestamp': log.timestamp.isoformat(),
'attack_types': [d.attack_type for d in log.attack_detections]
"ip": log.ip,
"path": log.path,
"user_agent": log.user_agent,
"timestamp": log.timestamp.isoformat(),
"attack_types": [d.attack_type for d in log.attack_detections],
}
for log in logs
]

View File

@@ -11,6 +11,7 @@ from templates import html_templates
from wordlists import get_wordlists
from config import get_config
def random_username() -> str:
"""Generate random username"""
wl = get_wordlists()
@@ -21,10 +22,10 @@ def random_password() -> str:
"""Generate random password"""
wl = get_wordlists()
templates = [
lambda: ''.join(random.choices(string.ascii_letters + string.digits, k=12)),
lambda: "".join(random.choices(string.ascii_letters + string.digits, k=12)),
lambda: f"{random.choice(wl.password_prefixes)}{random.randint(100, 999)}!",
lambda: f"{random.choice(wl.simple_passwords)}{random.randint(1000, 9999)}",
lambda: ''.join(random.choices(string.ascii_lowercase, k=8)),
lambda: "".join(random.choices(string.ascii_lowercase, k=8)),
]
return random.choice(templates)()
@@ -36,6 +37,7 @@ def random_email(username: str = None) -> str:
username = random_username()
return f"{username}@{random.choice(wl.email_domains)}"
def random_server_header() -> str:
"""Generate random server header from wordlists"""
config = get_config()
@@ -44,10 +46,11 @@ def random_server_header() -> str:
wl = get_wordlists()
return random.choice(wl.server_headers)
def random_api_key() -> str:
"""Generate random API key"""
wl = get_wordlists()
key = ''.join(random.choices(string.ascii_letters + string.digits, k=32))
key = "".join(random.choices(string.ascii_letters + string.digits, k=32))
return random.choice(wl.api_key_prefixes) + key
@@ -87,14 +90,16 @@ def users_json() -> str:
users = []
for i in range(random.randint(3, 8)):
username = random_username()
users.append({
"id": i + 1,
"username": username,
"email": random_email(username),
"password": random_password(),
"role": random.choice(wl.user_roles),
"api_token": random_api_key()
})
users.append(
{
"id": i + 1,
"username": username,
"email": random_email(username),
"password": random_password(),
"role": random.choice(wl.user_roles),
"api_token": random_api_key(),
}
)
return json.dumps({"users": users}, indent=2)
@@ -102,20 +107,28 @@ def api_keys_json() -> str:
"""Generate fake api_keys.json with random data"""
keys = {
"stripe": {
"public_key": "pk_live_" + ''.join(random.choices(string.ascii_letters + string.digits, k=24)),
"secret_key": random_api_key()
"public_key": "pk_live_"
+ "".join(random.choices(string.ascii_letters + string.digits, k=24)),
"secret_key": random_api_key(),
},
"aws": {
"access_key_id": "AKIA" + ''.join(random.choices(string.ascii_uppercase + string.digits, k=16)),
"secret_access_key": ''.join(random.choices(string.ascii_letters + string.digits + '+/', k=40))
"access_key_id": "AKIA"
+ "".join(random.choices(string.ascii_uppercase + string.digits, k=16)),
"secret_access_key": "".join(
random.choices(string.ascii_letters + string.digits + "+/", k=40)
),
},
"sendgrid": {
"api_key": "SG." + ''.join(random.choices(string.ascii_letters + string.digits, k=48))
"api_key": "SG."
+ "".join(random.choices(string.ascii_letters + string.digits, k=48))
},
"twilio": {
"account_sid": "AC" + ''.join(random.choices(string.ascii_lowercase + string.digits, k=32)),
"auth_token": ''.join(random.choices(string.ascii_lowercase + string.digits, k=32))
}
"account_sid": "AC"
+ "".join(random.choices(string.ascii_lowercase + string.digits, k=32)),
"auth_token": "".join(
random.choices(string.ascii_lowercase + string.digits, k=32)
),
},
}
return json.dumps(keys, indent=2)
@@ -128,46 +141,65 @@ def api_response(path: str) -> str:
users = []
for i in range(count):
username = random_username()
users.append({
"id": i + 1,
"username": username,
"email": random_email(username),
"role": random.choice(wl.user_roles)
})
users.append(
{
"id": i + 1,
"username": username,
"email": random_email(username),
"role": random.choice(wl.user_roles),
}
)
return users
responses = {
'/api/users': json.dumps({
"users": random_users(random.randint(2, 5)),
"total": random.randint(50, 500)
}, indent=2),
'/api/v1/users': json.dumps({
"status": "success",
"data": [{
"id": random.randint(1, 100),
"name": random_username(),
"api_key": random_api_key()
}]
}, indent=2),
'/api/v2/secrets': json.dumps({
"database": {
"host": random.choice(wl.database_hosts),
"username": random_username(),
"password": random_password(),
"database": random_database_name()
"/api/users": json.dumps(
{
"users": random_users(random.randint(2, 5)),
"total": random.randint(50, 500),
},
"api_keys": {
"stripe": random_api_key(),
"aws": 'AKIA' + ''.join(random.choices(string.ascii_uppercase + string.digits, k=16))
}
}, indent=2),
'/api/config': json.dumps({
"app_name": random.choice(wl.application_names),
"debug": random.choice([True, False]),
"secret_key": random_api_key(),
"database_url": f"postgresql://{random_username()}:{random_password()}@localhost/{random_database_name()}"
}, indent=2),
'/.env': f"""APP_NAME={random.choice(wl.application_names)}
indent=2,
),
"/api/v1/users": json.dumps(
{
"status": "success",
"data": [
{
"id": random.randint(1, 100),
"name": random_username(),
"api_key": random_api_key(),
}
],
},
indent=2,
),
"/api/v2/secrets": json.dumps(
{
"database": {
"host": random.choice(wl.database_hosts),
"username": random_username(),
"password": random_password(),
"database": random_database_name(),
},
"api_keys": {
"stripe": random_api_key(),
"aws": "AKIA"
+ "".join(
random.choices(string.ascii_uppercase + string.digits, k=16)
),
},
},
indent=2,
),
"/api/config": json.dumps(
{
"app_name": random.choice(wl.application_names),
"debug": random.choice([True, False]),
"secret_key": random_api_key(),
"database_url": f"postgresql://{random_username()}:{random_password()}@localhost/{random_database_name()}",
},
indent=2,
),
"/.env": f"""APP_NAME={random.choice(wl.application_names)}
DEBUG={random.choice(['true', 'false'])}
APP_KEY=base64:{''.join(random.choices(string.ascii_letters + string.digits, k=32))}=
DB_CONNECTION=mysql
@@ -179,7 +211,7 @@ DB_PASSWORD={random_password()}
AWS_ACCESS_KEY_ID=AKIA{''.join(random.choices(string.ascii_uppercase + string.digits, k=16))}
AWS_SECRET_ACCESS_KEY={''.join(random.choices(string.ascii_letters + string.digits + '+/', k=40))}
STRIPE_SECRET={random_api_key()}
"""
""",
}
return responses.get(path, json.dumps({"error": "Not found"}, indent=2))
@@ -191,7 +223,9 @@ def directory_listing(path: str) -> str:
files = wl.directory_files
dirs = wl.directory_dirs
selected_files = [(f, random.randint(1024, 1024*1024))
for f in random.sample(files, min(6, len(files)))]
selected_files = [
(f, random.randint(1024, 1024 * 1024))
for f in random.sample(files, min(6, len(files)))
]
return html_templates.directory_listing(path, dirs, selected_files)

View File

@@ -14,8 +14,13 @@ from analyzer import Analyzer
from templates import html_templates
from templates.dashboard_template import generate_dashboard
from generators import (
credentials_txt, passwords_txt, users_json, api_keys_json,
api_response, directory_listing, random_server_header
credentials_txt,
passwords_txt,
users_json,
api_keys_json,
api_response,
directory_listing,
random_server_header,
)
from wordlists import get_wordlists
from sql_errors import generate_sql_error_response, get_sql_response_with_data
@@ -25,6 +30,7 @@ from server_errors import generate_server_error
class Handler(BaseHTTPRequestHandler):
"""HTTP request handler for the deception server"""
webpages: Optional[List[str]] = None
config: Config = None
tracker: AccessTracker = None
@@ -37,15 +43,15 @@ class Handler(BaseHTTPRequestHandler):
def _get_client_ip(self) -> str:
"""Extract client IP address from request, checking proxy headers first"""
# Headers might not be available during early error logging
if hasattr(self, 'headers') and self.headers:
if hasattr(self, "headers") and self.headers:
# Check X-Forwarded-For header (set by load balancers/proxies)
forwarded_for = self.headers.get('X-Forwarded-For')
forwarded_for = self.headers.get("X-Forwarded-For")
if forwarded_for:
# X-Forwarded-For can contain multiple IPs, get the first (original client)
return forwarded_for.split(',')[0].strip()
return forwarded_for.split(",")[0].strip()
# Check X-Real-IP header (set by nginx and other proxies)
real_ip = self.headers.get('X-Real-IP')
real_ip = self.headers.get("X-Real-IP")
if real_ip:
return real_ip.strip()
@@ -54,7 +60,7 @@ class Handler(BaseHTTPRequestHandler):
def _get_user_agent(self) -> str:
"""Extract user agent from request"""
return self.headers.get('User-Agent', '')
return self.headers.get("User-Agent", "")
def _get_category_by_ip(self, client_ip: str) -> str:
"""Get the category of an IP from the database"""
@@ -97,7 +103,7 @@ class Handler(BaseHTTPRequestHandler):
Returns True if the path was handled, False otherwise.
"""
# SQL-vulnerable endpoints
sql_endpoints = ['/api/search', '/api/sql', '/api/database']
sql_endpoints = ["/api/search", "/api/sql", "/api/database"]
base_path = urlparse(path).path
if base_path not in sql_endpoints:
@@ -112,22 +118,30 @@ class Handler(BaseHTTPRequestHandler):
user_agent = self._get_user_agent()
# Always check for SQL injection patterns
error_msg, content_type, status_code = generate_sql_error_response(query_string or "")
error_msg, content_type, status_code = generate_sql_error_response(
query_string or ""
)
if error_msg:
# SQL injection detected - log and return error
self.access_logger.warning(f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}")
self.access_logger.warning(
f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}"
)
self.send_response(status_code)
self.send_header('Content-type', content_type)
self.send_header("Content-type", content_type)
self.end_headers()
self.wfile.write(error_msg.encode())
else:
# No injection detected - return fake data
self.access_logger.info(f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}")
self.access_logger.info(
f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}"
)
self.send_response(200)
self.send_header('Content-type', 'application/json')
self.send_header("Content-type", "application/json")
self.end_headers()
response_data = get_sql_response_with_data(base_path, query_string or "")
response_data = get_sql_response_with_data(
base_path, query_string or ""
)
self.wfile.write(response_data.encode())
return True
@@ -140,7 +154,7 @@ class Handler(BaseHTTPRequestHandler):
# Still send a response even on error
try:
self.send_response(500)
self.send_header('Content-type', 'application/json')
self.send_header("Content-type", "application/json")
self.end_headers()
self.wfile.write(b'{"error": "Internal server error"}')
except:
@@ -159,18 +173,22 @@ class Handler(BaseHTTPRequestHandler):
# Determine if we should apply crawler page limit based on config and IP category
should_apply_crawler_limit = False
if self.config.infinite_pages_for_malicious:
if (ip_category == "good_crawler" or ip_category == "regular_user") and page_visit_count >= self.config.max_pages_limit:
if (
ip_category == "good_crawler" or ip_category == "regular_user"
) and page_visit_count >= self.config.max_pages_limit:
should_apply_crawler_limit = True
else:
if (ip_category == "good_crawler" or ip_category == "bad_crawler" or ip_category == "attacker") and page_visit_count >= self.config.max_pages_limit:
if (
ip_category == "good_crawler"
or ip_category == "bad_crawler"
or ip_category == "attacker"
) and page_visit_count >= self.config.max_pages_limit:
should_apply_crawler_limit = True
# If good crawler reached max pages, return a simple page with no links
if should_apply_crawler_limit:
return html_templates.main_page(
Handler.counter,
'<p>Crawl limit reached.</p>'
Handler.counter, "<p>Crawl limit reached.</p>"
)
num_pages = random.randint(*self.config.links_per_page_range)
@@ -189,10 +207,12 @@ class Handler(BaseHTTPRequestHandler):
# Add links
if self.webpages is None:
for _ in range(num_pages):
address = ''.join([
random.choice(self.config.char_space)
for _ in range(random.randint(*self.config.links_length_range))
])
address = "".join(
[
random.choice(self.config.char_space)
for _ in range(random.randint(*self.config.links_length_range))
]
)
content += f"""
<div class="link-box">
<a href="{address}">{address}</a>
@@ -223,27 +243,36 @@ class Handler(BaseHTTPRequestHandler):
post_data = ""
from urllib.parse import urlparse
base_path = urlparse(self.path).path
if base_path in ['/api/search', '/api/sql', '/api/database']:
content_length = int(self.headers.get('Content-Length', 0))
if base_path in ["/api/search", "/api/sql", "/api/database"]:
content_length = int(self.headers.get("Content-Length", 0))
if content_length > 0:
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
post_data = self.rfile.read(content_length).decode(
"utf-8", errors="replace"
)
self.access_logger.info(f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}")
self.access_logger.info(
f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}"
)
error_msg, content_type, status_code = generate_sql_error_response(post_data)
error_msg, content_type, status_code = generate_sql_error_response(
post_data
)
try:
if error_msg:
self.access_logger.warning(f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}")
self.access_logger.warning(
f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}"
)
self.send_response(status_code)
self.send_header('Content-type', content_type)
self.send_header("Content-type", content_type)
self.end_headers()
self.wfile.write(error_msg.encode())
else:
self.send_response(200)
self.send_header('Content-type', 'application/json')
self.send_header("Content-type", "application/json")
self.end_headers()
response_data = get_sql_response_with_data(base_path, post_data)
self.wfile.write(response_data.encode())
@@ -253,28 +282,35 @@ class Handler(BaseHTTPRequestHandler):
self.app_logger.error(f"Error in SQL POST handler: {str(e)}")
return
if base_path == '/api/contact':
content_length = int(self.headers.get('Content-Length', 0))
if base_path == "/api/contact":
content_length = int(self.headers.get("Content-Length", 0))
if content_length > 0:
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
post_data = self.rfile.read(content_length).decode(
"utf-8", errors="replace"
)
parsed_data = {}
for pair in post_data.split('&'):
if '=' in pair:
key, value = pair.split('=', 1)
for pair in post_data.split("&"):
if "=" in pair:
key, value = pair.split("=", 1)
from urllib.parse import unquote_plus
parsed_data[unquote_plus(key)] = unquote_plus(value)
xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values())
if xss_detected:
self.access_logger.warning(f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}")
self.access_logger.warning(
f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}"
)
else:
self.access_logger.info(f"[XSS ENDPOINT POST] {client_ip} - {base_path}")
self.access_logger.info(
f"[XSS ENDPOINT POST] {client_ip} - {base_path}"
)
try:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
response_html = generate_xss_response(parsed_data)
self.wfile.write(response_html.encode())
@@ -284,11 +320,15 @@ class Handler(BaseHTTPRequestHandler):
self.app_logger.error(f"Error in XSS POST handler: {str(e)}")
return
self.access_logger.warning(f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}")
self.access_logger.warning(
f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}"
)
content_length = int(self.headers.get('Content-Length', 0))
content_length = int(self.headers.get("Content-Length", 0))
if content_length > 0:
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
post_data = self.rfile.read(content_length).decode(
"utf-8", errors="replace"
)
self.access_logger.warning(f"[POST DATA] {post_data[:200]}")
@@ -301,18 +341,24 @@ class Handler(BaseHTTPRequestHandler):
self.credential_logger.info(credential_line)
# Also record in tracker for dashboard
self.tracker.record_credential_attempt(client_ip, self.path, username or 'N/A', password or 'N/A')
self.tracker.record_credential_attempt(
client_ip, self.path, username or "N/A", password or "N/A"
)
self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}")
self.access_logger.warning(
f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}"
)
# send the post data (body) to the record_access function so the post data can be used to detect suspicious things.
self.tracker.record_access(client_ip, self.path, user_agent, post_data, method='POST')
self.tracker.record_access(
client_ip, self.path, user_agent, post_data, method="POST"
)
time.sleep(1)
try:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.login_error().encode())
except BrokenPipeError:
@@ -330,95 +376,102 @@ class Handler(BaseHTTPRequestHandler):
return True
try:
if path == '/robots.txt':
if path == "/robots.txt":
self.send_response(200)
self.send_header('Content-type', 'text/plain')
self.send_header("Content-type", "text/plain")
self.end_headers()
self.wfile.write(html_templates.robots_txt().encode())
return True
if path in ['/credentials.txt', '/passwords.txt', '/admin_notes.txt']:
if path in ["/credentials.txt", "/passwords.txt", "/admin_notes.txt"]:
self.send_response(200)
self.send_header('Content-type', 'text/plain')
self.send_header("Content-type", "text/plain")
self.end_headers()
if 'credentials' in path:
if "credentials" in path:
self.wfile.write(credentials_txt().encode())
else:
self.wfile.write(passwords_txt().encode())
return True
if path in ['/users.json', '/api_keys.json', '/config.json']:
if path in ["/users.json", "/api_keys.json", "/config.json"]:
self.send_response(200)
self.send_header('Content-type', 'application/json')
self.send_header("Content-type", "application/json")
self.end_headers()
if 'users' in path:
if "users" in path:
self.wfile.write(users_json().encode())
elif 'api_keys' in path:
elif "api_keys" in path:
self.wfile.write(api_keys_json().encode())
else:
self.wfile.write(api_response('/api/config').encode())
self.wfile.write(api_response("/api/config").encode())
return True
if path in ['/admin', '/admin/', '/admin/login', '/login']:
if path in ["/admin", "/admin/", "/admin/login", "/login"]:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.login_form().encode())
return True
if path in ['/users', '/user', '/database', '/db', '/search']:
if path in ["/users", "/user", "/database", "/db", "/search"]:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.product_search().encode())
return True
if path in ['/info', '/input', '/contact', '/feedback', '/comment']:
if path in ["/info", "/input", "/contact", "/feedback", "/comment"]:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.input_form().encode())
return True
if path == '/server':
if path == "/server":
error_html, content_type = generate_server_error()
self.send_response(500)
self.send_header('Content-type', content_type)
self.send_header("Content-type", content_type)
self.end_headers()
self.wfile.write(error_html.encode())
return True
if path in ['/wp-login.php', '/wp-login', '/wp-admin', '/wp-admin/']:
if path in ["/wp-login.php", "/wp-login", "/wp-admin", "/wp-admin/"]:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.wp_login().encode())
return True
if path in ['/wp-content/', '/wp-includes/'] or 'wordpress' in path.lower():
if path in ["/wp-content/", "/wp-includes/"] or "wordpress" in path.lower():
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.wordpress().encode())
return True
if 'phpmyadmin' in path.lower() or path in ['/pma/', '/phpMyAdmin/']:
if "phpmyadmin" in path.lower() or path in ["/pma/", "/phpMyAdmin/"]:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.phpmyadmin().encode())
return True
if path.startswith('/api/') or path.startswith('/api') or path in ['/.env']:
if path.startswith("/api/") or path.startswith("/api") or path in ["/.env"]:
self.send_response(200)
self.send_header('Content-type', 'application/json')
self.send_header("Content-type", "application/json")
self.end_headers()
self.wfile.write(api_response(path).encode())
return True
if path in ['/backup/', '/uploads/', '/private/', '/admin/', '/config/', '/database/']:
if path in [
"/backup/",
"/uploads/",
"/private/",
"/admin/",
"/config/",
"/database/",
]:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(directory_listing(path).encode())
return True
@@ -440,9 +493,12 @@ class Handler(BaseHTTPRequestHandler):
return
user_agent = self._get_user_agent()
if self.config.dashboard_secret_path and self.path == self.config.dashboard_secret_path:
if (
self.config.dashboard_secret_path
and self.path == self.config.dashboard_secret_path
):
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
try:
stats = self.tracker.get_stats()
@@ -455,72 +511,93 @@ class Handler(BaseHTTPRequestHandler):
return
# API endpoint for fetching IP stats
if self.config.dashboard_secret_path and self.path.startswith(f"{self.config.dashboard_secret_path}/api/ip-stats/"):
ip_address = self.path.replace(f"{self.config.dashboard_secret_path}/api/ip-stats/", "")
if self.config.dashboard_secret_path and self.path.startswith(
f"{self.config.dashboard_secret_path}/api/ip-stats/"
):
ip_address = self.path.replace(
f"{self.config.dashboard_secret_path}/api/ip-stats/", ""
)
self.send_response(200)
self.send_header('Content-type', 'application/json')
self.send_header('Access-Control-Allow-Origin', '*')
self.send_header("Content-type", "application/json")
self.send_header("Access-Control-Allow-Origin", "*")
# Prevent browser caching - force fresh data from database every time
self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0')
self.send_header('Pragma', 'no-cache')
self.send_header('Expires', '0')
self.send_header(
"Cache-Control", "no-store, no-cache, must-revalidate, max-age=0"
)
self.send_header("Pragma", "no-cache")
self.send_header("Expires", "0")
self.end_headers()
try:
from database import get_database
import json
db = get_database()
ip_stats = db.get_ip_stats_by_ip(ip_address)
if ip_stats:
self.wfile.write(json.dumps(ip_stats).encode())
else:
self.wfile.write(json.dumps({'error': 'IP not found'}).encode())
self.wfile.write(json.dumps({"error": "IP not found"}).encode())
except BrokenPipeError:
pass
except Exception as e:
self.app_logger.error(f"Error fetching IP stats: {e}")
self.wfile.write(json.dumps({'error': str(e)}).encode())
self.wfile.write(json.dumps({"error": str(e)}).encode())
return
# API endpoint for downloading malicious IPs file
if self.config.dashboard_secret_path and self.path == f"{self.config.dashboard_secret_path}/api/download/malicious_ips.txt":
if (
self.config.dashboard_secret_path
and self.path
== f"{self.config.dashboard_secret_path}/api/download/malicious_ips.txt"
):
import os
file_path = os.path.join(os.path.dirname(__file__), 'exports', 'malicious_ips.txt')
file_path = os.path.join(
os.path.dirname(__file__), "exports", "malicious_ips.txt"
)
try:
if os.path.exists(file_path):
with open(file_path, 'rb') as f:
with open(file_path, "rb") as f:
content = f.read()
self.send_response(200)
self.send_header('Content-type', 'text/plain')
self.send_header('Content-Disposition', 'attachment; filename="malicious_ips.txt"')
self.send_header('Content-Length', str(len(content)))
self.send_header("Content-type", "text/plain")
self.send_header(
"Content-Disposition",
'attachment; filename="malicious_ips.txt"',
)
self.send_header("Content-Length", str(len(content)))
self.end_headers()
self.wfile.write(content)
else:
self.send_response(404)
self.send_header('Content-type', 'text/plain')
self.send_header("Content-type", "text/plain")
self.end_headers()
self.wfile.write(b'File not found')
self.wfile.write(b"File not found")
except BrokenPipeError:
pass
except Exception as e:
self.app_logger.error(f"Error serving malicious IPs file: {e}")
self.send_response(500)
self.send_header('Content-type', 'text/plain')
self.send_header("Content-type", "text/plain")
self.end_headers()
self.wfile.write(b'Internal server error')
self.wfile.write(b"Internal server error")
return
self.tracker.record_access(client_ip, self.path, user_agent, method='GET')
self.tracker.record_access(client_ip, self.path, user_agent, method="GET")
# self.analyzer.infer_user_category(client_ip)
# self.analyzer.update_ip_rep_infos(client_ip)
if self.tracker.is_suspicious_user_agent(user_agent):
self.access_logger.warning(f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}")
self.access_logger.warning(
f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}"
)
if self._should_return_error():
error_code = self._get_random_error_code()
self.access_logger.info(f"Returning error {error_code} to {client_ip} - {self.path}")
self.access_logger.info(
f"Returning error {error_code} to {client_ip} - {self.path}"
)
self.send_response(error_code)
self.end_headers()
return
@@ -530,13 +607,15 @@ class Handler(BaseHTTPRequestHandler):
time.sleep(self.config.delay / 1000.0)
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
try:
# Increment page visit counter for this IP and get the current count
current_visit_count = self._increment_page_visit(client_ip)
self.wfile.write(self.generate_page(self.path, current_visit_count).encode())
self.wfile.write(
self.generate_page(self.path, current_visit_count).encode()
)
Handler.counter -= 1

View File

@@ -13,6 +13,7 @@ from datetime import datetime
class TimezoneFormatter(logging.Formatter):
"""Custom formatter that respects configured timezone"""
def __init__(self, fmt=None, datefmt=None):
super().__init__(fmt, datefmt)
@@ -26,6 +27,7 @@ class TimezoneFormatter(logging.Formatter):
class LoggerManager:
"""Singleton logger manager for the application."""
_instance = None
def __new__(cls):
@@ -65,7 +67,7 @@ class LoggerManager:
app_file_handler = RotatingFileHandler(
os.path.join(log_dir, "krawl.log"),
maxBytes=max_bytes,
backupCount=backup_count
backupCount=backup_count,
)
app_file_handler.setFormatter(log_format)
self._app_logger.addHandler(app_file_handler)
@@ -82,7 +84,7 @@ class LoggerManager:
access_file_handler = RotatingFileHandler(
os.path.join(log_dir, "access.log"),
maxBytes=max_bytes,
backupCount=backup_count
backupCount=backup_count,
)
access_file_handler.setFormatter(log_format)
self._access_logger.addHandler(access_file_handler)
@@ -102,7 +104,7 @@ class LoggerManager:
credential_file_handler = RotatingFileHandler(
os.path.join(log_dir, "credentials.log"),
maxBytes=max_bytes,
backupCount=backup_count
backupCount=backup_count,
)
credential_file_handler.setFormatter(credential_format)
self._credential_logger.addHandler(credential_file_handler)

View File

@@ -25,6 +25,7 @@ from sanitizer import (
class Base(DeclarativeBase):
    """Declarative base class that every ORM model in this module inherits from."""
@@ -35,30 +36,35 @@ class AccessLog(Base):
Stores request metadata, suspicious activity flags, and timestamps
for analysis and dashboard display.
"""
__tablename__ = 'access_logs'
__tablename__ = "access_logs"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
#ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True, ForeignKey('ip_logs.id', ondelete='CASCADE'))
# ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True, ForeignKey('ip_logs.id', ondelete='CASCADE'))
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False)
user_agent: Mapped[Optional[str]] = mapped_column(String(MAX_USER_AGENT_LENGTH), nullable=True)
method: Mapped[str] = mapped_column(String(10), nullable=False, default='GET')
user_agent: Mapped[Optional[str]] = mapped_column(
String(MAX_USER_AGENT_LENGTH), nullable=True
)
method: Mapped[str] = mapped_column(String(10), nullable=False, default="GET")
is_suspicious: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
is_honeypot_trigger: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
is_honeypot_trigger: Mapped[bool] = mapped_column(
Boolean, nullable=False, default=False
)
timestamp: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow, index=True
)
# Relationship to attack detections
attack_detections: Mapped[List["AttackDetection"]] = relationship(
"AttackDetection",
back_populates="access_log",
cascade="all, delete-orphan"
"AttackDetection", back_populates="access_log", cascade="all, delete-orphan"
)
# Indexes for common queries
__table_args__ = (
Index('ix_access_logs_ip_timestamp', 'ip', 'timestamp'),
Index('ix_access_logs_is_suspicious', 'is_suspicious'),
Index('ix_access_logs_is_honeypot_trigger', 'is_honeypot_trigger'),
Index("ix_access_logs_ip_timestamp", "ip", "timestamp"),
Index("ix_access_logs_is_suspicious", "is_suspicious"),
Index("ix_access_logs_is_honeypot_trigger", "is_honeypot_trigger"),
)
def __repr__(self) -> str:
@@ -71,19 +77,24 @@ class CredentialAttempt(Base):
Stores the submitted username and password along with request metadata.
"""
__tablename__ = 'credential_attempts'
__tablename__ = "credential_attempts"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False)
username: Mapped[Optional[str]] = mapped_column(String(MAX_CREDENTIAL_LENGTH), nullable=True)
password: Mapped[Optional[str]] = mapped_column(String(MAX_CREDENTIAL_LENGTH), nullable=True)
timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
username: Mapped[Optional[str]] = mapped_column(
String(MAX_CREDENTIAL_LENGTH), nullable=True
)
password: Mapped[Optional[str]] = mapped_column(
String(MAX_CREDENTIAL_LENGTH), nullable=True
)
timestamp: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow, index=True
)
# Composite index for common queries
__table_args__ = (
Index('ix_credential_attempts_ip_timestamp', 'ip', 'timestamp'),
)
__table_args__ = (Index("ix_credential_attempts_ip_timestamp", "ip", "timestamp"),)
def __repr__(self) -> str:
return f"<CredentialAttempt(id={self.id}, ip='{self.ip}', username='{self.username}')>"
@@ -96,20 +107,25 @@ class AttackDetection(Base):
Linked to the parent AccessLog record. Multiple attack types can be
detected in a single request.
"""
__tablename__ = 'attack_detections'
__tablename__ = "attack_detections"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
access_log_id: Mapped[int] = mapped_column(
Integer,
ForeignKey('access_logs.id', ondelete='CASCADE'),
ForeignKey("access_logs.id", ondelete="CASCADE"),
nullable=False,
index=True
index=True,
)
attack_type: Mapped[str] = mapped_column(String(50), nullable=False)
matched_pattern: Mapped[Optional[str]] = mapped_column(String(MAX_ATTACK_PATTERN_LENGTH), nullable=True)
matched_pattern: Mapped[Optional[str]] = mapped_column(
String(MAX_ATTACK_PATTERN_LENGTH), nullable=True
)
# Relationship back to access log
access_log: Mapped["AccessLog"] = relationship("AccessLog", back_populates="attack_detections")
access_log: Mapped["AccessLog"] = relationship(
"AccessLog", back_populates="attack_detections"
)
def __repr__(self) -> str:
return f"<AttackDetection(id={self.id}, type='{self.attack_type}')>"
@@ -122,33 +138,43 @@ class IpStats(Base):
Includes fields for future GeoIP and reputation enrichment.
Updated on each request from an IP.
"""
__tablename__ = 'ip_stats'
__tablename__ = "ip_stats"
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), primary_key=True)
total_requests: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
first_seen: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow)
last_seen: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow)
first_seen: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow
)
last_seen: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow
)
# GeoIP fields (populated by future enrichment)
country_code: Mapped[Optional[str]] = mapped_column(String(2), nullable=True)
city: Mapped[Optional[str]] = mapped_column(String(MAX_CITY_LENGTH), nullable=True)
asn: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
asn_org: Mapped[Optional[str]] = mapped_column(String(MAX_ASN_ORG_LENGTH), nullable=True)
list_on: Mapped[Optional[Dict[str,str]]] = mapped_column(JSON, nullable=True)
asn_org: Mapped[Optional[str]] = mapped_column(
String(MAX_ASN_ORG_LENGTH), nullable=True
)
list_on: Mapped[Optional[Dict[str, str]]] = mapped_column(JSON, nullable=True)
# Reputation fields (populated by future enrichment)
reputation_score: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
reputation_source: Mapped[Optional[str]] = mapped_column(String(MAX_REPUTATION_SOURCE_LENGTH), nullable=True)
reputation_updated: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True)
reputation_source: Mapped[Optional[str]] = mapped_column(
String(MAX_REPUTATION_SOURCE_LENGTH), nullable=True
)
reputation_updated: Mapped[Optional[datetime]] = mapped_column(
DateTime, nullable=True
)
#Analyzed metrics, category and category scores
analyzed_metrics: Mapped[Dict[str,object]] = mapped_column(JSON, nullable=True)
# Analyzed metrics, category and category scores
analyzed_metrics: Mapped[Dict[str, object]] = mapped_column(JSON, nullable=True)
category: Mapped[str] = mapped_column(String, nullable=True)
category_scores: Mapped[Dict[str,int]] = mapped_column(JSON, nullable=True)
category_scores: Mapped[Dict[str, int]] = mapped_column(JSON, nullable=True)
manual_category: Mapped[bool] = mapped_column(Boolean, default=False, nullable=True)
last_analysis: Mapped[datetime] = mapped_column(DateTime, nullable=True)
def __repr__(self) -> str:
return f"<IpStats(ip='{self.ip}', total_requests={self.total_requests})>"
@@ -160,18 +186,19 @@ class CategoryHistory(Base):
Tracks when an IP's category changes, storing both the previous
and new category along with timestamp for timeline visualization.
"""
__tablename__ = 'category_history'
__tablename__ = "category_history"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
old_category: Mapped[Optional[str]] = mapped_column(String(50), nullable=True)
new_category: Mapped[str] = mapped_column(String(50), nullable=False)
timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
timestamp: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow, index=True
)
# Composite index for efficient IP-based timeline queries
__table_args__ = (
Index('ix_category_history_ip_timestamp', 'ip', 'timestamp'),
)
__table_args__ = (Index("ix_category_history_ip_timestamp", "ip", "timestamp"),)
def __repr__(self) -> str:
return f"<CategoryHistory(ip='{self.ip}', {self.old_category} -> {self.new_category})>"

View File

@@ -9,7 +9,6 @@ import html
import re
from typing import Optional, Dict
# Field length limits for database storage
MAX_IP_LENGTH = 45 # IPv6 max length
MAX_PATH_LENGTH = 2048 # URL max practical length
@@ -43,7 +42,7 @@ def sanitize_for_storage(value: Optional[str], max_length: int) -> str:
# Remove null bytes and control characters (except newline \n, tab \t, carriage return \r)
# Control chars are 0x00-0x1F and 0x7F, we keep 0x09 (tab), 0x0A (newline), 0x0D (carriage return)
cleaned = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', value)
cleaned = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", value)
# Truncate to max length
return cleaned[:max_length]
@@ -112,5 +111,6 @@ def escape_html_truncated(value: Optional[str], max_display_length: int) -> str:
return html.escape(value_str)
def sanitize_dict(value: Optional[Dict[str, str]], max_display_length):
    """Sanitize every value of a string dictionary for storage.

    Args:
        value: Mapping whose values are passed through sanitize_for_storage;
            keys are kept unchanged. ``None`` is treated as an empty mapping.
        max_display_length: Length limit forwarded to sanitize_for_storage
            for each value.

    Returns:
        A new dict with the same keys and sanitized values, or an empty dict
        when ``value`` is ``None`` or empty.
    """
    # The signature advertises Optional, so None must not crash on .items().
    if not value:
        return {}
    return {
        key: sanitize_for_storage(item, max_display_length)
        for key, item in value.items()
    }

View File

@@ -12,43 +12,48 @@ from config import get_config
from tracker import AccessTracker
from analyzer import Analyzer
from handler import Handler
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
from logger import (
initialize_logging,
get_app_logger,
get_access_logger,
get_credential_logger,
)
from database import initialize_database
from tasks_master import get_tasksmaster
def print_usage():
    """
    Print the command-line usage text to standard output.

    The text starts with a usage line built from sys.argv[0], followed by a
    description of the optional FILE argument and an example of the
    config.yaml structure.
    """
    # Entries ending in "\n" produce a blank separator line after them,
    # exactly as a print() call with that string would.
    usage_lines = (
        f"Usage: {sys.argv[0]} [FILE]\n",
        "FILE is file containing a list of webpage names to serve, one per line.",
        "If no file is provided, random links will be generated.\n",
        "Configuration:",
        " Configuration is loaded from a YAML file (default: config.yaml)",
        " Set CONFIG_LOCATION environment variable to use a different file.\n",
        " Example config.yaml structure:",
        " server:",
        " port: 5000",
        " delay: 100",
        " links:",
        " min_length: 5",
        " max_length: 15",
        " min_per_page: 10",
        " max_per_page: 15",
        " canary:",
        " token_url: null",
        " token_tries: 10",
        " dashboard:",
        " secret_path: null # auto-generated if not set",
        " database:",
        ' path: "data/krawl.db"',
        " retention_days: 30",
        " behavior:",
        " probability_error_codes: 0",
    )
    print(*usage_lines, sep="\n")
def main():
"""Main entry point for the deception server"""
if '-h' in sys.argv or '--help' in sys.argv:
if "-h" in sys.argv or "--help" in sys.argv:
print_usage()
exit(0)
@@ -63,9 +68,11 @@ def main():
# Initialize database for persistent storage
try:
initialize_database(config.database_path)
app_logger.info(f'Database initialized at: {config.database_path}')
app_logger.info(f"Database initialized at: {config.database_path}")
except Exception as e:
app_logger.warning(f'Database initialization failed: {e}. Continuing with in-memory only.')
app_logger.warning(
f"Database initialization failed: {e}. Continuing with in-memory only."
)
tracker = AccessTracker(config.max_pages_limit, config.ban_duration_seconds)
analyzer = Analyzer()
@@ -80,11 +87,13 @@ def main():
if len(sys.argv) == 2:
try:
with open(sys.argv[1], 'r') as f:
with open(sys.argv[1], "r") as f:
Handler.webpages = f.readlines()
if not Handler.webpages:
app_logger.warning('The file provided was empty. Using randomly generated links.')
app_logger.warning(
"The file provided was empty. Using randomly generated links."
)
Handler.webpages = None
except IOError:
app_logger.warning("Can't read input file. Using randomly generated links.")
@@ -94,25 +103,31 @@ def main():
tasks_master.run_scheduled_tasks()
try:
app_logger.info(f'Starting deception server on port {config.port}...')
app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}')
app_logger.info(f"Starting deception server on port {config.port}...")
app_logger.info(f"Dashboard available at: {config.dashboard_secret_path}")
if config.canary_token_url:
app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries')
app_logger.info(
f"Canary token will appear after {config.canary_token_tries} tries"
)
else:
app_logger.info('No canary token configured (set CANARY_TOKEN_URL to enable)')
app_logger.info(
"No canary token configured (set CANARY_TOKEN_URL to enable)"
)
server = HTTPServer(('0.0.0.0', config.port), Handler)
app_logger.info('Server started. Use <Ctrl-C> to stop.')
server = HTTPServer(("0.0.0.0", config.port), Handler)
app_logger.info("Server started. Use <Ctrl-C> to stop.")
server.serve_forever()
except KeyboardInterrupt:
app_logger.info('Stopping server...')
app_logger.info("Stopping server...")
server.socket.close()
app_logger.info('Server stopped')
app_logger.info("Server stopped")
except Exception as e:
app_logger.error(f'Error starting HTTP server on port {config.port}: {e}')
app_logger.error(f'Make sure you are root, if needed, and that port {config.port} is open.')
app_logger.error(f"Error starting HTTP server on port {config.port}: {e}")
app_logger.error(
f"Make sure you are root, if needed, and that port {config.port} is open."
)
exit(1)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -21,23 +21,23 @@ def generate_server_error() -> tuple[str, str]:
404: "Not Found",
500: "Internal Server Error",
502: "Bad Gateway",
503: "Service Unavailable"
503: "Service Unavailable",
}
code = random.choice(list(error_codes.keys()))
message = error_codes[code]
template = server_config.get('template', '')
version = random.choice(server_config.get('versions', ['1.0']))
template = server_config.get("template", "")
version = random.choice(server_config.get("versions", ["1.0"]))
html = template.replace('{code}', str(code))
html = html.replace('{message}', message)
html = html.replace('{version}', version)
html = template.replace("{code}", str(code))
html = html.replace("{message}", message)
html = html.replace("{version}", version)
if server_type == 'apache':
os = random.choice(server_config.get('os', ['Ubuntu']))
html = html.replace('{os}', os)
html = html.replace('{host}', 'localhost')
if server_type == "apache":
os = random.choice(server_config.get("os", ["Ubuntu"]))
html = html.replace("{os}", os)
html = html.replace("{host}", "localhost")
return (html, "text/html")
@@ -53,13 +53,13 @@ def get_server_header(server_type: str = None) -> str:
server_type = random.choice(list(server_errors.keys()))
server_config = server_errors.get(server_type, {})
version = random.choice(server_config.get('versions', ['1.0']))
version = random.choice(server_config.get("versions", ["1.0"]))
server_headers = {
'nginx': f"nginx/{version}",
'apache': f"Apache/{version}",
'iis': f"Microsoft-IIS/{version}",
'tomcat': f"Apache-Coyote/1.1"
"nginx": f"nginx/{version}",
"apache": f"Apache/{version}",
"iis": f"Microsoft-IIS/{version}",
"tomcat": f"Apache-Coyote/1.1",
}
return server_headers.get(server_type, "nginx/1.18.0")

View File

@@ -13,14 +13,14 @@ def detect_sql_injection_pattern(query_string: str) -> Optional[str]:
query_lower = query_string.lower()
patterns = {
'quote': [r"'", r'"', r'`'],
'comment': [r'--', r'#', r'/\*', r'\*/'],
'union': [r'\bunion\b', r'\bunion\s+select\b'],
'boolean': [r'\bor\b.*=.*', r'\band\b.*=.*', r"'.*or.*'.*=.*'"],
'time_based': [r'\bsleep\b', r'\bwaitfor\b', r'\bdelay\b', r'\bbenchmark\b'],
'stacked': [r';.*select', r';.*drop', r';.*insert', r';.*update', r';.*delete'],
'command': [r'\bexec\b', r'\bexecute\b', r'\bxp_cmdshell\b'],
'info_schema': [r'information_schema', r'table_schema', r'table_name'],
"quote": [r"'", r'"', r"`"],
"comment": [r"--", r"#", r"/\*", r"\*/"],
"union": [r"\bunion\b", r"\bunion\s+select\b"],
"boolean": [r"\bor\b.*=.*", r"\band\b.*=.*", r"'.*or.*'.*=.*'"],
"time_based": [r"\bsleep\b", r"\bwaitfor\b", r"\bdelay\b", r"\bbenchmark\b"],
"stacked": [r";.*select", r";.*drop", r";.*insert", r";.*update", r";.*delete"],
"command": [r"\bexec\b", r"\bexecute\b", r"\bxp_cmdshell\b"],
"info_schema": [r"information_schema", r"table_schema", r"table_name"],
}
for injection_type, pattern_list in patterns.items():
@@ -31,7 +31,9 @@ def detect_sql_injection_pattern(query_string: str) -> Optional[str]:
return None
def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tuple[str, str]:
def get_random_sql_error(
db_type: str = None, injection_type: str = None
) -> Tuple[str, str]:
wl = get_wordlists()
sql_errors = wl.sql_errors
@@ -45,8 +47,8 @@ def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tup
if injection_type and injection_type in db_errors:
errors = db_errors[injection_type]
elif 'generic' in db_errors:
errors = db_errors['generic']
elif "generic" in db_errors:
errors = db_errors["generic"]
else:
all_errors = []
for error_list in db_errors.values():
@@ -56,18 +58,20 @@ def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tup
error_message = random.choice(errors) if errors else "Database error occurred"
if '{table}' in error_message:
tables = ['users', 'products', 'orders', 'customers', 'accounts', 'sessions']
error_message = error_message.replace('{table}', random.choice(tables))
if "{table}" in error_message:
tables = ["users", "products", "orders", "customers", "accounts", "sessions"]
error_message = error_message.replace("{table}", random.choice(tables))
if '{column}' in error_message:
columns = ['id', 'name', 'email', 'password', 'username', 'created_at']
error_message = error_message.replace('{column}', random.choice(columns))
if "{column}" in error_message:
columns = ["id", "name", "email", "password", "username", "created_at"]
error_message = error_message.replace("{column}", random.choice(columns))
return (error_message, "text/plain")
def generate_sql_error_response(query_string: str, db_type: str = None) -> Tuple[str, str, int]:
def generate_sql_error_response(
query_string: str, db_type: str = None
) -> Tuple[str, str, int]:
injection_type = detect_sql_injection_pattern(query_string)
if not injection_type:
@@ -89,7 +93,7 @@ def get_sql_response_with_data(path: str, params: str) -> str:
injection_type = detect_sql_injection_pattern(params)
if injection_type in ['union', 'boolean', 'stacked']:
if injection_type in ["union", "boolean", "stacked"]:
data = {
"success": True,
"results": [
@@ -98,15 +102,14 @@ def get_sql_response_with_data(path: str, params: str) -> str:
"username": random_username(),
"email": random_email(),
"password_hash": random_password(),
"role": random.choice(["admin", "user", "moderator"])
"role": random.choice(["admin", "user", "moderator"]),
}
for i in range(1, random.randint(2, 5))
]
],
}
return json.dumps(data, indent=2)
return json.dumps({
"success": True,
"message": "Query executed successfully",
"results": []
}, indent=2)
return json.dumps(
{"success": True, "message": "Query executed successfully", "results": []},
indent=2,
)

View File

@@ -20,7 +20,7 @@ TASK_CONFIG = {
"name": "analyze-ips",
"cron": "*/1 * * * *",
"enabled": True,
"run_when_loaded": True
"run_when_loaded": True,
}
@@ -34,48 +34,74 @@ def main():
uneven_request_timing_threshold = config.uneven_request_timing_threshold
user_agents_used_threshold = config.user_agents_used_threshold
attack_urls_threshold = config.attack_urls_threshold
uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
uneven_request_timing_time_window_seconds = (
config.uneven_request_timing_time_window_seconds
)
app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
score = {}
score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["attacker"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
score["good_crawler"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
score["bad_crawler"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
score["regular_user"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
# 1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
weights = {
"attacker": {
"risky_http_methods": 6,
"robots_violations": 4,
"uneven_request_timing": 3,
"different_user_agents": 8,
"attack_url": 15
"attack_url": 15,
},
"good_crawler": {
"risky_http_methods": 1,
"robots_violations": 0,
"uneven_request_timing": 0,
"different_user_agents": 0,
"attack_url": 0
"attack_url": 0,
},
"bad_crawler": {
"risky_http_methods": 2,
"robots_violations": 7,
"uneven_request_timing": 0,
"different_user_agents": 5,
"attack_url": 5
"attack_url": 5,
},
"regular_user": {
"risky_http_methods": 0,
"robots_violations": 0,
"uneven_request_timing": 8,
"different_user_agents": 3,
"attack_url": 0
}
"attack_url": 0,
},
}
# Get IPs with recent activity (last minute to match cron schedule)
recent_accesses = db_manager.get_access_logs(limit=999999999, since_minutes=1)
ips_to_analyze = {item['ip'] for item in recent_accesses}
ips_to_analyze = {item["ip"] for item in recent_accesses}
if not ips_to_analyze:
app_logger.debug("[Background Task] analyze-ips: No recent activity, skipping")
@@ -92,23 +118,51 @@ def main():
if total_accesses_count < 3:
category = "unknown"
analyzed_metrics = {}
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
category_scores = {
"attacker": 0,
"good_crawler": 0,
"bad_crawler": 0,
"regular_user": 0,
"unknown": 0,
}
last_analysis = datetime.now()
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
db_manager.update_ip_stats_analysis(
ip, analyzed_metrics, category, category_scores, last_analysis
)
return 0
#--------------------- HTTP Methods ---------------------
get_accesses_count = len([item for item in ip_accesses if item["method"] == "GET"])
post_accesses_count = len([item for item in ip_accesses if item["method"] == "POST"])
put_accesses_count = len([item for item in ip_accesses if item["method"] == "PUT"])
delete_accesses_count = len([item for item in ip_accesses if item["method"] == "DELETE"])
head_accesses_count = len([item for item in ip_accesses if item["method"] == "HEAD"])
options_accesses_count = len([item for item in ip_accesses if item["method"] == "OPTIONS"])
patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"])
# --------------------- HTTP Methods ---------------------
get_accesses_count = len(
[item for item in ip_accesses if item["method"] == "GET"]
)
post_accesses_count = len(
[item for item in ip_accesses if item["method"] == "POST"]
)
put_accesses_count = len(
[item for item in ip_accesses if item["method"] == "PUT"]
)
delete_accesses_count = len(
[item for item in ip_accesses if item["method"] == "DELETE"]
)
head_accesses_count = len(
[item for item in ip_accesses if item["method"] == "HEAD"]
)
options_accesses_count = len(
[item for item in ip_accesses if item["method"] == "OPTIONS"]
)
patch_accesses_count = len(
[item for item in ip_accesses if item["method"] == "PATCH"]
)
if total_accesses_count > http_risky_methods_threshold:
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
http_method_attacker_score = (
post_accesses_count
+ put_accesses_count
+ delete_accesses_count
+ options_accesses_count
+ patch_accesses_count
) / total_accesses_count
else:
http_method_attacker_score = 0
#print(f"HTTP Method attacker score: {http_method_attacker_score}")
# print(f"HTTP Method attacker score: {http_method_attacker_score}")
if http_method_attacker_score >= http_risky_methods_threshold:
score["attacker"]["risky_http_methods"] = True
score["good_crawler"]["risky_http_methods"] = False
@@ -119,8 +173,8 @@ def main():
score["good_crawler"]["risky_http_methods"] = True
score["bad_crawler"]["risky_http_methods"] = False
score["regular_user"]["risky_http_methods"] = False
#--------------------- Robots Violations ---------------------
#respect robots.txt and login/config pages access frequency
# --------------------- Robots Violations ---------------------
# respect robots.txt and login/config pages access frequency
robots_disallows = []
robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt"
with open(robots_path, "r") as f:
@@ -132,11 +186,20 @@ def main():
if parts[0] == "Disallow":
parts[1] = parts[1].rstrip("/")
#print(f"DISALLOW {parts[1]}")
# print(f"DISALLOW {parts[1]}")
robots_disallows.append(parts[1].strip())
#if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
violated_robots_count = len([item for item in ip_accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
#print(f"Violated robots count: {violated_robots_count}")
# if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
violated_robots_count = len(
[
item
for item in ip_accesses
if any(
item["path"].rstrip("/").startswith(disallow)
for disallow in robots_disallows
)
]
)
# print(f"Violated robots count: {violated_robots_count}")
if total_accesses_count > 0:
violated_robots_ratio = violated_robots_count / total_accesses_count
else:
@@ -152,15 +215,20 @@ def main():
score["bad_crawler"]["robots_violations"] = False
score["regular_user"]["robots_violations"] = False
#--------------------- Requests Timing ---------------------
# --------------------- Requests Timing ---------------------
# Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
now_utc = datetime.now()
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
timestamps = [
ts
for ts in timestamps
if now_utc - ts
<= timedelta(seconds=uneven_request_timing_time_window_seconds)
]
timestamps = sorted(timestamps, reverse=True)
time_diffs = []
for i in range(0, len(timestamps)-1):
diff = (timestamps[i] - timestamps[i+1]).total_seconds()
for i in range(0, len(timestamps) - 1):
diff = (timestamps[i] - timestamps[i + 1]).total_seconds()
time_diffs.append(diff)
mean = 0
@@ -170,9 +238,11 @@ def main():
if time_diffs:
mean = sum(time_diffs) / len(time_diffs)
variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
std = variance ** 0.5
cv = std/mean
app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
std = variance**0.5
cv = std / mean
app_logger.debug(
f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}"
)
if cv >= uneven_request_timing_threshold:
score["attacker"]["uneven_request_timing"] = True
score["good_crawler"]["uneven_request_timing"] = False
@@ -183,11 +253,11 @@ def main():
score["good_crawler"]["uneven_request_timing"] = False
score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = False
#--------------------- Different User Agents ---------------------
#Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
# --------------------- Different User Agents ---------------------
# Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
user_agents_used = [item["user_agent"] for item in ip_accesses]
user_agents_used = list(dict.fromkeys(user_agents_used))
#print(f"User agents used: {user_agents_used}")
# print(f"User agents used: {user_agents_used}")
if len(user_agents_used) >= user_agents_used_threshold:
score["attacker"]["different_user_agents"] = True
score["good_crawler"]["different_user_agents"] = False
@@ -198,7 +268,7 @@ def main():
score["good_crawler"]["different_user_agents"] = False
score["bad_crawler"]["different_user_agents"] = False
score["regular_user"]["different_user_agents"] = False
#--------------------- Attack URLs ---------------------
# --------------------- Attack URLs ---------------------
attack_urls_found_list = []
wl = get_wordlists()
if wl.attack_patterns:
@@ -215,12 +285,14 @@ def main():
for name, pattern in wl.attack_patterns.items():
# Check original, decoded, and double-decoded paths
if (re.search(pattern, queried_path, re.IGNORECASE) or
re.search(pattern, decoded_path, re.IGNORECASE) or
re.search(pattern, decoded_path_twice, re.IGNORECASE)):
if (
re.search(pattern, queried_path, re.IGNORECASE)
or re.search(pattern, decoded_path, re.IGNORECASE)
or re.search(pattern, decoded_path_twice, re.IGNORECASE)
):
attack_urls_found_list.append(f"{name}: {pattern}")
#remove duplicates
# remove duplicates
attack_urls_found_list = set(attack_urls_found_list)
attack_urls_found_list = list(attack_urls_found_list)
@@ -234,28 +306,102 @@ def main():
score["good_crawler"]["attack_url"] = False
score["bad_crawler"]["attack_url"] = False
score["regular_user"]["attack_url"] = False
#--------------------- Calculate score ---------------------
# --------------------- Calculate score ---------------------
attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
attacker_score = (
score["attacker"]["risky_http_methods"]
* weights["attacker"]["risky_http_methods"]
)
attacker_score = (
attacker_score
+ score["attacker"]["robots_violations"]
* weights["attacker"]["robots_violations"]
)
attacker_score = (
attacker_score
+ score["attacker"]["uneven_request_timing"]
* weights["attacker"]["uneven_request_timing"]
)
attacker_score = (
attacker_score
+ score["attacker"]["different_user_agents"]
* weights["attacker"]["different_user_agents"]
)
attacker_score = (
attacker_score
+ score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
)
good_crawler_score = (
score["good_crawler"]["risky_http_methods"]
* weights["good_crawler"]["risky_http_methods"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["robots_violations"]
* weights["good_crawler"]["robots_violations"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["uneven_request_timing"]
* weights["good_crawler"]["uneven_request_timing"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["different_user_agents"]
* weights["good_crawler"]["different_user_agents"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["attack_url"]
* weights["good_crawler"]["attack_url"]
)
bad_crawler_score = (
score["bad_crawler"]["risky_http_methods"]
* weights["bad_crawler"]["risky_http_methods"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["robots_violations"]
* weights["bad_crawler"]["robots_violations"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["uneven_request_timing"]
* weights["bad_crawler"]["uneven_request_timing"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["different_user_agents"]
* weights["bad_crawler"]["different_user_agents"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
)
regular_user_score = (
score["regular_user"]["risky_http_methods"]
* weights["regular_user"]["risky_http_methods"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["robots_violations"]
* weights["regular_user"]["robots_violations"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["uneven_request_timing"]
* weights["regular_user"]["uneven_request_timing"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["different_user_agents"]
* weights["regular_user"]["different_user_agents"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["attack_url"]
* weights["regular_user"]["attack_url"]
)
score_details = f"""
Attacker score: {attacker_score}
Good Crawler score: {good_crawler_score}
@@ -263,9 +409,22 @@ def main():
Regular User score: {regular_user_score}
"""
app_logger.debug(score_details)
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
analyzed_metrics = {
"risky_http_methods": http_method_attacker_score,
"robots_violations": violated_robots_ratio,
"uneven_request_timing": mean,
"different_user_agents": user_agents_used,
"attack_url": attack_urls_found_list,
}
category_scores = {
"attacker": attacker_score,
"good_crawler": good_crawler_score,
"bad_crawler": bad_crawler_score,
"regular_user": regular_user_score,
}
category = max(category_scores, key=category_scores.get)
last_analysis = datetime.now()
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
db_manager.update_ip_stats_analysis(
ip, analyzed_metrics, category, category_scores, last_analysis
)
return

View File

@@ -11,7 +11,7 @@ TASK_CONFIG = {
"name": "fetch-ip-rep",
"cron": "*/5 * * * *",
"enabled": True,
"run_when_loaded": True
"run_when_loaded": True,
}
@@ -21,7 +21,9 @@ def main():
# Only get IPs that haven't been enriched yet
unenriched_ips = db_manager.get_unenriched_ips(limit=50)
app_logger.info(f"{len(unenriched_ips)} IP's need to be have reputation enrichment.")
app_logger.info(
f"{len(unenriched_ips)} IP's need to be have reputation enrichment."
)
for ip in unenriched_ips:
try:
api_url = "https://iprep.lcrawl.com/api/iprep/"
@@ -43,8 +45,11 @@ def main():
sanitized_list_on = sanitize_dict(list_on, 100000)
db_manager.update_ip_rep_infos(
ip, sanitized_country_iso_code, sanitized_asn,
sanitized_asn_org, sanitized_list_on
ip,
sanitized_country_iso_code,
sanitized_asn,
sanitized_asn_org,
sanitized_list_on,
)
except requests.RequestException as e:
app_logger.warning(f"Failed to fetch IP rep for {ip}: {e}")

View File

@@ -17,24 +17,29 @@ TASK_CONFIG = {
"name": "export-malicious-ips",
"cron": "*/5 * * * *",
"enabled": True,
"run_when_loaded": True
"run_when_loaded": True,
}
EXPORTS_DIR = "exports"
OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
# ----------------------
# TASK LOGIC
# ----------------------
def has_recent_honeypot_access(session, minutes: int = 5) -> bool:
    """
    Tell whether any honeypot-triggering access was logged recently.

    Args:
        session: Object exposing query(); presumably a SQLAlchemy session
            bound to the application database (confirm against caller).
        minutes: Size of the look-back window, in minutes.

    Returns:
        True if at least one AccessLog row flagged as a honeypot trigger
        has a timestamp inside the window, False otherwise.
    """
    # The window start comes from the naive local clock (datetime.now()).
    # NOTE(review): assumes AccessLog.timestamp is stored on the same
    # clock -- confirm against the model definition.
    window_start = datetime.now() - timedelta(minutes=minutes)
    conditions = (
        # "== True" builds a SQL expression here; it is not a Python bool test.
        AccessLog.is_honeypot_trigger == True,  # noqa: E712
        AccessLog.timestamp >= window_start,
    )
    return session.query(AccessLog).filter(*conditions).count() > 0
def main():
"""
Export all IPs flagged as suspicious to a text file.
@@ -49,23 +54,29 @@ def main():
# Check for recent honeypot activity
if not has_recent_honeypot_access(session):
app_logger.info(f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes")
app_logger.info(
f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes"
)
return
# Query distinct suspicious IPs
results = session.query(distinct(AccessLog.ip)).filter(
AccessLog.is_suspicious == True
).all()
results = (
session.query(distinct(AccessLog.ip))
.filter(AccessLog.is_suspicious == True)
.all()
)
# Ensure exports directory exists
os.makedirs(EXPORTS_DIR, exist_ok=True)
# Write IPs to file (one per line)
with open(OUTPUT_FILE, 'w') as f:
with open(OUTPUT_FILE, "w") as f:
for (ip,) in results:
f.write(f"{ip}\n")
app_logger.info(f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}")
app_logger.info(
f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}"
)
except Exception as e:
app_logger.error(f"[Background Task] {task_name} failed: {e}")

View File

@@ -6,7 +6,12 @@ import threading
import importlib
import importlib.util
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
from logger import (
initialize_logging,
get_app_logger,
get_access_logger,
get_credential_logger,
)
app_logger = get_app_logger()
@@ -28,7 +33,7 @@ except ModuleNotFoundError:
# ---------- TASKSMASTER CLASS ----------
class TasksMaster:
TASK_DEFAULT_CRON = '*/15 * * * *'
TASK_DEFAULT_CRON = "*/15 * * * *"
TASK_JITTER = 240
TASKS_FOLDER = os.path.join(os.path.dirname(__file__), "tasks")
@@ -36,7 +41,9 @@ class TasksMaster:
self.tasks = self._config_tasks()
self.scheduler = scheduler
self.last_run_times = {}
self.scheduler.add_listener(self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)
self.scheduler.add_listener(
self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR
)
def _config_tasks(self):
"""
@@ -80,7 +87,7 @@ class TasksMaster:
for filename in sorted(os.listdir(folder_path)):
# skip any non python files, as well as any __pycache__ or .pyc files that might creep in there
if not filename.endswith('.py') or filename.startswith("__"):
if not filename.endswith(".py") or filename.startswith("__"):
continue
path = os.path.join(folder_path, filename)
@@ -95,11 +102,13 @@ class TasksMaster:
continue
# if we have a tasks config and a main function, we attempt to schedule it
if hasattr(module, 'TASK_CONFIG') and hasattr(module, 'main'):
if hasattr(module, "TASK_CONFIG") and hasattr(module, "main"):
# ensure task_config is a dict
if not isinstance(module.TASK_CONFIG, dict):
app_logger.error(f"TASK_CONFIG is not a dict in {filename}. Skipping task.")
app_logger.error(
f"TASK_CONFIG is not a dict in {filename}. Skipping task."
)
continue
task_cron = module.TASK_CONFIG.get("cron") or self.TASK_DEFAULT_CRON
@@ -109,24 +118,26 @@ class TasksMaster:
try:
CronTrigger.from_crontab(task_cron)
except ValueError as ve:
app_logger.error(f"Invalid cron format for task {task_name}: {ve} - Skipping this task")
app_logger.error(
f"Invalid cron format for task {task_name}: {ve} - Skipping this task"
)
continue
task = {
'name': module.TASK_CONFIG.get('name', module_name),
'filename': filename,
'cron': task_cron,
"name": module.TASK_CONFIG.get("name", module_name),
"filename": filename,
"cron": task_cron,
"enabled": module.TASK_CONFIG.get("enabled", False),
"run_when_loaded": module.TASK_CONFIG.get("run_when_loaded", False)
"run_when_loaded": module.TASK_CONFIG.get("run_when_loaded", False),
}
tasks.append(task)
# we are missing things, and we log what's missing
else:
if not hasattr(module, 'TASK_CONFIG'):
if not hasattr(module, "TASK_CONFIG"):
app_logger.warning(f"Missing TASK_CONFIG in {filename}")
elif not hasattr(module, 'main'):
elif not hasattr(module, "main"):
app_logger.warning(f"Missing main() in {filename}")
return tasks
@@ -147,18 +158,32 @@ class TasksMaster:
# if task is disabled, skip this one
if not task_enabled:
app_logger.info(f"{task_name} is disabled in client config. Skipping task")
app_logger.info(
f"{task_name} is disabled in client config. Skipping task"
)
continue
try:
if os.path.isfile(os.path.join(self.TASKS_FOLDER, task_to_run.get("filename"))):
if os.path.isfile(
os.path.join(self.TASKS_FOLDER, task_to_run.get("filename"))
):
# schedule the task now that everything has checked out above...
self._schedule_task(task_name, module_name, task_cron, run_when_loaded)
app_logger.info(f"Scheduled {module_name} cron is set to {task_cron}.", extra={"task": task_to_run})
self._schedule_task(
task_name, module_name, task_cron, run_when_loaded
)
app_logger.info(
f"Scheduled {module_name} cron is set to {task_cron}.",
extra={"task": task_to_run},
)
else:
app_logger.info(f"Skipping invalid or unsafe file: {task_to_run.get('filename')}", extra={"task": task_to_run})
app_logger.info(
f"Skipping invalid or unsafe file: {task_to_run.get('filename')}",
extra={"task": task_to_run},
)
except Exception as e:
app_logger.error(f"Error scheduling task: {e}", extra={"tasks": task_to_run})
app_logger.error(
f"Error scheduling task: {e}", extra={"tasks": task_to_run}
)
def _schedule_task(self, task_name, module_name, task_cron, run_when_loaded):
try:
@@ -166,7 +191,7 @@ class TasksMaster:
module = importlib.import_module(f"tasks.{module_name}")
# Check if the module has a 'main' function
if hasattr(module, 'main'):
if hasattr(module, "main"):
app_logger.info(f"Scheduling {task_name} - {module_name} Main Function")
# unique_job_id
@@ -180,7 +205,9 @@ class TasksMaster:
# schedule the task / job
if run_when_loaded:
app_logger.info(f"Task: {task_name} is set to run instantly. Scheduling to run on scheduler start")
app_logger.info(
f"Task: {task_name} is set to run instantly. Scheduling to run on scheduler start"
)
self.scheduler.add_job(
module.main,
@@ -189,7 +216,7 @@ class TasksMaster:
jitter=self.TASK_JITTER,
name=task_name,
next_run_time=datetime.datetime.now(),
max_instances=1
max_instances=1,
)
else:
self.scheduler.add_job(
@@ -198,7 +225,7 @@ class TasksMaster:
id=job_identifier,
jitter=self.TASK_JITTER,
name=task_name,
max_instances=1
max_instances=1,
)
else:
app_logger.error(f"{module_name} does not define a 'main' function.")
@@ -220,11 +247,13 @@ class TasksMaster:
jobs_list = []
for job in scheduled_jobs:
jobs_list.append({
jobs_list.append(
{
"id": job.id,
"name": job.name,
"next_run": job.next_run_time,
})
}
)
return jobs_list
def run_scheduled_tasks(self):
@@ -251,6 +280,7 @@ class TasksMaster:
# ---------- SINGLETON WRAPPER ----------
T = type
def singleton_loader(func):
"""Decorator to ensure only one instance exists."""
cache: dict[str, T] = {}
@@ -262,6 +292,7 @@ def singleton_loader(func):
if func.__name__ not in cache:
cache[func.__name__] = func(*args, **kwargs)
return cache[func.__name__]
return wrapper
@@ -283,6 +314,8 @@ def get_tasksmaster(scheduler: BackgroundScheduler | None = None) -> TasksMaster
# Auto-start scheduler if not already running
if not scheduler.running:
scheduler.start()
app_logger.info("TasksMaster scheduler started automatically with singleton creation.")
app_logger.info(
"TasksMaster scheduler started automatically with singleton creation."
)
return tm_instance

View File

@@ -8,8 +8,8 @@ from .template_loader import load_template, clear_cache, TemplateNotFoundError
from . import html_templates
__all__ = [
'load_template',
'clear_cache',
'TemplateNotFoundError',
'html_templates',
"load_template",
"clear_cache",
"TemplateNotFoundError",
"html_templates",
]

View File

@@ -9,12 +9,14 @@ import html
from datetime import datetime
from zoneinfo import ZoneInfo
def _escape(value) -> str:
"""Escape HTML special characters to prevent XSS attacks."""
if value is None:
return ""
return html.escape(str(value))
def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str:
"""Format ISO timestamp for display with timezone conversion
@@ -30,10 +32,12 @@ def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str:
return dt.strftime("%Y-%m-%d %H:%M:%S")
except Exception:
# Fallback for old format
return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
return (
iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
)
def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
"""Generate dashboard HTML with access statistics
Args:
@@ -42,8 +46,8 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
"""
# Generate IP rows with clickable functionality for dropdown stats
top_ips_rows = '\n'.join([
f'''<tr class="ip-row" data-ip="{_escape(ip)}">
top_ips_rows = (
"\n".join([f"""<tr class="ip-row" data-ip="{_escape(ip)}">
<td class="rank">{i+1}</td>
<td class="ip-clickable">{_escape(ip)}</td>
<td>{count}</td>
@@ -54,25 +58,35 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div>
</div>
</td>
</tr>'''
for i, (ip, count) in enumerate(stats['top_ips'])
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
</tr>""" for i, (ip, count) in enumerate(stats["top_ips"])])
or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
)
# Generate paths rows (CRITICAL: paths can contain XSS payloads)
top_paths_rows = '\n'.join([
f'<tr><td class="rank">{i+1}</td><td>{_escape(path)}</td><td>{count}</td></tr>'
for i, (path, count) in enumerate(stats['top_paths'])
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
top_paths_rows = (
"\n".join(
[
f'<tr><td class="rank">{i+1}</td><td>{_escape(path)}</td><td>{count}</td></tr>'
for i, (path, count) in enumerate(stats["top_paths"])
]
)
or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
)
# Generate User-Agent rows (CRITICAL: user agents can contain XSS payloads)
top_ua_rows = '\n'.join([
f'<tr><td class="rank">{i+1}</td><td style="word-break: break-all;">{_escape(ua[:80])}</td><td>{count}</td></tr>'
for i, (ua, count) in enumerate(stats['top_user_agents'])
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
top_ua_rows = (
"\n".join(
[
f'<tr><td class="rank">{i+1}</td><td style="word-break: break-all;">{_escape(ua[:80])}</td><td>{count}</td></tr>'
for i, (ua, count) in enumerate(stats["top_user_agents"])
]
)
or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
)
# Generate suspicious accesses rows with clickable IPs
suspicious_rows = '\n'.join([
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
suspicious_rows = (
"\n".join([f"""<tr class="ip-row" data-ip="{_escape(log["ip"])}">
<td class="ip-clickable">{_escape(log["ip"])}</td>
<td>{_escape(log["path"])}</td>
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
@@ -84,13 +98,13 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div>
</div>
</td>
</tr>'''
for log in stats['recent_suspicious'][-10:]
]) or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
</tr>""" for log in stats["recent_suspicious"][-10:]])
or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
)
# Generate honeypot triggered IPs rows with clickable IPs
honeypot_rows = '\n'.join([
f'''<tr class="ip-row" data-ip="{_escape(ip)}">
honeypot_rows = (
"\n".join([f"""<tr class="ip-row" data-ip="{_escape(ip)}">
<td class="ip-clickable">{_escape(ip)}</td>
<td style="word-break: break-all;">{_escape(", ".join(paths))}</td>
<td>{len(paths)}</td>
@@ -101,13 +115,13 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div>
</div>
</td>
</tr>'''
for ip, paths in stats.get('honeypot_triggered_ips', [])
]) or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>'
</tr>""" for ip, paths in stats.get("honeypot_triggered_ips", [])])
or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>'
)
# Generate attack types rows with clickable IPs
attack_type_rows = '\n'.join([
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
attack_type_rows = (
"\n".join([f"""<tr class="ip-row" data-ip="{_escape(log["ip"])}">
<td class="ip-clickable">{_escape(log["ip"])}</td>
<td>{_escape(log["path"])}</td>
<td>{_escape(", ".join(log["attack_types"]))}</td>
@@ -120,13 +134,13 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div>
</div>
</td>
</tr>'''
for log in stats.get('attack_types', [])[-10:]
]) or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
</tr>""" for log in stats.get("attack_types", [])[-10:]])
or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
)
# Generate credential attempts rows with clickable IPs
credential_rows = '\n'.join([
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
credential_rows = (
"\n".join([f"""<tr class="ip-row" data-ip="{_escape(log["ip"])}">
<td class="ip-clickable">{_escape(log["ip"])}</td>
<td>{_escape(log["username"])}</td>
<td>{_escape(log["password"])}</td>
@@ -139,9 +153,9 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div>
</div>
</td>
</tr>'''
for log in stats.get('credential_attempts', [])[-20:]
]) or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'
</tr>""" for log in stats.get("credential_attempts", [])[-20:]])
or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'
)
return f"""<!DOCTYPE html>
<html>

View File

@@ -11,6 +11,7 @@ from typing import Dict
class TemplateNotFoundError(Exception):
    """Raised when a requested template file cannot be found."""
@@ -46,7 +47,7 @@ def load_template(name: str, **kwargs) -> str:
# Check cache first
if name not in _template_cache:
# Determine file path based on whether name has an extension
if '.' in name:
if "." in name:
file_path = _TEMPLATE_DIR / name
else:
file_path = _TEMPLATE_DIR / f"{name}.html"
@@ -54,7 +55,7 @@ def load_template(name: str, **kwargs) -> str:
if not file_path.exists():
raise TemplateNotFoundError(f"Template '{name}' not found at {file_path}")
_template_cache[name] = file_path.read_text(encoding='utf-8')
_template_cache[name] = file_path.read_text(encoding="utf-8")
template = _template_cache[name]

View File

@@ -17,7 +17,13 @@ class AccessTracker:
Maintains in-memory structures for fast dashboard access and
persists data to SQLite for long-term storage and analysis.
"""
def __init__(self, max_pages_limit, ban_duration_seconds, db_manager: Optional[DatabaseManager] = None):
def __init__(
self,
max_pages_limit,
ban_duration_seconds,
db_manager: Optional[DatabaseManager] = None,
):
"""
Initialize the access tracker.
@@ -37,9 +43,27 @@ class AccessTracker:
self.ip_page_visits: Dict[str, Dict[str, object]] = defaultdict(dict)
self.suspicious_patterns = [
'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests',
'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
'burp', 'zap', 'w3af', 'metasploit', 'nuclei', 'gobuster', 'dirbuster'
"bot",
"crawler",
"spider",
"scraper",
"curl",
"wget",
"python-requests",
"scanner",
"nikto",
"sqlmap",
"nmap",
"masscan",
"nessus",
"acunetix",
"burp",
"zap",
"w3af",
"metasploit",
"nuclei",
"gobuster",
"dirbuster",
]
# Load attack patterns from wordlists
@@ -49,11 +73,11 @@ class AccessTracker:
# Fallback if wordlists not loaded
if not self.attack_types:
self.attack_types = {
'path_traversal': r'\.\.',
'sql_injection': r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
'xss_attempt': r'(<script|javascript:|onerror=|onload=)',
'common_probes': r'(wp-admin|phpmyadmin|\.env|\.git|/admin|/config)',
'shell_injection': r'(\||;|`|\$\(|&&)',
"path_traversal": r"\.\.",
"sql_injection": r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
"xss_attempt": r"(<script|javascript:|onerror=|onload=)",
"common_probes": r"(wp-admin|phpmyadmin|\.env|\.git|/admin|/config)",
"shell_injection": r"(\||;|`|\$\(|&&)",
}
# Track IPs that accessed honeypot paths from robots.txt
@@ -94,14 +118,22 @@ class AccessTracker:
parsed = urllib.parse.parse_qs(post_data)
# Common username field names
username_fields = ['username', 'user', 'login', 'email', 'log', 'userid', 'account']
username_fields = [
"username",
"user",
"login",
"email",
"log",
"userid",
"account",
]
for field in username_fields:
if field in parsed and parsed[field]:
username = parsed[field][0]
break
# Common password field names
password_fields = ['password', 'pass', 'passwd', 'pwd', 'passphrase']
password_fields = ["password", "pass", "passwd", "pwd", "passphrase"]
for field in password_fields:
if field in parsed and parsed[field]:
password = parsed[field][0]
@@ -109,8 +141,12 @@ class AccessTracker:
except Exception:
# If parsing fails, try simple regex patterns
username_match = re.search(r'(?:username|user|login|email|log)=([^&\s]+)', post_data, re.IGNORECASE)
password_match = re.search(r'(?:password|pass|passwd|pwd)=([^&\s]+)', post_data, re.IGNORECASE)
username_match = re.search(
r"(?:username|user|login|email|log)=([^&\s]+)", post_data, re.IGNORECASE
)
password_match = re.search(
r"(?:password|pass|passwd|pwd)=([^&\s]+)", post_data, re.IGNORECASE
)
if username_match:
username = urllib.parse.unquote_plus(username_match.group(1))
@@ -119,29 +155,30 @@ class AccessTracker:
return username, password
def record_credential_attempt(self, ip: str, path: str, username: str, password: str):
def record_credential_attempt(
self, ip: str, path: str, username: str, password: str
):
"""
Record a credential login attempt.
Stores in both in-memory list and SQLite database.
"""
# In-memory storage for dashboard
self.credential_attempts.append({
'ip': ip,
'path': path,
'username': username,
'password': password,
'timestamp': datetime.now().isoformat()
})
self.credential_attempts.append(
{
"ip": ip,
"path": path,
"username": username,
"password": password,
"timestamp": datetime.now().isoformat(),
}
)
# Persist to database
if self.db:
try:
self.db.persist_credential(
ip=ip,
path=path,
username=username,
password=password
ip=ip, path=path, username=username, password=password
)
except Exception:
# Don't crash if database persistence fails
@@ -151,9 +188,9 @@ class AccessTracker:
self,
ip: str,
path: str,
user_agent: str = '',
body: str = '',
method: str = 'GET'
user_agent: str = "",
body: str = "",
method: str = "GET",
):
"""
Record an access attempt.
@@ -180,9 +217,9 @@ class AccessTracker:
attack_findings.extend(self.detect_attack_type(body))
is_suspicious = (
self.is_suspicious_user_agent(user_agent) or
self.is_honeypot_path(path) or
len(attack_findings) > 0
self.is_suspicious_user_agent(user_agent)
or self.is_honeypot_path(path)
or len(attack_findings) > 0
)
is_honeypot = self.is_honeypot_path(path)
@@ -191,15 +228,17 @@ class AccessTracker:
self.honeypot_triggered[ip].append(path)
# In-memory storage for dashboard
self.access_log.append({
'ip': ip,
'path': path,
'user_agent': user_agent,
'suspicious': is_suspicious,
'honeypot_triggered': self.is_honeypot_path(path),
'attack_types':attack_findings,
'timestamp': datetime.now().isoformat()
})
self.access_log.append(
{
"ip": ip,
"path": path,
"user_agent": user_agent,
"suspicious": is_suspicious,
"honeypot_triggered": self.is_honeypot_path(path),
"attack_types": attack_findings,
"timestamp": datetime.now().isoformat(),
}
)
# Persist to database
if self.db:
@@ -211,13 +250,13 @@ class AccessTracker:
method=method,
is_suspicious=is_suspicious,
is_honeypot_trigger=is_honeypot,
attack_types=attack_findings if attack_findings else None
attack_types=attack_findings if attack_findings else None,
)
except Exception:
# Don't crash if database persistence fails
pass
def detect_attack_type(self, data:str) -> list[str]:
def detect_attack_type(self, data: str) -> list[str]:
"""
Returns a list of all attack types found in path data
"""
@@ -230,27 +269,37 @@ class AccessTracker:
def is_honeypot_path(self, path: str) -> bool:
    """Report whether ``path`` hits one of the honeypot traps from robots.txt.

    A path is a trap when it equals one of the decoy URLs exactly
    (case-sensitive), or when its lowercased form contains one of the trap
    fragments anywhere.
    """
    # Decoy URLs compared verbatim against the requested path.
    exact_traps = (
        "/admin",
        "/admin/",
        "/backup",
        "/backup/",
        "/config",
        "/config/",
        "/private",
        "/private/",
        "/database",
        "/database/",
        "/credentials.txt",
        "/passwords.txt",
        "/admin_notes.txt",
        "/api_keys.json",
        "/.env",
        "/wp-admin",
        "/wp-admin/",
        "/phpmyadmin",
        "/phpMyAdmin/",
    )
    if path in exact_traps:
        return True

    # Fragments looked up anywhere inside the path, ignoring case.
    trap_fragments = (
        "/backup",
        "/admin",
        "/config",
        "/private",
        "/database",
        "phpmyadmin",
    )
    lowered = path.lower()
    for fragment in trap_fragments:
        if fragment in lowered:
            return True
    return False
def is_suspicious_user_agent(self, user_agent: str) -> bool:
"""Check if user agent matches suspicious patterns"""
@@ -272,6 +321,7 @@ class AccessTracker:
"""
try:
from sanitizer import sanitize_ip
# Sanitize the IP address
safe_ip = sanitize_ip(client_ip)
@@ -281,16 +331,17 @@ class AccessTracker:
return False
ip_stats = db.get_ip_stats_by_ip(safe_ip)
if not ip_stats or not ip_stats.get('category'):
if not ip_stats or not ip_stats.get("category"):
return False
# Check if category matches "good crawler"
category = ip_stats.get('category', '').lower().strip()
category = ip_stats.get("category", "").lower().strip()
return category
except Exception as e:
# Log but don't crash on database errors
import logging
logging.error(f"Error checking IP category for {client_ip}: {str(e)}")
return False
@@ -315,7 +366,9 @@ class AccessTracker:
# Set ban if reached limit
if self.ip_page_visits[client_ip]["count"] >= self.max_pages_limit:
self.ip_page_visits[client_ip]["ban_timestamp"] = datetime.now().isoformat()
self.ip_page_visits[client_ip][
"ban_timestamp"
] = datetime.now().isoformat()
return self.ip_page_visits[client_ip]["count"]
@@ -337,8 +390,10 @@ class AccessTracker:
if ban_timestamp is not None:
banned = True
#Check if ban period has expired (> 60 seconds)
ban_time = datetime.fromisoformat(self.ip_page_visits[client_ip]["ban_timestamp"])
# Check if ban period has expired (> 60 seconds)
ban_time = datetime.fromisoformat(
self.ip_page_visits[client_ip]["ban_timestamp"]
)
time_diff = datetime.now() - ban_time
if time_diff.total_seconds() > self.ban_duration_seconds:
self.ip_page_visits[client_ip]["count"] = 0
@@ -350,7 +405,6 @@ class AccessTracker:
except Exception:
return False
def get_page_visit_count(self, client_ip: str) -> int:
"""
Get the current page visit count for an IP.
@@ -372,20 +426,24 @@ class AccessTracker:
def get_top_paths(self, limit: int = 10) -> List[Tuple[str, int]]:
    """Return up to ``limit`` ``(path, count)`` pairs, highest count first."""
    ranked = sorted(
        self.path_counts.items(),
        key=lambda entry: entry[1],
        reverse=True,
    )
    return ranked[:limit]
def get_top_user_agents(self, limit: int = 10) -> List[Tuple[str, int]]:
    """Return up to ``limit`` ``(user_agent, count)`` pairs, highest count first."""
    ranked = sorted(
        self.user_agent_counts.items(),
        key=lambda entry: entry[1],
        reverse=True,
    )
    return ranked[:limit]
def get_suspicious_accesses(self, limit: int = 20) -> List[Dict]:
    """Get the most recent suspicious accesses.

    Args:
        limit: Maximum number of entries to return. Zero or a negative
            value yields an empty list.

    Returns:
        The last ``limit`` entries of ``self.access_log`` whose
        ``"suspicious"`` flag is truthy, in log order.
    """
    # Guard explicitly: ``seq[-0:]`` is ``seq[0:]`` and would return
    # everything, and a negative limit would slice from the wrong end.
    if limit <= 0:
        return []
    suspicious = [log for log in self.access_log if log.get("suspicious", False)]
    return suspicious[-limit:]
def get_attack_type_accesses(self, limit: int = 20) -> List[Dict]:
    """Get the most recent accesses with detected attack types.

    Args:
        limit: Maximum number of entries to return. Zero or a negative
            value yields an empty list.

    Returns:
        The last ``limit`` entries of ``self.access_log`` that carry a
        non-empty ``"attack_types"`` value, in log order.
    """
    # Guard explicitly: ``seq[-0:]`` is ``seq[0:]`` and would return
    # everything, and a negative limit would slice from the wrong end.
    if limit <= 0:
        return []
    attacks = [log for log in self.access_log if log.get("attack_types")]
    return attacks[-limit:]
def get_honeypot_triggered_ips(self) -> List[Tuple[str, List[str]]]:
@@ -401,12 +459,12 @@ class AccessTracker:
stats = self.db.get_dashboard_counts()
# Add detailed lists from database
stats['top_ips'] = self.db.get_top_ips(10)
stats['top_paths'] = self.db.get_top_paths(10)
stats['top_user_agents'] = self.db.get_top_user_agents(10)
stats['recent_suspicious'] = self.db.get_recent_suspicious(20)
stats['honeypot_triggered_ips'] = self.db.get_honeypot_triggered_ips()
stats['attack_types'] = self.db.get_recent_attacks(20)
stats['credential_attempts'] = self.db.get_credential_attempts(limit=50)
stats["top_ips"] = self.db.get_top_ips(10)
stats["top_paths"] = self.db.get_top_paths(10)
stats["top_user_agents"] = self.db.get_top_user_agents(10)
stats["recent_suspicious"] = self.db.get_recent_suspicious(20)
stats["honeypot_triggered_ips"] = self.db.get_honeypot_triggered_ips()
stats["attack_types"] = self.db.get_recent_attacks(20)
stats["credential_attempts"] = self.db.get_credential_attempts(limit=50)
return stats

View File

@@ -19,13 +19,15 @@ class Wordlists:
def _load_config(self):
"""Load wordlists from JSON file"""
config_path = Path(__file__).parent.parent / 'wordlists.json'
config_path = Path(__file__).parent.parent / "wordlists.json"
try:
with open(config_path, 'r') as f:
with open(config_path, "r") as f:
return json.load(f)
except FileNotFoundError:
get_app_logger().warning(f"Wordlists file {config_path} not found, using default values")
get_app_logger().warning(
f"Wordlists file {config_path} not found, using default values"
)
return self._get_defaults()
except json.JSONDecodeError as e:
get_app_logger().warning(f"Invalid JSON in {config_path}: {e}")
@@ -36,29 +38,21 @@ class Wordlists:
return {
"usernames": {
"prefixes": ["admin", "user", "root"],
"suffixes": ["", "_prod", "_dev"]
"suffixes": ["", "_prod", "_dev"],
},
"passwords": {
"prefixes": ["P@ssw0rd", "Admin"],
"simple": ["test", "demo", "password"]
},
"emails": {
"domains": ["example.com", "test.com"]
},
"api_keys": {
"prefixes": ["sk_live_", "api_", ""]
"simple": ["test", "demo", "password"],
},
"emails": {"domains": ["example.com", "test.com"]},
"api_keys": {"prefixes": ["sk_live_", "api_", ""]},
"databases": {
"names": ["production", "main_db"],
"hosts": ["localhost", "db.internal"]
"hosts": ["localhost", "db.internal"],
},
"applications": {
"names": ["WebApp", "Dashboard"]
},
"users": {
"roles": ["Administrator", "User"]
},
"server_headers": ["Apache/2.4.41 (Ubuntu)", "nginx/1.18.0"]
"applications": {"names": ["WebApp", "Dashboard"]},
"users": {"roles": ["Administrator", "User"]},
"server_headers": ["Apache/2.4.41 (Ubuntu)", "nginx/1.18.0"],
}
@property
@@ -137,10 +131,10 @@ class Wordlists:
_wordlists_instance = None
def get_wordlists():
    """Return the shared ``Wordlists`` instance, creating it on first use."""
    global _wordlists_instance
    if _wordlists_instance is not None:
        return _wordlists_instance
    # First call: build the instance and store it in the module-level slot.
    _wordlists_instance = Wordlists()
    return _wordlists_instance

View File

@@ -10,10 +10,10 @@ def detect_xss_pattern(input_string: str) -> bool:
return False
wl = get_wordlists()
xss_pattern = wl.attack_patterns.get('xss_attempt', '')
xss_pattern = wl.attack_patterns.get("xss_attempt", "")
if not xss_pattern:
xss_pattern = r'(<script|</script|javascript:|onerror=|onload=|onclick=|<iframe|<img|<svg|eval\(|alert\()'
xss_pattern = r"(<script|</script|javascript:|onerror=|onload=|onclick=|<iframe|<img|<svg|eval\(|alert\()"
return bool(re.search(xss_pattern, input_string, re.IGNORECASE))