Linted code with black tool

This commit is contained in:
Lorenzo Venerandi
2026-01-23 22:00:21 +01:00
parent 25384585d9
commit 4450d3a4e3
22 changed files with 1387 additions and 868 deletions

View File

@@ -19,10 +19,12 @@ Functions for user activity analysis
app_logger = get_app_logger()
class Analyzer:
"""
Analyzes users activity and produces aggregated insights
"""
def __init__(self, db_manager: Optional[DatabaseManager] = None):
"""
Initialize the access tracker.
@@ -102,7 +104,6 @@ class Analyzer:
# }
# }
# accesses = self.db.get_access_logs(ip_filter = ip, limit=1000)
# total_accesses_count = len(accesses)
# if total_accesses_count <= 0:
@@ -119,7 +120,6 @@ class Analyzer:
# #--------------------- HTTP Methods ---------------------
# get_accesses_count = len([item for item in accesses if item["method"] == "GET"])
# post_accesses_count = len([item for item in accesses if item["method"] == "POST"])
# put_accesses_count = len([item for item in accesses if item["method"] == "PUT"])
@@ -214,7 +214,6 @@ class Analyzer:
# score["bad_crawler"]["uneven_request_timing"] = False
# score["regular_user"]["uneven_request_timing"] = False
# #--------------------- Different User Agents ---------------------
# #Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
# user_agents_used = [item["user_agent"] for item in accesses]
@@ -317,8 +316,6 @@ class Analyzer:
# return 0
# def update_ip_rep_infos(self, ip: str) -> list[str]:
# api_url = "https://iprep.lcrawl.com/api/iprep/"
# params = {

View File

@@ -14,12 +14,13 @@ import yaml
@dataclass
class Config:
"""Configuration class for the deception server"""
port: int = 5000
delay: int = 100 # milliseconds
server_header: str = ""
links_length_range: Tuple[int, int] = (5, 15)
links_per_page_range: Tuple[int, int] = (10, 15)
char_space: str = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
char_space: str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
max_counter: int = 10
canary_token_url: Optional[str] = None
canary_token_tries: int = 10
@@ -30,7 +31,9 @@ class Config:
probability_error_codes: int = 0 # Percentage (0-100)
# Crawl limiting settings - for legitimate vs malicious crawlers
max_pages_limit: int = 100 # Max pages limit for good crawlers and regular users (and bad crawlers/attackers if infinite_pages_for_malicious is False)
max_pages_limit: int = (
100 # Max pages limit for good crawlers and regular users (and bad crawlers/attackers if infinite_pages_for_malicious is False)
)
infinite_pages_for_malicious: bool = True # Infinite pages for malicious crawlers
ban_duration_seconds: int = 600 # Ban duration in seconds for IPs exceeding limits
@@ -47,90 +50,111 @@ class Config:
attack_urls_threshold: float = None
@classmethod
def from_yaml(cls) -> "Config":
    """Create configuration from YAML file.

    The file name is read from the ``CONFIG_LOCATION`` environment
    variable (default ``config.yaml``) and joined onto the parent of the
    directory containing this module.

    Returns:
        A ``Config`` populated from the YAML sections ``server``, ``links``,
        ``canary``, ``dashboard``, ``api``, ``database``, ``behavior``,
        ``analyzer`` and ``crawl``. Missing keys fall back to the defaults
        written in this method.

    Exits:
        Prints a message to stderr and calls ``sys.exit(1)`` when the file
        does not exist or contains invalid YAML.
    """
    config_location = os.getenv("CONFIG_LOCATION", "config.yaml")
    config_path = Path(__file__).parent.parent / config_location

    try:
        with open(config_path, "r") as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        print(
            f"Error: Configuration file '{config_path}' not found.", file=sys.stderr
        )
        print(
            "Please create a config.yaml file or set CONFIG_LOCATION environment variable.",
            file=sys.stderr,
        )
        sys.exit(1)
    except yaml.YAMLError as e:
        print(
            f"Error: Invalid YAML in configuration file '{config_path}': {e}",
            file=sys.stderr,
        )
        sys.exit(1)

    # An empty YAML document loads as None.
    if data is None:
        data = {}

    # Extract nested sections with defaults. ``or {}`` (rather than a
    # ``get`` default) also covers a section that is present but null,
    # e.g. a bare ``server:`` line, which would otherwise yield None and
    # crash on the ``.get`` calls below.
    server = data.get("server") or {}
    links = data.get("links") or {}
    canary = data.get("canary") or {}
    dashboard = data.get("dashboard") or {}
    api = data.get("api") or {}
    database = data.get("database") or {}
    behavior = data.get("behavior") or {}
    analyzer = data.get("analyzer") or {}
    crawl = data.get("crawl") or {}

    # Handle dashboard_secret_path - auto-generate if null/not set
    dashboard_path = dashboard.get("secret_path")
    if dashboard_path is None:
        dashboard_path = f"/{os.urandom(16).hex()}"
    elif not dashboard_path.startswith("/"):
        # ensure the dashboard path starts with a /
        dashboard_path = f"/{dashboard_path}"

    # NOTE(review): the fallbacks for max_pages_limit (200) and
    # ban_duration_seconds (60) differ from the dataclass field defaults
    # (100 and 600) - confirm which values are intended.
    return cls(
        port=server.get("port", 5000),
        delay=server.get("delay", 100),
        server_header=server.get("server_header", ""),
        links_length_range=(
            links.get("min_length", 5),
            links.get("max_length", 15),
        ),
        links_per_page_range=(
            links.get("min_per_page", 10),
            links.get("max_per_page", 15),
        ),
        char_space=links.get(
            "char_space",
            "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
        ),
        max_counter=links.get("max_counter", 10),
        canary_token_url=canary.get("token_url"),
        canary_token_tries=canary.get("token_tries", 10),
        dashboard_secret_path=dashboard_path,
        api_server_url=api.get("server_url"),
        api_server_port=api.get("server_port", 8080),
        api_server_path=api.get("server_path", "/api/v2/users"),
        probability_error_codes=behavior.get("probability_error_codes", 0),
        database_path=database.get("path", "data/krawl.db"),
        database_retention_days=database.get("retention_days", 30),
        http_risky_methods_threshold=analyzer.get(
            "http_risky_methods_threshold", 0.1
        ),
        violated_robots_threshold=analyzer.get("violated_robots_threshold", 0.1),
        uneven_request_timing_threshold=analyzer.get(
            "uneven_request_timing_threshold", 0.5
        ),  # coefficient of variation
        uneven_request_timing_time_window_seconds=analyzer.get(
            "uneven_request_timing_time_window_seconds", 300
        ),
        user_agents_used_threshold=analyzer.get("user_agents_used_threshold", 2),
        attack_urls_threshold=analyzer.get("attack_urls_threshold", 1),
        infinite_pages_for_malicious=crawl.get(
            "infinite_pages_for_malicious", True
        ),
        max_pages_limit=crawl.get("max_pages_limit", 200),
        ban_duration_seconds=crawl.get("ban_duration_seconds", 60),
    )
def __get_env_from_config(config: str) -> str:
    """Return the environment variable name for a configuration field.

    The field name is upper-cased, ``.`` and spaces become ``_``, ``-``
    becomes ``__``, and the result is prefixed with ``KRAWL_``.

    Args:
        config: Configuration field name, e.g. ``"max_pages_limit"``.

    Returns:
        The matching environment variable name, e.g.
        ``"KRAWL_MAX_PAGES_LIMIT"``.
    """
    env = config.upper().replace(".", "_").replace("-", "__").replace(" ", "_")
    return f"KRAWL_{env}"
def override_config_from_env(config: Config = None):
"""Initialize configuration from environment variables"""
for field in config.__dataclass_fields__:
env_var = __get_env_from_config(field)
if env_var in os.environ:
field_type = config.__dataclass_fields__[field].type
@@ -140,20 +164,22 @@ def override_config_from_env(config: Config = None):
elif field_type == float:
setattr(config, field, float(env_value))
elif field_type == Tuple[int, int]:
parts = env_value.split(',')
parts = env_value.split(",")
if len(parts) == 2:
setattr(config, field, (int(parts[0]), int(parts[1])))
else:
setattr(config, field, env_value)
_config_instance = None
def get_config() -> Config:
"""Get the singleton Config instance"""
global _config_instance
if _config_instance is None:
_config_instance = Config.from_yaml()
override_config_from_env(_config_instance)
return _config_instance
return _config_instance

View File

@@ -24,7 +24,15 @@ def set_sqlite_pragma(dbapi_connection, connection_record):
cursor.execute("PRAGMA busy_timeout=30000")
cursor.close()
from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats, CategoryHistory
from models import (
Base,
AccessLog,
CredentialAttempt,
AttackDetection,
IpStats,
CategoryHistory,
)
from sanitizer import (
sanitize_ip,
sanitize_path,
@@ -37,6 +45,7 @@ from logger import get_app_logger
applogger = get_app_logger()
class DatabaseManager:
"""
Singleton database manager for the Krawl honeypot.
@@ -44,6 +53,7 @@ class DatabaseManager:
Handles database initialization, session management, and provides
methods for persisting access logs, credentials, and attack detections.
"""
_instance: Optional["DatabaseManager"] = None
def __new__(cls) -> "DatabaseManager":
@@ -72,7 +82,7 @@ class DatabaseManager:
self._engine = create_engine(
database_url,
connect_args={"check_same_thread": False},
echo=False # Set to True for SQL debugging
echo=False, # Set to True for SQL debugging
)
# Create session factory with scoped_session for thread safety
@@ -96,7 +106,9 @@ class DatabaseManager:
def session(self) -> Session:
"""Get a thread-local database session."""
if not self._initialized:
raise RuntimeError("DatabaseManager not initialized. Call initialize() first.")
raise RuntimeError(
"DatabaseManager not initialized. Call initialize() first."
)
return self._Session()
def close_session(self) -> None:
@@ -113,7 +125,7 @@ class DatabaseManager:
is_suspicious: bool = False,
is_honeypot_trigger: bool = False,
attack_types: Optional[List[str]] = None,
matched_patterns: Optional[Dict[str, str]] = None
matched_patterns: Optional[Dict[str, str]] = None,
) -> Optional[int]:
"""
Persist an access log entry to the database.
@@ -141,7 +153,7 @@ class DatabaseManager:
method=method[:10],
is_suspicious=is_suspicious,
is_honeypot_trigger=is_honeypot_trigger,
timestamp=datetime.now()
timestamp=datetime.now(),
)
session.add(access_log)
session.flush() # Get the ID before committing
@@ -155,7 +167,7 @@ class DatabaseManager:
attack_type=attack_type[:50],
matched_pattern=sanitize_attack_pattern(
matched_patterns.get(attack_type, "")
)
),
)
session.add(detection)
@@ -178,7 +190,7 @@ class DatabaseManager:
ip: str,
path: str,
username: Optional[str] = None,
password: Optional[str] = None
password: Optional[str] = None,
) -> Optional[int]:
"""
Persist a credential attempt to the database.
@@ -199,7 +211,7 @@ class DatabaseManager:
path=sanitize_path(path),
username=sanitize_credential(username),
password=sanitize_credential(password),
timestamp=datetime.now()
timestamp=datetime.now(),
)
session.add(credential)
session.commit()
@@ -230,14 +242,18 @@ class DatabaseManager:
ip_stats.last_seen = now
else:
ip_stats = IpStats(
ip=sanitized_ip,
total_requests=1,
first_seen=now,
last_seen=now
ip=sanitized_ip, total_requests=1, first_seen=now, last_seen=now
)
session.add(ip_stats)
def update_ip_stats_analysis(self, ip: str, analyzed_metrics: Dict[str, object], category: str, category_scores: Dict[str, int], last_analysis: datetime) -> None:
def update_ip_stats_analysis(
self,
ip: str,
analyzed_metrics: Dict[str, object],
category: str,
category_scores: Dict[str, int],
last_analysis: datetime,
) -> None:
"""
Update IP statistics (ip is already persisted).
Records category change in history if category has changed.
@@ -250,7 +266,9 @@ class DatabaseManager:
last_analysis: timestamp of last analysis
"""
applogger.debug(f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}")
applogger.debug(
f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}"
)
applogger.info(f"IP: {ip} category has been updated to {category}")
session = self.session
@@ -260,7 +278,9 @@ class DatabaseManager:
# Check if category has changed and record it
old_category = ip_stats.category
if old_category != category:
self._record_category_change(sanitized_ip, old_category, category, last_analysis)
self._record_category_change(
sanitized_ip, old_category, category, last_analysis
)
ip_stats.analyzed_metrics = analyzed_metrics
ip_stats.category = category
@@ -286,11 +306,12 @@ class DatabaseManager:
sanitized_ip = sanitize_ip(ip)
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
# Record the manual category change
old_category = ip_stats.category
if old_category != category:
self._record_category_change(sanitized_ip, old_category, category, datetime.now())
self._record_category_change(
sanitized_ip, old_category, category, datetime.now()
)
ip_stats.category = category
ip_stats.manual_category = True
@@ -301,7 +322,13 @@ class DatabaseManager:
session.rollback()
print(f"Error updating manual category: {e}")
def _record_category_change(self, ip: str, old_category: Optional[str], new_category: str, timestamp: datetime) -> None:
def _record_category_change(
self,
ip: str,
old_category: Optional[str],
new_category: str,
timestamp: datetime,
) -> None:
"""
Internal method to record category changes in history.
Only records if there's an actual change from a previous category.
@@ -323,7 +350,7 @@ class DatabaseManager:
ip=ip,
old_category=old_category,
new_category=new_category,
timestamp=timestamp
timestamp=timestamp,
)
session.add(history_entry)
session.commit()
@@ -344,22 +371,32 @@ class DatabaseManager:
session = self.session
try:
sanitized_ip = sanitize_ip(ip)
history = session.query(CategoryHistory).filter(
CategoryHistory.ip == sanitized_ip
).order_by(CategoryHistory.timestamp.asc()).all()
history = (
session.query(CategoryHistory)
.filter(CategoryHistory.ip == sanitized_ip)
.order_by(CategoryHistory.timestamp.asc())
.all()
)
return [
{
'old_category': h.old_category,
'new_category': h.new_category,
'timestamp': h.timestamp.isoformat()
"old_category": h.old_category,
"new_category": h.new_category,
"timestamp": h.timestamp.isoformat(),
}
for h in history
]
finally:
self.close_session()
def update_ip_rep_infos(self, ip: str, country_code: str, asn: str, asn_org: str, list_on: Dict[str,str]) -> None:
def update_ip_rep_infos(
self,
ip: str,
country_code: str,
asn: str,
asn_org: str,
list_on: Dict[str, str],
) -> None:
"""
Update IP rep stats
@@ -400,20 +437,25 @@ class DatabaseManager:
"""
session = self.session
try:
ips = session.query(IpStats.ip).filter(
IpStats.country_code.is_(None),
~IpStats.ip.like('10.%'),
~IpStats.ip.like('172.16.%'),
~IpStats.ip.like('172.17.%'),
~IpStats.ip.like('172.18.%'),
~IpStats.ip.like('172.19.%'),
~IpStats.ip.like('172.2_.%'),
~IpStats.ip.like('172.30.%'),
~IpStats.ip.like('172.31.%'),
~IpStats.ip.like('192.168.%'),
~IpStats.ip.like('127.%'),
~IpStats.ip.like('169.254.%')
).limit(limit).all()
ips = (
session.query(IpStats.ip)
.filter(
IpStats.country_code.is_(None),
~IpStats.ip.like("10.%"),
~IpStats.ip.like("172.16.%"),
~IpStats.ip.like("172.17.%"),
~IpStats.ip.like("172.18.%"),
~IpStats.ip.like("172.19.%"),
~IpStats.ip.like("172.2_.%"),
~IpStats.ip.like("172.30.%"),
~IpStats.ip.like("172.31.%"),
~IpStats.ip.like("192.168.%"),
~IpStats.ip.like("127.%"),
~IpStats.ip.like("169.254.%"),
)
.limit(limit)
.all()
)
return [ip[0] for ip in ips]
finally:
self.close_session()
@@ -424,7 +466,7 @@ class DatabaseManager:
offset: int = 0,
ip_filter: Optional[str] = None,
suspicious_only: bool = False,
since_minutes: Optional[int] = None
since_minutes: Optional[int] = None,
) -> List[Dict[str, Any]]:
"""
Retrieve access logs with optional filtering.
@@ -455,15 +497,15 @@ class DatabaseManager:
return [
{
'id': log.id,
'ip': log.ip,
'path': log.path,
'user_agent': log.user_agent,
'method': log.method,
'is_suspicious': log.is_suspicious,
'is_honeypot_trigger': log.is_honeypot_trigger,
'timestamp': log.timestamp.isoformat(),
'attack_types': [d.attack_type for d in log.attack_detections]
"id": log.id,
"ip": log.ip,
"path": log.path,
"user_agent": log.user_agent,
"method": log.method,
"is_suspicious": log.is_suspicious,
"is_honeypot_trigger": log.is_honeypot_trigger,
"timestamp": log.timestamp.isoformat(),
"attack_types": [d.attack_type for d in log.attack_detections],
}
for log in logs
]
@@ -521,10 +563,7 @@ class DatabaseManager:
# self.close_session()
def get_credential_attempts(
self,
limit: int = 100,
offset: int = 0,
ip_filter: Optional[str] = None
self, limit: int = 100, offset: int = 0, ip_filter: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
Retrieve credential attempts with optional filtering.
@@ -550,12 +589,12 @@ class DatabaseManager:
return [
{
'id': attempt.id,
'ip': attempt.ip,
'path': attempt.path,
'username': attempt.username,
'password': attempt.password,
'timestamp': attempt.timestamp.isoformat()
"id": attempt.id,
"ip": attempt.ip,
"path": attempt.path,
"username": attempt.username,
"password": attempt.password,
"timestamp": attempt.timestamp.isoformat(),
}
for attempt in attempts
]
@@ -574,26 +613,29 @@ class DatabaseManager:
"""
session = self.session
try:
stats = session.query(IpStats).order_by(
IpStats.total_requests.desc()
).limit(limit).all()
stats = (
session.query(IpStats)
.order_by(IpStats.total_requests.desc())
.limit(limit)
.all()
)
return [
{
'ip': s.ip,
'total_requests': s.total_requests,
'first_seen': s.first_seen.isoformat(),
'last_seen': s.last_seen.isoformat(),
'country_code': s.country_code,
'city': s.city,
'asn': s.asn,
'asn_org': s.asn_org,
'reputation_score': s.reputation_score,
'reputation_source': s.reputation_source,
'analyzed_metrics': s.analyzed_metrics,
'category': s.category,
'manual_category': s.manual_category,
'last_analysis': s.last_analysis
"ip": s.ip,
"total_requests": s.total_requests,
"first_seen": s.first_seen.isoformat(),
"last_seen": s.last_seen.isoformat(),
"country_code": s.country_code,
"city": s.city,
"asn": s.asn,
"asn_org": s.asn_org,
"reputation_score": s.reputation_score,
"reputation_source": s.reputation_source,
"analyzed_metrics": s.analyzed_metrics,
"category": s.category,
"manual_category": s.manual_category,
"last_analysis": s.last_analysis,
}
for s in stats
]
@@ -621,23 +663,25 @@ class DatabaseManager:
category_history = self.get_category_history(ip)
return {
'ip': stat.ip,
'total_requests': stat.total_requests,
'first_seen': stat.first_seen.isoformat() if stat.first_seen else None,
'last_seen': stat.last_seen.isoformat() if stat.last_seen else None,
'country_code': stat.country_code,
'city': stat.city,
'asn': stat.asn,
'asn_org': stat.asn_org,
'list_on': stat.list_on or {},
'reputation_score': stat.reputation_score,
'reputation_source': stat.reputation_source,
'analyzed_metrics': stat.analyzed_metrics or {},
'category': stat.category,
'category_scores': stat.category_scores or {},
'manual_category': stat.manual_category,
'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None,
'category_history': category_history
"ip": stat.ip,
"total_requests": stat.total_requests,
"first_seen": stat.first_seen.isoformat() if stat.first_seen else None,
"last_seen": stat.last_seen.isoformat() if stat.last_seen else None,
"country_code": stat.country_code,
"city": stat.city,
"asn": stat.asn,
"asn_org": stat.asn_org,
"list_on": stat.list_on or {},
"reputation_score": stat.reputation_score,
"reputation_source": stat.reputation_source,
"analyzed_metrics": stat.analyzed_metrics or {},
"category": stat.category,
"category_scores": stat.category_scores or {},
"manual_category": stat.manual_category,
"last_analysis": (
stat.last_analysis.isoformat() if stat.last_analysis else None
),
"category_history": category_history,
}
finally:
self.close_session()
@@ -654,25 +698,32 @@ class DatabaseManager:
try:
# Get main aggregate counts in one query
result = session.query(
func.count(AccessLog.id).label('total_accesses'),
func.count(distinct(AccessLog.ip)).label('unique_ips'),
func.count(distinct(AccessLog.path)).label('unique_paths'),
func.sum(case((AccessLog.is_suspicious == True, 1), else_=0)).label('suspicious_accesses'),
func.sum(case((AccessLog.is_honeypot_trigger == True, 1), else_=0)).label('honeypot_triggered')
func.count(AccessLog.id).label("total_accesses"),
func.count(distinct(AccessLog.ip)).label("unique_ips"),
func.count(distinct(AccessLog.path)).label("unique_paths"),
func.sum(case((AccessLog.is_suspicious == True, 1), else_=0)).label(
"suspicious_accesses"
),
func.sum(
case((AccessLog.is_honeypot_trigger == True, 1), else_=0)
).label("honeypot_triggered"),
).first()
# Get unique IPs that triggered honeypots
honeypot_ips = session.query(
func.count(distinct(AccessLog.ip))
).filter(AccessLog.is_honeypot_trigger == True).scalar() or 0
honeypot_ips = (
session.query(func.count(distinct(AccessLog.ip)))
.filter(AccessLog.is_honeypot_trigger == True)
.scalar()
or 0
)
return {
'total_accesses': result.total_accesses or 0,
'unique_ips': result.unique_ips or 0,
'unique_paths': result.unique_paths or 0,
'suspicious_accesses': int(result.suspicious_accesses or 0),
'honeypot_triggered': int(result.honeypot_triggered or 0),
'honeypot_ips': honeypot_ips
"total_accesses": result.total_accesses or 0,
"unique_ips": result.unique_ips or 0,
"unique_paths": result.unique_paths or 0,
"suspicious_accesses": int(result.suspicious_accesses or 0),
"honeypot_triggered": int(result.honeypot_triggered or 0),
"honeypot_ips": honeypot_ips,
}
finally:
self.close_session()
@@ -689,12 +740,13 @@ class DatabaseManager:
"""
session = self.session
try:
results = session.query(
AccessLog.ip,
func.count(AccessLog.id).label('count')
).group_by(AccessLog.ip).order_by(
func.count(AccessLog.id).desc()
).limit(limit).all()
results = (
session.query(AccessLog.ip, func.count(AccessLog.id).label("count"))
.group_by(AccessLog.ip)
.order_by(func.count(AccessLog.id).desc())
.limit(limit)
.all()
)
return [(row.ip, row.count) for row in results]
finally:
@@ -712,12 +764,13 @@ class DatabaseManager:
"""
session = self.session
try:
results = session.query(
AccessLog.path,
func.count(AccessLog.id).label('count')
).group_by(AccessLog.path).order_by(
func.count(AccessLog.id).desc()
).limit(limit).all()
results = (
session.query(AccessLog.path, func.count(AccessLog.id).label("count"))
.group_by(AccessLog.path)
.order_by(func.count(AccessLog.id).desc())
.limit(limit)
.all()
)
return [(row.path, row.count) for row in results]
finally:
@@ -735,15 +788,16 @@ class DatabaseManager:
"""
session = self.session
try:
results = session.query(
AccessLog.user_agent,
func.count(AccessLog.id).label('count')
).filter(
AccessLog.user_agent.isnot(None),
AccessLog.user_agent != ''
).group_by(AccessLog.user_agent).order_by(
func.count(AccessLog.id).desc()
).limit(limit).all()
results = (
session.query(
AccessLog.user_agent, func.count(AccessLog.id).label("count")
)
.filter(AccessLog.user_agent.isnot(None), AccessLog.user_agent != "")
.group_by(AccessLog.user_agent)
.order_by(func.count(AccessLog.id).desc())
.limit(limit)
.all()
)
return [(row.user_agent, row.count) for row in results]
finally:
@@ -761,16 +815,20 @@ class DatabaseManager:
"""
session = self.session
try:
logs = session.query(AccessLog).filter(
AccessLog.is_suspicious == True
).order_by(AccessLog.timestamp.desc()).limit(limit).all()
logs = (
session.query(AccessLog)
.filter(AccessLog.is_suspicious == True)
.order_by(AccessLog.timestamp.desc())
.limit(limit)
.all()
)
return [
{
'ip': log.ip,
'path': log.path,
'user_agent': log.user_agent,
'timestamp': log.timestamp.isoformat()
"ip": log.ip,
"path": log.path,
"user_agent": log.user_agent,
"timestamp": log.timestamp.isoformat(),
}
for log in logs
]
@@ -787,12 +845,11 @@ class DatabaseManager:
session = self.session
try:
# Get all honeypot triggers grouped by IP
results = session.query(
AccessLog.ip,
AccessLog.path
).filter(
AccessLog.is_honeypot_trigger == True
).all()
results = (
session.query(AccessLog.ip, AccessLog.path)
.filter(AccessLog.is_honeypot_trigger == True)
.all()
)
# Group paths by IP
ip_paths: Dict[str, List[str]] = {}
@@ -819,17 +876,21 @@ class DatabaseManager:
session = self.session
try:
# Get access logs that have attack detections
logs = session.query(AccessLog).join(
AttackDetection
).order_by(AccessLog.timestamp.desc()).limit(limit).all()
logs = (
session.query(AccessLog)
.join(AttackDetection)
.order_by(AccessLog.timestamp.desc())
.limit(limit)
.all()
)
return [
{
'ip': log.ip,
'path': log.path,
'user_agent': log.user_agent,
'timestamp': log.timestamp.isoformat(),
'attack_types': [d.attack_type for d in log.attack_detections]
"ip": log.ip,
"path": log.path,
"user_agent": log.user_agent,
"timestamp": log.timestamp.isoformat(),
"attack_types": [d.attack_type for d in log.attack_detections],
}
for log in logs
]

View File

@@ -11,6 +11,7 @@ from templates import html_templates
from wordlists import get_wordlists
from config import get_config
def random_username() -> str:
"""Generate random username"""
wl = get_wordlists()
@@ -21,10 +22,10 @@ def random_password() -> str:
"""Generate random password"""
wl = get_wordlists()
templates = [
lambda: ''.join(random.choices(string.ascii_letters + string.digits, k=12)),
lambda: "".join(random.choices(string.ascii_letters + string.digits, k=12)),
lambda: f"{random.choice(wl.password_prefixes)}{random.randint(100, 999)}!",
lambda: f"{random.choice(wl.simple_passwords)}{random.randint(1000, 9999)}",
lambda: ''.join(random.choices(string.ascii_lowercase, k=8)),
lambda: "".join(random.choices(string.ascii_lowercase, k=8)),
]
return random.choice(templates)()
@@ -36,6 +37,7 @@ def random_email(username: str = None) -> str:
username = random_username()
return f"{username}@{random.choice(wl.email_domains)}"
def random_server_header() -> str:
"""Generate random server header from wordlists"""
config = get_config()
@@ -44,10 +46,11 @@ def random_server_header() -> str:
wl = get_wordlists()
return random.choice(wl.server_headers)
def random_api_key() -> str:
    """Generate random API key.

    Returns:
        A prefix chosen at random from ``wl.api_key_prefixes`` followed by
        32 random ASCII letters and digits.
    """
    wl = get_wordlists()
    key = "".join(random.choices(string.ascii_letters + string.digits, k=32))
    return random.choice(wl.api_key_prefixes) + key
@@ -87,14 +90,16 @@ def users_json() -> str:
users = []
for i in range(random.randint(3, 8)):
username = random_username()
users.append({
"id": i + 1,
"username": username,
"email": random_email(username),
"password": random_password(),
"role": random.choice(wl.user_roles),
"api_token": random_api_key()
})
users.append(
{
"id": i + 1,
"username": username,
"email": random_email(username),
"password": random_password(),
"role": random.choice(wl.user_roles),
"api_token": random_api_key(),
}
)
return json.dumps({"users": users}, indent=2)
@@ -102,20 +107,28 @@ def api_keys_json() -> str:
"""Generate fake api_keys.json with random data"""
keys = {
"stripe": {
"public_key": "pk_live_" + ''.join(random.choices(string.ascii_letters + string.digits, k=24)),
"secret_key": random_api_key()
"public_key": "pk_live_"
+ "".join(random.choices(string.ascii_letters + string.digits, k=24)),
"secret_key": random_api_key(),
},
"aws": {
"access_key_id": "AKIA" + ''.join(random.choices(string.ascii_uppercase + string.digits, k=16)),
"secret_access_key": ''.join(random.choices(string.ascii_letters + string.digits + '+/', k=40))
"access_key_id": "AKIA"
+ "".join(random.choices(string.ascii_uppercase + string.digits, k=16)),
"secret_access_key": "".join(
random.choices(string.ascii_letters + string.digits + "+/", k=40)
),
},
"sendgrid": {
"api_key": "SG." + ''.join(random.choices(string.ascii_letters + string.digits, k=48))
"api_key": "SG."
+ "".join(random.choices(string.ascii_letters + string.digits, k=48))
},
"twilio": {
"account_sid": "AC" + ''.join(random.choices(string.ascii_lowercase + string.digits, k=32)),
"auth_token": ''.join(random.choices(string.ascii_lowercase + string.digits, k=32))
}
"account_sid": "AC"
+ "".join(random.choices(string.ascii_lowercase + string.digits, k=32)),
"auth_token": "".join(
random.choices(string.ascii_lowercase + string.digits, k=32)
),
},
}
return json.dumps(keys, indent=2)
@@ -123,51 +136,70 @@ def api_keys_json() -> str:
def api_response(path: str) -> str:
"""Generate fake API JSON responses with random data"""
wl = get_wordlists()
def random_users(count: int = 3):
users = []
for i in range(count):
username = random_username()
users.append({
"id": i + 1,
"username": username,
"email": random_email(username),
"role": random.choice(wl.user_roles)
})
users.append(
{
"id": i + 1,
"username": username,
"email": random_email(username),
"role": random.choice(wl.user_roles),
}
)
return users
responses = {
'/api/users': json.dumps({
"users": random_users(random.randint(2, 5)),
"total": random.randint(50, 500)
}, indent=2),
'/api/v1/users': json.dumps({
"status": "success",
"data": [{
"id": random.randint(1, 100),
"name": random_username(),
"api_key": random_api_key()
}]
}, indent=2),
'/api/v2/secrets': json.dumps({
"database": {
"host": random.choice(wl.database_hosts),
"username": random_username(),
"password": random_password(),
"database": random_database_name()
"/api/users": json.dumps(
{
"users": random_users(random.randint(2, 5)),
"total": random.randint(50, 500),
},
"api_keys": {
"stripe": random_api_key(),
"aws": 'AKIA' + ''.join(random.choices(string.ascii_uppercase + string.digits, k=16))
}
}, indent=2),
'/api/config': json.dumps({
"app_name": random.choice(wl.application_names),
"debug": random.choice([True, False]),
"secret_key": random_api_key(),
"database_url": f"postgresql://{random_username()}:{random_password()}@localhost/{random_database_name()}"
}, indent=2),
'/.env': f"""APP_NAME={random.choice(wl.application_names)}
indent=2,
),
"/api/v1/users": json.dumps(
{
"status": "success",
"data": [
{
"id": random.randint(1, 100),
"name": random_username(),
"api_key": random_api_key(),
}
],
},
indent=2,
),
"/api/v2/secrets": json.dumps(
{
"database": {
"host": random.choice(wl.database_hosts),
"username": random_username(),
"password": random_password(),
"database": random_database_name(),
},
"api_keys": {
"stripe": random_api_key(),
"aws": "AKIA"
+ "".join(
random.choices(string.ascii_uppercase + string.digits, k=16)
),
},
},
indent=2,
),
"/api/config": json.dumps(
{
"app_name": random.choice(wl.application_names),
"debug": random.choice([True, False]),
"secret_key": random_api_key(),
"database_url": f"postgresql://{random_username()}:{random_password()}@localhost/{random_database_name()}",
},
indent=2,
),
"/.env": f"""APP_NAME={random.choice(wl.application_names)}
DEBUG={random.choice(['true', 'false'])}
APP_KEY=base64:{''.join(random.choices(string.ascii_letters + string.digits, k=32))}=
DB_CONNECTION=mysql
@@ -179,7 +211,7 @@ DB_PASSWORD={random_password()}
AWS_ACCESS_KEY_ID=AKIA{''.join(random.choices(string.ascii_uppercase + string.digits, k=16))}
AWS_SECRET_ACCESS_KEY={''.join(random.choices(string.ascii_letters + string.digits + '+/', k=40))}
STRIPE_SECRET={random_api_key()}
"""
""",
}
return responses.get(path, json.dumps({"error": "Not found"}, indent=2))
@@ -187,11 +219,13 @@ STRIPE_SECRET={random_api_key()}
def directory_listing(path: str) -> str:
    """Generate fake directory listing using wordlists.

    Args:
        path: Requested path, passed through to the listing template.

    Returns:
        The value produced by ``html_templates.directory_listing`` for the
        given path, the wordlist directories and the sampled files.
    """
    wl = get_wordlists()
    files = wl.directory_files
    dirs = wl.directory_dirs
    # Sample at most six distinct file names (fewer if the wordlist is
    # shorter) and pair each with a random size between 1 KiB and 1 MiB.
    # The selection is computed exactly once so no random draws are wasted.
    selected_files = [
        (f, random.randint(1024, 1024 * 1024))
        for f in random.sample(files, min(6, len(files)))
    ]
    return html_templates.directory_listing(path, dirs, selected_files)

View File

@@ -14,8 +14,13 @@ from analyzer import Analyzer
from templates import html_templates
from templates.dashboard_template import generate_dashboard
from generators import (
credentials_txt, passwords_txt, users_json, api_keys_json,
api_response, directory_listing, random_server_header
credentials_txt,
passwords_txt,
users_json,
api_keys_json,
api_response,
directory_listing,
random_server_header,
)
from wordlists import get_wordlists
from sql_errors import generate_sql_error_response, get_sql_response_with_data
@@ -25,6 +30,7 @@ from server_errors import generate_server_error
class Handler(BaseHTTPRequestHandler):
"""HTTP request handler for the deception server"""
webpages: Optional[List[str]] = None
config: Config = None
tracker: AccessTracker = None
@@ -37,15 +43,15 @@ class Handler(BaseHTTPRequestHandler):
def _get_client_ip(self) -> str:
"""Extract client IP address from request, checking proxy headers first"""
# Headers might not be available during early error logging
if hasattr(self, 'headers') and self.headers:
if hasattr(self, "headers") and self.headers:
# Check X-Forwarded-For header (set by load balancers/proxies)
forwarded_for = self.headers.get('X-Forwarded-For')
forwarded_for = self.headers.get("X-Forwarded-For")
if forwarded_for:
# X-Forwarded-For can contain multiple IPs, get the first (original client)
return forwarded_for.split(',')[0].strip()
return forwarded_for.split(",")[0].strip()
# Check X-Real-IP header (set by nginx and other proxies)
real_ip = self.headers.get('X-Real-IP')
real_ip = self.headers.get("X-Real-IP")
if real_ip:
return real_ip.strip()
@@ -54,7 +60,7 @@ class Handler(BaseHTTPRequestHandler):
def _get_user_agent(self) -> str:
"""Extract user agent from request"""
return self.headers.get('User-Agent', '')
return self.headers.get("User-Agent", "")
def _get_category_by_ip(self, client_ip: str) -> str:
"""Get the category of an IP from the database"""
@@ -97,7 +103,7 @@ class Handler(BaseHTTPRequestHandler):
Returns True if the path was handled, False otherwise.
"""
# SQL-vulnerable endpoints
sql_endpoints = ['/api/search', '/api/sql', '/api/database']
sql_endpoints = ["/api/search", "/api/sql", "/api/database"]
base_path = urlparse(path).path
if base_path not in sql_endpoints:
@@ -112,22 +118,30 @@ class Handler(BaseHTTPRequestHandler):
user_agent = self._get_user_agent()
# Always check for SQL injection patterns
error_msg, content_type, status_code = generate_sql_error_response(query_string or "")
error_msg, content_type, status_code = generate_sql_error_response(
query_string or ""
)
if error_msg:
# SQL injection detected - log and return error
self.access_logger.warning(f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}")
self.access_logger.warning(
f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}"
)
self.send_response(status_code)
self.send_header('Content-type', content_type)
self.send_header("Content-type", content_type)
self.end_headers()
self.wfile.write(error_msg.encode())
else:
# No injection detected - return fake data
self.access_logger.info(f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}")
self.access_logger.info(
f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}"
)
self.send_response(200)
self.send_header('Content-type', 'application/json')
self.send_header("Content-type", "application/json")
self.end_headers()
response_data = get_sql_response_with_data(base_path, query_string or "")
response_data = get_sql_response_with_data(
base_path, query_string or ""
)
self.wfile.write(response_data.encode())
return True
@@ -140,7 +154,7 @@ class Handler(BaseHTTPRequestHandler):
# Still send a response even on error
try:
self.send_response(500)
self.send_header('Content-type', 'application/json')
self.send_header("Content-type", "application/json")
self.end_headers()
self.wfile.write(b'{"error": "Internal server error"}')
except:
@@ -148,31 +162,35 @@ class Handler(BaseHTTPRequestHandler):
return True
def generate_page(self, seed: str, page_visit_count: int) -> str:
"""Generate a webpage containing random links or canary token"""
"""Generate a webpage containing random links or canary token"""
random.seed(seed)
num_pages = random.randint(*self.config.links_per_page_range)
# Check if this is a good crawler by IP category from database
ip_category = self._get_category_by_ip(self._get_client_ip())
# Determine if we should apply crawler page limit based on config and IP category
should_apply_crawler_limit = False
if self.config.infinite_pages_for_malicious:
if (ip_category == "good_crawler" or ip_category == "regular_user") and page_visit_count >= self.config.max_pages_limit:
if (
ip_category == "good_crawler" or ip_category == "regular_user"
) and page_visit_count >= self.config.max_pages_limit:
should_apply_crawler_limit = True
else:
if (ip_category == "good_crawler" or ip_category == "bad_crawler" or ip_category == "attacker") and page_visit_count >= self.config.max_pages_limit:
if (
ip_category == "good_crawler"
or ip_category == "bad_crawler"
or ip_category == "attacker"
) and page_visit_count >= self.config.max_pages_limit:
should_apply_crawler_limit = True
# If good crawler reached max pages, return a simple page with no links
if should_apply_crawler_limit:
return html_templates.main_page(
Handler.counter,
'<p>Crawl limit reached.</p>'
Handler.counter, "<p>Crawl limit reached.</p>"
)
num_pages = random.randint(*self.config.links_per_page_range)
# Build the content HTML
@@ -189,10 +207,12 @@ class Handler(BaseHTTPRequestHandler):
# Add links
if self.webpages is None:
for _ in range(num_pages):
address = ''.join([
random.choice(self.config.char_space)
for _ in range(random.randint(*self.config.links_length_range))
])
address = "".join(
[
random.choice(self.config.char_space)
for _ in range(random.randint(*self.config.links_length_range))
]
)
content += f"""
<div class="link-box">
<a href="{address}">{address}</a>
@@ -223,27 +243,36 @@ class Handler(BaseHTTPRequestHandler):
post_data = ""
from urllib.parse import urlparse
base_path = urlparse(self.path).path
if base_path in ['/api/search', '/api/sql', '/api/database']:
content_length = int(self.headers.get('Content-Length', 0))
if base_path in ["/api/search", "/api/sql", "/api/database"]:
content_length = int(self.headers.get("Content-Length", 0))
if content_length > 0:
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
post_data = self.rfile.read(content_length).decode(
"utf-8", errors="replace"
)
self.access_logger.info(f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}")
self.access_logger.info(
f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}"
)
error_msg, content_type, status_code = generate_sql_error_response(post_data)
error_msg, content_type, status_code = generate_sql_error_response(
post_data
)
try:
if error_msg:
self.access_logger.warning(f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}")
self.access_logger.warning(
f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}"
)
self.send_response(status_code)
self.send_header('Content-type', content_type)
self.send_header("Content-type", content_type)
self.end_headers()
self.wfile.write(error_msg.encode())
else:
self.send_response(200)
self.send_header('Content-type', 'application/json')
self.send_header("Content-type", "application/json")
self.end_headers()
response_data = get_sql_response_with_data(base_path, post_data)
self.wfile.write(response_data.encode())
@@ -253,28 +282,35 @@ class Handler(BaseHTTPRequestHandler):
self.app_logger.error(f"Error in SQL POST handler: {str(e)}")
return
if base_path == '/api/contact':
content_length = int(self.headers.get('Content-Length', 0))
if base_path == "/api/contact":
content_length = int(self.headers.get("Content-Length", 0))
if content_length > 0:
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
post_data = self.rfile.read(content_length).decode(
"utf-8", errors="replace"
)
parsed_data = {}
for pair in post_data.split('&'):
if '=' in pair:
key, value = pair.split('=', 1)
for pair in post_data.split("&"):
if "=" in pair:
key, value = pair.split("=", 1)
from urllib.parse import unquote_plus
parsed_data[unquote_plus(key)] = unquote_plus(value)
xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values())
if xss_detected:
self.access_logger.warning(f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}")
self.access_logger.warning(
f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}"
)
else:
self.access_logger.info(f"[XSS ENDPOINT POST] {client_ip} - {base_path}")
self.access_logger.info(
f"[XSS ENDPOINT POST] {client_ip} - {base_path}"
)
try:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
response_html = generate_xss_response(parsed_data)
self.wfile.write(response_html.encode())
@@ -284,11 +320,15 @@ class Handler(BaseHTTPRequestHandler):
self.app_logger.error(f"Error in XSS POST handler: {str(e)}")
return
self.access_logger.warning(f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}")
self.access_logger.warning(
f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}"
)
content_length = int(self.headers.get('Content-Length', 0))
content_length = int(self.headers.get("Content-Length", 0))
if content_length > 0:
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
post_data = self.rfile.read(content_length).decode(
"utf-8", errors="replace"
)
self.access_logger.warning(f"[POST DATA] {post_data[:200]}")
@@ -301,18 +341,24 @@ class Handler(BaseHTTPRequestHandler):
self.credential_logger.info(credential_line)
# Also record in tracker for dashboard
self.tracker.record_credential_attempt(client_ip, self.path, username or 'N/A', password or 'N/A')
self.tracker.record_credential_attempt(
client_ip, self.path, username or "N/A", password or "N/A"
)
self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}")
self.access_logger.warning(
f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}"
)
# send the post data (body) to the record_access function so the post data can be used to detect suspicious things.
self.tracker.record_access(client_ip, self.path, user_agent, post_data, method='POST')
self.tracker.record_access(
client_ip, self.path, user_agent, post_data, method="POST"
)
time.sleep(1)
try:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.login_error().encode())
except BrokenPipeError:
@@ -330,95 +376,102 @@ class Handler(BaseHTTPRequestHandler):
return True
try:
if path == '/robots.txt':
if path == "/robots.txt":
self.send_response(200)
self.send_header('Content-type', 'text/plain')
self.send_header("Content-type", "text/plain")
self.end_headers()
self.wfile.write(html_templates.robots_txt().encode())
return True
if path in ['/credentials.txt', '/passwords.txt', '/admin_notes.txt']:
if path in ["/credentials.txt", "/passwords.txt", "/admin_notes.txt"]:
self.send_response(200)
self.send_header('Content-type', 'text/plain')
self.send_header("Content-type", "text/plain")
self.end_headers()
if 'credentials' in path:
if "credentials" in path:
self.wfile.write(credentials_txt().encode())
else:
self.wfile.write(passwords_txt().encode())
return True
if path in ['/users.json', '/api_keys.json', '/config.json']:
if path in ["/users.json", "/api_keys.json", "/config.json"]:
self.send_response(200)
self.send_header('Content-type', 'application/json')
self.send_header("Content-type", "application/json")
self.end_headers()
if 'users' in path:
if "users" in path:
self.wfile.write(users_json().encode())
elif 'api_keys' in path:
elif "api_keys" in path:
self.wfile.write(api_keys_json().encode())
else:
self.wfile.write(api_response('/api/config').encode())
self.wfile.write(api_response("/api/config").encode())
return True
if path in ['/admin', '/admin/', '/admin/login', '/login']:
if path in ["/admin", "/admin/", "/admin/login", "/login"]:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.login_form().encode())
return True
if path in ['/users', '/user', '/database', '/db', '/search']:
if path in ["/users", "/user", "/database", "/db", "/search"]:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.product_search().encode())
return True
if path in ['/info', '/input', '/contact', '/feedback', '/comment']:
if path in ["/info", "/input", "/contact", "/feedback", "/comment"]:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.input_form().encode())
return True
if path == '/server':
if path == "/server":
error_html, content_type = generate_server_error()
self.send_response(500)
self.send_header('Content-type', content_type)
self.send_header("Content-type", content_type)
self.end_headers()
self.wfile.write(error_html.encode())
return True
if path in ['/wp-login.php', '/wp-login', '/wp-admin', '/wp-admin/']:
if path in ["/wp-login.php", "/wp-login", "/wp-admin", "/wp-admin/"]:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.wp_login().encode())
return True
if path in ['/wp-content/', '/wp-includes/'] or 'wordpress' in path.lower():
if path in ["/wp-content/", "/wp-includes/"] or "wordpress" in path.lower():
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.wordpress().encode())
return True
if 'phpmyadmin' in path.lower() or path in ['/pma/', '/phpMyAdmin/']:
if "phpmyadmin" in path.lower() or path in ["/pma/", "/phpMyAdmin/"]:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.phpmyadmin().encode())
return True
if path.startswith('/api/') or path.startswith('/api') or path in ['/.env']:
if path.startswith("/api/") or path.startswith("/api") or path in ["/.env"]:
self.send_response(200)
self.send_header('Content-type', 'application/json')
self.send_header("Content-type", "application/json")
self.end_headers()
self.wfile.write(api_response(path).encode())
return True
if path in ['/backup/', '/uploads/', '/private/', '/admin/', '/config/', '/database/']:
if path in [
"/backup/",
"/uploads/",
"/private/",
"/admin/",
"/config/",
"/database/",
]:
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(directory_listing(path).encode())
return True
@@ -440,9 +493,12 @@ class Handler(BaseHTTPRequestHandler):
return
user_agent = self._get_user_agent()
if self.config.dashboard_secret_path and self.path == self.config.dashboard_secret_path:
if (
self.config.dashboard_secret_path
and self.path == self.config.dashboard_secret_path
):
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
try:
stats = self.tracker.get_stats()
@@ -455,72 +511,93 @@ class Handler(BaseHTTPRequestHandler):
return
# API endpoint for fetching IP stats
if self.config.dashboard_secret_path and self.path.startswith(f"{self.config.dashboard_secret_path}/api/ip-stats/"):
ip_address = self.path.replace(f"{self.config.dashboard_secret_path}/api/ip-stats/", "")
if self.config.dashboard_secret_path and self.path.startswith(
f"{self.config.dashboard_secret_path}/api/ip-stats/"
):
ip_address = self.path.replace(
f"{self.config.dashboard_secret_path}/api/ip-stats/", ""
)
self.send_response(200)
self.send_header('Content-type', 'application/json')
self.send_header('Access-Control-Allow-Origin', '*')
self.send_header("Content-type", "application/json")
self.send_header("Access-Control-Allow-Origin", "*")
# Prevent browser caching - force fresh data from database every time
self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0')
self.send_header('Pragma', 'no-cache')
self.send_header('Expires', '0')
self.send_header(
"Cache-Control", "no-store, no-cache, must-revalidate, max-age=0"
)
self.send_header("Pragma", "no-cache")
self.send_header("Expires", "0")
self.end_headers()
try:
from database import get_database
import json
db = get_database()
ip_stats = db.get_ip_stats_by_ip(ip_address)
if ip_stats:
self.wfile.write(json.dumps(ip_stats).encode())
else:
self.wfile.write(json.dumps({'error': 'IP not found'}).encode())
self.wfile.write(json.dumps({"error": "IP not found"}).encode())
except BrokenPipeError:
pass
except Exception as e:
self.app_logger.error(f"Error fetching IP stats: {e}")
self.wfile.write(json.dumps({'error': str(e)}).encode())
self.wfile.write(json.dumps({"error": str(e)}).encode())
return
# API endpoint for downloading malicious IPs file
if self.config.dashboard_secret_path and self.path == f"{self.config.dashboard_secret_path}/api/download/malicious_ips.txt":
if (
self.config.dashboard_secret_path
and self.path
== f"{self.config.dashboard_secret_path}/api/download/malicious_ips.txt"
):
import os
file_path = os.path.join(os.path.dirname(__file__), 'exports', 'malicious_ips.txt')
file_path = os.path.join(
os.path.dirname(__file__), "exports", "malicious_ips.txt"
)
try:
if os.path.exists(file_path):
with open(file_path, 'rb') as f:
with open(file_path, "rb") as f:
content = f.read()
self.send_response(200)
self.send_header('Content-type', 'text/plain')
self.send_header('Content-Disposition', 'attachment; filename="malicious_ips.txt"')
self.send_header('Content-Length', str(len(content)))
self.send_header("Content-type", "text/plain")
self.send_header(
"Content-Disposition",
'attachment; filename="malicious_ips.txt"',
)
self.send_header("Content-Length", str(len(content)))
self.end_headers()
self.wfile.write(content)
else:
self.send_response(404)
self.send_header('Content-type', 'text/plain')
self.send_header("Content-type", "text/plain")
self.end_headers()
self.wfile.write(b'File not found')
self.wfile.write(b"File not found")
except BrokenPipeError:
pass
except Exception as e:
self.app_logger.error(f"Error serving malicious IPs file: {e}")
self.send_response(500)
self.send_header('Content-type', 'text/plain')
self.send_header("Content-type", "text/plain")
self.end_headers()
self.wfile.write(b'Internal server error')
self.wfile.write(b"Internal server error")
return
self.tracker.record_access(client_ip, self.path, user_agent, method='GET')
self.tracker.record_access(client_ip, self.path, user_agent, method="GET")
# self.analyzer.infer_user_category(client_ip)
# self.analyzer.update_ip_rep_infos(client_ip)
if self.tracker.is_suspicious_user_agent(user_agent):
self.access_logger.warning(f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}")
self.access_logger.warning(
f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}"
)
if self._should_return_error():
error_code = self._get_random_error_code()
self.access_logger.info(f"Returning error {error_code} to {client_ip} - {self.path}")
self.access_logger.info(
f"Returning error {error_code} to {client_ip} - {self.path}"
)
self.send_response(error_code)
self.end_headers()
return
@@ -530,13 +607,15 @@ class Handler(BaseHTTPRequestHandler):
time.sleep(self.config.delay / 1000.0)
self.send_response(200)
self.send_header('Content-type', 'text/html')
self.send_header("Content-type", "text/html")
self.end_headers()
try:
# Increment page visit counter for this IP and get the current count
current_visit_count = self._increment_page_visit(client_ip)
self.wfile.write(self.generate_page(self.path, current_visit_count).encode())
self.wfile.write(
self.generate_page(self.path, current_visit_count).encode()
)
Handler.counter -= 1

View File

@@ -13,6 +13,7 @@ from datetime import datetime
class TimezoneFormatter(logging.Formatter):
"""Custom formatter that respects configured timezone"""
def __init__(self, fmt=None, datefmt=None):
super().__init__(fmt, datefmt)
@@ -26,6 +27,7 @@ class TimezoneFormatter(logging.Formatter):
class LoggerManager:
"""Singleton logger manager for the application."""
_instance = None
def __new__(cls):
@@ -65,7 +67,7 @@ class LoggerManager:
app_file_handler = RotatingFileHandler(
os.path.join(log_dir, "krawl.log"),
maxBytes=max_bytes,
backupCount=backup_count
backupCount=backup_count,
)
app_file_handler.setFormatter(log_format)
self._app_logger.addHandler(app_file_handler)
@@ -82,7 +84,7 @@ class LoggerManager:
access_file_handler = RotatingFileHandler(
os.path.join(log_dir, "access.log"),
maxBytes=max_bytes,
backupCount=backup_count
backupCount=backup_count,
)
access_file_handler.setFormatter(log_format)
self._access_logger.addHandler(access_file_handler)
@@ -102,7 +104,7 @@ class LoggerManager:
credential_file_handler = RotatingFileHandler(
os.path.join(log_dir, "credentials.log"),
maxBytes=max_bytes,
backupCount=backup_count
backupCount=backup_count,
)
credential_file_handler.setFormatter(credential_format)
self._credential_logger.addHandler(credential_file_handler)

View File

@@ -17,20 +17,20 @@ from models import Base, CategoryHistory
def migrate():
    """Create CategoryHistory table if it doesn't exist.

    Obtains the database manager, initializes it when it has not been
    initialized yet, and then creates only the CategoryHistory table.
    Progress messages are printed to stdout. On any exception the error is
    printed and the process exits with status 1.
    """
    print("Starting migration: Adding CategoryHistory table...")
    try:
        db = get_database()

        # Initialize database if not already done
        if not db._initialized:
            db.initialize()

        # Create only the CategoryHistory table; checkfirst=True asks
        # SQLAlchemy to skip creation when the table is already present.
        CategoryHistory.__table__.create(db._engine, checkfirst=True)

        print("✓ Migration completed successfully!")
        print(" - CategoryHistory table created")
    except Exception as e:
        # Top-level script boundary: report the failure and signal it to the
        # caller through a non-zero exit status.
        print(f"✗ Migration failed: {e}")
        sys.exit(1)

View File

@@ -25,6 +25,7 @@ from sanitizer import (
class Base(DeclarativeBase):
    """Base class for all ORM models."""

    pass
@@ -35,30 +36,35 @@ class AccessLog(Base):
Stores request metadata, suspicious activity flags, and timestamps
for analysis and dashboard display.
"""
__tablename__ = 'access_logs'
__tablename__ = "access_logs"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
#ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True, ForeignKey('ip_logs.id', ondelete='CASCADE'))
# ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True, ForeignKey('ip_logs.id', ondelete='CASCADE'))
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False)
user_agent: Mapped[Optional[str]] = mapped_column(String(MAX_USER_AGENT_LENGTH), nullable=True)
method: Mapped[str] = mapped_column(String(10), nullable=False, default='GET')
user_agent: Mapped[Optional[str]] = mapped_column(
String(MAX_USER_AGENT_LENGTH), nullable=True
)
method: Mapped[str] = mapped_column(String(10), nullable=False, default="GET")
is_suspicious: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
is_honeypot_trigger: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
is_honeypot_trigger: Mapped[bool] = mapped_column(
Boolean, nullable=False, default=False
)
timestamp: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow, index=True
)
# Relationship to attack detections
attack_detections: Mapped[List["AttackDetection"]] = relationship(
"AttackDetection",
back_populates="access_log",
cascade="all, delete-orphan"
"AttackDetection", back_populates="access_log", cascade="all, delete-orphan"
)
# Indexes for common queries
__table_args__ = (
Index('ix_access_logs_ip_timestamp', 'ip', 'timestamp'),
Index('ix_access_logs_is_suspicious', 'is_suspicious'),
Index('ix_access_logs_is_honeypot_trigger', 'is_honeypot_trigger'),
Index("ix_access_logs_ip_timestamp", "ip", "timestamp"),
Index("ix_access_logs_is_suspicious", "is_suspicious"),
Index("ix_access_logs_is_honeypot_trigger", "is_honeypot_trigger"),
)
def __repr__(self) -> str:
@@ -71,19 +77,24 @@ class CredentialAttempt(Base):
Stores the submitted username and password along with request metadata.
"""
__tablename__ = 'credential_attempts'
__tablename__ = "credential_attempts"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False)
username: Mapped[Optional[str]] = mapped_column(String(MAX_CREDENTIAL_LENGTH), nullable=True)
password: Mapped[Optional[str]] = mapped_column(String(MAX_CREDENTIAL_LENGTH), nullable=True)
timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
username: Mapped[Optional[str]] = mapped_column(
String(MAX_CREDENTIAL_LENGTH), nullable=True
)
password: Mapped[Optional[str]] = mapped_column(
String(MAX_CREDENTIAL_LENGTH), nullable=True
)
timestamp: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow, index=True
)
# Composite index for common queries
__table_args__ = (
Index('ix_credential_attempts_ip_timestamp', 'ip', 'timestamp'),
)
__table_args__ = (Index("ix_credential_attempts_ip_timestamp", "ip", "timestamp"),)
def __repr__(self) -> str:
return f"<CredentialAttempt(id={self.id}, ip='{self.ip}', username='{self.username}')>"
@@ -96,20 +107,25 @@ class AttackDetection(Base):
Linked to the parent AccessLog record. Multiple attack types can be
detected in a single request.
"""
__tablename__ = 'attack_detections'
__tablename__ = "attack_detections"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
access_log_id: Mapped[int] = mapped_column(
Integer,
ForeignKey('access_logs.id', ondelete='CASCADE'),
ForeignKey("access_logs.id", ondelete="CASCADE"),
nullable=False,
index=True
index=True,
)
attack_type: Mapped[str] = mapped_column(String(50), nullable=False)
matched_pattern: Mapped[Optional[str]] = mapped_column(String(MAX_ATTACK_PATTERN_LENGTH), nullable=True)
matched_pattern: Mapped[Optional[str]] = mapped_column(
String(MAX_ATTACK_PATTERN_LENGTH), nullable=True
)
# Relationship back to access log
access_log: Mapped["AccessLog"] = relationship("AccessLog", back_populates="attack_detections")
access_log: Mapped["AccessLog"] = relationship(
"AccessLog", back_populates="attack_detections"
)
def __repr__(self) -> str:
return f"<AttackDetection(id={self.id}, type='{self.attack_type}')>"
@@ -122,33 +138,43 @@ class IpStats(Base):
Includes fields for future GeoIP and reputation enrichment.
Updated on each request from an IP.
"""
__tablename__ = 'ip_stats'
__tablename__ = "ip_stats"
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), primary_key=True)
total_requests: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
first_seen: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow)
last_seen: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow)
first_seen: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow
)
last_seen: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow
)
# GeoIP fields (populated by future enrichment)
country_code: Mapped[Optional[str]] = mapped_column(String(2), nullable=True)
city: Mapped[Optional[str]] = mapped_column(String(MAX_CITY_LENGTH), nullable=True)
asn: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
asn_org: Mapped[Optional[str]] = mapped_column(String(MAX_ASN_ORG_LENGTH), nullable=True)
list_on: Mapped[Optional[Dict[str,str]]] = mapped_column(JSON, nullable=True)
asn_org: Mapped[Optional[str]] = mapped_column(
String(MAX_ASN_ORG_LENGTH), nullable=True
)
list_on: Mapped[Optional[Dict[str, str]]] = mapped_column(JSON, nullable=True)
# Reputation fields (populated by future enrichment)
reputation_score: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
reputation_source: Mapped[Optional[str]] = mapped_column(String(MAX_REPUTATION_SOURCE_LENGTH), nullable=True)
reputation_updated: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True)
reputation_source: Mapped[Optional[str]] = mapped_column(
String(MAX_REPUTATION_SOURCE_LENGTH), nullable=True
)
reputation_updated: Mapped[Optional[datetime]] = mapped_column(
DateTime, nullable=True
)
#Analyzed metrics, category and category scores
analyzed_metrics: Mapped[Dict[str,object]] = mapped_column(JSON, nullable=True)
# Analyzed metrics, category and category scores
analyzed_metrics: Mapped[Dict[str, object]] = mapped_column(JSON, nullable=True)
category: Mapped[str] = mapped_column(String, nullable=True)
category_scores: Mapped[Dict[str,int]] = mapped_column(JSON, nullable=True)
category_scores: Mapped[Dict[str, int]] = mapped_column(JSON, nullable=True)
manual_category: Mapped[bool] = mapped_column(Boolean, default=False, nullable=True)
last_analysis: Mapped[datetime] = mapped_column(DateTime, nullable=True)
def __repr__(self) -> str:
return f"<IpStats(ip='{self.ip}', total_requests={self.total_requests})>"
@@ -160,18 +186,19 @@ class CategoryHistory(Base):
Tracks when an IP's category changes, storing both the previous
and new category along with timestamp for timeline visualization.
"""
__tablename__ = 'category_history'
__tablename__ = "category_history"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
old_category: Mapped[Optional[str]] = mapped_column(String(50), nullable=True)
new_category: Mapped[str] = mapped_column(String(50), nullable=False)
timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
timestamp: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow, index=True
)
# Composite index for efficient IP-based timeline queries
__table_args__ = (
Index('ix_category_history_ip_timestamp', 'ip', 'timestamp'),
)
__table_args__ = (Index("ix_category_history_ip_timestamp", "ip", "timestamp"),)
def __repr__(self) -> str:
return f"<CategoryHistory(ip='{self.ip}', {self.old_category} -> {self.new_category})>"
@@ -205,4 +232,4 @@ class CategoryHistory(Base):
# )
# def __repr__(self) -> str:
# return f"<AccessLog(id={self.id}, ip='{self.ip}', path='{self.path[:50]}')>"
# return f"<AccessLog(id={self.id}, ip='{self.ip}', path='{self.path[:50]}')>"

View File

@@ -9,7 +9,6 @@ import html
import re
from typing import Optional, Dict
# Field length limits for database storage
MAX_IP_LENGTH = 45 # IPv6 max length
MAX_PATH_LENGTH = 2048 # URL max practical length
@@ -43,7 +42,7 @@ def sanitize_for_storage(value: Optional[str], max_length: int) -> str:
# Remove null bytes and control characters (except newline \n, tab \t, carriage return \r)
# Control chars are 0x00-0x1F and 0x7F, we keep 0x09 (tab), 0x0A (newline), 0x0D (carriage return)
cleaned = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', value)
cleaned = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", value)
# Truncate to max length
return cleaned[:max_length]
@@ -112,5 +111,6 @@ def escape_html_truncated(value: Optional[str], max_display_length: int) -> str:
return html.escape(value_str)
def sanitize_dict(
    value: Optional[Dict[str, str]], max_display_length: int
) -> Dict[str, str]:
    """Sanitize every value of a string-to-string mapping for storage.

    Keys are kept as they are; only the values are passed through
    ``sanitize_for_storage``.

    Args:
        value: Mapping whose values should be cleaned. ``None`` is accepted
            (the parameter is annotated ``Optional``) and is treated like an
            empty mapping.
        max_display_length: Maximum length handed to ``sanitize_for_storage``
            for each value.

    Returns:
        A new dict with the same keys and sanitized values, or ``{}`` when
        ``value`` is ``None`` or empty.
    """
    if not value:
        # Calling .items() on None would raise AttributeError.
        return {}
    return {
        key: sanitize_for_storage(item, max_display_length)
        for key, item in value.items()
    }

View File

@@ -12,43 +12,48 @@ from config import get_config
from tracker import AccessTracker
from analyzer import Analyzer
from handler import Handler
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
from logger import (
initialize_logging,
get_app_logger,
get_access_logger,
get_credential_logger,
)
from database import initialize_database
from tasks_master import get_tasksmaster
def print_usage():
    """Print command-line usage and an example ``config.yaml`` to stdout.

    The example is indented as nested YAML so that each setting sits under
    its section and the snippet can be copied into a configuration file.
    """
    usage_lines = (
        f"Usage: {sys.argv[0]} [FILE]",
        "",
        "FILE is file containing a list of webpage names to serve, one per line.",
        "If no file is provided, random links will be generated.",
        "",
        "Configuration:",
        "  Configuration is loaded from a YAML file (default: config.yaml)",
        "  Set CONFIG_LOCATION environment variable to use a different file.",
        "",
        "  Example config.yaml structure:",
        "    server:",
        "      port: 5000",
        "      delay: 100",
        "    links:",
        "      min_length: 5",
        "      max_length: 15",
        "      min_per_page: 10",
        "      max_per_page: 15",
        "    canary:",
        "      token_url: null",
        "      token_tries: 10",
        "    dashboard:",
        "      secret_path: null # auto-generated if not set",
        "    database:",
        '      path: "data/krawl.db"',
        "      retention_days: 30",
        "    behavior:",
        "      probability_error_codes: 0",
    )
    # One write instead of one print call per line.
    print("\n".join(usage_lines))
def main():
"""Main entry point for the deception server"""
if '-h' in sys.argv or '--help' in sys.argv:
if "-h" in sys.argv or "--help" in sys.argv:
print_usage()
exit(0)
@@ -63,9 +68,11 @@ def main():
# Initialize database for persistent storage
try:
initialize_database(config.database_path)
app_logger.info(f'Database initialized at: {config.database_path}')
app_logger.info(f"Database initialized at: {config.database_path}")
except Exception as e:
app_logger.warning(f'Database initialization failed: {e}. Continuing with in-memory only.')
app_logger.warning(
f"Database initialization failed: {e}. Continuing with in-memory only."
)
tracker = AccessTracker(config.max_pages_limit, config.ban_duration_seconds)
analyzer = Analyzer()
@@ -80,11 +87,13 @@ def main():
if len(sys.argv) == 2:
try:
with open(sys.argv[1], 'r') as f:
with open(sys.argv[1], "r") as f:
Handler.webpages = f.readlines()
if not Handler.webpages:
app_logger.warning('The file provided was empty. Using randomly generated links.')
app_logger.warning(
"The file provided was empty. Using randomly generated links."
)
Handler.webpages = None
except IOError:
app_logger.warning("Can't read input file. Using randomly generated links.")
@@ -94,25 +103,31 @@ def main():
tasks_master.run_scheduled_tasks()
try:
app_logger.info(f'Starting deception server on port {config.port}...')
app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}')
app_logger.info(f"Starting deception server on port {config.port}...")
app_logger.info(f"Dashboard available at: {config.dashboard_secret_path}")
if config.canary_token_url:
app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries')
app_logger.info(
f"Canary token will appear after {config.canary_token_tries} tries"
)
else:
app_logger.info('No canary token configured (set CANARY_TOKEN_URL to enable)')
app_logger.info(
"No canary token configured (set CANARY_TOKEN_URL to enable)"
)
server = HTTPServer(('0.0.0.0', config.port), Handler)
app_logger.info('Server started. Use <Ctrl-C> to stop.')
server = HTTPServer(("0.0.0.0", config.port), Handler)
app_logger.info("Server started. Use <Ctrl-C> to stop.")
server.serve_forever()
except KeyboardInterrupt:
app_logger.info('Stopping server...')
app_logger.info("Stopping server...")
server.socket.close()
app_logger.info('Server stopped')
app_logger.info("Server stopped")
except Exception as e:
app_logger.error(f'Error starting HTTP server on port {config.port}: {e}')
app_logger.error(f'Make sure you are root, if needed, and that port {config.port} is open.')
app_logger.error(f"Error starting HTTP server on port {config.port}: {e}")
app_logger.error(
f"Make sure you are root, if needed, and that port {config.port} is open."
)
exit(1)
if __name__ == '__main__':
if __name__ == "__main__":
main()

View File

@@ -7,13 +7,13 @@ from wordlists import get_wordlists
def generate_server_error() -> tuple[str, str]:
wl = get_wordlists()
server_errors = wl.server_errors
if not server_errors:
return ("500 Internal Server Error", "text/html")
server_type = random.choice(list(server_errors.keys()))
server_config = server_errors[server_type]
error_codes = {
400: "Bad Request",
401: "Unauthorized",
@@ -21,45 +21,45 @@ def generate_server_error() -> tuple[str, str]:
404: "Not Found",
500: "Internal Server Error",
502: "Bad Gateway",
503: "Service Unavailable"
503: "Service Unavailable",
}
code = random.choice(list(error_codes.keys()))
message = error_codes[code]
template = server_config.get('template', '')
version = random.choice(server_config.get('versions', ['1.0']))
html = template.replace('{code}', str(code))
html = html.replace('{message}', message)
html = html.replace('{version}', version)
if server_type == 'apache':
os = random.choice(server_config.get('os', ['Ubuntu']))
html = html.replace('{os}', os)
html = html.replace('{host}', 'localhost')
template = server_config.get("template", "")
version = random.choice(server_config.get("versions", ["1.0"]))
html = template.replace("{code}", str(code))
html = html.replace("{message}", message)
html = html.replace("{version}", version)
if server_type == "apache":
os = random.choice(server_config.get("os", ["Ubuntu"]))
html = html.replace("{os}", os)
html = html.replace("{host}", "localhost")
return (html, "text/html")
def get_server_header(server_type: str = None) -> str:
    """Build a fake ``Server`` header value for the given server type.

    Args:
        server_type: Key into the ``server_errors`` wordlist (the code knows
            ``nginx``, ``apache``, ``iis`` and ``tomcat``). When falsy, a
            type is picked at random from the wordlist keys.

    Returns:
        A header string such as ``nginx/<version>``. ``"nginx/1.18.0"`` is
        returned when the wordlist has no server errors or when
        ``server_type`` is not one of the known types.
    """
    fallback_header = "nginx/1.18.0"

    wl = get_wordlists()
    server_errors = wl.server_errors
    if not server_errors:
        return fallback_header

    if not server_type:
        server_type = random.choice(list(server_errors.keys()))

    # ``or`` also covers an entry that exists but is None/empty, and a
    # ``versions`` list that is present but empty (random.choice would
    # raise IndexError on it).
    server_config = server_errors.get(server_type) or {}
    versions = server_config.get("versions") or ["1.0"]
    version = random.choice(versions)

    server_headers = {
        "nginx": f"nginx/{version}",
        "apache": f"Apache/{version}",
        "iis": f"Microsoft-IIS/{version}",
        # Fixed string: the configured version is not used for tomcat.
        "tomcat": "Apache-Coyote/1.1",
    }
    return server_headers.get(server_type, fallback_header)

View File

@@ -9,87 +9,91 @@ from wordlists import get_wordlists
# Injection categories in priority order: the first category with a matching
# pattern wins, so e.g. any query containing a quote is reported as "quote"
# even if it also contains UNION. Compiled once at import time instead of
# being rebuilt on every call.
_SQL_INJECTION_PATTERNS = {
    category: [re.compile(pattern) for pattern in pattern_list]
    for category, pattern_list in {
        "quote": [r"'", r'"', r"`"],
        "comment": [r"--", r"#", r"/\*", r"\*/"],
        "union": [r"\bunion\b", r"\bunion\s+select\b"],
        "boolean": [r"\bor\b.*=.*", r"\band\b.*=.*", r"'.*or.*'.*=.*'"],
        "time_based": [r"\bsleep\b", r"\bwaitfor\b", r"\bdelay\b", r"\bbenchmark\b"],
        "stacked": [r";.*select", r";.*drop", r";.*insert", r";.*update", r";.*delete"],
        "command": [r"\bexec\b", r"\bexecute\b", r"\bxp_cmdshell\b"],
        "info_schema": [r"information_schema", r"table_schema", r"table_name"],
    }.items()
}


def detect_sql_injection_pattern(query_string: str) -> Optional[str]:
    """Classify a query string by the first SQL-injection pattern it matches.

    The query is lower-cased before matching, so detection is
    case-insensitive.

    Args:
        query_string: Raw query string to inspect. Falsy values (``None`` or
            ``""``) are treated as "nothing to detect".

    Returns:
        The name of the first matching category (``quote``, ``comment``,
        ``union``, ``boolean``, ``time_based``, ``stacked``, ``command`` or
        ``info_schema``), or ``None`` when nothing matches.
    """
    if not query_string:
        return None

    query_lower = query_string.lower()
    for injection_type, compiled_patterns in _SQL_INJECTION_PATTERNS.items():
        if any(pattern.search(query_lower) for pattern in compiled_patterns):
            return injection_type
    return None
def get_random_sql_error(
    db_type: Optional[str] = None, injection_type: Optional[str] = None
) -> Tuple[str, str]:
    """Pick a fake SQL error message from the ``sql_errors`` wordlist.

    Selection order within the chosen database's errors: the list for
    ``injection_type`` if present, else the ``generic`` list, else every
    list-valued entry merged together.

    Args:
        db_type: Key into the ``sql_errors`` wordlist. When falsy, a database
            type is picked at random from the wordlist keys.
        injection_type: Injection category used to prefer a matching list of
            errors; may be ``None``.

    Returns:
        ``(error_message, "text/plain")``. The message is
        ``"Database error occurred"`` when no error text is available.
        ``{table}`` and ``{column}`` placeholders are replaced with a random
        plausible name.
    """
    default_message = "Database error occurred"

    wl = get_wordlists()
    sql_errors = wl.sql_errors
    if not sql_errors:
        return (default_message, "text/plain")

    if not db_type:
        db_type = random.choice(list(sql_errors.keys()))

    # ``or {}`` also covers a database entry that exists but is None/empty.
    db_errors = sql_errors.get(db_type) or {}

    if injection_type and injection_type in db_errors:
        errors = db_errors[injection_type]
    elif "generic" in db_errors:
        errors = db_errors["generic"]
    else:
        errors = [
            message
            for error_list in db_errors.values()
            if isinstance(error_list, list)
            for message in error_list
        ]

    if isinstance(errors, str):
        # A single message given as a bare string must not be sampled
        # character by character.
        errors = [errors]

    error_message = random.choice(errors) if errors else default_message

    if "{table}" in error_message:
        tables = ["users", "products", "orders", "customers", "accounts", "sessions"]
        error_message = error_message.replace("{table}", random.choice(tables))

    if "{column}" in error_message:
        columns = ["id", "name", "email", "password", "username", "created_at"]
        error_message = error_message.replace("{column}", random.choice(columns))

    return (error_message, "text/plain")
def generate_sql_error_response(
    query_string: str, db_type: Optional[str] = None
) -> Tuple[Optional[str], Optional[str], Optional[int]]:
    """Produce a fake SQL error response for a suspicious query string.

    Args:
        query_string: Query string checked with
            ``detect_sql_injection_pattern``.
        db_type: Database type forwarded to ``get_random_sql_error``; may be
            ``None``.

    Returns:
        ``(error_message, content_type, status_code)`` when an injection
        pattern is detected; the status code is 500, or 200 in roughly 30%
        of calls. ``(None, None, None)`` when no pattern is detected, so
        callers must check for ``None`` before using the values.
    """
    injection_type = detect_sql_injection_pattern(query_string)
    if not injection_type:
        return (None, None, None)

    error_message, content_type = get_random_sql_error(db_type, injection_type)

    # Occasionally answer 200 so the error text appears inside a "successful"
    # response.
    status_code = 200 if random.random() < 0.3 else 500

    return (error_message, content_type, status_code)
def get_sql_response_with_data(path: str, params: str) -> str:
import json
from generators import random_username, random_email, random_password
injection_type = detect_sql_injection_pattern(params)
if injection_type in ['union', 'boolean', 'stacked']:
if injection_type in ["union", "boolean", "stacked"]:
data = {
"success": True,
"results": [
@@ -98,15 +102,14 @@ def get_sql_response_with_data(path: str, params: str) -> str:
"username": random_username(),
"email": random_email(),
"password_hash": random_password(),
"role": random.choice(["admin", "user", "moderator"])
"role": random.choice(["admin", "user", "moderator"]),
}
for i in range(1, random.randint(2, 5))
]
],
}
return json.dumps(data, indent=2)
return json.dumps({
"success": True,
"message": "Query executed successfully",
"results": []
}, indent=2)
return json.dumps(
{"success": True, "message": "Query executed successfully", "results": []},
indent=2,
)

View File

@@ -20,7 +20,7 @@ TASK_CONFIG = {
"name": "analyze-ips",
"cron": "*/1 * * * *",
"enabled": True,
"run_when_loaded": True
"run_when_loaded": True,
}
@@ -34,48 +34,74 @@ def main():
uneven_request_timing_threshold = config.uneven_request_timing_threshold
user_agents_used_threshold = config.user_agents_used_threshold
attack_urls_threshold = config.attack_urls_threshold
uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
uneven_request_timing_time_window_seconds = (
config.uneven_request_timing_time_window_seconds
)
app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
score = {}
score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["attacker"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
score["good_crawler"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
score["bad_crawler"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
score["regular_user"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
# 1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
weights = {
"attacker": {
"risky_http_methods": 6,
"robots_violations": 4,
"uneven_request_timing": 3,
"different_user_agents": 8,
"attack_url": 15
"attack_url": 15,
},
"good_crawler": {
"risky_http_methods": 1,
"robots_violations": 0,
"uneven_request_timing": 0,
"different_user_agents": 0,
"attack_url": 0
"attack_url": 0,
},
"bad_crawler": {
"risky_http_methods": 2,
"robots_violations": 7,
"uneven_request_timing": 0,
"different_user_agents": 5,
"attack_url": 5
"attack_url": 5,
},
"regular_user": {
"risky_http_methods": 0,
"robots_violations": 0,
"uneven_request_timing": 8,
"different_user_agents": 3,
"attack_url": 0
}
"attack_url": 0,
},
}
# Get IPs with recent activity (last minute to match cron schedule)
recent_accesses = db_manager.get_access_logs(limit=999999999, since_minutes=1)
ips_to_analyze = {item['ip'] for item in recent_accesses}
ips_to_analyze = {item["ip"] for item in recent_accesses}
if not ips_to_analyze:
app_logger.debug("[Background Task] analyze-ips: No recent activity, skipping")
@@ -92,23 +118,51 @@ def main():
if total_accesses_count < 3:
category = "unknown"
analyzed_metrics = {}
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
category_scores = {
"attacker": 0,
"good_crawler": 0,
"bad_crawler": 0,
"regular_user": 0,
"unknown": 0,
}
last_analysis = datetime.now()
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
db_manager.update_ip_stats_analysis(
ip, analyzed_metrics, category, category_scores, last_analysis
)
return 0
#--------------------- HTTP Methods ---------------------
get_accesses_count = len([item for item in ip_accesses if item["method"] == "GET"])
post_accesses_count = len([item for item in ip_accesses if item["method"] == "POST"])
put_accesses_count = len([item for item in ip_accesses if item["method"] == "PUT"])
delete_accesses_count = len([item for item in ip_accesses if item["method"] == "DELETE"])
head_accesses_count = len([item for item in ip_accesses if item["method"] == "HEAD"])
options_accesses_count = len([item for item in ip_accesses if item["method"] == "OPTIONS"])
patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"])
# --------------------- HTTP Methods ---------------------
get_accesses_count = len(
[item for item in ip_accesses if item["method"] == "GET"]
)
post_accesses_count = len(
[item for item in ip_accesses if item["method"] == "POST"]
)
put_accesses_count = len(
[item for item in ip_accesses if item["method"] == "PUT"]
)
delete_accesses_count = len(
[item for item in ip_accesses if item["method"] == "DELETE"]
)
head_accesses_count = len(
[item for item in ip_accesses if item["method"] == "HEAD"]
)
options_accesses_count = len(
[item for item in ip_accesses if item["method"] == "OPTIONS"]
)
patch_accesses_count = len(
[item for item in ip_accesses if item["method"] == "PATCH"]
)
if total_accesses_count > http_risky_methods_threshold:
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
http_method_attacker_score = (
post_accesses_count
+ put_accesses_count
+ delete_accesses_count
+ options_accesses_count
+ patch_accesses_count
) / total_accesses_count
else:
http_method_attacker_score = 0
#print(f"HTTP Method attacker score: {http_method_attacker_score}")
# print(f"HTTP Method attacker score: {http_method_attacker_score}")
if http_method_attacker_score >= http_risky_methods_threshold:
score["attacker"]["risky_http_methods"] = True
score["good_crawler"]["risky_http_methods"] = False
@@ -119,8 +173,8 @@ def main():
score["good_crawler"]["risky_http_methods"] = True
score["bad_crawler"]["risky_http_methods"] = False
score["regular_user"]["risky_http_methods"] = False
#--------------------- Robots Violations ---------------------
#respect robots.txt and login/config pages access frequency
# --------------------- Robots Violations ---------------------
# respect robots.txt and login/config pages access frequency
robots_disallows = []
robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt"
with open(robots_path, "r") as f:
@@ -132,11 +186,20 @@ def main():
if parts[0] == "Disallow":
parts[1] = parts[1].rstrip("/")
#print(f"DISALLOW {parts[1]}")
# print(f"DISALLOW {parts[1]}")
robots_disallows.append(parts[1].strip())
#if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
violated_robots_count = len([item for item in ip_accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
#print(f"Violated robots count: {violated_robots_count}")
# if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
violated_robots_count = len(
[
item
for item in ip_accesses
if any(
item["path"].rstrip("/").startswith(disallow)
for disallow in robots_disallows
)
]
)
# print(f"Violated robots count: {violated_robots_count}")
if total_accesses_count > 0:
violated_robots_ratio = violated_robots_count / total_accesses_count
else:
@@ -152,15 +215,20 @@ def main():
score["bad_crawler"]["robots_violations"] = False
score["regular_user"]["robots_violations"] = False
#--------------------- Requests Timing ---------------------
# --------------------- Requests Timing ---------------------
# Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
now_utc = datetime.now()
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
timestamps = [
ts
for ts in timestamps
if now_utc - ts
<= timedelta(seconds=uneven_request_timing_time_window_seconds)
]
timestamps = sorted(timestamps, reverse=True)
time_diffs = []
for i in range(0, len(timestamps)-1):
diff = (timestamps[i] - timestamps[i+1]).total_seconds()
for i in range(0, len(timestamps) - 1):
diff = (timestamps[i] - timestamps[i + 1]).total_seconds()
time_diffs.append(diff)
mean = 0
@@ -170,9 +238,11 @@ def main():
if time_diffs:
mean = sum(time_diffs) / len(time_diffs)
variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
std = variance ** 0.5
cv = std/mean
app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
std = variance**0.5
cv = std / mean
app_logger.debug(
f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}"
)
if cv >= uneven_request_timing_threshold:
score["attacker"]["uneven_request_timing"] = True
score["good_crawler"]["uneven_request_timing"] = False
@@ -183,11 +253,11 @@ def main():
score["good_crawler"]["uneven_request_timing"] = False
score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = False
#--------------------- Different User Agents ---------------------
#Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
# --------------------- Different User Agents ---------------------
# Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
user_agents_used = [item["user_agent"] for item in ip_accesses]
user_agents_used = list(dict.fromkeys(user_agents_used))
#print(f"User agents used: {user_agents_used}")
# print(f"User agents used: {user_agents_used}")
if len(user_agents_used) >= user_agents_used_threshold:
score["attacker"]["different_user_agents"] = True
score["good_crawler"]["different_user_agents"] = False
@@ -198,7 +268,7 @@ def main():
score["good_crawler"]["different_user_agents"] = False
score["bad_crawler"]["different_user_agents"] = False
score["regular_user"]["different_user_agents"] = False
#--------------------- Attack URLs ---------------------
# --------------------- Attack URLs ---------------------
attack_urls_found_list = []
wl = get_wordlists()
if wl.attack_patterns:
@@ -215,12 +285,14 @@ def main():
for name, pattern in wl.attack_patterns.items():
# Check original, decoded, and double-decoded paths
if (re.search(pattern, queried_path, re.IGNORECASE) or
re.search(pattern, decoded_path, re.IGNORECASE) or
re.search(pattern, decoded_path_twice, re.IGNORECASE)):
if (
re.search(pattern, queried_path, re.IGNORECASE)
or re.search(pattern, decoded_path, re.IGNORECASE)
or re.search(pattern, decoded_path_twice, re.IGNORECASE)
):
attack_urls_found_list.append(f"{name}: {pattern}")
#remove duplicates
# remove duplicates
attack_urls_found_list = set(attack_urls_found_list)
attack_urls_found_list = list(attack_urls_found_list)
@@ -234,28 +306,102 @@ def main():
score["good_crawler"]["attack_url"] = False
score["bad_crawler"]["attack_url"] = False
score["regular_user"]["attack_url"] = False
#--------------------- Calculate score ---------------------
# --------------------- Calculate score ---------------------
attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
attacker_score = (
score["attacker"]["risky_http_methods"]
* weights["attacker"]["risky_http_methods"]
)
attacker_score = (
attacker_score
+ score["attacker"]["robots_violations"]
* weights["attacker"]["robots_violations"]
)
attacker_score = (
attacker_score
+ score["attacker"]["uneven_request_timing"]
* weights["attacker"]["uneven_request_timing"]
)
attacker_score = (
attacker_score
+ score["attacker"]["different_user_agents"]
* weights["attacker"]["different_user_agents"]
)
attacker_score = (
attacker_score
+ score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
)
good_crawler_score = (
score["good_crawler"]["risky_http_methods"]
* weights["good_crawler"]["risky_http_methods"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["robots_violations"]
* weights["good_crawler"]["robots_violations"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["uneven_request_timing"]
* weights["good_crawler"]["uneven_request_timing"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["different_user_agents"]
* weights["good_crawler"]["different_user_agents"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["attack_url"]
* weights["good_crawler"]["attack_url"]
)
bad_crawler_score = (
score["bad_crawler"]["risky_http_methods"]
* weights["bad_crawler"]["risky_http_methods"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["robots_violations"]
* weights["bad_crawler"]["robots_violations"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["uneven_request_timing"]
* weights["bad_crawler"]["uneven_request_timing"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["different_user_agents"]
* weights["bad_crawler"]["different_user_agents"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
)
regular_user_score = (
score["regular_user"]["risky_http_methods"]
* weights["regular_user"]["risky_http_methods"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["robots_violations"]
* weights["regular_user"]["robots_violations"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["uneven_request_timing"]
* weights["regular_user"]["uneven_request_timing"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["different_user_agents"]
* weights["regular_user"]["different_user_agents"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["attack_url"]
* weights["regular_user"]["attack_url"]
)
score_details = f"""
Attacker score: {attacker_score}
Good Crawler score: {good_crawler_score}
@@ -263,9 +409,22 @@ def main():
Regular User score: {regular_user_score}
"""
app_logger.debug(score_details)
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
analyzed_metrics = {
"risky_http_methods": http_method_attacker_score,
"robots_violations": violated_robots_ratio,
"uneven_request_timing": mean,
"different_user_agents": user_agents_used,
"attack_url": attack_urls_found_list,
}
category_scores = {
"attacker": attacker_score,
"good_crawler": good_crawler_score,
"bad_crawler": bad_crawler_score,
"regular_user": regular_user_score,
}
category = max(category_scores, key=category_scores.get)
last_analysis = datetime.now()
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
db_manager.update_ip_stats_analysis(
ip, analyzed_metrics, category, category_scores, last_analysis
)
return

View File

@@ -11,7 +11,7 @@ TASK_CONFIG = {
"name": "fetch-ip-rep",
"cron": "*/5 * * * *",
"enabled": True,
"run_when_loaded": True
"run_when_loaded": True,
}
@@ -21,7 +21,9 @@ def main():
# Only get IPs that haven't been enriched yet
unenriched_ips = db_manager.get_unenriched_ips(limit=50)
app_logger.info(f"{len(unenriched_ips)} IP's need to be have reputation enrichment.")
app_logger.info(
f"{len(unenriched_ips)} IP's need to be have reputation enrichment."
)
for ip in unenriched_ips:
try:
api_url = "https://iprep.lcrawl.com/api/iprep/"
@@ -43,8 +45,11 @@ def main():
sanitized_list_on = sanitize_dict(list_on, 100000)
db_manager.update_ip_rep_infos(
ip, sanitized_country_iso_code, sanitized_asn,
sanitized_asn_org, sanitized_list_on
ip,
sanitized_country_iso_code,
sanitized_asn,
sanitized_asn_org,
sanitized_list_on,
)
except requests.RequestException as e:
app_logger.warning(f"Failed to fetch IP rep for {ip}: {e}")

View File

@@ -17,24 +17,29 @@ TASK_CONFIG = {
"name": "export-malicious-ips",
"cron": "*/5 * * * *",
"enabled": True,
"run_when_loaded": True
"run_when_loaded": True,
}
EXPORTS_DIR = "exports"
OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
# ----------------------
# TASK LOGIC
# ----------------------
def has_recent_honeypot_access(session, minutes: int = 5) -> bool:
    """Check if honeypot was accessed in the last N minutes.

    Args:
        session: Database session used to query ``AccessLog``.
        minutes: Size of the look-back window, measured back from
            ``datetime.now()``.

    Returns:
        ``True`` if at least one ``AccessLog`` row flagged as a honeypot
        trigger has a timestamp inside the window, ``False`` otherwise.
    """
    cutoff_time = datetime.now() - timedelta(minutes=minutes)
    # Only existence matters, so fetch at most one matching row instead of
    # counting all of them.
    recent_trigger = (
        session.query(AccessLog)
        .filter(
            # ``== True`` builds the SQL comparison; ``is True`` would be
            # evaluated in Python instead.
            AccessLog.is_honeypot_trigger == True,  # noqa: E712
            AccessLog.timestamp >= cutoff_time,
        )
        .first()
    )
    return recent_trigger is not None
def main():
"""
Export all IPs flagged as suspicious to a text file.
@@ -49,23 +54,29 @@ def main():
# Check for recent honeypot activity
if not has_recent_honeypot_access(session):
app_logger.info(f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes")
app_logger.info(
f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes"
)
return
# Query distinct suspicious IPs
results = session.query(distinct(AccessLog.ip)).filter(
AccessLog.is_suspicious == True
).all()
results = (
session.query(distinct(AccessLog.ip))
.filter(AccessLog.is_suspicious == True)
.all()
)
# Ensure exports directory exists
os.makedirs(EXPORTS_DIR, exist_ok=True)
# Write IPs to file (one per line)
with open(OUTPUT_FILE, 'w') as f:
with open(OUTPUT_FILE, "w") as f:
for (ip,) in results:
f.write(f"{ip}\n")
app_logger.info(f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}")
app_logger.info(
f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}"
)
except Exception as e:
app_logger.error(f"[Background Task] {task_name} failed: {e}")

View File

@@ -6,7 +6,12 @@ import threading
import importlib
import importlib.util
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
from logger import (
initialize_logging,
get_app_logger,
get_access_logger,
get_credential_logger,
)
app_logger = get_app_logger()
@@ -27,8 +32,8 @@ except ModuleNotFoundError:
# ---------- TASKSMASTER CLASS ----------
class TasksMaster:
TASK_DEFAULT_CRON = '*/15 * * * *'
TASK_DEFAULT_CRON = "*/15 * * * *"
TASK_JITTER = 240
TASKS_FOLDER = os.path.join(os.path.dirname(__file__), "tasks")
@@ -36,7 +41,9 @@ class TasksMaster:
self.tasks = self._config_tasks()
self.scheduler = scheduler
self.last_run_times = {}
self.scheduler.add_listener(self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)
self.scheduler.add_listener(
self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR
)
def _config_tasks(self):
"""
@@ -80,7 +87,7 @@ class TasksMaster:
for filename in sorted(os.listdir(folder_path)):
# skip any non python files, as well as any __pycache__ or .pyc files that might creep in there
if not filename.endswith('.py') or filename.startswith("__"):
if not filename.endswith(".py") or filename.startswith("__"):
continue
path = os.path.join(folder_path, filename)
@@ -93,13 +100,15 @@ class TasksMaster:
except Exception as e:
app_logger.error(f"Failed to import {filename}: {e}")
continue
# if we have a tasks config and a main function, we attempt to schedule it
if hasattr(module, 'TASK_CONFIG') and hasattr(module, 'main'):
if hasattr(module, "TASK_CONFIG") and hasattr(module, "main"):
# ensure task_config is a dict
if not isinstance(module.TASK_CONFIG, dict):
app_logger.error(f"TASK_CONFIG is not a dict in {filename}. Skipping task.")
app_logger.error(
f"TASK_CONFIG is not a dict in {filename}. Skipping task."
)
continue
task_cron = module.TASK_CONFIG.get("cron") or self.TASK_DEFAULT_CRON
@@ -109,24 +118,26 @@ class TasksMaster:
try:
CronTrigger.from_crontab(task_cron)
except ValueError as ve:
app_logger.error(f"Invalid cron format for task {task_name}: {ve} - Skipping this task")
app_logger.error(
f"Invalid cron format for task {task_name}: {ve} - Skipping this task"
)
continue
task = {
'name': module.TASK_CONFIG.get('name', module_name),
'filename': filename,
'cron': task_cron,
"name": module.TASK_CONFIG.get("name", module_name),
"filename": filename,
"cron": task_cron,
"enabled": module.TASK_CONFIG.get("enabled", False),
"run_when_loaded": module.TASK_CONFIG.get("run_when_loaded", False)
"run_when_loaded": module.TASK_CONFIG.get("run_when_loaded", False),
}
tasks.append(task)
# we are missing things, and we log what's missing
else:
if not hasattr(module, 'TASK_CONFIG'):
if not hasattr(module, "TASK_CONFIG"):
app_logger.warning(f"Missing TASK_CONFIG in {filename}")
elif not hasattr(module, 'main'):
elif not hasattr(module, "main"):
app_logger.warning(f"Missing main() in {filename}")
return tasks
@@ -135,7 +146,7 @@ class TasksMaster:
# for each task in the tasks config file...
for task_to_run in self.tasks:
# remember, these tasks, are built from the "load_tasks_from_folder" function,
# remember, these tasks, are built from the "load_tasks_from_folder" function,
# if you want to pass data from the TASKS_CONFIG dict, you need to pass it there to get it here.
task_name = task_to_run.get("name")
run_when_loaded = task_to_run.get("run_when_loaded")
@@ -147,28 +158,42 @@ class TasksMaster:
# if task is disabled, skip this one
if not task_enabled:
app_logger.info(f"{task_name} is disabled in client config. Skipping task")
app_logger.info(
f"{task_name} is disabled in client config. Skipping task"
)
continue
try:
if os.path.isfile(os.path.join(self.TASKS_FOLDER, task_to_run.get("filename"))):
if os.path.isfile(
os.path.join(self.TASKS_FOLDER, task_to_run.get("filename"))
):
# schedule the task now that everything has checked out above...
self._schedule_task(task_name, module_name, task_cron, run_when_loaded)
app_logger.info(f"Scheduled {module_name} cron is set to {task_cron}.", extra={"task": task_to_run})
self._schedule_task(
task_name, module_name, task_cron, run_when_loaded
)
app_logger.info(
f"Scheduled {module_name} cron is set to {task_cron}.",
extra={"task": task_to_run},
)
else:
app_logger.info(f"Skipping invalid or unsafe file: {task_to_run.get('filename')}", extra={"task": task_to_run})
app_logger.info(
f"Skipping invalid or unsafe file: {task_to_run.get('filename')}",
extra={"task": task_to_run},
)
except Exception as e:
app_logger.error(f"Error scheduling task: {e}", extra={"tasks": task_to_run})
app_logger.error(
f"Error scheduling task: {e}", extra={"tasks": task_to_run}
)
def _schedule_task(self, task_name, module_name, task_cron, run_when_loaded):
try:
# Dynamically import the module
module = importlib.import_module(f"tasks.{module_name}")
# Check if the module has a 'main' function
if hasattr(module, 'main'):
if hasattr(module, "main"):
app_logger.info(f"Scheduling {task_name} - {module_name} Main Function")
# unique_job_id
job_identifier = f"{module_name}__{task_name}"
@@ -180,29 +205,31 @@ class TasksMaster:
# schedule the task / job
if run_when_loaded:
app_logger.info(f"Task: {task_name} is set to run instantly. Scheduling to run on scheduler start")
app_logger.info(
f"Task: {task_name} is set to run instantly. Scheduling to run on scheduler start"
)
self.scheduler.add_job(
module.main,
trigger,
id=job_identifier,
jitter=self.TASK_JITTER,
name=task_name,
next_run_time=datetime.datetime.now(),
max_instances=1
)
else:
self.scheduler.add_job(
module.main,
trigger,
module.main,
trigger,
id=job_identifier,
jitter=self.TASK_JITTER,
name=task_name,
max_instances=1
next_run_time=datetime.datetime.now(),
max_instances=1,
)
else:
self.scheduler.add_job(
module.main,
trigger,
id=job_identifier,
jitter=self.TASK_JITTER,
name=task_name,
max_instances=1,
)
else:
app_logger.error(f"{module_name} does not define a 'main' function.")
except Exception as e:
app_logger.error(f"Failed to load {module_name}: {e}")
@@ -218,13 +245,15 @@ class TasksMaster:
def list_jobs(self):
    """Summarise every job currently registered with the scheduler.

    Returns:
        A list with one dict per job returned by
        ``self.scheduler.get_jobs()``, holding the job's ``id``, ``name``
        and ``next_run`` (the job's ``next_run_time``, passed through
        unchanged).
    """
    return [
        {
            "id": job.id,
            "name": job.name,
            "next_run": job.next_run_time,
        }
        for job in self.scheduler.get_jobs()
    ]
def run_scheduled_tasks(self):
@@ -235,7 +264,7 @@ class TasksMaster:
1. Retrieves the current task configurations and updates internal state.
2. Adds new jobs to the scheduler based on the latest configuration.
3. Starts the scheduler to begin executing tasks at their defined intervals.
This ensures the scheduler is always running with the most up-to-date
task definitions and enabled status.
"""
@@ -251,6 +280,7 @@ class TasksMaster:
# ---------- SINGLETON WRAPPER ----------
T = type
def singleton_loader(func):
"""Decorator to ensure only one instance exists."""
cache: dict[str, T] = {}
@@ -262,6 +292,7 @@ def singleton_loader(func):
if func.__name__ not in cache:
cache[func.__name__] = func(*args, **kwargs)
return cache[func.__name__]
return wrapper
@@ -283,6 +314,8 @@ def get_tasksmaster(scheduler: BackgroundScheduler | None = None) -> TasksMaster
# Auto-start scheduler if not already running
if not scheduler.running:
scheduler.start()
app_logger.info("TasksMaster scheduler started automatically with singleton creation.")
app_logger.info(
"TasksMaster scheduler started automatically with singleton creation."
)
return tm_instance

View File

@@ -8,8 +8,8 @@ from .template_loader import load_template, clear_cache, TemplateNotFoundError
from . import html_templates
__all__ = [
'load_template',
'clear_cache',
'TemplateNotFoundError',
'html_templates',
"load_template",
"clear_cache",
"TemplateNotFoundError",
"html_templates",
]

View File

@@ -9,12 +9,14 @@ import html
from datetime import datetime
from zoneinfo import ZoneInfo
def _escape(value) -> str:
    """Escape HTML special characters to prevent XSS attacks.

    Args:
        value: Any object; it is converted with ``str()`` before escaping.

    Returns:
        The escaped text, or an empty string when *value* is None (so a
        missing field renders as nothing rather than the word "None").
    """
    if value is None:
        return ""
    return html.escape(str(value))
def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str:
"""Format ISO timestamp for display with timezone conversion
@@ -30,10 +32,12 @@ def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str:
return dt.strftime("%Y-%m-%d %H:%M:%S")
except Exception:
# Fallback for old format
return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
return (
iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
)
def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
"""Generate dashboard HTML with access statistics
Args:
@@ -42,8 +46,8 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
"""
# Generate IP rows with clickable functionality for dropdown stats
top_ips_rows = '\n'.join([
f'''<tr class="ip-row" data-ip="{_escape(ip)}">
top_ips_rows = (
"\n".join([f"""<tr class="ip-row" data-ip="{_escape(ip)}">
<td class="rank">{i+1}</td>
<td class="ip-clickable">{_escape(ip)}</td>
<td>{count}</td>
@@ -54,25 +58,35 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div>
</div>
</td>
</tr>'''
for i, (ip, count) in enumerate(stats['top_ips'])
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
</tr>""" for i, (ip, count) in enumerate(stats["top_ips"])])
or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
)
# Generate paths rows (CRITICAL: paths can contain XSS payloads)
top_paths_rows = '\n'.join([
f'<tr><td class="rank">{i+1}</td><td>{_escape(path)}</td><td>{count}</td></tr>'
for i, (path, count) in enumerate(stats['top_paths'])
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
top_paths_rows = (
"\n".join(
[
f'<tr><td class="rank">{i+1}</td><td>{_escape(path)}</td><td>{count}</td></tr>'
for i, (path, count) in enumerate(stats["top_paths"])
]
)
or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
)
# Generate User-Agent rows (CRITICAL: user agents can contain XSS payloads)
top_ua_rows = '\n'.join([
f'<tr><td class="rank">{i+1}</td><td style="word-break: break-all;">{_escape(ua[:80])}</td><td>{count}</td></tr>'
for i, (ua, count) in enumerate(stats['top_user_agents'])
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
top_ua_rows = (
"\n".join(
[
f'<tr><td class="rank">{i+1}</td><td style="word-break: break-all;">{_escape(ua[:80])}</td><td>{count}</td></tr>'
for i, (ua, count) in enumerate(stats["top_user_agents"])
]
)
or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
)
# Generate suspicious accesses rows with clickable IPs
suspicious_rows = '\n'.join([
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
suspicious_rows = (
"\n".join([f"""<tr class="ip-row" data-ip="{_escape(log["ip"])}">
<td class="ip-clickable">{_escape(log["ip"])}</td>
<td>{_escape(log["path"])}</td>
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
@@ -84,13 +98,13 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div>
</div>
</td>
</tr>'''
for log in stats['recent_suspicious'][-10:]
]) or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
</tr>""" for log in stats["recent_suspicious"][-10:]])
or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
)
# Generate honeypot triggered IPs rows with clickable IPs
honeypot_rows = '\n'.join([
f'''<tr class="ip-row" data-ip="{_escape(ip)}">
honeypot_rows = (
"\n".join([f"""<tr class="ip-row" data-ip="{_escape(ip)}">
<td class="ip-clickable">{_escape(ip)}</td>
<td style="word-break: break-all;">{_escape(", ".join(paths))}</td>
<td>{len(paths)}</td>
@@ -101,13 +115,13 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div>
</div>
</td>
</tr>'''
for ip, paths in stats.get('honeypot_triggered_ips', [])
]) or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>'
</tr>""" for ip, paths in stats.get("honeypot_triggered_ips", [])])
or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>'
)
# Generate attack types rows with clickable IPs
attack_type_rows = '\n'.join([
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
attack_type_rows = (
"\n".join([f"""<tr class="ip-row" data-ip="{_escape(log["ip"])}">
<td class="ip-clickable">{_escape(log["ip"])}</td>
<td>{_escape(log["path"])}</td>
<td>{_escape(", ".join(log["attack_types"]))}</td>
@@ -120,13 +134,13 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div>
</div>
</td>
</tr>'''
for log in stats.get('attack_types', [])[-10:]
]) or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
</tr>""" for log in stats.get("attack_types", [])[-10:]])
or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
)
# Generate credential attempts rows with clickable IPs
credential_rows = '\n'.join([
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}">
credential_rows = (
"\n".join([f"""<tr class="ip-row" data-ip="{_escape(log["ip"])}">
<td class="ip-clickable">{_escape(log["ip"])}</td>
<td>{_escape(log["username"])}</td>
<td>{_escape(log["password"])}</td>
@@ -139,9 +153,9 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div>
</div>
</td>
</tr>'''
for log in stats.get('credential_attempts', [])[-20:]
]) or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'
</tr>""" for log in stats.get("credential_attempts", [])[-20:]])
or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'
)
return f"""<!DOCTYPE html>
<html>

View File

@@ -11,6 +11,7 @@ from typing import Dict
class TemplateNotFoundError(Exception):
    """Raised when a template file cannot be found."""
@@ -42,11 +43,11 @@ def load_template(name: str, **kwargs) -> str:
"""
# debug
# print(f"Loading Template: {name}")
# Check cache first
if name not in _template_cache:
# Determine file path based on whether name has an extension
if '.' in name:
if "." in name:
file_path = _TEMPLATE_DIR / name
else:
file_path = _TEMPLATE_DIR / f"{name}.html"
@@ -54,7 +55,7 @@ def load_template(name: str, **kwargs) -> str:
if not file_path.exists():
raise TemplateNotFoundError(f"Template '{name}' not found at {file_path}")
_template_cache[name] = file_path.read_text(encoding='utf-8')
_template_cache[name] = file_path.read_text(encoding="utf-8")
template = _template_cache[name]

View File

@@ -17,7 +17,13 @@ class AccessTracker:
Maintains in-memory structures for fast dashboard access and
persists data to SQLite for long-term storage and analysis.
"""
def __init__(self, max_pages_limit, ban_duration_seconds, db_manager: Optional[DatabaseManager] = None):
def __init__(
self,
max_pages_limit,
ban_duration_seconds,
db_manager: Optional[DatabaseManager] = None,
):
"""
Initialize the access tracker.
@@ -32,14 +38,32 @@ class AccessTracker:
self.user_agent_counts: Dict[str, int] = defaultdict(int)
self.access_log: List[Dict] = []
self.credential_attempts: List[Dict] = []
# Track pages visited by each IP (for good crawler limiting)
self.ip_page_visits: Dict[str, Dict[str, object]] = defaultdict(dict)
self.suspicious_patterns = [
'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests',
'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
'burp', 'zap', 'w3af', 'metasploit', 'nuclei', 'gobuster', 'dirbuster'
"bot",
"crawler",
"spider",
"scraper",
"curl",
"wget",
"python-requests",
"scanner",
"nikto",
"sqlmap",
"nmap",
"masscan",
"nessus",
"acunetix",
"burp",
"zap",
"w3af",
"metasploit",
"nuclei",
"gobuster",
"dirbuster",
]
# Load attack patterns from wordlists
@@ -49,11 +73,11 @@ class AccessTracker:
# Fallback if wordlists not loaded
if not self.attack_types:
self.attack_types = {
'path_traversal': r'\.\.',
'sql_injection': r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
'xss_attempt': r'(<script|javascript:|onerror=|onload=)',
'common_probes': r'(wp-admin|phpmyadmin|\.env|\.git|/admin|/config)',
'shell_injection': r'(\||;|`|\$\(|&&)',
"path_traversal": r"\.\.",
"sql_injection": r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
"xss_attempt": r"(<script|javascript:|onerror=|onload=)",
"common_probes": r"(wp-admin|phpmyadmin|\.env|\.git|/admin|/config)",
"shell_injection": r"(\||;|`|\$\(|&&)",
}
# Track IPs that accessed honeypot paths from robots.txt
@@ -94,14 +118,22 @@ class AccessTracker:
parsed = urllib.parse.parse_qs(post_data)
# Common username field names
username_fields = ['username', 'user', 'login', 'email', 'log', 'userid', 'account']
username_fields = [
"username",
"user",
"login",
"email",
"log",
"userid",
"account",
]
for field in username_fields:
if field in parsed and parsed[field]:
username = parsed[field][0]
break
# Common password field names
password_fields = ['password', 'pass', 'passwd', 'pwd', 'passphrase']
password_fields = ["password", "pass", "passwd", "pwd", "passphrase"]
for field in password_fields:
if field in parsed and parsed[field]:
password = parsed[field][0]
@@ -109,8 +141,12 @@ class AccessTracker:
except Exception:
# If parsing fails, try simple regex patterns
username_match = re.search(r'(?:username|user|login|email|log)=([^&\s]+)', post_data, re.IGNORECASE)
password_match = re.search(r'(?:password|pass|passwd|pwd)=([^&\s]+)', post_data, re.IGNORECASE)
username_match = re.search(
r"(?:username|user|login|email|log)=([^&\s]+)", post_data, re.IGNORECASE
)
password_match = re.search(
r"(?:password|pass|passwd|pwd)=([^&\s]+)", post_data, re.IGNORECASE
)
if username_match:
username = urllib.parse.unquote_plus(username_match.group(1))
@@ -119,29 +155,30 @@ class AccessTracker:
return username, password
def record_credential_attempt(self, ip: str, path: str, username: str, password: str):
def record_credential_attempt(
self, ip: str, path: str, username: str, password: str
):
"""
Record a credential login attempt.
Stores in both in-memory list and SQLite database.
"""
# In-memory storage for dashboard
self.credential_attempts.append({
'ip': ip,
'path': path,
'username': username,
'password': password,
'timestamp': datetime.now().isoformat()
})
self.credential_attempts.append(
{
"ip": ip,
"path": path,
"username": username,
"password": password,
"timestamp": datetime.now().isoformat(),
}
)
# Persist to database
if self.db:
try:
self.db.persist_credential(
ip=ip,
path=path,
username=username,
password=password
ip=ip, path=path, username=username, password=password
)
except Exception:
# Don't crash if database persistence fails
@@ -151,9 +188,9 @@ class AccessTracker:
self,
ip: str,
path: str,
user_agent: str = '',
body: str = '',
method: str = 'GET'
user_agent: str = "",
body: str = "",
method: str = "GET",
):
"""
Record an access attempt.
@@ -180,9 +217,9 @@ class AccessTracker:
attack_findings.extend(self.detect_attack_type(body))
is_suspicious = (
self.is_suspicious_user_agent(user_agent) or
self.is_honeypot_path(path) or
len(attack_findings) > 0
self.is_suspicious_user_agent(user_agent)
or self.is_honeypot_path(path)
or len(attack_findings) > 0
)
is_honeypot = self.is_honeypot_path(path)
@@ -191,15 +228,17 @@ class AccessTracker:
self.honeypot_triggered[ip].append(path)
# In-memory storage for dashboard
self.access_log.append({
'ip': ip,
'path': path,
'user_agent': user_agent,
'suspicious': is_suspicious,
'honeypot_triggered': self.is_honeypot_path(path),
'attack_types':attack_findings,
'timestamp': datetime.now().isoformat()
})
self.access_log.append(
{
"ip": ip,
"path": path,
"user_agent": user_agent,
"suspicious": is_suspicious,
"honeypot_triggered": self.is_honeypot_path(path),
"attack_types": attack_findings,
"timestamp": datetime.now().isoformat(),
}
)
# Persist to database
if self.db:
@@ -211,13 +250,13 @@ class AccessTracker:
method=method,
is_suspicious=is_suspicious,
is_honeypot_trigger=is_honeypot,
attack_types=attack_findings if attack_findings else None
attack_types=attack_findings if attack_findings else None,
)
except Exception:
# Don't crash if database persistence fails
pass
def detect_attack_type(self, data:str) -> list[str]:
def detect_attack_type(self, data: str) -> list[str]:
"""
Returns a list of all attack types found in path data
"""
@@ -230,27 +269,37 @@ class AccessTracker:
def is_honeypot_path(self, path: str) -> bool:
    """Check if path is one of the honeypot traps from robots.txt.

    A path is a hit when it either equals one of the exact trap paths
    (case-sensitive comparison) or contains one of the trap fragments
    anywhere in its lower-cased form.

    Args:
        path: The requested path.

    Returns:
        True if the path matches a trap, False otherwise.
    """
    exact_traps = (
        "/admin",
        "/admin/",
        "/backup",
        "/backup/",
        "/config",
        "/config/",
        "/private",
        "/private/",
        "/database",
        "/database/",
        "/credentials.txt",
        "/passwords.txt",
        "/admin_notes.txt",
        "/api_keys.json",
        "/.env",
        "/wp-admin",
        "/wp-admin/",
        "/phpmyadmin",
        "/phpMyAdmin/",
    )
    trap_fragments = (
        "/backup",
        "/admin",
        "/config",
        "/private",
        "/database",
        "phpmyadmin",
    )

    if path in exact_traps:
        return True

    # Lower-case once instead of once per fragment.
    lowered = path.lower()
    return any(fragment in lowered for fragment in trap_fragments)
def is_suspicious_user_agent(self, user_agent: str) -> bool:
"""Check if user agent matches suspicious patterns"""
@@ -263,34 +312,36 @@ class AccessTracker:
"""
Check if an IP has been categorized as a 'good crawler' in the database.
Uses the IP category from IpStats table.
Args:
client_ip: The client IP address (will be sanitized)
Returns:
True if the IP is categorized as 'good crawler', False otherwise
"""
try:
from sanitizer import sanitize_ip
# Sanitize the IP address
safe_ip = sanitize_ip(client_ip)
# Query the database for this IP's category
db = self.db
if not db:
return False
ip_stats = db.get_ip_stats_by_ip(safe_ip)
if not ip_stats or not ip_stats.get('category'):
if not ip_stats or not ip_stats.get("category"):
return False
# Check if category matches "good crawler"
category = ip_stats.get('category', '').lower().strip()
category = ip_stats.get("category", "").lower().strip()
return category
except Exception as e:
# Log but don't crash on database errors
import logging
logging.error(f"Error checking IP category for {client_ip}: {str(e)}")
return False
@@ -298,10 +349,10 @@ class AccessTracker:
"""
Increment page visit counter for an IP and return the new count.
If ban timestamp exists and 60+ seconds have passed, reset the counter.
Args:
client_ip: The client IP address
Returns:
The updated page visit count for this IP
"""
@@ -309,55 +360,58 @@ class AccessTracker:
# Initialize if not exists
if client_ip not in self.ip_page_visits:
self.ip_page_visits[client_ip] = {"count": 0, "ban_timestamp": None}
# Increment count
self.ip_page_visits[client_ip]["count"] += 1
# Set ban if reached limit
if self.ip_page_visits[client_ip]["count"] >= self.max_pages_limit:
self.ip_page_visits[client_ip]["ban_timestamp"] = datetime.now().isoformat()
self.ip_page_visits[client_ip][
"ban_timestamp"
] = datetime.now().isoformat()
return self.ip_page_visits[client_ip]["count"]
except Exception:
return 0
def is_banned_ip(self, client_ip: str) -> bool:
    """
    Check if an IP is currently banned due to exceeding page visit limits.

    A ban is lifted (visit count reset to 0, ban timestamp cleared) once more
    than ``self.ban_duration_seconds`` have elapsed since it was set.

    Args:
        client_ip: The client IP address

    Returns:
        True if the IP is banned, False otherwise
    """
    try:
        # .get() does not create an entry, unlike indexing a defaultdict.
        visits = self.ip_page_visits.get(client_ip)
        if not visits:
            return False

        ban_timestamp = visits.get("ban_timestamp")
        if ban_timestamp is None:
            return False

        # Check if ban period has expired (> ban_duration_seconds)
        elapsed = datetime.now() - datetime.fromisoformat(ban_timestamp)
        if elapsed.total_seconds() > self.ban_duration_seconds:
            visits["count"] = 0
            visits["ban_timestamp"] = None
            return False

        return True
    except Exception:
        # Best effort by design: malformed tracking data must never take the
        # request path down, so it is treated as "not banned".
        return False
def get_page_visit_count(self, client_ip: str) -> int:
"""
Get the current page visit count for an IP.
Args:
client_ip: The client IP address
Returns:
The page visit count for this IP
"""
@@ -372,20 +426,24 @@ class AccessTracker:
def get_top_paths(self, limit: int = 10) -> List[Tuple[str, int]]:
    """Get top N paths by access count.

    Args:
        limit: Maximum number of ``(path, count)`` pairs to return.

    Returns:
        Pairs from ``self.path_counts`` ordered by count, highest first.
    """
    ranked = sorted(
        self.path_counts.items(), key=lambda entry: entry[1], reverse=True
    )
    return ranked[:limit]
def get_top_user_agents(self, limit: int = 10) -> List[Tuple[str, int]]:
    """Get top N user agents by access count.

    Args:
        limit: Maximum number of ``(user_agent, count)`` pairs to return.

    Returns:
        Pairs from ``self.user_agent_counts`` ordered by count, highest first.
    """
    ranked = sorted(
        self.user_agent_counts.items(), key=lambda entry: entry[1], reverse=True
    )
    return ranked[:limit]
def get_suspicious_accesses(self, limit: int = 20) -> List[Dict]:
    """Get recent suspicious accesses.

    Args:
        limit: Maximum number of entries to return. A value of zero or less
            yields an empty list.

    Returns:
        The last ``limit`` entries of ``self.access_log`` whose
        ``"suspicious"`` flag is truthy, in log order.
    """
    # Guard first: with limit == 0 the slice below would be [-0:], i.e. the
    # whole list rather than nothing.
    if limit <= 0:
        return []
    suspicious = [log for log in self.access_log if log.get("suspicious", False)]
    return suspicious[-limit:]
def get_attack_type_accesses(self, limit: int = 20) -> List[Dict]:
    """Get recent accesses with detected attack types.

    Args:
        limit: Maximum number of entries to return. A value of zero or less
            yields an empty list.

    Returns:
        The last ``limit`` entries of ``self.access_log`` that carry a
        non-empty ``"attack_types"`` value, in log order.
    """
    # Guard first: with limit == 0 the slice below would be [-0:], i.e. the
    # whole list rather than nothing.
    if limit <= 0:
        return []
    attacks = [log for log in self.access_log if log.get("attack_types")]
    return attacks[-limit:]
def get_honeypot_triggered_ips(self) -> List[Tuple[str, List[str]]]:
@@ -401,12 +459,12 @@ class AccessTracker:
stats = self.db.get_dashboard_counts()
# Add detailed lists from database
stats['top_ips'] = self.db.get_top_ips(10)
stats['top_paths'] = self.db.get_top_paths(10)
stats['top_user_agents'] = self.db.get_top_user_agents(10)
stats['recent_suspicious'] = self.db.get_recent_suspicious(20)
stats['honeypot_triggered_ips'] = self.db.get_honeypot_triggered_ips()
stats['attack_types'] = self.db.get_recent_attacks(20)
stats['credential_attempts'] = self.db.get_credential_attempts(limit=50)
stats["top_ips"] = self.db.get_top_ips(10)
stats["top_paths"] = self.db.get_top_paths(10)
stats["top_user_agents"] = self.db.get_top_user_agents(10)
stats["recent_suspicious"] = self.db.get_recent_suspicious(20)
stats["honeypot_triggered_ips"] = self.db.get_honeypot_triggered_ips()
stats["attack_types"] = self.db.get_recent_attacks(20)
stats["credential_attempts"] = self.db.get_credential_attempts(limit=50)
return stats

View File

@@ -13,122 +13,116 @@ from logger import get_app_logger
class Wordlists:
"""Loads and provides access to wordlists from wordlists.json"""
def __init__(self):
    """Load the wordlists once and keep them on the instance as ``_data``."""
    self._data = self._load_config()
def _load_config(self):
    """Load wordlists from JSON file.

    The file is ``wordlists.json`` located two directories above this
    module.

    Returns:
        The parsed JSON content, or the result of ``self._get_defaults()``
        when the file is missing or does not contain valid JSON (a warning
        is logged in both cases).
    """
    config_path = Path(__file__).parent.parent / "wordlists.json"
    try:
        # Explicit encoding: without it the platform default is used, which
        # is not guaranteed to be UTF-8.
        with open(config_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        get_app_logger().warning(
            f"Wordlists file {config_path} not found, using default values"
        )
        return self._get_defaults()
    except json.JSONDecodeError as e:
        get_app_logger().warning(f"Invalid JSON in {config_path}: {e}")
        return self._get_defaults()
def _get_defaults(self):
    """Fallback default wordlists if JSON file is missing or invalid.

    Returns:
        A freshly built dict with a small built-in value set for each
        section it defines.
    """
    return {
        "usernames": {
            "prefixes": ["admin", "user", "root"],
            "suffixes": ["", "_prod", "_dev"],
        },
        "passwords": {
            "prefixes": ["P@ssw0rd", "Admin"],
            "simple": ["test", "demo", "password"],
        },
        "emails": {"domains": ["example.com", "test.com"]},
        "api_keys": {"prefixes": ["sk_live_", "api_", ""]},
        "databases": {
            "names": ["production", "main_db"],
            "hosts": ["localhost", "db.internal"],
        },
        "applications": {"names": ["WebApp", "Dashboard"]},
        "users": {"roles": ["Administrator", "User"]},
        "server_headers": ["Apache/2.4.41 (Ubuntu)", "nginx/1.18.0"],
    }
@property
def username_prefixes(self):
    """List at ``usernames.prefixes``; empty list when absent."""
    return self._data.get("usernames", {}).get("prefixes", [])


@property
def username_suffixes(self):
    """List at ``usernames.suffixes``; empty list when absent."""
    return self._data.get("usernames", {}).get("suffixes", [])


@property
def password_prefixes(self):
    """List at ``passwords.prefixes``; empty list when absent."""
    return self._data.get("passwords", {}).get("prefixes", [])


@property
def simple_passwords(self):
    """List at ``passwords.simple``; empty list when absent."""
    return self._data.get("passwords", {}).get("simple", [])


@property
def email_domains(self):
    """List at ``emails.domains``; empty list when absent."""
    return self._data.get("emails", {}).get("domains", [])


@property
def api_key_prefixes(self):
    """List at ``api_keys.prefixes``; empty list when absent."""
    return self._data.get("api_keys", {}).get("prefixes", [])


@property
def database_names(self):
    """List at ``databases.names``; empty list when absent."""
    return self._data.get("databases", {}).get("names", [])


@property
def database_hosts(self):
    """List at ``databases.hosts``; empty list when absent."""
    return self._data.get("databases", {}).get("hosts", [])


@property
def application_names(self):
    """List at ``applications.names``; empty list when absent."""
    return self._data.get("applications", {}).get("names", [])


@property
def user_roles(self):
    """List at ``users.roles``; empty list when absent."""
    return self._data.get("users", {}).get("roles", [])


@property
def directory_files(self):
    """List at ``directory_listing.files``; empty list when absent."""
    return self._data.get("directory_listing", {}).get("files", [])


@property
def directory_dirs(self):
    """List at ``directory_listing.directories``; empty list when absent."""
    return self._data.get("directory_listing", {}).get("directories", [])


@property
def error_codes(self):
    """Value of the top-level ``error_codes`` key; empty list when absent."""
    return self._data.get("error_codes", [])


@property
def sql_errors(self):
    """Value of the top-level ``sql_errors`` key; empty dict when absent."""
    return self._data.get("sql_errors", {})


@property
def attack_patterns(self):
    """Value of the top-level ``attack_patterns`` key; empty dict when absent."""
    return self._data.get("attack_patterns", {})


@property
def server_errors(self):
    """Value of the top-level ``server_errors`` key; empty dict when absent."""
    return self._data.get("server_errors", {})


@property
def server_headers(self):
    """Value of the top-level ``server_headers`` key; empty list when absent."""
    return self._data.get("server_headers", [])
@property
def attack_urls(self):
"""Deprecated: use attack_patterns instead. Returns attack_patterns for backward compatibility."""
@@ -137,10 +131,10 @@ class Wordlists:
# Module-level cache for the shared Wordlists object; filled on first use.
_wordlists_instance = None


def get_wordlists():
    """Get the singleton Wordlists instance.

    The instance is created on the first call and the same object is
    returned on every later call.
    """
    global _wordlists_instance
    if _wordlists_instance is None:
        _wordlists_instance = Wordlists()
    return _wordlists_instance

View File

@@ -8,25 +8,25 @@ from wordlists import get_wordlists
def detect_xss_pattern(input_string: str) -> bool:
    """Report whether *input_string* matches the XSS attack pattern.

    The pattern is read from the ``xss_attempt`` entry of the wordlists'
    attack patterns; a built-in pattern is used when that entry is missing
    or empty. Matching is case-insensitive.

    Args:
        input_string: Text to inspect.

    Returns:
        True if the pattern is found anywhere in the text, False otherwise
        (including for empty input).
    """
    if not input_string:
        return False

    xss_pattern = get_wordlists().attack_patterns.get("xss_attempt", "")
    if not xss_pattern:
        xss_pattern = r"(<script|</script|javascript:|onerror=|onload=|onclick=|<iframe|<img|<svg|eval\(|alert\()"

    return bool(re.search(xss_pattern, input_string, re.IGNORECASE))
def generate_xss_response(input_data: dict) -> str:
xss_detected = False
reflected_content = []
for key, value in input_data.items():
if detect_xss_pattern(value):
xss_detected = True
reflected_content.append(f"<p><strong>{key}:</strong> {value}</p>")
if xss_detected:
html = f"""
<!DOCTYPE html>
@@ -51,7 +51,7 @@ def generate_xss_response(input_data: dict) -> str:
</html>
"""
return html
return """
<!DOCTYPE html>
<html>