diff --git a/src/analyzer.py b/src/analyzer.py
index c0ff515..860a206 100644
--- a/src/analyzer.py
+++ b/src/analyzer.py
@@ -19,10 +19,12 @@ Functions for user activity analysis
app_logger = get_app_logger()
+
class Analyzer:
"""
Analyzes users activity and produces aggregated insights
"""
+
def __init__(self, db_manager: Optional[DatabaseManager] = None):
"""
Initialize the access tracker.
@@ -102,7 +104,6 @@ class Analyzer:
# }
# }
-
# accesses = self.db.get_access_logs(ip_filter = ip, limit=1000)
# total_accesses_count = len(accesses)
# if total_accesses_count <= 0:
@@ -119,7 +120,6 @@ class Analyzer:
# #--------------------- HTTP Methods ---------------------
-
# get_accesses_count = len([item for item in accesses if item["method"] == "GET"])
# post_accesses_count = len([item for item in accesses if item["method"] == "POST"])
# put_accesses_count = len([item for item in accesses if item["method"] == "PUT"])
@@ -214,7 +214,6 @@ class Analyzer:
# score["bad_crawler"]["uneven_request_timing"] = False
# score["regular_user"]["uneven_request_timing"] = False
-
# #--------------------- Different User Agents ---------------------
# #Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
# user_agents_used = [item["user_agent"] for item in accesses]
@@ -317,8 +316,6 @@ class Analyzer:
# return 0
-
-
# def update_ip_rep_infos(self, ip: str) -> list[str]:
# api_url = "https://iprep.lcrawl.com/api/iprep/"
# params = {
diff --git a/src/config.py b/src/config.py
index 771e8c2..629c18c 100644
--- a/src/config.py
+++ b/src/config.py
@@ -14,12 +14,13 @@ import yaml
@dataclass
class Config:
"""Configuration class for the deception server"""
+
port: int = 5000
delay: int = 100 # milliseconds
server_header: str = ""
links_length_range: Tuple[int, int] = (5, 15)
links_per_page_range: Tuple[int, int] = (10, 15)
- char_space: str = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
+ char_space: str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
max_counter: int = 10
canary_token_url: Optional[str] = None
canary_token_tries: int = 10
@@ -30,7 +31,9 @@ class Config:
probability_error_codes: int = 0 # Percentage (0-100)
# Crawl limiting settings - for legitimate vs malicious crawlers
- max_pages_limit: int = 100 # Max pages limit for good crawlers and regular users (and bad crawlers/attackers if infinite_pages_for_malicious is False)
+ max_pages_limit: int = (
+ 100 # Max pages limit for good crawlers and regular users (and bad crawlers/attackers if infinite_pages_for_malicious is False)
+ )
infinite_pages_for_malicious: bool = True # Infinite pages for malicious crawlers
ban_duration_seconds: int = 600 # Ban duration in seconds for IPs exceeding limits
@@ -47,90 +50,111 @@ class Config:
attack_urls_threshold: float = None
@classmethod
- def from_yaml(cls) -> 'Config':
+ def from_yaml(cls) -> "Config":
"""Create configuration from YAML file"""
- config_location = os.getenv('CONFIG_LOCATION', 'config.yaml')
+ config_location = os.getenv("CONFIG_LOCATION", "config.yaml")
config_path = Path(__file__).parent.parent / config_location
try:
- with open(config_path, 'r') as f:
+ with open(config_path, "r") as f:
data = yaml.safe_load(f)
except FileNotFoundError:
- print(f"Error: Configuration file '{config_path}' not found.", file=sys.stderr)
- print(f"Please create a config.yaml file or set CONFIG_LOCATION environment variable.", file=sys.stderr)
+ print(
+ f"Error: Configuration file '{config_path}' not found.", file=sys.stderr
+ )
+ print(
+ f"Please create a config.yaml file or set CONFIG_LOCATION environment variable.",
+ file=sys.stderr,
+ )
sys.exit(1)
except yaml.YAMLError as e:
- print(f"Error: Invalid YAML in configuration file '{config_path}': {e}", file=sys.stderr)
+ print(
+ f"Error: Invalid YAML in configuration file '{config_path}': {e}",
+ file=sys.stderr,
+ )
sys.exit(1)
if data is None:
data = {}
# Extract nested values with defaults
- server = data.get('server', {})
- links = data.get('links', {})
- canary = data.get('canary', {})
- dashboard = data.get('dashboard', {})
- api = data.get('api', {})
- database = data.get('database', {})
- behavior = data.get('behavior', {})
- analyzer = data.get('analyzer') or {}
- crawl = data.get('crawl', {})
+ server = data.get("server", {})
+ links = data.get("links", {})
+ canary = data.get("canary", {})
+ dashboard = data.get("dashboard", {})
+ api = data.get("api", {})
+ database = data.get("database", {})
+ behavior = data.get("behavior", {})
+ analyzer = data.get("analyzer") or {}
+ crawl = data.get("crawl", {})
# Handle dashboard_secret_path - auto-generate if null/not set
- dashboard_path = dashboard.get('secret_path')
+ dashboard_path = dashboard.get("secret_path")
if dashboard_path is None:
- dashboard_path = f'/{os.urandom(16).hex()}'
+ dashboard_path = f"/{os.urandom(16).hex()}"
else:
# ensure the dashboard path starts with a /
if dashboard_path[:1] != "/":
dashboard_path = f"/{dashboard_path}"
return cls(
- port=server.get('port', 5000),
- delay=server.get('delay', 100),
- server_header=server.get('server_header',""),
+ port=server.get("port", 5000),
+ delay=server.get("delay", 100),
+ server_header=server.get("server_header", ""),
links_length_range=(
- links.get('min_length', 5),
- links.get('max_length', 15)
+ links.get("min_length", 5),
+ links.get("max_length", 15),
),
links_per_page_range=(
- links.get('min_per_page', 10),
- links.get('max_per_page', 15)
+ links.get("min_per_page", 10),
+ links.get("max_per_page", 15),
),
- char_space=links.get('char_space', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
- max_counter=links.get('max_counter', 10),
- canary_token_url=canary.get('token_url'),
- canary_token_tries=canary.get('token_tries', 10),
+ char_space=links.get(
+ "char_space",
+ "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
+ ),
+ max_counter=links.get("max_counter", 10),
+ canary_token_url=canary.get("token_url"),
+ canary_token_tries=canary.get("token_tries", 10),
dashboard_secret_path=dashboard_path,
- api_server_url=api.get('server_url'),
- api_server_port=api.get('server_port', 8080),
- api_server_path=api.get('server_path', '/api/v2/users'),
- probability_error_codes=behavior.get('probability_error_codes', 0),
- database_path=database.get('path', 'data/krawl.db'),
- database_retention_days=database.get('retention_days', 30),
- http_risky_methods_threshold=analyzer.get('http_risky_methods_threshold', 0.1),
- violated_robots_threshold=analyzer.get('violated_robots_threshold', 0.1),
- uneven_request_timing_threshold=analyzer.get('uneven_request_timing_threshold', 0.5), # coefficient of variation
- uneven_request_timing_time_window_seconds=analyzer.get('uneven_request_timing_time_window_seconds', 300),
- user_agents_used_threshold=analyzer.get('user_agents_used_threshold', 2),
- attack_urls_threshold=analyzer.get('attack_urls_threshold', 1),
- infinite_pages_for_malicious=crawl.get('infinite_pages_for_malicious', True),
- max_pages_limit=crawl.get('max_pages_limit', 200),
- ban_duration_seconds=crawl.get('ban_duration_seconds', 60)
+ api_server_url=api.get("server_url"),
+ api_server_port=api.get("server_port", 8080),
+ api_server_path=api.get("server_path", "/api/v2/users"),
+ probability_error_codes=behavior.get("probability_error_codes", 0),
+ database_path=database.get("path", "data/krawl.db"),
+ database_retention_days=database.get("retention_days", 30),
+ http_risky_methods_threshold=analyzer.get(
+ "http_risky_methods_threshold", 0.1
+ ),
+ violated_robots_threshold=analyzer.get("violated_robots_threshold", 0.1),
+ uneven_request_timing_threshold=analyzer.get(
+ "uneven_request_timing_threshold", 0.5
+ ), # coefficient of variation
+ uneven_request_timing_time_window_seconds=analyzer.get(
+ "uneven_request_timing_time_window_seconds", 300
+ ),
+ user_agents_used_threshold=analyzer.get("user_agents_used_threshold", 2),
+ attack_urls_threshold=analyzer.get("attack_urls_threshold", 1),
+ infinite_pages_for_malicious=crawl.get(
+ "infinite_pages_for_malicious", True
+ ),
+ max_pages_limit=crawl.get("max_pages_limit", 200),
+ ban_duration_seconds=crawl.get("ban_duration_seconds", 60),
)
+
def __get_env_from_config(config: str) -> str:
-
- env = config.upper().replace('.', '_').replace('-', '__').replace(' ', '_')
-
- return f'KRAWL_{env}'
+
+ env = config.upper().replace(".", "_").replace("-", "__").replace(" ", "_")
+
+ return f"KRAWL_{env}"
+
def override_config_from_env(config: Config = None):
"""Initialize configuration from environment variables"""
-
+
for field in config.__dataclass_fields__:
-
+
env_var = __get_env_from_config(field)
if env_var in os.environ:
field_type = config.__dataclass_fields__[field].type
@@ -140,20 +164,22 @@ def override_config_from_env(config: Config = None):
elif field_type == float:
setattr(config, field, float(env_value))
elif field_type == Tuple[int, int]:
- parts = env_value.split(',')
+ parts = env_value.split(",")
if len(parts) == 2:
setattr(config, field, (int(parts[0]), int(parts[1])))
else:
setattr(config, field, env_value)
+
_config_instance = None
+
def get_config() -> Config:
"""Get the singleton Config instance"""
global _config_instance
if _config_instance is None:
_config_instance = Config.from_yaml()
-
+
override_config_from_env(_config_instance)
-
- return _config_instance
\ No newline at end of file
+
+ return _config_instance
diff --git a/src/database.py b/src/database.py
index bfe2725..6f21d91 100644
--- a/src/database.py
+++ b/src/database.py
@@ -24,7 +24,15 @@ def set_sqlite_pragma(dbapi_connection, connection_record):
cursor.execute("PRAGMA busy_timeout=30000")
cursor.close()
-from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats, CategoryHistory
+
+from models import (
+ Base,
+ AccessLog,
+ CredentialAttempt,
+ AttackDetection,
+ IpStats,
+ CategoryHistory,
+)
from sanitizer import (
sanitize_ip,
sanitize_path,
@@ -37,6 +45,7 @@ from logger import get_app_logger
applogger = get_app_logger()
+
class DatabaseManager:
"""
Singleton database manager for the Krawl honeypot.
@@ -44,6 +53,7 @@ class DatabaseManager:
Handles database initialization, session management, and provides
methods for persisting access logs, credentials, and attack detections.
"""
+
_instance: Optional["DatabaseManager"] = None
def __new__(cls) -> "DatabaseManager":
@@ -72,7 +82,7 @@ class DatabaseManager:
self._engine = create_engine(
database_url,
connect_args={"check_same_thread": False},
- echo=False # Set to True for SQL debugging
+ echo=False, # Set to True for SQL debugging
)
# Create session factory with scoped_session for thread safety
@@ -96,7 +106,9 @@ class DatabaseManager:
def session(self) -> Session:
"""Get a thread-local database session."""
if not self._initialized:
- raise RuntimeError("DatabaseManager not initialized. Call initialize() first.")
+ raise RuntimeError(
+ "DatabaseManager not initialized. Call initialize() first."
+ )
return self._Session()
def close_session(self) -> None:
@@ -113,7 +125,7 @@ class DatabaseManager:
is_suspicious: bool = False,
is_honeypot_trigger: bool = False,
attack_types: Optional[List[str]] = None,
- matched_patterns: Optional[Dict[str, str]] = None
+ matched_patterns: Optional[Dict[str, str]] = None,
) -> Optional[int]:
"""
Persist an access log entry to the database.
@@ -141,7 +153,7 @@ class DatabaseManager:
method=method[:10],
is_suspicious=is_suspicious,
is_honeypot_trigger=is_honeypot_trigger,
- timestamp=datetime.now()
+ timestamp=datetime.now(),
)
session.add(access_log)
session.flush() # Get the ID before committing
@@ -155,7 +167,7 @@ class DatabaseManager:
attack_type=attack_type[:50],
matched_pattern=sanitize_attack_pattern(
matched_patterns.get(attack_type, "")
- )
+ ),
)
session.add(detection)
@@ -178,7 +190,7 @@ class DatabaseManager:
ip: str,
path: str,
username: Optional[str] = None,
- password: Optional[str] = None
+ password: Optional[str] = None,
) -> Optional[int]:
"""
Persist a credential attempt to the database.
@@ -199,7 +211,7 @@ class DatabaseManager:
path=sanitize_path(path),
username=sanitize_credential(username),
password=sanitize_credential(password),
- timestamp=datetime.now()
+ timestamp=datetime.now(),
)
session.add(credential)
session.commit()
@@ -230,14 +242,18 @@ class DatabaseManager:
ip_stats.last_seen = now
else:
ip_stats = IpStats(
- ip=sanitized_ip,
- total_requests=1,
- first_seen=now,
- last_seen=now
+ ip=sanitized_ip, total_requests=1, first_seen=now, last_seen=now
)
session.add(ip_stats)
- def update_ip_stats_analysis(self, ip: str, analyzed_metrics: Dict[str, object], category: str, category_scores: Dict[str, int], last_analysis: datetime) -> None:
+ def update_ip_stats_analysis(
+ self,
+ ip: str,
+ analyzed_metrics: Dict[str, object],
+ category: str,
+ category_scores: Dict[str, int],
+ last_analysis: datetime,
+ ) -> None:
"""
Update IP statistics (ip is already persisted).
Records category change in history if category has changed.
@@ -250,7 +266,9 @@ class DatabaseManager:
last_analysis: timestamp of last analysis
"""
- applogger.debug(f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}")
+ applogger.debug(
+ f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}"
+ )
applogger.info(f"IP: {ip} category has been updated to {category}")
session = self.session
@@ -260,7 +278,9 @@ class DatabaseManager:
# Check if category has changed and record it
old_category = ip_stats.category
if old_category != category:
- self._record_category_change(sanitized_ip, old_category, category, last_analysis)
+ self._record_category_change(
+ sanitized_ip, old_category, category, last_analysis
+ )
ip_stats.analyzed_metrics = analyzed_metrics
ip_stats.category = category
@@ -286,11 +306,12 @@ class DatabaseManager:
sanitized_ip = sanitize_ip(ip)
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
-
# Record the manual category change
old_category = ip_stats.category
if old_category != category:
- self._record_category_change(sanitized_ip, old_category, category, datetime.now())
+ self._record_category_change(
+ sanitized_ip, old_category, category, datetime.now()
+ )
ip_stats.category = category
ip_stats.manual_category = True
@@ -301,7 +322,13 @@ class DatabaseManager:
session.rollback()
print(f"Error updating manual category: {e}")
- def _record_category_change(self, ip: str, old_category: Optional[str], new_category: str, timestamp: datetime) -> None:
+ def _record_category_change(
+ self,
+ ip: str,
+ old_category: Optional[str],
+ new_category: str,
+ timestamp: datetime,
+ ) -> None:
"""
Internal method to record category changes in history.
Only records if there's an actual change from a previous category.
@@ -323,7 +350,7 @@ class DatabaseManager:
ip=ip,
old_category=old_category,
new_category=new_category,
- timestamp=timestamp
+ timestamp=timestamp,
)
session.add(history_entry)
session.commit()
@@ -344,22 +371,32 @@ class DatabaseManager:
session = self.session
try:
sanitized_ip = sanitize_ip(ip)
- history = session.query(CategoryHistory).filter(
- CategoryHistory.ip == sanitized_ip
- ).order_by(CategoryHistory.timestamp.asc()).all()
+ history = (
+ session.query(CategoryHistory)
+ .filter(CategoryHistory.ip == sanitized_ip)
+ .order_by(CategoryHistory.timestamp.asc())
+ .all()
+ )
return [
{
- 'old_category': h.old_category,
- 'new_category': h.new_category,
- 'timestamp': h.timestamp.isoformat()
+ "old_category": h.old_category,
+ "new_category": h.new_category,
+ "timestamp": h.timestamp.isoformat(),
}
for h in history
]
finally:
self.close_session()
- def update_ip_rep_infos(self, ip: str, country_code: str, asn: str, asn_org: str, list_on: Dict[str,str]) -> None:
+ def update_ip_rep_infos(
+ self,
+ ip: str,
+ country_code: str,
+ asn: str,
+ asn_org: str,
+ list_on: Dict[str, str],
+ ) -> None:
"""
Update IP rep stats
@@ -400,20 +437,25 @@ class DatabaseManager:
"""
session = self.session
try:
- ips = session.query(IpStats.ip).filter(
- IpStats.country_code.is_(None),
- ~IpStats.ip.like('10.%'),
- ~IpStats.ip.like('172.16.%'),
- ~IpStats.ip.like('172.17.%'),
- ~IpStats.ip.like('172.18.%'),
- ~IpStats.ip.like('172.19.%'),
- ~IpStats.ip.like('172.2_.%'),
- ~IpStats.ip.like('172.30.%'),
- ~IpStats.ip.like('172.31.%'),
- ~IpStats.ip.like('192.168.%'),
- ~IpStats.ip.like('127.%'),
- ~IpStats.ip.like('169.254.%')
- ).limit(limit).all()
+ ips = (
+ session.query(IpStats.ip)
+ .filter(
+ IpStats.country_code.is_(None),
+ ~IpStats.ip.like("10.%"),
+ ~IpStats.ip.like("172.16.%"),
+ ~IpStats.ip.like("172.17.%"),
+ ~IpStats.ip.like("172.18.%"),
+ ~IpStats.ip.like("172.19.%"),
+ ~IpStats.ip.like("172.2_.%"),
+ ~IpStats.ip.like("172.30.%"),
+ ~IpStats.ip.like("172.31.%"),
+ ~IpStats.ip.like("192.168.%"),
+ ~IpStats.ip.like("127.%"),
+ ~IpStats.ip.like("169.254.%"),
+ )
+ .limit(limit)
+ .all()
+ )
return [ip[0] for ip in ips]
finally:
self.close_session()
@@ -424,7 +466,7 @@ class DatabaseManager:
offset: int = 0,
ip_filter: Optional[str] = None,
suspicious_only: bool = False,
- since_minutes: Optional[int] = None
+ since_minutes: Optional[int] = None,
) -> List[Dict[str, Any]]:
"""
Retrieve access logs with optional filtering.
@@ -455,15 +497,15 @@ class DatabaseManager:
return [
{
- 'id': log.id,
- 'ip': log.ip,
- 'path': log.path,
- 'user_agent': log.user_agent,
- 'method': log.method,
- 'is_suspicious': log.is_suspicious,
- 'is_honeypot_trigger': log.is_honeypot_trigger,
- 'timestamp': log.timestamp.isoformat(),
- 'attack_types': [d.attack_type for d in log.attack_detections]
+ "id": log.id,
+ "ip": log.ip,
+ "path": log.path,
+ "user_agent": log.user_agent,
+ "method": log.method,
+ "is_suspicious": log.is_suspicious,
+ "is_honeypot_trigger": log.is_honeypot_trigger,
+ "timestamp": log.timestamp.isoformat(),
+ "attack_types": [d.attack_type for d in log.attack_detections],
}
for log in logs
]
@@ -521,10 +563,7 @@ class DatabaseManager:
# self.close_session()
def get_credential_attempts(
- self,
- limit: int = 100,
- offset: int = 0,
- ip_filter: Optional[str] = None
+ self, limit: int = 100, offset: int = 0, ip_filter: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
Retrieve credential attempts with optional filtering.
@@ -550,12 +589,12 @@ class DatabaseManager:
return [
{
- 'id': attempt.id,
- 'ip': attempt.ip,
- 'path': attempt.path,
- 'username': attempt.username,
- 'password': attempt.password,
- 'timestamp': attempt.timestamp.isoformat()
+ "id": attempt.id,
+ "ip": attempt.ip,
+ "path": attempt.path,
+ "username": attempt.username,
+ "password": attempt.password,
+ "timestamp": attempt.timestamp.isoformat(),
}
for attempt in attempts
]
@@ -574,26 +613,29 @@ class DatabaseManager:
"""
session = self.session
try:
- stats = session.query(IpStats).order_by(
- IpStats.total_requests.desc()
- ).limit(limit).all()
+ stats = (
+ session.query(IpStats)
+ .order_by(IpStats.total_requests.desc())
+ .limit(limit)
+ .all()
+ )
return [
{
- 'ip': s.ip,
- 'total_requests': s.total_requests,
- 'first_seen': s.first_seen.isoformat(),
- 'last_seen': s.last_seen.isoformat(),
- 'country_code': s.country_code,
- 'city': s.city,
- 'asn': s.asn,
- 'asn_org': s.asn_org,
- 'reputation_score': s.reputation_score,
- 'reputation_source': s.reputation_source,
- 'analyzed_metrics': s.analyzed_metrics,
- 'category': s.category,
- 'manual_category': s.manual_category,
- 'last_analysis': s.last_analysis
+ "ip": s.ip,
+ "total_requests": s.total_requests,
+ "first_seen": s.first_seen.isoformat(),
+ "last_seen": s.last_seen.isoformat(),
+ "country_code": s.country_code,
+ "city": s.city,
+ "asn": s.asn,
+ "asn_org": s.asn_org,
+ "reputation_score": s.reputation_score,
+ "reputation_source": s.reputation_source,
+ "analyzed_metrics": s.analyzed_metrics,
+ "category": s.category,
+ "manual_category": s.manual_category,
+ "last_analysis": s.last_analysis,
}
for s in stats
]
@@ -621,23 +663,25 @@ class DatabaseManager:
category_history = self.get_category_history(ip)
return {
- 'ip': stat.ip,
- 'total_requests': stat.total_requests,
- 'first_seen': stat.first_seen.isoformat() if stat.first_seen else None,
- 'last_seen': stat.last_seen.isoformat() if stat.last_seen else None,
- 'country_code': stat.country_code,
- 'city': stat.city,
- 'asn': stat.asn,
- 'asn_org': stat.asn_org,
- 'list_on': stat.list_on or {},
- 'reputation_score': stat.reputation_score,
- 'reputation_source': stat.reputation_source,
- 'analyzed_metrics': stat.analyzed_metrics or {},
- 'category': stat.category,
- 'category_scores': stat.category_scores or {},
- 'manual_category': stat.manual_category,
- 'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None,
- 'category_history': category_history
+ "ip": stat.ip,
+ "total_requests": stat.total_requests,
+ "first_seen": stat.first_seen.isoformat() if stat.first_seen else None,
+ "last_seen": stat.last_seen.isoformat() if stat.last_seen else None,
+ "country_code": stat.country_code,
+ "city": stat.city,
+ "asn": stat.asn,
+ "asn_org": stat.asn_org,
+ "list_on": stat.list_on or {},
+ "reputation_score": stat.reputation_score,
+ "reputation_source": stat.reputation_source,
+ "analyzed_metrics": stat.analyzed_metrics or {},
+ "category": stat.category,
+ "category_scores": stat.category_scores or {},
+ "manual_category": stat.manual_category,
+ "last_analysis": (
+ stat.last_analysis.isoformat() if stat.last_analysis else None
+ ),
+ "category_history": category_history,
}
finally:
self.close_session()
@@ -654,25 +698,32 @@ class DatabaseManager:
try:
# Get main aggregate counts in one query
result = session.query(
- func.count(AccessLog.id).label('total_accesses'),
- func.count(distinct(AccessLog.ip)).label('unique_ips'),
- func.count(distinct(AccessLog.path)).label('unique_paths'),
- func.sum(case((AccessLog.is_suspicious == True, 1), else_=0)).label('suspicious_accesses'),
- func.sum(case((AccessLog.is_honeypot_trigger == True, 1), else_=0)).label('honeypot_triggered')
+ func.count(AccessLog.id).label("total_accesses"),
+ func.count(distinct(AccessLog.ip)).label("unique_ips"),
+ func.count(distinct(AccessLog.path)).label("unique_paths"),
+ func.sum(case((AccessLog.is_suspicious == True, 1), else_=0)).label(
+ "suspicious_accesses"
+ ),
+ func.sum(
+ case((AccessLog.is_honeypot_trigger == True, 1), else_=0)
+ ).label("honeypot_triggered"),
).first()
# Get unique IPs that triggered honeypots
- honeypot_ips = session.query(
- func.count(distinct(AccessLog.ip))
- ).filter(AccessLog.is_honeypot_trigger == True).scalar() or 0
+ honeypot_ips = (
+ session.query(func.count(distinct(AccessLog.ip)))
+ .filter(AccessLog.is_honeypot_trigger == True)
+ .scalar()
+ or 0
+ )
return {
- 'total_accesses': result.total_accesses or 0,
- 'unique_ips': result.unique_ips or 0,
- 'unique_paths': result.unique_paths or 0,
- 'suspicious_accesses': int(result.suspicious_accesses or 0),
- 'honeypot_triggered': int(result.honeypot_triggered or 0),
- 'honeypot_ips': honeypot_ips
+ "total_accesses": result.total_accesses or 0,
+ "unique_ips": result.unique_ips or 0,
+ "unique_paths": result.unique_paths or 0,
+ "suspicious_accesses": int(result.suspicious_accesses or 0),
+ "honeypot_triggered": int(result.honeypot_triggered or 0),
+ "honeypot_ips": honeypot_ips,
}
finally:
self.close_session()
@@ -689,12 +740,13 @@ class DatabaseManager:
"""
session = self.session
try:
- results = session.query(
- AccessLog.ip,
- func.count(AccessLog.id).label('count')
- ).group_by(AccessLog.ip).order_by(
- func.count(AccessLog.id).desc()
- ).limit(limit).all()
+ results = (
+ session.query(AccessLog.ip, func.count(AccessLog.id).label("count"))
+ .group_by(AccessLog.ip)
+ .order_by(func.count(AccessLog.id).desc())
+ .limit(limit)
+ .all()
+ )
return [(row.ip, row.count) for row in results]
finally:
@@ -712,12 +764,13 @@ class DatabaseManager:
"""
session = self.session
try:
- results = session.query(
- AccessLog.path,
- func.count(AccessLog.id).label('count')
- ).group_by(AccessLog.path).order_by(
- func.count(AccessLog.id).desc()
- ).limit(limit).all()
+ results = (
+ session.query(AccessLog.path, func.count(AccessLog.id).label("count"))
+ .group_by(AccessLog.path)
+ .order_by(func.count(AccessLog.id).desc())
+ .limit(limit)
+ .all()
+ )
return [(row.path, row.count) for row in results]
finally:
@@ -735,15 +788,16 @@ class DatabaseManager:
"""
session = self.session
try:
- results = session.query(
- AccessLog.user_agent,
- func.count(AccessLog.id).label('count')
- ).filter(
- AccessLog.user_agent.isnot(None),
- AccessLog.user_agent != ''
- ).group_by(AccessLog.user_agent).order_by(
- func.count(AccessLog.id).desc()
- ).limit(limit).all()
+ results = (
+ session.query(
+ AccessLog.user_agent, func.count(AccessLog.id).label("count")
+ )
+ .filter(AccessLog.user_agent.isnot(None), AccessLog.user_agent != "")
+ .group_by(AccessLog.user_agent)
+ .order_by(func.count(AccessLog.id).desc())
+ .limit(limit)
+ .all()
+ )
return [(row.user_agent, row.count) for row in results]
finally:
@@ -761,16 +815,20 @@ class DatabaseManager:
"""
session = self.session
try:
- logs = session.query(AccessLog).filter(
- AccessLog.is_suspicious == True
- ).order_by(AccessLog.timestamp.desc()).limit(limit).all()
+ logs = (
+ session.query(AccessLog)
+ .filter(AccessLog.is_suspicious == True)
+ .order_by(AccessLog.timestamp.desc())
+ .limit(limit)
+ .all()
+ )
return [
{
- 'ip': log.ip,
- 'path': log.path,
- 'user_agent': log.user_agent,
- 'timestamp': log.timestamp.isoformat()
+ "ip": log.ip,
+ "path": log.path,
+ "user_agent": log.user_agent,
+ "timestamp": log.timestamp.isoformat(),
}
for log in logs
]
@@ -787,12 +845,11 @@ class DatabaseManager:
session = self.session
try:
# Get all honeypot triggers grouped by IP
- results = session.query(
- AccessLog.ip,
- AccessLog.path
- ).filter(
- AccessLog.is_honeypot_trigger == True
- ).all()
+ results = (
+ session.query(AccessLog.ip, AccessLog.path)
+ .filter(AccessLog.is_honeypot_trigger == True)
+ .all()
+ )
# Group paths by IP
ip_paths: Dict[str, List[str]] = {}
@@ -819,17 +876,21 @@ class DatabaseManager:
session = self.session
try:
# Get access logs that have attack detections
- logs = session.query(AccessLog).join(
- AttackDetection
- ).order_by(AccessLog.timestamp.desc()).limit(limit).all()
+ logs = (
+ session.query(AccessLog)
+ .join(AttackDetection)
+ .order_by(AccessLog.timestamp.desc())
+ .limit(limit)
+ .all()
+ )
return [
{
- 'ip': log.ip,
- 'path': log.path,
- 'user_agent': log.user_agent,
- 'timestamp': log.timestamp.isoformat(),
- 'attack_types': [d.attack_type for d in log.attack_detections]
+ "ip": log.ip,
+ "path": log.path,
+ "user_agent": log.user_agent,
+ "timestamp": log.timestamp.isoformat(),
+ "attack_types": [d.attack_type for d in log.attack_detections],
}
for log in logs
]
diff --git a/src/generators.py b/src/generators.py
index 92eb590..fd29f38 100644
--- a/src/generators.py
+++ b/src/generators.py
@@ -11,6 +11,7 @@ from templates import html_templates
from wordlists import get_wordlists
from config import get_config
+
def random_username() -> str:
"""Generate random username"""
wl = get_wordlists()
@@ -21,10 +22,10 @@ def random_password() -> str:
"""Generate random password"""
wl = get_wordlists()
templates = [
- lambda: ''.join(random.choices(string.ascii_letters + string.digits, k=12)),
+ lambda: "".join(random.choices(string.ascii_letters + string.digits, k=12)),
lambda: f"{random.choice(wl.password_prefixes)}{random.randint(100, 999)}!",
lambda: f"{random.choice(wl.simple_passwords)}{random.randint(1000, 9999)}",
- lambda: ''.join(random.choices(string.ascii_lowercase, k=8)),
+ lambda: "".join(random.choices(string.ascii_lowercase, k=8)),
]
return random.choice(templates)()
@@ -36,6 +37,7 @@ def random_email(username: str = None) -> str:
username = random_username()
return f"{username}@{random.choice(wl.email_domains)}"
+
def random_server_header() -> str:
"""Generate random server header from wordlists"""
config = get_config()
@@ -44,10 +46,11 @@ def random_server_header() -> str:
wl = get_wordlists()
return random.choice(wl.server_headers)
+
def random_api_key() -> str:
"""Generate random API key"""
wl = get_wordlists()
- key = ''.join(random.choices(string.ascii_letters + string.digits, k=32))
+ key = "".join(random.choices(string.ascii_letters + string.digits, k=32))
return random.choice(wl.api_key_prefixes) + key
@@ -87,14 +90,16 @@ def users_json() -> str:
users = []
for i in range(random.randint(3, 8)):
username = random_username()
- users.append({
- "id": i + 1,
- "username": username,
- "email": random_email(username),
- "password": random_password(),
- "role": random.choice(wl.user_roles),
- "api_token": random_api_key()
- })
+ users.append(
+ {
+ "id": i + 1,
+ "username": username,
+ "email": random_email(username),
+ "password": random_password(),
+ "role": random.choice(wl.user_roles),
+ "api_token": random_api_key(),
+ }
+ )
return json.dumps({"users": users}, indent=2)
@@ -102,20 +107,28 @@ def api_keys_json() -> str:
"""Generate fake api_keys.json with random data"""
keys = {
"stripe": {
- "public_key": "pk_live_" + ''.join(random.choices(string.ascii_letters + string.digits, k=24)),
- "secret_key": random_api_key()
+ "public_key": "pk_live_"
+ + "".join(random.choices(string.ascii_letters + string.digits, k=24)),
+ "secret_key": random_api_key(),
},
"aws": {
- "access_key_id": "AKIA" + ''.join(random.choices(string.ascii_uppercase + string.digits, k=16)),
- "secret_access_key": ''.join(random.choices(string.ascii_letters + string.digits + '+/', k=40))
+ "access_key_id": "AKIA"
+ + "".join(random.choices(string.ascii_uppercase + string.digits, k=16)),
+ "secret_access_key": "".join(
+ random.choices(string.ascii_letters + string.digits + "+/", k=40)
+ ),
},
"sendgrid": {
- "api_key": "SG." + ''.join(random.choices(string.ascii_letters + string.digits, k=48))
+ "api_key": "SG."
+ + "".join(random.choices(string.ascii_letters + string.digits, k=48))
},
"twilio": {
- "account_sid": "AC" + ''.join(random.choices(string.ascii_lowercase + string.digits, k=32)),
- "auth_token": ''.join(random.choices(string.ascii_lowercase + string.digits, k=32))
- }
+ "account_sid": "AC"
+ + "".join(random.choices(string.ascii_lowercase + string.digits, k=32)),
+ "auth_token": "".join(
+ random.choices(string.ascii_lowercase + string.digits, k=32)
+ ),
+ },
}
return json.dumps(keys, indent=2)
@@ -123,51 +136,70 @@ def api_keys_json() -> str:
def api_response(path: str) -> str:
"""Generate fake API JSON responses with random data"""
wl = get_wordlists()
-
+
def random_users(count: int = 3):
users = []
for i in range(count):
username = random_username()
- users.append({
- "id": i + 1,
- "username": username,
- "email": random_email(username),
- "role": random.choice(wl.user_roles)
- })
+ users.append(
+ {
+ "id": i + 1,
+ "username": username,
+ "email": random_email(username),
+ "role": random.choice(wl.user_roles),
+ }
+ )
return users
-
+
responses = {
- '/api/users': json.dumps({
- "users": random_users(random.randint(2, 5)),
- "total": random.randint(50, 500)
- }, indent=2),
- '/api/v1/users': json.dumps({
- "status": "success",
- "data": [{
- "id": random.randint(1, 100),
- "name": random_username(),
- "api_key": random_api_key()
- }]
- }, indent=2),
- '/api/v2/secrets': json.dumps({
- "database": {
- "host": random.choice(wl.database_hosts),
- "username": random_username(),
- "password": random_password(),
- "database": random_database_name()
+ "/api/users": json.dumps(
+ {
+ "users": random_users(random.randint(2, 5)),
+ "total": random.randint(50, 500),
},
- "api_keys": {
- "stripe": random_api_key(),
- "aws": 'AKIA' + ''.join(random.choices(string.ascii_uppercase + string.digits, k=16))
- }
- }, indent=2),
- '/api/config': json.dumps({
- "app_name": random.choice(wl.application_names),
- "debug": random.choice([True, False]),
- "secret_key": random_api_key(),
- "database_url": f"postgresql://{random_username()}:{random_password()}@localhost/{random_database_name()}"
- }, indent=2),
- '/.env': f"""APP_NAME={random.choice(wl.application_names)}
+ indent=2,
+ ),
+ "/api/v1/users": json.dumps(
+ {
+ "status": "success",
+ "data": [
+ {
+ "id": random.randint(1, 100),
+ "name": random_username(),
+ "api_key": random_api_key(),
+ }
+ ],
+ },
+ indent=2,
+ ),
+ "/api/v2/secrets": json.dumps(
+ {
+ "database": {
+ "host": random.choice(wl.database_hosts),
+ "username": random_username(),
+ "password": random_password(),
+ "database": random_database_name(),
+ },
+ "api_keys": {
+ "stripe": random_api_key(),
+ "aws": "AKIA"
+ + "".join(
+ random.choices(string.ascii_uppercase + string.digits, k=16)
+ ),
+ },
+ },
+ indent=2,
+ ),
+ "/api/config": json.dumps(
+ {
+ "app_name": random.choice(wl.application_names),
+ "debug": random.choice([True, False]),
+ "secret_key": random_api_key(),
+ "database_url": f"postgresql://{random_username()}:{random_password()}@localhost/{random_database_name()}",
+ },
+ indent=2,
+ ),
+ "/.env": f"""APP_NAME={random.choice(wl.application_names)}
DEBUG={random.choice(['true', 'false'])}
APP_KEY=base64:{''.join(random.choices(string.ascii_letters + string.digits, k=32))}=
DB_CONNECTION=mysql
@@ -179,7 +211,7 @@ DB_PASSWORD={random_password()}
AWS_ACCESS_KEY_ID=AKIA{''.join(random.choices(string.ascii_uppercase + string.digits, k=16))}
AWS_SECRET_ACCESS_KEY={''.join(random.choices(string.ascii_letters + string.digits + '+/', k=40))}
STRIPE_SECRET={random_api_key()}
-"""
+""",
}
return responses.get(path, json.dumps({"error": "Not found"}, indent=2))
@@ -187,11 +219,13 @@ STRIPE_SECRET={random_api_key()}
def directory_listing(path: str) -> str:
"""Generate fake directory listing using wordlists"""
wl = get_wordlists()
-
+
files = wl.directory_files
dirs = wl.directory_dirs
-
- selected_files = [(f, random.randint(1024, 1024*1024))
- for f in random.sample(files, min(6, len(files)))]
-
+
+ selected_files = [
+ (f, random.randint(1024, 1024 * 1024))
+ for f in random.sample(files, min(6, len(files)))
+ ]
+
return html_templates.directory_listing(path, dirs, selected_files)
diff --git a/src/handler.py b/src/handler.py
index 9cae1ce..1be7c2c 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -14,8 +14,13 @@ from analyzer import Analyzer
from templates import html_templates
from templates.dashboard_template import generate_dashboard
from generators import (
- credentials_txt, passwords_txt, users_json, api_keys_json,
- api_response, directory_listing, random_server_header
+ credentials_txt,
+ passwords_txt,
+ users_json,
+ api_keys_json,
+ api_response,
+ directory_listing,
+ random_server_header,
)
from wordlists import get_wordlists
from sql_errors import generate_sql_error_response, get_sql_response_with_data
@@ -25,6 +30,7 @@ from server_errors import generate_server_error
class Handler(BaseHTTPRequestHandler):
"""HTTP request handler for the deception server"""
+
webpages: Optional[List[str]] = None
config: Config = None
tracker: AccessTracker = None
@@ -37,15 +43,15 @@ class Handler(BaseHTTPRequestHandler):
def _get_client_ip(self) -> str:
"""Extract client IP address from request, checking proxy headers first"""
# Headers might not be available during early error logging
- if hasattr(self, 'headers') and self.headers:
+ if hasattr(self, "headers") and self.headers:
# Check X-Forwarded-For header (set by load balancers/proxies)
- forwarded_for = self.headers.get('X-Forwarded-For')
+ forwarded_for = self.headers.get("X-Forwarded-For")
if forwarded_for:
# X-Forwarded-For can contain multiple IPs, get the first (original client)
- return forwarded_for.split(',')[0].strip()
+ return forwarded_for.split(",")[0].strip()
# Check X-Real-IP header (set by nginx and other proxies)
- real_ip = self.headers.get('X-Real-IP')
+ real_ip = self.headers.get("X-Real-IP")
if real_ip:
return real_ip.strip()
@@ -54,7 +60,7 @@ class Handler(BaseHTTPRequestHandler):
def _get_user_agent(self) -> str:
"""Extract user agent from request"""
- return self.headers.get('User-Agent', '')
+ return self.headers.get("User-Agent", "")
def _get_category_by_ip(self, client_ip: str) -> str:
"""Get the category of an IP from the database"""
@@ -97,7 +103,7 @@ class Handler(BaseHTTPRequestHandler):
Returns True if the path was handled, False otherwise.
"""
# SQL-vulnerable endpoints
- sql_endpoints = ['/api/search', '/api/sql', '/api/database']
+ sql_endpoints = ["/api/search", "/api/sql", "/api/database"]
base_path = urlparse(path).path
if base_path not in sql_endpoints:
@@ -112,22 +118,30 @@ class Handler(BaseHTTPRequestHandler):
user_agent = self._get_user_agent()
# Always check for SQL injection patterns
- error_msg, content_type, status_code = generate_sql_error_response(query_string or "")
+ error_msg, content_type, status_code = generate_sql_error_response(
+ query_string or ""
+ )
if error_msg:
# SQL injection detected - log and return error
- self.access_logger.warning(f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}")
+ self.access_logger.warning(
+ f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}"
+ )
self.send_response(status_code)
- self.send_header('Content-type', content_type)
+ self.send_header("Content-type", content_type)
self.end_headers()
self.wfile.write(error_msg.encode())
else:
# No injection detected - return fake data
- self.access_logger.info(f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}")
+ self.access_logger.info(
+ f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}"
+ )
self.send_response(200)
- self.send_header('Content-type', 'application/json')
+ self.send_header("Content-type", "application/json")
self.end_headers()
- response_data = get_sql_response_with_data(base_path, query_string or "")
+ response_data = get_sql_response_with_data(
+ base_path, query_string or ""
+ )
self.wfile.write(response_data.encode())
return True
@@ -140,7 +154,7 @@ class Handler(BaseHTTPRequestHandler):
# Still send a response even on error
try:
self.send_response(500)
- self.send_header('Content-type', 'application/json')
+ self.send_header("Content-type", "application/json")
self.end_headers()
self.wfile.write(b'{"error": "Internal server error"}')
except:
@@ -148,31 +162,35 @@ class Handler(BaseHTTPRequestHandler):
return True
def generate_page(self, seed: str, page_visit_count: int) -> str:
- """Generate a webpage containing random links or canary token"""
+ """Generate a webpage containing random links or canary token"""
random.seed(seed)
num_pages = random.randint(*self.config.links_per_page_range)
-
+
# Check if this is a good crawler by IP category from database
ip_category = self._get_category_by_ip(self._get_client_ip())
-
+
# Determine if we should apply crawler page limit based on config and IP category
should_apply_crawler_limit = False
if self.config.infinite_pages_for_malicious:
- if (ip_category == "good_crawler" or ip_category == "regular_user") and page_visit_count >= self.config.max_pages_limit:
+ if (
+ ip_category == "good_crawler" or ip_category == "regular_user"
+ ) and page_visit_count >= self.config.max_pages_limit:
should_apply_crawler_limit = True
else:
- if (ip_category == "good_crawler" or ip_category == "bad_crawler" or ip_category == "attacker") and page_visit_count >= self.config.max_pages_limit:
+ if (
+ ip_category == "good_crawler"
+ or ip_category == "bad_crawler"
+ or ip_category == "attacker"
+ ) and page_visit_count >= self.config.max_pages_limit:
should_apply_crawler_limit = True
-
# If good crawler reached max pages, return a simple page with no links
if should_apply_crawler_limit:
return html_templates.main_page(
- Handler.counter,
- '
Crawl limit reached.
'
+ Handler.counter, "Crawl limit reached.
"
)
-
+
num_pages = random.randint(*self.config.links_per_page_range)
# Build the content HTML
@@ -189,10 +207,12 @@ class Handler(BaseHTTPRequestHandler):
# Add links
if self.webpages is None:
for _ in range(num_pages):
- address = ''.join([
- random.choice(self.config.char_space)
- for _ in range(random.randint(*self.config.links_length_range))
- ])
+ address = "".join(
+ [
+ random.choice(self.config.char_space)
+ for _ in range(random.randint(*self.config.links_length_range))
+ ]
+ )
content += f"""
{address}
@@ -223,27 +243,36 @@ class Handler(BaseHTTPRequestHandler):
post_data = ""
from urllib.parse import urlparse
+
base_path = urlparse(self.path).path
- if base_path in ['/api/search', '/api/sql', '/api/database']:
- content_length = int(self.headers.get('Content-Length', 0))
+ if base_path in ["/api/search", "/api/sql", "/api/database"]:
+ content_length = int(self.headers.get("Content-Length", 0))
if content_length > 0:
- post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
+ post_data = self.rfile.read(content_length).decode(
+ "utf-8", errors="replace"
+ )
- self.access_logger.info(f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}")
+ self.access_logger.info(
+ f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}"
+ )
- error_msg, content_type, status_code = generate_sql_error_response(post_data)
+ error_msg, content_type, status_code = generate_sql_error_response(
+ post_data
+ )
try:
if error_msg:
- self.access_logger.warning(f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}")
+ self.access_logger.warning(
+ f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}"
+ )
self.send_response(status_code)
- self.send_header('Content-type', content_type)
+ self.send_header("Content-type", content_type)
self.end_headers()
self.wfile.write(error_msg.encode())
else:
self.send_response(200)
- self.send_header('Content-type', 'application/json')
+ self.send_header("Content-type", "application/json")
self.end_headers()
response_data = get_sql_response_with_data(base_path, post_data)
self.wfile.write(response_data.encode())
@@ -253,28 +282,35 @@ class Handler(BaseHTTPRequestHandler):
self.app_logger.error(f"Error in SQL POST handler: {str(e)}")
return
- if base_path == '/api/contact':
- content_length = int(self.headers.get('Content-Length', 0))
+ if base_path == "/api/contact":
+ content_length = int(self.headers.get("Content-Length", 0))
if content_length > 0:
- post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
+ post_data = self.rfile.read(content_length).decode(
+ "utf-8", errors="replace"
+ )
parsed_data = {}
- for pair in post_data.split('&'):
- if '=' in pair:
- key, value = pair.split('=', 1)
+ for pair in post_data.split("&"):
+ if "=" in pair:
+ key, value = pair.split("=", 1)
from urllib.parse import unquote_plus
+
parsed_data[unquote_plus(key)] = unquote_plus(value)
xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values())
if xss_detected:
- self.access_logger.warning(f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}")
+ self.access_logger.warning(
+ f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}"
+ )
else:
- self.access_logger.info(f"[XSS ENDPOINT POST] {client_ip} - {base_path}")
+ self.access_logger.info(
+ f"[XSS ENDPOINT POST] {client_ip} - {base_path}"
+ )
try:
self.send_response(200)
- self.send_header('Content-type', 'text/html')
+ self.send_header("Content-type", "text/html")
self.end_headers()
response_html = generate_xss_response(parsed_data)
self.wfile.write(response_html.encode())
@@ -284,11 +320,15 @@ class Handler(BaseHTTPRequestHandler):
self.app_logger.error(f"Error in XSS POST handler: {str(e)}")
return
- self.access_logger.warning(f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}")
+ self.access_logger.warning(
+ f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}"
+ )
- content_length = int(self.headers.get('Content-Length', 0))
+ content_length = int(self.headers.get("Content-Length", 0))
if content_length > 0:
- post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
+ post_data = self.rfile.read(content_length).decode(
+ "utf-8", errors="replace"
+ )
self.access_logger.warning(f"[POST DATA] {post_data[:200]}")
@@ -301,18 +341,24 @@ class Handler(BaseHTTPRequestHandler):
self.credential_logger.info(credential_line)
# Also record in tracker for dashboard
- self.tracker.record_credential_attempt(client_ip, self.path, username or 'N/A', password or 'N/A')
+ self.tracker.record_credential_attempt(
+ client_ip, self.path, username or "N/A", password or "N/A"
+ )
- self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}")
+ self.access_logger.warning(
+ f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}"
+ )
# send the post data (body) to the record_access function so the post data can be used to detect suspicious things.
- self.tracker.record_access(client_ip, self.path, user_agent, post_data, method='POST')
+ self.tracker.record_access(
+ client_ip, self.path, user_agent, post_data, method="POST"
+ )
time.sleep(1)
try:
self.send_response(200)
- self.send_header('Content-type', 'text/html')
+ self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.login_error().encode())
except BrokenPipeError:
@@ -330,95 +376,102 @@ class Handler(BaseHTTPRequestHandler):
return True
try:
- if path == '/robots.txt':
+ if path == "/robots.txt":
self.send_response(200)
- self.send_header('Content-type', 'text/plain')
+ self.send_header("Content-type", "text/plain")
self.end_headers()
self.wfile.write(html_templates.robots_txt().encode())
return True
- if path in ['/credentials.txt', '/passwords.txt', '/admin_notes.txt']:
+ if path in ["/credentials.txt", "/passwords.txt", "/admin_notes.txt"]:
self.send_response(200)
- self.send_header('Content-type', 'text/plain')
+ self.send_header("Content-type", "text/plain")
self.end_headers()
- if 'credentials' in path:
+ if "credentials" in path:
self.wfile.write(credentials_txt().encode())
else:
self.wfile.write(passwords_txt().encode())
return True
- if path in ['/users.json', '/api_keys.json', '/config.json']:
+ if path in ["/users.json", "/api_keys.json", "/config.json"]:
self.send_response(200)
- self.send_header('Content-type', 'application/json')
+ self.send_header("Content-type", "application/json")
self.end_headers()
- if 'users' in path:
+ if "users" in path:
self.wfile.write(users_json().encode())
- elif 'api_keys' in path:
+ elif "api_keys" in path:
self.wfile.write(api_keys_json().encode())
else:
- self.wfile.write(api_response('/api/config').encode())
+ self.wfile.write(api_response("/api/config").encode())
return True
- if path in ['/admin', '/admin/', '/admin/login', '/login']:
+ if path in ["/admin", "/admin/", "/admin/login", "/login"]:
self.send_response(200)
- self.send_header('Content-type', 'text/html')
+ self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.login_form().encode())
return True
- if path in ['/users', '/user', '/database', '/db', '/search']:
+ if path in ["/users", "/user", "/database", "/db", "/search"]:
self.send_response(200)
- self.send_header('Content-type', 'text/html')
+ self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.product_search().encode())
return True
- if path in ['/info', '/input', '/contact', '/feedback', '/comment']:
+ if path in ["/info", "/input", "/contact", "/feedback", "/comment"]:
self.send_response(200)
- self.send_header('Content-type', 'text/html')
+ self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.input_form().encode())
return True
- if path == '/server':
+ if path == "/server":
error_html, content_type = generate_server_error()
self.send_response(500)
- self.send_header('Content-type', content_type)
+ self.send_header("Content-type", content_type)
self.end_headers()
self.wfile.write(error_html.encode())
return True
- if path in ['/wp-login.php', '/wp-login', '/wp-admin', '/wp-admin/']:
+ if path in ["/wp-login.php", "/wp-login", "/wp-admin", "/wp-admin/"]:
self.send_response(200)
- self.send_header('Content-type', 'text/html')
+ self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.wp_login().encode())
return True
- if path in ['/wp-content/', '/wp-includes/'] or 'wordpress' in path.lower():
+ if path in ["/wp-content/", "/wp-includes/"] or "wordpress" in path.lower():
self.send_response(200)
- self.send_header('Content-type', 'text/html')
+ self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.wordpress().encode())
return True
- if 'phpmyadmin' in path.lower() or path in ['/pma/', '/phpMyAdmin/']:
+ if "phpmyadmin" in path.lower() or path in ["/pma/", "/phpMyAdmin/"]:
self.send_response(200)
- self.send_header('Content-type', 'text/html')
+ self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(html_templates.phpmyadmin().encode())
return True
- if path.startswith('/api/') or path.startswith('/api') or path in ['/.env']:
+ if path.startswith("/api/") or path.startswith("/api") or path in ["/.env"]:
self.send_response(200)
- self.send_header('Content-type', 'application/json')
+ self.send_header("Content-type", "application/json")
self.end_headers()
self.wfile.write(api_response(path).encode())
return True
- if path in ['/backup/', '/uploads/', '/private/', '/admin/', '/config/', '/database/']:
+ if path in [
+ "/backup/",
+ "/uploads/",
+ "/private/",
+ "/admin/",
+ "/config/",
+ "/database/",
+ ]:
self.send_response(200)
- self.send_header('Content-type', 'text/html')
+ self.send_header("Content-type", "text/html")
self.end_headers()
self.wfile.write(directory_listing(path).encode())
return True
@@ -440,9 +493,12 @@ class Handler(BaseHTTPRequestHandler):
return
user_agent = self._get_user_agent()
- if self.config.dashboard_secret_path and self.path == self.config.dashboard_secret_path:
+ if (
+ self.config.dashboard_secret_path
+ and self.path == self.config.dashboard_secret_path
+ ):
self.send_response(200)
- self.send_header('Content-type', 'text/html')
+ self.send_header("Content-type", "text/html")
self.end_headers()
try:
stats = self.tracker.get_stats()
@@ -455,72 +511,93 @@ class Handler(BaseHTTPRequestHandler):
return
# API endpoint for fetching IP stats
- if self.config.dashboard_secret_path and self.path.startswith(f"{self.config.dashboard_secret_path}/api/ip-stats/"):
- ip_address = self.path.replace(f"{self.config.dashboard_secret_path}/api/ip-stats/", "")
+ if self.config.dashboard_secret_path and self.path.startswith(
+ f"{self.config.dashboard_secret_path}/api/ip-stats/"
+ ):
+ ip_address = self.path.replace(
+ f"{self.config.dashboard_secret_path}/api/ip-stats/", ""
+ )
self.send_response(200)
- self.send_header('Content-type', 'application/json')
- self.send_header('Access-Control-Allow-Origin', '*')
+ self.send_header("Content-type", "application/json")
+ self.send_header("Access-Control-Allow-Origin", "*")
# Prevent browser caching - force fresh data from database every time
- self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0')
- self.send_header('Pragma', 'no-cache')
- self.send_header('Expires', '0')
+ self.send_header(
+ "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0"
+ )
+ self.send_header("Pragma", "no-cache")
+ self.send_header("Expires", "0")
self.end_headers()
try:
from database import get_database
import json
+
db = get_database()
ip_stats = db.get_ip_stats_by_ip(ip_address)
if ip_stats:
self.wfile.write(json.dumps(ip_stats).encode())
else:
- self.wfile.write(json.dumps({'error': 'IP not found'}).encode())
+ self.wfile.write(json.dumps({"error": "IP not found"}).encode())
except BrokenPipeError:
pass
except Exception as e:
self.app_logger.error(f"Error fetching IP stats: {e}")
- self.wfile.write(json.dumps({'error': str(e)}).encode())
+ self.wfile.write(json.dumps({"error": str(e)}).encode())
return
# API endpoint for downloading malicious IPs file
- if self.config.dashboard_secret_path and self.path == f"{self.config.dashboard_secret_path}/api/download/malicious_ips.txt":
+ if (
+ self.config.dashboard_secret_path
+ and self.path
+ == f"{self.config.dashboard_secret_path}/api/download/malicious_ips.txt"
+ ):
import os
- file_path = os.path.join(os.path.dirname(__file__), 'exports', 'malicious_ips.txt')
+
+ file_path = os.path.join(
+ os.path.dirname(__file__), "exports", "malicious_ips.txt"
+ )
try:
if os.path.exists(file_path):
- with open(file_path, 'rb') as f:
+ with open(file_path, "rb") as f:
content = f.read()
self.send_response(200)
- self.send_header('Content-type', 'text/plain')
- self.send_header('Content-Disposition', 'attachment; filename="malicious_ips.txt"')
- self.send_header('Content-Length', str(len(content)))
+ self.send_header("Content-type", "text/plain")
+ self.send_header(
+ "Content-Disposition",
+ 'attachment; filename="malicious_ips.txt"',
+ )
+ self.send_header("Content-Length", str(len(content)))
self.end_headers()
self.wfile.write(content)
else:
self.send_response(404)
- self.send_header('Content-type', 'text/plain')
+ self.send_header("Content-type", "text/plain")
self.end_headers()
- self.wfile.write(b'File not found')
+ self.wfile.write(b"File not found")
except BrokenPipeError:
pass
except Exception as e:
self.app_logger.error(f"Error serving malicious IPs file: {e}")
self.send_response(500)
- self.send_header('Content-type', 'text/plain')
+ self.send_header("Content-type", "text/plain")
self.end_headers()
- self.wfile.write(b'Internal server error')
+ self.wfile.write(b"Internal server error")
return
- self.tracker.record_access(client_ip, self.path, user_agent, method='GET')
+ self.tracker.record_access(client_ip, self.path, user_agent, method="GET")
# self.analyzer.infer_user_category(client_ip)
# self.analyzer.update_ip_rep_infos(client_ip)
if self.tracker.is_suspicious_user_agent(user_agent):
- self.access_logger.warning(f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}")
+ self.access_logger.warning(
+ f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}"
+ )
if self._should_return_error():
error_code = self._get_random_error_code()
- self.access_logger.info(f"Returning error {error_code} to {client_ip} - {self.path}")
+ self.access_logger.info(
+ f"Returning error {error_code} to {client_ip} - {self.path}"
+ )
self.send_response(error_code)
self.end_headers()
return
@@ -530,13 +607,15 @@ class Handler(BaseHTTPRequestHandler):
time.sleep(self.config.delay / 1000.0)
self.send_response(200)
- self.send_header('Content-type', 'text/html')
+ self.send_header("Content-type", "text/html")
self.end_headers()
try:
# Increment page visit counter for this IP and get the current count
current_visit_count = self._increment_page_visit(client_ip)
- self.wfile.write(self.generate_page(self.path, current_visit_count).encode())
+ self.wfile.write(
+ self.generate_page(self.path, current_visit_count).encode()
+ )
Handler.counter -= 1
diff --git a/src/logger.py b/src/logger.py
index bf16c77..9762002 100644
--- a/src/logger.py
+++ b/src/logger.py
@@ -13,6 +13,7 @@ from datetime import datetime
class TimezoneFormatter(logging.Formatter):
"""Custom formatter that respects configured timezone"""
+
def __init__(self, fmt=None, datefmt=None):
super().__init__(fmt, datefmt)
@@ -26,6 +27,7 @@ class TimezoneFormatter(logging.Formatter):
class LoggerManager:
"""Singleton logger manager for the application."""
+
_instance = None
def __new__(cls):
@@ -65,7 +67,7 @@ class LoggerManager:
app_file_handler = RotatingFileHandler(
os.path.join(log_dir, "krawl.log"),
maxBytes=max_bytes,
- backupCount=backup_count
+ backupCount=backup_count,
)
app_file_handler.setFormatter(log_format)
self._app_logger.addHandler(app_file_handler)
@@ -82,7 +84,7 @@ class LoggerManager:
access_file_handler = RotatingFileHandler(
os.path.join(log_dir, "access.log"),
maxBytes=max_bytes,
- backupCount=backup_count
+ backupCount=backup_count,
)
access_file_handler.setFormatter(log_format)
self._access_logger.addHandler(access_file_handler)
@@ -102,7 +104,7 @@ class LoggerManager:
credential_file_handler = RotatingFileHandler(
os.path.join(log_dir, "credentials.log"),
maxBytes=max_bytes,
- backupCount=backup_count
+ backupCount=backup_count,
)
credential_file_handler.setFormatter(credential_format)
self._credential_logger.addHandler(credential_file_handler)
diff --git a/src/migrations/add_category_history.py b/src/migrations/add_category_history.py
index 654204e..622b61c 100644
--- a/src/migrations/add_category_history.py
+++ b/src/migrations/add_category_history.py
@@ -17,20 +17,20 @@ from models import Base, CategoryHistory
def migrate():
"""Create CategoryHistory table if it doesn't exist."""
print("Starting migration: Adding CategoryHistory table...")
-
+
try:
db = get_database()
-
+
# Initialize database if not already done
if not db._initialized:
db.initialize()
-
+
# Create only the CategoryHistory table
CategoryHistory.__table__.create(db._engine, checkfirst=True)
-
+
print("✓ Migration completed successfully!")
print(" - CategoryHistory table created")
-
+
except Exception as e:
print(f"✗ Migration failed: {e}")
sys.exit(1)
diff --git a/src/models.py b/src/models.py
index 4a13278..3789ab2 100644
--- a/src/models.py
+++ b/src/models.py
@@ -25,6 +25,7 @@ from sanitizer import (
class Base(DeclarativeBase):
"""Base class for all ORM models."""
+
pass
@@ -35,30 +36,35 @@ class AccessLog(Base):
Stores request metadata, suspicious activity flags, and timestamps
for analysis and dashboard display.
"""
- __tablename__ = 'access_logs'
+
+ __tablename__ = "access_logs"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
- #ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True, ForeignKey('ip_logs.id', ondelete='CASCADE'))
+ # ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True, ForeignKey('ip_logs.id', ondelete='CASCADE'))
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False)
- user_agent: Mapped[Optional[str]] = mapped_column(String(MAX_USER_AGENT_LENGTH), nullable=True)
- method: Mapped[str] = mapped_column(String(10), nullable=False, default='GET')
+ user_agent: Mapped[Optional[str]] = mapped_column(
+ String(MAX_USER_AGENT_LENGTH), nullable=True
+ )
+ method: Mapped[str] = mapped_column(String(10), nullable=False, default="GET")
is_suspicious: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
- is_honeypot_trigger: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
- timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
+ is_honeypot_trigger: Mapped[bool] = mapped_column(
+ Boolean, nullable=False, default=False
+ )
+ timestamp: Mapped[datetime] = mapped_column(
+ DateTime, nullable=False, default=datetime.utcnow, index=True
+ )
# Relationship to attack detections
attack_detections: Mapped[List["AttackDetection"]] = relationship(
- "AttackDetection",
- back_populates="access_log",
- cascade="all, delete-orphan"
+ "AttackDetection", back_populates="access_log", cascade="all, delete-orphan"
)
# Indexes for common queries
__table_args__ = (
- Index('ix_access_logs_ip_timestamp', 'ip', 'timestamp'),
- Index('ix_access_logs_is_suspicious', 'is_suspicious'),
- Index('ix_access_logs_is_honeypot_trigger', 'is_honeypot_trigger'),
+ Index("ix_access_logs_ip_timestamp", "ip", "timestamp"),
+ Index("ix_access_logs_is_suspicious", "is_suspicious"),
+ Index("ix_access_logs_is_honeypot_trigger", "is_honeypot_trigger"),
)
def __repr__(self) -> str:
@@ -71,19 +77,24 @@ class CredentialAttempt(Base):
Stores the submitted username and password along with request metadata.
"""
- __tablename__ = 'credential_attempts'
+
+ __tablename__ = "credential_attempts"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False)
- username: Mapped[Optional[str]] = mapped_column(String(MAX_CREDENTIAL_LENGTH), nullable=True)
- password: Mapped[Optional[str]] = mapped_column(String(MAX_CREDENTIAL_LENGTH), nullable=True)
- timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
+ username: Mapped[Optional[str]] = mapped_column(
+ String(MAX_CREDENTIAL_LENGTH), nullable=True
+ )
+ password: Mapped[Optional[str]] = mapped_column(
+ String(MAX_CREDENTIAL_LENGTH), nullable=True
+ )
+ timestamp: Mapped[datetime] = mapped_column(
+ DateTime, nullable=False, default=datetime.utcnow, index=True
+ )
# Composite index for common queries
- __table_args__ = (
- Index('ix_credential_attempts_ip_timestamp', 'ip', 'timestamp'),
- )
+ __table_args__ = (Index("ix_credential_attempts_ip_timestamp", "ip", "timestamp"),)
def __repr__(self) -> str:
return f"
"
@@ -96,20 +107,25 @@ class AttackDetection(Base):
Linked to the parent AccessLog record. Multiple attack types can be
detected in a single request.
"""
- __tablename__ = 'attack_detections'
+
+ __tablename__ = "attack_detections"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
access_log_id: Mapped[int] = mapped_column(
Integer,
- ForeignKey('access_logs.id', ondelete='CASCADE'),
+ ForeignKey("access_logs.id", ondelete="CASCADE"),
nullable=False,
- index=True
+ index=True,
)
attack_type: Mapped[str] = mapped_column(String(50), nullable=False)
- matched_pattern: Mapped[Optional[str]] = mapped_column(String(MAX_ATTACK_PATTERN_LENGTH), nullable=True)
+ matched_pattern: Mapped[Optional[str]] = mapped_column(
+ String(MAX_ATTACK_PATTERN_LENGTH), nullable=True
+ )
# Relationship back to access log
- access_log: Mapped["AccessLog"] = relationship("AccessLog", back_populates="attack_detections")
+ access_log: Mapped["AccessLog"] = relationship(
+ "AccessLog", back_populates="attack_detections"
+ )
def __repr__(self) -> str:
return f""
@@ -122,33 +138,43 @@ class IpStats(Base):
Includes fields for future GeoIP and reputation enrichment.
Updated on each request from an IP.
"""
- __tablename__ = 'ip_stats'
+
+ __tablename__ = "ip_stats"
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), primary_key=True)
total_requests: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
- first_seen: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow)
- last_seen: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow)
+ first_seen: Mapped[datetime] = mapped_column(
+ DateTime, nullable=False, default=datetime.utcnow
+ )
+ last_seen: Mapped[datetime] = mapped_column(
+ DateTime, nullable=False, default=datetime.utcnow
+ )
# GeoIP fields (populated by future enrichment)
country_code: Mapped[Optional[str]] = mapped_column(String(2), nullable=True)
city: Mapped[Optional[str]] = mapped_column(String(MAX_CITY_LENGTH), nullable=True)
asn: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
- asn_org: Mapped[Optional[str]] = mapped_column(String(MAX_ASN_ORG_LENGTH), nullable=True)
- list_on: Mapped[Optional[Dict[str,str]]] = mapped_column(JSON, nullable=True)
+ asn_org: Mapped[Optional[str]] = mapped_column(
+ String(MAX_ASN_ORG_LENGTH), nullable=True
+ )
+ list_on: Mapped[Optional[Dict[str, str]]] = mapped_column(JSON, nullable=True)
# Reputation fields (populated by future enrichment)
reputation_score: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
- reputation_source: Mapped[Optional[str]] = mapped_column(String(MAX_REPUTATION_SOURCE_LENGTH), nullable=True)
- reputation_updated: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True)
+ reputation_source: Mapped[Optional[str]] = mapped_column(
+ String(MAX_REPUTATION_SOURCE_LENGTH), nullable=True
+ )
+ reputation_updated: Mapped[Optional[datetime]] = mapped_column(
+ DateTime, nullable=True
+ )
- #Analyzed metrics, category and category scores
- analyzed_metrics: Mapped[Dict[str,object]] = mapped_column(JSON, nullable=True)
+ # Analyzed metrics, category and category scores
+ analyzed_metrics: Mapped[Dict[str, object]] = mapped_column(JSON, nullable=True)
category: Mapped[str] = mapped_column(String, nullable=True)
- category_scores: Mapped[Dict[str,int]] = mapped_column(JSON, nullable=True)
+ category_scores: Mapped[Dict[str, int]] = mapped_column(JSON, nullable=True)
manual_category: Mapped[bool] = mapped_column(Boolean, default=False, nullable=True)
last_analysis: Mapped[datetime] = mapped_column(DateTime, nullable=True)
-
def __repr__(self) -> str:
return f""
@@ -160,18 +186,19 @@ class CategoryHistory(Base):
Tracks when an IP's category changes, storing both the previous
and new category along with timestamp for timeline visualization.
"""
- __tablename__ = 'category_history'
+
+ __tablename__ = "category_history"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
old_category: Mapped[Optional[str]] = mapped_column(String(50), nullable=True)
new_category: Mapped[str] = mapped_column(String(50), nullable=False)
- timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True)
+ timestamp: Mapped[datetime] = mapped_column(
+ DateTime, nullable=False, default=datetime.utcnow, index=True
+ )
# Composite index for efficient IP-based timeline queries
- __table_args__ = (
- Index('ix_category_history_ip_timestamp', 'ip', 'timestamp'),
- )
+ __table_args__ = (Index("ix_category_history_ip_timestamp", "ip", "timestamp"),)
def __repr__(self) -> str:
return f" {self.new_category})>"
@@ -205,4 +232,4 @@ class CategoryHistory(Base):
# )
# def __repr__(self) -> str:
-# return f""
\ No newline at end of file
+# return f""
diff --git a/src/sanitizer.py b/src/sanitizer.py
index a04d0c0..fa2ba60 100644
--- a/src/sanitizer.py
+++ b/src/sanitizer.py
@@ -9,7 +9,6 @@ import html
import re
from typing import Optional, Dict
-
# Field length limits for database storage
MAX_IP_LENGTH = 45 # IPv6 max length
MAX_PATH_LENGTH = 2048 # URL max practical length
@@ -43,7 +42,7 @@ def sanitize_for_storage(value: Optional[str], max_length: int) -> str:
# Remove null bytes and control characters (except newline \n, tab \t, carriage return \r)
# Control chars are 0x00-0x1F and 0x7F, we keep 0x09 (tab), 0x0A (newline), 0x0D (carriage return)
- cleaned = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', value)
+ cleaned = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", value)
# Truncate to max length
return cleaned[:max_length]
@@ -112,5 +111,6 @@ def escape_html_truncated(value: Optional[str], max_display_length: int) -> str:
return html.escape(value_str)
-def sanitize_dict(value: Optional[Dict[str,str]], max_display_length):
- return {k: sanitize_for_storage(v, max_display_length) for k, v in value.items()}
\ No newline at end of file
+
+def sanitize_dict(value: Optional[Dict[str, str]], max_display_length):
+ return {k: sanitize_for_storage(v, max_display_length) for k, v in value.items()}
diff --git a/src/server.py b/src/server.py
index 05bc006..524359d 100644
--- a/src/server.py
+++ b/src/server.py
@@ -12,43 +12,48 @@ from config import get_config
from tracker import AccessTracker
from analyzer import Analyzer
from handler import Handler
-from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
+from logger import (
+ initialize_logging,
+ get_app_logger,
+ get_access_logger,
+ get_credential_logger,
+)
from database import initialize_database
from tasks_master import get_tasksmaster
def print_usage():
"""Print usage information"""
- print(f'Usage: {sys.argv[0]} [FILE]\n')
- print('FILE is file containing a list of webpage names to serve, one per line.')
- print('If no file is provided, random links will be generated.\n')
- print('Configuration:')
- print(' Configuration is loaded from a YAML file (default: config.yaml)')
- print(' Set CONFIG_LOCATION environment variable to use a different file.\n')
- print(' Example config.yaml structure:')
- print(' server:')
- print(' port: 5000')
- print(' delay: 100')
- print(' links:')
- print(' min_length: 5')
- print(' max_length: 15')
- print(' min_per_page: 10')
- print(' max_per_page: 15')
- print(' canary:')
- print(' token_url: null')
- print(' token_tries: 10')
- print(' dashboard:')
- print(' secret_path: null # auto-generated if not set')
- print(' database:')
+ print(f"Usage: {sys.argv[0]} [FILE]\n")
+ print("FILE is file containing a list of webpage names to serve, one per line.")
+ print("If no file is provided, random links will be generated.\n")
+ print("Configuration:")
+ print(" Configuration is loaded from a YAML file (default: config.yaml)")
+ print(" Set CONFIG_LOCATION environment variable to use a different file.\n")
+ print(" Example config.yaml structure:")
+ print(" server:")
+ print(" port: 5000")
+ print(" delay: 100")
+ print(" links:")
+ print(" min_length: 5")
+ print(" max_length: 15")
+ print(" min_per_page: 10")
+ print(" max_per_page: 15")
+ print(" canary:")
+ print(" token_url: null")
+ print(" token_tries: 10")
+ print(" dashboard:")
+ print(" secret_path: null # auto-generated if not set")
+ print(" database:")
print(' path: "data/krawl.db"')
- print(' retention_days: 30')
- print(' behavior:')
- print(' probability_error_codes: 0')
+ print(" retention_days: 30")
+ print(" behavior:")
+ print(" probability_error_codes: 0")
def main():
"""Main entry point for the deception server"""
- if '-h' in sys.argv or '--help' in sys.argv:
+ if "-h" in sys.argv or "--help" in sys.argv:
print_usage()
exit(0)
@@ -63,9 +68,11 @@ def main():
# Initialize database for persistent storage
try:
initialize_database(config.database_path)
- app_logger.info(f'Database initialized at: {config.database_path}')
+ app_logger.info(f"Database initialized at: {config.database_path}")
except Exception as e:
- app_logger.warning(f'Database initialization failed: {e}. Continuing with in-memory only.')
+ app_logger.warning(
+ f"Database initialization failed: {e}. Continuing with in-memory only."
+ )
tracker = AccessTracker(config.max_pages_limit, config.ban_duration_seconds)
analyzer = Analyzer()
@@ -80,11 +87,13 @@ def main():
if len(sys.argv) == 2:
try:
- with open(sys.argv[1], 'r') as f:
+ with open(sys.argv[1], "r") as f:
Handler.webpages = f.readlines()
if not Handler.webpages:
- app_logger.warning('The file provided was empty. Using randomly generated links.')
+ app_logger.warning(
+ "The file provided was empty. Using randomly generated links."
+ )
Handler.webpages = None
except IOError:
app_logger.warning("Can't read input file. Using randomly generated links.")
@@ -94,25 +103,31 @@ def main():
tasks_master.run_scheduled_tasks()
try:
- app_logger.info(f'Starting deception server on port {config.port}...')
- app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}')
+ app_logger.info(f"Starting deception server on port {config.port}...")
+ app_logger.info(f"Dashboard available at: {config.dashboard_secret_path}")
if config.canary_token_url:
- app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries')
+ app_logger.info(
+ f"Canary token will appear after {config.canary_token_tries} tries"
+ )
else:
- app_logger.info('No canary token configured (set CANARY_TOKEN_URL to enable)')
+ app_logger.info(
+ "No canary token configured (set CANARY_TOKEN_URL to enable)"
+ )
- server = HTTPServer(('0.0.0.0', config.port), Handler)
- app_logger.info('Server started. Use to stop.')
+ server = HTTPServer(("0.0.0.0", config.port), Handler)
+ app_logger.info("Server started. Use to stop.")
server.serve_forever()
except KeyboardInterrupt:
- app_logger.info('Stopping server...')
+ app_logger.info("Stopping server...")
server.socket.close()
- app_logger.info('Server stopped')
+ app_logger.info("Server stopped")
except Exception as e:
- app_logger.error(f'Error starting HTTP server on port {config.port}: {e}')
- app_logger.error(f'Make sure you are root, if needed, and that port {config.port} is open.')
+ app_logger.error(f"Error starting HTTP server on port {config.port}: {e}")
+ app_logger.error(
+ f"Make sure you are root, if needed, and that port {config.port} is open."
+ )
exit(1)
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/src/server_errors.py b/src/server_errors.py
index 7591c64..7b55654 100644
--- a/src/server_errors.py
+++ b/src/server_errors.py
@@ -7,13 +7,13 @@ from wordlists import get_wordlists
def generate_server_error() -> tuple[str, str]:
wl = get_wordlists()
server_errors = wl.server_errors
-
+
if not server_errors:
return ("500 Internal Server Error", "text/html")
-
+
server_type = random.choice(list(server_errors.keys()))
server_config = server_errors[server_type]
-
+
error_codes = {
400: "Bad Request",
401: "Unauthorized",
@@ -21,45 +21,45 @@ def generate_server_error() -> tuple[str, str]:
404: "Not Found",
500: "Internal Server Error",
502: "Bad Gateway",
- 503: "Service Unavailable"
+ 503: "Service Unavailable",
}
-
+
code = random.choice(list(error_codes.keys()))
message = error_codes[code]
-
- template = server_config.get('template', '')
- version = random.choice(server_config.get('versions', ['1.0']))
-
- html = template.replace('{code}', str(code))
- html = html.replace('{message}', message)
- html = html.replace('{version}', version)
-
- if server_type == 'apache':
- os = random.choice(server_config.get('os', ['Ubuntu']))
- html = html.replace('{os}', os)
- html = html.replace('{host}', 'localhost')
-
+
+ template = server_config.get("template", "")
+ version = random.choice(server_config.get("versions", ["1.0"]))
+
+ html = template.replace("{code}", str(code))
+ html = html.replace("{message}", message)
+ html = html.replace("{version}", version)
+
+ if server_type == "apache":
+ os = random.choice(server_config.get("os", ["Ubuntu"]))
+ html = html.replace("{os}", os)
+ html = html.replace("{host}", "localhost")
+
return (html, "text/html")
def get_server_header(server_type: str = None) -> str:
wl = get_wordlists()
server_errors = wl.server_errors
-
+
if not server_errors:
return "nginx/1.18.0"
-
+
if not server_type:
server_type = random.choice(list(server_errors.keys()))
-
+
server_config = server_errors.get(server_type, {})
- version = random.choice(server_config.get('versions', ['1.0']))
-
+ version = random.choice(server_config.get("versions", ["1.0"]))
+
server_headers = {
- 'nginx': f"nginx/{version}",
- 'apache': f"Apache/{version}",
- 'iis': f"Microsoft-IIS/{version}",
- 'tomcat': f"Apache-Coyote/1.1"
+ "nginx": f"nginx/{version}",
+ "apache": f"Apache/{version}",
+ "iis": f"Microsoft-IIS/{version}",
+ "tomcat": f"Apache-Coyote/1.1",
}
-
+
return server_headers.get(server_type, "nginx/1.18.0")
diff --git a/src/sql_errors.py b/src/sql_errors.py
index dc84886..583f7ed 100644
--- a/src/sql_errors.py
+++ b/src/sql_errors.py
@@ -9,87 +9,91 @@ from wordlists import get_wordlists
def detect_sql_injection_pattern(query_string: str) -> Optional[str]:
if not query_string:
return None
-
+
query_lower = query_string.lower()
-
+
patterns = {
- 'quote': [r"'", r'"', r'`'],
- 'comment': [r'--', r'#', r'/\*', r'\*/'],
- 'union': [r'\bunion\b', r'\bunion\s+select\b'],
- 'boolean': [r'\bor\b.*=.*', r'\band\b.*=.*', r"'.*or.*'.*=.*'"],
- 'time_based': [r'\bsleep\b', r'\bwaitfor\b', r'\bdelay\b', r'\bbenchmark\b'],
- 'stacked': [r';.*select', r';.*drop', r';.*insert', r';.*update', r';.*delete'],
- 'command': [r'\bexec\b', r'\bexecute\b', r'\bxp_cmdshell\b'],
- 'info_schema': [r'information_schema', r'table_schema', r'table_name'],
+ "quote": [r"'", r'"', r"`"],
+ "comment": [r"--", r"#", r"/\*", r"\*/"],
+ "union": [r"\bunion\b", r"\bunion\s+select\b"],
+ "boolean": [r"\bor\b.*=.*", r"\band\b.*=.*", r"'.*or.*'.*=.*'"],
+ "time_based": [r"\bsleep\b", r"\bwaitfor\b", r"\bdelay\b", r"\bbenchmark\b"],
+ "stacked": [r";.*select", r";.*drop", r";.*insert", r";.*update", r";.*delete"],
+ "command": [r"\bexec\b", r"\bexecute\b", r"\bxp_cmdshell\b"],
+ "info_schema": [r"information_schema", r"table_schema", r"table_name"],
}
-
+
for injection_type, pattern_list in patterns.items():
for pattern in pattern_list:
if re.search(pattern, query_lower):
return injection_type
-
+
return None
-def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tuple[str, str]:
+def get_random_sql_error(
+ db_type: str = None, injection_type: str = None
+) -> Tuple[str, str]:
wl = get_wordlists()
sql_errors = wl.sql_errors
-
+
if not sql_errors:
return ("Database error occurred", "text/plain")
-
+
if not db_type:
db_type = random.choice(list(sql_errors.keys()))
-
+
db_errors = sql_errors.get(db_type, {})
-
+
if injection_type and injection_type in db_errors:
errors = db_errors[injection_type]
- elif 'generic' in db_errors:
- errors = db_errors['generic']
+ elif "generic" in db_errors:
+ errors = db_errors["generic"]
else:
all_errors = []
for error_list in db_errors.values():
if isinstance(error_list, list):
all_errors.extend(error_list)
errors = all_errors if all_errors else ["Database error occurred"]
-
+
error_message = random.choice(errors) if errors else "Database error occurred"
-
- if '{table}' in error_message:
- tables = ['users', 'products', 'orders', 'customers', 'accounts', 'sessions']
- error_message = error_message.replace('{table}', random.choice(tables))
-
- if '{column}' in error_message:
- columns = ['id', 'name', 'email', 'password', 'username', 'created_at']
- error_message = error_message.replace('{column}', random.choice(columns))
-
+
+ if "{table}" in error_message:
+ tables = ["users", "products", "orders", "customers", "accounts", "sessions"]
+ error_message = error_message.replace("{table}", random.choice(tables))
+
+ if "{column}" in error_message:
+ columns = ["id", "name", "email", "password", "username", "created_at"]
+ error_message = error_message.replace("{column}", random.choice(columns))
+
return (error_message, "text/plain")
-def generate_sql_error_response(query_string: str, db_type: str = None) -> Tuple[str, str, int]:
+def generate_sql_error_response(
+ query_string: str, db_type: str = None
+) -> Tuple[str, str, int]:
injection_type = detect_sql_injection_pattern(query_string)
-
+
if not injection_type:
return (None, None, None)
-
+
error_message, content_type = get_random_sql_error(db_type, injection_type)
-
+
status_code = 500
-
+
if random.random() < 0.3:
status_code = 200
-
+
return (error_message, content_type, status_code)
def get_sql_response_with_data(path: str, params: str) -> str:
import json
from generators import random_username, random_email, random_password
-
+
injection_type = detect_sql_injection_pattern(params)
-
- if injection_type in ['union', 'boolean', 'stacked']:
+
+ if injection_type in ["union", "boolean", "stacked"]:
data = {
"success": True,
"results": [
@@ -98,15 +102,14 @@ def get_sql_response_with_data(path: str, params: str) -> str:
"username": random_username(),
"email": random_email(),
"password_hash": random_password(),
- "role": random.choice(["admin", "user", "moderator"])
+ "role": random.choice(["admin", "user", "moderator"]),
}
for i in range(1, random.randint(2, 5))
- ]
+ ],
}
return json.dumps(data, indent=2)
-
- return json.dumps({
- "success": True,
- "message": "Query executed successfully",
- "results": []
- }, indent=2)
+
+ return json.dumps(
+ {"success": True, "message": "Query executed successfully", "results": []},
+ indent=2,
+ )
diff --git a/src/tasks/analyze_ips.py b/src/tasks/analyze_ips.py
index 788d9a3..7602f18 100644
--- a/src/tasks/analyze_ips.py
+++ b/src/tasks/analyze_ips.py
@@ -20,7 +20,7 @@ TASK_CONFIG = {
"name": "analyze-ips",
"cron": "*/1 * * * *",
"enabled": True,
- "run_when_loaded": True
+ "run_when_loaded": True,
}
@@ -34,48 +34,74 @@ def main():
uneven_request_timing_threshold = config.uneven_request_timing_threshold
user_agents_used_threshold = config.user_agents_used_threshold
attack_urls_threshold = config.attack_urls_threshold
- uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
+ uneven_request_timing_time_window_seconds = (
+ config.uneven_request_timing_time_window_seconds
+ )
app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
score = {}
- score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
- score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
- score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
- score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
+ score["attacker"] = {
+ "risky_http_methods": False,
+ "robots_violations": False,
+ "uneven_request_timing": False,
+ "different_user_agents": False,
+ "attack_url": False,
+ }
+ score["good_crawler"] = {
+ "risky_http_methods": False,
+ "robots_violations": False,
+ "uneven_request_timing": False,
+ "different_user_agents": False,
+ "attack_url": False,
+ }
+ score["bad_crawler"] = {
+ "risky_http_methods": False,
+ "robots_violations": False,
+ "uneven_request_timing": False,
+ "different_user_agents": False,
+ "attack_url": False,
+ }
+ score["regular_user"] = {
+ "risky_http_methods": False,
+ "robots_violations": False,
+ "uneven_request_timing": False,
+ "different_user_agents": False,
+ "attack_url": False,
+ }
- #1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
+ # 1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
weights = {
"attacker": {
"risky_http_methods": 6,
"robots_violations": 4,
"uneven_request_timing": 3,
"different_user_agents": 8,
- "attack_url": 15
+ "attack_url": 15,
},
"good_crawler": {
"risky_http_methods": 1,
"robots_violations": 0,
"uneven_request_timing": 0,
"different_user_agents": 0,
- "attack_url": 0
+ "attack_url": 0,
},
"bad_crawler": {
"risky_http_methods": 2,
"robots_violations": 7,
"uneven_request_timing": 0,
"different_user_agents": 5,
- "attack_url": 5
+ "attack_url": 5,
},
"regular_user": {
"risky_http_methods": 0,
"robots_violations": 0,
"uneven_request_timing": 8,
"different_user_agents": 3,
- "attack_url": 0
- }
+ "attack_url": 0,
+ },
}
# Get IPs with recent activity (last minute to match cron schedule)
recent_accesses = db_manager.get_access_logs(limit=999999999, since_minutes=1)
- ips_to_analyze = {item['ip'] for item in recent_accesses}
+ ips_to_analyze = {item["ip"] for item in recent_accesses}
if not ips_to_analyze:
app_logger.debug("[Background Task] analyze-ips: No recent activity, skipping")
@@ -92,23 +118,51 @@ def main():
if total_accesses_count < 3:
category = "unknown"
analyzed_metrics = {}
- category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0}
+ category_scores = {
+ "attacker": 0,
+ "good_crawler": 0,
+ "bad_crawler": 0,
+ "regular_user": 0,
+ "unknown": 0,
+ }
last_analysis = datetime.now()
- db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
+ db_manager.update_ip_stats_analysis(
+ ip, analyzed_metrics, category, category_scores, last_analysis
+ )
return 0
- #--------------------- HTTP Methods ---------------------
- get_accesses_count = len([item for item in ip_accesses if item["method"] == "GET"])
- post_accesses_count = len([item for item in ip_accesses if item["method"] == "POST"])
- put_accesses_count = len([item for item in ip_accesses if item["method"] == "PUT"])
- delete_accesses_count = len([item for item in ip_accesses if item["method"] == "DELETE"])
- head_accesses_count = len([item for item in ip_accesses if item["method"] == "HEAD"])
- options_accesses_count = len([item for item in ip_accesses if item["method"] == "OPTIONS"])
- patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"])
+ # --------------------- HTTP Methods ---------------------
+ get_accesses_count = len(
+ [item for item in ip_accesses if item["method"] == "GET"]
+ )
+ post_accesses_count = len(
+ [item for item in ip_accesses if item["method"] == "POST"]
+ )
+ put_accesses_count = len(
+ [item for item in ip_accesses if item["method"] == "PUT"]
+ )
+ delete_accesses_count = len(
+ [item for item in ip_accesses if item["method"] == "DELETE"]
+ )
+ head_accesses_count = len(
+ [item for item in ip_accesses if item["method"] == "HEAD"]
+ )
+ options_accesses_count = len(
+ [item for item in ip_accesses if item["method"] == "OPTIONS"]
+ )
+ patch_accesses_count = len(
+ [item for item in ip_accesses if item["method"] == "PATCH"]
+ )
if total_accesses_count > http_risky_methods_threshold:
- http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
+ http_method_attacker_score = (
+ post_accesses_count
+ + put_accesses_count
+ + delete_accesses_count
+ + options_accesses_count
+ + patch_accesses_count
+ ) / total_accesses_count
else:
http_method_attacker_score = 0
- #print(f"HTTP Method attacker score: {http_method_attacker_score}")
+ # print(f"HTTP Method attacker score: {http_method_attacker_score}")
if http_method_attacker_score >= http_risky_methods_threshold:
score["attacker"]["risky_http_methods"] = True
score["good_crawler"]["risky_http_methods"] = False
@@ -119,8 +173,8 @@ def main():
score["good_crawler"]["risky_http_methods"] = True
score["bad_crawler"]["risky_http_methods"] = False
score["regular_user"]["risky_http_methods"] = False
- #--------------------- Robots Violations ---------------------
- #respect robots.txt and login/config pages access frequency
+ # --------------------- Robots Violations ---------------------
+ # respect robots.txt and login/config pages access frequency
robots_disallows = []
robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt"
with open(robots_path, "r") as f:
@@ -132,11 +186,20 @@ def main():
if parts[0] == "Disallow":
parts[1] = parts[1].rstrip("/")
- #print(f"DISALLOW {parts[1]}")
+ # print(f"DISALLOW {parts[1]}")
robots_disallows.append(parts[1].strip())
- #if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
- violated_robots_count = len([item for item in ip_accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)])
- #print(f"Violated robots count: {violated_robots_count}")
+ # if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
+ violated_robots_count = len(
+ [
+ item
+ for item in ip_accesses
+ if any(
+ item["path"].rstrip("/").startswith(disallow)
+ for disallow in robots_disallows
+ )
+ ]
+ )
+ # print(f"Violated robots count: {violated_robots_count}")
if total_accesses_count > 0:
violated_robots_ratio = violated_robots_count / total_accesses_count
else:
@@ -152,15 +215,20 @@ def main():
score["bad_crawler"]["robots_violations"] = False
score["regular_user"]["robots_violations"] = False
- #--------------------- Requests Timing ---------------------
+ # --------------------- Requests Timing ---------------------
# Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
now_utc = datetime.now()
- timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
+ timestamps = [
+ ts
+ for ts in timestamps
+ if now_utc - ts
+ <= timedelta(seconds=uneven_request_timing_time_window_seconds)
+ ]
timestamps = sorted(timestamps, reverse=True)
time_diffs = []
- for i in range(0, len(timestamps)-1):
- diff = (timestamps[i] - timestamps[i+1]).total_seconds()
+ for i in range(0, len(timestamps) - 1):
+ diff = (timestamps[i] - timestamps[i + 1]).total_seconds()
time_diffs.append(diff)
mean = 0
@@ -170,9 +238,11 @@ def main():
if time_diffs:
mean = sum(time_diffs) / len(time_diffs)
variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
- std = variance ** 0.5
- cv = std/mean
- app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
+ std = variance**0.5
+ cv = std / mean
+ app_logger.debug(
+ f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}"
+ )
if cv >= uneven_request_timing_threshold:
score["attacker"]["uneven_request_timing"] = True
score["good_crawler"]["uneven_request_timing"] = False
@@ -183,11 +253,11 @@ def main():
score["good_crawler"]["uneven_request_timing"] = False
score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = False
- #--------------------- Different User Agents ---------------------
- #Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
+ # --------------------- Different User Agents ---------------------
+ # Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
user_agents_used = [item["user_agent"] for item in ip_accesses]
user_agents_used = list(dict.fromkeys(user_agents_used))
- #print(f"User agents used: {user_agents_used}")
+ # print(f"User agents used: {user_agents_used}")
if len(user_agents_used) >= user_agents_used_threshold:
score["attacker"]["different_user_agents"] = True
score["good_crawler"]["different_user_agents"] = False
@@ -198,7 +268,7 @@ def main():
score["good_crawler"]["different_user_agents"] = False
score["bad_crawler"]["different_user_agents"] = False
score["regular_user"]["different_user_agents"] = False
- #--------------------- Attack URLs ---------------------
+ # --------------------- Attack URLs ---------------------
attack_urls_found_list = []
wl = get_wordlists()
if wl.attack_patterns:
@@ -215,12 +285,14 @@ def main():
for name, pattern in wl.attack_patterns.items():
# Check original, decoded, and double-decoded paths
- if (re.search(pattern, queried_path, re.IGNORECASE) or
- re.search(pattern, decoded_path, re.IGNORECASE) or
- re.search(pattern, decoded_path_twice, re.IGNORECASE)):
+ if (
+ re.search(pattern, queried_path, re.IGNORECASE)
+ or re.search(pattern, decoded_path, re.IGNORECASE)
+ or re.search(pattern, decoded_path_twice, re.IGNORECASE)
+ ):
attack_urls_found_list.append(f"{name}: {pattern}")
- #remove duplicates
+ # remove duplicates
attack_urls_found_list = set(attack_urls_found_list)
attack_urls_found_list = list(attack_urls_found_list)
@@ -234,28 +306,102 @@ def main():
score["good_crawler"]["attack_url"] = False
score["bad_crawler"]["attack_url"] = False
score["regular_user"]["attack_url"] = False
- #--------------------- Calculate score ---------------------
+ # --------------------- Calculate score ---------------------
attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
- attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"]
- attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"]
- attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
- attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"]
- attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
- good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"]
- good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"]
- good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"]
- good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"]
- good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"]
- bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"]
- bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"]
- bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"]
- bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"]
- bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
- regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"]
- regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"]
- regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"]
- regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"]
- regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"]
+ attacker_score = (
+ score["attacker"]["risky_http_methods"]
+ * weights["attacker"]["risky_http_methods"]
+ )
+ attacker_score = (
+ attacker_score
+ + score["attacker"]["robots_violations"]
+ * weights["attacker"]["robots_violations"]
+ )
+ attacker_score = (
+ attacker_score
+ + score["attacker"]["uneven_request_timing"]
+ * weights["attacker"]["uneven_request_timing"]
+ )
+ attacker_score = (
+ attacker_score
+ + score["attacker"]["different_user_agents"]
+ * weights["attacker"]["different_user_agents"]
+ )
+ attacker_score = (
+ attacker_score
+ + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
+ )
+ good_crawler_score = (
+ score["good_crawler"]["risky_http_methods"]
+ * weights["good_crawler"]["risky_http_methods"]
+ )
+ good_crawler_score = (
+ good_crawler_score
+ + score["good_crawler"]["robots_violations"]
+ * weights["good_crawler"]["robots_violations"]
+ )
+ good_crawler_score = (
+ good_crawler_score
+ + score["good_crawler"]["uneven_request_timing"]
+ * weights["good_crawler"]["uneven_request_timing"]
+ )
+ good_crawler_score = (
+ good_crawler_score
+ + score["good_crawler"]["different_user_agents"]
+ * weights["good_crawler"]["different_user_agents"]
+ )
+ good_crawler_score = (
+ good_crawler_score
+ + score["good_crawler"]["attack_url"]
+ * weights["good_crawler"]["attack_url"]
+ )
+ bad_crawler_score = (
+ score["bad_crawler"]["risky_http_methods"]
+ * weights["bad_crawler"]["risky_http_methods"]
+ )
+ bad_crawler_score = (
+ bad_crawler_score
+ + score["bad_crawler"]["robots_violations"]
+ * weights["bad_crawler"]["robots_violations"]
+ )
+ bad_crawler_score = (
+ bad_crawler_score
+ + score["bad_crawler"]["uneven_request_timing"]
+ * weights["bad_crawler"]["uneven_request_timing"]
+ )
+ bad_crawler_score = (
+ bad_crawler_score
+ + score["bad_crawler"]["different_user_agents"]
+ * weights["bad_crawler"]["different_user_agents"]
+ )
+ bad_crawler_score = (
+ bad_crawler_score
+ + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
+ )
+ regular_user_score = (
+ score["regular_user"]["risky_http_methods"]
+ * weights["regular_user"]["risky_http_methods"]
+ )
+ regular_user_score = (
+ regular_user_score
+ + score["regular_user"]["robots_violations"]
+ * weights["regular_user"]["robots_violations"]
+ )
+ regular_user_score = (
+ regular_user_score
+ + score["regular_user"]["uneven_request_timing"]
+ * weights["regular_user"]["uneven_request_timing"]
+ )
+ regular_user_score = (
+ regular_user_score
+ + score["regular_user"]["different_user_agents"]
+ * weights["regular_user"]["different_user_agents"]
+ )
+ regular_user_score = (
+ regular_user_score
+ + score["regular_user"]["attack_url"]
+ * weights["regular_user"]["attack_url"]
+ )
score_details = f"""
Attacker score: {attacker_score}
Good Crawler score: {good_crawler_score}
@@ -263,9 +409,22 @@ def main():
Regular User score: {regular_user_score}
"""
app_logger.debug(score_details)
- analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list}
- category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score}
+ analyzed_metrics = {
+ "risky_http_methods": http_method_attacker_score,
+ "robots_violations": violated_robots_ratio,
+ "uneven_request_timing": mean,
+ "different_user_agents": user_agents_used,
+ "attack_url": attack_urls_found_list,
+ }
+ category_scores = {
+ "attacker": attacker_score,
+ "good_crawler": good_crawler_score,
+ "bad_crawler": bad_crawler_score,
+ "regular_user": regular_user_score,
+ }
category = max(category_scores, key=category_scores.get)
last_analysis = datetime.now()
- db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis)
+ db_manager.update_ip_stats_analysis(
+ ip, analyzed_metrics, category, category_scores, last_analysis
+ )
return
diff --git a/src/tasks/fetch_ip_rep.py b/src/tasks/fetch_ip_rep.py
index efddaea..577133a 100644
--- a/src/tasks/fetch_ip_rep.py
+++ b/src/tasks/fetch_ip_rep.py
@@ -11,7 +11,7 @@ TASK_CONFIG = {
"name": "fetch-ip-rep",
"cron": "*/5 * * * *",
"enabled": True,
- "run_when_loaded": True
+ "run_when_loaded": True,
}
@@ -21,7 +21,9 @@ def main():
# Only get IPs that haven't been enriched yet
unenriched_ips = db_manager.get_unenriched_ips(limit=50)
- app_logger.info(f"{len(unenriched_ips)} IP's need to be have reputation enrichment.")
+ app_logger.info(
+ f"{len(unenriched_ips)} IP's need to be have reputation enrichment."
+ )
for ip in unenriched_ips:
try:
api_url = "https://iprep.lcrawl.com/api/iprep/"
@@ -43,8 +45,11 @@ def main():
sanitized_list_on = sanitize_dict(list_on, 100000)
db_manager.update_ip_rep_infos(
- ip, sanitized_country_iso_code, sanitized_asn,
- sanitized_asn_org, sanitized_list_on
+ ip,
+ sanitized_country_iso_code,
+ sanitized_asn,
+ sanitized_asn_org,
+ sanitized_list_on,
)
except requests.RequestException as e:
app_logger.warning(f"Failed to fetch IP rep for {ip}: {e}")
diff --git a/src/tasks/top_attacking_ips.py b/src/tasks/top_attacking_ips.py
index cb95d57..75cff41 100644
--- a/src/tasks/top_attacking_ips.py
+++ b/src/tasks/top_attacking_ips.py
@@ -17,24 +17,29 @@ TASK_CONFIG = {
"name": "export-malicious-ips",
"cron": "*/5 * * * *",
"enabled": True,
- "run_when_loaded": True
+ "run_when_loaded": True,
}
EXPORTS_DIR = "exports"
OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
+
# ----------------------
# TASK LOGIC
# ----------------------
def has_recent_honeypot_access(session, minutes: int = 5) -> bool:
"""Check if honeypot was accessed in the last N minutes."""
cutoff_time = datetime.now() - timedelta(minutes=minutes)
- count = session.query(AccessLog).filter(
- AccessLog.is_honeypot_trigger == True,
- AccessLog.timestamp >= cutoff_time
- ).count()
+ count = (
+ session.query(AccessLog)
+ .filter(
+ AccessLog.is_honeypot_trigger == True, AccessLog.timestamp >= cutoff_time
+ )
+ .count()
+ )
return count > 0
+
def main():
"""
Export all IPs flagged as suspicious to a text file.
@@ -49,23 +54,29 @@ def main():
# Check for recent honeypot activity
if not has_recent_honeypot_access(session):
- app_logger.info(f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes")
+ app_logger.info(
+ f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes"
+ )
return
# Query distinct suspicious IPs
- results = session.query(distinct(AccessLog.ip)).filter(
- AccessLog.is_suspicious == True
- ).all()
+ results = (
+ session.query(distinct(AccessLog.ip))
+ .filter(AccessLog.is_suspicious == True)
+ .all()
+ )
# Ensure exports directory exists
os.makedirs(EXPORTS_DIR, exist_ok=True)
# Write IPs to file (one per line)
- with open(OUTPUT_FILE, 'w') as f:
+ with open(OUTPUT_FILE, "w") as f:
for (ip,) in results:
f.write(f"{ip}\n")
- app_logger.info(f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}")
+ app_logger.info(
+ f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}"
+ )
except Exception as e:
app_logger.error(f"[Background Task] {task_name} failed: {e}")
diff --git a/src/tasks_master.py b/src/tasks_master.py
index 264471c..9017c49 100644
--- a/src/tasks_master.py
+++ b/src/tasks_master.py
@@ -6,7 +6,12 @@ import threading
import importlib
import importlib.util
-from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
+from logger import (
+ initialize_logging,
+ get_app_logger,
+ get_access_logger,
+ get_credential_logger,
+)
app_logger = get_app_logger()
@@ -27,8 +32,8 @@ except ModuleNotFoundError:
# ---------- TASKSMASTER CLASS ----------
class TasksMaster:
-
- TASK_DEFAULT_CRON = '*/15 * * * *'
+
+ TASK_DEFAULT_CRON = "*/15 * * * *"
TASK_JITTER = 240
TASKS_FOLDER = os.path.join(os.path.dirname(__file__), "tasks")
@@ -36,7 +41,9 @@ class TasksMaster:
self.tasks = self._config_tasks()
self.scheduler = scheduler
self.last_run_times = {}
- self.scheduler.add_listener(self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR)
+ self.scheduler.add_listener(
+ self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR
+ )
def _config_tasks(self):
"""
@@ -80,7 +87,7 @@ class TasksMaster:
for filename in sorted(os.listdir(folder_path)):
# skip any non python files, as well as any __pycache__ or .pyc files that might creep in there
- if not filename.endswith('.py') or filename.startswith("__"):
+ if not filename.endswith(".py") or filename.startswith("__"):
continue
path = os.path.join(folder_path, filename)
@@ -93,13 +100,15 @@ class TasksMaster:
except Exception as e:
app_logger.error(f"Failed to import {filename}: {e}")
continue
-
+
# if we have a tasks config and a main function, we attempt to schedule it
- if hasattr(module, 'TASK_CONFIG') and hasattr(module, 'main'):
+ if hasattr(module, "TASK_CONFIG") and hasattr(module, "main"):
# ensure task_config is a dict
if not isinstance(module.TASK_CONFIG, dict):
- app_logger.error(f"TASK_CONFIG is not a dict in {filename}. Skipping task.")
+ app_logger.error(
+ f"TASK_CONFIG is not a dict in {filename}. Skipping task."
+ )
continue
task_cron = module.TASK_CONFIG.get("cron") or self.TASK_DEFAULT_CRON
@@ -109,24 +118,26 @@ class TasksMaster:
try:
CronTrigger.from_crontab(task_cron)
except ValueError as ve:
- app_logger.error(f"Invalid cron format for task {task_name}: {ve} - Skipping this task")
+ app_logger.error(
+ f"Invalid cron format for task {task_name}: {ve} - Skipping this task"
+ )
continue
task = {
- 'name': module.TASK_CONFIG.get('name', module_name),
- 'filename': filename,
- 'cron': task_cron,
+ "name": module.TASK_CONFIG.get("name", module_name),
+ "filename": filename,
+ "cron": task_cron,
"enabled": module.TASK_CONFIG.get("enabled", False),
- "run_when_loaded": module.TASK_CONFIG.get("run_when_loaded", False)
+ "run_when_loaded": module.TASK_CONFIG.get("run_when_loaded", False),
}
tasks.append(task)
# we are missing things, and we log what's missing
else:
- if not hasattr(module, 'TASK_CONFIG'):
+ if not hasattr(module, "TASK_CONFIG"):
app_logger.warning(f"Missing TASK_CONFIG in {filename}")
- elif not hasattr(module, 'main'):
+ elif not hasattr(module, "main"):
app_logger.warning(f"Missing main() in {filename}")
return tasks
@@ -135,7 +146,7 @@ class TasksMaster:
# for each task in the tasks config file...
for task_to_run in self.tasks:
- # remember, these tasks, are built from the "load_tasks_from_folder" function,
+ # remember, these tasks, are built from the "load_tasks_from_folder" function,
# if you want to pass data from the TASKS_CONFIG dict, you need to pass it there to get it here.
task_name = task_to_run.get("name")
run_when_loaded = task_to_run.get("run_when_loaded")
@@ -147,28 +158,42 @@ class TasksMaster:
# if task is disabled, skip this one
if not task_enabled:
- app_logger.info(f"{task_name} is disabled in client config. Skipping task")
+ app_logger.info(
+ f"{task_name} is disabled in client config. Skipping task"
+ )
continue
try:
- if os.path.isfile(os.path.join(self.TASKS_FOLDER, task_to_run.get("filename"))):
+ if os.path.isfile(
+ os.path.join(self.TASKS_FOLDER, task_to_run.get("filename"))
+ ):
# schedule the task now that everything has checked out above...
- self._schedule_task(task_name, module_name, task_cron, run_when_loaded)
- app_logger.info(f"Scheduled {module_name} cron is set to {task_cron}.", extra={"task": task_to_run})
+ self._schedule_task(
+ task_name, module_name, task_cron, run_when_loaded
+ )
+ app_logger.info(
+ f"Scheduled {module_name} cron is set to {task_cron}.",
+ extra={"task": task_to_run},
+ )
else:
- app_logger.info(f"Skipping invalid or unsafe file: {task_to_run.get('filename')}", extra={"task": task_to_run})
+ app_logger.info(
+ f"Skipping invalid or unsafe file: {task_to_run.get('filename')}",
+ extra={"task": task_to_run},
+ )
except Exception as e:
- app_logger.error(f"Error scheduling task: {e}", extra={"tasks": task_to_run})
-
+ app_logger.error(
+ f"Error scheduling task: {e}", extra={"tasks": task_to_run}
+ )
+
def _schedule_task(self, task_name, module_name, task_cron, run_when_loaded):
try:
# Dynamically import the module
module = importlib.import_module(f"tasks.{module_name}")
# Check if the module has a 'main' function
- if hasattr(module, 'main'):
+ if hasattr(module, "main"):
app_logger.info(f"Scheduling {task_name} - {module_name} Main Function")
-
+
# unique_job_id
job_identifier = f"{module_name}__{task_name}"
@@ -180,29 +205,31 @@ class TasksMaster:
# schedule the task / job
if run_when_loaded:
- app_logger.info(f"Task: {task_name} is set to run instantly. Scheduling to run on scheduler start")
+ app_logger.info(
+ f"Task: {task_name} is set to run instantly. Scheduling to run on scheduler start"
+ )
self.scheduler.add_job(
- module.main,
- trigger,
- id=job_identifier,
- jitter=self.TASK_JITTER,
- name=task_name,
- next_run_time=datetime.datetime.now(),
- max_instances=1
- )
- else:
- self.scheduler.add_job(
- module.main,
- trigger,
+ module.main,
+ trigger,
id=job_identifier,
jitter=self.TASK_JITTER,
name=task_name,
- max_instances=1
+ next_run_time=datetime.datetime.now(),
+ max_instances=1,
+ )
+ else:
+ self.scheduler.add_job(
+ module.main,
+ trigger,
+ id=job_identifier,
+ jitter=self.TASK_JITTER,
+ name=task_name,
+ max_instances=1,
)
else:
app_logger.error(f"{module_name} does not define a 'main' function.")
-
+
except Exception as e:
app_logger.error(f"Failed to load {module_name}: {e}")
@@ -218,13 +245,15 @@ class TasksMaster:
def list_jobs(self):
scheduled_jobs = self.scheduler.get_jobs()
jobs_list = []
-
+
for job in scheduled_jobs:
- jobs_list.append({
+ jobs_list.append(
+ {
"id": job.id,
"name": job.name,
"next_run": job.next_run_time,
- })
+ }
+ )
return jobs_list
def run_scheduled_tasks(self):
@@ -235,7 +264,7 @@ class TasksMaster:
1. Retrieves the current task configurations and updates internal state.
2. Adds new jobs to the scheduler based on the latest configuration.
3. Starts the scheduler to begin executing tasks at their defined intervals.
-
+
This ensures the scheduler is always running with the most up-to-date
task definitions and enabled status.
"""
@@ -251,6 +280,7 @@ class TasksMaster:
# ---------- SINGLETON WRAPPER ----------
T = type
+
def singleton_loader(func):
"""Decorator to ensure only one instance exists."""
cache: dict[str, T] = {}
@@ -262,6 +292,7 @@ def singleton_loader(func):
if func.__name__ not in cache:
cache[func.__name__] = func(*args, **kwargs)
return cache[func.__name__]
+
return wrapper
@@ -283,6 +314,8 @@ def get_tasksmaster(scheduler: BackgroundScheduler | None = None) -> TasksMaster
# Auto-start scheduler if not already running
if not scheduler.running:
scheduler.start()
- app_logger.info("TasksMaster scheduler started automatically with singleton creation.")
+ app_logger.info(
+ "TasksMaster scheduler started automatically with singleton creation."
+ )
return tm_instance
diff --git a/src/templates/__init__.py b/src/templates/__init__.py
index 3eb9f72..4c6c355 100644
--- a/src/templates/__init__.py
+++ b/src/templates/__init__.py
@@ -8,8 +8,8 @@ from .template_loader import load_template, clear_cache, TemplateNotFoundError
from . import html_templates
__all__ = [
- 'load_template',
- 'clear_cache',
- 'TemplateNotFoundError',
- 'html_templates',
+ "load_template",
+ "clear_cache",
+ "TemplateNotFoundError",
+ "html_templates",
]
diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py
index 04b653e..5d31bb8 100644
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -9,12 +9,14 @@ import html
from datetime import datetime
from zoneinfo import ZoneInfo
+
def _escape(value) -> str:
"""Escape HTML special characters to prevent XSS attacks."""
if value is None:
return ""
return html.escape(str(value))
+
def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str:
"""Format ISO timestamp for display with timezone conversion
@@ -30,10 +32,12 @@ def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str:
return dt.strftime("%Y-%m-%d %H:%M:%S")
except Exception:
# Fallback for old format
- return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
+ return (
+ iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
+ )
-def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
+def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
"""Generate dashboard HTML with access statistics
Args:
@@ -42,8 +46,8 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
"""
# Generate IP rows with clickable functionality for dropdown stats
- top_ips_rows = '\n'.join([
- f'''
+ top_ips_rows = (
+ "\n".join([f"""
| {i+1} |
{_escape(ip)} |
{count} |
@@ -54,25 +58,35 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
Loading stats...
-
'''
- for i, (ip, count) in enumerate(stats['top_ips'])
- ]) or '| No data |
'
+ """ for i, (ip, count) in enumerate(stats["top_ips"])])
+ or '| No data |
'
+ )
# Generate paths rows (CRITICAL: paths can contain XSS payloads)
- top_paths_rows = '\n'.join([
- f'| {i+1} | {_escape(path)} | {count} |
'
- for i, (path, count) in enumerate(stats['top_paths'])
- ]) or '| No data |
'
+ top_paths_rows = (
+ "\n".join(
+ [
+ f'| {i+1} | {_escape(path)} | {count} |
'
+ for i, (path, count) in enumerate(stats["top_paths"])
+ ]
+ )
+ or '| No data |
'
+ )
# Generate User-Agent rows (CRITICAL: user agents can contain XSS payloads)
- top_ua_rows = '\n'.join([
- f'| {i+1} | {_escape(ua[:80])} | {count} |
'
- for i, (ua, count) in enumerate(stats['top_user_agents'])
- ]) or '| No data |
'
+ top_ua_rows = (
+ "\n".join(
+ [
+ f'| {i+1} | {_escape(ua[:80])} | {count} |
'
+ for i, (ua, count) in enumerate(stats["top_user_agents"])
+ ]
+ )
+ or '| No data |
'
+ )
# Generate suspicious accesses rows with clickable IPs
- suspicious_rows = '\n'.join([
- f'''
+ suspicious_rows = (
+ "\n".join([f"""
| {_escape(log["ip"])} |
{_escape(log["path"])} |
{_escape(log["user_agent"][:60])} |
@@ -84,13 +98,13 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
Loading stats...
-
'''
- for log in stats['recent_suspicious'][-10:]
- ]) or '| No suspicious activity detected |
'
+ """ for log in stats["recent_suspicious"][-10:]])
+ or '| No suspicious activity detected |
'
+ )
# Generate honeypot triggered IPs rows with clickable IPs
- honeypot_rows = '\n'.join([
- f'''
+ honeypot_rows = (
+ "\n".join([f"""
| {_escape(ip)} |
{_escape(", ".join(paths))} |
{len(paths)} |
@@ -101,13 +115,13 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
Loading stats...
-
'''
- for ip, paths in stats.get('honeypot_triggered_ips', [])
- ]) or '| No honeypot triggers yet |
'
+ """ for ip, paths in stats.get("honeypot_triggered_ips", [])])
+ or '| No honeypot triggers yet |
'
+ )
# Generate attack types rows with clickable IPs
- attack_type_rows = '\n'.join([
- f'''
+ attack_type_rows = (
+ "\n".join([f"""
| {_escape(log["ip"])} |
{_escape(log["path"])} |
{_escape(", ".join(log["attack_types"]))} |
@@ -120,13 +134,13 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
Loading stats...
-
'''
- for log in stats.get('attack_types', [])[-10:]
- ]) or '| No attacks detected |
'
+ """ for log in stats.get("attack_types", [])[-10:]])
+ or '| No attacks detected |
'
+ )
# Generate credential attempts rows with clickable IPs
- credential_rows = '\n'.join([
- f'''
+ credential_rows = (
+ "\n".join([f"""
| {_escape(log["ip"])} |
{_escape(log["username"])} |
{_escape(log["password"])} |
@@ -139,9 +153,9 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
Loading stats...
-
'''
- for log in stats.get('credential_attempts', [])[-20:]
- ]) or '| No credentials captured yet |
'
+ """ for log in stats.get("credential_attempts", [])[-20:]])
+ or '| No credentials captured yet |
'
+ )
return f"""
diff --git a/src/templates/template_loader.py b/src/templates/template_loader.py
index fd1febc..fe53bf5 100644
--- a/src/templates/template_loader.py
+++ b/src/templates/template_loader.py
@@ -11,6 +11,7 @@ from typing import Dict
class TemplateNotFoundError(Exception):
"""Raised when a template file cannot be found."""
+
pass
@@ -42,11 +43,11 @@ def load_template(name: str, **kwargs) -> str:
"""
# debug
# print(f"Loading Template: {name}")
-
+
# Check cache first
if name not in _template_cache:
# Determine file path based on whether name has an extension
- if '.' in name:
+ if "." in name:
file_path = _TEMPLATE_DIR / name
else:
file_path = _TEMPLATE_DIR / f"{name}.html"
@@ -54,7 +55,7 @@ def load_template(name: str, **kwargs) -> str:
if not file_path.exists():
raise TemplateNotFoundError(f"Template '{name}' not found at {file_path}")
- _template_cache[name] = file_path.read_text(encoding='utf-8')
+ _template_cache[name] = file_path.read_text(encoding="utf-8")
template = _template_cache[name]
diff --git a/src/tracker.py b/src/tracker.py
index da07569..f7024ac 100644
--- a/src/tracker.py
+++ b/src/tracker.py
@@ -17,7 +17,13 @@ class AccessTracker:
Maintains in-memory structures for fast dashboard access and
persists data to SQLite for long-term storage and analysis.
"""
- def __init__(self, max_pages_limit, ban_duration_seconds, db_manager: Optional[DatabaseManager] = None):
+
+ def __init__(
+ self,
+ max_pages_limit,
+ ban_duration_seconds,
+ db_manager: Optional[DatabaseManager] = None,
+ ):
"""
Initialize the access tracker.
@@ -32,14 +38,32 @@ class AccessTracker:
self.user_agent_counts: Dict[str, int] = defaultdict(int)
self.access_log: List[Dict] = []
self.credential_attempts: List[Dict] = []
-
+
# Track pages visited by each IP (for good crawler limiting)
self.ip_page_visits: Dict[str, Dict[str, object]] = defaultdict(dict)
-
+
self.suspicious_patterns = [
- 'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests',
- 'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix',
- 'burp', 'zap', 'w3af', 'metasploit', 'nuclei', 'gobuster', 'dirbuster'
+ "bot",
+ "crawler",
+ "spider",
+ "scraper",
+ "curl",
+ "wget",
+ "python-requests",
+ "scanner",
+ "nikto",
+ "sqlmap",
+ "nmap",
+ "masscan",
+ "nessus",
+ "acunetix",
+ "burp",
+ "zap",
+ "w3af",
+ "metasploit",
+ "nuclei",
+ "gobuster",
+ "dirbuster",
]
# Load attack patterns from wordlists
@@ -49,11 +73,11 @@ class AccessTracker:
# Fallback if wordlists not loaded
if not self.attack_types:
self.attack_types = {
- 'path_traversal': r'\.\.',
- 'sql_injection': r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
- 'xss_attempt': r'( str:
xss_detected = False
reflected_content = []
-
+
for key, value in input_data.items():
if detect_xss_pattern(value):
xss_detected = True
reflected_content.append(f"{key}: {value}
")
-
+
if xss_detected:
html = f"""
@@ -51,7 +51,7 @@ def generate_xss_response(input_data: dict) -> str:
"""
return html
-
+
return """