Linted code with black tool

This commit is contained in:
Lorenzo Venerandi
2026-01-23 22:00:21 +01:00
parent 25384585d9
commit 4450d3a4e3
22 changed files with 1387 additions and 868 deletions

View File

@@ -19,10 +19,12 @@ Functions for user activity analysis
app_logger = get_app_logger() app_logger = get_app_logger()
class Analyzer: class Analyzer:
""" """
Analyzes users activity and produces aggregated insights Analyzes users activity and produces aggregated insights
""" """
def __init__(self, db_manager: Optional[DatabaseManager] = None): def __init__(self, db_manager: Optional[DatabaseManager] = None):
""" """
Initialize the access tracker. Initialize the access tracker.
@@ -102,7 +104,6 @@ class Analyzer:
# } # }
# } # }
# accesses = self.db.get_access_logs(ip_filter = ip, limit=1000) # accesses = self.db.get_access_logs(ip_filter = ip, limit=1000)
# total_accesses_count = len(accesses) # total_accesses_count = len(accesses)
# if total_accesses_count <= 0: # if total_accesses_count <= 0:
@@ -119,7 +120,6 @@ class Analyzer:
# #--------------------- HTTP Methods --------------------- # #--------------------- HTTP Methods ---------------------
# get_accesses_count = len([item for item in accesses if item["method"] == "GET"]) # get_accesses_count = len([item for item in accesses if item["method"] == "GET"])
# post_accesses_count = len([item for item in accesses if item["method"] == "POST"]) # post_accesses_count = len([item for item in accesses if item["method"] == "POST"])
# put_accesses_count = len([item for item in accesses if item["method"] == "PUT"]) # put_accesses_count = len([item for item in accesses if item["method"] == "PUT"])
@@ -214,7 +214,6 @@ class Analyzer:
# score["bad_crawler"]["uneven_request_timing"] = False # score["bad_crawler"]["uneven_request_timing"] = False
# score["regular_user"]["uneven_request_timing"] = False # score["regular_user"]["uneven_request_timing"] = False
# #--------------------- Different User Agents --------------------- # #--------------------- Different User Agents ---------------------
# #Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers # #Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
# user_agents_used = [item["user_agent"] for item in accesses] # user_agents_used = [item["user_agent"] for item in accesses]
@@ -317,8 +316,6 @@ class Analyzer:
# return 0 # return 0
# def update_ip_rep_infos(self, ip: str) -> list[str]: # def update_ip_rep_infos(self, ip: str) -> list[str]:
# api_url = "https://iprep.lcrawl.com/api/iprep/" # api_url = "https://iprep.lcrawl.com/api/iprep/"
# params = { # params = {

View File

@@ -14,12 +14,13 @@ import yaml
@dataclass @dataclass
class Config: class Config:
"""Configuration class for the deception server""" """Configuration class for the deception server"""
port: int = 5000 port: int = 5000
delay: int = 100 # milliseconds delay: int = 100 # milliseconds
server_header: str = "" server_header: str = ""
links_length_range: Tuple[int, int] = (5, 15) links_length_range: Tuple[int, int] = (5, 15)
links_per_page_range: Tuple[int, int] = (10, 15) links_per_page_range: Tuple[int, int] = (10, 15)
char_space: str = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' char_space: str = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
max_counter: int = 10 max_counter: int = 10
canary_token_url: Optional[str] = None canary_token_url: Optional[str] = None
canary_token_tries: int = 10 canary_token_tries: int = 10
@@ -30,7 +31,9 @@ class Config:
probability_error_codes: int = 0 # Percentage (0-100) probability_error_codes: int = 0 # Percentage (0-100)
# Crawl limiting settings - for legitimate vs malicious crawlers # Crawl limiting settings - for legitimate vs malicious crawlers
max_pages_limit: int = 100 # Max pages limit for good crawlers and regular users (and bad crawlers/attackers if infinite_pages_for_malicious is False) max_pages_limit: int = (
100 # Max pages limit for good crawlers and regular users (and bad crawlers/attackers if infinite_pages_for_malicious is False)
)
infinite_pages_for_malicious: bool = True # Infinite pages for malicious crawlers infinite_pages_for_malicious: bool = True # Infinite pages for malicious crawlers
ban_duration_seconds: int = 600 # Ban duration in seconds for IPs exceeding limits ban_duration_seconds: int = 600 # Ban duration in seconds for IPs exceeding limits
@@ -47,84 +50,105 @@ class Config:
attack_urls_threshold: float = None attack_urls_threshold: float = None
@classmethod @classmethod
def from_yaml(cls) -> 'Config': def from_yaml(cls) -> "Config":
"""Create configuration from YAML file""" """Create configuration from YAML file"""
config_location = os.getenv('CONFIG_LOCATION', 'config.yaml') config_location = os.getenv("CONFIG_LOCATION", "config.yaml")
config_path = Path(__file__).parent.parent / config_location config_path = Path(__file__).parent.parent / config_location
try: try:
with open(config_path, 'r') as f: with open(config_path, "r") as f:
data = yaml.safe_load(f) data = yaml.safe_load(f)
except FileNotFoundError: except FileNotFoundError:
print(f"Error: Configuration file '{config_path}' not found.", file=sys.stderr) print(
print(f"Please create a config.yaml file or set CONFIG_LOCATION environment variable.", file=sys.stderr) f"Error: Configuration file '{config_path}' not found.", file=sys.stderr
)
print(
f"Please create a config.yaml file or set CONFIG_LOCATION environment variable.",
file=sys.stderr,
)
sys.exit(1) sys.exit(1)
except yaml.YAMLError as e: except yaml.YAMLError as e:
print(f"Error: Invalid YAML in configuration file '{config_path}': {e}", file=sys.stderr) print(
f"Error: Invalid YAML in configuration file '{config_path}': {e}",
file=sys.stderr,
)
sys.exit(1) sys.exit(1)
if data is None: if data is None:
data = {} data = {}
# Extract nested values with defaults # Extract nested values with defaults
server = data.get('server', {}) server = data.get("server", {})
links = data.get('links', {}) links = data.get("links", {})
canary = data.get('canary', {}) canary = data.get("canary", {})
dashboard = data.get('dashboard', {}) dashboard = data.get("dashboard", {})
api = data.get('api', {}) api = data.get("api", {})
database = data.get('database', {}) database = data.get("database", {})
behavior = data.get('behavior', {}) behavior = data.get("behavior", {})
analyzer = data.get('analyzer') or {} analyzer = data.get("analyzer") or {}
crawl = data.get('crawl', {}) crawl = data.get("crawl", {})
# Handle dashboard_secret_path - auto-generate if null/not set # Handle dashboard_secret_path - auto-generate if null/not set
dashboard_path = dashboard.get('secret_path') dashboard_path = dashboard.get("secret_path")
if dashboard_path is None: if dashboard_path is None:
dashboard_path = f'/{os.urandom(16).hex()}' dashboard_path = f"/{os.urandom(16).hex()}"
else: else:
# ensure the dashboard path starts with a / # ensure the dashboard path starts with a /
if dashboard_path[:1] != "/": if dashboard_path[:1] != "/":
dashboard_path = f"/{dashboard_path}" dashboard_path = f"/{dashboard_path}"
return cls( return cls(
port=server.get('port', 5000), port=server.get("port", 5000),
delay=server.get('delay', 100), delay=server.get("delay", 100),
server_header=server.get('server_header',""), server_header=server.get("server_header", ""),
links_length_range=( links_length_range=(
links.get('min_length', 5), links.get("min_length", 5),
links.get('max_length', 15) links.get("max_length", 15),
), ),
links_per_page_range=( links_per_page_range=(
links.get('min_per_page', 10), links.get("min_per_page", 10),
links.get('max_per_page', 15) links.get("max_per_page", 15),
), ),
char_space=links.get('char_space', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'), char_space=links.get(
max_counter=links.get('max_counter', 10), "char_space",
canary_token_url=canary.get('token_url'), "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789",
canary_token_tries=canary.get('token_tries', 10), ),
max_counter=links.get("max_counter", 10),
canary_token_url=canary.get("token_url"),
canary_token_tries=canary.get("token_tries", 10),
dashboard_secret_path=dashboard_path, dashboard_secret_path=dashboard_path,
api_server_url=api.get('server_url'), api_server_url=api.get("server_url"),
api_server_port=api.get('server_port', 8080), api_server_port=api.get("server_port", 8080),
api_server_path=api.get('server_path', '/api/v2/users'), api_server_path=api.get("server_path", "/api/v2/users"),
probability_error_codes=behavior.get('probability_error_codes', 0), probability_error_codes=behavior.get("probability_error_codes", 0),
database_path=database.get('path', 'data/krawl.db'), database_path=database.get("path", "data/krawl.db"),
database_retention_days=database.get('retention_days', 30), database_retention_days=database.get("retention_days", 30),
http_risky_methods_threshold=analyzer.get('http_risky_methods_threshold', 0.1), http_risky_methods_threshold=analyzer.get(
violated_robots_threshold=analyzer.get('violated_robots_threshold', 0.1), "http_risky_methods_threshold", 0.1
uneven_request_timing_threshold=analyzer.get('uneven_request_timing_threshold', 0.5), # coefficient of variation ),
uneven_request_timing_time_window_seconds=analyzer.get('uneven_request_timing_time_window_seconds', 300), violated_robots_threshold=analyzer.get("violated_robots_threshold", 0.1),
user_agents_used_threshold=analyzer.get('user_agents_used_threshold', 2), uneven_request_timing_threshold=analyzer.get(
attack_urls_threshold=analyzer.get('attack_urls_threshold', 1), "uneven_request_timing_threshold", 0.5
infinite_pages_for_malicious=crawl.get('infinite_pages_for_malicious', True), ), # coefficient of variation
max_pages_limit=crawl.get('max_pages_limit', 200), uneven_request_timing_time_window_seconds=analyzer.get(
ban_duration_seconds=crawl.get('ban_duration_seconds', 60) "uneven_request_timing_time_window_seconds", 300
),
user_agents_used_threshold=analyzer.get("user_agents_used_threshold", 2),
attack_urls_threshold=analyzer.get("attack_urls_threshold", 1),
infinite_pages_for_malicious=crawl.get(
"infinite_pages_for_malicious", True
),
max_pages_limit=crawl.get("max_pages_limit", 200),
ban_duration_seconds=crawl.get("ban_duration_seconds", 60),
) )
def __get_env_from_config(config: str) -> str: def __get_env_from_config(config: str) -> str:
env = config.upper().replace('.', '_').replace('-', '__').replace(' ', '_') env = config.upper().replace(".", "_").replace("-", "__").replace(" ", "_")
return f"KRAWL_{env}"
return f'KRAWL_{env}'
def override_config_from_env(config: Config = None): def override_config_from_env(config: Config = None):
"""Initialize configuration from environment variables""" """Initialize configuration from environment variables"""
@@ -140,14 +164,16 @@ def override_config_from_env(config: Config = None):
elif field_type == float: elif field_type == float:
setattr(config, field, float(env_value)) setattr(config, field, float(env_value))
elif field_type == Tuple[int, int]: elif field_type == Tuple[int, int]:
parts = env_value.split(',') parts = env_value.split(",")
if len(parts) == 2: if len(parts) == 2:
setattr(config, field, (int(parts[0]), int(parts[1]))) setattr(config, field, (int(parts[0]), int(parts[1])))
else: else:
setattr(config, field, env_value) setattr(config, field, env_value)
_config_instance = None _config_instance = None
def get_config() -> Config: def get_config() -> Config:
"""Get the singleton Config instance""" """Get the singleton Config instance"""
global _config_instance global _config_instance

View File

@@ -24,7 +24,15 @@ def set_sqlite_pragma(dbapi_connection, connection_record):
cursor.execute("PRAGMA busy_timeout=30000") cursor.execute("PRAGMA busy_timeout=30000")
cursor.close() cursor.close()
from models import Base, AccessLog, CredentialAttempt, AttackDetection, IpStats, CategoryHistory
from models import (
Base,
AccessLog,
CredentialAttempt,
AttackDetection,
IpStats,
CategoryHistory,
)
from sanitizer import ( from sanitizer import (
sanitize_ip, sanitize_ip,
sanitize_path, sanitize_path,
@@ -37,6 +45,7 @@ from logger import get_app_logger
applogger = get_app_logger() applogger = get_app_logger()
class DatabaseManager: class DatabaseManager:
""" """
Singleton database manager for the Krawl honeypot. Singleton database manager for the Krawl honeypot.
@@ -44,6 +53,7 @@ class DatabaseManager:
Handles database initialization, session management, and provides Handles database initialization, session management, and provides
methods for persisting access logs, credentials, and attack detections. methods for persisting access logs, credentials, and attack detections.
""" """
_instance: Optional["DatabaseManager"] = None _instance: Optional["DatabaseManager"] = None
def __new__(cls) -> "DatabaseManager": def __new__(cls) -> "DatabaseManager":
@@ -72,7 +82,7 @@ class DatabaseManager:
self._engine = create_engine( self._engine = create_engine(
database_url, database_url,
connect_args={"check_same_thread": False}, connect_args={"check_same_thread": False},
echo=False # Set to True for SQL debugging echo=False, # Set to True for SQL debugging
) )
# Create session factory with scoped_session for thread safety # Create session factory with scoped_session for thread safety
@@ -96,7 +106,9 @@ class DatabaseManager:
def session(self) -> Session: def session(self) -> Session:
"""Get a thread-local database session.""" """Get a thread-local database session."""
if not self._initialized: if not self._initialized:
raise RuntimeError("DatabaseManager not initialized. Call initialize() first.") raise RuntimeError(
"DatabaseManager not initialized. Call initialize() first."
)
return self._Session() return self._Session()
def close_session(self) -> None: def close_session(self) -> None:
@@ -113,7 +125,7 @@ class DatabaseManager:
is_suspicious: bool = False, is_suspicious: bool = False,
is_honeypot_trigger: bool = False, is_honeypot_trigger: bool = False,
attack_types: Optional[List[str]] = None, attack_types: Optional[List[str]] = None,
matched_patterns: Optional[Dict[str, str]] = None matched_patterns: Optional[Dict[str, str]] = None,
) -> Optional[int]: ) -> Optional[int]:
""" """
Persist an access log entry to the database. Persist an access log entry to the database.
@@ -141,7 +153,7 @@ class DatabaseManager:
method=method[:10], method=method[:10],
is_suspicious=is_suspicious, is_suspicious=is_suspicious,
is_honeypot_trigger=is_honeypot_trigger, is_honeypot_trigger=is_honeypot_trigger,
timestamp=datetime.now() timestamp=datetime.now(),
) )
session.add(access_log) session.add(access_log)
session.flush() # Get the ID before committing session.flush() # Get the ID before committing
@@ -155,7 +167,7 @@ class DatabaseManager:
attack_type=attack_type[:50], attack_type=attack_type[:50],
matched_pattern=sanitize_attack_pattern( matched_pattern=sanitize_attack_pattern(
matched_patterns.get(attack_type, "") matched_patterns.get(attack_type, "")
) ),
) )
session.add(detection) session.add(detection)
@@ -178,7 +190,7 @@ class DatabaseManager:
ip: str, ip: str,
path: str, path: str,
username: Optional[str] = None, username: Optional[str] = None,
password: Optional[str] = None password: Optional[str] = None,
) -> Optional[int]: ) -> Optional[int]:
""" """
Persist a credential attempt to the database. Persist a credential attempt to the database.
@@ -199,7 +211,7 @@ class DatabaseManager:
path=sanitize_path(path), path=sanitize_path(path),
username=sanitize_credential(username), username=sanitize_credential(username),
password=sanitize_credential(password), password=sanitize_credential(password),
timestamp=datetime.now() timestamp=datetime.now(),
) )
session.add(credential) session.add(credential)
session.commit() session.commit()
@@ -230,14 +242,18 @@ class DatabaseManager:
ip_stats.last_seen = now ip_stats.last_seen = now
else: else:
ip_stats = IpStats( ip_stats = IpStats(
ip=sanitized_ip, ip=sanitized_ip, total_requests=1, first_seen=now, last_seen=now
total_requests=1,
first_seen=now,
last_seen=now
) )
session.add(ip_stats) session.add(ip_stats)
def update_ip_stats_analysis(self, ip: str, analyzed_metrics: Dict[str, object], category: str, category_scores: Dict[str, int], last_analysis: datetime) -> None: def update_ip_stats_analysis(
self,
ip: str,
analyzed_metrics: Dict[str, object],
category: str,
category_scores: Dict[str, int],
last_analysis: datetime,
) -> None:
""" """
Update IP statistics (ip is already persisted). Update IP statistics (ip is already persisted).
Records category change in history if category has changed. Records category change in history if category has changed.
@@ -250,7 +266,9 @@ class DatabaseManager:
last_analysis: timestamp of last analysis last_analysis: timestamp of last analysis
""" """
applogger.debug(f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}") applogger.debug(
f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}"
)
applogger.info(f"IP: {ip} category has been updated to {category}") applogger.info(f"IP: {ip} category has been updated to {category}")
session = self.session session = self.session
@@ -260,7 +278,9 @@ class DatabaseManager:
# Check if category has changed and record it # Check if category has changed and record it
old_category = ip_stats.category old_category = ip_stats.category
if old_category != category: if old_category != category:
self._record_category_change(sanitized_ip, old_category, category, last_analysis) self._record_category_change(
sanitized_ip, old_category, category, last_analysis
)
ip_stats.analyzed_metrics = analyzed_metrics ip_stats.analyzed_metrics = analyzed_metrics
ip_stats.category = category ip_stats.category = category
@@ -286,11 +306,12 @@ class DatabaseManager:
sanitized_ip = sanitize_ip(ip) sanitized_ip = sanitize_ip(ip)
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first() ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
# Record the manual category change # Record the manual category change
old_category = ip_stats.category old_category = ip_stats.category
if old_category != category: if old_category != category:
self._record_category_change(sanitized_ip, old_category, category, datetime.now()) self._record_category_change(
sanitized_ip, old_category, category, datetime.now()
)
ip_stats.category = category ip_stats.category = category
ip_stats.manual_category = True ip_stats.manual_category = True
@@ -301,7 +322,13 @@ class DatabaseManager:
session.rollback() session.rollback()
print(f"Error updating manual category: {e}") print(f"Error updating manual category: {e}")
def _record_category_change(self, ip: str, old_category: Optional[str], new_category: str, timestamp: datetime) -> None: def _record_category_change(
self,
ip: str,
old_category: Optional[str],
new_category: str,
timestamp: datetime,
) -> None:
""" """
Internal method to record category changes in history. Internal method to record category changes in history.
Only records if there's an actual change from a previous category. Only records if there's an actual change from a previous category.
@@ -323,7 +350,7 @@ class DatabaseManager:
ip=ip, ip=ip,
old_category=old_category, old_category=old_category,
new_category=new_category, new_category=new_category,
timestamp=timestamp timestamp=timestamp,
) )
session.add(history_entry) session.add(history_entry)
session.commit() session.commit()
@@ -344,22 +371,32 @@ class DatabaseManager:
session = self.session session = self.session
try: try:
sanitized_ip = sanitize_ip(ip) sanitized_ip = sanitize_ip(ip)
history = session.query(CategoryHistory).filter( history = (
CategoryHistory.ip == sanitized_ip session.query(CategoryHistory)
).order_by(CategoryHistory.timestamp.asc()).all() .filter(CategoryHistory.ip == sanitized_ip)
.order_by(CategoryHistory.timestamp.asc())
.all()
)
return [ return [
{ {
'old_category': h.old_category, "old_category": h.old_category,
'new_category': h.new_category, "new_category": h.new_category,
'timestamp': h.timestamp.isoformat() "timestamp": h.timestamp.isoformat(),
} }
for h in history for h in history
] ]
finally: finally:
self.close_session() self.close_session()
def update_ip_rep_infos(self, ip: str, country_code: str, asn: str, asn_org: str, list_on: Dict[str,str]) -> None: def update_ip_rep_infos(
self,
ip: str,
country_code: str,
asn: str,
asn_org: str,
list_on: Dict[str, str],
) -> None:
""" """
Update IP rep stats Update IP rep stats
@@ -400,20 +437,25 @@ class DatabaseManager:
""" """
session = self.session session = self.session
try: try:
ips = session.query(IpStats.ip).filter( ips = (
IpStats.country_code.is_(None), session.query(IpStats.ip)
~IpStats.ip.like('10.%'), .filter(
~IpStats.ip.like('172.16.%'), IpStats.country_code.is_(None),
~IpStats.ip.like('172.17.%'), ~IpStats.ip.like("10.%"),
~IpStats.ip.like('172.18.%'), ~IpStats.ip.like("172.16.%"),
~IpStats.ip.like('172.19.%'), ~IpStats.ip.like("172.17.%"),
~IpStats.ip.like('172.2_.%'), ~IpStats.ip.like("172.18.%"),
~IpStats.ip.like('172.30.%'), ~IpStats.ip.like("172.19.%"),
~IpStats.ip.like('172.31.%'), ~IpStats.ip.like("172.2_.%"),
~IpStats.ip.like('192.168.%'), ~IpStats.ip.like("172.30.%"),
~IpStats.ip.like('127.%'), ~IpStats.ip.like("172.31.%"),
~IpStats.ip.like('169.254.%') ~IpStats.ip.like("192.168.%"),
).limit(limit).all() ~IpStats.ip.like("127.%"),
~IpStats.ip.like("169.254.%"),
)
.limit(limit)
.all()
)
return [ip[0] for ip in ips] return [ip[0] for ip in ips]
finally: finally:
self.close_session() self.close_session()
@@ -424,7 +466,7 @@ class DatabaseManager:
offset: int = 0, offset: int = 0,
ip_filter: Optional[str] = None, ip_filter: Optional[str] = None,
suspicious_only: bool = False, suspicious_only: bool = False,
since_minutes: Optional[int] = None since_minutes: Optional[int] = None,
) -> List[Dict[str, Any]]: ) -> List[Dict[str, Any]]:
""" """
Retrieve access logs with optional filtering. Retrieve access logs with optional filtering.
@@ -455,15 +497,15 @@ class DatabaseManager:
return [ return [
{ {
'id': log.id, "id": log.id,
'ip': log.ip, "ip": log.ip,
'path': log.path, "path": log.path,
'user_agent': log.user_agent, "user_agent": log.user_agent,
'method': log.method, "method": log.method,
'is_suspicious': log.is_suspicious, "is_suspicious": log.is_suspicious,
'is_honeypot_trigger': log.is_honeypot_trigger, "is_honeypot_trigger": log.is_honeypot_trigger,
'timestamp': log.timestamp.isoformat(), "timestamp": log.timestamp.isoformat(),
'attack_types': [d.attack_type for d in log.attack_detections] "attack_types": [d.attack_type for d in log.attack_detections],
} }
for log in logs for log in logs
] ]
@@ -521,10 +563,7 @@ class DatabaseManager:
# self.close_session() # self.close_session()
def get_credential_attempts( def get_credential_attempts(
self, self, limit: int = 100, offset: int = 0, ip_filter: Optional[str] = None
limit: int = 100,
offset: int = 0,
ip_filter: Optional[str] = None
) -> List[Dict[str, Any]]: ) -> List[Dict[str, Any]]:
""" """
Retrieve credential attempts with optional filtering. Retrieve credential attempts with optional filtering.
@@ -550,12 +589,12 @@ class DatabaseManager:
return [ return [
{ {
'id': attempt.id, "id": attempt.id,
'ip': attempt.ip, "ip": attempt.ip,
'path': attempt.path, "path": attempt.path,
'username': attempt.username, "username": attempt.username,
'password': attempt.password, "password": attempt.password,
'timestamp': attempt.timestamp.isoformat() "timestamp": attempt.timestamp.isoformat(),
} }
for attempt in attempts for attempt in attempts
] ]
@@ -574,26 +613,29 @@ class DatabaseManager:
""" """
session = self.session session = self.session
try: try:
stats = session.query(IpStats).order_by( stats = (
IpStats.total_requests.desc() session.query(IpStats)
).limit(limit).all() .order_by(IpStats.total_requests.desc())
.limit(limit)
.all()
)
return [ return [
{ {
'ip': s.ip, "ip": s.ip,
'total_requests': s.total_requests, "total_requests": s.total_requests,
'first_seen': s.first_seen.isoformat(), "first_seen": s.first_seen.isoformat(),
'last_seen': s.last_seen.isoformat(), "last_seen": s.last_seen.isoformat(),
'country_code': s.country_code, "country_code": s.country_code,
'city': s.city, "city": s.city,
'asn': s.asn, "asn": s.asn,
'asn_org': s.asn_org, "asn_org": s.asn_org,
'reputation_score': s.reputation_score, "reputation_score": s.reputation_score,
'reputation_source': s.reputation_source, "reputation_source": s.reputation_source,
'analyzed_metrics': s.analyzed_metrics, "analyzed_metrics": s.analyzed_metrics,
'category': s.category, "category": s.category,
'manual_category': s.manual_category, "manual_category": s.manual_category,
'last_analysis': s.last_analysis "last_analysis": s.last_analysis,
} }
for s in stats for s in stats
] ]
@@ -621,23 +663,25 @@ class DatabaseManager:
category_history = self.get_category_history(ip) category_history = self.get_category_history(ip)
return { return {
'ip': stat.ip, "ip": stat.ip,
'total_requests': stat.total_requests, "total_requests": stat.total_requests,
'first_seen': stat.first_seen.isoformat() if stat.first_seen else None, "first_seen": stat.first_seen.isoformat() if stat.first_seen else None,
'last_seen': stat.last_seen.isoformat() if stat.last_seen else None, "last_seen": stat.last_seen.isoformat() if stat.last_seen else None,
'country_code': stat.country_code, "country_code": stat.country_code,
'city': stat.city, "city": stat.city,
'asn': stat.asn, "asn": stat.asn,
'asn_org': stat.asn_org, "asn_org": stat.asn_org,
'list_on': stat.list_on or {}, "list_on": stat.list_on or {},
'reputation_score': stat.reputation_score, "reputation_score": stat.reputation_score,
'reputation_source': stat.reputation_source, "reputation_source": stat.reputation_source,
'analyzed_metrics': stat.analyzed_metrics or {}, "analyzed_metrics": stat.analyzed_metrics or {},
'category': stat.category, "category": stat.category,
'category_scores': stat.category_scores or {}, "category_scores": stat.category_scores or {},
'manual_category': stat.manual_category, "manual_category": stat.manual_category,
'last_analysis': stat.last_analysis.isoformat() if stat.last_analysis else None, "last_analysis": (
'category_history': category_history stat.last_analysis.isoformat() if stat.last_analysis else None
),
"category_history": category_history,
} }
finally: finally:
self.close_session() self.close_session()
@@ -654,25 +698,32 @@ class DatabaseManager:
try: try:
# Get main aggregate counts in one query # Get main aggregate counts in one query
result = session.query( result = session.query(
func.count(AccessLog.id).label('total_accesses'), func.count(AccessLog.id).label("total_accesses"),
func.count(distinct(AccessLog.ip)).label('unique_ips'), func.count(distinct(AccessLog.ip)).label("unique_ips"),
func.count(distinct(AccessLog.path)).label('unique_paths'), func.count(distinct(AccessLog.path)).label("unique_paths"),
func.sum(case((AccessLog.is_suspicious == True, 1), else_=0)).label('suspicious_accesses'), func.sum(case((AccessLog.is_suspicious == True, 1), else_=0)).label(
func.sum(case((AccessLog.is_honeypot_trigger == True, 1), else_=0)).label('honeypot_triggered') "suspicious_accesses"
),
func.sum(
case((AccessLog.is_honeypot_trigger == True, 1), else_=0)
).label("honeypot_triggered"),
).first() ).first()
# Get unique IPs that triggered honeypots # Get unique IPs that triggered honeypots
honeypot_ips = session.query( honeypot_ips = (
func.count(distinct(AccessLog.ip)) session.query(func.count(distinct(AccessLog.ip)))
).filter(AccessLog.is_honeypot_trigger == True).scalar() or 0 .filter(AccessLog.is_honeypot_trigger == True)
.scalar()
or 0
)
return { return {
'total_accesses': result.total_accesses or 0, "total_accesses": result.total_accesses or 0,
'unique_ips': result.unique_ips or 0, "unique_ips": result.unique_ips or 0,
'unique_paths': result.unique_paths or 0, "unique_paths": result.unique_paths or 0,
'suspicious_accesses': int(result.suspicious_accesses or 0), "suspicious_accesses": int(result.suspicious_accesses or 0),
'honeypot_triggered': int(result.honeypot_triggered or 0), "honeypot_triggered": int(result.honeypot_triggered or 0),
'honeypot_ips': honeypot_ips "honeypot_ips": honeypot_ips,
} }
finally: finally:
self.close_session() self.close_session()
@@ -689,12 +740,13 @@ class DatabaseManager:
""" """
session = self.session session = self.session
try: try:
results = session.query( results = (
AccessLog.ip, session.query(AccessLog.ip, func.count(AccessLog.id).label("count"))
func.count(AccessLog.id).label('count') .group_by(AccessLog.ip)
).group_by(AccessLog.ip).order_by( .order_by(func.count(AccessLog.id).desc())
func.count(AccessLog.id).desc() .limit(limit)
).limit(limit).all() .all()
)
return [(row.ip, row.count) for row in results] return [(row.ip, row.count) for row in results]
finally: finally:
@@ -712,12 +764,13 @@ class DatabaseManager:
""" """
session = self.session session = self.session
try: try:
results = session.query( results = (
AccessLog.path, session.query(AccessLog.path, func.count(AccessLog.id).label("count"))
func.count(AccessLog.id).label('count') .group_by(AccessLog.path)
).group_by(AccessLog.path).order_by( .order_by(func.count(AccessLog.id).desc())
func.count(AccessLog.id).desc() .limit(limit)
).limit(limit).all() .all()
)
return [(row.path, row.count) for row in results] return [(row.path, row.count) for row in results]
finally: finally:
@@ -735,15 +788,16 @@ class DatabaseManager:
""" """
session = self.session session = self.session
try: try:
results = session.query( results = (
AccessLog.user_agent, session.query(
func.count(AccessLog.id).label('count') AccessLog.user_agent, func.count(AccessLog.id).label("count")
).filter( )
AccessLog.user_agent.isnot(None), .filter(AccessLog.user_agent.isnot(None), AccessLog.user_agent != "")
AccessLog.user_agent != '' .group_by(AccessLog.user_agent)
).group_by(AccessLog.user_agent).order_by( .order_by(func.count(AccessLog.id).desc())
func.count(AccessLog.id).desc() .limit(limit)
).limit(limit).all() .all()
)
return [(row.user_agent, row.count) for row in results] return [(row.user_agent, row.count) for row in results]
finally: finally:
@@ -761,16 +815,20 @@ class DatabaseManager:
""" """
session = self.session session = self.session
try: try:
logs = session.query(AccessLog).filter( logs = (
AccessLog.is_suspicious == True session.query(AccessLog)
).order_by(AccessLog.timestamp.desc()).limit(limit).all() .filter(AccessLog.is_suspicious == True)
.order_by(AccessLog.timestamp.desc())
.limit(limit)
.all()
)
return [ return [
{ {
'ip': log.ip, "ip": log.ip,
'path': log.path, "path": log.path,
'user_agent': log.user_agent, "user_agent": log.user_agent,
'timestamp': log.timestamp.isoformat() "timestamp": log.timestamp.isoformat(),
} }
for log in logs for log in logs
] ]
@@ -787,12 +845,11 @@ class DatabaseManager:
session = self.session session = self.session
try: try:
# Get all honeypot triggers grouped by IP # Get all honeypot triggers grouped by IP
results = session.query( results = (
AccessLog.ip, session.query(AccessLog.ip, AccessLog.path)
AccessLog.path .filter(AccessLog.is_honeypot_trigger == True)
).filter( .all()
AccessLog.is_honeypot_trigger == True )
).all()
# Group paths by IP # Group paths by IP
ip_paths: Dict[str, List[str]] = {} ip_paths: Dict[str, List[str]] = {}
@@ -819,17 +876,21 @@ class DatabaseManager:
session = self.session session = self.session
try: try:
# Get access logs that have attack detections # Get access logs that have attack detections
logs = session.query(AccessLog).join( logs = (
AttackDetection session.query(AccessLog)
).order_by(AccessLog.timestamp.desc()).limit(limit).all() .join(AttackDetection)
.order_by(AccessLog.timestamp.desc())
.limit(limit)
.all()
)
return [ return [
{ {
'ip': log.ip, "ip": log.ip,
'path': log.path, "path": log.path,
'user_agent': log.user_agent, "user_agent": log.user_agent,
'timestamp': log.timestamp.isoformat(), "timestamp": log.timestamp.isoformat(),
'attack_types': [d.attack_type for d in log.attack_detections] "attack_types": [d.attack_type for d in log.attack_detections],
} }
for log in logs for log in logs
] ]

View File

@@ -11,6 +11,7 @@ from templates import html_templates
from wordlists import get_wordlists from wordlists import get_wordlists
from config import get_config from config import get_config
def random_username() -> str: def random_username() -> str:
"""Generate random username""" """Generate random username"""
wl = get_wordlists() wl = get_wordlists()
@@ -21,10 +22,10 @@ def random_password() -> str:
"""Generate random password""" """Generate random password"""
wl = get_wordlists() wl = get_wordlists()
templates = [ templates = [
lambda: ''.join(random.choices(string.ascii_letters + string.digits, k=12)), lambda: "".join(random.choices(string.ascii_letters + string.digits, k=12)),
lambda: f"{random.choice(wl.password_prefixes)}{random.randint(100, 999)}!", lambda: f"{random.choice(wl.password_prefixes)}{random.randint(100, 999)}!",
lambda: f"{random.choice(wl.simple_passwords)}{random.randint(1000, 9999)}", lambda: f"{random.choice(wl.simple_passwords)}{random.randint(1000, 9999)}",
lambda: ''.join(random.choices(string.ascii_lowercase, k=8)), lambda: "".join(random.choices(string.ascii_lowercase, k=8)),
] ]
return random.choice(templates)() return random.choice(templates)()
@@ -36,6 +37,7 @@ def random_email(username: str = None) -> str:
username = random_username() username = random_username()
return f"{username}@{random.choice(wl.email_domains)}" return f"{username}@{random.choice(wl.email_domains)}"
def random_server_header() -> str: def random_server_header() -> str:
"""Generate random server header from wordlists""" """Generate random server header from wordlists"""
config = get_config() config = get_config()
@@ -44,10 +46,11 @@ def random_server_header() -> str:
wl = get_wordlists() wl = get_wordlists()
return random.choice(wl.server_headers) return random.choice(wl.server_headers)
def random_api_key() -> str: def random_api_key() -> str:
"""Generate random API key""" """Generate random API key"""
wl = get_wordlists() wl = get_wordlists()
key = ''.join(random.choices(string.ascii_letters + string.digits, k=32)) key = "".join(random.choices(string.ascii_letters + string.digits, k=32))
return random.choice(wl.api_key_prefixes) + key return random.choice(wl.api_key_prefixes) + key
@@ -87,14 +90,16 @@ def users_json() -> str:
users = [] users = []
for i in range(random.randint(3, 8)): for i in range(random.randint(3, 8)):
username = random_username() username = random_username()
users.append({ users.append(
"id": i + 1, {
"username": username, "id": i + 1,
"email": random_email(username), "username": username,
"password": random_password(), "email": random_email(username),
"role": random.choice(wl.user_roles), "password": random_password(),
"api_token": random_api_key() "role": random.choice(wl.user_roles),
}) "api_token": random_api_key(),
}
)
return json.dumps({"users": users}, indent=2) return json.dumps({"users": users}, indent=2)
@@ -102,20 +107,28 @@ def api_keys_json() -> str:
"""Generate fake api_keys.json with random data""" """Generate fake api_keys.json with random data"""
keys = { keys = {
"stripe": { "stripe": {
"public_key": "pk_live_" + ''.join(random.choices(string.ascii_letters + string.digits, k=24)), "public_key": "pk_live_"
"secret_key": random_api_key() + "".join(random.choices(string.ascii_letters + string.digits, k=24)),
"secret_key": random_api_key(),
}, },
"aws": { "aws": {
"access_key_id": "AKIA" + ''.join(random.choices(string.ascii_uppercase + string.digits, k=16)), "access_key_id": "AKIA"
"secret_access_key": ''.join(random.choices(string.ascii_letters + string.digits + '+/', k=40)) + "".join(random.choices(string.ascii_uppercase + string.digits, k=16)),
"secret_access_key": "".join(
random.choices(string.ascii_letters + string.digits + "+/", k=40)
),
}, },
"sendgrid": { "sendgrid": {
"api_key": "SG." + ''.join(random.choices(string.ascii_letters + string.digits, k=48)) "api_key": "SG."
+ "".join(random.choices(string.ascii_letters + string.digits, k=48))
}, },
"twilio": { "twilio": {
"account_sid": "AC" + ''.join(random.choices(string.ascii_lowercase + string.digits, k=32)), "account_sid": "AC"
"auth_token": ''.join(random.choices(string.ascii_lowercase + string.digits, k=32)) + "".join(random.choices(string.ascii_lowercase + string.digits, k=32)),
} "auth_token": "".join(
random.choices(string.ascii_lowercase + string.digits, k=32)
),
},
} }
return json.dumps(keys, indent=2) return json.dumps(keys, indent=2)
@@ -128,46 +141,65 @@ def api_response(path: str) -> str:
users = [] users = []
for i in range(count): for i in range(count):
username = random_username() username = random_username()
users.append({ users.append(
"id": i + 1, {
"username": username, "id": i + 1,
"email": random_email(username), "username": username,
"role": random.choice(wl.user_roles) "email": random_email(username),
}) "role": random.choice(wl.user_roles),
}
)
return users return users
responses = { responses = {
'/api/users': json.dumps({ "/api/users": json.dumps(
"users": random_users(random.randint(2, 5)), {
"total": random.randint(50, 500) "users": random_users(random.randint(2, 5)),
}, indent=2), "total": random.randint(50, 500),
'/api/v1/users': json.dumps({
"status": "success",
"data": [{
"id": random.randint(1, 100),
"name": random_username(),
"api_key": random_api_key()
}]
}, indent=2),
'/api/v2/secrets': json.dumps({
"database": {
"host": random.choice(wl.database_hosts),
"username": random_username(),
"password": random_password(),
"database": random_database_name()
}, },
"api_keys": { indent=2,
"stripe": random_api_key(), ),
"aws": 'AKIA' + ''.join(random.choices(string.ascii_uppercase + string.digits, k=16)) "/api/v1/users": json.dumps(
} {
}, indent=2), "status": "success",
'/api/config': json.dumps({ "data": [
"app_name": random.choice(wl.application_names), {
"debug": random.choice([True, False]), "id": random.randint(1, 100),
"secret_key": random_api_key(), "name": random_username(),
"database_url": f"postgresql://{random_username()}:{random_password()}@localhost/{random_database_name()}" "api_key": random_api_key(),
}, indent=2), }
'/.env': f"""APP_NAME={random.choice(wl.application_names)} ],
},
indent=2,
),
"/api/v2/secrets": json.dumps(
{
"database": {
"host": random.choice(wl.database_hosts),
"username": random_username(),
"password": random_password(),
"database": random_database_name(),
},
"api_keys": {
"stripe": random_api_key(),
"aws": "AKIA"
+ "".join(
random.choices(string.ascii_uppercase + string.digits, k=16)
),
},
},
indent=2,
),
"/api/config": json.dumps(
{
"app_name": random.choice(wl.application_names),
"debug": random.choice([True, False]),
"secret_key": random_api_key(),
"database_url": f"postgresql://{random_username()}:{random_password()}@localhost/{random_database_name()}",
},
indent=2,
),
"/.env": f"""APP_NAME={random.choice(wl.application_names)}
DEBUG={random.choice(['true', 'false'])} DEBUG={random.choice(['true', 'false'])}
APP_KEY=base64:{''.join(random.choices(string.ascii_letters + string.digits, k=32))}= APP_KEY=base64:{''.join(random.choices(string.ascii_letters + string.digits, k=32))}=
DB_CONNECTION=mysql DB_CONNECTION=mysql
@@ -179,7 +211,7 @@ DB_PASSWORD={random_password()}
AWS_ACCESS_KEY_ID=AKIA{''.join(random.choices(string.ascii_uppercase + string.digits, k=16))} AWS_ACCESS_KEY_ID=AKIA{''.join(random.choices(string.ascii_uppercase + string.digits, k=16))}
AWS_SECRET_ACCESS_KEY={''.join(random.choices(string.ascii_letters + string.digits + '+/', k=40))} AWS_SECRET_ACCESS_KEY={''.join(random.choices(string.ascii_letters + string.digits + '+/', k=40))}
STRIPE_SECRET={random_api_key()} STRIPE_SECRET={random_api_key()}
""" """,
} }
return responses.get(path, json.dumps({"error": "Not found"}, indent=2)) return responses.get(path, json.dumps({"error": "Not found"}, indent=2))
@@ -191,7 +223,9 @@ def directory_listing(path: str) -> str:
files = wl.directory_files files = wl.directory_files
dirs = wl.directory_dirs dirs = wl.directory_dirs
selected_files = [(f, random.randint(1024, 1024*1024)) selected_files = [
for f in random.sample(files, min(6, len(files)))] (f, random.randint(1024, 1024 * 1024))
for f in random.sample(files, min(6, len(files)))
]
return html_templates.directory_listing(path, dirs, selected_files) return html_templates.directory_listing(path, dirs, selected_files)

View File

@@ -14,8 +14,13 @@ from analyzer import Analyzer
from templates import html_templates from templates import html_templates
from templates.dashboard_template import generate_dashboard from templates.dashboard_template import generate_dashboard
from generators import ( from generators import (
credentials_txt, passwords_txt, users_json, api_keys_json, credentials_txt,
api_response, directory_listing, random_server_header passwords_txt,
users_json,
api_keys_json,
api_response,
directory_listing,
random_server_header,
) )
from wordlists import get_wordlists from wordlists import get_wordlists
from sql_errors import generate_sql_error_response, get_sql_response_with_data from sql_errors import generate_sql_error_response, get_sql_response_with_data
@@ -25,6 +30,7 @@ from server_errors import generate_server_error
class Handler(BaseHTTPRequestHandler): class Handler(BaseHTTPRequestHandler):
"""HTTP request handler for the deception server""" """HTTP request handler for the deception server"""
webpages: Optional[List[str]] = None webpages: Optional[List[str]] = None
config: Config = None config: Config = None
tracker: AccessTracker = None tracker: AccessTracker = None
@@ -37,15 +43,15 @@ class Handler(BaseHTTPRequestHandler):
def _get_client_ip(self) -> str: def _get_client_ip(self) -> str:
"""Extract client IP address from request, checking proxy headers first""" """Extract client IP address from request, checking proxy headers first"""
# Headers might not be available during early error logging # Headers might not be available during early error logging
if hasattr(self, 'headers') and self.headers: if hasattr(self, "headers") and self.headers:
# Check X-Forwarded-For header (set by load balancers/proxies) # Check X-Forwarded-For header (set by load balancers/proxies)
forwarded_for = self.headers.get('X-Forwarded-For') forwarded_for = self.headers.get("X-Forwarded-For")
if forwarded_for: if forwarded_for:
# X-Forwarded-For can contain multiple IPs, get the first (original client) # X-Forwarded-For can contain multiple IPs, get the first (original client)
return forwarded_for.split(',')[0].strip() return forwarded_for.split(",")[0].strip()
# Check X-Real-IP header (set by nginx and other proxies) # Check X-Real-IP header (set by nginx and other proxies)
real_ip = self.headers.get('X-Real-IP') real_ip = self.headers.get("X-Real-IP")
if real_ip: if real_ip:
return real_ip.strip() return real_ip.strip()
@@ -54,7 +60,7 @@ class Handler(BaseHTTPRequestHandler):
def _get_user_agent(self) -> str: def _get_user_agent(self) -> str:
"""Extract user agent from request""" """Extract user agent from request"""
return self.headers.get('User-Agent', '') return self.headers.get("User-Agent", "")
def _get_category_by_ip(self, client_ip: str) -> str: def _get_category_by_ip(self, client_ip: str) -> str:
"""Get the category of an IP from the database""" """Get the category of an IP from the database"""
@@ -97,7 +103,7 @@ class Handler(BaseHTTPRequestHandler):
Returns True if the path was handled, False otherwise. Returns True if the path was handled, False otherwise.
""" """
# SQL-vulnerable endpoints # SQL-vulnerable endpoints
sql_endpoints = ['/api/search', '/api/sql', '/api/database'] sql_endpoints = ["/api/search", "/api/sql", "/api/database"]
base_path = urlparse(path).path base_path = urlparse(path).path
if base_path not in sql_endpoints: if base_path not in sql_endpoints:
@@ -112,22 +118,30 @@ class Handler(BaseHTTPRequestHandler):
user_agent = self._get_user_agent() user_agent = self._get_user_agent()
# Always check for SQL injection patterns # Always check for SQL injection patterns
error_msg, content_type, status_code = generate_sql_error_response(query_string or "") error_msg, content_type, status_code = generate_sql_error_response(
query_string or ""
)
if error_msg: if error_msg:
# SQL injection detected - log and return error # SQL injection detected - log and return error
self.access_logger.warning(f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}") self.access_logger.warning(
f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}"
)
self.send_response(status_code) self.send_response(status_code)
self.send_header('Content-type', content_type) self.send_header("Content-type", content_type)
self.end_headers() self.end_headers()
self.wfile.write(error_msg.encode()) self.wfile.write(error_msg.encode())
else: else:
# No injection detected - return fake data # No injection detected - return fake data
self.access_logger.info(f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}") self.access_logger.info(
f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}"
)
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'application/json') self.send_header("Content-type", "application/json")
self.end_headers() self.end_headers()
response_data = get_sql_response_with_data(base_path, query_string or "") response_data = get_sql_response_with_data(
base_path, query_string or ""
)
self.wfile.write(response_data.encode()) self.wfile.write(response_data.encode())
return True return True
@@ -140,7 +154,7 @@ class Handler(BaseHTTPRequestHandler):
# Still send a response even on error # Still send a response even on error
try: try:
self.send_response(500) self.send_response(500)
self.send_header('Content-type', 'application/json') self.send_header("Content-type", "application/json")
self.end_headers() self.end_headers()
self.wfile.write(b'{"error": "Internal server error"}') self.wfile.write(b'{"error": "Internal server error"}')
except: except:
@@ -159,18 +173,22 @@ class Handler(BaseHTTPRequestHandler):
# Determine if we should apply crawler page limit based on config and IP category # Determine if we should apply crawler page limit based on config and IP category
should_apply_crawler_limit = False should_apply_crawler_limit = False
if self.config.infinite_pages_for_malicious: if self.config.infinite_pages_for_malicious:
if (ip_category == "good_crawler" or ip_category == "regular_user") and page_visit_count >= self.config.max_pages_limit: if (
ip_category == "good_crawler" or ip_category == "regular_user"
) and page_visit_count >= self.config.max_pages_limit:
should_apply_crawler_limit = True should_apply_crawler_limit = True
else: else:
if (ip_category == "good_crawler" or ip_category == "bad_crawler" or ip_category == "attacker") and page_visit_count >= self.config.max_pages_limit: if (
ip_category == "good_crawler"
or ip_category == "bad_crawler"
or ip_category == "attacker"
) and page_visit_count >= self.config.max_pages_limit:
should_apply_crawler_limit = True should_apply_crawler_limit = True
# If good crawler reached max pages, return a simple page with no links # If good crawler reached max pages, return a simple page with no links
if should_apply_crawler_limit: if should_apply_crawler_limit:
return html_templates.main_page( return html_templates.main_page(
Handler.counter, Handler.counter, "<p>Crawl limit reached.</p>"
'<p>Crawl limit reached.</p>'
) )
num_pages = random.randint(*self.config.links_per_page_range) num_pages = random.randint(*self.config.links_per_page_range)
@@ -189,10 +207,12 @@ class Handler(BaseHTTPRequestHandler):
# Add links # Add links
if self.webpages is None: if self.webpages is None:
for _ in range(num_pages): for _ in range(num_pages):
address = ''.join([ address = "".join(
random.choice(self.config.char_space) [
for _ in range(random.randint(*self.config.links_length_range)) random.choice(self.config.char_space)
]) for _ in range(random.randint(*self.config.links_length_range))
]
)
content += f""" content += f"""
<div class="link-box"> <div class="link-box">
<a href="{address}">{address}</a> <a href="{address}">{address}</a>
@@ -223,27 +243,36 @@ class Handler(BaseHTTPRequestHandler):
post_data = "" post_data = ""
from urllib.parse import urlparse from urllib.parse import urlparse
base_path = urlparse(self.path).path base_path = urlparse(self.path).path
if base_path in ['/api/search', '/api/sql', '/api/database']: if base_path in ["/api/search", "/api/sql", "/api/database"]:
content_length = int(self.headers.get('Content-Length', 0)) content_length = int(self.headers.get("Content-Length", 0))
if content_length > 0: if content_length > 0:
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace") post_data = self.rfile.read(content_length).decode(
"utf-8", errors="replace"
)
self.access_logger.info(f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}") self.access_logger.info(
f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}"
)
error_msg, content_type, status_code = generate_sql_error_response(post_data) error_msg, content_type, status_code = generate_sql_error_response(
post_data
)
try: try:
if error_msg: if error_msg:
self.access_logger.warning(f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}") self.access_logger.warning(
f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}"
)
self.send_response(status_code) self.send_response(status_code)
self.send_header('Content-type', content_type) self.send_header("Content-type", content_type)
self.end_headers() self.end_headers()
self.wfile.write(error_msg.encode()) self.wfile.write(error_msg.encode())
else: else:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'application/json') self.send_header("Content-type", "application/json")
self.end_headers() self.end_headers()
response_data = get_sql_response_with_data(base_path, post_data) response_data = get_sql_response_with_data(base_path, post_data)
self.wfile.write(response_data.encode()) self.wfile.write(response_data.encode())
@@ -253,28 +282,35 @@ class Handler(BaseHTTPRequestHandler):
self.app_logger.error(f"Error in SQL POST handler: {str(e)}") self.app_logger.error(f"Error in SQL POST handler: {str(e)}")
return return
if base_path == '/api/contact': if base_path == "/api/contact":
content_length = int(self.headers.get('Content-Length', 0)) content_length = int(self.headers.get("Content-Length", 0))
if content_length > 0: if content_length > 0:
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace") post_data = self.rfile.read(content_length).decode(
"utf-8", errors="replace"
)
parsed_data = {} parsed_data = {}
for pair in post_data.split('&'): for pair in post_data.split("&"):
if '=' in pair: if "=" in pair:
key, value = pair.split('=', 1) key, value = pair.split("=", 1)
from urllib.parse import unquote_plus from urllib.parse import unquote_plus
parsed_data[unquote_plus(key)] = unquote_plus(value) parsed_data[unquote_plus(key)] = unquote_plus(value)
xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values()) xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values())
if xss_detected: if xss_detected:
self.access_logger.warning(f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}") self.access_logger.warning(
f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}"
)
else: else:
self.access_logger.info(f"[XSS ENDPOINT POST] {client_ip} - {base_path}") self.access_logger.info(
f"[XSS ENDPOINT POST] {client_ip} - {base_path}"
)
try: try:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header("Content-type", "text/html")
self.end_headers() self.end_headers()
response_html = generate_xss_response(parsed_data) response_html = generate_xss_response(parsed_data)
self.wfile.write(response_html.encode()) self.wfile.write(response_html.encode())
@@ -284,11 +320,15 @@ class Handler(BaseHTTPRequestHandler):
self.app_logger.error(f"Error in XSS POST handler: {str(e)}") self.app_logger.error(f"Error in XSS POST handler: {str(e)}")
return return
self.access_logger.warning(f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}") self.access_logger.warning(
f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}"
)
content_length = int(self.headers.get('Content-Length', 0)) content_length = int(self.headers.get("Content-Length", 0))
if content_length > 0: if content_length > 0:
post_data = self.rfile.read(content_length).decode('utf-8', errors="replace") post_data = self.rfile.read(content_length).decode(
"utf-8", errors="replace"
)
self.access_logger.warning(f"[POST DATA] {post_data[:200]}") self.access_logger.warning(f"[POST DATA] {post_data[:200]}")
@@ -301,18 +341,24 @@ class Handler(BaseHTTPRequestHandler):
self.credential_logger.info(credential_line) self.credential_logger.info(credential_line)
# Also record in tracker for dashboard # Also record in tracker for dashboard
self.tracker.record_credential_attempt(client_ip, self.path, username or 'N/A', password or 'N/A') self.tracker.record_credential_attempt(
client_ip, self.path, username or "N/A", password or "N/A"
)
self.access_logger.warning(f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}") self.access_logger.warning(
f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}"
)
# send the post data (body) to the record_access function so the post data can be used to detect suspicious things. # send the post data (body) to the record_access function so the post data can be used to detect suspicious things.
self.tracker.record_access(client_ip, self.path, user_agent, post_data, method='POST') self.tracker.record_access(
client_ip, self.path, user_agent, post_data, method="POST"
)
time.sleep(1) time.sleep(1)
try: try:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header("Content-type", "text/html")
self.end_headers() self.end_headers()
self.wfile.write(html_templates.login_error().encode()) self.wfile.write(html_templates.login_error().encode())
except BrokenPipeError: except BrokenPipeError:
@@ -330,95 +376,102 @@ class Handler(BaseHTTPRequestHandler):
return True return True
try: try:
if path == '/robots.txt': if path == "/robots.txt":
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/plain') self.send_header("Content-type", "text/plain")
self.end_headers() self.end_headers()
self.wfile.write(html_templates.robots_txt().encode()) self.wfile.write(html_templates.robots_txt().encode())
return True return True
if path in ['/credentials.txt', '/passwords.txt', '/admin_notes.txt']: if path in ["/credentials.txt", "/passwords.txt", "/admin_notes.txt"]:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/plain') self.send_header("Content-type", "text/plain")
self.end_headers() self.end_headers()
if 'credentials' in path: if "credentials" in path:
self.wfile.write(credentials_txt().encode()) self.wfile.write(credentials_txt().encode())
else: else:
self.wfile.write(passwords_txt().encode()) self.wfile.write(passwords_txt().encode())
return True return True
if path in ['/users.json', '/api_keys.json', '/config.json']: if path in ["/users.json", "/api_keys.json", "/config.json"]:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'application/json') self.send_header("Content-type", "application/json")
self.end_headers() self.end_headers()
if 'users' in path: if "users" in path:
self.wfile.write(users_json().encode()) self.wfile.write(users_json().encode())
elif 'api_keys' in path: elif "api_keys" in path:
self.wfile.write(api_keys_json().encode()) self.wfile.write(api_keys_json().encode())
else: else:
self.wfile.write(api_response('/api/config').encode()) self.wfile.write(api_response("/api/config").encode())
return True return True
if path in ['/admin', '/admin/', '/admin/login', '/login']: if path in ["/admin", "/admin/", "/admin/login", "/login"]:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header("Content-type", "text/html")
self.end_headers() self.end_headers()
self.wfile.write(html_templates.login_form().encode()) self.wfile.write(html_templates.login_form().encode())
return True return True
if path in ['/users', '/user', '/database', '/db', '/search']: if path in ["/users", "/user", "/database", "/db", "/search"]:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header("Content-type", "text/html")
self.end_headers() self.end_headers()
self.wfile.write(html_templates.product_search().encode()) self.wfile.write(html_templates.product_search().encode())
return True return True
if path in ['/info', '/input', '/contact', '/feedback', '/comment']: if path in ["/info", "/input", "/contact", "/feedback", "/comment"]:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header("Content-type", "text/html")
self.end_headers() self.end_headers()
self.wfile.write(html_templates.input_form().encode()) self.wfile.write(html_templates.input_form().encode())
return True return True
if path == '/server': if path == "/server":
error_html, content_type = generate_server_error() error_html, content_type = generate_server_error()
self.send_response(500) self.send_response(500)
self.send_header('Content-type', content_type) self.send_header("Content-type", content_type)
self.end_headers() self.end_headers()
self.wfile.write(error_html.encode()) self.wfile.write(error_html.encode())
return True return True
if path in ['/wp-login.php', '/wp-login', '/wp-admin', '/wp-admin/']: if path in ["/wp-login.php", "/wp-login", "/wp-admin", "/wp-admin/"]:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header("Content-type", "text/html")
self.end_headers() self.end_headers()
self.wfile.write(html_templates.wp_login().encode()) self.wfile.write(html_templates.wp_login().encode())
return True return True
if path in ['/wp-content/', '/wp-includes/'] or 'wordpress' in path.lower(): if path in ["/wp-content/", "/wp-includes/"] or "wordpress" in path.lower():
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header("Content-type", "text/html")
self.end_headers() self.end_headers()
self.wfile.write(html_templates.wordpress().encode()) self.wfile.write(html_templates.wordpress().encode())
return True return True
if 'phpmyadmin' in path.lower() or path in ['/pma/', '/phpMyAdmin/']: if "phpmyadmin" in path.lower() or path in ["/pma/", "/phpMyAdmin/"]:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header("Content-type", "text/html")
self.end_headers() self.end_headers()
self.wfile.write(html_templates.phpmyadmin().encode()) self.wfile.write(html_templates.phpmyadmin().encode())
return True return True
if path.startswith('/api/') or path.startswith('/api') or path in ['/.env']: if path.startswith("/api/") or path.startswith("/api") or path in ["/.env"]:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'application/json') self.send_header("Content-type", "application/json")
self.end_headers() self.end_headers()
self.wfile.write(api_response(path).encode()) self.wfile.write(api_response(path).encode())
return True return True
if path in ['/backup/', '/uploads/', '/private/', '/admin/', '/config/', '/database/']: if path in [
"/backup/",
"/uploads/",
"/private/",
"/admin/",
"/config/",
"/database/",
]:
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header("Content-type", "text/html")
self.end_headers() self.end_headers()
self.wfile.write(directory_listing(path).encode()) self.wfile.write(directory_listing(path).encode())
return True return True
@@ -440,9 +493,12 @@ class Handler(BaseHTTPRequestHandler):
return return
user_agent = self._get_user_agent() user_agent = self._get_user_agent()
if self.config.dashboard_secret_path and self.path == self.config.dashboard_secret_path: if (
self.config.dashboard_secret_path
and self.path == self.config.dashboard_secret_path
):
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header("Content-type", "text/html")
self.end_headers() self.end_headers()
try: try:
stats = self.tracker.get_stats() stats = self.tracker.get_stats()
@@ -455,72 +511,93 @@ class Handler(BaseHTTPRequestHandler):
return return
# API endpoint for fetching IP stats # API endpoint for fetching IP stats
if self.config.dashboard_secret_path and self.path.startswith(f"{self.config.dashboard_secret_path}/api/ip-stats/"): if self.config.dashboard_secret_path and self.path.startswith(
ip_address = self.path.replace(f"{self.config.dashboard_secret_path}/api/ip-stats/", "") f"{self.config.dashboard_secret_path}/api/ip-stats/"
):
ip_address = self.path.replace(
f"{self.config.dashboard_secret_path}/api/ip-stats/", ""
)
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'application/json') self.send_header("Content-type", "application/json")
self.send_header('Access-Control-Allow-Origin', '*') self.send_header("Access-Control-Allow-Origin", "*")
# Prevent browser caching - force fresh data from database every time # Prevent browser caching - force fresh data from database every time
self.send_header('Cache-Control', 'no-store, no-cache, must-revalidate, max-age=0') self.send_header(
self.send_header('Pragma', 'no-cache') "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0"
self.send_header('Expires', '0') )
self.send_header("Pragma", "no-cache")
self.send_header("Expires", "0")
self.end_headers() self.end_headers()
try: try:
from database import get_database from database import get_database
import json import json
db = get_database() db = get_database()
ip_stats = db.get_ip_stats_by_ip(ip_address) ip_stats = db.get_ip_stats_by_ip(ip_address)
if ip_stats: if ip_stats:
self.wfile.write(json.dumps(ip_stats).encode()) self.wfile.write(json.dumps(ip_stats).encode())
else: else:
self.wfile.write(json.dumps({'error': 'IP not found'}).encode()) self.wfile.write(json.dumps({"error": "IP not found"}).encode())
except BrokenPipeError: except BrokenPipeError:
pass pass
except Exception as e: except Exception as e:
self.app_logger.error(f"Error fetching IP stats: {e}") self.app_logger.error(f"Error fetching IP stats: {e}")
self.wfile.write(json.dumps({'error': str(e)}).encode()) self.wfile.write(json.dumps({"error": str(e)}).encode())
return return
# API endpoint for downloading malicious IPs file # API endpoint for downloading malicious IPs file
if self.config.dashboard_secret_path and self.path == f"{self.config.dashboard_secret_path}/api/download/malicious_ips.txt": if (
self.config.dashboard_secret_path
and self.path
== f"{self.config.dashboard_secret_path}/api/download/malicious_ips.txt"
):
import os import os
file_path = os.path.join(os.path.dirname(__file__), 'exports', 'malicious_ips.txt')
file_path = os.path.join(
os.path.dirname(__file__), "exports", "malicious_ips.txt"
)
try: try:
if os.path.exists(file_path): if os.path.exists(file_path):
with open(file_path, 'rb') as f: with open(file_path, "rb") as f:
content = f.read() content = f.read()
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/plain') self.send_header("Content-type", "text/plain")
self.send_header('Content-Disposition', 'attachment; filename="malicious_ips.txt"') self.send_header(
self.send_header('Content-Length', str(len(content))) "Content-Disposition",
'attachment; filename="malicious_ips.txt"',
)
self.send_header("Content-Length", str(len(content)))
self.end_headers() self.end_headers()
self.wfile.write(content) self.wfile.write(content)
else: else:
self.send_response(404) self.send_response(404)
self.send_header('Content-type', 'text/plain') self.send_header("Content-type", "text/plain")
self.end_headers() self.end_headers()
self.wfile.write(b'File not found') self.wfile.write(b"File not found")
except BrokenPipeError: except BrokenPipeError:
pass pass
except Exception as e: except Exception as e:
self.app_logger.error(f"Error serving malicious IPs file: {e}") self.app_logger.error(f"Error serving malicious IPs file: {e}")
self.send_response(500) self.send_response(500)
self.send_header('Content-type', 'text/plain') self.send_header("Content-type", "text/plain")
self.end_headers() self.end_headers()
self.wfile.write(b'Internal server error') self.wfile.write(b"Internal server error")
return return
self.tracker.record_access(client_ip, self.path, user_agent, method='GET') self.tracker.record_access(client_ip, self.path, user_agent, method="GET")
# self.analyzer.infer_user_category(client_ip) # self.analyzer.infer_user_category(client_ip)
# self.analyzer.update_ip_rep_infos(client_ip) # self.analyzer.update_ip_rep_infos(client_ip)
if self.tracker.is_suspicious_user_agent(user_agent): if self.tracker.is_suspicious_user_agent(user_agent):
self.access_logger.warning(f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}") self.access_logger.warning(
f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}"
)
if self._should_return_error(): if self._should_return_error():
error_code = self._get_random_error_code() error_code = self._get_random_error_code()
self.access_logger.info(f"Returning error {error_code} to {client_ip} - {self.path}") self.access_logger.info(
f"Returning error {error_code} to {client_ip} - {self.path}"
)
self.send_response(error_code) self.send_response(error_code)
self.end_headers() self.end_headers()
return return
@@ -530,13 +607,15 @@ class Handler(BaseHTTPRequestHandler):
time.sleep(self.config.delay / 1000.0) time.sleep(self.config.delay / 1000.0)
self.send_response(200) self.send_response(200)
self.send_header('Content-type', 'text/html') self.send_header("Content-type", "text/html")
self.end_headers() self.end_headers()
try: try:
# Increment page visit counter for this IP and get the current count # Increment page visit counter for this IP and get the current count
current_visit_count = self._increment_page_visit(client_ip) current_visit_count = self._increment_page_visit(client_ip)
self.wfile.write(self.generate_page(self.path, current_visit_count).encode()) self.wfile.write(
self.generate_page(self.path, current_visit_count).encode()
)
Handler.counter -= 1 Handler.counter -= 1

View File

@@ -13,6 +13,7 @@ from datetime import datetime
class TimezoneFormatter(logging.Formatter): class TimezoneFormatter(logging.Formatter):
"""Custom formatter that respects configured timezone""" """Custom formatter that respects configured timezone"""
def __init__(self, fmt=None, datefmt=None): def __init__(self, fmt=None, datefmt=None):
super().__init__(fmt, datefmt) super().__init__(fmt, datefmt)
@@ -26,6 +27,7 @@ class TimezoneFormatter(logging.Formatter):
class LoggerManager: class LoggerManager:
"""Singleton logger manager for the application.""" """Singleton logger manager for the application."""
_instance = None _instance = None
def __new__(cls): def __new__(cls):
@@ -65,7 +67,7 @@ class LoggerManager:
app_file_handler = RotatingFileHandler( app_file_handler = RotatingFileHandler(
os.path.join(log_dir, "krawl.log"), os.path.join(log_dir, "krawl.log"),
maxBytes=max_bytes, maxBytes=max_bytes,
backupCount=backup_count backupCount=backup_count,
) )
app_file_handler.setFormatter(log_format) app_file_handler.setFormatter(log_format)
self._app_logger.addHandler(app_file_handler) self._app_logger.addHandler(app_file_handler)
@@ -82,7 +84,7 @@ class LoggerManager:
access_file_handler = RotatingFileHandler( access_file_handler = RotatingFileHandler(
os.path.join(log_dir, "access.log"), os.path.join(log_dir, "access.log"),
maxBytes=max_bytes, maxBytes=max_bytes,
backupCount=backup_count backupCount=backup_count,
) )
access_file_handler.setFormatter(log_format) access_file_handler.setFormatter(log_format)
self._access_logger.addHandler(access_file_handler) self._access_logger.addHandler(access_file_handler)
@@ -102,7 +104,7 @@ class LoggerManager:
credential_file_handler = RotatingFileHandler( credential_file_handler = RotatingFileHandler(
os.path.join(log_dir, "credentials.log"), os.path.join(log_dir, "credentials.log"),
maxBytes=max_bytes, maxBytes=max_bytes,
backupCount=backup_count backupCount=backup_count,
) )
credential_file_handler.setFormatter(credential_format) credential_file_handler.setFormatter(credential_format)
self._credential_logger.addHandler(credential_file_handler) self._credential_logger.addHandler(credential_file_handler)

View File

@@ -25,6 +25,7 @@ from sanitizer import (
class Base(DeclarativeBase): class Base(DeclarativeBase):
"""Base class for all ORM models.""" """Base class for all ORM models."""
pass pass
@@ -35,30 +36,35 @@ class AccessLog(Base):
Stores request metadata, suspicious activity flags, and timestamps Stores request metadata, suspicious activity flags, and timestamps
for analysis and dashboard display. for analysis and dashboard display.
""" """
__tablename__ = 'access_logs'
__tablename__ = "access_logs"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
#ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True, ForeignKey('ip_logs.id', ondelete='CASCADE')) # ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True, ForeignKey('ip_logs.id', ondelete='CASCADE'))
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True) ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False) path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False)
user_agent: Mapped[Optional[str]] = mapped_column(String(MAX_USER_AGENT_LENGTH), nullable=True) user_agent: Mapped[Optional[str]] = mapped_column(
method: Mapped[str] = mapped_column(String(10), nullable=False, default='GET') String(MAX_USER_AGENT_LENGTH), nullable=True
)
method: Mapped[str] = mapped_column(String(10), nullable=False, default="GET")
is_suspicious: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) is_suspicious: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
is_honeypot_trigger: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False) is_honeypot_trigger: Mapped[bool] = mapped_column(
timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True) Boolean, nullable=False, default=False
)
timestamp: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow, index=True
)
# Relationship to attack detections # Relationship to attack detections
attack_detections: Mapped[List["AttackDetection"]] = relationship( attack_detections: Mapped[List["AttackDetection"]] = relationship(
"AttackDetection", "AttackDetection", back_populates="access_log", cascade="all, delete-orphan"
back_populates="access_log",
cascade="all, delete-orphan"
) )
# Indexes for common queries # Indexes for common queries
__table_args__ = ( __table_args__ = (
Index('ix_access_logs_ip_timestamp', 'ip', 'timestamp'), Index("ix_access_logs_ip_timestamp", "ip", "timestamp"),
Index('ix_access_logs_is_suspicious', 'is_suspicious'), Index("ix_access_logs_is_suspicious", "is_suspicious"),
Index('ix_access_logs_is_honeypot_trigger', 'is_honeypot_trigger'), Index("ix_access_logs_is_honeypot_trigger", "is_honeypot_trigger"),
) )
def __repr__(self) -> str: def __repr__(self) -> str:
@@ -71,19 +77,24 @@ class CredentialAttempt(Base):
Stores the submitted username and password along with request metadata. Stores the submitted username and password along with request metadata.
""" """
__tablename__ = 'credential_attempts'
__tablename__ = "credential_attempts"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True) ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False) path: Mapped[str] = mapped_column(String(MAX_PATH_LENGTH), nullable=False)
username: Mapped[Optional[str]] = mapped_column(String(MAX_CREDENTIAL_LENGTH), nullable=True) username: Mapped[Optional[str]] = mapped_column(
password: Mapped[Optional[str]] = mapped_column(String(MAX_CREDENTIAL_LENGTH), nullable=True) String(MAX_CREDENTIAL_LENGTH), nullable=True
timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True) )
password: Mapped[Optional[str]] = mapped_column(
String(MAX_CREDENTIAL_LENGTH), nullable=True
)
timestamp: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow, index=True
)
# Composite index for common queries # Composite index for common queries
__table_args__ = ( __table_args__ = (Index("ix_credential_attempts_ip_timestamp", "ip", "timestamp"),)
Index('ix_credential_attempts_ip_timestamp', 'ip', 'timestamp'),
)
def __repr__(self) -> str: def __repr__(self) -> str:
return f"<CredentialAttempt(id={self.id}, ip='{self.ip}', username='{self.username}')>" return f"<CredentialAttempt(id={self.id}, ip='{self.ip}', username='{self.username}')>"
@@ -96,20 +107,25 @@ class AttackDetection(Base):
Linked to the parent AccessLog record. Multiple attack types can be Linked to the parent AccessLog record. Multiple attack types can be
detected in a single request. detected in a single request.
""" """
__tablename__ = 'attack_detections'
__tablename__ = "attack_detections"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
access_log_id: Mapped[int] = mapped_column( access_log_id: Mapped[int] = mapped_column(
Integer, Integer,
ForeignKey('access_logs.id', ondelete='CASCADE'), ForeignKey("access_logs.id", ondelete="CASCADE"),
nullable=False, nullable=False,
index=True index=True,
) )
attack_type: Mapped[str] = mapped_column(String(50), nullable=False) attack_type: Mapped[str] = mapped_column(String(50), nullable=False)
matched_pattern: Mapped[Optional[str]] = mapped_column(String(MAX_ATTACK_PATTERN_LENGTH), nullable=True) matched_pattern: Mapped[Optional[str]] = mapped_column(
String(MAX_ATTACK_PATTERN_LENGTH), nullable=True
)
# Relationship back to access log # Relationship back to access log
access_log: Mapped["AccessLog"] = relationship("AccessLog", back_populates="attack_detections") access_log: Mapped["AccessLog"] = relationship(
"AccessLog", back_populates="attack_detections"
)
def __repr__(self) -> str: def __repr__(self) -> str:
return f"<AttackDetection(id={self.id}, type='{self.attack_type}')>" return f"<AttackDetection(id={self.id}, type='{self.attack_type}')>"
@@ -122,33 +138,43 @@ class IpStats(Base):
Includes fields for future GeoIP and reputation enrichment. Includes fields for future GeoIP and reputation enrichment.
Updated on each request from an IP. Updated on each request from an IP.
""" """
__tablename__ = 'ip_stats'
__tablename__ = "ip_stats"
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), primary_key=True) ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), primary_key=True)
total_requests: Mapped[int] = mapped_column(Integer, nullable=False, default=0) total_requests: Mapped[int] = mapped_column(Integer, nullable=False, default=0)
first_seen: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow) first_seen: Mapped[datetime] = mapped_column(
last_seen: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow) DateTime, nullable=False, default=datetime.utcnow
)
last_seen: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow
)
# GeoIP fields (populated by future enrichment) # GeoIP fields (populated by future enrichment)
country_code: Mapped[Optional[str]] = mapped_column(String(2), nullable=True) country_code: Mapped[Optional[str]] = mapped_column(String(2), nullable=True)
city: Mapped[Optional[str]] = mapped_column(String(MAX_CITY_LENGTH), nullable=True) city: Mapped[Optional[str]] = mapped_column(String(MAX_CITY_LENGTH), nullable=True)
asn: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) asn: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
asn_org: Mapped[Optional[str]] = mapped_column(String(MAX_ASN_ORG_LENGTH), nullable=True) asn_org: Mapped[Optional[str]] = mapped_column(
list_on: Mapped[Optional[Dict[str,str]]] = mapped_column(JSON, nullable=True) String(MAX_ASN_ORG_LENGTH), nullable=True
)
list_on: Mapped[Optional[Dict[str, str]]] = mapped_column(JSON, nullable=True)
# Reputation fields (populated by future enrichment) # Reputation fields (populated by future enrichment)
reputation_score: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) reputation_score: Mapped[Optional[int]] = mapped_column(Integer, nullable=True)
reputation_source: Mapped[Optional[str]] = mapped_column(String(MAX_REPUTATION_SOURCE_LENGTH), nullable=True) reputation_source: Mapped[Optional[str]] = mapped_column(
reputation_updated: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True) String(MAX_REPUTATION_SOURCE_LENGTH), nullable=True
)
reputation_updated: Mapped[Optional[datetime]] = mapped_column(
DateTime, nullable=True
)
#Analyzed metrics, category and category scores # Analyzed metrics, category and category scores
analyzed_metrics: Mapped[Dict[str,object]] = mapped_column(JSON, nullable=True) analyzed_metrics: Mapped[Dict[str, object]] = mapped_column(JSON, nullable=True)
category: Mapped[str] = mapped_column(String, nullable=True) category: Mapped[str] = mapped_column(String, nullable=True)
category_scores: Mapped[Dict[str,int]] = mapped_column(JSON, nullable=True) category_scores: Mapped[Dict[str, int]] = mapped_column(JSON, nullable=True)
manual_category: Mapped[bool] = mapped_column(Boolean, default=False, nullable=True) manual_category: Mapped[bool] = mapped_column(Boolean, default=False, nullable=True)
last_analysis: Mapped[datetime] = mapped_column(DateTime, nullable=True) last_analysis: Mapped[datetime] = mapped_column(DateTime, nullable=True)
def __repr__(self) -> str: def __repr__(self) -> str:
return f"<IpStats(ip='{self.ip}', total_requests={self.total_requests})>" return f"<IpStats(ip='{self.ip}', total_requests={self.total_requests})>"
@@ -160,18 +186,19 @@ class CategoryHistory(Base):
Tracks when an IP's category changes, storing both the previous Tracks when an IP's category changes, storing both the previous
and new category along with timestamp for timeline visualization. and new category along with timestamp for timeline visualization.
""" """
__tablename__ = 'category_history'
__tablename__ = "category_history"
id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True) id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)
ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True) ip: Mapped[str] = mapped_column(String(MAX_IP_LENGTH), nullable=False, index=True)
old_category: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) old_category: Mapped[Optional[str]] = mapped_column(String(50), nullable=True)
new_category: Mapped[str] = mapped_column(String(50), nullable=False) new_category: Mapped[str] = mapped_column(String(50), nullable=False)
timestamp: Mapped[datetime] = mapped_column(DateTime, nullable=False, default=datetime.utcnow, index=True) timestamp: Mapped[datetime] = mapped_column(
DateTime, nullable=False, default=datetime.utcnow, index=True
)
# Composite index for efficient IP-based timeline queries # Composite index for efficient IP-based timeline queries
__table_args__ = ( __table_args__ = (Index("ix_category_history_ip_timestamp", "ip", "timestamp"),)
Index('ix_category_history_ip_timestamp', 'ip', 'timestamp'),
)
def __repr__(self) -> str: def __repr__(self) -> str:
return f"<CategoryHistory(ip='{self.ip}', {self.old_category} -> {self.new_category})>" return f"<CategoryHistory(ip='{self.ip}', {self.old_category} -> {self.new_category})>"

View File

@@ -9,7 +9,6 @@ import html
import re import re
from typing import Optional, Dict from typing import Optional, Dict
# Field length limits for database storage # Field length limits for database storage
MAX_IP_LENGTH = 45 # IPv6 max length MAX_IP_LENGTH = 45 # IPv6 max length
MAX_PATH_LENGTH = 2048 # URL max practical length MAX_PATH_LENGTH = 2048 # URL max practical length
@@ -43,7 +42,7 @@ def sanitize_for_storage(value: Optional[str], max_length: int) -> str:
# Remove null bytes and control characters (except newline \n, tab \t, carriage return \r) # Remove null bytes and control characters (except newline \n, tab \t, carriage return \r)
# Control chars are 0x00-0x1F and 0x7F, we keep 0x09 (tab), 0x0A (newline), 0x0D (carriage return) # Control chars are 0x00-0x1F and 0x7F, we keep 0x09 (tab), 0x0A (newline), 0x0D (carriage return)
cleaned = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', value) cleaned = re.sub(r"[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]", "", value)
# Truncate to max length # Truncate to max length
return cleaned[:max_length] return cleaned[:max_length]
@@ -112,5 +111,6 @@ def escape_html_truncated(value: Optional[str], max_display_length: int) -> str:
return html.escape(value_str) return html.escape(value_str)
def sanitize_dict(value: Optional[Dict[str,str]], max_display_length):
def sanitize_dict(value: Optional[Dict[str, str]], max_display_length):
return {k: sanitize_for_storage(v, max_display_length) for k, v in value.items()} return {k: sanitize_for_storage(v, max_display_length) for k, v in value.items()}

View File

@@ -12,43 +12,48 @@ from config import get_config
from tracker import AccessTracker from tracker import AccessTracker
from analyzer import Analyzer from analyzer import Analyzer
from handler import Handler from handler import Handler
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger from logger import (
initialize_logging,
get_app_logger,
get_access_logger,
get_credential_logger,
)
from database import initialize_database from database import initialize_database
from tasks_master import get_tasksmaster from tasks_master import get_tasksmaster
def print_usage(): def print_usage():
"""Print usage information""" """Print usage information"""
print(f'Usage: {sys.argv[0]} [FILE]\n') print(f"Usage: {sys.argv[0]} [FILE]\n")
print('FILE is file containing a list of webpage names to serve, one per line.') print("FILE is file containing a list of webpage names to serve, one per line.")
print('If no file is provided, random links will be generated.\n') print("If no file is provided, random links will be generated.\n")
print('Configuration:') print("Configuration:")
print(' Configuration is loaded from a YAML file (default: config.yaml)') print(" Configuration is loaded from a YAML file (default: config.yaml)")
print(' Set CONFIG_LOCATION environment variable to use a different file.\n') print(" Set CONFIG_LOCATION environment variable to use a different file.\n")
print(' Example config.yaml structure:') print(" Example config.yaml structure:")
print(' server:') print(" server:")
print(' port: 5000') print(" port: 5000")
print(' delay: 100') print(" delay: 100")
print(' links:') print(" links:")
print(' min_length: 5') print(" min_length: 5")
print(' max_length: 15') print(" max_length: 15")
print(' min_per_page: 10') print(" min_per_page: 10")
print(' max_per_page: 15') print(" max_per_page: 15")
print(' canary:') print(" canary:")
print(' token_url: null') print(" token_url: null")
print(' token_tries: 10') print(" token_tries: 10")
print(' dashboard:') print(" dashboard:")
print(' secret_path: null # auto-generated if not set') print(" secret_path: null # auto-generated if not set")
print(' database:') print(" database:")
print(' path: "data/krawl.db"') print(' path: "data/krawl.db"')
print(' retention_days: 30') print(" retention_days: 30")
print(' behavior:') print(" behavior:")
print(' probability_error_codes: 0') print(" probability_error_codes: 0")
def main(): def main():
"""Main entry point for the deception server""" """Main entry point for the deception server"""
if '-h' in sys.argv or '--help' in sys.argv: if "-h" in sys.argv or "--help" in sys.argv:
print_usage() print_usage()
exit(0) exit(0)
@@ -63,9 +68,11 @@ def main():
# Initialize database for persistent storage # Initialize database for persistent storage
try: try:
initialize_database(config.database_path) initialize_database(config.database_path)
app_logger.info(f'Database initialized at: {config.database_path}') app_logger.info(f"Database initialized at: {config.database_path}")
except Exception as e: except Exception as e:
app_logger.warning(f'Database initialization failed: {e}. Continuing with in-memory only.') app_logger.warning(
f"Database initialization failed: {e}. Continuing with in-memory only."
)
tracker = AccessTracker(config.max_pages_limit, config.ban_duration_seconds) tracker = AccessTracker(config.max_pages_limit, config.ban_duration_seconds)
analyzer = Analyzer() analyzer = Analyzer()
@@ -80,11 +87,13 @@ def main():
if len(sys.argv) == 2: if len(sys.argv) == 2:
try: try:
with open(sys.argv[1], 'r') as f: with open(sys.argv[1], "r") as f:
Handler.webpages = f.readlines() Handler.webpages = f.readlines()
if not Handler.webpages: if not Handler.webpages:
app_logger.warning('The file provided was empty. Using randomly generated links.') app_logger.warning(
"The file provided was empty. Using randomly generated links."
)
Handler.webpages = None Handler.webpages = None
except IOError: except IOError:
app_logger.warning("Can't read input file. Using randomly generated links.") app_logger.warning("Can't read input file. Using randomly generated links.")
@@ -94,25 +103,31 @@ def main():
tasks_master.run_scheduled_tasks() tasks_master.run_scheduled_tasks()
try: try:
app_logger.info(f'Starting deception server on port {config.port}...') app_logger.info(f"Starting deception server on port {config.port}...")
app_logger.info(f'Dashboard available at: {config.dashboard_secret_path}') app_logger.info(f"Dashboard available at: {config.dashboard_secret_path}")
if config.canary_token_url: if config.canary_token_url:
app_logger.info(f'Canary token will appear after {config.canary_token_tries} tries') app_logger.info(
f"Canary token will appear after {config.canary_token_tries} tries"
)
else: else:
app_logger.info('No canary token configured (set CANARY_TOKEN_URL to enable)') app_logger.info(
"No canary token configured (set CANARY_TOKEN_URL to enable)"
)
server = HTTPServer(('0.0.0.0', config.port), Handler) server = HTTPServer(("0.0.0.0", config.port), Handler)
app_logger.info('Server started. Use <Ctrl-C> to stop.') app_logger.info("Server started. Use <Ctrl-C> to stop.")
server.serve_forever() server.serve_forever()
except KeyboardInterrupt: except KeyboardInterrupt:
app_logger.info('Stopping server...') app_logger.info("Stopping server...")
server.socket.close() server.socket.close()
app_logger.info('Server stopped') app_logger.info("Server stopped")
except Exception as e: except Exception as e:
app_logger.error(f'Error starting HTTP server on port {config.port}: {e}') app_logger.error(f"Error starting HTTP server on port {config.port}: {e}")
app_logger.error(f'Make sure you are root, if needed, and that port {config.port} is open.') app_logger.error(
f"Make sure you are root, if needed, and that port {config.port} is open."
)
exit(1) exit(1)
if __name__ == '__main__': if __name__ == "__main__":
main() main()

View File

@@ -21,23 +21,23 @@ def generate_server_error() -> tuple[str, str]:
404: "Not Found", 404: "Not Found",
500: "Internal Server Error", 500: "Internal Server Error",
502: "Bad Gateway", 502: "Bad Gateway",
503: "Service Unavailable" 503: "Service Unavailable",
} }
code = random.choice(list(error_codes.keys())) code = random.choice(list(error_codes.keys()))
message = error_codes[code] message = error_codes[code]
template = server_config.get('template', '') template = server_config.get("template", "")
version = random.choice(server_config.get('versions', ['1.0'])) version = random.choice(server_config.get("versions", ["1.0"]))
html = template.replace('{code}', str(code)) html = template.replace("{code}", str(code))
html = html.replace('{message}', message) html = html.replace("{message}", message)
html = html.replace('{version}', version) html = html.replace("{version}", version)
if server_type == 'apache': if server_type == "apache":
os = random.choice(server_config.get('os', ['Ubuntu'])) os = random.choice(server_config.get("os", ["Ubuntu"]))
html = html.replace('{os}', os) html = html.replace("{os}", os)
html = html.replace('{host}', 'localhost') html = html.replace("{host}", "localhost")
return (html, "text/html") return (html, "text/html")
@@ -53,13 +53,13 @@ def get_server_header(server_type: str = None) -> str:
server_type = random.choice(list(server_errors.keys())) server_type = random.choice(list(server_errors.keys()))
server_config = server_errors.get(server_type, {}) server_config = server_errors.get(server_type, {})
version = random.choice(server_config.get('versions', ['1.0'])) version = random.choice(server_config.get("versions", ["1.0"]))
server_headers = { server_headers = {
'nginx': f"nginx/{version}", "nginx": f"nginx/{version}",
'apache': f"Apache/{version}", "apache": f"Apache/{version}",
'iis': f"Microsoft-IIS/{version}", "iis": f"Microsoft-IIS/{version}",
'tomcat': f"Apache-Coyote/1.1" "tomcat": f"Apache-Coyote/1.1",
} }
return server_headers.get(server_type, "nginx/1.18.0") return server_headers.get(server_type, "nginx/1.18.0")

View File

@@ -13,14 +13,14 @@ def detect_sql_injection_pattern(query_string: str) -> Optional[str]:
query_lower = query_string.lower() query_lower = query_string.lower()
patterns = { patterns = {
'quote': [r"'", r'"', r'`'], "quote": [r"'", r'"', r"`"],
'comment': [r'--', r'#', r'/\*', r'\*/'], "comment": [r"--", r"#", r"/\*", r"\*/"],
'union': [r'\bunion\b', r'\bunion\s+select\b'], "union": [r"\bunion\b", r"\bunion\s+select\b"],
'boolean': [r'\bor\b.*=.*', r'\band\b.*=.*', r"'.*or.*'.*=.*'"], "boolean": [r"\bor\b.*=.*", r"\band\b.*=.*", r"'.*or.*'.*=.*'"],
'time_based': [r'\bsleep\b', r'\bwaitfor\b', r'\bdelay\b', r'\bbenchmark\b'], "time_based": [r"\bsleep\b", r"\bwaitfor\b", r"\bdelay\b", r"\bbenchmark\b"],
'stacked': [r';.*select', r';.*drop', r';.*insert', r';.*update', r';.*delete'], "stacked": [r";.*select", r";.*drop", r";.*insert", r";.*update", r";.*delete"],
'command': [r'\bexec\b', r'\bexecute\b', r'\bxp_cmdshell\b'], "command": [r"\bexec\b", r"\bexecute\b", r"\bxp_cmdshell\b"],
'info_schema': [r'information_schema', r'table_schema', r'table_name'], "info_schema": [r"information_schema", r"table_schema", r"table_name"],
} }
for injection_type, pattern_list in patterns.items(): for injection_type, pattern_list in patterns.items():
@@ -31,7 +31,9 @@ def detect_sql_injection_pattern(query_string: str) -> Optional[str]:
return None return None
def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tuple[str, str]: def get_random_sql_error(
db_type: str = None, injection_type: str = None
) -> Tuple[str, str]:
wl = get_wordlists() wl = get_wordlists()
sql_errors = wl.sql_errors sql_errors = wl.sql_errors
@@ -45,8 +47,8 @@ def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tup
if injection_type and injection_type in db_errors: if injection_type and injection_type in db_errors:
errors = db_errors[injection_type] errors = db_errors[injection_type]
elif 'generic' in db_errors: elif "generic" in db_errors:
errors = db_errors['generic'] errors = db_errors["generic"]
else: else:
all_errors = [] all_errors = []
for error_list in db_errors.values(): for error_list in db_errors.values():
@@ -56,18 +58,20 @@ def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tup
error_message = random.choice(errors) if errors else "Database error occurred" error_message = random.choice(errors) if errors else "Database error occurred"
if '{table}' in error_message: if "{table}" in error_message:
tables = ['users', 'products', 'orders', 'customers', 'accounts', 'sessions'] tables = ["users", "products", "orders", "customers", "accounts", "sessions"]
error_message = error_message.replace('{table}', random.choice(tables)) error_message = error_message.replace("{table}", random.choice(tables))
if '{column}' in error_message: if "{column}" in error_message:
columns = ['id', 'name', 'email', 'password', 'username', 'created_at'] columns = ["id", "name", "email", "password", "username", "created_at"]
error_message = error_message.replace('{column}', random.choice(columns)) error_message = error_message.replace("{column}", random.choice(columns))
return (error_message, "text/plain") return (error_message, "text/plain")
def generate_sql_error_response(query_string: str, db_type: str = None) -> Tuple[str, str, int]: def generate_sql_error_response(
query_string: str, db_type: str = None
) -> Tuple[str, str, int]:
injection_type = detect_sql_injection_pattern(query_string) injection_type = detect_sql_injection_pattern(query_string)
if not injection_type: if not injection_type:
@@ -89,7 +93,7 @@ def get_sql_response_with_data(path: str, params: str) -> str:
injection_type = detect_sql_injection_pattern(params) injection_type = detect_sql_injection_pattern(params)
if injection_type in ['union', 'boolean', 'stacked']: if injection_type in ["union", "boolean", "stacked"]:
data = { data = {
"success": True, "success": True,
"results": [ "results": [
@@ -98,15 +102,14 @@ def get_sql_response_with_data(path: str, params: str) -> str:
"username": random_username(), "username": random_username(),
"email": random_email(), "email": random_email(),
"password_hash": random_password(), "password_hash": random_password(),
"role": random.choice(["admin", "user", "moderator"]) "role": random.choice(["admin", "user", "moderator"]),
} }
for i in range(1, random.randint(2, 5)) for i in range(1, random.randint(2, 5))
] ],
} }
return json.dumps(data, indent=2) return json.dumps(data, indent=2)
return json.dumps({ return json.dumps(
"success": True, {"success": True, "message": "Query executed successfully", "results": []},
"message": "Query executed successfully", indent=2,
"results": [] )
}, indent=2)

View File

@@ -20,7 +20,7 @@ TASK_CONFIG = {
"name": "analyze-ips", "name": "analyze-ips",
"cron": "*/1 * * * *", "cron": "*/1 * * * *",
"enabled": True, "enabled": True,
"run_when_loaded": True "run_when_loaded": True,
} }
@@ -34,48 +34,74 @@ def main():
uneven_request_timing_threshold = config.uneven_request_timing_threshold uneven_request_timing_threshold = config.uneven_request_timing_threshold
user_agents_used_threshold = config.user_agents_used_threshold user_agents_used_threshold = config.user_agents_used_threshold
attack_urls_threshold = config.attack_urls_threshold attack_urls_threshold = config.attack_urls_threshold
uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds uneven_request_timing_time_window_seconds = (
config.uneven_request_timing_time_window_seconds
)
app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}") app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
score = {} score = {}
score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} score["attacker"] = {
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} "risky_http_methods": False,
score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} "robots_violations": False,
score["regular_user"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} "uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
score["good_crawler"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
score["bad_crawler"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
score["regular_user"] = {
"risky_http_methods": False,
"robots_violations": False,
"uneven_request_timing": False,
"different_user_agents": False,
"attack_url": False,
}
#1-3 low, 4-6 mid, 7-9 high, 10-20 extreme # 1-3 low, 4-6 mid, 7-9 high, 10-20 extreme
weights = { weights = {
"attacker": { "attacker": {
"risky_http_methods": 6, "risky_http_methods": 6,
"robots_violations": 4, "robots_violations": 4,
"uneven_request_timing": 3, "uneven_request_timing": 3,
"different_user_agents": 8, "different_user_agents": 8,
"attack_url": 15 "attack_url": 15,
}, },
"good_crawler": { "good_crawler": {
"risky_http_methods": 1, "risky_http_methods": 1,
"robots_violations": 0, "robots_violations": 0,
"uneven_request_timing": 0, "uneven_request_timing": 0,
"different_user_agents": 0, "different_user_agents": 0,
"attack_url": 0 "attack_url": 0,
}, },
"bad_crawler": { "bad_crawler": {
"risky_http_methods": 2, "risky_http_methods": 2,
"robots_violations": 7, "robots_violations": 7,
"uneven_request_timing": 0, "uneven_request_timing": 0,
"different_user_agents": 5, "different_user_agents": 5,
"attack_url": 5 "attack_url": 5,
}, },
"regular_user": { "regular_user": {
"risky_http_methods": 0, "risky_http_methods": 0,
"robots_violations": 0, "robots_violations": 0,
"uneven_request_timing": 8, "uneven_request_timing": 8,
"different_user_agents": 3, "different_user_agents": 3,
"attack_url": 0 "attack_url": 0,
} },
} }
# Get IPs with recent activity (last minute to match cron schedule) # Get IPs with recent activity (last minute to match cron schedule)
recent_accesses = db_manager.get_access_logs(limit=999999999, since_minutes=1) recent_accesses = db_manager.get_access_logs(limit=999999999, since_minutes=1)
ips_to_analyze = {item['ip'] for item in recent_accesses} ips_to_analyze = {item["ip"] for item in recent_accesses}
if not ips_to_analyze: if not ips_to_analyze:
app_logger.debug("[Background Task] analyze-ips: No recent activity, skipping") app_logger.debug("[Background Task] analyze-ips: No recent activity, skipping")
@@ -92,23 +118,51 @@ def main():
if total_accesses_count < 3: if total_accesses_count < 3:
category = "unknown" category = "unknown"
analyzed_metrics = {} analyzed_metrics = {}
category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0} category_scores = {
"attacker": 0,
"good_crawler": 0,
"bad_crawler": 0,
"regular_user": 0,
"unknown": 0,
}
last_analysis = datetime.now() last_analysis = datetime.now()
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis) db_manager.update_ip_stats_analysis(
ip, analyzed_metrics, category, category_scores, last_analysis
)
return 0 return 0
#--------------------- HTTP Methods --------------------- # --------------------- HTTP Methods ---------------------
get_accesses_count = len([item for item in ip_accesses if item["method"] == "GET"]) get_accesses_count = len(
post_accesses_count = len([item for item in ip_accesses if item["method"] == "POST"]) [item for item in ip_accesses if item["method"] == "GET"]
put_accesses_count = len([item for item in ip_accesses if item["method"] == "PUT"]) )
delete_accesses_count = len([item for item in ip_accesses if item["method"] == "DELETE"]) post_accesses_count = len(
head_accesses_count = len([item for item in ip_accesses if item["method"] == "HEAD"]) [item for item in ip_accesses if item["method"] == "POST"]
options_accesses_count = len([item for item in ip_accesses if item["method"] == "OPTIONS"]) )
patch_accesses_count = len([item for item in ip_accesses if item["method"] == "PATCH"]) put_accesses_count = len(
[item for item in ip_accesses if item["method"] == "PUT"]
)
delete_accesses_count = len(
[item for item in ip_accesses if item["method"] == "DELETE"]
)
head_accesses_count = len(
[item for item in ip_accesses if item["method"] == "HEAD"]
)
options_accesses_count = len(
[item for item in ip_accesses if item["method"] == "OPTIONS"]
)
patch_accesses_count = len(
[item for item in ip_accesses if item["method"] == "PATCH"]
)
if total_accesses_count > http_risky_methods_threshold: if total_accesses_count > http_risky_methods_threshold:
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count http_method_attacker_score = (
post_accesses_count
+ put_accesses_count
+ delete_accesses_count
+ options_accesses_count
+ patch_accesses_count
) / total_accesses_count
else: else:
http_method_attacker_score = 0 http_method_attacker_score = 0
#print(f"HTTP Method attacker score: {http_method_attacker_score}") # print(f"HTTP Method attacker score: {http_method_attacker_score}")
if http_method_attacker_score >= http_risky_methods_threshold: if http_method_attacker_score >= http_risky_methods_threshold:
score["attacker"]["risky_http_methods"] = True score["attacker"]["risky_http_methods"] = True
score["good_crawler"]["risky_http_methods"] = False score["good_crawler"]["risky_http_methods"] = False
@@ -119,8 +173,8 @@ def main():
score["good_crawler"]["risky_http_methods"] = True score["good_crawler"]["risky_http_methods"] = True
score["bad_crawler"]["risky_http_methods"] = False score["bad_crawler"]["risky_http_methods"] = False
score["regular_user"]["risky_http_methods"] = False score["regular_user"]["risky_http_methods"] = False
#--------------------- Robots Violations --------------------- # --------------------- Robots Violations ---------------------
#respect robots.txt and login/config pages access frequency # respect robots.txt and login/config pages access frequency
robots_disallows = [] robots_disallows = []
robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt" robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt"
with open(robots_path, "r") as f: with open(robots_path, "r") as f:
@@ -132,11 +186,20 @@ def main():
if parts[0] == "Disallow": if parts[0] == "Disallow":
parts[1] = parts[1].rstrip("/") parts[1] = parts[1].rstrip("/")
#print(f"DISALLOW {parts[1]}") # print(f"DISALLOW {parts[1]}")
robots_disallows.append(parts[1].strip()) robots_disallows.append(parts[1].strip())
#if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker # if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker
violated_robots_count = len([item for item in ip_accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)]) violated_robots_count = len(
#print(f"Violated robots count: {violated_robots_count}") [
item
for item in ip_accesses
if any(
item["path"].rstrip("/").startswith(disallow)
for disallow in robots_disallows
)
]
)
# print(f"Violated robots count: {violated_robots_count}")
if total_accesses_count > 0: if total_accesses_count > 0:
violated_robots_ratio = violated_robots_count / total_accesses_count violated_robots_ratio = violated_robots_count / total_accesses_count
else: else:
@@ -152,15 +215,20 @@ def main():
score["bad_crawler"]["robots_violations"] = False score["bad_crawler"]["robots_violations"] = False
score["regular_user"]["robots_violations"] = False score["regular_user"]["robots_violations"] = False
#--------------------- Requests Timing --------------------- # --------------------- Requests Timing ---------------------
# Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior # Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses] timestamps = [datetime.fromisoformat(item["timestamp"]) for item in ip_accesses]
now_utc = datetime.now() now_utc = datetime.now()
timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)] timestamps = [
ts
for ts in timestamps
if now_utc - ts
<= timedelta(seconds=uneven_request_timing_time_window_seconds)
]
timestamps = sorted(timestamps, reverse=True) timestamps = sorted(timestamps, reverse=True)
time_diffs = [] time_diffs = []
for i in range(0, len(timestamps)-1): for i in range(0, len(timestamps) - 1):
diff = (timestamps[i] - timestamps[i+1]).total_seconds() diff = (timestamps[i] - timestamps[i + 1]).total_seconds()
time_diffs.append(diff) time_diffs.append(diff)
mean = 0 mean = 0
@@ -170,9 +238,11 @@ def main():
if time_diffs: if time_diffs:
mean = sum(time_diffs) / len(time_diffs) mean = sum(time_diffs) / len(time_diffs)
variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs) variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
std = variance ** 0.5 std = variance**0.5
cv = std/mean cv = std / mean
app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}") app_logger.debug(
f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}"
)
if cv >= uneven_request_timing_threshold: if cv >= uneven_request_timing_threshold:
score["attacker"]["uneven_request_timing"] = True score["attacker"]["uneven_request_timing"] = True
score["good_crawler"]["uneven_request_timing"] = False score["good_crawler"]["uneven_request_timing"] = False
@@ -183,11 +253,11 @@ def main():
score["good_crawler"]["uneven_request_timing"] = False score["good_crawler"]["uneven_request_timing"] = False
score["bad_crawler"]["uneven_request_timing"] = False score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = False score["regular_user"]["uneven_request_timing"] = False
#--------------------- Different User Agents --------------------- # --------------------- Different User Agents ---------------------
#Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers # Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers
user_agents_used = [item["user_agent"] for item in ip_accesses] user_agents_used = [item["user_agent"] for item in ip_accesses]
user_agents_used = list(dict.fromkeys(user_agents_used)) user_agents_used = list(dict.fromkeys(user_agents_used))
#print(f"User agents used: {user_agents_used}") # print(f"User agents used: {user_agents_used}")
if len(user_agents_used) >= user_agents_used_threshold: if len(user_agents_used) >= user_agents_used_threshold:
score["attacker"]["different_user_agents"] = True score["attacker"]["different_user_agents"] = True
score["good_crawler"]["different_user_agents"] = False score["good_crawler"]["different_user_agents"] = False
@@ -198,7 +268,7 @@ def main():
score["good_crawler"]["different_user_agents"] = False score["good_crawler"]["different_user_agents"] = False
score["bad_crawler"]["different_user_agents"] = False score["bad_crawler"]["different_user_agents"] = False
score["regular_user"]["different_user_agents"] = False score["regular_user"]["different_user_agents"] = False
#--------------------- Attack URLs --------------------- # --------------------- Attack URLs ---------------------
attack_urls_found_list = [] attack_urls_found_list = []
wl = get_wordlists() wl = get_wordlists()
if wl.attack_patterns: if wl.attack_patterns:
@@ -215,12 +285,14 @@ def main():
for name, pattern in wl.attack_patterns.items(): for name, pattern in wl.attack_patterns.items():
# Check original, decoded, and double-decoded paths # Check original, decoded, and double-decoded paths
if (re.search(pattern, queried_path, re.IGNORECASE) or if (
re.search(pattern, decoded_path, re.IGNORECASE) or re.search(pattern, queried_path, re.IGNORECASE)
re.search(pattern, decoded_path_twice, re.IGNORECASE)): or re.search(pattern, decoded_path, re.IGNORECASE)
or re.search(pattern, decoded_path_twice, re.IGNORECASE)
):
attack_urls_found_list.append(f"{name}: {pattern}") attack_urls_found_list.append(f"{name}: {pattern}")
#remove duplicates # remove duplicates
attack_urls_found_list = set(attack_urls_found_list) attack_urls_found_list = set(attack_urls_found_list)
attack_urls_found_list = list(attack_urls_found_list) attack_urls_found_list = list(attack_urls_found_list)
@@ -234,28 +306,102 @@ def main():
score["good_crawler"]["attack_url"] = False score["good_crawler"]["attack_url"] = False
score["bad_crawler"]["attack_url"] = False score["bad_crawler"]["attack_url"] = False
score["regular_user"]["attack_url"] = False score["regular_user"]["attack_url"] = False
#--------------------- Calculate score --------------------- # --------------------- Calculate score ---------------------
attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0 attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0
attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"] attacker_score = (
attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"] score["attacker"]["risky_http_methods"]
attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"] * weights["attacker"]["risky_http_methods"]
attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"] )
attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"] attacker_score = (
good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"] attacker_score
good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"] + score["attacker"]["robots_violations"]
good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"] * weights["attacker"]["robots_violations"]
good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"] )
good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"] attacker_score = (
bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"] attacker_score
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"] + score["attacker"]["uneven_request_timing"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"]
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"] )
bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"] attacker_score = (
regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"] attacker_score
regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"] + score["attacker"]["different_user_agents"]
regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"] * weights["attacker"]["different_user_agents"]
regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"] )
regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"] attacker_score = (
attacker_score
+ score["attacker"]["attack_url"] * weights["attacker"]["attack_url"]
)
good_crawler_score = (
score["good_crawler"]["risky_http_methods"]
* weights["good_crawler"]["risky_http_methods"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["robots_violations"]
* weights["good_crawler"]["robots_violations"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["uneven_request_timing"]
* weights["good_crawler"]["uneven_request_timing"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["different_user_agents"]
* weights["good_crawler"]["different_user_agents"]
)
good_crawler_score = (
good_crawler_score
+ score["good_crawler"]["attack_url"]
* weights["good_crawler"]["attack_url"]
)
bad_crawler_score = (
score["bad_crawler"]["risky_http_methods"]
* weights["bad_crawler"]["risky_http_methods"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["robots_violations"]
* weights["bad_crawler"]["robots_violations"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["uneven_request_timing"]
* weights["bad_crawler"]["uneven_request_timing"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["different_user_agents"]
* weights["bad_crawler"]["different_user_agents"]
)
bad_crawler_score = (
bad_crawler_score
+ score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"]
)
regular_user_score = (
score["regular_user"]["risky_http_methods"]
* weights["regular_user"]["risky_http_methods"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["robots_violations"]
* weights["regular_user"]["robots_violations"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["uneven_request_timing"]
* weights["regular_user"]["uneven_request_timing"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["different_user_agents"]
* weights["regular_user"]["different_user_agents"]
)
regular_user_score = (
regular_user_score
+ score["regular_user"]["attack_url"]
* weights["regular_user"]["attack_url"]
)
score_details = f""" score_details = f"""
Attacker score: {attacker_score} Attacker score: {attacker_score}
Good Crawler score: {good_crawler_score} Good Crawler score: {good_crawler_score}
@@ -263,9 +409,22 @@ def main():
Regular User score: {regular_user_score} Regular User score: {regular_user_score}
""" """
app_logger.debug(score_details) app_logger.debug(score_details)
analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list} analyzed_metrics = {
category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score} "risky_http_methods": http_method_attacker_score,
"robots_violations": violated_robots_ratio,
"uneven_request_timing": mean,
"different_user_agents": user_agents_used,
"attack_url": attack_urls_found_list,
}
category_scores = {
"attacker": attacker_score,
"good_crawler": good_crawler_score,
"bad_crawler": bad_crawler_score,
"regular_user": regular_user_score,
}
category = max(category_scores, key=category_scores.get) category = max(category_scores, key=category_scores.get)
last_analysis = datetime.now() last_analysis = datetime.now()
db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis) db_manager.update_ip_stats_analysis(
ip, analyzed_metrics, category, category_scores, last_analysis
)
return return

View File

@@ -11,7 +11,7 @@ TASK_CONFIG = {
"name": "fetch-ip-rep", "name": "fetch-ip-rep",
"cron": "*/5 * * * *", "cron": "*/5 * * * *",
"enabled": True, "enabled": True,
"run_when_loaded": True "run_when_loaded": True,
} }
@@ -21,7 +21,9 @@ def main():
# Only get IPs that haven't been enriched yet # Only get IPs that haven't been enriched yet
unenriched_ips = db_manager.get_unenriched_ips(limit=50) unenriched_ips = db_manager.get_unenriched_ips(limit=50)
app_logger.info(f"{len(unenriched_ips)} IP's need to be have reputation enrichment.") app_logger.info(
f"{len(unenriched_ips)} IP's need to be have reputation enrichment."
)
for ip in unenriched_ips: for ip in unenriched_ips:
try: try:
api_url = "https://iprep.lcrawl.com/api/iprep/" api_url = "https://iprep.lcrawl.com/api/iprep/"
@@ -43,8 +45,11 @@ def main():
sanitized_list_on = sanitize_dict(list_on, 100000) sanitized_list_on = sanitize_dict(list_on, 100000)
db_manager.update_ip_rep_infos( db_manager.update_ip_rep_infos(
ip, sanitized_country_iso_code, sanitized_asn, ip,
sanitized_asn_org, sanitized_list_on sanitized_country_iso_code,
sanitized_asn,
sanitized_asn_org,
sanitized_list_on,
) )
except requests.RequestException as e: except requests.RequestException as e:
app_logger.warning(f"Failed to fetch IP rep for {ip}: {e}") app_logger.warning(f"Failed to fetch IP rep for {ip}: {e}")

View File

@@ -17,24 +17,29 @@ TASK_CONFIG = {
"name": "export-malicious-ips", "name": "export-malicious-ips",
"cron": "*/5 * * * *", "cron": "*/5 * * * *",
"enabled": True, "enabled": True,
"run_when_loaded": True "run_when_loaded": True,
} }
EXPORTS_DIR = "exports" EXPORTS_DIR = "exports"
OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt") OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
# ---------------------- # ----------------------
# TASK LOGIC # TASK LOGIC
# ---------------------- # ----------------------
def has_recent_honeypot_access(session, minutes: int = 5) -> bool:
    """Check if honeypot was accessed in the last N minutes.

    Args:
        session: Database session exposing the ``query()`` API used below.
        minutes: Size of the look-back window, in minutes.

    Returns:
        True if at least one ``AccessLog`` row flagged as a honeypot trigger
        has a timestamp at or after the cutoff, False otherwise.
    """
    # Naive local time, unchanged from the original.
    # NOTE(review): presumably matches how AccessLog.timestamp is stored - confirm.
    cutoff_time = datetime.now() - timedelta(minutes=minutes)
    recent_triggers = session.query(AccessLog).filter(
        # "== True" builds a SQL expression here; it is not a Python comparison.
        AccessLog.is_honeypot_trigger == True,  # noqa: E712
        AccessLog.timestamp >= cutoff_time,
    )
    # Only existence matters, so ask the database for EXISTS rather than
    # counting every matching row and comparing the total with zero.
    return bool(session.query(recent_triggers.exists()).scalar())
def main(): def main():
""" """
Export all IPs flagged as suspicious to a text file. Export all IPs flagged as suspicious to a text file.
@@ -49,23 +54,29 @@ def main():
# Check for recent honeypot activity # Check for recent honeypot activity
if not has_recent_honeypot_access(session): if not has_recent_honeypot_access(session):
app_logger.info(f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes") app_logger.info(
f"[Background Task] {task_name} skipped - no honeypot access in last 5 minutes"
)
return return
# Query distinct suspicious IPs # Query distinct suspicious IPs
results = session.query(distinct(AccessLog.ip)).filter( results = (
AccessLog.is_suspicious == True session.query(distinct(AccessLog.ip))
).all() .filter(AccessLog.is_suspicious == True)
.all()
)
# Ensure exports directory exists # Ensure exports directory exists
os.makedirs(EXPORTS_DIR, exist_ok=True) os.makedirs(EXPORTS_DIR, exist_ok=True)
# Write IPs to file (one per line) # Write IPs to file (one per line)
with open(OUTPUT_FILE, 'w') as f: with open(OUTPUT_FILE, "w") as f:
for (ip,) in results: for (ip,) in results:
f.write(f"{ip}\n") f.write(f"{ip}\n")
app_logger.info(f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}") app_logger.info(
f"[Background Task] {task_name} exported {len(results)} IPs to {OUTPUT_FILE}"
)
except Exception as e: except Exception as e:
app_logger.error(f"[Background Task] {task_name} failed: {e}") app_logger.error(f"[Background Task] {task_name} failed: {e}")

View File

@@ -6,7 +6,12 @@ import threading
import importlib import importlib
import importlib.util import importlib.util
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger from logger import (
initialize_logging,
get_app_logger,
get_access_logger,
get_credential_logger,
)
app_logger = get_app_logger() app_logger = get_app_logger()
@@ -28,7 +33,7 @@ except ModuleNotFoundError:
# ---------- TASKSMASTER CLASS ---------- # ---------- TASKSMASTER CLASS ----------
class TasksMaster: class TasksMaster:
TASK_DEFAULT_CRON = '*/15 * * * *' TASK_DEFAULT_CRON = "*/15 * * * *"
TASK_JITTER = 240 TASK_JITTER = 240
TASKS_FOLDER = os.path.join(os.path.dirname(__file__), "tasks") TASKS_FOLDER = os.path.join(os.path.dirname(__file__), "tasks")
@@ -36,7 +41,9 @@ class TasksMaster:
self.tasks = self._config_tasks() self.tasks = self._config_tasks()
self.scheduler = scheduler self.scheduler = scheduler
self.last_run_times = {} self.last_run_times = {}
self.scheduler.add_listener(self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR) self.scheduler.add_listener(
self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR
)
def _config_tasks(self): def _config_tasks(self):
""" """
@@ -80,7 +87,7 @@ class TasksMaster:
for filename in sorted(os.listdir(folder_path)): for filename in sorted(os.listdir(folder_path)):
# skip any non python files, as well as any __pycache__ or .pyc files that might creep in there # skip any non python files, as well as any __pycache__ or .pyc files that might creep in there
if not filename.endswith('.py') or filename.startswith("__"): if not filename.endswith(".py") or filename.startswith("__"):
continue continue
path = os.path.join(folder_path, filename) path = os.path.join(folder_path, filename)
@@ -95,11 +102,13 @@ class TasksMaster:
continue continue
# if we have a tasks config and a main function, we attempt to schedule it # if we have a tasks config and a main function, we attempt to schedule it
if hasattr(module, 'TASK_CONFIG') and hasattr(module, 'main'): if hasattr(module, "TASK_CONFIG") and hasattr(module, "main"):
# ensure task_config is a dict # ensure task_config is a dict
if not isinstance(module.TASK_CONFIG, dict): if not isinstance(module.TASK_CONFIG, dict):
app_logger.error(f"TASK_CONFIG is not a dict in {filename}. Skipping task.") app_logger.error(
f"TASK_CONFIG is not a dict in {filename}. Skipping task."
)
continue continue
task_cron = module.TASK_CONFIG.get("cron") or self.TASK_DEFAULT_CRON task_cron = module.TASK_CONFIG.get("cron") or self.TASK_DEFAULT_CRON
@@ -109,24 +118,26 @@ class TasksMaster:
try: try:
CronTrigger.from_crontab(task_cron) CronTrigger.from_crontab(task_cron)
except ValueError as ve: except ValueError as ve:
app_logger.error(f"Invalid cron format for task {task_name}: {ve} - Skipping this task") app_logger.error(
f"Invalid cron format for task {task_name}: {ve} - Skipping this task"
)
continue continue
task = { task = {
'name': module.TASK_CONFIG.get('name', module_name), "name": module.TASK_CONFIG.get("name", module_name),
'filename': filename, "filename": filename,
'cron': task_cron, "cron": task_cron,
"enabled": module.TASK_CONFIG.get("enabled", False), "enabled": module.TASK_CONFIG.get("enabled", False),
"run_when_loaded": module.TASK_CONFIG.get("run_when_loaded", False) "run_when_loaded": module.TASK_CONFIG.get("run_when_loaded", False),
} }
tasks.append(task) tasks.append(task)
# we are missing things, and we log what's missing # we are missing things, and we log what's missing
else: else:
if not hasattr(module, 'TASK_CONFIG'): if not hasattr(module, "TASK_CONFIG"):
app_logger.warning(f"Missing TASK_CONFIG in {filename}") app_logger.warning(f"Missing TASK_CONFIG in {filename}")
elif not hasattr(module, 'main'): elif not hasattr(module, "main"):
app_logger.warning(f"Missing main() in {filename}") app_logger.warning(f"Missing main() in {filename}")
return tasks return tasks
@@ -147,18 +158,32 @@ class TasksMaster:
# if task is disabled, skip this one # if task is disabled, skip this one
if not task_enabled: if not task_enabled:
app_logger.info(f"{task_name} is disabled in client config. Skipping task") app_logger.info(
f"{task_name} is disabled in client config. Skipping task"
)
continue continue
try: try:
if os.path.isfile(os.path.join(self.TASKS_FOLDER, task_to_run.get("filename"))): if os.path.isfile(
os.path.join(self.TASKS_FOLDER, task_to_run.get("filename"))
):
# schedule the task now that everything has checked out above... # schedule the task now that everything has checked out above...
self._schedule_task(task_name, module_name, task_cron, run_when_loaded) self._schedule_task(
app_logger.info(f"Scheduled {module_name} cron is set to {task_cron}.", extra={"task": task_to_run}) task_name, module_name, task_cron, run_when_loaded
)
app_logger.info(
f"Scheduled {module_name} cron is set to {task_cron}.",
extra={"task": task_to_run},
)
else: else:
app_logger.info(f"Skipping invalid or unsafe file: {task_to_run.get('filename')}", extra={"task": task_to_run}) app_logger.info(
f"Skipping invalid or unsafe file: {task_to_run.get('filename')}",
extra={"task": task_to_run},
)
except Exception as e: except Exception as e:
app_logger.error(f"Error scheduling task: {e}", extra={"tasks": task_to_run}) app_logger.error(
f"Error scheduling task: {e}", extra={"tasks": task_to_run}
)
def _schedule_task(self, task_name, module_name, task_cron, run_when_loaded): def _schedule_task(self, task_name, module_name, task_cron, run_when_loaded):
try: try:
@@ -166,7 +191,7 @@ class TasksMaster:
module = importlib.import_module(f"tasks.{module_name}") module = importlib.import_module(f"tasks.{module_name}")
# Check if the module has a 'main' function # Check if the module has a 'main' function
if hasattr(module, 'main'): if hasattr(module, "main"):
app_logger.info(f"Scheduling {task_name} - {module_name} Main Function") app_logger.info(f"Scheduling {task_name} - {module_name} Main Function")
# unique_job_id # unique_job_id
@@ -180,7 +205,9 @@ class TasksMaster:
# schedule the task / job # schedule the task / job
if run_when_loaded: if run_when_loaded:
app_logger.info(f"Task: {task_name} is set to run instantly. Scheduling to run on scheduler start") app_logger.info(
f"Task: {task_name} is set to run instantly. Scheduling to run on scheduler start"
)
self.scheduler.add_job( self.scheduler.add_job(
module.main, module.main,
@@ -189,7 +216,7 @@ class TasksMaster:
jitter=self.TASK_JITTER, jitter=self.TASK_JITTER,
name=task_name, name=task_name,
next_run_time=datetime.datetime.now(), next_run_time=datetime.datetime.now(),
max_instances=1 max_instances=1,
) )
else: else:
self.scheduler.add_job( self.scheduler.add_job(
@@ -198,7 +225,7 @@ class TasksMaster:
id=job_identifier, id=job_identifier,
jitter=self.TASK_JITTER, jitter=self.TASK_JITTER,
name=task_name, name=task_name,
max_instances=1 max_instances=1,
) )
else: else:
app_logger.error(f"{module_name} does not define a 'main' function.") app_logger.error(f"{module_name} does not define a 'main' function.")
@@ -220,11 +247,13 @@ class TasksMaster:
jobs_list = [] jobs_list = []
for job in scheduled_jobs: for job in scheduled_jobs:
jobs_list.append({ jobs_list.append(
{
"id": job.id, "id": job.id,
"name": job.name, "name": job.name,
"next_run": job.next_run_time, "next_run": job.next_run_time,
}) }
)
return jobs_list return jobs_list
def run_scheduled_tasks(self): def run_scheduled_tasks(self):
@@ -251,6 +280,7 @@ class TasksMaster:
# ---------- SINGLETON WRAPPER ---------- # ---------- SINGLETON WRAPPER ----------
T = type T = type
def singleton_loader(func): def singleton_loader(func):
"""Decorator to ensure only one instance exists.""" """Decorator to ensure only one instance exists."""
cache: dict[str, T] = {} cache: dict[str, T] = {}
@@ -262,6 +292,7 @@ def singleton_loader(func):
if func.__name__ not in cache: if func.__name__ not in cache:
cache[func.__name__] = func(*args, **kwargs) cache[func.__name__] = func(*args, **kwargs)
return cache[func.__name__] return cache[func.__name__]
return wrapper return wrapper
@@ -283,6 +314,8 @@ def get_tasksmaster(scheduler: BackgroundScheduler | None = None) -> TasksMaster
# Auto-start scheduler if not already running # Auto-start scheduler if not already running
if not scheduler.running: if not scheduler.running:
scheduler.start() scheduler.start()
app_logger.info("TasksMaster scheduler started automatically with singleton creation.") app_logger.info(
"TasksMaster scheduler started automatically with singleton creation."
)
return tm_instance return tm_instance

View File

@@ -8,8 +8,8 @@ from .template_loader import load_template, clear_cache, TemplateNotFoundError
from . import html_templates from . import html_templates
__all__ = [ __all__ = [
'load_template', "load_template",
'clear_cache', "clear_cache",
'TemplateNotFoundError', "TemplateNotFoundError",
'html_templates', "html_templates",
] ]

View File

@@ -9,12 +9,14 @@ import html
from datetime import datetime from datetime import datetime
from zoneinfo import ZoneInfo from zoneinfo import ZoneInfo
def _escape(value) -> str: def _escape(value) -> str:
"""Escape HTML special characters to prevent XSS attacks.""" """Escape HTML special characters to prevent XSS attacks."""
if value is None: if value is None:
return "" return ""
return html.escape(str(value)) return html.escape(str(value))
def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str: def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str:
"""Format ISO timestamp for display with timezone conversion """Format ISO timestamp for display with timezone conversion
@@ -30,10 +32,12 @@ def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str:
return dt.strftime("%Y-%m-%d %H:%M:%S") return dt.strftime("%Y-%m-%d %H:%M:%S")
except Exception: except Exception:
# Fallback for old format # Fallback for old format
return iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp return (
iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp
)
def generate_dashboard(stats: dict, dashboard_path: str = '') -> str: def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
"""Generate dashboard HTML with access statistics """Generate dashboard HTML with access statistics
Args: Args:
@@ -42,8 +46,8 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
""" """
# Generate IP rows with clickable functionality for dropdown stats # Generate IP rows with clickable functionality for dropdown stats
top_ips_rows = '\n'.join([ top_ips_rows = (
f'''<tr class="ip-row" data-ip="{_escape(ip)}"> "\n".join([f"""<tr class="ip-row" data-ip="{_escape(ip)}">
<td class="rank">{i+1}</td> <td class="rank">{i+1}</td>
<td class="ip-clickable">{_escape(ip)}</td> <td class="ip-clickable">{_escape(ip)}</td>
<td>{count}</td> <td>{count}</td>
@@ -54,25 +58,35 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div> <div class="loading">Loading stats...</div>
</div> </div>
</td> </td>
</tr>''' </tr>""" for i, (ip, count) in enumerate(stats["top_ips"])])
for i, (ip, count) in enumerate(stats['top_ips']) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>' )
# Generate paths rows (CRITICAL: paths can contain XSS payloads) # Generate paths rows (CRITICAL: paths can contain XSS payloads)
top_paths_rows = '\n'.join([ top_paths_rows = (
f'<tr><td class="rank">{i+1}</td><td>{_escape(path)}</td><td>{count}</td></tr>' "\n".join(
for i, (path, count) in enumerate(stats['top_paths']) [
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>' f'<tr><td class="rank">{i+1}</td><td>{_escape(path)}</td><td>{count}</td></tr>'
for i, (path, count) in enumerate(stats["top_paths"])
]
)
or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
)
# Generate User-Agent rows (CRITICAL: user agents can contain XSS payloads) # Generate User-Agent rows (CRITICAL: user agents can contain XSS payloads)
top_ua_rows = '\n'.join([ top_ua_rows = (
f'<tr><td class="rank">{i+1}</td><td style="word-break: break-all;">{_escape(ua[:80])}</td><td>{count}</td></tr>' "\n".join(
for i, (ua, count) in enumerate(stats['top_user_agents']) [
]) or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>' f'<tr><td class="rank">{i+1}</td><td style="word-break: break-all;">{_escape(ua[:80])}</td><td>{count}</td></tr>'
for i, (ua, count) in enumerate(stats["top_user_agents"])
]
)
or '<tr><td colspan="3" style="text-align:center;">No data</td></tr>'
)
# Generate suspicious accesses rows with clickable IPs # Generate suspicious accesses rows with clickable IPs
suspicious_rows = '\n'.join([ suspicious_rows = (
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}"> "\n".join([f"""<tr class="ip-row" data-ip="{_escape(log["ip"])}">
<td class="ip-clickable">{_escape(log["ip"])}</td> <td class="ip-clickable">{_escape(log["ip"])}</td>
<td>{_escape(log["path"])}</td> <td>{_escape(log["path"])}</td>
<td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td> <td style="word-break: break-all;">{_escape(log["user_agent"][:60])}</td>
@@ -84,13 +98,13 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div> <div class="loading">Loading stats...</div>
</div> </div>
</td> </td>
</tr>''' </tr>""" for log in stats["recent_suspicious"][-10:]])
for log in stats['recent_suspicious'][-10:] or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>'
]) or '<tr><td colspan="4" style="text-align:center;">No suspicious activity detected</td></tr>' )
# Generate honeypot triggered IPs rows with clickable IPs # Generate honeypot triggered IPs rows with clickable IPs
honeypot_rows = '\n'.join([ honeypot_rows = (
f'''<tr class="ip-row" data-ip="{_escape(ip)}"> "\n".join([f"""<tr class="ip-row" data-ip="{_escape(ip)}">
<td class="ip-clickable">{_escape(ip)}</td> <td class="ip-clickable">{_escape(ip)}</td>
<td style="word-break: break-all;">{_escape(", ".join(paths))}</td> <td style="word-break: break-all;">{_escape(", ".join(paths))}</td>
<td>{len(paths)}</td> <td>{len(paths)}</td>
@@ -101,13 +115,13 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div> <div class="loading">Loading stats...</div>
</div> </div>
</td> </td>
</tr>''' </tr>""" for ip, paths in stats.get("honeypot_triggered_ips", [])])
for ip, paths in stats.get('honeypot_triggered_ips', []) or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>'
]) or '<tr><td colspan="3" style="text-align:center;">No honeypot triggers yet</td></tr>' )
# Generate attack types rows with clickable IPs # Generate attack types rows with clickable IPs
attack_type_rows = '\n'.join([ attack_type_rows = (
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}"> "\n".join([f"""<tr class="ip-row" data-ip="{_escape(log["ip"])}">
<td class="ip-clickable">{_escape(log["ip"])}</td> <td class="ip-clickable">{_escape(log["ip"])}</td>
<td>{_escape(log["path"])}</td> <td>{_escape(log["path"])}</td>
<td>{_escape(", ".join(log["attack_types"]))}</td> <td>{_escape(", ".join(log["attack_types"]))}</td>
@@ -120,13 +134,13 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div> <div class="loading">Loading stats...</div>
</div> </div>
</td> </td>
</tr>''' </tr>""" for log in stats.get("attack_types", [])[-10:]])
for log in stats.get('attack_types', [])[-10:] or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>'
]) or '<tr><td colspan="4" style="text-align:center;">No attacks detected</td></tr>' )
# Generate credential attempts rows with clickable IPs # Generate credential attempts rows with clickable IPs
credential_rows = '\n'.join([ credential_rows = (
f'''<tr class="ip-row" data-ip="{_escape(log["ip"])}"> "\n".join([f"""<tr class="ip-row" data-ip="{_escape(log["ip"])}">
<td class="ip-clickable">{_escape(log["ip"])}</td> <td class="ip-clickable">{_escape(log["ip"])}</td>
<td>{_escape(log["username"])}</td> <td>{_escape(log["username"])}</td>
<td>{_escape(log["password"])}</td> <td>{_escape(log["password"])}</td>
@@ -139,9 +153,9 @@ def generate_dashboard(stats: dict, dashboard_path: str = '') -> str:
<div class="loading">Loading stats...</div> <div class="loading">Loading stats...</div>
</div> </div>
</td> </td>
</tr>''' </tr>""" for log in stats.get("credential_attempts", [])[-20:]])
for log in stats.get('credential_attempts', [])[-20:] or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>'
]) or '<tr><td colspan="5" style="text-align:center;">No credentials captured yet</td></tr>' )
return f"""<!DOCTYPE html> return f"""<!DOCTYPE html>
<html> <html>

View File

@@ -11,6 +11,7 @@ from typing import Dict
class TemplateNotFoundError(Exception): class TemplateNotFoundError(Exception):
"""Raised when a template file cannot be found.""" """Raised when a template file cannot be found."""
pass pass
@@ -46,7 +47,7 @@ def load_template(name: str, **kwargs) -> str:
# Check cache first # Check cache first
if name not in _template_cache: if name not in _template_cache:
# Determine file path based on whether name has an extension # Determine file path based on whether name has an extension
if '.' in name: if "." in name:
file_path = _TEMPLATE_DIR / name file_path = _TEMPLATE_DIR / name
else: else:
file_path = _TEMPLATE_DIR / f"{name}.html" file_path = _TEMPLATE_DIR / f"{name}.html"
@@ -54,7 +55,7 @@ def load_template(name: str, **kwargs) -> str:
if not file_path.exists(): if not file_path.exists():
raise TemplateNotFoundError(f"Template '{name}' not found at {file_path}") raise TemplateNotFoundError(f"Template '{name}' not found at {file_path}")
_template_cache[name] = file_path.read_text(encoding='utf-8') _template_cache[name] = file_path.read_text(encoding="utf-8")
template = _template_cache[name] template = _template_cache[name]

View File

@@ -17,7 +17,13 @@ class AccessTracker:
Maintains in-memory structures for fast dashboard access and Maintains in-memory structures for fast dashboard access and
persists data to SQLite for long-term storage and analysis. persists data to SQLite for long-term storage and analysis.
""" """
def __init__(self, max_pages_limit, ban_duration_seconds, db_manager: Optional[DatabaseManager] = None):
def __init__(
self,
max_pages_limit,
ban_duration_seconds,
db_manager: Optional[DatabaseManager] = None,
):
""" """
Initialize the access tracker. Initialize the access tracker.
@@ -37,9 +43,27 @@ class AccessTracker:
self.ip_page_visits: Dict[str, Dict[str, object]] = defaultdict(dict) self.ip_page_visits: Dict[str, Dict[str, object]] = defaultdict(dict)
self.suspicious_patterns = [ self.suspicious_patterns = [
'bot', 'crawler', 'spider', 'scraper', 'curl', 'wget', 'python-requests', "bot",
'scanner', 'nikto', 'sqlmap', 'nmap', 'masscan', 'nessus', 'acunetix', "crawler",
'burp', 'zap', 'w3af', 'metasploit', 'nuclei', 'gobuster', 'dirbuster' "spider",
"scraper",
"curl",
"wget",
"python-requests",
"scanner",
"nikto",
"sqlmap",
"nmap",
"masscan",
"nessus",
"acunetix",
"burp",
"zap",
"w3af",
"metasploit",
"nuclei",
"gobuster",
"dirbuster",
] ]
# Load attack patterns from wordlists # Load attack patterns from wordlists
@@ -49,11 +73,11 @@ class AccessTracker:
# Fallback if wordlists not loaded # Fallback if wordlists not loaded
if not self.attack_types: if not self.attack_types:
self.attack_types = { self.attack_types = {
'path_traversal': r'\.\.', "path_traversal": r"\.\.",
'sql_injection': r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)", "sql_injection": r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
'xss_attempt': r'(<script|javascript:|onerror=|onload=)', "xss_attempt": r"(<script|javascript:|onerror=|onload=)",
'common_probes': r'(wp-admin|phpmyadmin|\.env|\.git|/admin|/config)', "common_probes": r"(wp-admin|phpmyadmin|\.env|\.git|/admin|/config)",
'shell_injection': r'(\||;|`|\$\(|&&)', "shell_injection": r"(\||;|`|\$\(|&&)",
} }
# Track IPs that accessed honeypot paths from robots.txt # Track IPs that accessed honeypot paths from robots.txt
@@ -94,14 +118,22 @@ class AccessTracker:
parsed = urllib.parse.parse_qs(post_data) parsed = urllib.parse.parse_qs(post_data)
# Common username field names # Common username field names
username_fields = ['username', 'user', 'login', 'email', 'log', 'userid', 'account'] username_fields = [
"username",
"user",
"login",
"email",
"log",
"userid",
"account",
]
for field in username_fields: for field in username_fields:
if field in parsed and parsed[field]: if field in parsed and parsed[field]:
username = parsed[field][0] username = parsed[field][0]
break break
# Common password field names # Common password field names
password_fields = ['password', 'pass', 'passwd', 'pwd', 'passphrase'] password_fields = ["password", "pass", "passwd", "pwd", "passphrase"]
for field in password_fields: for field in password_fields:
if field in parsed and parsed[field]: if field in parsed and parsed[field]:
password = parsed[field][0] password = parsed[field][0]
@@ -109,8 +141,12 @@ class AccessTracker:
except Exception: except Exception:
# If parsing fails, try simple regex patterns # If parsing fails, try simple regex patterns
username_match = re.search(r'(?:username|user|login|email|log)=([^&\s]+)', post_data, re.IGNORECASE) username_match = re.search(
password_match = re.search(r'(?:password|pass|passwd|pwd)=([^&\s]+)', post_data, re.IGNORECASE) r"(?:username|user|login|email|log)=([^&\s]+)", post_data, re.IGNORECASE
)
password_match = re.search(
r"(?:password|pass|passwd|pwd)=([^&\s]+)", post_data, re.IGNORECASE
)
if username_match: if username_match:
username = urllib.parse.unquote_plus(username_match.group(1)) username = urllib.parse.unquote_plus(username_match.group(1))
@@ -119,29 +155,30 @@ class AccessTracker:
return username, password return username, password
def record_credential_attempt(self, ip: str, path: str, username: str, password: str): def record_credential_attempt(
self, ip: str, path: str, username: str, password: str
):
""" """
Record a credential login attempt. Record a credential login attempt.
Stores in both in-memory list and SQLite database. Stores in both in-memory list and SQLite database.
""" """
# In-memory storage for dashboard # In-memory storage for dashboard
self.credential_attempts.append({ self.credential_attempts.append(
'ip': ip, {
'path': path, "ip": ip,
'username': username, "path": path,
'password': password, "username": username,
'timestamp': datetime.now().isoformat() "password": password,
}) "timestamp": datetime.now().isoformat(),
}
)
# Persist to database # Persist to database
if self.db: if self.db:
try: try:
self.db.persist_credential( self.db.persist_credential(
ip=ip, ip=ip, path=path, username=username, password=password
path=path,
username=username,
password=password
) )
except Exception: except Exception:
# Don't crash if database persistence fails # Don't crash if database persistence fails
@@ -151,9 +188,9 @@ class AccessTracker:
self, self,
ip: str, ip: str,
path: str, path: str,
user_agent: str = '', user_agent: str = "",
body: str = '', body: str = "",
method: str = 'GET' method: str = "GET",
): ):
""" """
Record an access attempt. Record an access attempt.
@@ -180,9 +217,9 @@ class AccessTracker:
attack_findings.extend(self.detect_attack_type(body)) attack_findings.extend(self.detect_attack_type(body))
is_suspicious = ( is_suspicious = (
self.is_suspicious_user_agent(user_agent) or self.is_suspicious_user_agent(user_agent)
self.is_honeypot_path(path) or or self.is_honeypot_path(path)
len(attack_findings) > 0 or len(attack_findings) > 0
) )
is_honeypot = self.is_honeypot_path(path) is_honeypot = self.is_honeypot_path(path)
@@ -191,15 +228,17 @@ class AccessTracker:
self.honeypot_triggered[ip].append(path) self.honeypot_triggered[ip].append(path)
# In-memory storage for dashboard # In-memory storage for dashboard
self.access_log.append({ self.access_log.append(
'ip': ip, {
'path': path, "ip": ip,
'user_agent': user_agent, "path": path,
'suspicious': is_suspicious, "user_agent": user_agent,
'honeypot_triggered': self.is_honeypot_path(path), "suspicious": is_suspicious,
'attack_types':attack_findings, "honeypot_triggered": self.is_honeypot_path(path),
'timestamp': datetime.now().isoformat() "attack_types": attack_findings,
}) "timestamp": datetime.now().isoformat(),
}
)
# Persist to database # Persist to database
if self.db: if self.db:
@@ -211,13 +250,13 @@ class AccessTracker:
method=method, method=method,
is_suspicious=is_suspicious, is_suspicious=is_suspicious,
is_honeypot_trigger=is_honeypot, is_honeypot_trigger=is_honeypot,
attack_types=attack_findings if attack_findings else None attack_types=attack_findings if attack_findings else None,
) )
except Exception: except Exception:
# Don't crash if database persistence fails # Don't crash if database persistence fails
pass pass
def detect_attack_type(self, data:str) -> list[str]: def detect_attack_type(self, data: str) -> list[str]:
""" """
Returns a list of all attack types found in path data Returns a list of all attack types found in path data
""" """
@@ -230,27 +269,37 @@ class AccessTracker:
def is_honeypot_path(self, path: str) -> bool: def is_honeypot_path(self, path: str) -> bool:
"""Check if path is one of the honeypot traps from robots.txt""" """Check if path is one of the honeypot traps from robots.txt"""
honeypot_paths = [ honeypot_paths = [
'/admin', "/admin",
'/admin/', "/admin/",
'/backup', "/backup",
'/backup/', "/backup/",
'/config', "/config",
'/config/', "/config/",
'/private', "/private",
'/private/', "/private/",
'/database', "/database",
'/database/', "/database/",
'/credentials.txt', "/credentials.txt",
'/passwords.txt', "/passwords.txt",
'/admin_notes.txt', "/admin_notes.txt",
'/api_keys.json', "/api_keys.json",
'/.env', "/.env",
'/wp-admin', "/wp-admin",
'/wp-admin/', "/wp-admin/",
'/phpmyadmin', "/phpmyadmin",
'/phpMyAdmin/' "/phpMyAdmin/",
] ]
return path in honeypot_paths or any(hp in path.lower() for hp in ['/backup', '/admin', '/config', '/private', '/database', 'phpmyadmin']) return path in honeypot_paths or any(
hp in path.lower()
for hp in [
"/backup",
"/admin",
"/config",
"/private",
"/database",
"phpmyadmin",
]
)
def is_suspicious_user_agent(self, user_agent: str) -> bool: def is_suspicious_user_agent(self, user_agent: str) -> bool:
"""Check if user agent matches suspicious patterns""" """Check if user agent matches suspicious patterns"""
@@ -272,6 +321,7 @@ class AccessTracker:
""" """
try: try:
from sanitizer import sanitize_ip from sanitizer import sanitize_ip
# Sanitize the IP address # Sanitize the IP address
safe_ip = sanitize_ip(client_ip) safe_ip = sanitize_ip(client_ip)
@@ -281,16 +331,17 @@ class AccessTracker:
return False return False
ip_stats = db.get_ip_stats_by_ip(safe_ip) ip_stats = db.get_ip_stats_by_ip(safe_ip)
if not ip_stats or not ip_stats.get('category'): if not ip_stats or not ip_stats.get("category"):
return False return False
# Check if category matches "good crawler" # Check if category matches "good crawler"
category = ip_stats.get('category', '').lower().strip() category = ip_stats.get("category", "").lower().strip()
return category return category
except Exception as e: except Exception as e:
# Log but don't crash on database errors # Log but don't crash on database errors
import logging import logging
logging.error(f"Error checking IP category for {client_ip}: {str(e)}") logging.error(f"Error checking IP category for {client_ip}: {str(e)}")
return False return False
@@ -315,7 +366,9 @@ class AccessTracker:
# Set ban if reached limit # Set ban if reached limit
if self.ip_page_visits[client_ip]["count"] >= self.max_pages_limit: if self.ip_page_visits[client_ip]["count"] >= self.max_pages_limit:
self.ip_page_visits[client_ip]["ban_timestamp"] = datetime.now().isoformat() self.ip_page_visits[client_ip][
"ban_timestamp"
] = datetime.now().isoformat()
return self.ip_page_visits[client_ip]["count"] return self.ip_page_visits[client_ip]["count"]
@@ -337,8 +390,10 @@ class AccessTracker:
if ban_timestamp is not None: if ban_timestamp is not None:
banned = True banned = True
#Check if ban period has expired (> 60 seconds) # Check if ban period has expired (> 60 seconds)
ban_time = datetime.fromisoformat(self.ip_page_visits[client_ip]["ban_timestamp"]) ban_time = datetime.fromisoformat(
self.ip_page_visits[client_ip]["ban_timestamp"]
)
time_diff = datetime.now() - ban_time time_diff = datetime.now() - ban_time
if time_diff.total_seconds() > self.ban_duration_seconds: if time_diff.total_seconds() > self.ban_duration_seconds:
self.ip_page_visits[client_ip]["count"] = 0 self.ip_page_visits[client_ip]["count"] = 0
@@ -350,7 +405,6 @@ class AccessTracker:
except Exception: except Exception:
return False return False
def get_page_visit_count(self, client_ip: str) -> int: def get_page_visit_count(self, client_ip: str) -> int:
""" """
Get the current page visit count for an IP. Get the current page visit count for an IP.
@@ -372,20 +426,24 @@ class AccessTracker:
def get_top_paths(self, limit: int = 10) -> List[Tuple[str, int]]: def get_top_paths(self, limit: int = 10) -> List[Tuple[str, int]]:
"""Get top N paths by access count""" """Get top N paths by access count"""
return sorted(self.path_counts.items(), key=lambda x: x[1], reverse=True)[:limit] return sorted(self.path_counts.items(), key=lambda x: x[1], reverse=True)[
:limit
]
def get_top_user_agents(self, limit: int = 10) -> List[Tuple[str, int]]: def get_top_user_agents(self, limit: int = 10) -> List[Tuple[str, int]]:
"""Get top N user agents by access count""" """Get top N user agents by access count"""
return sorted(self.user_agent_counts.items(), key=lambda x: x[1], reverse=True)[:limit] return sorted(self.user_agent_counts.items(), key=lambda x: x[1], reverse=True)[
:limit
]
def get_suspicious_accesses(self, limit: int = 20) -> List[Dict]: def get_suspicious_accesses(self, limit: int = 20) -> List[Dict]:
"""Get recent suspicious accesses""" """Get recent suspicious accesses"""
suspicious = [log for log in self.access_log if log.get('suspicious', False)] suspicious = [log for log in self.access_log if log.get("suspicious", False)]
return suspicious[-limit:] return suspicious[-limit:]
def get_attack_type_accesses(self, limit: int = 20) -> List[Dict]: def get_attack_type_accesses(self, limit: int = 20) -> List[Dict]:
"""Get recent accesses with detected attack types""" """Get recent accesses with detected attack types"""
attacks = [log for log in self.access_log if log.get('attack_types')] attacks = [log for log in self.access_log if log.get("attack_types")]
return attacks[-limit:] return attacks[-limit:]
def get_honeypot_triggered_ips(self) -> List[Tuple[str, List[str]]]: def get_honeypot_triggered_ips(self) -> List[Tuple[str, List[str]]]:
@@ -401,12 +459,12 @@ class AccessTracker:
stats = self.db.get_dashboard_counts() stats = self.db.get_dashboard_counts()
# Add detailed lists from database # Add detailed lists from database
stats['top_ips'] = self.db.get_top_ips(10) stats["top_ips"] = self.db.get_top_ips(10)
stats['top_paths'] = self.db.get_top_paths(10) stats["top_paths"] = self.db.get_top_paths(10)
stats['top_user_agents'] = self.db.get_top_user_agents(10) stats["top_user_agents"] = self.db.get_top_user_agents(10)
stats['recent_suspicious'] = self.db.get_recent_suspicious(20) stats["recent_suspicious"] = self.db.get_recent_suspicious(20)
stats['honeypot_triggered_ips'] = self.db.get_honeypot_triggered_ips() stats["honeypot_triggered_ips"] = self.db.get_honeypot_triggered_ips()
stats['attack_types'] = self.db.get_recent_attacks(20) stats["attack_types"] = self.db.get_recent_attacks(20)
stats['credential_attempts'] = self.db.get_credential_attempts(limit=50) stats["credential_attempts"] = self.db.get_credential_attempts(limit=50)
return stats return stats

View File

@@ -19,13 +19,15 @@ class Wordlists:
def _load_config(self): def _load_config(self):
"""Load wordlists from JSON file""" """Load wordlists from JSON file"""
config_path = Path(__file__).parent.parent / 'wordlists.json' config_path = Path(__file__).parent.parent / "wordlists.json"
try: try:
with open(config_path, 'r') as f: with open(config_path, "r") as f:
return json.load(f) return json.load(f)
except FileNotFoundError: except FileNotFoundError:
get_app_logger().warning(f"Wordlists file {config_path} not found, using default values") get_app_logger().warning(
f"Wordlists file {config_path} not found, using default values"
)
return self._get_defaults() return self._get_defaults()
except json.JSONDecodeError as e: except json.JSONDecodeError as e:
get_app_logger().warning(f"Invalid JSON in {config_path}: {e}") get_app_logger().warning(f"Invalid JSON in {config_path}: {e}")
@@ -36,29 +38,21 @@ class Wordlists:
return { return {
"usernames": { "usernames": {
"prefixes": ["admin", "user", "root"], "prefixes": ["admin", "user", "root"],
"suffixes": ["", "_prod", "_dev"] "suffixes": ["", "_prod", "_dev"],
}, },
"passwords": { "passwords": {
"prefixes": ["P@ssw0rd", "Admin"], "prefixes": ["P@ssw0rd", "Admin"],
"simple": ["test", "demo", "password"] "simple": ["test", "demo", "password"],
},
"emails": {
"domains": ["example.com", "test.com"]
},
"api_keys": {
"prefixes": ["sk_live_", "api_", ""]
}, },
"emails": {"domains": ["example.com", "test.com"]},
"api_keys": {"prefixes": ["sk_live_", "api_", ""]},
"databases": { "databases": {
"names": ["production", "main_db"], "names": ["production", "main_db"],
"hosts": ["localhost", "db.internal"] "hosts": ["localhost", "db.internal"],
}, },
"applications": { "applications": {"names": ["WebApp", "Dashboard"]},
"names": ["WebApp", "Dashboard"] "users": {"roles": ["Administrator", "User"]},
}, "server_headers": ["Apache/2.4.41 (Ubuntu)", "nginx/1.18.0"],
"users": {
"roles": ["Administrator", "User"]
},
"server_headers": ["Apache/2.4.41 (Ubuntu)", "nginx/1.18.0"]
} }
@property @property
@@ -137,10 +131,10 @@ class Wordlists:
_wordlists_instance = None _wordlists_instance = None
def get_wordlists(): def get_wordlists():
"""Get the singleton Wordlists instance""" """Get the singleton Wordlists instance"""
global _wordlists_instance global _wordlists_instance
if _wordlists_instance is None: if _wordlists_instance is None:
_wordlists_instance = Wordlists() _wordlists_instance = Wordlists()
return _wordlists_instance return _wordlists_instance

View File

@@ -10,10 +10,10 @@ def detect_xss_pattern(input_string: str) -> bool:
return False return False
wl = get_wordlists() wl = get_wordlists()
xss_pattern = wl.attack_patterns.get('xss_attempt', '') xss_pattern = wl.attack_patterns.get("xss_attempt", "")
if not xss_pattern: if not xss_pattern:
xss_pattern = r'(<script|</script|javascript:|onerror=|onload=|onclick=|<iframe|<img|<svg|eval\(|alert\()' xss_pattern = r"(<script|</script|javascript:|onerror=|onload=|onclick=|<iframe|<img|<svg|eval\(|alert\()"
return bool(re.search(xss_pattern, input_string, re.IGNORECASE)) return bool(re.search(xss_pattern, input_string, re.IGNORECASE))