2025-12-28 10:43:32 -06:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
|
|
"""
|
|
|
|
|
Database singleton module for the Krawl honeypot.
|
|
|
|
|
Provides SQLAlchemy session management and database initialization.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import os
|
|
|
|
|
import stat
|
2026-01-15 13:30:35 -06:00
|
|
|
from datetime import datetime, timedelta
|
2025-12-28 10:43:32 -06:00
|
|
|
from typing import Optional, List, Dict, Any
|
2026-01-08 19:20:22 +01:00
|
|
|
from zoneinfo import ZoneInfo
|
2025-12-28 10:43:32 -06:00
|
|
|
|
2026-01-27 17:32:38 +01:00
|
|
|
from sqlalchemy import create_engine, func, distinct, case, event, or_
|
2025-12-28 10:43:32 -06:00
|
|
|
from sqlalchemy.orm import sessionmaker, scoped_session, Session
|
2026-01-10 14:59:15 -06:00
|
|
|
from sqlalchemy.engine import Engine
|
|
|
|
|
|
2026-01-24 23:28:10 +01:00
|
|
|
from ip_utils import is_local_or_private_ip, is_valid_public_ip
|
|
|
|
|
|
2026-01-10 14:59:15 -06:00
|
|
|
|
|
|
|
|
@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
    """Configure every new SQLite connection.

    Switches the journal to write-ahead logging (WAL) for better concurrent
    reads and sets a 30 second busy timeout so writers wait on a locked
    database instead of failing immediately.
    """
    pragma_cursor = dbapi_connection.cursor()
    for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA busy_timeout=30000"):
        pragma_cursor.execute(pragma)
    pragma_cursor.close()
|
2025-12-28 10:43:32 -06:00
|
|
|
|
2026-01-23 22:00:21 +01:00
|
|
|
|
|
|
|
|
from models import (
|
|
|
|
|
Base,
|
|
|
|
|
AccessLog,
|
|
|
|
|
CredentialAttempt,
|
|
|
|
|
AttackDetection,
|
|
|
|
|
IpStats,
|
|
|
|
|
CategoryHistory,
|
|
|
|
|
)
|
2025-12-28 10:43:32 -06:00
|
|
|
from sanitizer import (
|
|
|
|
|
sanitize_ip,
|
|
|
|
|
sanitize_path,
|
|
|
|
|
sanitize_user_agent,
|
|
|
|
|
sanitize_credential,
|
|
|
|
|
sanitize_attack_pattern,
|
|
|
|
|
)
|
|
|
|
|
|
2026-01-07 12:33:43 -06:00
|
|
|
from logger import get_app_logger
|
|
|
|
|
|
|
|
|
|
applogger = get_app_logger()
|
2025-12-28 10:43:32 -06:00
|
|
|
|
2026-01-23 22:00:21 +01:00
|
|
|
|
2025-12-28 10:43:32 -06:00
|
|
|
class DatabaseManager:
|
|
|
|
|
"""
|
|
|
|
|
Singleton database manager for the Krawl honeypot.
|
|
|
|
|
|
|
|
|
|
Handles database initialization, session management, and provides
|
|
|
|
|
methods for persisting access logs, credentials, and attack detections.
|
|
|
|
|
"""
|
2026-01-23 22:00:21 +01:00
|
|
|
|
2025-12-28 10:43:32 -06:00
|
|
|
_instance: Optional["DatabaseManager"] = None
|
|
|
|
|
|
|
|
|
|
def __new__(cls) -> "DatabaseManager":
|
|
|
|
|
if cls._instance is None:
|
|
|
|
|
cls._instance = super().__new__(cls)
|
|
|
|
|
cls._instance._initialized = False
|
|
|
|
|
return cls._instance
|
|
|
|
|
|
|
|
|
|
    def initialize(self, database_path: str = "data/krawl.db") -> None:
        """
        Initialize the database connection and create tables.

        Idempotent: subsequent calls after a successful initialization are
        no-ops. Creates the data directory, builds the SQLite engine, sets up
        a thread-safe scoped session factory, creates tables, runs automatic
        schema migrations, and tightens the database file permissions.

        Args:
            database_path: Path to the SQLite database file
        """
        # Idempotency guard: never re-create engine/session factory.
        if self._initialized:
            return

        # Create data directory if it doesn't exist
        data_dir = os.path.dirname(database_path)
        if data_dir and not os.path.exists(data_dir):
            os.makedirs(data_dir, exist_ok=True)

        # Create SQLite database with check_same_thread=False for multi-threaded access
        # (safe here because sessions are thread-local via scoped_session below).
        database_url = f"sqlite:///{database_path}"
        self._engine = create_engine(
            database_url,
            connect_args={"check_same_thread": False},
            echo=False,  # Set to True for SQL debugging
        )

        # Create session factory with scoped_session for thread safety
        session_factory = sessionmaker(bind=self._engine)
        self._Session = scoped_session(session_factory)

        # Create all tables
        Base.metadata.create_all(self._engine)

        # Run automatic migrations for backward compatibility
        # (must run after create_all so ip_stats exists on fresh databases).
        self._run_migrations(database_path)

        # Set restrictive file permissions (owner read/write only)
        if os.path.exists(database_path):
            try:
                os.chmod(database_path, stat.S_IRUSR | stat.S_IWUSR)  # 600
            except OSError:
                # May fail on some systems, not critical
                pass

        self._initialized = True
|
|
|
|
|
|
2026-01-29 11:55:06 +01:00
|
|
|
def _run_migrations(self, database_path: str) -> None:
|
|
|
|
|
"""
|
|
|
|
|
Run automatic migrations for backward compatibility.
|
|
|
|
|
Adds missing columns that were added in newer versions.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
database_path: Path to the SQLite database file
|
|
|
|
|
"""
|
|
|
|
|
import sqlite3
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
conn = sqlite3.connect(database_path)
|
|
|
|
|
cursor = conn.cursor()
|
|
|
|
|
|
|
|
|
|
# Check if latitude/longitude columns exist
|
|
|
|
|
cursor.execute("PRAGMA table_info(ip_stats)")
|
|
|
|
|
columns = [row[1] for row in cursor.fetchall()]
|
|
|
|
|
|
|
|
|
|
migrations_run = []
|
|
|
|
|
|
|
|
|
|
# Add latitude column if missing
|
|
|
|
|
if "latitude" not in columns:
|
|
|
|
|
cursor.execute("ALTER TABLE ip_stats ADD COLUMN latitude REAL")
|
|
|
|
|
migrations_run.append("latitude")
|
|
|
|
|
|
|
|
|
|
# Add longitude column if missing
|
|
|
|
|
if "longitude" not in columns:
|
|
|
|
|
cursor.execute("ALTER TABLE ip_stats ADD COLUMN longitude REAL")
|
|
|
|
|
migrations_run.append("longitude")
|
|
|
|
|
|
2026-02-01 22:43:12 +01:00
|
|
|
# Add new geolocation columns
|
|
|
|
|
if "country" not in columns:
|
|
|
|
|
cursor.execute("ALTER TABLE ip_stats ADD COLUMN country VARCHAR(100)")
|
|
|
|
|
migrations_run.append("country")
|
|
|
|
|
|
|
|
|
|
if "region" not in columns:
|
|
|
|
|
cursor.execute("ALTER TABLE ip_stats ADD COLUMN region VARCHAR(2)")
|
|
|
|
|
migrations_run.append("region")
|
|
|
|
|
|
|
|
|
|
if "region_name" not in columns:
|
2026-02-02 23:01:25 +01:00
|
|
|
cursor.execute(
|
|
|
|
|
"ALTER TABLE ip_stats ADD COLUMN region_name VARCHAR(100)"
|
|
|
|
|
)
|
2026-02-01 22:43:12 +01:00
|
|
|
migrations_run.append("region_name")
|
|
|
|
|
|
|
|
|
|
if "timezone" not in columns:
|
|
|
|
|
cursor.execute("ALTER TABLE ip_stats ADD COLUMN timezone VARCHAR(50)")
|
|
|
|
|
migrations_run.append("timezone")
|
|
|
|
|
|
|
|
|
|
if "isp" not in columns:
|
|
|
|
|
cursor.execute("ALTER TABLE ip_stats ADD COLUMN isp VARCHAR(100)")
|
|
|
|
|
migrations_run.append("isp")
|
|
|
|
|
|
|
|
|
|
if "is_proxy" not in columns:
|
|
|
|
|
cursor.execute("ALTER TABLE ip_stats ADD COLUMN is_proxy BOOLEAN")
|
|
|
|
|
migrations_run.append("is_proxy")
|
|
|
|
|
|
|
|
|
|
if "is_hosting" not in columns:
|
|
|
|
|
cursor.execute("ALTER TABLE ip_stats ADD COLUMN is_hosting BOOLEAN")
|
|
|
|
|
migrations_run.append("is_hosting")
|
|
|
|
|
|
|
|
|
|
if "reverse" not in columns:
|
|
|
|
|
cursor.execute("ALTER TABLE ip_stats ADD COLUMN reverse VARCHAR(255)")
|
|
|
|
|
migrations_run.append("reverse")
|
|
|
|
|
|
2026-01-29 11:55:06 +01:00
|
|
|
if migrations_run:
|
|
|
|
|
conn.commit()
|
|
|
|
|
applogger.info(
|
|
|
|
|
f"Auto-migration: Added columns {', '.join(migrations_run)} to ip_stats table"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
conn.close()
|
|
|
|
|
except Exception as e:
|
|
|
|
|
applogger.error(f"Auto-migration failed: {e}")
|
|
|
|
|
# Don't raise - allow app to continue even if migration fails
|
|
|
|
|
|
2025-12-28 10:43:32 -06:00
|
|
|
@property
|
|
|
|
|
def session(self) -> Session:
|
|
|
|
|
"""Get a thread-local database session."""
|
|
|
|
|
if not self._initialized:
|
2026-01-23 22:00:21 +01:00
|
|
|
raise RuntimeError(
|
|
|
|
|
"DatabaseManager not initialized. Call initialize() first."
|
|
|
|
|
)
|
2025-12-28 10:43:32 -06:00
|
|
|
return self._Session()
|
|
|
|
|
|
|
|
|
|
def close_session(self) -> None:
|
|
|
|
|
"""Close the current thread-local session."""
|
|
|
|
|
if self._initialized:
|
|
|
|
|
self._Session.remove()
|
|
|
|
|
|
|
|
|
|
    def persist_access(
        self,
        ip: str,
        path: str,
        user_agent: str = "",
        method: str = "GET",
        is_suspicious: bool = False,
        is_honeypot_trigger: bool = False,
        attack_types: Optional[List[str]] = None,
        matched_patterns: Optional[Dict[str, str]] = None,
        raw_request: Optional[str] = None,
    ) -> Optional[int]:
        """
        Persist an access log entry to the database.

        Also creates one AttackDetection row per detected attack type and
        upserts the per-IP statistics, all in a single transaction.

        Args:
            ip: Client IP address
            path: Requested path
            user_agent: Client user agent string
            method: HTTP method (GET, POST, HEAD)
            is_suspicious: Whether the request was flagged as suspicious
            is_honeypot_trigger: Whether a honeypot path was accessed
            attack_types: List of detected attack types
            matched_patterns: Dict mapping attack_type to matched pattern
            raw_request: Full raw HTTP request for forensic analysis

        Returns:
            The ID of the created AccessLog record, or None on error
        """
        session = self.session
        try:
            # Create access log with sanitized fields
            access_log = AccessLog(
                ip=sanitize_ip(ip),
                path=sanitize_path(path),
                user_agent=sanitize_user_agent(user_agent),
                method=method[:10],
                is_suspicious=is_suspicious,
                is_honeypot_trigger=is_honeypot_trigger,
                # NOTE(review): naive local time; ZoneInfo is imported at
                # module level but unused here — confirm intended tz handling.
                timestamp=datetime.now(),
                raw_request=raw_request,
            )
            session.add(access_log)
            session.flush()  # Get the ID before committing

            # Add attack detections if any
            if attack_types:
                matched_patterns = matched_patterns or {}
                for attack_type in attack_types:
                    detection = AttackDetection(
                        # Requires the flush above so access_log.id is set.
                        access_log_id=access_log.id,
                        attack_type=attack_type[:50],
                        matched_pattern=sanitize_attack_pattern(
                            matched_patterns.get(attack_type, "")
                        ),
                    )
                    session.add(detection)

            # Update IP stats
            self._update_ip_stats(session, ip)

            session.commit()
            return access_log.id

        except Exception as e:
            session.rollback()
            # Log error but don't crash - database persistence is secondary to honeypot function
            applogger.critical(f"Database error persisting access: {e}")
            return None
        finally:
            self.close_session()
|
|
|
|
|
|
|
|
|
|
def persist_credential(
|
|
|
|
|
self,
|
|
|
|
|
ip: str,
|
|
|
|
|
path: str,
|
|
|
|
|
username: Optional[str] = None,
|
2026-01-23 22:00:21 +01:00
|
|
|
password: Optional[str] = None,
|
2025-12-28 10:43:32 -06:00
|
|
|
) -> Optional[int]:
|
|
|
|
|
"""
|
|
|
|
|
Persist a credential attempt to the database.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
ip: Client IP address
|
|
|
|
|
path: Login form path
|
|
|
|
|
username: Submitted username
|
|
|
|
|
password: Submitted password
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
The ID of the created CredentialAttempt record, or None on error
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
credential = CredentialAttempt(
|
|
|
|
|
ip=sanitize_ip(ip),
|
|
|
|
|
path=sanitize_path(path),
|
|
|
|
|
username=sanitize_credential(username),
|
|
|
|
|
password=sanitize_credential(password),
|
2026-01-23 22:00:21 +01:00
|
|
|
timestamp=datetime.now(),
|
2025-12-28 10:43:32 -06:00
|
|
|
)
|
|
|
|
|
session.add(credential)
|
|
|
|
|
session.commit()
|
|
|
|
|
return credential.id
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
session.rollback()
|
2026-01-07 12:33:43 -06:00
|
|
|
applogger.critical(f"Database error persisting credential: {e}")
|
2025-12-28 10:43:32 -06:00
|
|
|
return None
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
|
|
|
|
def _update_ip_stats(self, session: Session, ip: str) -> None:
|
|
|
|
|
"""
|
|
|
|
|
Update IP statistics (upsert pattern).
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
session: Active database session
|
|
|
|
|
ip: IP address to update
|
|
|
|
|
"""
|
|
|
|
|
sanitized_ip = sanitize_ip(ip)
|
2026-01-17 18:06:09 +01:00
|
|
|
now = datetime.now()
|
2025-12-28 10:43:32 -06:00
|
|
|
|
|
|
|
|
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
|
|
|
|
|
|
|
|
|
if ip_stats:
|
|
|
|
|
ip_stats.total_requests += 1
|
|
|
|
|
ip_stats.last_seen = now
|
|
|
|
|
else:
|
|
|
|
|
ip_stats = IpStats(
|
2026-01-23 22:00:21 +01:00
|
|
|
ip=sanitized_ip, total_requests=1, first_seen=now, last_seen=now
|
2025-12-28 10:43:32 -06:00
|
|
|
)
|
|
|
|
|
session.add(ip_stats)
|
|
|
|
|
|
2026-01-23 22:00:21 +01:00
|
|
|
def update_ip_stats_analysis(
|
|
|
|
|
self,
|
|
|
|
|
ip: str,
|
|
|
|
|
analyzed_metrics: Dict[str, object],
|
|
|
|
|
category: str,
|
|
|
|
|
category_scores: Dict[str, int],
|
|
|
|
|
last_analysis: datetime,
|
|
|
|
|
) -> None:
|
2026-01-04 19:12:23 +01:00
|
|
|
"""
|
|
|
|
|
Update IP statistics (ip is already persisted).
|
2026-01-07 18:24:43 +01:00
|
|
|
Records category change in history if category has changed.
|
2026-01-04 19:12:23 +01:00
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
ip: IP address to update
|
|
|
|
|
analyzed_metrics: metric values analyzed be the analyzer
|
|
|
|
|
category: inferred category
|
|
|
|
|
category_scores: inferred category scores
|
|
|
|
|
last_analysis: timestamp of last analysis
|
|
|
|
|
|
|
|
|
|
"""
|
2026-01-23 22:00:21 +01:00
|
|
|
applogger.debug(
|
|
|
|
|
f"Analyzed metrics {analyzed_metrics}, category {category}, category scores {category_scores}, last analysis {last_analysis}"
|
|
|
|
|
)
|
2026-01-07 12:33:43 -06:00
|
|
|
applogger.info(f"IP: {ip} category has been updated to {category}")
|
2026-01-04 19:12:23 +01:00
|
|
|
|
|
|
|
|
session = self.session
|
|
|
|
|
sanitized_ip = sanitize_ip(ip)
|
|
|
|
|
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
|
|
|
|
|
2026-01-07 18:24:43 +01:00
|
|
|
# Check if category has changed and record it
|
|
|
|
|
old_category = ip_stats.category
|
|
|
|
|
if old_category != category:
|
2026-01-23 22:00:21 +01:00
|
|
|
self._record_category_change(
|
|
|
|
|
sanitized_ip, old_category, category, last_analysis
|
|
|
|
|
)
|
2026-01-07 18:24:43 +01:00
|
|
|
|
2026-01-04 19:12:23 +01:00
|
|
|
ip_stats.analyzed_metrics = analyzed_metrics
|
|
|
|
|
ip_stats.category = category
|
|
|
|
|
ip_stats.category_scores = category_scores
|
|
|
|
|
ip_stats.last_analysis = last_analysis
|
2026-01-17 18:06:09 +01:00
|
|
|
|
2026-01-08 19:20:22 +01:00
|
|
|
try:
|
|
|
|
|
session.commit()
|
|
|
|
|
except Exception as e:
|
|
|
|
|
session.rollback()
|
2026-01-25 22:50:27 +01:00
|
|
|
applogger.error(f"Error updating IP stats analysis: {e}")
|
2026-01-04 19:12:23 +01:00
|
|
|
|
2026-01-07 22:56:01 +01:00
|
|
|
def manual_update_category(self, ip: str, category: str) -> None:
|
2026-01-04 19:12:23 +01:00
|
|
|
"""
|
|
|
|
|
Update IP category as a result of a manual intervention by an admin
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
ip: IP address to update
|
|
|
|
|
category: selected category
|
2026-01-17 18:06:09 +01:00
|
|
|
|
2026-01-04 19:12:23 +01:00
|
|
|
"""
|
|
|
|
|
session = self.session
|
2026-01-07 22:56:01 +01:00
|
|
|
sanitized_ip = sanitize_ip(ip)
|
2026-01-04 19:12:23 +01:00
|
|
|
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
2026-01-17 18:06:09 +01:00
|
|
|
|
2026-01-07 18:24:43 +01:00
|
|
|
# Record the manual category change
|
|
|
|
|
old_category = ip_stats.category
|
|
|
|
|
if old_category != category:
|
2026-01-23 22:00:21 +01:00
|
|
|
self._record_category_change(
|
|
|
|
|
sanitized_ip, old_category, category, datetime.now()
|
|
|
|
|
)
|
2026-01-07 18:24:43 +01:00
|
|
|
|
2026-01-04 19:12:23 +01:00
|
|
|
ip_stats.category = category
|
|
|
|
|
ip_stats.manual_category = True
|
2026-01-17 18:06:09 +01:00
|
|
|
|
2026-01-08 19:20:22 +01:00
|
|
|
try:
|
|
|
|
|
session.commit()
|
|
|
|
|
except Exception as e:
|
|
|
|
|
session.rollback()
|
2026-01-25 22:50:27 +01:00
|
|
|
applogger.error(f"Error updating manual category: {e}")
|
2026-01-04 19:12:23 +01:00
|
|
|
|
2026-01-23 22:00:21 +01:00
|
|
|
    def _record_category_change(
        self,
        ip: str,
        old_category: Optional[str],
        new_category: str,
        timestamp: datetime,
    ) -> None:
        """
        Internal method to record category changes in history.
        Records all category changes including initial categorization.

        Args:
            ip: IP address (expected to be already sanitized by callers)
            old_category: Previous category (None if first categorization)
            new_category: New category
            timestamp: When the change occurred
        """
        # NOTE(review): self.session yields the thread-local scoped session,
        # so the commit below also flushes any pending changes the caller has
        # staged on that same session — confirm this interleaving is intended.
        session = self.session
        try:
            history_entry = CategoryHistory(
                ip=ip,
                old_category=old_category,
                new_category=new_category,
                timestamp=timestamp,
            )
            session.add(history_entry)
            session.commit()
        except Exception as e:
            # Best effort: a failed history write must not break the caller.
            session.rollback()
            applogger.error(f"Error recording category change: {e}")
|
2026-01-07 18:24:43 +01:00
|
|
|
|
|
|
|
|
def get_category_history(self, ip: str) -> List[Dict[str, Any]]:
|
|
|
|
|
"""
|
|
|
|
|
Retrieve category change history for a specific IP.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
ip: IP address to get history for
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
List of category change records ordered by timestamp
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
sanitized_ip = sanitize_ip(ip)
|
2026-01-23 22:00:21 +01:00
|
|
|
history = (
|
|
|
|
|
session.query(CategoryHistory)
|
|
|
|
|
.filter(CategoryHistory.ip == sanitized_ip)
|
|
|
|
|
.order_by(CategoryHistory.timestamp.asc())
|
|
|
|
|
.all()
|
|
|
|
|
)
|
2026-01-07 18:24:43 +01:00
|
|
|
|
|
|
|
|
return [
|
|
|
|
|
{
|
2026-01-23 22:00:21 +01:00
|
|
|
"old_category": h.old_category,
|
|
|
|
|
"new_category": h.new_category,
|
|
|
|
|
"timestamp": h.timestamp.isoformat(),
|
2026-01-07 18:24:43 +01:00
|
|
|
}
|
|
|
|
|
for h in history
|
|
|
|
|
]
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
2026-01-04 19:12:23 +01:00
|
|
|
|
2026-01-23 22:00:21 +01:00
|
|
|
def update_ip_rep_infos(
|
|
|
|
|
self,
|
|
|
|
|
ip: str,
|
|
|
|
|
country_code: str,
|
|
|
|
|
asn: str,
|
|
|
|
|
asn_org: str,
|
|
|
|
|
list_on: Dict[str, str],
|
2026-01-27 16:56:34 +01:00
|
|
|
city: Optional[str] = None,
|
2026-01-29 11:55:06 +01:00
|
|
|
latitude: Optional[float] = None,
|
|
|
|
|
longitude: Optional[float] = None,
|
2026-02-01 22:43:12 +01:00
|
|
|
country: Optional[str] = None,
|
|
|
|
|
region: Optional[str] = None,
|
|
|
|
|
region_name: Optional[str] = None,
|
|
|
|
|
timezone: Optional[str] = None,
|
|
|
|
|
isp: Optional[str] = None,
|
|
|
|
|
reverse: Optional[str] = None,
|
|
|
|
|
is_proxy: Optional[bool] = None,
|
|
|
|
|
is_hosting: Optional[bool] = None,
|
2026-01-23 22:00:21 +01:00
|
|
|
) -> None:
|
2026-01-07 22:56:01 +01:00
|
|
|
"""
|
|
|
|
|
Update IP rep stats
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
ip: IP address
|
|
|
|
|
country_code: IP address country code
|
|
|
|
|
asn: IP address ASN
|
|
|
|
|
asn_org: IP address ASN ORG
|
|
|
|
|
list_on: public lists containing the IP address
|
2026-01-27 16:56:34 +01:00
|
|
|
city: City name (optional)
|
2026-01-29 11:55:06 +01:00
|
|
|
latitude: Latitude coordinate (optional)
|
|
|
|
|
longitude: Longitude coordinate (optional)
|
2026-02-01 22:43:12 +01:00
|
|
|
country: Full country name (optional)
|
|
|
|
|
region: Region code (optional)
|
|
|
|
|
region_name: Region name (optional)
|
|
|
|
|
timezone: Timezone (optional)
|
|
|
|
|
isp: Internet Service Provider (optional)
|
|
|
|
|
reverse: Reverse DNS lookup (optional)
|
|
|
|
|
is_proxy: Whether IP is a proxy (optional)
|
|
|
|
|
is_hosting: Whether IP is a hosting provider (optional)
|
2026-01-10 14:59:15 -06:00
|
|
|
|
2026-01-07 22:56:01 +01:00
|
|
|
"""
|
|
|
|
|
session = self.session
|
2026-01-10 14:59:15 -06:00
|
|
|
try:
|
|
|
|
|
sanitized_ip = sanitize_ip(ip)
|
|
|
|
|
ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()
|
|
|
|
|
if ip_stats:
|
|
|
|
|
ip_stats.country_code = country_code
|
|
|
|
|
ip_stats.asn = asn
|
|
|
|
|
ip_stats.asn_org = asn_org
|
|
|
|
|
ip_stats.list_on = list_on
|
2026-01-27 16:56:34 +01:00
|
|
|
if city:
|
|
|
|
|
ip_stats.city = city
|
2026-01-29 11:55:06 +01:00
|
|
|
if latitude is not None:
|
|
|
|
|
ip_stats.latitude = latitude
|
|
|
|
|
if longitude is not None:
|
|
|
|
|
ip_stats.longitude = longitude
|
2026-02-01 22:43:12 +01:00
|
|
|
if country:
|
|
|
|
|
ip_stats.country = country
|
|
|
|
|
if region:
|
|
|
|
|
ip_stats.region = region
|
|
|
|
|
if region_name:
|
|
|
|
|
ip_stats.region_name = region_name
|
|
|
|
|
if timezone:
|
|
|
|
|
ip_stats.timezone = timezone
|
|
|
|
|
if isp:
|
|
|
|
|
ip_stats.isp = isp
|
|
|
|
|
if reverse:
|
|
|
|
|
ip_stats.reverse = reverse
|
|
|
|
|
if is_proxy is not None:
|
|
|
|
|
ip_stats.is_proxy = is_proxy
|
|
|
|
|
if is_hosting is not None:
|
|
|
|
|
ip_stats.is_hosting = is_hosting
|
2026-01-10 14:59:15 -06:00
|
|
|
session.commit()
|
|
|
|
|
except Exception as e:
|
|
|
|
|
session.rollback()
|
|
|
|
|
raise
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
|
|
|
|
def get_unenriched_ips(self, limit: int = 100) -> List[str]:
|
|
|
|
|
"""
|
2026-01-27 17:32:38 +01:00
|
|
|
Get IPs that don't have complete reputation data yet.
|
2026-01-29 11:55:06 +01:00
|
|
|
Returns IPs without country_code, city, latitude, or longitude data.
|
2026-01-15 13:30:35 -06:00
|
|
|
Excludes RFC1918 private addresses and other non-routable IPs.
|
2026-01-07 22:56:01 +01:00
|
|
|
|
2026-01-10 14:59:15 -06:00
|
|
|
Args:
|
|
|
|
|
limit: Maximum number of IPs to return
|
2026-01-07 22:56:01 +01:00
|
|
|
|
2026-01-10 14:59:15 -06:00
|
|
|
Returns:
|
2026-01-27 17:32:38 +01:00
|
|
|
List of IP addresses without complete reputation data
|
2026-01-10 14:59:15 -06:00
|
|
|
"""
|
2026-01-29 11:55:06 +01:00
|
|
|
from sqlalchemy.exc import OperationalError
|
|
|
|
|
|
2026-01-10 14:59:15 -06:00
|
|
|
session = self.session
|
|
|
|
|
try:
|
2026-01-29 11:55:06 +01:00
|
|
|
# Try to query including latitude/longitude (for backward compatibility)
|
|
|
|
|
try:
|
|
|
|
|
ips = (
|
|
|
|
|
session.query(IpStats.ip)
|
|
|
|
|
.filter(
|
|
|
|
|
or_(
|
|
|
|
|
IpStats.country_code.is_(None),
|
|
|
|
|
IpStats.city.is_(None),
|
|
|
|
|
IpStats.latitude.is_(None),
|
|
|
|
|
IpStats.longitude.is_(None),
|
|
|
|
|
),
|
|
|
|
|
~IpStats.ip.like("10.%"),
|
|
|
|
|
~IpStats.ip.like("172.16.%"),
|
|
|
|
|
~IpStats.ip.like("172.17.%"),
|
|
|
|
|
~IpStats.ip.like("172.18.%"),
|
|
|
|
|
~IpStats.ip.like("172.19.%"),
|
|
|
|
|
~IpStats.ip.like("172.2_.%"),
|
|
|
|
|
~IpStats.ip.like("172.30.%"),
|
|
|
|
|
~IpStats.ip.like("172.31.%"),
|
|
|
|
|
~IpStats.ip.like("192.168.%"),
|
|
|
|
|
~IpStats.ip.like("127.%"),
|
|
|
|
|
~IpStats.ip.like("169.254.%"),
|
|
|
|
|
)
|
|
|
|
|
.limit(limit)
|
|
|
|
|
.all()
|
2026-01-23 22:00:21 +01:00
|
|
|
)
|
2026-01-29 11:55:06 +01:00
|
|
|
except OperationalError as e:
|
|
|
|
|
# If latitude/longitude columns don't exist yet, fall back to old query
|
|
|
|
|
if "no such column" in str(e).lower():
|
|
|
|
|
ips = (
|
|
|
|
|
session.query(IpStats.ip)
|
|
|
|
|
.filter(
|
|
|
|
|
or_(IpStats.country_code.is_(None), IpStats.city.is_(None)),
|
|
|
|
|
~IpStats.ip.like("10.%"),
|
|
|
|
|
~IpStats.ip.like("172.16.%"),
|
|
|
|
|
~IpStats.ip.like("172.17.%"),
|
|
|
|
|
~IpStats.ip.like("172.18.%"),
|
|
|
|
|
~IpStats.ip.like("172.19.%"),
|
|
|
|
|
~IpStats.ip.like("172.2_.%"),
|
|
|
|
|
~IpStats.ip.like("172.30.%"),
|
|
|
|
|
~IpStats.ip.like("172.31.%"),
|
|
|
|
|
~IpStats.ip.like("192.168.%"),
|
|
|
|
|
~IpStats.ip.like("127.%"),
|
|
|
|
|
~IpStats.ip.like("169.254.%"),
|
|
|
|
|
)
|
|
|
|
|
.limit(limit)
|
|
|
|
|
.all()
|
|
|
|
|
)
|
|
|
|
|
else:
|
|
|
|
|
raise
|
|
|
|
|
|
2026-01-10 14:59:15 -06:00
|
|
|
return [ip[0] for ip in ips]
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
2026-01-07 22:56:01 +01:00
|
|
|
|
2025-12-28 10:43:32 -06:00
|
|
|
def get_access_logs(
|
|
|
|
|
self,
|
|
|
|
|
limit: int = 100,
|
|
|
|
|
offset: int = 0,
|
|
|
|
|
ip_filter: Optional[str] = None,
|
2026-01-15 13:30:35 -06:00
|
|
|
suspicious_only: bool = False,
|
2026-01-23 22:00:21 +01:00
|
|
|
since_minutes: Optional[int] = None,
|
2025-12-28 10:43:32 -06:00
|
|
|
) -> List[Dict[str, Any]]:
|
|
|
|
|
"""
|
|
|
|
|
Retrieve access logs with optional filtering.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
limit: Maximum number of records to return
|
|
|
|
|
offset: Number of records to skip
|
|
|
|
|
ip_filter: Filter by IP address
|
|
|
|
|
suspicious_only: Only return suspicious requests
|
2026-01-15 13:30:35 -06:00
|
|
|
since_minutes: Only return logs from the last N minutes
|
2025-12-28 10:43:32 -06:00
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
List of access log dictionaries
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
query = session.query(AccessLog).order_by(AccessLog.timestamp.desc())
|
|
|
|
|
|
|
|
|
|
if ip_filter:
|
|
|
|
|
query = query.filter(AccessLog.ip == sanitize_ip(ip_filter))
|
|
|
|
|
if suspicious_only:
|
|
|
|
|
query = query.filter(AccessLog.is_suspicious == True)
|
2026-01-15 13:30:35 -06:00
|
|
|
if since_minutes is not None:
|
2026-01-17 18:06:09 +01:00
|
|
|
cutoff_time = datetime.now() - timedelta(minutes=since_minutes)
|
2026-01-15 13:30:35 -06:00
|
|
|
query = query.filter(AccessLog.timestamp >= cutoff_time)
|
2025-12-28 10:43:32 -06:00
|
|
|
|
|
|
|
|
logs = query.offset(offset).limit(limit).all()
|
|
|
|
|
|
|
|
|
|
return [
|
|
|
|
|
{
|
2026-01-23 22:00:21 +01:00
|
|
|
"id": log.id,
|
|
|
|
|
"ip": log.ip,
|
|
|
|
|
"path": log.path,
|
|
|
|
|
"user_agent": log.user_agent,
|
|
|
|
|
"method": log.method,
|
|
|
|
|
"is_suspicious": log.is_suspicious,
|
|
|
|
|
"is_honeypot_trigger": log.is_honeypot_trigger,
|
|
|
|
|
"timestamp": log.timestamp.isoformat(),
|
|
|
|
|
"attack_types": [d.attack_type for d in log.attack_detections],
|
2025-12-28 10:43:32 -06:00
|
|
|
}
|
|
|
|
|
for log in logs
|
|
|
|
|
]
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
|
|
|
|
def get_credential_attempts(
|
2026-01-23 22:00:21 +01:00
|
|
|
self, limit: int = 100, offset: int = 0, ip_filter: Optional[str] = None
|
2025-12-28 10:43:32 -06:00
|
|
|
) -> List[Dict[str, Any]]:
|
|
|
|
|
"""
|
|
|
|
|
Retrieve credential attempts with optional filtering.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
limit: Maximum number of records to return
|
|
|
|
|
offset: Number of records to skip
|
|
|
|
|
ip_filter: Filter by IP address
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
List of credential attempt dictionaries
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
query = session.query(CredentialAttempt).order_by(
|
|
|
|
|
CredentialAttempt.timestamp.desc()
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
if ip_filter:
|
|
|
|
|
query = query.filter(CredentialAttempt.ip == sanitize_ip(ip_filter))
|
|
|
|
|
|
|
|
|
|
attempts = query.offset(offset).limit(limit).all()
|
|
|
|
|
|
|
|
|
|
return [
|
|
|
|
|
{
|
2026-01-23 22:00:21 +01:00
|
|
|
"id": attempt.id,
|
|
|
|
|
"ip": attempt.ip,
|
|
|
|
|
"path": attempt.path,
|
|
|
|
|
"username": attempt.username,
|
|
|
|
|
"password": attempt.password,
|
|
|
|
|
"timestamp": attempt.timestamp.isoformat(),
|
2025-12-28 10:43:32 -06:00
|
|
|
}
|
|
|
|
|
for attempt in attempts
|
|
|
|
|
]
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
|
|
|
|
def get_ip_stats(self, limit: int = 100) -> List[Dict[str, Any]]:
|
|
|
|
|
"""
|
|
|
|
|
Retrieve IP statistics ordered by total requests.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
limit: Maximum number of records to return
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
List of IP stats dictionaries
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
2026-01-23 22:00:21 +01:00
|
|
|
stats = (
|
|
|
|
|
session.query(IpStats)
|
|
|
|
|
.order_by(IpStats.total_requests.desc())
|
|
|
|
|
.limit(limit)
|
|
|
|
|
.all()
|
|
|
|
|
)
|
2025-12-28 10:43:32 -06:00
|
|
|
|
|
|
|
|
return [
|
|
|
|
|
{
|
2026-01-23 22:00:21 +01:00
|
|
|
"ip": s.ip,
|
|
|
|
|
"total_requests": s.total_requests,
|
2026-01-25 22:50:27 +01:00
|
|
|
"first_seen": s.first_seen.isoformat() if s.first_seen else None,
|
|
|
|
|
"last_seen": s.last_seen.isoformat() if s.last_seen else None,
|
2026-01-23 22:00:21 +01:00
|
|
|
"country_code": s.country_code,
|
|
|
|
|
"city": s.city,
|
|
|
|
|
"asn": s.asn,
|
|
|
|
|
"asn_org": s.asn_org,
|
|
|
|
|
"reputation_score": s.reputation_score,
|
|
|
|
|
"reputation_source": s.reputation_source,
|
|
|
|
|
"analyzed_metrics": s.analyzed_metrics,
|
|
|
|
|
"category": s.category,
|
|
|
|
|
"manual_category": s.manual_category,
|
2026-01-26 12:36:22 +01:00
|
|
|
"last_analysis": (
|
|
|
|
|
s.last_analysis.isoformat() if s.last_analysis else None
|
|
|
|
|
),
|
2025-12-28 10:43:32 -06:00
|
|
|
}
|
|
|
|
|
for s in stats
|
|
|
|
|
]
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
2026-01-06 18:50:36 +01:00
|
|
|
def get_ip_stats_by_ip(self, ip: str) -> Optional[Dict[str, Any]]:
|
|
|
|
|
"""
|
|
|
|
|
Retrieve IP statistics for a specific IP address.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
ip: The IP address to look up
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Dictionary with IP stats or None if not found
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
stat = session.query(IpStats).filter(IpStats.ip == ip).first()
|
2026-01-17 18:06:09 +01:00
|
|
|
|
2026-01-06 18:50:36 +01:00
|
|
|
if not stat:
|
|
|
|
|
return None
|
2026-01-17 18:06:09 +01:00
|
|
|
|
2026-01-07 18:24:43 +01:00
|
|
|
# Get category history for this IP
|
|
|
|
|
category_history = self.get_category_history(ip)
|
2026-01-17 18:06:09 +01:00
|
|
|
|
2026-01-06 18:50:36 +01:00
|
|
|
return {
|
2026-01-23 22:00:21 +01:00
|
|
|
"ip": stat.ip,
|
|
|
|
|
"total_requests": stat.total_requests,
|
|
|
|
|
"first_seen": stat.first_seen.isoformat() if stat.first_seen else None,
|
|
|
|
|
"last_seen": stat.last_seen.isoformat() if stat.last_seen else None,
|
|
|
|
|
"country_code": stat.country_code,
|
|
|
|
|
"city": stat.city,
|
2026-02-01 22:43:12 +01:00
|
|
|
"country": stat.country,
|
|
|
|
|
"region": stat.region,
|
|
|
|
|
"region_name": stat.region_name,
|
|
|
|
|
"timezone": stat.timezone,
|
|
|
|
|
"isp": stat.isp,
|
|
|
|
|
"reverse": stat.reverse,
|
2026-01-23 22:00:21 +01:00
|
|
|
"asn": stat.asn,
|
|
|
|
|
"asn_org": stat.asn_org,
|
2026-02-01 22:43:12 +01:00
|
|
|
"is_proxy": stat.is_proxy,
|
|
|
|
|
"is_hosting": stat.is_hosting,
|
2026-01-23 22:00:21 +01:00
|
|
|
"list_on": stat.list_on or {},
|
|
|
|
|
"reputation_score": stat.reputation_score,
|
|
|
|
|
"reputation_source": stat.reputation_source,
|
|
|
|
|
"analyzed_metrics": stat.analyzed_metrics or {},
|
|
|
|
|
"category": stat.category,
|
|
|
|
|
"category_scores": stat.category_scores or {},
|
|
|
|
|
"manual_category": stat.manual_category,
|
|
|
|
|
"last_analysis": (
|
|
|
|
|
stat.last_analysis.isoformat() if stat.last_analysis else None
|
|
|
|
|
),
|
|
|
|
|
"category_history": category_history,
|
2026-01-06 18:50:36 +01:00
|
|
|
}
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
2026-01-26 12:36:22 +01:00
|
|
|
def get_attackers_paginated(
|
|
|
|
|
self,
|
|
|
|
|
page: int = 1,
|
|
|
|
|
page_size: int = 25,
|
|
|
|
|
sort_by: str = "total_requests",
|
|
|
|
|
sort_order: str = "desc",
|
|
|
|
|
) -> Dict[str, Any]:
|
2026-01-25 22:50:27 +01:00
|
|
|
"""
|
|
|
|
|
Retrieve paginated list of attacker IPs ordered by specified field.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
page: Page number (1-indexed)
|
|
|
|
|
page_size: Number of results per page
|
|
|
|
|
sort_by: Field to sort by (total_requests, first_seen, last_seen)
|
|
|
|
|
sort_order: Sort order (asc or desc)
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Dictionary with attackers list and pagination info
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
offset = (page - 1) * page_size
|
|
|
|
|
|
|
|
|
|
# Validate sort parameters
|
|
|
|
|
valid_sort_fields = {"total_requests", "first_seen", "last_seen"}
|
|
|
|
|
sort_by = sort_by if sort_by in valid_sort_fields else "total_requests"
|
2026-01-26 12:36:22 +01:00
|
|
|
sort_order = (
|
|
|
|
|
sort_order.lower() if sort_order.lower() in {"asc", "desc"} else "desc"
|
|
|
|
|
)
|
2026-01-25 22:50:27 +01:00
|
|
|
|
|
|
|
|
# Get total count of attackers
|
|
|
|
|
total_attackers = (
|
2026-01-26 12:36:22 +01:00
|
|
|
session.query(IpStats).filter(IpStats.category == "attacker").count()
|
2026-01-25 22:50:27 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Build query with sorting
|
|
|
|
|
query = session.query(IpStats).filter(IpStats.category == "attacker")
|
2026-01-26 12:36:22 +01:00
|
|
|
|
2026-01-25 22:50:27 +01:00
|
|
|
if sort_by == "total_requests":
|
|
|
|
|
query = query.order_by(
|
2026-01-26 12:36:22 +01:00
|
|
|
IpStats.total_requests.desc()
|
|
|
|
|
if sort_order == "desc"
|
|
|
|
|
else IpStats.total_requests.asc()
|
2026-01-25 22:50:27 +01:00
|
|
|
)
|
|
|
|
|
elif sort_by == "first_seen":
|
|
|
|
|
query = query.order_by(
|
2026-01-26 12:36:22 +01:00
|
|
|
IpStats.first_seen.desc()
|
|
|
|
|
if sort_order == "desc"
|
|
|
|
|
else IpStats.first_seen.asc()
|
2026-01-25 22:50:27 +01:00
|
|
|
)
|
|
|
|
|
elif sort_by == "last_seen":
|
|
|
|
|
query = query.order_by(
|
2026-01-26 12:36:22 +01:00
|
|
|
IpStats.last_seen.desc()
|
|
|
|
|
if sort_order == "desc"
|
|
|
|
|
else IpStats.last_seen.asc()
|
2026-01-25 22:50:27 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Get paginated attackers
|
|
|
|
|
attackers = query.offset(offset).limit(page_size).all()
|
|
|
|
|
|
|
|
|
|
total_pages = (total_attackers + page_size - 1) // page_size
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"attackers": [
|
|
|
|
|
{
|
|
|
|
|
"ip": a.ip,
|
|
|
|
|
"total_requests": a.total_requests,
|
2026-01-26 12:36:22 +01:00
|
|
|
"first_seen": (
|
|
|
|
|
a.first_seen.isoformat() if a.first_seen else None
|
|
|
|
|
),
|
2026-01-25 22:50:27 +01:00
|
|
|
"last_seen": a.last_seen.isoformat() if a.last_seen else None,
|
|
|
|
|
"country_code": a.country_code,
|
|
|
|
|
"city": a.city,
|
2026-01-29 11:55:06 +01:00
|
|
|
"latitude": a.latitude,
|
|
|
|
|
"longitude": a.longitude,
|
2026-01-25 22:50:27 +01:00
|
|
|
"asn": a.asn,
|
|
|
|
|
"asn_org": a.asn_org,
|
|
|
|
|
"reputation_score": a.reputation_score,
|
|
|
|
|
"reputation_source": a.reputation_source,
|
|
|
|
|
"category": a.category,
|
|
|
|
|
"category_scores": a.category_scores or {},
|
|
|
|
|
}
|
|
|
|
|
for a in attackers
|
|
|
|
|
],
|
|
|
|
|
"pagination": {
|
|
|
|
|
"page": page,
|
|
|
|
|
"page_size": page_size,
|
|
|
|
|
"total_attackers": total_attackers,
|
|
|
|
|
"total_pages": total_pages,
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
2026-01-26 12:36:22 +01:00
|
|
|
def get_all_ips_paginated(
|
|
|
|
|
self,
|
|
|
|
|
page: int = 1,
|
|
|
|
|
page_size: int = 25,
|
|
|
|
|
sort_by: str = "total_requests",
|
|
|
|
|
sort_order: str = "desc",
|
|
|
|
|
categories: Optional[List[str]] = None,
|
|
|
|
|
) -> Dict[str, Any]:
|
|
|
|
|
"""
|
|
|
|
|
Retrieve paginated list of all IPs (or filtered by categories) ordered by specified field.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
page: Page number (1-indexed)
|
|
|
|
|
page_size: Number of results per page
|
|
|
|
|
sort_by: Field to sort by (total_requests, first_seen, last_seen)
|
|
|
|
|
sort_order: Sort order (asc or desc)
|
|
|
|
|
categories: Optional list of categories to filter by
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Dictionary with IPs list and pagination info
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
offset = (page - 1) * page_size
|
|
|
|
|
|
|
|
|
|
# Validate sort parameters
|
|
|
|
|
valid_sort_fields = {"total_requests", "first_seen", "last_seen"}
|
|
|
|
|
sort_by = sort_by if sort_by in valid_sort_fields else "total_requests"
|
|
|
|
|
sort_order = (
|
|
|
|
|
sort_order.lower() if sort_order.lower() in {"asc", "desc"} else "desc"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Build query with optional category filter
|
|
|
|
|
query = session.query(IpStats)
|
|
|
|
|
if categories:
|
|
|
|
|
query = query.filter(IpStats.category.in_(categories))
|
|
|
|
|
|
|
|
|
|
# Get total count
|
|
|
|
|
total_ips = query.count()
|
|
|
|
|
|
|
|
|
|
# Apply sorting
|
|
|
|
|
if sort_by == "total_requests":
|
|
|
|
|
query = query.order_by(
|
|
|
|
|
IpStats.total_requests.desc()
|
|
|
|
|
if sort_order == "desc"
|
|
|
|
|
else IpStats.total_requests.asc()
|
|
|
|
|
)
|
|
|
|
|
elif sort_by == "first_seen":
|
|
|
|
|
query = query.order_by(
|
|
|
|
|
IpStats.first_seen.desc()
|
|
|
|
|
if sort_order == "desc"
|
|
|
|
|
else IpStats.first_seen.asc()
|
|
|
|
|
)
|
|
|
|
|
elif sort_by == "last_seen":
|
|
|
|
|
query = query.order_by(
|
|
|
|
|
IpStats.last_seen.desc()
|
|
|
|
|
if sort_order == "desc"
|
|
|
|
|
else IpStats.last_seen.asc()
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Get paginated IPs
|
|
|
|
|
ips = query.offset(offset).limit(page_size).all()
|
|
|
|
|
|
|
|
|
|
total_pages = (total_ips + page_size - 1) // page_size
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"ips": [
|
|
|
|
|
{
|
|
|
|
|
"ip": ip.ip,
|
|
|
|
|
"total_requests": ip.total_requests,
|
|
|
|
|
"first_seen": (
|
|
|
|
|
ip.first_seen.isoformat() if ip.first_seen else None
|
|
|
|
|
),
|
|
|
|
|
"last_seen": ip.last_seen.isoformat() if ip.last_seen else None,
|
|
|
|
|
"country_code": ip.country_code,
|
|
|
|
|
"city": ip.city,
|
2026-01-29 11:55:06 +01:00
|
|
|
"latitude": ip.latitude,
|
|
|
|
|
"longitude": ip.longitude,
|
2026-01-26 12:36:22 +01:00
|
|
|
"asn": ip.asn,
|
|
|
|
|
"asn_org": ip.asn_org,
|
|
|
|
|
"reputation_score": ip.reputation_score,
|
|
|
|
|
"reputation_source": ip.reputation_source,
|
|
|
|
|
"category": ip.category,
|
|
|
|
|
"category_scores": ip.category_scores or {},
|
|
|
|
|
}
|
|
|
|
|
for ip in ips
|
|
|
|
|
],
|
|
|
|
|
"pagination": {
|
|
|
|
|
"page": page,
|
|
|
|
|
"page_size": page_size,
|
|
|
|
|
"total": total_ips,
|
|
|
|
|
"total_pages": total_pages,
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
2025-12-28 13:52:46 -06:00
|
|
|
def get_dashboard_counts(self) -> Dict[str, int]:
|
|
|
|
|
"""
|
2026-01-24 23:28:10 +01:00
|
|
|
Get aggregate statistics for the dashboard (excludes local/private IPs and server IP).
|
2025-12-28 13:52:46 -06:00
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Dictionary with total_accesses, unique_ips, unique_paths,
|
|
|
|
|
suspicious_accesses, honeypot_triggered, honeypot_ips
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
2026-01-24 23:28:10 +01:00
|
|
|
# Get server IP to filter it out
|
|
|
|
|
from config import get_config
|
2026-01-26 12:36:22 +01:00
|
|
|
|
2026-01-24 23:28:10 +01:00
|
|
|
config = get_config()
|
|
|
|
|
server_ip = config.get_server_ip()
|
2026-01-26 12:36:22 +01:00
|
|
|
|
2026-01-24 23:28:10 +01:00
|
|
|
# Get all accesses first, then filter out local IPs and server IP
|
|
|
|
|
all_accesses = session.query(AccessLog).all()
|
2026-01-26 12:36:22 +01:00
|
|
|
|
2026-01-24 23:28:10 +01:00
|
|
|
# Filter out local/private IPs and server IP
|
|
|
|
|
public_accesses = [
|
2026-01-26 12:36:22 +01:00
|
|
|
log for log in all_accesses if is_valid_public_ip(log.ip, server_ip)
|
2026-01-24 23:28:10 +01:00
|
|
|
]
|
2026-01-26 12:36:22 +01:00
|
|
|
|
2026-01-24 23:28:10 +01:00
|
|
|
# Calculate counts from filtered data
|
|
|
|
|
total_accesses = len(public_accesses)
|
|
|
|
|
unique_ips = len(set(log.ip for log in public_accesses))
|
|
|
|
|
unique_paths = len(set(log.path for log in public_accesses))
|
|
|
|
|
suspicious_accesses = sum(1 for log in public_accesses if log.is_suspicious)
|
2026-01-26 12:36:22 +01:00
|
|
|
honeypot_triggered = sum(
|
|
|
|
|
1 for log in public_accesses if log.is_honeypot_trigger
|
|
|
|
|
)
|
|
|
|
|
honeypot_ips = len(
|
|
|
|
|
set(log.ip for log in public_accesses if log.is_honeypot_trigger)
|
|
|
|
|
)
|
|
|
|
|
|
2026-01-25 22:50:27 +01:00
|
|
|
# Count unique attackers from IpStats (matching the "Attackers by Total Requests" table)
|
2026-01-26 12:36:22 +01:00
|
|
|
unique_attackers = (
|
|
|
|
|
session.query(IpStats).filter(IpStats.category == "attacker").count()
|
|
|
|
|
)
|
2025-12-28 13:52:46 -06:00
|
|
|
|
|
|
|
|
return {
|
2026-01-24 23:28:10 +01:00
|
|
|
"total_accesses": total_accesses,
|
|
|
|
|
"unique_ips": unique_ips,
|
|
|
|
|
"unique_paths": unique_paths,
|
|
|
|
|
"suspicious_accesses": suspicious_accesses,
|
|
|
|
|
"honeypot_triggered": honeypot_triggered,
|
2026-01-23 22:00:21 +01:00
|
|
|
"honeypot_ips": honeypot_ips,
|
2026-01-25 22:50:27 +01:00
|
|
|
"unique_attackers": unique_attackers,
|
2025-12-28 13:52:46 -06:00
|
|
|
}
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
|
|
|
|
def get_top_ips(self, limit: int = 10) -> List[tuple]:
|
|
|
|
|
"""
|
2026-01-24 23:28:10 +01:00
|
|
|
Get top IP addresses by access count (excludes local/private IPs and server IP).
|
2025-12-28 13:52:46 -06:00
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
limit: Maximum number of results
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
List of (ip, count) tuples ordered by count descending
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
2026-01-24 23:28:10 +01:00
|
|
|
# Get server IP to filter it out
|
|
|
|
|
from config import get_config
|
2026-01-26 12:36:22 +01:00
|
|
|
|
2026-01-24 23:28:10 +01:00
|
|
|
config = get_config()
|
|
|
|
|
server_ip = config.get_server_ip()
|
2026-01-26 12:36:22 +01:00
|
|
|
|
2026-01-23 22:00:21 +01:00
|
|
|
results = (
|
|
|
|
|
session.query(AccessLog.ip, func.count(AccessLog.id).label("count"))
|
|
|
|
|
.group_by(AccessLog.ip)
|
|
|
|
|
.order_by(func.count(AccessLog.id).desc())
|
|
|
|
|
.all()
|
|
|
|
|
)
|
2025-12-28 13:52:46 -06:00
|
|
|
|
2026-01-24 23:28:10 +01:00
|
|
|
# Filter out local/private IPs and server IP, then limit results
|
|
|
|
|
filtered = [
|
|
|
|
|
(row.ip, row.count)
|
|
|
|
|
for row in results
|
|
|
|
|
if is_valid_public_ip(row.ip, server_ip)
|
|
|
|
|
]
|
|
|
|
|
return filtered[:limit]
|
2025-12-28 13:52:46 -06:00
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
|
|
|
|
def get_top_paths(self, limit: int = 10) -> List[tuple]:
|
|
|
|
|
"""
|
|
|
|
|
Get top paths by access count.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
limit: Maximum number of results
|
|
|
|
|
|
2026-01-25 22:50:27 +01:00
|
|
|
Returns:
|
2025-12-28 13:52:46 -06:00
|
|
|
List of (path, count) tuples ordered by count descending
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
2026-01-23 22:00:21 +01:00
|
|
|
results = (
|
|
|
|
|
session.query(AccessLog.path, func.count(AccessLog.id).label("count"))
|
|
|
|
|
.group_by(AccessLog.path)
|
|
|
|
|
.order_by(func.count(AccessLog.id).desc())
|
|
|
|
|
.limit(limit)
|
|
|
|
|
.all()
|
|
|
|
|
)
|
2025-12-28 13:52:46 -06:00
|
|
|
|
|
|
|
|
return [(row.path, row.count) for row in results]
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
|
|
|
|
def get_top_user_agents(self, limit: int = 10) -> List[tuple]:
|
|
|
|
|
"""
|
|
|
|
|
Get top user agents by access count.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
limit: Maximum number of results
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
List of (user_agent, count) tuples ordered by count descending
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
2026-01-23 22:00:21 +01:00
|
|
|
results = (
|
|
|
|
|
session.query(
|
|
|
|
|
AccessLog.user_agent, func.count(AccessLog.id).label("count")
|
|
|
|
|
)
|
|
|
|
|
.filter(AccessLog.user_agent.isnot(None), AccessLog.user_agent != "")
|
|
|
|
|
.group_by(AccessLog.user_agent)
|
|
|
|
|
.order_by(func.count(AccessLog.id).desc())
|
|
|
|
|
.limit(limit)
|
|
|
|
|
.all()
|
|
|
|
|
)
|
2025-12-28 13:52:46 -06:00
|
|
|
|
|
|
|
|
return [(row.user_agent, row.count) for row in results]
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
|
|
|
|
def get_recent_suspicious(self, limit: int = 20) -> List[Dict[str, Any]]:
|
|
|
|
|
"""
|
2026-01-24 23:28:10 +01:00
|
|
|
Get recent suspicious access attempts (excludes local/private IPs and server IP).
|
2025-12-28 13:52:46 -06:00
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
limit: Maximum number of results
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
List of access log dictionaries with is_suspicious=True
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
2026-01-24 23:28:10 +01:00
|
|
|
# Get server IP to filter it out
|
|
|
|
|
from config import get_config
|
2026-01-26 12:36:22 +01:00
|
|
|
|
2026-01-24 23:28:10 +01:00
|
|
|
config = get_config()
|
|
|
|
|
server_ip = config.get_server_ip()
|
2026-01-26 12:36:22 +01:00
|
|
|
|
2026-01-23 22:00:21 +01:00
|
|
|
logs = (
|
|
|
|
|
session.query(AccessLog)
|
|
|
|
|
.filter(AccessLog.is_suspicious == True)
|
|
|
|
|
.order_by(AccessLog.timestamp.desc())
|
|
|
|
|
.all()
|
|
|
|
|
)
|
2025-12-28 13:52:46 -06:00
|
|
|
|
2026-01-24 23:28:10 +01:00
|
|
|
# Filter out local/private IPs and server IP
|
|
|
|
|
filtered_logs = [
|
2026-01-26 12:36:22 +01:00
|
|
|
log for log in logs if is_valid_public_ip(log.ip, server_ip)
|
2026-01-24 23:28:10 +01:00
|
|
|
]
|
|
|
|
|
|
2025-12-28 13:52:46 -06:00
|
|
|
return [
|
|
|
|
|
{
|
2026-01-23 22:00:21 +01:00
|
|
|
"ip": log.ip,
|
|
|
|
|
"path": log.path,
|
|
|
|
|
"user_agent": log.user_agent,
|
|
|
|
|
"timestamp": log.timestamp.isoformat(),
|
2025-12-28 13:52:46 -06:00
|
|
|
}
|
2026-01-24 23:28:10 +01:00
|
|
|
for log in filtered_logs[:limit]
|
2025-12-28 13:52:46 -06:00
|
|
|
]
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
|
|
|
|
def get_honeypot_triggered_ips(self) -> List[tuple]:
|
|
|
|
|
"""
|
2026-01-24 23:28:10 +01:00
|
|
|
Get IPs that triggered honeypot paths with the paths they accessed
|
|
|
|
|
(excludes local/private IPs and server IP).
|
2025-12-28 13:52:46 -06:00
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
List of (ip, [paths]) tuples
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
2026-01-24 23:28:10 +01:00
|
|
|
# Get server IP to filter it out
|
|
|
|
|
from config import get_config
|
2026-01-26 12:36:22 +01:00
|
|
|
|
2026-01-24 23:28:10 +01:00
|
|
|
config = get_config()
|
|
|
|
|
server_ip = config.get_server_ip()
|
2026-01-26 12:36:22 +01:00
|
|
|
|
2025-12-28 13:52:46 -06:00
|
|
|
# Get all honeypot triggers grouped by IP
|
2026-01-23 22:00:21 +01:00
|
|
|
results = (
|
|
|
|
|
session.query(AccessLog.ip, AccessLog.path)
|
|
|
|
|
.filter(AccessLog.is_honeypot_trigger == True)
|
|
|
|
|
.all()
|
|
|
|
|
)
|
2025-12-28 13:52:46 -06:00
|
|
|
|
2026-01-24 23:28:10 +01:00
|
|
|
# Group paths by IP, filtering out local/private IPs and server IP
|
2025-12-28 13:52:46 -06:00
|
|
|
ip_paths: Dict[str, List[str]] = {}
|
|
|
|
|
for row in results:
|
2026-01-24 23:28:10 +01:00
|
|
|
# Skip invalid IPs
|
|
|
|
|
if not is_valid_public_ip(row.ip, server_ip):
|
|
|
|
|
continue
|
2025-12-28 13:52:46 -06:00
|
|
|
if row.ip not in ip_paths:
|
|
|
|
|
ip_paths[row.ip] = []
|
|
|
|
|
if row.path not in ip_paths[row.ip]:
|
|
|
|
|
ip_paths[row.ip].append(row.path)
|
|
|
|
|
|
|
|
|
|
return [(ip, paths) for ip, paths in ip_paths.items()]
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
|
|
|
|
def get_recent_attacks(self, limit: int = 20) -> List[Dict[str, Any]]:
|
|
|
|
|
"""
|
|
|
|
|
Get recent access logs that have attack detections.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
limit: Maximum number of results
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
List of access log dicts with attack_types included
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
# Get access logs that have attack detections
|
2026-01-23 22:00:21 +01:00
|
|
|
logs = (
|
|
|
|
|
session.query(AccessLog)
|
|
|
|
|
.join(AttackDetection)
|
|
|
|
|
.order_by(AccessLog.timestamp.desc())
|
|
|
|
|
.limit(limit)
|
|
|
|
|
.all()
|
|
|
|
|
)
|
2025-12-28 13:52:46 -06:00
|
|
|
|
|
|
|
|
return [
|
|
|
|
|
{
|
2026-01-23 22:00:21 +01:00
|
|
|
"ip": log.ip,
|
|
|
|
|
"path": log.path,
|
|
|
|
|
"user_agent": log.user_agent,
|
|
|
|
|
"timestamp": log.timestamp.isoformat(),
|
|
|
|
|
"attack_types": [d.attack_type for d in log.attack_detections],
|
2025-12-28 13:52:46 -06:00
|
|
|
}
|
|
|
|
|
for log in logs
|
|
|
|
|
]
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
2026-01-26 12:36:22 +01:00
|
|
|
def get_honeypot_paginated(
|
|
|
|
|
self,
|
|
|
|
|
page: int = 1,
|
|
|
|
|
page_size: int = 5,
|
|
|
|
|
sort_by: str = "count",
|
|
|
|
|
sort_order: str = "desc",
|
|
|
|
|
) -> Dict[str, Any]:
|
2026-01-25 22:50:27 +01:00
|
|
|
"""
|
|
|
|
|
Retrieve paginated list of honeypot-triggered IPs with their paths.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
page: Page number (1-indexed)
|
|
|
|
|
page_size: Number of results per page
|
|
|
|
|
sort_by: Field to sort by (count or ip)
|
|
|
|
|
sort_order: Sort order (asc or desc)
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Dictionary with honeypots list and pagination info
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
from config import get_config
|
2026-01-26 12:36:22 +01:00
|
|
|
|
2026-01-25 22:50:27 +01:00
|
|
|
config = get_config()
|
|
|
|
|
server_ip = config.get_server_ip()
|
|
|
|
|
|
|
|
|
|
offset = (page - 1) * page_size
|
|
|
|
|
|
|
|
|
|
# Get honeypot triggers grouped by IP
|
|
|
|
|
results = (
|
|
|
|
|
session.query(AccessLog.ip, AccessLog.path)
|
|
|
|
|
.filter(AccessLog.is_honeypot_trigger == True)
|
|
|
|
|
.all()
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Group paths by IP, filtering out invalid IPs
|
|
|
|
|
ip_paths: Dict[str, List[str]] = {}
|
|
|
|
|
for row in results:
|
|
|
|
|
if not is_valid_public_ip(row.ip, server_ip):
|
|
|
|
|
continue
|
|
|
|
|
if row.ip not in ip_paths:
|
|
|
|
|
ip_paths[row.ip] = []
|
|
|
|
|
if row.path not in ip_paths[row.ip]:
|
|
|
|
|
ip_paths[row.ip].append(row.path)
|
|
|
|
|
|
|
|
|
|
# Create list and sort
|
|
|
|
|
honeypot_list = [
|
|
|
|
|
{"ip": ip, "paths": paths, "count": len(paths)}
|
|
|
|
|
for ip, paths in ip_paths.items()
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
if sort_by == "count":
|
|
|
|
|
honeypot_list.sort(
|
2026-01-26 12:36:22 +01:00
|
|
|
key=lambda x: x["count"], reverse=(sort_order == "desc")
|
2026-01-25 22:50:27 +01:00
|
|
|
)
|
|
|
|
|
else: # sort by ip
|
|
|
|
|
honeypot_list.sort(
|
2026-01-26 12:36:22 +01:00
|
|
|
key=lambda x: x["ip"], reverse=(sort_order == "desc")
|
2026-01-25 22:50:27 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
total_honeypots = len(honeypot_list)
|
2026-01-26 12:36:22 +01:00
|
|
|
paginated = honeypot_list[offset : offset + page_size]
|
2026-01-25 22:50:27 +01:00
|
|
|
total_pages = (total_honeypots + page_size - 1) // page_size
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"honeypots": paginated,
|
|
|
|
|
"pagination": {
|
|
|
|
|
"page": page,
|
|
|
|
|
"page_size": page_size,
|
|
|
|
|
"total": total_honeypots,
|
|
|
|
|
"total_pages": total_pages,
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
2026-01-26 12:36:22 +01:00
|
|
|
def get_credentials_paginated(
|
|
|
|
|
self,
|
|
|
|
|
page: int = 1,
|
|
|
|
|
page_size: int = 5,
|
|
|
|
|
sort_by: str = "timestamp",
|
|
|
|
|
sort_order: str = "desc",
|
|
|
|
|
) -> Dict[str, Any]:
|
2026-01-25 22:50:27 +01:00
|
|
|
"""
|
|
|
|
|
Retrieve paginated list of credential attempts.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
page: Page number (1-indexed)
|
|
|
|
|
page_size: Number of results per page
|
|
|
|
|
sort_by: Field to sort by (timestamp, ip, username)
|
|
|
|
|
sort_order: Sort order (asc or desc)
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Dictionary with credentials list and pagination info
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
offset = (page - 1) * page_size
|
|
|
|
|
|
|
|
|
|
# Validate sort parameters
|
|
|
|
|
valid_sort_fields = {"timestamp", "ip", "username"}
|
|
|
|
|
sort_by = sort_by if sort_by in valid_sort_fields else "timestamp"
|
2026-01-26 12:36:22 +01:00
|
|
|
sort_order = (
|
|
|
|
|
sort_order.lower() if sort_order.lower() in {"asc", "desc"} else "desc"
|
|
|
|
|
)
|
2026-01-25 22:50:27 +01:00
|
|
|
|
|
|
|
|
total_credentials = session.query(CredentialAttempt).count()
|
|
|
|
|
|
|
|
|
|
# Build query with sorting
|
|
|
|
|
query = session.query(CredentialAttempt)
|
|
|
|
|
|
|
|
|
|
if sort_by == "timestamp":
|
|
|
|
|
query = query.order_by(
|
2026-01-26 12:36:22 +01:00
|
|
|
CredentialAttempt.timestamp.desc()
|
|
|
|
|
if sort_order == "desc"
|
|
|
|
|
else CredentialAttempt.timestamp.asc()
|
2026-01-25 22:50:27 +01:00
|
|
|
)
|
|
|
|
|
elif sort_by == "ip":
|
|
|
|
|
query = query.order_by(
|
2026-01-26 12:36:22 +01:00
|
|
|
CredentialAttempt.ip.desc()
|
|
|
|
|
if sort_order == "desc"
|
|
|
|
|
else CredentialAttempt.ip.asc()
|
2026-01-25 22:50:27 +01:00
|
|
|
)
|
|
|
|
|
elif sort_by == "username":
|
|
|
|
|
query = query.order_by(
|
2026-01-26 12:36:22 +01:00
|
|
|
CredentialAttempt.username.desc()
|
|
|
|
|
if sort_order == "desc"
|
|
|
|
|
else CredentialAttempt.username.asc()
|
2026-01-25 22:50:27 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
|
|
credentials = query.offset(offset).limit(page_size).all()
|
|
|
|
|
total_pages = (total_credentials + page_size - 1) // page_size
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"credentials": [
|
|
|
|
|
{
|
|
|
|
|
"ip": c.ip,
|
|
|
|
|
"username": c.username,
|
|
|
|
|
"password": c.password,
|
|
|
|
|
"path": c.path,
|
|
|
|
|
"timestamp": c.timestamp.isoformat() if c.timestamp else None,
|
|
|
|
|
}
|
|
|
|
|
for c in credentials
|
|
|
|
|
],
|
|
|
|
|
"pagination": {
|
|
|
|
|
"page": page,
|
|
|
|
|
"page_size": page_size,
|
|
|
|
|
"total": total_credentials,
|
|
|
|
|
"total_pages": total_pages,
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
2026-01-26 12:36:22 +01:00
|
|
|
def get_top_ips_paginated(
|
|
|
|
|
self,
|
|
|
|
|
page: int = 1,
|
|
|
|
|
page_size: int = 5,
|
|
|
|
|
sort_by: str = "count",
|
|
|
|
|
sort_order: str = "desc",
|
|
|
|
|
) -> Dict[str, Any]:
|
2026-01-25 22:50:27 +01:00
|
|
|
"""
|
|
|
|
|
Retrieve paginated list of top IP addresses by access count.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
page: Page number (1-indexed)
|
|
|
|
|
page_size: Number of results per page
|
|
|
|
|
sort_by: Field to sort by (count or ip)
|
|
|
|
|
sort_order: Sort order (asc or desc)
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Dictionary with IPs list and pagination info
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
from config import get_config
|
2026-01-26 12:36:22 +01:00
|
|
|
|
2026-01-25 22:50:27 +01:00
|
|
|
config = get_config()
|
|
|
|
|
server_ip = config.get_server_ip()
|
|
|
|
|
|
|
|
|
|
offset = (page - 1) * page_size
|
|
|
|
|
|
|
|
|
|
results = (
|
|
|
|
|
session.query(AccessLog.ip, func.count(AccessLog.id).label("count"))
|
|
|
|
|
.group_by(AccessLog.ip)
|
|
|
|
|
.all()
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Filter out local/private IPs and server IP, then sort
|
|
|
|
|
filtered = [
|
|
|
|
|
{"ip": row.ip, "count": row.count}
|
|
|
|
|
for row in results
|
|
|
|
|
if is_valid_public_ip(row.ip, server_ip)
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
if sort_by == "count":
|
|
|
|
|
filtered.sort(key=lambda x: x["count"], reverse=(sort_order == "desc"))
|
|
|
|
|
else: # sort by ip
|
|
|
|
|
filtered.sort(key=lambda x: x["ip"], reverse=(sort_order == "desc"))
|
|
|
|
|
|
|
|
|
|
total_ips = len(filtered)
|
2026-01-26 12:36:22 +01:00
|
|
|
paginated = filtered[offset : offset + page_size]
|
2026-01-25 22:50:27 +01:00
|
|
|
total_pages = (total_ips + page_size - 1) // page_size
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"ips": paginated,
|
|
|
|
|
"pagination": {
|
|
|
|
|
"page": page,
|
|
|
|
|
"page_size": page_size,
|
|
|
|
|
"total": total_ips,
|
|
|
|
|
"total_pages": total_pages,
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
2026-01-26 12:36:22 +01:00
|
|
|
def get_top_paths_paginated(
|
|
|
|
|
self,
|
|
|
|
|
page: int = 1,
|
|
|
|
|
page_size: int = 5,
|
|
|
|
|
sort_by: str = "count",
|
|
|
|
|
sort_order: str = "desc",
|
|
|
|
|
) -> Dict[str, Any]:
|
2026-01-25 22:50:27 +01:00
|
|
|
"""
|
|
|
|
|
Retrieve paginated list of top paths by access count.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
page: Page number (1-indexed)
|
|
|
|
|
page_size: Number of results per page
|
|
|
|
|
sort_by: Field to sort by (count or path)
|
|
|
|
|
sort_order: Sort order (asc or desc)
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Dictionary with paths list and pagination info
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
offset = (page - 1) * page_size
|
|
|
|
|
|
|
|
|
|
results = (
|
|
|
|
|
session.query(AccessLog.path, func.count(AccessLog.id).label("count"))
|
|
|
|
|
.group_by(AccessLog.path)
|
|
|
|
|
.all()
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Create list and sort
|
2026-01-26 12:36:22 +01:00
|
|
|
paths_list = [{"path": row.path, "count": row.count} for row in results]
|
2026-01-25 22:50:27 +01:00
|
|
|
|
|
|
|
|
if sort_by == "count":
|
2026-01-26 12:36:22 +01:00
|
|
|
paths_list.sort(
|
|
|
|
|
key=lambda x: x["count"], reverse=(sort_order == "desc")
|
|
|
|
|
)
|
2026-01-25 22:50:27 +01:00
|
|
|
else: # sort by path
|
|
|
|
|
paths_list.sort(key=lambda x: x["path"], reverse=(sort_order == "desc"))
|
|
|
|
|
|
|
|
|
|
total_paths = len(paths_list)
|
2026-01-26 12:36:22 +01:00
|
|
|
paginated = paths_list[offset : offset + page_size]
|
2026-01-25 22:50:27 +01:00
|
|
|
total_pages = (total_paths + page_size - 1) // page_size
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"paths": paginated,
|
|
|
|
|
"pagination": {
|
|
|
|
|
"page": page,
|
|
|
|
|
"page_size": page_size,
|
|
|
|
|
"total": total_paths,
|
|
|
|
|
"total_pages": total_pages,
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
2026-01-26 12:36:22 +01:00
|
|
|
def get_top_user_agents_paginated(
|
|
|
|
|
self,
|
|
|
|
|
page: int = 1,
|
|
|
|
|
page_size: int = 5,
|
|
|
|
|
sort_by: str = "count",
|
|
|
|
|
sort_order: str = "desc",
|
|
|
|
|
) -> Dict[str, Any]:
|
2026-01-25 22:50:27 +01:00
|
|
|
"""
|
|
|
|
|
Retrieve paginated list of top user agents by access count.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
page: Page number (1-indexed)
|
|
|
|
|
page_size: Number of results per page
|
|
|
|
|
sort_by: Field to sort by (count or user_agent)
|
|
|
|
|
sort_order: Sort order (asc or desc)
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Dictionary with user agents list and pagination info
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
offset = (page - 1) * page_size
|
|
|
|
|
|
|
|
|
|
results = (
|
2026-01-26 12:36:22 +01:00
|
|
|
session.query(
|
|
|
|
|
AccessLog.user_agent, func.count(AccessLog.id).label("count")
|
|
|
|
|
)
|
2026-01-25 22:50:27 +01:00
|
|
|
.filter(AccessLog.user_agent.isnot(None), AccessLog.user_agent != "")
|
|
|
|
|
.group_by(AccessLog.user_agent)
|
|
|
|
|
.all()
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Create list and sort
|
|
|
|
|
ua_list = [
|
2026-01-26 12:36:22 +01:00
|
|
|
{"user_agent": row.user_agent, "count": row.count} for row in results
|
2026-01-25 22:50:27 +01:00
|
|
|
]
|
|
|
|
|
|
|
|
|
|
if sort_by == "count":
|
|
|
|
|
ua_list.sort(key=lambda x: x["count"], reverse=(sort_order == "desc"))
|
|
|
|
|
else: # sort by user_agent
|
2026-01-26 12:36:22 +01:00
|
|
|
ua_list.sort(
|
|
|
|
|
key=lambda x: x["user_agent"], reverse=(sort_order == "desc")
|
|
|
|
|
)
|
2026-01-25 22:50:27 +01:00
|
|
|
|
|
|
|
|
total_uas = len(ua_list)
|
2026-01-26 12:36:22 +01:00
|
|
|
paginated = ua_list[offset : offset + page_size]
|
2026-01-25 22:50:27 +01:00
|
|
|
total_pages = (total_uas + page_size - 1) // page_size
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"user_agents": paginated,
|
|
|
|
|
"pagination": {
|
|
|
|
|
"page": page,
|
|
|
|
|
"page_size": page_size,
|
|
|
|
|
"total": total_uas,
|
|
|
|
|
"total_pages": total_pages,
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
2026-01-26 12:36:22 +01:00
|
|
|
def get_attack_types_paginated(
|
|
|
|
|
self,
|
|
|
|
|
page: int = 1,
|
|
|
|
|
page_size: int = 5,
|
|
|
|
|
sort_by: str = "timestamp",
|
|
|
|
|
sort_order: str = "desc",
|
|
|
|
|
) -> Dict[str, Any]:
|
2026-01-25 22:50:27 +01:00
|
|
|
"""
|
|
|
|
|
Retrieve paginated list of detected attack types with access logs.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
page: Page number (1-indexed)
|
|
|
|
|
page_size: Number of results per page
|
|
|
|
|
sort_by: Field to sort by (timestamp, ip, attack_type)
|
|
|
|
|
sort_order: Sort order (asc or desc)
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Dictionary with attacks list and pagination info
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
offset = (page - 1) * page_size
|
|
|
|
|
|
|
|
|
|
# Validate sort parameters
|
|
|
|
|
valid_sort_fields = {"timestamp", "ip", "attack_type"}
|
|
|
|
|
sort_by = sort_by if sort_by in valid_sort_fields else "timestamp"
|
2026-01-26 12:36:22 +01:00
|
|
|
sort_order = (
|
|
|
|
|
sort_order.lower() if sort_order.lower() in {"asc", "desc"} else "desc"
|
|
|
|
|
)
|
2026-01-25 22:50:27 +01:00
|
|
|
|
2026-02-08 16:02:18 +01:00
|
|
|
# Count total attacks first (efficient)
|
|
|
|
|
total_attacks = session.query(AccessLog).join(AttackDetection).count()
|
|
|
|
|
|
|
|
|
|
# Get paginated access logs with attack detections
|
2026-01-26 12:36:22 +01:00
|
|
|
query = session.query(AccessLog).join(AttackDetection)
|
2026-01-25 22:50:27 +01:00
|
|
|
|
|
|
|
|
if sort_by == "timestamp":
|
|
|
|
|
query = query.order_by(
|
2026-01-26 12:36:22 +01:00
|
|
|
AccessLog.timestamp.desc()
|
|
|
|
|
if sort_order == "desc"
|
|
|
|
|
else AccessLog.timestamp.asc()
|
2026-01-25 22:50:27 +01:00
|
|
|
)
|
|
|
|
|
elif sort_by == "ip":
|
|
|
|
|
query = query.order_by(
|
|
|
|
|
AccessLog.ip.desc() if sort_order == "desc" else AccessLog.ip.asc()
|
|
|
|
|
)
|
2026-02-08 16:02:18 +01:00
|
|
|
# Note: attack_type sorting requires loading all data, so we skip it for performance
|
|
|
|
|
# elif sort_by == "attack_type":
|
|
|
|
|
# Can't efficiently sort by related table field
|
2026-01-25 22:50:27 +01:00
|
|
|
|
2026-02-08 16:02:18 +01:00
|
|
|
# Apply LIMIT and OFFSET at database level
|
|
|
|
|
logs = query.offset(offset).limit(page_size).all()
|
2026-01-25 22:50:27 +01:00
|
|
|
|
2026-02-08 16:02:18 +01:00
|
|
|
# Convert to attack list (exclude raw_request for performance - it's too large)
|
|
|
|
|
paginated = [
|
2026-01-25 22:50:27 +01:00
|
|
|
{
|
2026-02-08 16:02:18 +01:00
|
|
|
"id": log.id,
|
2026-01-25 22:50:27 +01:00
|
|
|
"ip": log.ip,
|
|
|
|
|
"path": log.path,
|
|
|
|
|
"user_agent": log.user_agent,
|
|
|
|
|
"timestamp": log.timestamp.isoformat() if log.timestamp else None,
|
|
|
|
|
"attack_types": [d.attack_type for d in log.attack_detections],
|
2026-02-08 16:02:18 +01:00
|
|
|
"raw_request": log.raw_request, # Keep for backward compatibility
|
2026-01-25 22:50:27 +01:00
|
|
|
}
|
|
|
|
|
for log in logs
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
total_pages = (total_attacks + page_size - 1) // page_size
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"attacks": paginated,
|
|
|
|
|
"pagination": {
|
|
|
|
|
"page": page,
|
|
|
|
|
"page_size": page_size,
|
|
|
|
|
"total": total_attacks,
|
|
|
|
|
"total_pages": total_pages,
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
2026-01-04 19:12:23 +01:00
|
|
|
|
2026-02-08 16:02:18 +01:00
|
|
|
def get_raw_request_by_id(self, log_id: int) -> Optional[str]:
|
|
|
|
|
"""
|
|
|
|
|
Retrieve raw HTTP request for a specific access log ID.
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
log_id: The access log ID
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
The raw request string, or None if not found or not available
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
access_log = session.query(AccessLog).filter(AccessLog.id == log_id).first()
|
|
|
|
|
if access_log:
|
|
|
|
|
return access_log.raw_request
|
|
|
|
|
return None
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
|
|
|
|
def get_attack_types_stats(self, limit: int = 20) -> Dict[str, Any]:
|
|
|
|
|
"""
|
|
|
|
|
Get aggregated statistics for attack types (efficient for large datasets).
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
limit: Maximum number of attack types to return
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
Dictionary with attack type counts
|
|
|
|
|
"""
|
|
|
|
|
session = self.session
|
|
|
|
|
try:
|
|
|
|
|
from sqlalchemy import func
|
|
|
|
|
|
|
|
|
|
# Aggregate attack types with count
|
|
|
|
|
results = (
|
|
|
|
|
session.query(
|
|
|
|
|
AttackDetection.attack_type,
|
2026-02-15 15:10:41 +01:00
|
|
|
func.count(AttackDetection.id).label("count"),
|
2026-02-08 16:02:18 +01:00
|
|
|
)
|
|
|
|
|
.group_by(AttackDetection.attack_type)
|
|
|
|
|
.order_by(func.count(AttackDetection.id).desc())
|
|
|
|
|
.limit(limit)
|
|
|
|
|
.all()
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"attack_types": [
|
2026-02-15 15:10:41 +01:00
|
|
|
{"type": row.attack_type, "count": row.count} for row in results
|
2026-02-08 16:02:18 +01:00
|
|
|
]
|
|
|
|
|
}
|
|
|
|
|
finally:
|
|
|
|
|
self.close_session()
|
|
|
|
|
|
2025-12-28 10:43:32 -06:00
|
|
|
|
|
|
|
|
# Module-level singleton: the single DatabaseManager shared by the whole
# process. Obtain it via get_database(); set it up via initialize_database().
_db_manager = DatabaseManager()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_database() -> DatabaseManager:
    """Return the process-wide DatabaseManager singleton instance."""
    return _db_manager
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def initialize_database(database_path: str = "data/krawl.db") -> None:
    """
    Initialize the database system.

    Delegates to the module-level DatabaseManager singleton.

    Args:
        database_path: Path to the SQLite database file
            (defaults to "data/krawl.db").
    """
    _db_manager.initialize(database_path)
|