diff --git a/Dockerfile b/Dockerfile index 4015c74..f6caa8b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -26,4 +26,4 @@ EXPOSE 5000 ENV PYTHONUNBUFFERED=1 ENTRYPOINT ["/app/entrypoint.sh"] -CMD ["python3", "src/server.py"] +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "5000", "--app-dir", "src"] diff --git a/README.md b/README.md index b975134..e4bda18 100644 --- a/README.md +++ b/README.md @@ -109,7 +109,7 @@ services: - "5000:5000" environment: - CONFIG_LOCATION=config.yaml - - "TZ=Europe/Rome" + - TZ=Europe/Rome volumes: - ./config.yaml:/app/config.yaml:ro # bind mount for firewall exporters @@ -308,6 +308,31 @@ The `/api/xml` and `/api/parser` endpoints accept XML input and are designed to ### Command Injection Detection Pages like `/api/exec`, `/api/run`, and `/api/system` simulate command execution endpoints vulnerable to **command injection**. When attackers attempt to inject shell commands using patterns like `; whoami`, `| cat /etc/passwd`, or backticks, Krawl responds with realistic command outputs. For example, `whoami` returns fake usernames like `www-data` or `nginx`, while `uname` returns fake Linux kernel versions. Network commands like `wget` or `curl` simulate downloads or return "command not found" errors, creating believable responses that delay and confuse automated exploitation tools. +## Example usage behind reverse proxy + +You can configure a reverse proxy so that all web requests land on the Krawl page by default, and hide your real content behind a secret URL. For example: + +```nginx +location / { + proxy_pass https://your-krawl-instance; + proxy_pass_header Server; +} + +location /my-hidden-service { + proxy_pass https://my-hidden-service; + proxy_pass_header Server; +} +``` + +Alternatively, you can create several "interesting"-looking domains. For example: + +- admin.example.com +- portal.example.com +- sso.example.com +- login.example.com +- ... 
+ +Additionally, you may configure your reverse proxy to forward all non-existing subdomains (e.g. nonexistent.example.com) to one of these domains so that any crawlers that are guessing domains at random will automatically end up at your Krawl instance. ## Customizing the Canary Token To create a custom canary token, visit https://canarytokens.org @@ -317,7 +342,7 @@ and generate a “Web bug” canary token. This optional token is triggered when a crawler fully traverses the webpage until it reaches 0. At that point, a URL is returned. When this URL is requested, it sends an alert to the user via email, including the visitor’s IP address and user agent. -To enable this feature, set the canary token URL [using the environment variable](#configuration-via-environment-variables) `CANARY_TOKEN_URL`. +To enable this feature, set the canary token URL [using the environment variable](#configuration-via-environment-variables) `KRAWL_CANARY_TOKEN_URL`. ## Customizing the wordlist diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..75b7296 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,372 @@ +# Krawl Architecture + +## Overview + +Krawl is a cloud-native deception honeypot server built on **FastAPI**. It creates realistic fake web applications (admin panels, login pages, fake credentials) to attract, detect, and analyze malicious crawlers and attackers while wasting their resources with infinite spider-trap pages. 
+ +## Tech Stack + +| Layer | Technology | +|-------|-----------| +| **Backend** | FastAPI, Uvicorn, Python 3.11 | +| **ORM / DB** | SQLAlchemy 2.0, SQLite (WAL mode) | +| **Templating** | Jinja2 (server-side rendering) | +| **Reactivity** | Alpine.js 3.14 | +| **Partial Updates** | HTMX 2.0 | +| **Charts** | Chart.js 3.9 (doughnut), custom SVG radar | +| **Maps** | Leaflet 1.9 + CartoDB dark tiles | +| **Scheduling** | APScheduler | +| **Container** | Docker (python:3.11-slim), Helm/K8s ready | + +## Directory Structure + +``` +Krawl/ +├── src/ +│ ├── app.py # FastAPI app factory + lifespan +│ ├── config.py # YAML + env config loader +│ ├── dependencies.py # DI providers (templates, DB, client IP) +│ ├── database.py # DatabaseManager singleton +│ ├── models.py # SQLAlchemy ORM models +│ ├── tracker.py # In-memory + DB access tracking +│ ├── logger.py # Rotating file log handlers +│ ├── deception_responses.py # Attack detection + fake responses +│ ├── sanitizer.py # Input sanitization +│ ├── generators.py # Random content generators +│ ├── wordlists.py # JSON wordlist loader +│ ├── geo_utils.py # IP geolocation API +│ ├── ip_utils.py # IP validation +│ │ +│ ├── routes/ +│ │ ├── honeypot.py # Trap pages, credential capture, catch-all +│ │ ├── dashboard.py # Dashboard page (Jinja2 SSR) +│ │ ├── api.py # JSON API endpoints +│ │ └── htmx.py # HTMX HTML fragment endpoints +│ │ +│ ├── middleware/ +│ │ ├── deception.py # Path traversal / XXE / cmd injection detection +│ │ └── ban_check.py # Banned IP enforcement +│ │ +│ ├── tasks/ # APScheduler background jobs +│ │ ├── analyze_ips.py # IP categorization scoring +│ │ ├── fetch_ip_rep.py # Geolocation + blocklist enrichment +│ │ ├── db_dump.py # Database export +│ │ ├── memory_cleanup.py # In-memory list trimming +│ │ └── top_attacking_ips.py # Top attacker caching +│ │ +│ ├── tasks_master.py # Task discovery + APScheduler orchestrator +│ ├── firewall/ # Banlist export (iptables, raw) +│ ├── migrations/ # Schema migrations 
(auto-run) +│ │ +│ └── templates/ +│ ├── jinja2/ +│ │ ├── base.html # Layout + CDN scripts +│ │ └── dashboard/ +│ │ ├── index.html # Main dashboard page +│ │ └── partials/ # 13 HTMX fragment templates +│ ├── html/ # Deceptive trap page templates +│ └── static/ +│ ├── css/dashboard.css +│ └── js/ +│ ├── dashboard.js # Alpine.js app controller +│ ├── map.js # Leaflet map +│ ├── charts.js # Chart.js doughnut +│ └── radar.js # SVG radar chart +│ +├── config.yaml # Application configuration +├── wordlists.json # Attack patterns + fake credentials +├── Dockerfile # Container build +├── docker-compose.yaml # Local orchestration +├── entrypoint.sh # Container startup (gosu privilege drop) +├── kubernetes/ # K8s manifests +└── helm/ # Helm chart +``` + +## Application Entry Point + +`src/app.py` uses the **FastAPI application factory** pattern with an async lifespan manager: + +``` +Startup Shutdown + │ │ + ├─ Initialize logging └─ Log shutdown + ├─ Initialize SQLite DB + ├─ Create AccessTracker + ├─ Load webpages file (optional) + ├─ Store config + tracker in app.state + ├─ Start APScheduler background tasks + └─ Log dashboard URL +``` + +## Request Pipeline + +``` + Request + │ + ▼ +┌──────────────────────┐ +│ BanCheckMiddleware │──→ IP banned? → Return 500 +└──────────┬───────────┘ + ▼ +┌──────────────────────┐ +│ DeceptionMiddleware │──→ Attack detected? → Fake error response +└──────────┬───────────┘ + ▼ +┌───────────────────────┐ +│ ServerHeaderMiddleware│──→ Add random Server header +└──────────┬────────────┘ + ▼ +┌───────────────────────┐ +│ Route Matching │ +│ (ordered by priority)│ +│ │ +│ 1. Static files │ /{secret}/static/* +│ 2. Dashboard router │ /{secret}/ (prefix-based) +│ 3. API router │ /{secret}/api/* (prefix-based) +│ 4. HTMX router │ /{secret}/htmx/* (prefix-based) +│ 5. Honeypot router │ /* (catch-all) +└───────────────────────┘ +``` + +### Prefix-Based Routing + +Dashboard, API, and HTMX routers are mounted with `prefix=f"/{secret}"` in `app.py`. 
This means: +- Route handlers define paths **without** the secret (e.g., `@router.get("/api/all-ips")`) +- FastAPI prepends the secret automatically (e.g., `GET /a1b2c3/api/all-ips`) +- The honeypot catch-all `/{path:path}` only matches paths that **don't** start with the secret +- No `_is_dashboard_path()` checks needed — the prefix handles access scoping + +## Route Architecture + +### Honeypot Routes (`routes/honeypot.py`) + +| Method | Path | Response | +|--------|------|----------| +| `GET` | `/{path:path}` | Trap page with random links (catch-all) | +| `HEAD` | `/{path:path}` | 200 OK | +| `POST` | `/{path:path}` | Credential capture | +| `GET` | `/admin`, `/login` | Fake login form | +| `GET` | `/wp-admin`, `/wp-login.php` | Fake WordPress login | +| `GET` | `/phpmyadmin` | Fake phpMyAdmin | +| `GET` | `/robots.txt` | Honeypot paths advertised | +| `GET/POST` | `/api/search`, `/api/sql` | SQL injection honeypot | +| `POST` | `/api/contact` | XSS detection endpoint | +| `GET` | `/.env`, `/credentials.txt` | Fake sensitive files | + +### Dashboard Routes (`routes/dashboard.py`) + +| Method | Path | Response | +|--------|------|----------| +| `GET` | `/` | Server-rendered dashboard (Jinja2) | + +### API Routes (`routes/api.py`) + +| Method | Path | Response | +|--------|------|----------| +| `GET` | `/api/all-ips` | Paginated IP list with stats | +| `GET` | `/api/attackers` | Paginated attacker IPs | +| `GET` | `/api/ip-stats/{ip}` | Single IP detail | +| `GET` | `/api/credentials` | Captured credentials | +| `GET` | `/api/honeypot` | Honeypot trigger counts | +| `GET` | `/api/top-ips` | Top requesting IPs | +| `GET` | `/api/top-paths` | Most requested paths | +| `GET` | `/api/top-user-agents` | Top user agents | +| `GET` | `/api/attack-types-stats` | Attack type distribution | +| `GET` | `/api/attack-types` | Paginated attack log | +| `GET` | `/api/raw-request/{id}` | Full HTTP request | +| `GET` | `/api/get_banlist` | Export ban rules | + +### HTMX Fragment 
Routes (`routes/htmx.py`) + +Each returns a server-rendered Jinja2 partial (`hx-swap="innerHTML"`): + +| Path | Template | +|------|----------| +| `/htmx/honeypot` | `honeypot_table.html` | +| `/htmx/top-ips` | `top_ips_table.html` | +| `/htmx/top-paths` | `top_paths_table.html` | +| `/htmx/top-ua` | `top_ua_table.html` | +| `/htmx/attackers` | `attackers_table.html` | +| `/htmx/credentials` | `credentials_table.html` | +| `/htmx/attacks` | `attack_types_table.html` | +| `/htmx/patterns` | `patterns_table.html` | +| `/htmx/ip-detail/{ip}` | `ip_detail.html` | + +## Database Schema + +``` +┌─────────────────┐ ┌──────────────────┐ +│ AccessLog │ │ AttackDetection │ +├─────────────────┤ ├──────────────────┤ +│ id (PK) │◄────│ access_log_id(FK)│ +│ ip (indexed) │ │ attack_type │ +│ path │ │ matched_pattern │ +│ user_agent │ └──────────────────┘ +│ method │ +│ is_suspicious │ ┌──────────────────┐ +│ is_honeypot │ │CredentialAttempt │ +│ timestamp │ ├──────────────────┤ +│ raw_request │ │ id (PK) │ +└─────────────────┘ │ ip (indexed) │ + │ path, username │ +┌─────────────────┐ │ password │ +│ IpStats │ │ timestamp │ +├─────────────────┤ └──────────────────┘ +│ ip (PK) │ +│ total_requests │ ┌──────────────────┐ +│ first/last_seen │ │ CategoryHistory │ +│ country_code │ ├──────────────────┤ +│ city, lat, lon │ │ id (PK) │ +│ asn, asn_org │ │ ip (indexed) │ +│ isp, reverse │ │ old_category │ +│ is_proxy │ │ new_category │ +│ is_hosting │ │ timestamp │ +│ list_on (JSON) │ └──────────────────┘ +│ category │ +│ category_scores │ +│ analyzed_metrics│ +│ manual_category │ +└─────────────────┘ +``` + +**SQLite config:** WAL mode, 30s busy timeout, file permissions 600. 
+ +## Frontend Architecture + +``` +base.html + ├── CDN: Leaflet, Chart.js, HTMX, Alpine.js (deferred) + ├── Static: dashboard.css + │ + └── dashboard/index.html (extends base) + │ + ├── Stats cards ──────────── Server-rendered on page load + ├── Suspicious table ─────── Server-rendered on page load + │ + ├── Overview tab (Alpine.js x-show) + │ ├── Honeypot table ───── HTMX hx-get on load + │ ├── Top IPs table ────── HTMX hx-get on load + │ ├── Top Paths table ──── HTMX hx-get on load + │ ├── Top UA table ─────── HTMX hx-get on load + │ └── Credentials table ── HTMX hx-get on load + │ + └── Attacks tab (Alpine.js x-show, lazy init) + ├── Attackers table ──── HTMX hx-get on load + ├── Map ──────────────── Leaflet (init on tab switch) + ├── Chart ────────────── Chart.js (init on tab switch) + ├── Attack types table ─ HTMX hx-get on load + └── Patterns table ───── HTMX hx-get on load +``` + +**Responsibility split:** +- **Alpine.js** — Tab state, modals, dropdowns, lazy initialization +- **HTMX** — Table pagination, sorting, IP detail expansion +- **Leaflet** — Interactive map with category-colored markers +- **Chart.js** — Doughnut chart for attack type distribution +- **Custom SVG** — Radar charts for IP category scores + +## Background Tasks + +Managed by `TasksMaster` (APScheduler). Tasks are auto-discovered from `src/tasks/`. 
+ +| Task | Schedule | Purpose | +|------|----------|---------| +| `analyze_ips` | Every 1 min | Score IPs into categories (attacker, crawler, user) | +| `fetch_ip_rep` | Every 5 min | Enrich IPs with geolocation + blocklist data | +| `db_dump` | Configurable | Export database backups | +| `memory_cleanup` | Periodic | Trim in-memory lists | +| `top_attacking_ips` | Periodic | Cache top attackers | + +### IP Categorization Model + +Each IP is scored across 4 categories based on: +- HTTP method distribution (risky methods ratio) +- Robots.txt violations +- Request timing anomalies (coefficient of variation) +- User-Agent diversity +- Attack URL detection + +Categories: `attacker`, `bad_crawler`, `good_crawler`, `regular_user`, `unknown` + +## Configuration + +`config.yaml` with environment variable overrides (`KRAWL_{FIELD}`): + +```yaml +server: + port: 5000 + delay: 100 # Response delay (ms) + +dashboard: + secret_path: "test" # Auto-generates if null + +database: + path: "data/krawl.db" + retention_days: 30 + +crawl: + infinite_pages_for_malicious: true + max_pages_limit: 250 + ban_duration_seconds: 600 + +behavior: + probability_error_codes: 0 # 0-100% + +canary: + token_url: null # External canary alert URL +``` + +## Logging + +Three rotating log files (1MB max, 5 backups each): + +| Logger | File | Content | +|--------|------|---------| +| `krawl.app` | `logs/krawl.log` | Application events, errors | +| `krawl.access` | `logs/access.log` | HTTP access, attack detections | +| `krawl.credentials` | `logs/credentials.log` | Captured login attempts | + +## Docker + +```dockerfile +FROM python:3.11-slim +# Non-root user: krawl:1000 +# Volumes: /app/logs, /app/data, /app/exports +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "5000", "--app-dir", "src"] +``` + +## Key Data Flows + +### Honeypot Request + +``` +Client → BanCheck → DeceptionMiddleware → HoneypotRouter + │ + ┌─────────┴──────────┐ + │ tracker.record() │ + │ ├─ in-memory ++ │ + │ ├─ detect 
attacks │ + │ └─ DB persist │ + └────────────────────┘ +``` + +### Dashboard Load + +``` +Browser → GET /{secret}/ → SSR initial stats + Jinja2 render + → Alpine.js init → HTMX fires hx-get for each table + → User clicks Attacks tab → setTimeout → init Leaflet + Chart.js + → Leaflet fetches /api/all-ips → plots markers + → Chart.js fetches /api/attack-types-stats → renders doughnut +``` + +### IP Enrichment Pipeline + +``` +APScheduler (every 5 min) + └─ fetch_ip_rep.main() + ├─ DB: get unenriched IPs (limit 50) + ├─ ip-api.com → geolocation (country, city, ASN, coords) + ├─ iprep.lcrawl.com → blocklist memberships + └─ DB: update IpStats with enriched data +``` diff --git a/helm/Chart.yaml b/helm/Chart.yaml index bd5ef05..eaae7b3 100644 --- a/helm/Chart.yaml +++ b/helm/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: krawl-chart description: A Helm chart for Krawl honeypot server type: application -version: 1.0.4 -appVersion: 1.0.4 +version: 1.0.5 +appVersion: 1.0.5 keywords: - honeypot - security diff --git a/helm/values.yaml b/helm/values.yaml index 6564882..c63a9f3 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -576,6 +576,7 @@ wordlists: xxe_injection: "(=2.0.0,<3.0.0 APScheduler>=3.11.2 requests>=2.32.5 + +# Web framework +fastapi>=0.115.0 +uvicorn[standard]>=0.30.0 +jinja2>=3.1.0 +python-multipart>=0.0.9 \ No newline at end of file diff --git a/src/app.py b/src/app.py new file mode 100644 index 0000000..ae4e0dc --- /dev/null +++ b/src/app.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 + +""" +FastAPI application factory for the Krawl honeypot. +Replaces the old http.server-based server.py. 
+""" + +import sys +import os +from contextlib import asynccontextmanager + +from fastapi import FastAPI, Request, Response +from fastapi.staticfiles import StaticFiles + +from config import get_config +from tracker import AccessTracker, set_tracker +from database import initialize_database +from tasks_master import get_tasksmaster +from logger import initialize_logging, get_app_logger +from generators import random_server_header + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Run startup (logging, database, tracker, webpages file, canary counter, scheduled tasks) before ``yield`` and log shutdown after it.""" + config = get_config() + + # Initialize logging + initialize_logging() + app_logger = get_app_logger() + + # Initialize database + try: + initialize_database(config.database_path) + app_logger.info(f"Database initialized at: {config.database_path}") + except Exception as e: + app_logger.warning( + f"Database initialization failed: {e}. Continuing with in-memory only." + ) + + # Initialize tracker + tracker = AccessTracker(config.max_pages_limit, config.ban_duration_seconds) + set_tracker(tracker) + + # Store in app.state for dependency injection + app.state.config = config + app.state.tracker = tracker + + # Load webpages file if provided via env var + webpages = None + webpages_file = os.environ.get("KRAWL_WEBPAGES_FILE") + if webpages_file: + try: + with open(webpages_file, "r") as f: + webpages = f.readlines() + if not webpages: + app_logger.warning( + "The webpages file was empty. Using randomly generated links." + ) + webpages = None + except IOError: + app_logger.warning( + "Can't read webpages file. Using randomly generated links." 
+ ) + app.state.webpages = webpages + + # Initialize canary counter + app.state.counter = config.canary_token_tries + + # Start scheduled tasks + tasks_master = get_tasksmaster() + tasks_master.run_scheduled_tasks() + + banner = f""" + +============================================================ +DASHBOARD AVAILABLE AT +{config.dashboard_secret_path} +============================================================ + """ + app_logger.info(banner) + app_logger.info(f"Starting deception server on port {config.port}...") + if config.canary_token_url: + app_logger.info( + f"Canary token will appear after {config.canary_token_tries} tries" + ) + else: + app_logger.info("No canary token configured (set KRAWL_CANARY_TOKEN_URL to enable)") + + yield + + # Shutdown + app_logger.info("Server shutting down...") + + +def create_app() -> FastAPI: + """Build the FastAPI app: disable the docs/ReDoc/OpenAPI URLs, add the middlewares, mount the dashboard static files and include the secret-prefixed and honeypot routers.""" + application = FastAPI( + docs_url=None, + redoc_url=None, + openapi_url=None, + lifespan=lifespan, + ) + + # Random server header middleware (innermost — runs last on request, first on response) + @application.middleware("http") + async def server_header_middleware(request: Request, call_next): + response: Response = await call_next(request) + response.headers["Server"] = random_server_header() + return response + + # Deception detection middleware (path traversal, XXE, command injection) + from middleware.deception import DeceptionMiddleware + + application.add_middleware(DeceptionMiddleware) + + # Banned IP check middleware (outermost — runs first on request) + from middleware.ban_check import BanCheckMiddleware + + application.add_middleware(BanCheckMiddleware) + + # Mount static files for the dashboard + config = get_config() + secret = config.dashboard_secret_path.lstrip("/") + static_dir = os.path.join(os.path.dirname(__file__), "templates", "static") + application.mount( + f"/{secret}/static", + StaticFiles(directory=static_dir), + name="dashboard-static", + ) + + # Import and 
include routers + from routes.honeypot import router as honeypot_router + from routes.api import router as api_router + from routes.dashboard import router as dashboard_router + from routes.htmx import router as htmx_router + + # Dashboard/API/HTMX routes (prefixed with secret path, before honeypot catch-all) + dashboard_prefix = f"/{secret}" + application.include_router(dashboard_router, prefix=dashboard_prefix) + application.include_router(api_router, prefix=dashboard_prefix) + application.include_router(htmx_router, prefix=dashboard_prefix) + + # Honeypot routes (catch-all must be last) + application.include_router(honeypot_router) + + return application + + +app = create_app() diff --git a/src/database.py b/src/database.py index 5c1a275..3e04de5 100644 --- a/src/database.py +++ b/src/database.py @@ -359,6 +359,16 @@ class DatabaseManager: sanitized_ip = sanitize_ip(ip) ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first() + if not ip_stats: + applogger.warning( + f"No IpStats record found for {sanitized_ip}, creating one." 
+ ) + now = datetime.now() + ip_stats = IpStats( + ip=sanitized_ip, total_requests=0, first_seen=now, last_seen=now + ) + session.add(ip_stats) + # Check if category has changed and record it old_category = ip_stats.category if old_category != category: @@ -390,6 +400,10 @@ class DatabaseManager: sanitized_ip = sanitize_ip(ip) ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first() + if not ip_stats: + applogger.warning(f"No IpStats record found for {sanitized_ip}") + return + # Record the manual category change old_category = ip_stats.category if old_category != category: @@ -995,6 +1009,27 @@ class DatabaseManager: finally: self.close_session() + def _public_ip_filter(self, query, ip_column, server_ip: Optional[str] = None): + """Return ``query`` with SQL LIKE filters on ``ip_column`` that drop 10.*, 172.16-31.*, 192.168.*, 127.*, 0.* and 169.254.* IPv4 rows, ``::1``, and ``server_ip`` when one is given.""" + query = query.filter( + ~ip_column.like("10.%"), + ~ip_column.like("172.16.%"), + ~ip_column.like("172.17.%"), + ~ip_column.like("172.18.%"), + ~ip_column.like("172.19.%"), + ~ip_column.like("172.2_.%"),  # "_" is the LIKE one-character wildcard: covers 172.20.* through 172.29.* + ~ip_column.like("172.30.%"), + ~ip_column.like("172.31.%"), + ~ip_column.like("192.168.%"), + ~ip_column.like("127.%"), + ~ip_column.like("0.%"), + ~ip_column.like("169.254.%"), + ip_column != "::1", + ) + if server_ip: + query = query.filter(ip_column != server_ip) + return query + def get_dashboard_counts(self) -> Dict[str, int]: """ Get aggregate statistics for the dashboard (excludes local/private IPs and server IP). 
@@ -1005,43 +1040,43 @@ class DatabaseManager: """ session = self.session try: - # Get server IP to filter it out from config import get_config config = get_config() server_ip = config.get_server_ip() - # Get all accesses first, then filter out local IPs and server IP - all_accesses = session.query(AccessLog).all() - - # Filter out local/private IPs and server IP - public_accesses = [ - log for log in all_accesses if is_valid_public_ip(log.ip, server_ip) - ] - - # Calculate counts from filtered data - total_accesses = len(public_accesses) - unique_ips = len(set(log.ip for log in public_accesses)) - unique_paths = len(set(log.path for log in public_accesses)) - suspicious_accesses = sum(1 for log in public_accesses if log.is_suspicious) - honeypot_triggered = sum( - 1 for log in public_accesses if log.is_honeypot_trigger - ) - honeypot_ips = len( - set(log.ip for log in public_accesses if log.is_honeypot_trigger) + # Single aggregation query instead of loading all rows + base = session.query( + func.count(AccessLog.id).label("total_accesses"), + func.count(distinct(AccessLog.ip)).label("unique_ips"), + func.count(distinct(AccessLog.path)).label("unique_paths"), + func.count(case((AccessLog.is_suspicious == True, 1))).label( + "suspicious_accesses" + ), + func.count(case((AccessLog.is_honeypot_trigger == True, 1))).label( + "honeypot_triggered" + ), ) + base = self._public_ip_filter(base, AccessLog.ip, server_ip) + row = base.one() + + # Honeypot unique IPs (separate query for distinct on filtered subset) + hp_query = session.query(func.count(distinct(AccessLog.ip))).filter( + AccessLog.is_honeypot_trigger == True + ) + hp_query = self._public_ip_filter(hp_query, AccessLog.ip, server_ip) + honeypot_ips = hp_query.scalar() or 0 - # Count unique attackers from IpStats (matching the "Attackers by Total Requests" table) unique_attackers = ( session.query(IpStats).filter(IpStats.category == "attacker").count() ) return { - "total_accesses": total_accesses, - "unique_ips": 
unique_ips, - "unique_paths": unique_paths, - "suspicious_accesses": suspicious_accesses, - "honeypot_triggered": honeypot_triggered, + "total_accesses": row.total_accesses or 0, + "unique_ips": row.unique_ips or 0, + "unique_paths": row.unique_paths or 0, + "suspicious_accesses": row.suspicious_accesses or 0, + "honeypot_triggered": row.honeypot_triggered or 0, "honeypot_ips": honeypot_ips, "unique_attackers": unique_attackers, } @@ -1609,11 +1644,18 @@ class DatabaseManager: sort_order.lower() if sort_order.lower() in {"asc", "desc"} else "desc" ) - # Count total attacks first (efficient) - total_attacks = session.query(AccessLog).join(AttackDetection).count() + # Count total unique access logs with attack detections + total_attacks = ( + session.query(AccessLog) + .join(AttackDetection) + .distinct(AccessLog.id) + .count() + ) # Get paginated access logs with attack detections - query = session.query(AccessLog).join(AttackDetection) + query = ( + session.query(AccessLog).join(AttackDetection).distinct(AccessLog.id) + ) if sort_by == "timestamp": query = query.order_by( @@ -1625,9 +1667,6 @@ class DatabaseManager: query = query.order_by( AccessLog.ip.desc() if sort_order == "desc" else AccessLog.ip.asc() ) - # Note: attack_type sorting requires loading all data, so we skip it for performance - # elif sort_by == "attack_type": - # Can't efficiently sort by related table field # Apply LIMIT and OFFSET at database level logs = query.offset(offset).limit(page_size).all() diff --git a/src/dependencies.py b/src/dependencies.py new file mode 100644 index 0000000..774d9dd --- /dev/null +++ b/src/dependencies.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 + +""" +FastAPI dependency injection providers. +Replaces Handler class variables with proper DI. 
+""" + +import os +from datetime import datetime + +from fastapi import Request +from fastapi.templating import Jinja2Templates + +from config import Config +from tracker import AccessTracker +from database import DatabaseManager, get_database +from logger import get_app_logger, get_access_logger, get_credential_logger + +# Shared Jinja2 templates instance +_templates = None + + +def get_templates() -> Jinja2Templates: + """Get shared Jinja2Templates instance with custom filters.""" + global _templates + if _templates is None: + templates_dir = os.path.join(os.path.dirname(__file__), "templates", "jinja2") + _templates = Jinja2Templates(directory=templates_dir) + _templates.env.filters["format_ts"] = _format_ts + return _templates + + +def _format_ts(value, time_only=False): + """Custom Jinja2 filter for formatting ISO timestamps.""" + if not value: + return "N/A" + if isinstance(value, str): + try: + value = datetime.fromisoformat(value) + except (ValueError, TypeError): + return value + if time_only: + return value.strftime("%H:%M:%S") + return value.strftime("%m/%d/%Y %H:%M:%S") + + +def get_tracker(request: Request) -> AccessTracker: + return request.app.state.tracker + + +def get_app_config(request: Request) -> Config: + return request.app.state.config + + +def get_db() -> DatabaseManager: + return get_database() + + +def get_client_ip(request: Request) -> str: + """Extract client IP address from request, checking proxy headers first.""" + forwarded_for = request.headers.get("X-Forwarded-For") + if forwarded_for: + return forwarded_for.split(",")[0].strip() + + real_ip = request.headers.get("X-Real-IP") + if real_ip: + return real_ip.strip() + + if request.client: + return request.client.host + + return "0.0.0.0" + + +def build_raw_request(request: Request, body: str = "") -> str: + """Build raw HTTP request string for forensic analysis.""" + try: + raw = f"{request.method} {request.url.path}" + if request.url.query: + raw += f"?{request.url.query}" + raw += 
f" HTTP/1.1\r\n" + + for header, value in request.headers.items(): + raw += f"{header}: {value}\r\n" + + raw += "\r\n" + + if body: + raw += body + + return raw + except Exception as e: + return f"{request.method} {request.url.path} (error building full request: {str(e)})" diff --git a/src/geo_utils.py b/src/geo_utils.py index 9c4538e..1df9904 100644 --- a/src/geo_utils.py +++ b/src/geo_utils.py @@ -9,26 +9,21 @@ from logger import get_app_logger app_logger = get_app_logger() -# Cache for IP geolocation data to avoid repeated API calls -_geoloc_cache = {} - def fetch_ip_geolocation(ip_address: str) -> Optional[Dict[str, Any]]: """ Fetch geolocation data for an IP address using ip-api.com. + Results are persisted to the database by the caller (fetch_ip_rep task), + so no in-memory caching is needed. + Args: ip_address: IP address to lookup Returns: Dictionary containing geolocation data or None if lookup fails """ - # Check cache first - if ip_address in _geoloc_cache: - return _geoloc_cache[ip_address] - # This is now replacing lcrawl to fetch IP data like latitude/longitude, city, etc... 
try: - # Use ip-api.com API for geolocation url = f"http://ip-api.com/json/{ip_address}" params = { "fields": "status,message,country,countryCode,region,regionName,city,zip,lat,lon,timezone,isp,org,as,reverse,mobile,proxy,hosting,query" @@ -39,16 +34,12 @@ def fetch_ip_geolocation(ip_address: str) -> Optional[Dict[str, Any]]: data = response.json() - # Check if the API call was successful if data.get("status") != "success": app_logger.warning( f"IP lookup failed for {ip_address}: {data.get('message')}" ) return None - # Cache the result - _geoloc_cache[ip_address] = data - app_logger.debug(f"Fetched geolocation for {ip_address}") return data diff --git a/src/handler.py b/src/handler.py deleted file mode 100644 index 863c223..0000000 --- a/src/handler.py +++ /dev/null @@ -1,1264 +0,0 @@ -#!/usr/bin/env python3 - -import logging -import random -import time -from datetime import datetime -from http.server import BaseHTTPRequestHandler -from typing import Optional, List -from urllib.parse import urlparse, parse_qs, unquote_plus -import json -import os - -from database import get_database -from config import Config, get_config - -# imports for the __init_subclass__ method, do not remove pls -from firewall.fwtype import FWType -from firewall.iptables import Iptables -from firewall.raw import Raw - -from tracker import AccessTracker -from templates import html_templates -from templates.dashboard_template import generate_dashboard -from generators import ( - credentials_txt, - passwords_txt, - users_json, - api_keys_json, - api_response, - directory_listing, - random_server_header, -) -from wordlists import get_wordlists -from deception_responses import ( - detect_and_respond_deception, - generate_sql_error_response, - get_sql_response_with_data, - detect_xss_pattern, - generate_xss_response, - generate_server_error, -) -from models import AccessLog -from ip_utils import is_valid_public_ip -from sqlalchemy import distinct - - -class Handler(BaseHTTPRequestHandler): - 
"""HTTP request handler for the deception server""" - - webpages: Optional[List[str]] = None - config: Config = None - tracker: AccessTracker = None - counter: int = 0 - app_logger: logging.Logger = None - access_logger: logging.Logger = None - credential_logger: logging.Logger = None - - def _get_client_ip(self) -> str: - """Extract client IP address from request, checking proxy headers first""" - # Headers might not be available during early error logging - if hasattr(self, "headers") and self.headers: - # Check X-Forwarded-For header (set by load balancers/proxies) - forwarded_for = self.headers.get("X-Forwarded-For") - if forwarded_for: - # X-Forwarded-For can contain multiple IPs, get the first (original client) - return forwarded_for.split(",")[0].strip() - - # Check X-Real-IP header (set by nginx and other proxies) - real_ip = self.headers.get("X-Real-IP") - if real_ip: - return real_ip.strip() - - # Fallback to direct connection IP - return self.client_address[0] - - def _build_raw_request(self, body: str = "") -> str: - """Build raw HTTP request string for forensic analysis""" - try: - # Request line - raw = f"{self.command} {self.path} {self.request_version}\r\n" - - # Headers - if hasattr(self, "headers") and self.headers: - for header, value in self.headers.items(): - raw += f"{header}: {value}\r\n" - - raw += "\r\n" - - # Body (if present) - if body: - raw += body - - return raw - except Exception as e: - # Fallback to minimal representation if building fails - return f"{self.command} {self.path} (error building full request: {str(e)})" - - def _get_category_by_ip(self, client_ip: str) -> str: - """Get the category of an IP from the database""" - return self.tracker.get_category_by_ip(client_ip) - - def _get_page_visit_count(self, client_ip: str) -> int: - """Get current page visit count for an IP""" - return self.tracker.get_page_visit_count(client_ip) - - def _increment_page_visit(self, client_ip: str) -> int: - """Increment page visit counter for an 
IP and return new count""" - return self.tracker.increment_page_visit(client_ip) - - def version_string(self) -> str: - """Return custom server version for deception.""" - return random_server_header() - - def _should_return_error(self) -> bool: - """Check if we should return an error based on probability""" - if self.config.probability_error_codes <= 0: - return False - return random.randint(1, 100) <= self.config.probability_error_codes - - def _get_random_error_code(self) -> int: - """Get a random error code from wordlists""" - wl = get_wordlists() - error_codes = wl.error_codes - if not error_codes: - error_codes = [400, 401, 403, 404, 500, 502, 503] - return random.choice(error_codes) - - def _handle_sql_endpoint(self, path: str) -> bool: - """ - Handle SQL injection honeypot endpoints. - Returns True if the path was handled, False otherwise. - """ - # SQL-vulnerable endpoints - sql_endpoints = ["/api/search", "/api/sql", "/api/database"] - - base_path = urlparse(path).path - if base_path not in sql_endpoints: - return False - - try: - parsed_url = urlparse(path) - request_query = parsed_url.query - - # Log SQL injection attempt - client_ip = self._get_client_ip() - user_agent = self.headers.get("User-Agent", "") - - # Always check for SQL injection patterns - error_msg, content_type, status_code = generate_sql_error_response( - request_query or "" - ) - - if error_msg: - # SQL injection detected - log and return error - self.access_logger.warning( - f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {request_query[:100] if request_query else 'empty'}" - ) - self.send_response(status_code) - self.send_header("Content-type", content_type) - self.end_headers() - self.wfile.write(error_msg.encode()) - else: - # No injection detected - return fake data - self.access_logger.info( - f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {request_query[:100] if request_query else 'empty'}" - ) - self.send_response(200) - self.send_header("Content-type", 
"application/json") - self.end_headers() - response_data = get_sql_response_with_data( - base_path, request_query or "" - ) - self.wfile.write(response_data.encode()) - - return True - - except BrokenPipeError: - # Client disconnected - return True - except Exception as e: - self.app_logger.error(f"Error handling SQL endpoint {path}: {str(e)}") - # Still send a response even on error - try: - self.send_response(500) - self.send_header("Content-type", "application/json") - self.end_headers() - self.wfile.write(b'{"error": "Internal server error"}') - except: - pass - return True - - def _handle_deception_response( - self, path: str, query: str = "", body: str = "", method: str = "GET" - ) -> bool: - """ - Handle deception responses for path traversal, XXE, and command injection. - Returns True if a deception response was sent, False otherwise. - """ - try: - self.app_logger.debug(f"Checking deception for: {method} {path}") - result = detect_and_respond_deception(path, query, body, method) - - if result: - response_body, content_type, status_code = result - client_ip = self._get_client_ip() - user_agent = self.headers.get("User-Agent", "") - - # Determine attack type using standardized names from wordlists - full_input = f"{path} {query} {body}".lower() - attack_type_db = None # For database (standardized) - attack_type_log = "UNKNOWN" # For logging (human-readable) - - if ( - "passwd" in path.lower() - or "shadow" in path.lower() - or ".." in path - or ".." 
in query - ): - attack_type_db = "path_traversal" - attack_type_log = "PATH_TRAVERSAL" - elif body and (" str: - """Generate a webpage containing random links or canary token""" - - random.seed(seed) - num_pages = random.randint(*self.config.links_per_page_range) - - # Check if this is a good crawler by IP category from database - ip_category = self._get_category_by_ip(self._get_client_ip()) - - # Determine if we should apply crawler page limit based on config and IP category - should_apply_crawler_limit = False - if self.config.infinite_pages_for_malicious: - if ( - ip_category == "good_crawler" or ip_category == "regular_user" - ) and page_visit_count >= self.config.max_pages_limit: - should_apply_crawler_limit = True - else: - if ( - ip_category == "good_crawler" - or ip_category == "bad_crawler" - or ip_category == "attacker" - ) and page_visit_count >= self.config.max_pages_limit: - should_apply_crawler_limit = True - - # If good crawler reached max pages, return a simple page with no links - if should_apply_crawler_limit: - return html_templates.main_page( - Handler.counter, "

Crawl limit reached.

" - ) - - num_pages = random.randint(*self.config.links_per_page_range) - - # Build the content HTML - content = "" - - # Add canary token if needed - if Handler.counter <= 0 and self.config.canary_token_url: - content += f""" - -""" - - # Add links - if self.webpages is None: - for _ in range(num_pages): - address = "".join( - [ - random.choice(self.config.char_space) - for _ in range(random.randint(*self.config.links_length_range)) - ] - ) - content += f""" - -""" - else: - for _ in range(num_pages): - address = random.choice(self.webpages) - content += f""" - -""" - - # Return the complete page using the template - return html_templates.main_page(Handler.counter, content) - - def do_HEAD(self): - """Sends header information""" - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - - def do_POST(self): - """Handle POST requests (mainly login attempts)""" - client_ip = self._get_client_ip() - user_agent = self.headers.get("User-Agent", "") - post_data = "" - - base_path = urlparse(self.path).path - - content_length = int(self.headers.get("Content-Length", 0)) - if content_length > 0: - post_data = self.rfile.read(content_length).decode( - "utf-8", errors="replace" - ) - - parsed_url = urlparse(self.path) - query_string = parsed_url.query - - if self._handle_deception_response(self.path, query_string, post_data, "POST"): - return - - if base_path in ["/api/search", "/api/sql", "/api/database"]: - self.access_logger.info( - f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}" - ) - - error_msg, content_type, status_code = generate_sql_error_response( - post_data - ) - - try: - if error_msg: - self.access_logger.warning( - f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}" - ) - self.send_response(status_code) - self.send_header("Content-type", content_type) - self.end_headers() - self.wfile.write(error_msg.encode()) - else: - self.send_response(200) - 
self.send_header("Content-type", "application/json") - self.end_headers() - response_data = get_sql_response_with_data(base_path, post_data) - self.wfile.write(response_data.encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error in SQL POST handler: {str(e)}") - return - - if base_path == "/api/contact": - # Parse URL-encoded POST data properly - parsed_data = {} - if post_data: - # Use parse_qs for proper URL decoding - parsed_qs = parse_qs(post_data) - # parse_qs returns lists, get first value of each - parsed_data = {k: v[0] if v else "" for k, v in parsed_qs.items()} - - self.app_logger.debug(f"Parsed contact data: {parsed_data}") - - xss_detected = any(detect_xss_pattern(str(v)) for v in parsed_data.values()) - - if xss_detected: - self.access_logger.warning( - f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}" - ) - else: - self.access_logger.info( - f"[XSS ENDPOINT POST] {client_ip} - {base_path}" - ) - - # Record access for dashboard tracking (including XSS detection) - self.tracker.record_access( - ip=client_ip, - path=self.path, - user_agent=user_agent, - body=post_data, - method="POST", - raw_request=self._build_raw_request(post_data), - ) - - try: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - response_html = generate_xss_response(parsed_data) - self.wfile.write(response_html.encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error in XSS POST handler: {str(e)}") - return - - self.access_logger.warning( - f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}" - ) - - # post_data was already read at the beginning of do_POST, don't read again - if post_data: - self.access_logger.warning(f"[POST DATA] {post_data[:200]}") - - # Parse and log credentials - username, password = self.tracker.parse_credentials(post_data) - if username or password: - # Log to dedicated credentials.log file 
- timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") - credential_line = f"{timestamp}|{client_ip}|{username or 'N/A'}|{password or 'N/A'}|{self.path}" - self.credential_logger.info(credential_line) - - # Also record in tracker for dashboard - self.tracker.record_credential_attempt( - client_ip, self.path, username or "N/A", password or "N/A" - ) - - self.access_logger.warning( - f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}" - ) - - # send the post data (body) to the record_access function so the post data can be used to detect suspicious things. - self.tracker.record_access( - client_ip, - self.path, - user_agent, - post_data, - method="POST", - raw_request=self._build_raw_request(post_data), - ) - - time.sleep(1) - - try: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(html_templates.login_error().encode()) - except BrokenPipeError: - # Client disconnected before receiving response, ignore silently - pass - except Exception as e: - # Log other exceptions but don't crash - self.app_logger.error(f"Failed to send response to {client_ip}: {str(e)}") - - def serve_special_path(self, path: str) -> bool: - """Serve special paths like robots.txt, API endpoints, etc.""" - - # Check SQL injection honeypot endpoints first - if self._handle_sql_endpoint(path): - return True - - try: - if path == "/robots.txt": - self.send_response(200) - self.send_header("Content-type", "text/plain") - self.end_headers() - self.wfile.write(html_templates.robots_txt().encode()) - return True - - if path in ["/credentials.txt", "/passwords.txt", "/admin_notes.txt"]: - self.send_response(200) - self.send_header("Content-type", "text/plain") - self.end_headers() - if "credentials" in path: - self.wfile.write(credentials_txt().encode()) - else: - self.wfile.write(passwords_txt().encode()) - return True - - if path in ["/users.json", "/api_keys.json", "/config.json"]: - 
self.send_response(200) - self.send_header("Content-type", "application/json") - self.end_headers() - if "users" in path: - self.wfile.write(users_json().encode()) - elif "api_keys" in path: - self.wfile.write(api_keys_json().encode()) - else: - self.wfile.write(api_response("/api/config").encode()) - return True - - if path in ["/admin", "/admin/", "/admin/login", "/login"]: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(html_templates.login_form().encode()) - return True - - if path in ["/users", "/user", "/database", "/db", "/search"]: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(html_templates.product_search().encode()) - return True - - if path in ["/info", "/input", "/contact", "/feedback", "/comment"]: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(html_templates.input_form().encode()) - return True - - if path == "/server": - error_html, content_type = generate_server_error() - self.send_response(500) - self.send_header("Content-type", content_type) - self.end_headers() - self.wfile.write(error_html.encode()) - return True - - if path in ["/wp-login.php", "/wp-login", "/wp-admin", "/wp-admin/"]: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(html_templates.wp_login().encode()) - return True - - if path in ["/wp-content/", "/wp-includes/"] or "wordpress" in path.lower(): - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(html_templates.wordpress().encode()) - return True - - if "phpmyadmin" in path.lower() or path in ["/pma/", "/phpMyAdmin/"]: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(html_templates.phpmyadmin().encode()) - return True - - if 
path.startswith("/api/") or path.startswith("/api") or path in ["/.env"]: - self.send_response(200) - self.send_header("Content-type", "application/json") - self.end_headers() - self.wfile.write(api_response(path).encode()) - return True - - if path in [ - "/backup/", - "/uploads/", - "/private/", - "/admin/", - "/config/", - "/database/", - ]: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(directory_listing(path).encode()) - return True - except BrokenPipeError: - # Client disconnected, ignore silently - pass - except Exception as e: - self.app_logger.error(f"Failed to serve special path {path}: {str(e)}") - pass - - return False - - def do_GET(self): - """Responds to webpage requests""" - - client_ip = self._get_client_ip() - - # respond with HTTP error code if client is banned - if self.tracker.is_banned_ip(client_ip): - self.send_response(500) - self.end_headers() - return - - # get request data - user_agent = self.headers.get("User-Agent", "") - request_path = urlparse(self.path).path - self.app_logger.info(f"request_query: {request_path}") - parsed_url = urlparse(self.path) - query_string = parsed_url.query - query_params = parse_qs(query_string) - self.app_logger.info(f"query_params: {query_params}") - - if self._handle_deception_response(self.path, query_string, "", "GET"): - return - - # get database reference - db = get_database() - session = db.session - - # Handle static files for dashboard - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/static/" - ): - - file_path = self.path.replace( - f"{self.config.dashboard_secret_path}/static/", "" - ) - static_dir = os.path.join(os.path.dirname(__file__), "templates", "static") - full_path = os.path.join(static_dir, file_path) - - # Security check: ensure the path is within static directory - if os.path.commonpath( - [full_path, static_dir] - ) == static_dir and os.path.exists(full_path): - 
try: - with open(full_path, "rb") as f: - content = f.read() - self.send_response(200) - if file_path.endswith(".svg"): - self.send_header("Content-type", "image/svg+xml") - elif file_path.endswith(".css"): - self.send_header("Content-type", "text/css") - elif file_path.endswith(".js"): - self.send_header("Content-type", "application/javascript") - else: - self.send_header("Content-type", "application/octet-stream") - self.send_header("Content-Length", str(len(content))) - self.end_headers() - self.wfile.write(content) - return - except Exception as e: - self.app_logger.error(f"Error serving static file: {e}") - - self.send_response(404) - self.send_header("Content-type", "text/plain") - self.end_headers() - self.wfile.write(b"Not found") - return - - if ( - self.config.dashboard_secret_path - and self.path == self.config.dashboard_secret_path - ): - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - try: - stats = self.tracker.get_stats() - self.wfile.write( - generate_dashboard( - stats, self.config.dashboard_secret_path - ).encode() - ) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error generating dashboard: {e}") - return - - # API endpoint for fetching all IP statistics - if ( - self.config.dashboard_secret_path - and self.path == f"{self.config.dashboard_secret_path}/api/all-ip-stats" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - - ip_stats_list = db.get_ip_stats(limit=500) - self.wfile.write(json.dumps({"ips": ip_stats_list}).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching all IP stats: {e}") - 
self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for fetching paginated attackers - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/attackers" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - - page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", ["25"])[0]) - sort_by = query_params.get("sort_by", ["total_requests"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - # Ensure valid parameters - page = max(1, page) - page_size = min(max(1, page_size), 100) # Max 100 per page - - result = db.get_attackers_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching attackers: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for fetching all IPs (all categories) - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/all-ips" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - - # Parse query parameters - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", 
["25"])[0]) - sort_by = query_params.get("sort_by", ["total_requests"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - # Ensure valid parameters - page = max(1, page) - page_size = min(max(1, page_size), 100) # Max 100 per page - - result = db.get_all_ips_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching all IPs: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for fetching IP stats - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/ip-stats/" - ): - ip_address = self.path.replace( - f"{self.config.dashboard_secret_path}/api/ip-stats/", "" - ) - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - # Prevent browser caching - force fresh data from database every time - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - - ip_stats = db.get_ip_stats_by_ip(ip_address) - if ip_stats: - self.wfile.write(json.dumps(ip_stats).encode()) - else: - self.wfile.write(json.dumps({"error": "IP not found"}).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching IP stats: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for paginated honeypot triggers - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/honeypot" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, 
no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", ["5"])[0]) - sort_by = query_params.get("sort_by", ["count"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - page = max(1, page) - page_size = min(max(1, page_size), 100) - - result = db.get_honeypot_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching honeypot data: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for paginated credentials - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/credentials" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", ["5"])[0]) - sort_by = query_params.get("sort_by", ["timestamp"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - page = max(1, page) - page_size = min(max(1, page_size), 100) - - result = db.get_credentials_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - 
self.app_logger.error(f"Error fetching credentials: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for paginated top IPs - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/top-ips" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", ["5"])[0]) - sort_by = query_params.get("sort_by", ["count"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - page = max(1, page) - page_size = min(max(1, page_size), 100) - - result = db.get_top_ips_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching top IPs: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for paginated top paths - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/top-paths" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - page = int(query_params.get("page", ["1"])[0]) - page_size = 
int(query_params.get("page_size", ["5"])[0]) - sort_by = query_params.get("sort_by", ["count"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - page = max(1, page) - page_size = min(max(1, page_size), 100) - - result = db.get_top_paths_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching top paths: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for paginated top user agents - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/top-user-agents" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", ["5"])[0]) - sort_by = query_params.get("sort_by", ["count"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - page = max(1, page) - page_size = min(max(1, page_size), 100) - - result = db.get_top_user_agents_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching top user agents: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for paginated attack types - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/attack-types" 
- ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", ["5"])[0]) - sort_by = query_params.get("sort_by", ["timestamp"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - page = max(1, page) - page_size = min(max(1, page_size), 100) - - result = db.get_attack_types_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching attack types: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for attack types statistics (aggregated) - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/attack-types-stats" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - limit = int(query_params.get("limit", ["20"])[0]) - limit = min(max(1, limit), 100) # Cap at 100 - - result = db.get_attack_types_stats(limit=limit) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching attack types stats: 
{e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for fetching raw request by log ID - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/raw-request/" - ): - try: - # Extract log ID from path: /api/raw-request/123 - log_id = int(self.path.split("/")[-1]) - raw_request = db.get_raw_request_by_id(log_id) - - if raw_request is None: - self.send_response(404) - self.send_header("Content-type", "application/json") - self.end_headers() - self.wfile.write( - json.dumps({"error": "Raw request not found"}).encode() - ) - else: - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", - "no-store, no-cache, must-revalidate, max-age=0", - ) - self.end_headers() - self.wfile.write(json.dumps({"raw_request": raw_request}).encode()) - except (ValueError, IndexError): - self.send_response(400) - self.send_header("Content-type", "application/json") - self.end_headers() - self.wfile.write(json.dumps({"error": "Invalid log ID"}).encode()) - except Exception as e: - self.app_logger.error(f"Error fetching raw request: {e}") - self.send_response(500) - self.send_header("Content-type", "application/json") - self.end_headers() - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for downloading malicious IPs blocklist file - if ( - self.config.dashboard_secret_path - and request_path == f"{self.config.dashboard_secret_path}/api/get_banlist" - ): - - # get fwtype from request params - fwtype = query_params.get("fwtype", ["iptables"])[0] - filename = f"{fwtype}_banlist.txt" - if fwtype == "raw": - filename = f"malicious_ips.txt" - - file_path = os.path.join(self.config.exports_path, f"{filename}") - - try: - if os.path.exists(file_path): - with open(file_path, "rb") as f: - content = f.read() - self.send_response(200) - 
self.send_header("Content-type", "text/plain") - self.send_header( - "Content-Disposition", - f'attachment; filename="{filename}"', - ) - self.send_header("Content-Length", str(len(content))) - self.end_headers() - self.wfile.write(content) - else: - self.send_response(404) - self.send_header("Content-type", "text/plain") - self.end_headers() - self.wfile.write(b"File not found") - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error serving malicious IPs file: {e}") - self.send_response(500) - self.send_header("Content-type", "text/plain") - self.end_headers() - self.wfile.write(b"Internal server error") - return - - # API endpoint for downloading malicious IPs file - if ( - self.config.dashboard_secret_path - and self.path - == f"{self.config.dashboard_secret_path}/api/download/malicious_ips.txt" - ): - - file_path = os.path.join( - os.path.dirname(__file__), "exports", "malicious_ips.txt" - ) - try: - if os.path.exists(file_path): - with open(file_path, "rb") as f: - content = f.read() - self.send_response(200) - self.send_header("Content-type", "text/plain") - self.send_header( - "Content-Disposition", - 'attachment; filename="malicious_ips.txt"', - ) - self.send_header("Content-Length", str(len(content))) - self.end_headers() - self.wfile.write(content) - else: - self.send_response(404) - self.send_header("Content-type", "text/plain") - self.end_headers() - self.wfile.write(b"File not found") - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error serving malicious IPs file: {e}") - self.send_response(500) - self.send_header("Content-type", "text/plain") - self.end_headers() - self.wfile.write(b"Internal server error") - return - - self.tracker.record_access( - client_ip, - self.path, - user_agent, - method="GET", - raw_request=self._build_raw_request(), - ) - - if self.tracker.is_suspicious_user_agent(user_agent): - self.access_logger.warning( - f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} 
- {self.path}" - ) - - if self._should_return_error(): - error_code = self._get_random_error_code() - self.access_logger.info( - f"Returning error {error_code} to {client_ip} - {self.path}" - ) - self.send_response(error_code) - self.end_headers() - return - - if self.serve_special_path(self.path): - return - - time.sleep(self.config.delay / 1000.0) - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - - try: - # Increment page visit counter for this IP and get the current count - current_visit_count = self._increment_page_visit(client_ip) - self.wfile.write( - self.generate_page(self.path, current_visit_count).encode() - ) - - Handler.counter -= 1 - - if Handler.counter < 0: - Handler.counter = self.config.canary_token_tries - except BrokenPipeError: - # Client disconnected, ignore silently - pass - except Exception as e: - self.app_logger.error(f"Error generating page: {e}") - - def log_message(self, format, *args): - """Override to customize logging - uses access logger""" - client_ip = self._get_client_ip() - self.access_logger.info(f"{client_ip} - {format % args}") diff --git a/src/middleware/__init__.py b/src/middleware/__init__.py new file mode 100644 index 0000000..be27011 --- /dev/null +++ b/src/middleware/__init__.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 + +""" +FastAPI middleware package for the Krawl honeypot. +""" diff --git a/src/middleware/ban_check.py b/src/middleware/ban_check.py new file mode 100644 index 0000000..a3be689 --- /dev/null +++ b/src/middleware/ban_check.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 + +""" +Middleware for checking if client IP is banned. 
+""" + +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.requests import Request +from starlette.responses import Response + +from dependencies import get_client_ip + + +class BanCheckMiddleware(BaseHTTPMiddleware): + async def dispatch(self, request: Request, call_next): + # Skip ban check for dashboard routes + config = request.app.state.config + dashboard_prefix = "/" + config.dashboard_secret_path.lstrip("/") + if request.url.path.startswith(dashboard_prefix): + return await call_next(request) + + client_ip = get_client_ip(request) + tracker = request.app.state.tracker + + if tracker.is_banned_ip(client_ip): + return Response(status_code=500) + + response = await call_next(request) + return response diff --git a/src/middleware/deception.py b/src/middleware/deception.py new file mode 100644 index 0000000..6070a14 --- /dev/null +++ b/src/middleware/deception.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 + +""" +Middleware for deception response detection (path traversal, XXE, command injection). +Short-circuits the request if a deception response is triggered. 
+""" + +import asyncio +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.requests import Request +from starlette.responses import Response + +from deception_responses import detect_and_respond_deception +from dependencies import get_client_ip, build_raw_request +from logger import get_app_logger, get_access_logger + + +class DeceptionMiddleware(BaseHTTPMiddleware): + async def dispatch(self, request: Request, call_next): + path = request.url.path + + # Skip deception detection for dashboard routes + config = request.app.state.config + dashboard_prefix = "/" + config.dashboard_secret_path.lstrip("/") + if path.startswith(dashboard_prefix): + return await call_next(request) + + query = request.url.query or "" + method = request.method + + # Read body for POST requests + body = "" + if method == "POST": + body_bytes = await request.body() + body = body_bytes.decode("utf-8", errors="replace") + + result = detect_and_respond_deception(path, query, body, method) + + if result: + response_body, content_type, status_code = result + client_ip = get_client_ip(request) + user_agent = request.headers.get("User-Agent", "") + app_logger = get_app_logger() + access_logger = get_access_logger() + + # Determine attack type for logging + full_input = f"{path} {query} {body}".lower() + attack_type_log = "UNKNOWN" + + if ( + "passwd" in path.lower() + or "shadow" in path.lower() + or ".." in path + or ".." 
in query + ): + attack_type_log = "PATH_TRAVERSAL" + elif body and (" dict: + return { + "Cache-Control": "no-store, no-cache, must-revalidate, max-age=0", + "Pragma": "no-cache", + "Expires": "0", + "Access-Control-Allow-Origin": "*", + } + + +@router.get("/api/all-ip-stats") +async def all_ip_stats(request: Request): + db = get_db() + try: + ip_stats_list = db.get_ip_stats(limit=500) + return JSONResponse( + content={"ips": ip_stats_list}, + headers=_no_cache_headers(), + ) + except Exception as e: + get_app_logger().error(f"Error fetching all IP stats: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/attackers") +async def attackers( + request: Request, + page: int = Query(1), + page_size: int = Query(25), + sort_by: str = Query("total_requests"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 100) + + try: + result = db.get_attackers_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching attackers: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/all-ips") +async def all_ips( + request: Request, + page: int = Query(1), + page_size: int = Query(25), + sort_by: str = Query("total_requests"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 100) + + try: + result = db.get_all_ips_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching all IPs: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/ip-stats/{ip_address:path}") +async def 
ip_stats(ip_address: str, request: Request): + db = get_db() + try: + stats = db.get_ip_stats_by_ip(ip_address) + if stats: + return JSONResponse(content=stats, headers=_no_cache_headers()) + else: + return JSONResponse( + content={"error": "IP not found"}, headers=_no_cache_headers() + ) + except Exception as e: + get_app_logger().error(f"Error fetching IP stats: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/honeypot") +async def honeypot( + request: Request, + page: int = Query(1), + page_size: int = Query(5), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 100) + + try: + result = db.get_honeypot_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching honeypot data: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/credentials") +async def credentials( + request: Request, + page: int = Query(1), + page_size: int = Query(5), + sort_by: str = Query("timestamp"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 100) + + try: + result = db.get_credentials_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching credentials: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/top-ips") +async def top_ips( + request: Request, + page: int = Query(1), + page_size: int = Query(5), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, 
page_size), 100) + + try: + result = db.get_top_ips_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching top IPs: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/top-paths") +async def top_paths( + request: Request, + page: int = Query(1), + page_size: int = Query(5), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 100) + + try: + result = db.get_top_paths_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching top paths: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/top-user-agents") +async def top_user_agents( + request: Request, + page: int = Query(1), + page_size: int = Query(5), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 100) + + try: + result = db.get_top_user_agents_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching top user agents: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/attack-types-stats") +async def attack_types_stats( + request: Request, + limit: int = Query(20), +): + db = get_db() + limit = min(max(1, limit), 100) + + try: + result = db.get_attack_types_stats(limit=limit) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + 
get_app_logger().error(f"Error fetching attack types stats: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/attack-types") +async def attack_types( + request: Request, + page: int = Query(1), + page_size: int = Query(5), + sort_by: str = Query("timestamp"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 100) + + try: + result = db.get_attack_types_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching attack types: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/raw-request/{log_id:int}") +async def raw_request(log_id: int, request: Request): + db = get_db() + try: + raw = db.get_raw_request_by_id(log_id) + if raw is None: + return JSONResponse( + content={"error": "Raw request not found"}, status_code=404 + ) + return JSONResponse(content={"raw_request": raw}, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching raw request: {e}") + return JSONResponse(content={"error": str(e)}, status_code=500) + + +@router.get("/api/get_banlist") +async def get_banlist(request: Request, fwtype: str = Query("iptables")): + config = request.app.state.config + + filename = f"{fwtype}_banlist.txt" + if fwtype == "raw": + filename = "malicious_ips.txt" + + file_path = os.path.join(config.exports_path, filename) + + try: + if os.path.exists(file_path): + with open(file_path, "rb") as f: + content = f.read() + return Response( + content=content, + status_code=200, + media_type="text/plain", + headers={ + "Content-Disposition": f'attachment; filename="{filename}"', + "Content-Length": str(len(content)), + }, + ) + else: + return PlainTextResponse("File not found", status_code=404) + except Exception 
as e: + get_app_logger().error(f"Error serving malicious IPs file: {e}") + return PlainTextResponse("Internal server error", status_code=500) + + +@router.get("/api/download/malicious_ips.txt") +async def download_malicious_ips(request: Request): + config = request.app.state.config + file_path = os.path.join(config.exports_path, "malicious_ips.txt") + + try: + if os.path.exists(file_path): + with open(file_path, "rb") as f: + content = f.read() + return Response( + content=content, + status_code=200, + media_type="text/plain", + headers={ + "Content-Disposition": 'attachment; filename="malicious_ips.txt"', + "Content-Length": str(len(content)), + }, + ) + else: + return PlainTextResponse("File not found", status_code=404) + except Exception as e: + get_app_logger().error(f"Error serving malicious IPs file: {e}") + return PlainTextResponse("Internal server error", status_code=500) diff --git a/src/routes/dashboard.py b/src/routes/dashboard.py new file mode 100644 index 0000000..6f5773b --- /dev/null +++ b/src/routes/dashboard.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python3 + +""" +Dashboard page route. +Renders the main dashboard page with server-side data for initial load. 
+""" + +from fastapi import APIRouter, Request + +from dependencies import get_db, get_templates + +router = APIRouter() + + +@router.get("") +@router.get("/") +async def dashboard_page(request: Request): + db = get_db() + config = request.app.state.config + dashboard_path = "/" + config.dashboard_secret_path.lstrip("/") + + # Get initial data for server-rendered sections + stats = db.get_dashboard_counts() + suspicious = db.get_recent_suspicious(limit=20) + + # Get credential count for the stats card + cred_result = db.get_credentials_paginated(page=1, page_size=1) + stats["credential_count"] = cred_result["pagination"]["total"] + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/index.html", + { + "request": request, + "dashboard_path": dashboard_path, + "stats": stats, + "suspicious_activities": suspicious, + }, + ) diff --git a/src/routes/honeypot.py b/src/routes/honeypot.py new file mode 100644 index 0000000..6db1c65 --- /dev/null +++ b/src/routes/honeypot.py @@ -0,0 +1,498 @@ +#!/usr/bin/env python3 + +""" +Honeypot trap routes for the Krawl deception server. +Migrated from handler.py serve_special_path(), do_POST(), and do_GET() catch-all. 
+""" + +import asyncio +import random +import time +from datetime import datetime +from urllib.parse import urlparse, parse_qs, unquote_plus + +from fastapi import APIRouter, Request, Response, Depends +from fastapi.responses import HTMLResponse, PlainTextResponse, JSONResponse + +from dependencies import ( + get_tracker, + get_app_config, + get_client_ip, + build_raw_request, +) +from config import Config +from tracker import AccessTracker +from templates import html_templates +from generators import ( + credentials_txt, + passwords_txt, + users_json, + api_keys_json, + api_response, + directory_listing, +) +from deception_responses import ( + generate_sql_error_response, + get_sql_response_with_data, + detect_xss_pattern, + generate_xss_response, + generate_server_error, +) +from wordlists import get_wordlists +from logger import get_app_logger, get_access_logger, get_credential_logger + +# --- Auto-tracking dependency --- +# Records requests that match attack patterns or honeypot trap paths. 
+ + +async def _track_honeypot_request(request: Request): + """Record access for requests with attack patterns or honeypot path hits.""" + tracker = request.app.state.tracker + client_ip = get_client_ip(request) + user_agent = request.headers.get("User-Agent", "") + path = request.url.path + + body = "" + if request.method in ("POST", "PUT"): + body_bytes = await request.body() + body = body_bytes.decode("utf-8", errors="replace") + + # Check attack patterns in path and body + attack_findings = tracker.detect_attack_type(path) + + if body: + import urllib.parse + + decoded_body = urllib.parse.unquote(body) + attack_findings.extend(tracker.detect_attack_type(decoded_body)) + + # Record if attack pattern detected OR path is a honeypot trap + if attack_findings or tracker.is_honeypot_path(path): + tracker.record_access( + ip=client_ip, + path=path, + user_agent=user_agent, + body=body, + method=request.method, + raw_request=build_raw_request(request, body), + ) + + +router = APIRouter(dependencies=[Depends(_track_honeypot_request)]) + + +# --- Helper functions --- + + +def _should_return_error(config: Config) -> bool: + if config.probability_error_codes <= 0: + return False + return random.randint(1, 100) <= config.probability_error_codes + + +def _get_random_error_code() -> int: + wl = get_wordlists() + error_codes = wl.error_codes + if not error_codes: + error_codes = [400, 401, 403, 404, 500, 502, 503] + return random.choice(error_codes) + + +# --- HEAD --- + + +@router.head("/{path:path}") +async def handle_head(path: str): + return Response(status_code=200, headers={"Content-Type": "text/html"}) + + +# --- POST routes --- + + +@router.post("/api/search") +@router.post("/api/sql") +@router.post("/api/database") +async def sql_endpoint_post(request: Request): + client_ip = get_client_ip(request) + access_logger = get_access_logger() + + body_bytes = await request.body() + post_data = body_bytes.decode("utf-8", errors="replace") + + base_path = request.url.path + 
access_logger.info( + f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}" + ) + + error_msg, content_type, status_code = generate_sql_error_response(post_data) + + if error_msg: + access_logger.warning( + f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}" + ) + return Response( + content=error_msg, status_code=status_code, media_type=content_type + ) + else: + response_data = get_sql_response_with_data(base_path, post_data) + return Response( + content=response_data, status_code=200, media_type="application/json" + ) + + +@router.post("/api/contact") +async def contact_post(request: Request): + client_ip = get_client_ip(request) + user_agent = request.headers.get("User-Agent", "") + tracker = request.app.state.tracker + access_logger = get_access_logger() + app_logger = get_app_logger() + + body_bytes = await request.body() + post_data = body_bytes.decode("utf-8", errors="replace") + + parsed_data = {} + if post_data: + parsed_qs = parse_qs(post_data) + parsed_data = {k: v[0] if v else "" for k, v in parsed_qs.items()} + + xss_detected = any(detect_xss_pattern(str(v)) for v in parsed_data.values()) + + if xss_detected: + access_logger.warning( + f"[XSS ATTEMPT DETECTED] {client_ip} - {request.url.path} - Data: {post_data[:200]}" + ) + else: + access_logger.info(f"[XSS ENDPOINT POST] {client_ip} - {request.url.path}") + + response_html = generate_xss_response(parsed_data) + return HTMLResponse(content=response_html, status_code=200) + + +@router.post("/{path:path}") +async def credential_capture_post(request: Request, path: str): + """Catch-all POST handler for credential capture.""" + client_ip = get_client_ip(request) + user_agent = request.headers.get("User-Agent", "") + tracker = request.app.state.tracker + access_logger = get_access_logger() + credential_logger = get_credential_logger() + + body_bytes = await request.body() + post_data = body_bytes.decode("utf-8", errors="replace") + + full_path = 
f"/{path}" + + access_logger.warning( + f"[LOGIN ATTEMPT] {client_ip} - {full_path} - {user_agent[:50]}" + ) + + if post_data: + access_logger.warning(f"[POST DATA] {post_data[:200]}") + + username, password = tracker.parse_credentials(post_data) + if username or password: + timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") + credential_line = f"{timestamp}|{client_ip}|{username or 'N/A'}|{password or 'N/A'}|{full_path}" + credential_logger.info(credential_line) + + tracker.record_credential_attempt( + client_ip, full_path, username or "N/A", password or "N/A" + ) + + access_logger.warning( + f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {full_path}" + ) + + await asyncio.sleep(1) + return HTMLResponse(content=html_templates.login_error(), status_code=200) + + +# --- GET special paths --- + + +@router.get("/robots.txt") +async def robots_txt(): + return PlainTextResponse(html_templates.robots_txt()) + + +@router.get("/credentials.txt") +async def fake_credentials(): + return PlainTextResponse(credentials_txt()) + + +@router.get("/passwords.txt") +@router.get("/admin_notes.txt") +async def fake_passwords(): + return PlainTextResponse(passwords_txt()) + + +@router.get("/users.json") +async def fake_users_json(): + return JSONResponse(content=None, status_code=200, media_type="application/json") + + +@router.get("/api_keys.json") +async def fake_api_keys(): + return Response( + content=api_keys_json(), status_code=200, media_type="application/json" + ) + + +@router.get("/config.json") +async def fake_config_json(): + return Response( + content=api_response("/api/config"), + status_code=200, + media_type="application/json", + ) + + +# Override the generic /users.json to return actual content +@router.get("/users.json", include_in_schema=False) +async def fake_users_json_content(): + return Response( + content=users_json(), status_code=200, media_type="application/json" + ) + + +@router.get("/admin") +@router.get("/admin/") 
+@router.get("/admin/login") +@router.get("/login") +async def fake_login(): + return HTMLResponse(html_templates.login_form()) + + +@router.get("/users") +@router.get("/user") +@router.get("/database") +@router.get("/db") +@router.get("/search") +async def fake_product_search(): + return HTMLResponse(html_templates.product_search()) + + +@router.get("/info") +@router.get("/input") +@router.get("/contact") +@router.get("/feedback") +@router.get("/comment") +async def fake_input_form(): + return HTMLResponse(html_templates.input_form()) + + +@router.get("/server") +async def fake_server_error(): + error_html, content_type = generate_server_error() + return Response(content=error_html, status_code=500, media_type=content_type) + + +@router.get("/wp-login.php") +@router.get("/wp-login") +@router.get("/wp-admin") +@router.get("/wp-admin/") +async def fake_wp_login(): + return HTMLResponse(html_templates.wp_login()) + + +@router.get("/wp-content/{path:path}") +@router.get("/wp-includes/{path:path}") +async def fake_wordpress(path: str = ""): + return HTMLResponse(html_templates.wordpress()) + + +@router.get("/phpmyadmin") +@router.get("/phpmyadmin/{path:path}") +@router.get("/phpMyAdmin") +@router.get("/phpMyAdmin/{path:path}") +@router.get("/pma") +@router.get("/pma/") +async def fake_phpmyadmin(path: str = ""): + return HTMLResponse(html_templates.phpmyadmin()) + + +@router.get("/.env") +async def fake_env(): + return Response( + content=api_response("/.env"), status_code=200, media_type="application/json" + ) + + +@router.get("/backup/") +@router.get("/uploads/") +@router.get("/private/") +@router.get("/config/") +@router.get("/database/") +async def fake_directory_listing(request: Request): + return HTMLResponse(directory_listing(request.url.path)) + + +# --- SQL injection honeypot GET endpoints --- + + +@router.get("/api/search") +@router.get("/api/sql") +@router.get("/api/database") +async def sql_endpoint_get(request: Request): + client_ip = 
get_client_ip(request) + access_logger = get_access_logger() + app_logger = get_app_logger() + + base_path = request.url.path + request_query = request.url.query or "" + + error_msg, content_type, status_code = generate_sql_error_response(request_query) + + if error_msg: + access_logger.warning( + f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {request_query[:100] if request_query else 'empty'}" + ) + return Response( + content=error_msg, status_code=status_code, media_type=content_type + ) + else: + access_logger.info( + f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {request_query[:100] if request_query else 'empty'}" + ) + response_data = get_sql_response_with_data(base_path, request_query) + return Response( + content=response_data, status_code=200, media_type="application/json" + ) + + +# --- Generic /api/* fake endpoints --- + + +@router.get("/api/{path:path}") +async def fake_api_catchall(request: Request, path: str): + full_path = f"/api/{path}" + return Response( + content=api_response(full_path), status_code=200, media_type="application/json" + ) + + +# --- Catch-all GET (trap pages with random links) --- +# This MUST be registered last in the router + + +@router.get("/{path:path}") +async def trap_page(request: Request, path: str): + """Generate trap page with random links. 
This is the catch-all route.""" + config = request.app.state.config + tracker = request.app.state.tracker + app_logger = get_app_logger() + access_logger = get_access_logger() + + client_ip = get_client_ip(request) + user_agent = request.headers.get("User-Agent", "") + full_path = f"/{path}" if path else "/" + + # Check wordpress-like paths + if "wordpress" in full_path.lower(): + return HTMLResponse(html_templates.wordpress()) + + is_suspicious = tracker.is_suspicious_user_agent(user_agent) + + if is_suspicious: + access_logger.warning( + f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {full_path}" + ) + + # Record access unless the router dependency already handled it + # (attack pattern or honeypot path → already recorded by _track_honeypot_request) + if not tracker.detect_attack_type(full_path) and not tracker.is_honeypot_path( + full_path + ): + tracker.record_access( + ip=client_ip, + path=full_path, + user_agent=user_agent, + method=request.method, + raw_request=build_raw_request(request) if is_suspicious else "", + ) + + # Random error response + if _should_return_error(config): + error_code = _get_random_error_code() + access_logger.info(f"Returning error {error_code} to {client_ip} - {full_path}") + return Response(status_code=error_code) + + # Response delay + await asyncio.sleep(config.delay / 1000.0) + + # Increment page visit counter + current_visit_count = tracker.increment_page_visit(client_ip) + + # Generate page + page_html = _generate_page( + config, tracker, client_ip, full_path, current_visit_count, request.app + ) + + # Decrement canary counter + request.app.state.counter -= 1 + if request.app.state.counter < 0: + request.app.state.counter = config.canary_token_tries + + return HTMLResponse(content=page_html, status_code=200) + + +def _generate_page(config, tracker, client_ip, seed, page_visit_count, app) -> str: + """Generate a webpage containing random links or canary token.""" + random.seed(seed) + + ip_category = 
tracker.get_category_by_ip(client_ip) + + should_apply_crawler_limit = False + if config.infinite_pages_for_malicious: + if ( + ip_category == "good_crawler" or ip_category == "regular_user" + ) and page_visit_count >= config.max_pages_limit: + should_apply_crawler_limit = True + else: + if ( + ip_category == "good_crawler" + or ip_category == "bad_crawler" + or ip_category == "attacker" + ) and page_visit_count >= config.max_pages_limit: + should_apply_crawler_limit = True + + if should_apply_crawler_limit: + return html_templates.main_page( + app.state.counter, "

Crawl limit reached.

" + ) + + num_pages = random.randint(*config.links_per_page_range) + content = "" + + if app.state.counter <= 0 and config.canary_token_url: + content += f""" + +""" + + webpages = app.state.webpages + if webpages is None: + for _ in range(num_pages): + address = "".join( + [ + random.choice(config.char_space) + for _ in range(random.randint(*config.links_length_range)) + ] + ) + content += f""" + +""" + else: + for _ in range(num_pages): + address = random.choice(webpages) + content += f""" + +""" + + return html_templates.main_page(app.state.counter, content) diff --git a/src/routes/htmx.py b/src/routes/htmx.py new file mode 100644 index 0000000..4013ce5 --- /dev/null +++ b/src/routes/htmx.py @@ -0,0 +1,307 @@ +#!/usr/bin/env python3 + +""" +HTMX fragment endpoints. +Server-rendered HTML partials for table pagination, sorting, and IP details. +""" + +from fastapi import APIRouter, Request, Response, Query + +from dependencies import get_db, get_templates + +router = APIRouter() + + +def _dashboard_path(request: Request) -> str: + config = request.app.state.config + return "/" + config.dashboard_secret_path.lstrip("/") + + +# ── Honeypot Triggers ──────────────────────────────────────────────── + + +@router.get("/htmx/honeypot") +async def htmx_honeypot( + request: Request, + page: int = Query(1), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + result = db.get_honeypot_paginated( + page=max(1, page), page_size=5, sort_by=sort_by, sort_order=sort_order + ) + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/honeypot_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": result["honeypots"], + "pagination": result["pagination"], + "sort_by": sort_by, + "sort_order": sort_order, + }, + ) + + +# ── Top IPs ────────────────────────────────────────────────────────── + + +@router.get("/htmx/top-ips") +async def htmx_top_ips( + request: Request, + 
page: int = Query(1), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + result = db.get_top_ips_paginated( + page=max(1, page), page_size=5, sort_by=sort_by, sort_order=sort_order + ) + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/top_ips_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": result["ips"], + "pagination": result["pagination"], + "sort_by": sort_by, + "sort_order": sort_order, + }, + ) + + +# ── Top Paths ──────────────────────────────────────────────────────── + + +@router.get("/htmx/top-paths") +async def htmx_top_paths( + request: Request, + page: int = Query(1), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + result = db.get_top_paths_paginated( + page=max(1, page), page_size=5, sort_by=sort_by, sort_order=sort_order + ) + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/top_paths_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": result["paths"], + "pagination": result["pagination"], + "sort_by": sort_by, + "sort_order": sort_order, + }, + ) + + +# ── Top User-Agents ───────────────────────────────────────────────── + + +@router.get("/htmx/top-ua") +async def htmx_top_ua( + request: Request, + page: int = Query(1), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + result = db.get_top_user_agents_paginated( + page=max(1, page), page_size=5, sort_by=sort_by, sort_order=sort_order + ) + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/top_ua_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": result["user_agents"], + "pagination": result["pagination"], + "sort_by": sort_by, + "sort_order": sort_order, + }, + ) + + +# ── Attackers 
──────────────────────────────────────────────────────── + + +@router.get("/htmx/attackers") +async def htmx_attackers( + request: Request, + page: int = Query(1), + sort_by: str = Query("total_requests"), + sort_order: str = Query("desc"), +): + db = get_db() + result = db.get_attackers_paginated( + page=max(1, page), page_size=25, sort_by=sort_by, sort_order=sort_order + ) + + # Normalize pagination key (DB returns total_attackers, template expects total) + pagination = result["pagination"] + if "total_attackers" in pagination and "total" not in pagination: + pagination["total"] = pagination["total_attackers"] + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/attackers_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": result["attackers"], + "pagination": pagination, + "sort_by": sort_by, + "sort_order": sort_order, + }, + ) + + +# ── Credentials ────────────────────────────────────────────────────── + + +@router.get("/htmx/credentials") +async def htmx_credentials( + request: Request, + page: int = Query(1), + sort_by: str = Query("timestamp"), + sort_order: str = Query("desc"), +): + db = get_db() + result = db.get_credentials_paginated( + page=max(1, page), page_size=5, sort_by=sort_by, sort_order=sort_order + ) + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/credentials_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": result["credentials"], + "pagination": result["pagination"], + "sort_by": sort_by, + "sort_order": sort_order, + }, + ) + + +# ── Attack Types ───────────────────────────────────────────────────── + + +@router.get("/htmx/attacks") +async def htmx_attacks( + request: Request, + page: int = Query(1), + sort_by: str = Query("timestamp"), + sort_order: str = Query("desc"), +): + db = get_db() + result = db.get_attack_types_paginated( + page=max(1, page), page_size=5, 
sort_by=sort_by, sort_order=sort_order + ) + + # Transform attack data for template (join attack_types list, map id to log_id) + items = [] + for attack in result["attacks"]: + items.append( + { + "ip": attack["ip"], + "path": attack["path"], + "attack_type": ", ".join(attack.get("attack_types", [])), + "user_agent": attack.get("user_agent", ""), + "timestamp": attack.get("timestamp"), + "log_id": attack.get("id"), + } + ) + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/attack_types_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": items, + "pagination": result["pagination"], + "sort_by": sort_by, + "sort_order": sort_order, + }, + ) + + +# ── Attack Patterns ────────────────────────────────────────────────── + + +@router.get("/htmx/patterns") +async def htmx_patterns( + request: Request, + page: int = Query(1), +): + db = get_db() + page = max(1, page) + page_size = 10 + + # Get all attack type stats and paginate manually + result = db.get_attack_types_stats(limit=100) + all_patterns = [ + {"pattern": item["type"], "count": item["count"]} + for item in result.get("attack_types", []) + ] + + total = len(all_patterns) + total_pages = max(1, (total + page_size - 1) // page_size) + offset = (page - 1) * page_size + items = all_patterns[offset : offset + page_size] + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/patterns_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": items, + "pagination": { + "page": page, + "page_size": page_size, + "total": total, + "total_pages": total_pages, + }, + }, + ) + + +# ── IP Detail ──────────────────────────────────────────────────────── + + +@router.get("/htmx/ip-detail/{ip_address:path}") +async def htmx_ip_detail(ip_address: str, request: Request): + db = get_db() + stats = db.get_ip_stats_by_ip(ip_address) + + if not stats: + stats = {"ip": ip_address, 
"total_requests": "N/A"} + + # Transform fields for template compatibility + list_on = stats.get("list_on") or {} + stats["blocklist_memberships"] = list(list_on.keys()) if list_on else [] + stats["reverse_dns"] = stats.get("reverse") + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/ip_detail.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "stats": stats, + }, + ) diff --git a/src/server.py b/src/server.py deleted file mode 100644 index ed7ecad..0000000 --- a/src/server.py +++ /dev/null @@ -1,138 +0,0 @@ -#!/usr/bin/env python3 - -""" -Main server module for the deception honeypot. -Run this file to start the server. -""" - -import sys -from http.server import HTTPServer - -from config import get_config -from tracker import AccessTracker -from handler import Handler -from logger import ( - initialize_logging, - get_app_logger, - get_access_logger, - get_credential_logger, -) -from database import initialize_database -from tasks_master import get_tasksmaster - - -def print_usage(): - """Print usage information""" - print(f"Usage: {sys.argv[0]} [FILE]\n") - print("FILE is file containing a list of webpage names to serve, one per line.") - print("If no file is provided, random links will be generated.\n") - print("Configuration:") - print(" Configuration is loaded from a YAML file (default: config.yaml)") - print("Set CONFIG_LOCATION environment variable to use a different file.\n") - print("Example config.yaml structure:") - print("server:") - print("port: 5000") - print("delay: 100") - print("links:") - print("min_length: 5") - print("max_length: 15") - print("min_per_page: 10") - print("max_per_page: 15") - print("canary:") - print("token_url: null") - print("token_tries: 10") - print("dashboard:") - print("secret_path: null # auto-generated if not set") - print("database:") - print('path: "data/krawl.db"') - print("retention_days: 30") - print("behavior:") - print("probability_error_codes: 
0") - - -def main(): - """Main entry point for the deception server""" - if "-h" in sys.argv or "--help" in sys.argv: - print_usage() - exit(0) - - config = get_config() - - # Initialize logging with timezone - initialize_logging() - app_logger = get_app_logger() - access_logger = get_access_logger() - credential_logger = get_credential_logger() - - # Initialize database for persistent storage - try: - initialize_database(config.database_path) - app_logger.info(f"Database initialized at: {config.database_path}") - except Exception as e: - app_logger.warning( - f"Database initialization failed: {e}. Continuing with in-memory only." - ) - - tracker = AccessTracker(config.max_pages_limit, config.ban_duration_seconds) - - Handler.config = config - Handler.tracker = tracker - Handler.counter = config.canary_token_tries - Handler.app_logger = app_logger - Handler.access_logger = access_logger - Handler.credential_logger = credential_logger - - if len(sys.argv) == 2: - try: - with open(sys.argv[1], "r") as f: - Handler.webpages = f.readlines() - - if not Handler.webpages: - app_logger.warning( - "The file provided was empty. Using randomly generated links." - ) - Handler.webpages = None - except IOError: - app_logger.warning("Can't read input file. 
Using randomly generated links.") - - # tasks master init - tasks_master = get_tasksmaster() - tasks_master.run_scheduled_tasks() - - try: - - banner = f""" - -============================================================ -DASHBOARD AVAILABLE AT -{config.dashboard_secret_path} -============================================================ - """ - app_logger.info(banner) - app_logger.info(f"Starting deception server on port {config.port}...") - if config.canary_token_url: - app_logger.info( - f"Canary token will appear after {config.canary_token_tries} tries" - ) - else: - app_logger.info( - "No canary token configured (set CANARY_TOKEN_URL to enable)" - ) - - server = HTTPServer(("0.0.0.0", config.port), Handler) - app_logger.info("Server started. Use to stop.") - server.serve_forever() - except KeyboardInterrupt: - app_logger.info("Stopping server...") - server.socket.close() - app_logger.info("Server stopped") - except Exception as e: - app_logger.error(f"Error starting HTTP server on port {config.port}: {e}") - app_logger.error( - f"Make sure you are root, if needed, and that port {config.port} is open." - ) - exit(1) - - -if __name__ == "__main__": - main() diff --git a/src/tasks/db_retention.py b/src/tasks/db_retention.py new file mode 100644 index 0000000..b4feaa7 --- /dev/null +++ b/src/tasks/db_retention.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 + +""" +Database retention task for Krawl honeypot. +Periodically deletes old records based on configured retention_days. +""" + +from datetime import datetime, timedelta + +from database import get_database +from logger import get_app_logger + +# ---------------------- +# TASK CONFIG +# ---------------------- + +TASK_CONFIG = { + "name": "db-retention", + "cron": "0 3 * * *", # Run daily at 3 AM + "enabled": True, + "run_when_loaded": False, +} + +app_logger = get_app_logger() + + +def main(): + """ + Delete access logs, credential attempts, and attack detections + older than the configured retention period. 
+ """ + try: + from config import get_config + from models import AccessLog, CredentialAttempt, AttackDetection + + config = get_config() + retention_days = config.database_retention_days + + db = get_database() + session = db.session + + cutoff = datetime.now() - timedelta(days=retention_days) + + # Delete attack detections linked to old access logs first (FK constraint) + old_log_ids = session.query(AccessLog.id).filter(AccessLog.timestamp < cutoff) + detections_deleted = ( + session.query(AttackDetection) + .filter(AttackDetection.access_log_id.in_(old_log_ids)) + .delete(synchronize_session=False) + ) + + # Delete old access logs + logs_deleted = ( + session.query(AccessLog) + .filter(AccessLog.timestamp < cutoff) + .delete(synchronize_session=False) + ) + + # Delete old credential attempts + creds_deleted = ( + session.query(CredentialAttempt) + .filter(CredentialAttempt.timestamp < cutoff) + .delete(synchronize_session=False) + ) + + session.commit() + + if logs_deleted or creds_deleted or detections_deleted: + app_logger.info( + f"DB retention: Deleted {logs_deleted} access logs, " + f"{detections_deleted} attack detections, " + f"{creds_deleted} credential attempts older than {retention_days} days" + ) + + except Exception as e: + app_logger.error(f"Error during DB retention cleanup: {e}") + finally: + try: + db.close_session() + except Exception: + pass diff --git a/src/tasks/memory_cleanup.py b/src/tasks/memory_cleanup.py index 38a27a2..ac9af92 100644 --- a/src/tasks/memory_cleanup.py +++ b/src/tasks/memory_cleanup.py @@ -2,10 +2,9 @@ """ Memory cleanup task for Krawl honeypot. -Periodically trims unbounded in-memory structures to prevent OOM. +Periodically cleans expired bans and stale entries from ip_page_visits. """ -from database import get_database from logger import get_app_logger # ---------------------- @@ -28,43 +27,29 @@ def main(): Called periodically to prevent unbounded memory growth. 
""" try: - # Import here to avoid circular imports - from handler import Handler + from tracker import get_tracker - if not Handler.tracker: + tracker = get_tracker() + if not tracker: app_logger.warning("Tracker not initialized, skipping memory cleanup") return - # Get memory stats before cleanup - stats_before = Handler.tracker.get_memory_stats() + stats_before = tracker.get_memory_stats() - # Run cleanup - Handler.tracker.cleanup_memory() + tracker.cleanup_memory() - # Get memory stats after cleanup - stats_after = Handler.tracker.get_memory_stats() + stats_after = tracker.get_memory_stats() - # Log changes - access_log_reduced = ( - stats_before["access_log_size"] - stats_after["access_log_size"] - ) - cred_reduced = ( - stats_before["credential_attempts_size"] - - stats_after["credential_attempts_size"] - ) + visits_reduced = stats_before["ip_page_visits"] - stats_after["ip_page_visits"] - if access_log_reduced > 0 or cred_reduced > 0: + if visits_reduced > 0: app_logger.info( - f"Memory cleanup: Trimmed {access_log_reduced} access logs, " - f"{cred_reduced} credential attempts" + f"Memory cleanup: Removed {visits_reduced} stale ip_page_visits entries" ) - # Log current memory state for monitoring app_logger.debug( f"Memory stats after cleanup: " - f"access_logs={stats_after['access_log_size']}, " - f"credentials={stats_after['credential_attempts_size']}, " - f"unique_ips={stats_after['unique_ips_tracked']}" + f"ip_page_visits={stats_after['ip_page_visits']}" ) except Exception as e: diff --git a/src/tasks_master.py b/src/tasks_master.py index 9017c49..1f910f0 100644 --- a/src/tasks_master.py +++ b/src/tasks_master.py @@ -40,7 +40,6 @@ class TasksMaster: def __init__(self, scheduler: BackgroundScheduler): self.tasks = self._config_tasks() self.scheduler = scheduler - self.last_run_times = {} self.scheduler.add_listener( self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR ) @@ -234,9 +233,6 @@ class TasksMaster: app_logger.error(f"Failed to load 
{module_name}: {e}") def job_listener(self, event): - job_id = event.job_id - self.last_run_times[job_id] = datetime.datetime.now() - if event.exception: app_logger.error(f"Job {event.job_id} failed: {event.exception}") else: diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py deleted file mode 100644 index 1a312a1..0000000 --- a/src/templates/dashboard_template.py +++ /dev/null @@ -1,3612 +0,0 @@ -#!/usr/bin/env python3 - -""" -Dashboard template for viewing honeypot statistics. -Customize this template to change the dashboard appearance. -""" - -import html -from datetime import datetime -from zoneinfo import ZoneInfo - -# imports for the __init_subclass__ method, do not remove pls -from firewall import fwtype - - -def _escape(value) -> str: - """Escape HTML special characters to prevent XSS attacks.""" - if value is None: - return "" - return html.escape(str(value)) - - -def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str: - """Format ISO timestamp for display with timezone conversion - - Args: - iso_timestamp: ISO format timestamp string (UTC) - time_only: If True, return only HH:MM:SS, otherwise full datetime - """ - try: - # Parse UTC timestamp - dt = datetime.fromisoformat(iso_timestamp) - if time_only: - return dt.strftime("%H:%M:%S") - return dt.strftime("%Y-%m-%d %H:%M:%S") - except Exception: - # Fallback for old format - return ( - iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp - ) - - -def generate_dashboard(stats: dict, dashboard_path: str = "") -> str: - """Generate dashboard HTML with access statistics - - Args: - stats: Statistics dictionary - dashboard_path: The secret dashboard path for generating API URLs - """ - - # Generate comprehensive suspicious activity rows combining all suspicious events - suspicious_activities = [] - - # Add recent suspicious accesses (attacks) - for log in stats.get("recent_suspicious", [])[-20:]: - suspicious_activities.append( - { - 
"type": "Attack", - "ip": log["ip"], - "path": log["path"], - "user_agent": log["user_agent"][:60], - "timestamp": log["timestamp"], - "details": ( - ", ".join(log.get("attack_types", [])) - if log.get("attack_types") - else "Suspicious behavior" - ), - } - ) - - # Add credential attempts - for cred in stats.get("credential_attempts", [])[-20:]: - suspicious_activities.append( - { - "type": "Credentials", - "ip": cred["ip"], - "path": cred["path"], - "user_agent": "", - "timestamp": cred["timestamp"], - "details": f"User: {cred.get('username', 'N/A')}", - } - ) - - # Add honeypot triggers - for honeypot in stats.get("honeypot_triggered_ips", [])[-20:]: - # honeypot is a tuple (ip, paths) - ip = honeypot[0] - paths = honeypot[1] if isinstance(honeypot[1], list) else [] - suspicious_activities.append( - { - "type": "Honeypot", - "ip": ip, - "path": paths[0] if paths else "Multiple", - "user_agent": "", - "timestamp": "", # Tuples don't have timestamp - "details": f"{len(paths)} trap(s) triggered", - } - ) - - # Sort by timestamp (most recent first) and take last 20 - # Put entries with empty timestamps at the end - try: - suspicious_activities.sort( - key=lambda x: (x["timestamp"] == "", x["timestamp"]), reverse=True - ) - except: - pass - suspicious_activities = suspicious_activities[:20] - - # Generate table rows - suspicious_rows = ( - "\n".join([f""" - {_escape(activity["ip"])} - {_escape(activity["type"])} - {_escape(activity["path"])} - {_escape(activity["details"])} - {format_timestamp(activity["timestamp"], time_only=True)} - - - -
-
Loading stats...
-
- - """ for activity in suspicious_activities]) - or 'No suspicious activity detected' - ) - - return f""" - - - - Krawl Dashboard - - - - - - - -
- -
-
- - -
-
-

Krawl Dashboard

- -
-
-
{stats['total_accesses']}
-
Total Accesses
-
-
-
{stats['unique_ips']}
-
Unique IPs
-
-
-
{stats['unique_paths']}
-
Unique Paths
-
-
-
{stats['suspicious_accesses']}
-
Suspicious Accesses
-
-
-
{stats.get('honeypot_ips', 0)}
-
Honeypot Caught
-
-
-
{len(stats.get('credential_attempts', []))}
-
Credentials Captured
-
-
-
{stats.get('unique_attackers', 0)}
-
Unique Attackers
-
-
- - - -
-
-

Recent Suspicious Activity

- - - - - - - - - - - - {suspicious_rows} - -
IP AddressTypePathDetailsTime
-
- -
-
-

Honeypot Triggers by IP

-
-
- Page 1/1 - - 0 total -
- - -
-
- - - - - - - - - - - - -
#IP AddressAccessed PathsCount
Loading...
-
- -
-
-
-

Top IP Addresses

-
-
- Page 1/1 - - 0 total -
- - -
-
- - - - - - - - - - - -
#IP AddressAccess Count
Loading...
-
- -
-
-

Top User-Agents

-
-
- Page 1/1 - - 0 total -
- - -
-
- - - - - - - - - - - -
#User-AgentCount
Loading...
-
-
-
- -
-
-
-

IP Origins Map

-
- - - - - -
-
-
-
Loading map...
-
-
- -
-
-

Attackers by Total Requests

-
-
- Page 1/1 - - 0 total -
- - -
-
- - - - - - - - - - - - - - - -
#IP AddressTotal RequestsFirst SeenLast SeenLocation
-
- -
-
-

Captured Credentials

-
-
- Page 1/1 - - 0 total -
- - -
-
- - - - - - - - - - - - - - -
#IP AddressUsernamePasswordPathTime
Loading...
-
- -
-
-

Detected Attack Types

-
-
- Page 1/1 - - 0 total -
- - -
-
- - - - - - - - - - - - - - - -
#IP AddressPathAttack TypesUser-AgentTimeActions
Loading...
-
- -
-
-
-
-

Most Recurring Attack Types

-
Top 10
-
-
- -
-
-
- -
-
-
-

Most Recurring Attack Patterns

-
-
- Page 1/1 - - 0 total -
- - -
-
-
- - - - - - - - - - - - - -
#Attack PatternAttack TypeFrequencyIPs
Loading...
-
-
-
-
- -
-
- -
- -
-
-
- -
-
-
-

Raw HTTP Request

- × -
-
-
- -
-
- -
-
-
- - - -""" diff --git a/src/templates/jinja2/base.html b/src/templates/jinja2/base.html new file mode 100644 index 0000000..4583a1d --- /dev/null +++ b/src/templates/jinja2/base.html @@ -0,0 +1,25 @@ + + + + + + Krawl Dashboard + + + + + + + + + + + {% block content %}{% endblock %} + + + + + + {% block scripts %}{% endblock %} + + diff --git a/src/templates/jinja2/dashboard/index.html b/src/templates/jinja2/dashboard/index.html new file mode 100644 index 0000000..5ec70f7 --- /dev/null +++ b/src/templates/jinja2/dashboard/index.html @@ -0,0 +1,154 @@ +{% extends "base.html" %} + +{% block content %} +
+ + {# GitHub logo #} + + + {# Banlist export dropdown - Alpine.js #} +
+
+ + +
+
+ +

Krawl Dashboard

+ + {# Stats cards - server-rendered #} + {% include "dashboard/partials/stats_cards.html" %} + + {# Tab navigation - Alpine.js #} + + + {# ==================== OVERVIEW TAB ==================== #} +
+ + {# Suspicious Activity - server-rendered #} + {% include "dashboard/partials/suspicious_table.html" %} + + {# Honeypot Triggers - HTMX loaded #} +
+

Honeypot Triggers by IP

+
+
Loading...
+
+
+ + {# Top IPs + Top User-Agents side by side #} +
+
+

Top IP Addresses

+
+
Loading...
+
+
+
+

Top User-Agents

+
+
Loading...
+
+
+
+ + {# Top Paths #} +
+

Top Paths

+
+
Loading...
+
+
+
+ + {# ==================== ATTACKS TAB ==================== #} +
+ + {# Map section #} + {% include "dashboard/partials/map_section.html" %} + + {# Attackers table - HTMX loaded #} +
+

Attackers by Total Requests

+
+
Loading...
+
+
+ + {# Credentials table #} +
+

Captured Credentials

+
+
Loading...
+
+
+ + {# Attack Types table #} +
+

Detected Attack Types

+
+
Loading...
+
+
+ + {# Charts + Patterns side by side #} +
+
+

Most Recurring Attack Types

+
+ +
+
+
+

Most Recurring Attack Patterns

+
+
Loading...
+
+
+
+
+ + {# Raw request modal - Alpine.js #} + {% include "dashboard/partials/raw_request_modal.html" %} + +
+{% endblock %} diff --git a/src/templates/jinja2/dashboard/partials/attack_types_table.html b/src/templates/jinja2/dashboard/partials/attack_types_table.html new file mode 100644 index 0000000..8a74572 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/attack_types_table.html @@ -0,0 +1,80 @@ +{# HTMX fragment: Detected Attack Types table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} total +
+ + +
+
+ + + + + + + + + + + + + + {% for attack in items %} + + + + + + + + + + + + + {% else %} + + {% endfor %} + +
#IP AddressPathAttack TypesUser-Agent + Time + Actions
{{ loop.index + (pagination.page - 1) * pagination.page_size }} + {{ attack.ip | e }} + +
+ {{ attack.path | e }} + {% if attack.path | length > 30 %} +
{{ attack.path | e }}
+ {% endif %} +
+
+
+ {{ attack.attack_type | e }} + {% if attack.attack_type | length > 30 %} +
{{ attack.attack_type | e }}
+ {% endif %} +
+
{{ (attack.user_agent | default(''))[:50] | e }}{{ attack.timestamp | format_ts }} + {% if attack.log_id %} + + {% endif %} +
No attacks detected
diff --git a/src/templates/jinja2/dashboard/partials/attackers_table.html b/src/templates/jinja2/dashboard/partials/attackers_table.html new file mode 100644 index 0000000..632137d --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/attackers_table.html @@ -0,0 +1,60 @@ +{# HTMX fragment: Attackers table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} attackers +
+ + +
+
+ + + + + + + + + + + + + {% for ip in items %} + + + + + + + + + + + + {% else %} + + {% endfor %} + +
#IP Address + Total Requests + First SeenLast SeenLocation
{{ loop.index + (pagination.page - 1) * pagination.page_size }} + {{ ip.ip | e }} + {{ ip.total_requests }}{{ ip.first_seen | format_ts }}{{ ip.last_seen | format_ts }}{{ ip.city | default('') | e }}{% if ip.city and ip.country_code %}, {% endif %}{{ ip.country_code | default('N/A') | e }}
No attackers found
diff --git a/src/templates/jinja2/dashboard/partials/credentials_table.html b/src/templates/jinja2/dashboard/partials/credentials_table.html new file mode 100644 index 0000000..ccfb364 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/credentials_table.html @@ -0,0 +1,60 @@ +{# HTMX fragment: Captured Credentials table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} total +
+ + +
+
+ + + + + + + + + + + + + {% for cred in items %} + + + + + + + + + + + + {% else %} + + {% endfor %} + +
#IP AddressUsernamePasswordPath + Time +
{{ loop.index + (pagination.page - 1) * pagination.page_size }} + {{ cred.ip | e }} + {{ cred.username | default('N/A') | e }}{{ cred.password | default('N/A') | e }}{{ cred.path | default('') | e }}{{ cred.timestamp | format_ts }}
No credentials captured
diff --git a/src/templates/jinja2/dashboard/partials/honeypot_table.html b/src/templates/jinja2/dashboard/partials/honeypot_table.html new file mode 100644 index 0000000..35676fc --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/honeypot_table.html @@ -0,0 +1,54 @@ +{# HTMX fragment: Honeypot triggers table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} total +
+ + +
+
+ + + + + + + + + + {% for item in items %} + + + + + + + + + {% else %} + + {% endfor %} + +
#IP Address + Honeypot Triggers +
{{ loop.index + (pagination.page - 1) * pagination.page_size }} + {{ item.ip | e }} + {{ item.count }}
No data
diff --git a/src/templates/jinja2/dashboard/partials/ip_detail.html b/src/templates/jinja2/dashboard/partials/ip_detail.html new file mode 100644 index 0000000..8082859 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/ip_detail.html @@ -0,0 +1,131 @@ +{# HTMX fragment: IP detail expansion row content #} +{# Replaces the ~250 line formatIpStats() JavaScript function #} +
+
+ Total Requests: + {{ stats.total_requests | default('N/A') }} +
+
+ First Seen: + {{ stats.first_seen | format_ts }} +
+
+ Last Seen: + {{ stats.last_seen | format_ts }} +
+ {% if stats.city or stats.country_code %} +
+ Location: + {# either field may be None when only one is known; default(..., true) avoids printing "None", and both are escaped like the other geo/ASN fields #}{{ stats.city | default('', true) | e }}{% if stats.city and stats.country_code %}, {% endif %}{{ stats.country_code | default('', true) | e }} +
+ {% endif %} + {% if stats.reverse_dns %} +
+ Reverse DNS: + {{ stats.reverse_dns | e }} +
+ {% endif %} + {% if stats.asn_org %} +
+ ASN Org: + {{ stats.asn_org | e }} +
+ {% endif %} + {% if stats.asn %} +
+ ASN: + {{ stats.asn | e }} +
+ {% endif %} + {% if stats.isp %} +
+ ISP: + {{ stats.isp | e }} +
+ {% endif %} + + {# Flags #} + {% set flags = [] %} + {% if stats.is_proxy %}{% set _ = flags.append('Proxy') %}{% endif %} + {% if stats.is_hosting %}{% set _ = flags.append('Hosting') %}{% endif %} + {% if flags %} +
+ Flags: + {{ flags | join(', ') }} +
+ {% endif %} + + {% if stats.reputation_score is not none %} +
+ Reputation Score: + + {{ stats.reputation_score }}/100 + +
+ {% endif %} + + {% if stats.category %} +
+ Category: + + {{ stats.category | replace('_', ' ') | title }} + +
+ {% endif %} + + {# Timeline + Reputation section #} + {% if stats.category_history or stats.blocklist_memberships %} +
+
+ {# Behavior Timeline #} + {% if stats.category_history %} +
+
Behavior Timeline
+
+ {% for entry in stats.category_history %} +
+
+
+ {{ entry.new_category | default('unknown') | replace('_', ' ') | title }} + {% if entry.old_category %} from {{ entry.old_category | replace('_', ' ') | title }}{% endif %} +
{{ entry.timestamp | format_ts }} +
+
+ {% endfor %} +
+
+ {% endif %} + + {# Reputation / Listed On #} +
+
Reputation
+ {% if stats.blocklist_memberships %} +
Listed On
+ {% for bl in stats.blocklist_memberships %} + {{ bl | e }} + {% endfor %} + {% else %} + Clean - Not listed on any blocklists + {% endif %} +
+
+
+ {% endif %} +
+ +{# Radar chart (right side) #} +{% if stats.category_scores %} +
+
+ +
+
+{% endif %} diff --git a/src/templates/jinja2/dashboard/partials/map_section.html b/src/templates/jinja2/dashboard/partials/map_section.html new file mode 100644 index 0000000..1191671 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/map_section.html @@ -0,0 +1,27 @@ +{# Map section with filter checkboxes #} +
+

IP Origins Map

+
+ + + + + +
+
+
diff --git a/src/templates/jinja2/dashboard/partials/patterns_table.html b/src/templates/jinja2/dashboard/partials/patterns_table.html new file mode 100644 index 0000000..260f31d --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/patterns_table.html @@ -0,0 +1,43 @@ +{# HTMX fragment: Attack Patterns table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} patterns +
+ + +
+
+ + + + + + + + + + {% for pattern in items %} + + + + + + {% else %} + + {% endfor %} + +
#Attack PatternOccurrences
{{ loop.index + (pagination.page - 1) * pagination.page_size }} +
+ {{ pattern.pattern | e }} + {% if pattern.pattern | length > 40 %} +
{{ pattern.pattern | e }}
+ {% endif %} +
+
{{ pattern.count }}
No patterns found
diff --git a/src/templates/jinja2/dashboard/partials/raw_request_modal.html b/src/templates/jinja2/dashboard/partials/raw_request_modal.html new file mode 100644 index 0000000..06a46bb --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/raw_request_modal.html @@ -0,0 +1,20 @@ +{# Raw request viewer modal - Alpine.js controlled #} +
+
+
+

Raw HTTP Request

+ × +
+
+

+        
+ +
+
diff --git a/src/templates/jinja2/dashboard/partials/stats_cards.html b/src/templates/jinja2/dashboard/partials/stats_cards.html new file mode 100644 index 0000000..260076c --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/stats_cards.html @@ -0,0 +1,31 @@ +{# Stats cards - server-rendered on initial page load #} +
+
+
{{ stats.total_accesses }}
+
Total Accesses
+
+
+
{{ stats.unique_ips }}
+
Unique IPs
+
+
+
{{ stats.unique_paths }}
+
Unique Paths
+
+
+
{{ stats.suspicious_accesses }}
+
Suspicious Accesses
+
+
+
{{ stats.honeypot_ips | default(0) }}
+
Honeypot Caught
+
+
+
{{ stats.credential_count | default(0) }}
+
Credentials Captured
+
+
+
{{ stats.unique_attackers | default(0) }}
+
Unique Attackers
+
+
diff --git a/src/templates/jinja2/dashboard/partials/suspicious_table.html b/src/templates/jinja2/dashboard/partials/suspicious_table.html new file mode 100644 index 0000000..72a0480 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/suspicious_table.html @@ -0,0 +1,39 @@ +{# Recent Suspicious Activity - server-rendered on page load #} +
+

Recent Suspicious Activity

+ + + + + + + + + + + {% for activity in suspicious_activities %} + + + + + + + + + + {% else %} + + {% endfor %} + +
IP AddressPathUser-AgentTime
+ {{ activity.ip | e }} + {{ activity.path | e }}{{ (activity.user_agent | default(''))[:80] | e }}{{ activity.timestamp | format_ts(time_only=True) }}
No suspicious activity detected
+
diff --git a/src/templates/jinja2/dashboard/partials/top_ips_table.html b/src/templates/jinja2/dashboard/partials/top_ips_table.html new file mode 100644 index 0000000..84b335f --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/top_ips_table.html @@ -0,0 +1,54 @@ +{# HTMX fragment: Top IPs table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} total +
+ + +
+
+ + + + + + + + + + {% for item in items %} + + + + + + + + + {% else %} + + {% endfor %} + +
#IP Address + Access Count +
{{ loop.index + (pagination.page - 1) * pagination.page_size }} + {{ item.ip | e }} + {{ item.count }}
No data
diff --git a/src/templates/jinja2/dashboard/partials/top_paths_table.html b/src/templates/jinja2/dashboard/partials/top_paths_table.html new file mode 100644 index 0000000..d1ec6d1 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/top_paths_table.html @@ -0,0 +1,41 @@ +{# HTMX fragment: Top Paths table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} total +
+ + +
+
+ + + + + + + + + + {% for item in items %} + + + + + + {% else %} + + {% endfor %} + +
#Path + Access Count +
{{ loop.index + (pagination.page - 1) * pagination.page_size }}{{ item.path | e }}{{ item.count }}
No data
diff --git a/src/templates/jinja2/dashboard/partials/top_ua_table.html b/src/templates/jinja2/dashboard/partials/top_ua_table.html new file mode 100644 index 0000000..faf487e --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/top_ua_table.html @@ -0,0 +1,41 @@ +{# HTMX fragment: Top User-Agents table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} total +
+ + +
+
+ + + + + + + + + + {% for item in items %} + + + + + + {% else %} + + {% endfor %} + +
#User-Agent + Count +
{{ loop.index + (pagination.page - 1) * pagination.page_size }}{{ item.user_agent | e }}{{ item.count }}
No data
diff --git a/src/templates/static/css/dashboard.css b/src/templates/static/css/dashboard.css new file mode 100644 index 0000000..8a32b80 --- /dev/null +++ b/src/templates/static/css/dashboard.css @@ -0,0 +1,1250 @@ +/* Krawl Dashboard Styles */ +/* Extracted from dashboard_template.py */ + +body { + font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; + background-color: #0d1117; + color: #c9d1d9; + margin: 0; + padding: 20px; +} +.container { + max-width: 1400px; + margin: 0 auto; + position: relative; +} +.github-logo { + position: absolute; + top: 0; + left: 0; + display: flex; + align-items: center; + gap: 8px; + text-decoration: none; + color: #58a6ff; + transition: color 0.2s; +} +.github-logo:hover { + color: #79c0ff; +} +.github-logo svg { + width: 32px; + height: 32px; + fill: currentColor; +} +.github-logo-text { + font-size: 14px; + font-weight: 600; + text-decoration: none; +} +h1 { + color: #58a6ff; + text-align: center; + margin-bottom: 40px; +} +.download-section { + position: absolute; + top: 0; + right: 0; +} +.download-btn { + display: inline-block; + padding: 8px 14px; + background: #238636; + color: #ffffff; + text-decoration: none; + border-radius: 6px; + font-weight: 500; + font-size: 13px; + transition: background 0.2s; + border: 1px solid #2ea043; +} +.download-btn:hover { + background: #2ea043; +} +.download-btn:active { + background: #1f7a2f; +} +.banlist-dropdown { + position: relative; + display: inline-block; + width: 100%; +} +.banlist-dropdown-btn { + display: block; + width: 100%; + padding: 8px 14px; + background: #238636; + color: #ffffff; + text-decoration: none; + border-radius: 6px; + font-weight: 500; + font-size: 13px; + transition: background 0.2s; + border: 1px solid #2ea043; + cursor: pointer; + text-align: left; + box-sizing: border-box; +} +.banlist-dropdown-btn:hover { + background: #2ea043; +} +.banlist-dropdown-menu { + display: none; + position: absolute; + right: 0; + left: 0; + background-color: #161b22; + 
box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.3); + z-index: 1; + border: 1px solid #30363d; + border-radius: 6px; + margin-top: 4px; + overflow: hidden; +} +.banlist-dropdown-menu.show { + display: block; +} +.banlist-dropdown-menu a { + color: #c9d1d9; + padding: 6px 12px; + text-decoration: none; + display: flex; + align-items: center; + gap: 6px; + transition: background 0.2s; + font-size: 12px; +} +.banlist-dropdown-menu a:hover { + background-color: #1c2128; + color: #58a6ff; +} +.banlist-dropdown-menu a.disabled { + color: #6e7681; + cursor: not-allowed; + pointer-events: none; +} +.banlist-icon { + font-size: 14px; +} +.stats-grid { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); + gap: 20px; + margin-bottom: 40px; +} +.stat-card { + background: #161b22; + border: 1px solid #30363d; + border-radius: 6px; + padding: 20px; + text-align: center; +} +.stat-card.alert { + border-color: #f85149; +} +.stat-value { + font-size: 36px; + font-weight: bold; + color: #58a6ff; +} +.stat-value.alert { + color: #f85149; +} +.stat-label { + font-size: 14px; + color: #8b949e; + margin-top: 5px; +} +.table-container { + background: #161b22; + border: 1px solid #30363d; + border-radius: 6px; + padding: 12px; + margin-bottom: 20px; +} +h2 { + color: #58a6ff; + margin-top: 0; +} +table { + width: 100%; + border-collapse: collapse; +} +th, td { + padding: 12px; + text-align: left; + border-bottom: 1px solid #30363d; +} +th { + background: #0d1117; + color: #58a6ff; + font-weight: 600; +} +tr:hover { + background: #1c2128; +} +.rank { + color: #8b949e; + font-weight: bold; +} +.alert-section { + background: #1c1917; + border-left: 4px solid #f85149; +} +th.sortable { + cursor: pointer; + user-select: none; + position: relative; + padding-right: 24px; +} +th.sortable:hover { + background: #1c2128; +} +th.sortable::after { + content: '\21C5'; + position: absolute; + right: 8px; + opacity: 0.5; + font-size: 12px; +} +th.sortable.asc::after { + content: 
'\25B2'; + opacity: 1; +} +th.sortable.desc::after { + content: '\25BC'; + opacity: 1; +} +tbody { + transition: opacity 0.1s ease; +} +tbody { + animation: fadeIn 0.3s ease-in; +} +.ip-row { + transition: background-color 0.2s; +} +.ip-clickable { + cursor: pointer; + color: #58a6ff !important; + font-weight: 500; + text-decoration: underline; + text-decoration-style: dotted; + text-underline-offset: 3px; +} +.ip-clickable:hover { + color: #79c0ff !important; + text-decoration-style: solid; + background: #1c2128; +} +.ip-stats-row { + background: #0d1117; +} +.ip-stats-cell { + padding: 0 !important; +} +.ip-stats-dropdown { + margin-top: 10px; + padding: 15px; + background: #0d1117; + border: 1px solid #30363d; + border-radius: 6px; + font-size: 13px; + display: flex; + gap: 20px; +} +.stats-left { + flex: 1; +} +.stats-right { + flex: 0 0 200px; + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; +} +.radar-chart { + position: relative; + width: 220px; + height: 220px; + overflow: visible; +} +.radar-legend { + margin-top: 10px; + font-size: 11px; +} +.radar-legend-item { + display: flex; + align-items: center; + gap: 6px; + margin: 3px 0; +} +.radar-legend-color { + width: 12px; + height: 12px; + border-radius: 2px; +} +.ip-stats-dropdown .loading { + color: #8b949e; + font-style: italic; +} +.stat-row { + display: flex; + justify-content: space-between; + padding: 5px 0; + border-bottom: 1px solid #21262d; +} +.stat-row:last-child { + border-bottom: none; +} +.stat-label-sm { + color: #8b949e; + font-weight: 500; +} +.stat-value-sm { + color: #58a6ff; + font-weight: 600; +} +.category-badge { + display: inline-block; + padding: 4px 8px; + border-radius: 4px; + font-size: 12px; + font-weight: 600; + text-transform: uppercase; +} +.category-attacker { + background: #f851491a; + color: #f85149; + border: 1px solid #f85149; +} +.category-good-crawler { + background: #3fb9501a; + color: #3fb950; + border: 1px solid #3fb950; 
+} +.category-bad-crawler { + background: #f0883e1a; + color: #f0883e; + border: 1px solid #f0883e; +} +.category-regular-user { + background: #58a6ff1a; + color: #58a6ff; + border: 1px solid #58a6ff; +} +.category-unknown { + background: #8b949e1a; + color: #8b949e; + border: 1px solid #8b949e; +} +.timeline-section { + margin-top: 15px; + padding-top: 15px; + border-top: 1px solid #30363d; +} +.timeline-container { + display: flex; + gap: 20px; + min-height: 200px; +} +.timeline-column { + flex: 1; + min-width: 0; + overflow: auto; + max-height: 350px; +} +.timeline-column:first-child { + flex: 1.5; +} +.timeline-column:last-child { + flex: 1; +} +.timeline-header { + color: #58a6ff; + font-size: 13px; + font-weight: 600; + margin-bottom: 12px; + padding-bottom: 8px; + border-bottom: 1px solid #30363d; +} +.reputation-title { + color: #8b949e; + font-size: 11px; + font-weight: 600; + text-transform: uppercase; + margin-bottom: 8px; +} +.reputation-badge { + display: inline-flex; + align-items: center; + gap: 3px; + padding: 4px 8px; + background: #161b22; + border: 1px solid #f851494d; + border-radius: 4px; + font-size: 11px; + color: #f85149; + text-decoration: none; + transition: all 0.2s; + margin-bottom: 6px; + margin-right: 6px; + white-space: nowrap; +} +.reputation-badge:hover { + background: #1c2128; + border-color: #f85149; +} +.reputation-clean { + display: inline-flex; + align-items: center; + gap: 3px; + padding: 4px 8px; + background: #161b22; + border: 1px solid #3fb9504d; + border-radius: 4px; + font-size: 11px; + color: #3fb950; + margin-bottom: 6px; +} +.timeline { + position: relative; + padding-left: 28px; +} +.timeline::before { + content: ''; + position: absolute; + left: 11px; + top: 0; + bottom: 0; + width: 2px; + background: #30363d; +} +.timeline-item { + position: relative; + padding-bottom: 12px; + font-size: 12px; +} +.timeline-item:last-child { + padding-bottom: 0; +} +.timeline-marker { + position: absolute; + left: -23px; + width: 
14px; + height: 14px; + border-radius: 50%; + border: 2px solid #0d1117; +} +.timeline-marker.attacker { background: #f85149; } +.timeline-marker.good-crawler { background: #3fb950; } +.timeline-marker.bad-crawler { background: #f0883e; } +.timeline-marker.regular-user { background: #58a6ff; } +.timeline-marker.unknown { background: #8b949e; } +.tabs-container { + border-bottom: 1px solid #30363d; + margin-bottom: 30px; + display: flex; + gap: 2px; + background: #161b22; + border-radius: 6px 6px 0 0; + overflow-x: auto; + overflow-y: hidden; +} +.tab-button { + padding: 12px 20px; + background: transparent; + border: none; + color: #8b949e; + font-size: 14px; + font-weight: 500; + cursor: pointer; + white-space: nowrap; + transition: all 0.2s; + border-bottom: 3px solid transparent; + position: relative; + bottom: -1px; +} +.tab-button:hover { + color: #c9d1d9; + background: #1c2128; +} +.tab-button.active { + color: #58a6ff; + border-bottom-color: #58a6ff; +} +.tab-content { + display: none; +} +.tab-content.active { + display: block; +} +.ip-stats-table { + width: 100%; + border-collapse: collapse; +} +.ip-stats-table th, .ip-stats-table td { + padding: 12px; + text-align: left; + border-bottom: 1px solid #30363d; +} +.ip-stats-table th { + background: #0d1117; + color: #58a6ff; + font-weight: 600; +} +.ip-stats-table tr:hover { + background: #1c2128; +} +.ip-detail-modal { + display: none; + position: fixed; + top: 0; + left: 0; + width: 100%; + height: 100%; + background: rgba(0, 0, 0, 0.7); + z-index: 1000; + align-items: center; + justify-content: center; +} +.ip-detail-modal.show { + display: flex; +} +.ip-detail-content { + background: #161b22; + border: 1px solid #30363d; + border-radius: 8px; + padding: 30px; + max-width: 900px; + max-height: 90vh; + overflow-y: auto; + position: relative; +} +.ip-detail-close { + position: absolute; + top: 15px; + right: 15px; + background: none; + border: none; + color: #8b949e; + font-size: 24px; + cursor: pointer; + 
padding: 0; + width: 30px; + height: 30px; + display: flex; + align-items: center; + justify-content: center; +} +.ip-detail-close:hover { + color: #c9d1d9; +} +#attacker-map { + background: #0d1117 !important; +} +.leaflet-container { + background: #0d1117 !important; +} +.leaflet-tile { + filter: none; +} +.leaflet-popup-content-wrapper { + background-color: #0d1117; + color: #c9d1d9; + border: 1px solid #30363d; + border-radius: 6px; + padding: 0; +} +.leaflet-popup-content { + margin: 0; + min-width: 280px; +} +.leaflet-popup-content-wrapper a { + color: #58a6ff; +} +.leaflet-popup-tip { + background: #0d1117; + border: 1px solid #30363d; +} +.ip-detail-popup .leaflet-popup-content-wrapper { + max-width: 340px !important; +} +/* Remove the default leaflet icon background */ +.ip-custom-marker { + background: none !important; + border: none !important; +} +.ip-marker { + border: 2px solid #fff; + border-radius: 50%; + display: flex; + align-items: center; + justify-content: center; + font-size: 10px; + font-weight: bold; + color: white; + cursor: pointer; + transition: transform 0.2s, box-shadow 0.2s; +} +.ip-marker:hover { + transform: scale(1.15); +} +.marker-attacker { + background: #f85149; + box-shadow: 0 0 8px rgba(248, 81, 73, 0.8), inset 0 0 4px rgba(248, 81, 73, 0.5); +} +.marker-attacker:hover { + box-shadow: 0 0 15px rgba(248, 81, 73, 1), inset 0 0 6px rgba(248, 81, 73, 0.7); +} +.marker-bad_crawler { + background: #f0883e; + box-shadow: 0 0 8px rgba(240, 136, 62, 0.8), inset 0 0 4px rgba(240, 136, 62, 0.5); +} +.marker-bad_crawler:hover { + box-shadow: 0 0 15px rgba(240, 136, 62, 1), inset 0 0 6px rgba(240, 136, 62, 0.7); +} +.marker-good_crawler { + background: #3fb950; + box-shadow: 0 0 8px rgba(63, 185, 80, 0.8), inset 0 0 4px rgba(63, 185, 80, 0.5); +} +.marker-good_crawler:hover { + box-shadow: 0 0 15px rgba(63, 185, 80, 1), inset 0 0 6px rgba(63, 185, 80, 0.7); +} +.marker-regular_user { + background: #58a6ff; + box-shadow: 0 0 8px rgba(88, 
166, 255, 0.8), inset 0 0 4px rgba(88, 166, 255, 0.5); +} +.marker-regular_user:hover { + box-shadow: 0 0 15px rgba(88, 166, 255, 1), inset 0 0 6px rgba(88, 166, 255, 0.7); +} +.marker-unknown { + background: #8b949e; + box-shadow: 0 0 8px rgba(139, 148, 158, 0.8), inset 0 0 4px rgba(139, 148, 158, 0.5); +} +.marker-unknown:hover { + box-shadow: 0 0 15px rgba(139, 148, 158, 1), inset 0 0 6px rgba(139, 148, 158, 0.7); +} +.leaflet-bottom.leaflet-right { + display: none !important; +} +.charts-container { + display: grid; + grid-template-columns: 1fr 1fr; + gap: 20px; + margin-top: 20px; +} +.chart-section { + display: flex; + flex-direction: column; +} +.chart-wrapper { + display: flex; + flex-direction: column; +} +#attack-types-chart { + max-height: 350px; +} +#attack-patterns-chart { + max-height: 350px; +} +@media (max-width: 1200px) { + .charts-container { + grid-template-columns: 1fr; + } +} + +/* Raw Request Modal */ +.raw-request-modal { + position: fixed; + z-index: 1000; + left: 0; + top: 0; + width: 100%; + height: 100%; + background-color: rgba(0, 0, 0, 0.7); + overflow: auto; +} +.raw-request-modal-content { + background-color: #161b22; + margin: 5% auto; + padding: 0; + border: 1px solid #30363d; + border-radius: 6px; + width: 80%; + max-width: 900px; + box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5); +} +.raw-request-modal-header { + padding: 16px 20px; + background-color: #21262d; + border-bottom: 1px solid #30363d; + border-radius: 6px 6px 0 0; + display: flex; + justify-content: space-between; + align-items: center; +} +.raw-request-modal-header h3 { + margin: 0; + color: #58a6ff; + font-size: 16px; +} +.raw-request-modal-close { + color: #8b949e; + font-size: 28px; + font-weight: bold; + cursor: pointer; + line-height: 20px; + transition: color 0.2s; +} +.raw-request-modal-close:hover { + color: #c9d1d9; +} +.raw-request-modal-body { + padding: 20px; +} +.raw-request-content { + background-color: #0d1117; + border: 1px solid #30363d; + border-radius: 
6px; + padding: 16px; + font-family: 'Courier New', Courier, monospace; + font-size: 12px; + color: #c9d1d9; + white-space: pre-wrap; + word-wrap: break-word; + max-height: 400px; + overflow-y: auto; +} +.raw-request-modal-footer { + padding: 16px 20px; + background-color: #21262d; + border-top: 1px solid #30363d; + border-radius: 0 0 6px 6px; + text-align: right; +} +.raw-request-download-btn { + padding: 8px 16px; + background: #238636; + color: #ffffff; + border: none; + border-radius: 6px; + font-weight: 500; + font-size: 13px; + cursor: pointer; + transition: background 0.2s; +} +.raw-request-download-btn:hover { + background: #2ea043; +} + +/* Attack Types Cell Styling */ +.attack-types-cell { + max-width: 280px; + position: relative; + display: inline-block; + width: 100%; + overflow: visible; +} +.attack-types-truncated { + display: block; + width: 100%; + max-width: 280px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + color: #fb8500; + font-weight: 500; + transition: all 0.2s; + position: relative; +} +.attack-types-tooltip { + position: absolute; + bottom: 100%; + left: 0; + background: #0d1117; + border: 1px solid #30363d; + border-radius: 6px; + padding: 12px; + margin-bottom: 8px; + max-width: 400px; + word-wrap: break-word; + white-space: normal; + z-index: 1000; + color: #c9d1d9; + font-size: 12px; + font-weight: normal; + display: none; + box-shadow: 0 8px 24px rgba(0, 0, 0, 0.5); + pointer-events: auto; +} +.attack-types-cell:hover .attack-types-tooltip { + display: block; +} +.attack-types-tooltip::after { + content: ''; + position: absolute; + top: 100%; + left: 12px; + border: 6px solid transparent; + border-top-color: #30363d; +} +.attack-types-tooltip::before { + content: ''; + position: absolute; + top: 100%; + left: 13px; + border: 5px solid transparent; + border-top-color: #0d1117; + z-index: 1; +} + +/* Path Cell Styling for Attack Table */ +.path-cell-container { + position: relative; + display: inline-block; + 
max-width: 100%; +} +.path-truncated { + display: block; + max-width: 250px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + cursor: pointer; + color: #f85149 !important; + font-weight: 500; + text-decoration: underline; + text-decoration-style: dotted; + text-underline-offset: 3px; + transition: all 0.2s; +} +.path-truncated:hover { + color: #ff7369 !important; + text-decoration-style: solid; +} +.path-cell-container:hover .path-tooltip { + display: block; +} +.path-tooltip { + position: absolute; + bottom: 100%; + left: 0; + background: #0d1117; + border: 1px solid #30363d; + border-radius: 6px; + padding: 8px 12px; + margin-bottom: 8px; + max-width: 500px; + word-wrap: break-word; + white-space: normal; + z-index: 1000; + color: #c9d1d9; + font-size: 12px; + font-weight: normal; + display: none; + box-shadow: 0 8px 24px rgba(0, 0, 0, 0.5); + font-family: 'Courier New', monospace; +} +.path-tooltip::after { + content: ''; + position: absolute; + top: 100%; + left: 12px; + border: 6px solid transparent; + border-top-color: #30363d; +} +.path-tooltip::before { + content: ''; + position: absolute; + top: 100%; + left: 13px; + border: 5px solid transparent; + border-top-color: #0d1117; + z-index: 1; +} + +/* Mobile Optimization - Tablets (768px and down) */ +@media (max-width: 768px) { + body { + padding: 12px; + } + .container { + max-width: 100%; + } + h1 { + font-size: 24px; + margin-bottom: 20px; + } + .github-logo { + position: relative; + top: auto; + left: auto; + margin-bottom: 15px; + } + .download-section { + position: relative; + top: auto; + right: auto; + margin-bottom: 20px; + } + .stats-grid { + grid-template-columns: repeat(2, 1fr); + gap: 12px; + margin-bottom: 20px; + } + .stat-value { + font-size: 28px; + } + .stat-card { + padding: 15px; + } + .table-container { + padding: 12px; + margin-bottom: 15px; + overflow-x: auto; + } + table { + font-size: 13px; + } + th, td { + padding: 10px 6px; + } + h2 { + font-size: 18px; + } + 
.tabs-container { + gap: 0; + overflow-x: auto; + -webkit-overflow-scrolling: touch; + } + .tab-button { + padding: 10px 16px; + font-size: 12px; + } + .ip-stats-dropdown { + flex-direction: column; + gap: 15px; + } + .stats-right { + flex: 0 0 auto; + width: 100%; + } + .radar-chart { + width: 160px; + height: 160px; + } + .timeline-container { + flex-direction: column; + gap: 15px; + min-height: auto; + } + .timeline-column { + flex: 1 !important; + max-height: 300px; + } + #attacker-map { + height: 350px !important; + } + .leaflet-popup-content { + min-width: 200px !important; + } + .ip-marker { + font-size: 8px; + } + .ip-detail-content { + padding: 20px; + max-width: 95%; + max-height: 85vh; + } + .download-btn { + padding: 6px 12px; + font-size: 12px; + } +} + +/* Mobile Optimization - Small phones (480px and down) */ +@media (max-width: 480px) { + body { + padding: 8px; + } + h1 { + font-size: 20px; + margin-bottom: 15px; + } + .stats-grid { + grid-template-columns: 1fr; + gap: 10px; + margin-bottom: 15px; + } + .stat-value { + font-size: 24px; + } + .stat-card { + padding: 12px; + } + .stat-label { + font-size: 12px; + } + .table-container { + padding: 10px; + margin-bottom: 12px; + border-radius: 4px; + } + table { + font-size: 12px; + } + th, td { + padding: 8px 4px; + } + th { + position: relative; + } + th.sortable::after { + right: 4px; + font-size: 10px; + } + h2 { + font-size: 16px; + margin-bottom: 12px; + } + .tabs-container { + gap: 0; + } + .tab-button { + padding: 10px 12px; + font-size: 11px; + flex: 1; + } + .ip-row { + display: block; + margin-bottom: 10px; + background: #1c2128; + padding: 10px; + border-radius: 4px; + } + .ip-row td { + display: block; + padding: 4px 0; + border: none; + } + .ip-row td::before { + content: attr(data-label); + font-weight: bold; + color: #8b949e; + margin-right: 8px; + } + .ip-clickable { + display: inline-block; + } + .ip-stats-dropdown { + flex-direction: column; + gap: 12px; + font-size: 12px; + } + 
.stats-left { + flex: 1; + } + .stats-right { + flex: 0 0 auto; + width: 100%; + } + .radar-chart { + width: 140px; + height: 140px; + } + .radar-legend { + margin-top: 8px; + font-size: 10px; + } + .stat-row { + padding: 4px 0; + } + .stat-label-sm { + font-size: 12px; + } + .stat-value-sm { + font-size: 13px; + } + .category-badge { + padding: 3px 6px; + font-size: 10px; + } + .timeline-container { + flex-direction: column; + gap: 12px; + min-height: auto; + } + .timeline-column { + flex: 1 !important; + max-height: 250px; + font-size: 11px; + } + .timeline-header { + font-size: 12px; + margin-bottom: 8px; + } + .timeline-item { + padding-bottom: 10px; + font-size: 11px; + } + .timeline-marker { + left: -19px; + width: 12px; + height: 12px; + } + .reputation-badge { + display: block; + margin-bottom: 6px; + margin-right: 0; + font-size: 10px; + } + #attacker-map { + height: 300px !important; + } + .leaflet-popup-content { + min-width: 150px !important; + } + .ip-marker { + font-size: 7px; + } + .ip-detail-modal { + justify-content: flex-end; + align-items: flex-end; + } + .ip-detail-content { + padding: 15px; + max-width: 100%; + max-height: 90vh; + border-radius: 8px 8px 0 0; + width: 100%; + } + .download-btn { + padding: 6px 10px; + font-size: 11px; + } + .github-logo { + font-size: 12px; + } + .github-logo svg { + width: 24px; + height: 24px; + } +} + +/* Landscape mode optimization */ +@media (max-height: 600px) and (orientation: landscape) { + body { + padding: 8px; + } + h1 { + margin-bottom: 10px; + font-size: 18px; + } + .stats-grid { + margin-bottom: 10px; + gap: 8px; + } + .stat-value { + font-size: 20px; + } + .stat-card { + padding: 8px; + } + #attacker-map { + height: 250px !important; + } + .ip-stats-dropdown { + gap: 10px; + } + .radar-chart { + width: 120px; + height: 120px; + } +} + +/* Touch-friendly optimizations */ +@media (hover: none) and (pointer: coarse) { + .ip-clickable { + -webkit-user-select: none; + user-select: none; + 
-webkit-tap-highlight-color: rgba(88, 166, 255, 0.2); + } + .tab-button { + -webkit-user-select: none; + user-select: none; + -webkit-tap-highlight-color: rgba(88, 166, 255, 0.2); + padding: 14px 18px; + } + .download-btn { + -webkit-user-select: none; + user-select: none; + -webkit-tap-highlight-color: rgba(36, 134, 54, 0.3); + } + input[type="checkbox"] { + width: 18px; + height: 18px; + cursor: pointer; + } +} + +/* Dynamically injected button styles (previously in JS) */ +.view-btn { + padding: 4px 10px; + background: #21262d; + color: #58a6ff; + border: 1px solid #30363d; + border-radius: 4px; + font-size: 11px; + cursor: pointer; + transition: all 0.2s; + white-space: nowrap; +} +.view-btn:hover { + background: #30363d; + border-color: #58a6ff; +} +.pagination-btn { + padding: 6px 14px; + background: #21262d; + color: #c9d1d9; + border: 1px solid #30363d; + border-radius: 4px; + font-size: 12px; + cursor: pointer; + transition: all 0.2s; +} +.pagination-btn:hover:not(:disabled) { + background: #30363d; + border-color: #58a6ff; + color: #58a6ff; +} +.pagination-btn:disabled { + opacity: 0.4; + cursor: not-allowed; +} +.pagination-info { + color: #8b949e; + font-size: 12px; +} + +/* HTMX loading indicator */ +.htmx-indicator { + display: none; + color: #8b949e; + font-style: italic; + padding: 20px; + text-align: center; +} +.htmx-request .htmx-indicator { + display: block; +} +.htmx-request.htmx-indicator { + display: block; +} + +/* Alpine.js cloak */ +[x-cloak] { + display: none !important; +} diff --git a/src/templates/static/js/charts.js b/src/templates/static/js/charts.js new file mode 100644 index 0000000..93122bb --- /dev/null +++ b/src/templates/static/js/charts.js @@ -0,0 +1,167 @@ +// Chart.js Attack Types Chart +// Extracted from dashboard_template.py (lines ~3370-3550) + +let attackTypesChart = null; +let attackTypesChartLoaded = false; + +async function loadAttackTypesChart() { + const DASHBOARD_PATH = window.__DASHBOARD_PATH__ || ''; + + try { + 
const canvas = document.getElementById('attack-types-chart'); + if (!canvas) return; + + const response = await fetch(DASHBOARD_PATH + '/api/attack-types-stats?limit=10', { + cache: 'no-store', + headers: { + 'Cache-Control': 'no-cache', + 'Pragma': 'no-cache' + } + }); + + if (!response.ok) throw new Error('Failed to fetch attack types'); + + const data = await response.json(); + const attackTypes = data.attack_types || []; + + if (attackTypes.length === 0) { + canvas.style.display = 'none'; + return; + } + + const labels = attackTypes.map(item => item.type); + const counts = attackTypes.map(item => item.count); + const maxCount = Math.max(...counts); + + // Hash function to generate consistent color from string + function hashCode(str) { + let hash = 0; + for (let i = 0; i < str.length; i++) { + const char = str.charCodeAt(i); + hash = ((hash << 5) - hash) + char; + hash = hash & hash; // Convert to 32bit integer + } + return Math.abs(hash); + } + + // Dynamic color generator based on hash + function generateColorFromHash(label) { + const hash = hashCode(label); + const hue = (hash % 360); // 0-360 for hue + const saturation = 70 + (hash % 20); // 70-90 for vibrant colors + const lightness = 50 + (hash % 10); // 50-60 for brightness + + const bgColor = `hsl(${hue}, ${saturation}%, ${lightness}%)`; + const borderColor = `hsl(${hue}, ${saturation + 5}%, ${lightness - 10}%)`; // Darker border + const hoverColor = `hsl(${hue}, ${saturation - 10}%, ${lightness + 8}%)`; // Lighter hover + + return { bg: bgColor, border: borderColor, hover: hoverColor }; + } + + // Generate colors dynamically for each attack type + const backgroundColors = labels.map(label => generateColorFromHash(label).bg); + const borderColors = labels.map(label => generateColorFromHash(label).border); + const hoverColors = labels.map(label => generateColorFromHash(label).hover); + + // Create or update chart + if (attackTypesChart) { + attackTypesChart.destroy(); + } + + const ctx = 
canvas.getContext('2d'); + attackTypesChart = new Chart(ctx, { + type: 'doughnut', + data: { + labels: labels, + datasets: [{ + data: counts, + backgroundColor: backgroundColors, + borderColor: '#0d1117', + borderWidth: 3, + hoverBorderColor: '#58a6ff', + hoverBorderWidth: 4, + hoverOffset: 10 + }] + }, + options: { + responsive: true, + maintainAspectRatio: false, + plugins: { + legend: { + position: 'right', + labels: { + color: '#c9d1d9', + font: { + size: 12, + weight: '500', + family: "'Segoe UI', Tahoma, Geneva, Verdana" + }, + padding: 16, + usePointStyle: true, + pointStyle: 'circle', + generateLabels: (chart) => { + const data = chart.data; + return data.labels.map((label, i) => ({ + text: `${label} (${data.datasets[0].data[i]})`, + fillStyle: data.datasets[0].backgroundColor[i], + hidden: false, + index: i, + pointStyle: 'circle' + })); + } + } + }, + tooltip: { + enabled: true, + backgroundColor: 'rgba(22, 27, 34, 0.95)', + titleColor: '#58a6ff', + bodyColor: '#c9d1d9', + borderColor: '#58a6ff', + borderWidth: 2, + padding: 14, + titleFont: { + size: 14, + weight: 'bold', + family: "'Segoe UI', Tahoma, Geneva, Verdana" + }, + bodyFont: { + size: 13, + family: "'Segoe UI', Tahoma, Geneva, Verdana" + }, + caretSize: 8, + caretPadding: 12, + callbacks: { + label: function(context) { + const total = context.dataset.data.reduce((a, b) => a + b, 0); + const percentage = ((context.parsed / total) * 100).toFixed(1); + return `${context.label}: ${percentage}%`; + } + } + } + }, + animation: { + enabled: false + }, + onHover: (event, activeElements) => { + canvas.style.cursor = activeElements.length > 0 ? 
'pointer' : 'default'; + } + }, + plugins: [{ + id: 'customCanvasBackgroundColor', + beforeDraw: (chart) => { + if (chart.ctx) { + chart.ctx.save(); + chart.ctx.globalCompositeOperation = 'destination-over'; + chart.ctx.fillStyle = 'rgba(0,0,0,0)'; + chart.ctx.fillRect(0, 0, chart.width, chart.height); + chart.ctx.restore(); + } + } + }] + }); + + attackTypesChartLoaded = true; + } catch (err) { + console.error('Error loading attack types chart:', err); + } +} diff --git a/src/templates/static/js/dashboard.js b/src/templates/static/js/dashboard.js new file mode 100644 index 0000000..b74a51d --- /dev/null +++ b/src/templates/static/js/dashboard.js @@ -0,0 +1,125 @@ +// Alpine.js Dashboard Application +document.addEventListener('alpine:init', () => { + Alpine.data('dashboardApp', () => ({ + // State + tab: 'overview', + dashboardPath: window.__DASHBOARD_PATH__ || '', + + // Banlist dropdown + banlistOpen: false, + + // Raw request modal + rawModal: { show: false, content: '', logId: null }, + + // Map state + mapInitialized: false, + + // Chart state + chartLoaded: false, + + init() { + // Handle hash-based tab routing + const hash = window.location.hash.slice(1); + if (hash === 'ip-stats' || hash === 'attacks') { + this.switchToAttacks(); + } + + window.addEventListener('hashchange', () => { + const h = window.location.hash.slice(1); + if (h === 'ip-stats' || h === 'attacks') { + this.switchToAttacks(); + } else { + this.switchToOverview(); + } + }); + }, + + switchToAttacks() { + this.tab = 'attacks'; + window.location.hash = '#ip-stats'; + + // Delay initialization to ensure the container is visible and + // the browser has reflowed after x-show removes display:none. + // Leaflet and Chart.js need visible containers with real dimensions. 
+ this.$nextTick(() => { + setTimeout(() => { + if (!this.mapInitialized && typeof initializeAttackerMap === 'function') { + initializeAttackerMap(); + this.mapInitialized = true; + } + if (!this.chartLoaded && typeof loadAttackTypesChart === 'function') { + loadAttackTypesChart(); + this.chartLoaded = true; + } + }, 200); + }); + }, + + switchToOverview() { + this.tab = 'overview'; + window.location.hash = '#overview'; + }, + + async viewRawRequest(logId) { + try { + const resp = await fetch( + `${this.dashboardPath}/api/raw-request/${logId}`, + { cache: 'no-store' } + ); + if (resp.status === 404) { + alert('Raw request not available'); + return; + } + const data = await resp.json(); + this.rawModal.content = data.raw_request || 'No content available'; + this.rawModal.logId = logId; + this.rawModal.show = true; + } catch (err) { + alert('Failed to load raw request'); + } + }, + + closeRawModal() { + this.rawModal.show = false; + this.rawModal.content = ''; + this.rawModal.logId = null; + }, + + downloadRawRequest() { + if (!this.rawModal.content) return; + const blob = new Blob([this.rawModal.content], { type: 'text/plain' }); + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = `raw-request-${this.rawModal.logId || Date.now()}.txt`; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + }, + + toggleIpDetail(event) { + const row = event.target.closest('tr'); + if (!row) return; + const detailRow = row.nextElementSibling; + if (detailRow && detailRow.classList.contains('ip-stats-row')) { + detailRow.style.display = + detailRow.style.display === 'table-row' ? 
'none' : 'table-row'; + } + }, + })); +}); + +// Format an ISO timestamp for display (used by map popups); returns 'N/A' for empty input and the raw input when it cannot be parsed +function formatTimestamp(isoTimestamp) { + if (!isoTimestamp) return 'N/A'; + try { + const date = new Date(isoTimestamp); + return Number.isNaN(date.getTime()) ? isoTimestamp : date.toLocaleString('en-US', { + year: 'numeric', month: '2-digit', day: '2-digit', + hour: '2-digit', minute: '2-digit', second: '2-digit', hourCycle: 'h23' + }); + } catch { + return isoTimestamp; + } +} diff --git a/src/templates/static/js/map.js b/src/templates/static/js/map.js new file mode 100644 index 0000000..6dfaf02 --- /dev/null +++ b/src/templates/static/js/map.js @@ -0,0 +1,469 @@ +// IP Map Visualization +// Extracted from dashboard_template.py (lines ~2978-3348) + +let attackerMap = null; +let allIps = []; +let mapMarkers = []; +let markerLayers = {}; + +const categoryColors = { + attacker: '#f85149', + bad_crawler: '#f0883e', + good_crawler: '#3fb950', + regular_user: '#58a6ff', + unknown: '#8b949e' +}; + +async function initializeAttackerMap() { + const DASHBOARD_PATH = window.__DASHBOARD_PATH__ || ''; + const mapContainer = document.getElementById('attacker-map'); + if (!mapContainer || attackerMap) return; + + try { + // Initialize map + attackerMap = L.map('attacker-map', { + center: [20, 0], + zoom: 2, + layers: [ + L.tileLayer('https://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}{r}.png', { + attribution: '© CartoDB | © OpenStreetMap contributors', + maxZoom: 19, + subdomains: 'abcd' + }) + ] + }); + + // Fetch all IPs (not just attackers) + const response = await fetch(DASHBOARD_PATH + '/api/all-ips?page=1&page_size=100&sort_by=total_requests&sort_order=desc', { + cache: 'no-store', + headers: { + 'Cache-Control': 'no-cache', + 'Pragma': 'no-cache' + } + }); + + if (!response.ok) throw new Error('Failed to fetch IPs'); + + const data = await response.json(); + allIps = data.ips || []; + + if (allIps.length === 0) { + mapContainer.innerHTML = '
No IP location data available
'; + return; + } + + // Get max request count for scaling + const maxRequests = Math.max(...allIps.map(ip => ip.total_requests || 0)); + + // City coordinates database (major cities worldwide), stored as [latitude, longitude]; southern latitudes and western longitudes are negative + const cityCoordinates = { + // United States + 'New York': [40.7128, -74.0060], 'Los Angeles': [34.0522, -118.2437], + 'San Francisco': [37.7749, -122.4194], 'Chicago': [41.8781, -87.6298], + 'Seattle': [47.6062, -122.3321], 'Miami': [25.7617, -80.1918], + 'Boston': [42.3601, -71.0589], 'Atlanta': [33.7490, -84.3880], + 'Dallas': [32.7767, -96.7970], 'Houston': [29.7604, -95.3698], + 'Denver': [39.7392, -104.9903], 'Phoenix': [33.4484, -112.0740], + // Europe + 'London': [51.5074, -0.1278], 'Paris': [48.8566, 2.3522], + 'Berlin': [52.5200, 13.4050], 'Amsterdam': [52.3676, 4.9041], + 'Moscow': [55.7558, 37.6173], 'Rome': [41.9028, 12.4964], + 'Madrid': [40.4168, -3.7038], 'Barcelona': [41.3874, 2.1686], + 'Milan': [45.4642, 9.1900], 'Vienna': [48.2082, 16.3738], + 'Stockholm': [59.3293, 18.0686], 'Oslo': [59.9139, 10.7522], + 'Copenhagen': [55.6761, 12.5683], 'Warsaw': [52.2297, 21.0122], + 'Prague': [50.0755, 14.4378], 'Budapest': [47.4979, 19.0402], + 'Athens': [37.9838, 23.7275], 'Lisbon': [38.7223, -9.1393], + 'Brussels': [50.8503, 4.3517], 'Dublin': [53.3498, -6.2603], + 'Zurich': [47.3769, 8.5417], 'Geneva': [46.2044, 6.1432], + 'Helsinki': [60.1699, 24.9384], 'Bucharest': [44.4268, 26.1025], + 'Saint Petersburg': [59.9343, 30.3351], 'Manchester': [53.4808, -2.2426], + 'Roubaix': [50.6942, 3.1746], 'Frankfurt': [50.1109, 8.6821], + 'Munich': [48.1351, 11.5820], 'Hamburg': [53.5511, 9.9937], + // Asia + 'Tokyo': [35.6762, 139.6503], 'Beijing': [39.9042, 116.4074], + 'Shanghai': [31.2304, 121.4737], 'Singapore': [1.3521, 103.8198], + 'Mumbai': [19.0760, 72.8777], 'Delhi': [28.7041, 77.1025], + 'Bangalore': [12.9716, 77.5946], 'Seoul': [37.5665, 126.9780], + 'Hong Kong': [22.3193, 114.1694], 'Bangkok': [13.7563, 100.5018], + 'Jakarta': [-6.2088, 106.8456], 'Manila': 
[14.5995, 120.9842], + 'Hanoi': [21.0285, 105.8542], 'Ho Chi Minh City': [10.8231, 106.6297], + 'Taipei': [25.0330, 121.5654], 'Kuala Lumpur': [3.1390, 101.6869], + 'Karachi': [24.8607, 67.0011], 'Islamabad': [33.6844, 73.0479], + 'Dhaka': [23.8103, 90.4125], 'Colombo': [6.9271, 79.8612], + // South America + 'S\u00e3o Paulo': [-23.5505, -46.6333], 'Rio de Janeiro': [-22.9068, -43.1729], + 'Buenos Aires': [-34.6037, -58.3816], 'Bogot\u00e1': [4.7110, -74.0721], + 'Lima': [-12.0464, -77.0428], 'Santiago': [-33.4489, -70.6693], + // Middle East & Africa + 'Cairo': [30.0444, 31.2357], 'Dubai': [25.2048, 55.2708], + 'Istanbul': [41.0082, 28.9784], 'Tel Aviv': [32.0853, 34.7818], + 'Johannesburg': [-26.2041, 28.0473], 'Lagos': [6.5244, 3.3792], + 'Nairobi': [-1.2921, 36.8219], 'Cape Town': [-33.9249, 18.4241], + // Australia & Oceania + 'Sydney': [-33.8688, 151.2093], 'Melbourne': [-37.8136, 144.9631], + 'Brisbane': [-27.4698, 153.0251], 'Perth': [-31.9505, 115.8605], + 'Auckland': [-36.8485, 174.7633], + // Additional cities + 'Unknown': null + }; + + // Country center coordinates (fallback when city not found) + const countryCoordinates = { + 'US': [37.1, -95.7], 'GB': [55.4, -3.4], 'CN': [35.9, 104.1], 'RU': [61.5, 105.3], + 'JP': [36.2, 138.3], 'DE': [51.2, 10.5], 'FR': [46.6, 2.2], 'IN': [20.6, 78.96], + 'BR': [-14.2, -51.9], 'CA': [56.1, -106.3], 'AU': [-25.3, 133.8], 'MX': [23.6, -102.6], + 'ZA': [-30.6, 22.9], 'KR': [35.9, 127.8], 'IT': [41.9, 12.6], 'ES': [40.5, -3.7], + 'NL': [52.1, 5.3], 'SE': [60.1, 18.6], 'CH': [46.8, 8.2], 'PL': [51.9, 19.1], + 'SG': [1.4, 103.8], 'HK': [22.4, 114.1], 'TW': [23.7, 120.96], 'TH': [15.9, 100.9], + 'VN': [14.1, 108.8], 'ID': [-0.8, 113.2], 'PH': [12.9, 121.8], 'MY': [4.2, 101.7], + 'PK': [30.4, 69.2], 'BD': [23.7, 90.4], 'NG': [9.1, 8.7], 'EG': [26.8, 30.8], + 'TR': [38.9, 35.2], 'IR': [32.4, 53.7], 'AE': [23.4, 53.8], 'KZ': [48.0, 66.9], + 'UA': [48.4, 31.2], 'BG': [42.7, 25.5], 'RO': [45.9, 24.97], 'CZ': [49.8, 15.5], + 
'HU': [47.2, 19.5], 'AT': [47.5, 14.6], 'BE': [50.5, 4.5], 'DK': [56.3, 9.5], + 'FI': [61.9, 25.8], 'NO': [60.5, 8.5], 'GR': [39.1, 21.8], 'PT': [39.4, -8.2], + 'AR': [-38.4161, -63.6167], 'CO': [4.5709, -74.2973], 'CL': [-35.6751, -71.5430], + 'PE': [-9.1900, -75.0152], 'VE': [6.4238, -66.5897], 'LS': [-29.6, 28.2] + }; + + // Helper function to get coordinates for an IP + function getIPCoordinates(ip) { + // Use actual latitude and longitude if available + if (ip.latitude != null && ip.longitude != null) { + return [ip.latitude, ip.longitude]; + } + // Fall back to city lookup + if (ip.city && cityCoordinates[ip.city]) { + return cityCoordinates[ip.city]; + } + // Fall back to country + if (ip.country_code && countryCoordinates[ip.country_code]) { + return countryCoordinates[ip.country_code]; + } + return null; + } + + // Track used coordinates to add small offsets for overlapping markers + const usedCoordinates = {}; + function getUniqueCoordinates(baseCoords) { + const key = `${baseCoords[0].toFixed(4)},${baseCoords[1].toFixed(4)}`; + if (!usedCoordinates[key]) { + usedCoordinates[key] = 0; + } + usedCoordinates[key]++; + + // If this is the first marker at this location, use exact coordinates + if (usedCoordinates[key] === 1) { + return baseCoords; + } + + // Add small deterministic offset for subsequent markers + // Offset increases with each marker to create a spread pattern + const angle = (usedCoordinates[key] * 137.5) % 360; // Golden angle for even distribution + const distance = 0.05 * Math.sqrt(usedCoordinates[key]); // Increase distance with more markers + const latOffset = distance * Math.cos(angle * Math.PI / 180); + const lngOffset = distance * Math.sin(angle * Math.PI / 180); + + return [ + baseCoords[0] + latOffset, + baseCoords[1] + lngOffset + ]; + } + + // Create layer groups for each category + markerLayers = { + attacker: L.featureGroup(), + bad_crawler: L.featureGroup(), + good_crawler: L.featureGroup(), + regular_user: L.featureGroup(), + 
unknown: L.featureGroup() + }; + + // Add markers for each IP + allIps.slice(0, 100).forEach(ip => { + if (!ip.country_code || !ip.category) return; + + // Get coordinates (city first, then country) + const baseCoords = getIPCoordinates(ip); + if (!baseCoords) return; + + // Get unique coordinates with offset to prevent overlap + const coords = getUniqueCoordinates(baseCoords); + + const category = ip.category.toLowerCase(); + if (!markerLayers[category]) return; + + // Calculate marker size based on request count with more dramatic scaling + // Scale up to 10,000 requests, then cap it + const requestsForScale = Math.min(ip.total_requests, 10000); + const sizeRatio = Math.pow(requestsForScale / 10000, 0.5); // Square root for better visual scaling + const markerSize = Math.max(10, Math.min(30, 10 + (sizeRatio * 20))); + + // Create custom marker element with category-specific class + const markerElement = document.createElement('div'); + markerElement.className = `ip-marker marker-${category}`; + markerElement.style.width = markerSize + 'px'; + markerElement.style.height = markerSize + 'px'; + markerElement.style.fontSize = (markerSize * 0.5) + 'px'; + markerElement.textContent = '\u25CF'; + + const marker = L.marker(coords, { + icon: L.divIcon({ + html: markerElement.outerHTML, + iconSize: [markerSize, markerSize], + className: `ip-custom-marker category-${category}` + }) + }); + + // Create popup with category badge and chart + const categoryColor = categoryColors[category] || '#8b949e'; + const categoryLabels = { + attacker: 'Attacker', + bad_crawler: 'Bad Crawler', + good_crawler: 'Good Crawler', + regular_user: 'Regular User', + unknown: 'Unknown' + }; + + // Bind popup once when marker is created + marker.bindPopup('', { + maxWidth: 550, + className: 'ip-detail-popup' + }); + + // Add click handler to fetch data and show popup + marker.on('click', async function(e) { + // Show loading popup first + const loadingPopup = ` +
+
+ ${ip.ip} + + ${categoryLabels[category]} + +
+
+
Loading details...
+
+
+ `; + + marker.setPopupContent(loadingPopup); + marker.openPopup(); + + try { + console.log('Fetching IP stats for:', ip.ip); + const response = await fetch(`${DASHBOARD_PATH}/api/ip-stats/${ip.ip}`); + if (!response.ok) throw new Error('Failed to fetch IP stats'); + + const stats = await response.json(); + console.log('Received stats:', stats); + + // Build complete popup content with chart + let popupContent = ` +
+
+ ${ip.ip} + + ${categoryLabels[category]} + +
+ + ${ip.city ? (ip.country_code ? `${ip.city}, ${ip.country_code}` : ip.city) : (ip.country_code || 'Unknown')} +
+
+
Requests: ${ip.total_requests}
+
First Seen: ${formatTimestamp(ip.first_seen)}
+
Last Seen: ${formatTimestamp(ip.last_seen)}
+
+ `; + + // Add chart if category scores exist + if (stats.category_scores && Object.keys(stats.category_scores).length > 0) { + console.log('Category scores found:', stats.category_scores); + const chartHtml = generateMapPanelRadarChart(stats.category_scores); + console.log('Generated chart HTML length:', chartHtml.length); + popupContent += ` +
+ ${chartHtml} +
+ `; + } + + popupContent += '
'; + + // Update popup content + console.log('Updating popup content'); + marker.setPopupContent(popupContent); + } catch (err) { + console.error('Error fetching IP stats:', err); + const errorPopup = ` +
+
+ ${ip.ip} + + ${categoryLabels[category]} + +
+ + ${ip.city ? (ip.country_code ? `${ip.city}, ${ip.country_code}` : ip.city) : (ip.country_code || 'Unknown')} +
+
+
Requests: ${ip.total_requests}
+
First Seen: ${formatTimestamp(ip.first_seen)}
+
Last Seen: ${formatTimestamp(ip.last_seen)}
+
+
+ Failed to load chart: ${err.message} +
+
+ `; + marker.setPopupContent(errorPopup); + } + }); + + markerLayers[category].addLayer(marker); + }); + + // Add all marker layers to map initially + Object.values(markerLayers).forEach(layer => attackerMap.addLayer(layer)); + + // Fit map to all markers + const allMarkers = Object.values(markerLayers).reduce((acc, layer) => { + acc.push(...layer.getLayers()); + return acc; + }, []); + + if (allMarkers.length > 0) { + const bounds = L.featureGroup(allMarkers).getBounds(); + attackerMap.fitBounds(bounds, { padding: [50, 50] }); + } + + // Force Leaflet to recalculate container size after the tab becomes visible. + // Without this, tiles may not render correctly when the container was hidden. + setTimeout(() => { + if (attackerMap) attackerMap.invalidateSize(); + }, 300); + + } catch (err) { + console.error('Error initializing attacker map:', err); + mapContainer.innerHTML = '
Failed to load map: ' + err.message + '
'; + } +} + +// Update map filters based on checkbox selection +function updateMapFilters() { + if (!attackerMap) return; + + const filters = {}; + document.querySelectorAll('.map-filter').forEach(cb => { + const category = cb.getAttribute('data-category'); + if (category) filters[category] = cb.checked; + }); + + // Update marker and circle layers visibility + Object.entries(filters).forEach(([category, show]) => { + if (markerLayers[category]) { + if (show) { + if (!attackerMap.hasLayer(markerLayers[category])) { + attackerMap.addLayer(markerLayers[category]); + } + } else { + if (attackerMap.hasLayer(markerLayers[category])) { + attackerMap.removeLayer(markerLayers[category]); + } + } + } + }); +} + +// Generate radar chart SVG for map panel popups +function generateMapPanelRadarChart(categoryScores) { + if (!categoryScores || Object.keys(categoryScores).length === 0) { + return '
No category data available
'; + } + + let html = '
'; + html += ''; + + const scores = { + attacker: categoryScores.attacker || 0, + good_crawler: categoryScores.good_crawler || 0, + bad_crawler: categoryScores.bad_crawler || 0, + regular_user: categoryScores.regular_user || 0, + unknown: categoryScores.unknown || 0 + }; + + const maxScore = Math.max(...Object.values(scores), 1); + const minVisibleRadius = 0.15; + const normalizedScores = {}; + + Object.keys(scores).forEach(key => { + normalizedScores[key] = minVisibleRadius + (scores[key] / maxScore) * (1 - minVisibleRadius); + }); + + const colors = { + attacker: '#f85149', + good_crawler: '#3fb950', + bad_crawler: '#f0883e', + regular_user: '#58a6ff', + unknown: '#8b949e' + }; + + const labels = { + attacker: 'Attacker', + good_crawler: 'Good Bot', + bad_crawler: 'Bad Bot', + regular_user: 'User', + unknown: 'Unknown' + }; + + const cx = 100, cy = 100, maxRadius = 75; + for (let i = 1; i <= 5; i++) { + const r = (maxRadius / 5) * i; + html += ``; + } + + const angles = [0, 72, 144, 216, 288]; + const keys = ['good_crawler', 'regular_user', 'unknown', 'bad_crawler', 'attacker']; + + angles.forEach((angle, i) => { + const rad = (angle - 90) * Math.PI / 180; + const x2 = cx + maxRadius * Math.cos(rad); + const y2 = cy + maxRadius * Math.sin(rad); + html += ``; + + const labelDist = maxRadius + 35; + const lx = cx + labelDist * Math.cos(rad); + const ly = cy + labelDist * Math.sin(rad); + html += `${labels[keys[i]]}`; + }); + + let points = []; + angles.forEach((angle, i) => { + const normalizedScore = normalizedScores[keys[i]]; + const rad = (angle - 90) * Math.PI / 180; + const r = normalizedScore * maxRadius; + const x = cx + r * Math.cos(rad); + const y = cy + r * Math.sin(rad); + points.push(`${x},${y}`); + }); + + const dominantKey = Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? 
a : b); + const dominantColor = colors[dominantKey]; + + html += ``; + + angles.forEach((angle, i) => { + const normalizedScore = normalizedScores[keys[i]]; + const rad = (angle - 90) * Math.PI / 180; + const r = normalizedScore * maxRadius; + const x = cx + r * Math.cos(rad); + const y = cy + r * Math.sin(rad); + html += ``; + }); + + html += ''; + html += '
'; + return html; +} diff --git a/src/templates/static/js/radar.js b/src/templates/static/js/radar.js new file mode 100644 index 0000000..f531046 --- /dev/null +++ b/src/templates/static/js/radar.js @@ -0,0 +1,127 @@ +// Radar chart generation for IP stats +// Used by map popups and IP detail partials +// Extracted from dashboard_template.py (lines ~2092-2181) + +/** + * Generate an SVG radar chart for category scores. + * This is a reusable function that can be called from: + * - Map popup panels (generateMapPanelRadarChart in map.js) + * - IP detail partials (server-side or client-side rendering) + * + * @param {Object} categoryScores - Object with keys: attacker, good_crawler, bad_crawler, regular_user, unknown + * @param {number} [size=200] - Width/height of the SVG in pixels + * @param {boolean} [showLegend=true] - Whether to show the legend below the chart + * @returns {string} HTML string containing the SVG radar chart + */ +function generateRadarChart(categoryScores, size, showLegend) { + size = size || 200; + if (showLegend === undefined) showLegend = true; + + if (!categoryScores || Object.keys(categoryScores).length === 0) { + return '
No category data available
'; + } + + const scores = { + attacker: categoryScores.attacker || 0, + good_crawler: categoryScores.good_crawler || 0, + bad_crawler: categoryScores.bad_crawler || 0, + regular_user: categoryScores.regular_user || 0, + unknown: categoryScores.unknown || 0 + }; + + const maxScore = Math.max(...Object.values(scores), 1); + const minVisibleRadius = 0.15; + const normalizedScores = {}; + + Object.keys(scores).forEach(key => { + normalizedScores[key] = minVisibleRadius + (scores[key] / maxScore) * (1 - minVisibleRadius); + }); + + const colors = { + attacker: '#f85149', + good_crawler: '#3fb950', + bad_crawler: '#f0883e', + regular_user: '#58a6ff', + unknown: '#8b949e' + }; + + const labels = { + attacker: 'Attacker', + good_crawler: 'Good Bot', + bad_crawler: 'Bad Bot', + regular_user: 'User', + unknown: 'Unknown' + }; + + const cx = 100, cy = 100, maxRadius = 75; + + let html = '
'; + html += ``; + + // Draw concentric circles (grid) + for (let i = 1; i <= 5; i++) { + const r = (maxRadius / 5) * i; + html += ``; + } + + const angles = [0, 72, 144, 216, 288]; + const keys = ['good_crawler', 'regular_user', 'unknown', 'bad_crawler', 'attacker']; + + // Draw axis lines and labels + angles.forEach((angle, i) => { + const rad = (angle - 90) * Math.PI / 180; + const x2 = cx + maxRadius * Math.cos(rad); + const y2 = cy + maxRadius * Math.sin(rad); + html += ``; + + const labelDist = maxRadius + 35; + const lx = cx + labelDist * Math.cos(rad); + const ly = cy + labelDist * Math.sin(rad); + html += `${labels[keys[i]]}`; + }); + + // Calculate polygon points + let points = []; + angles.forEach((angle, i) => { + const normalizedScore = normalizedScores[keys[i]]; + const rad = (angle - 90) * Math.PI / 180; + const r = normalizedScore * maxRadius; + const x = cx + r * Math.cos(rad); + const y = cy + r * Math.sin(rad); + points.push(`${x},${y}`); + }); + + // Determine dominant category for color + const dominantKey = Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? a : b); + const dominantColor = colors[dominantKey]; + + // Draw filled polygon + html += ``; + + // Draw data point dots + angles.forEach((angle, i) => { + const normalizedScore = normalizedScores[keys[i]]; + const rad = (angle - 90) * Math.PI / 180; + const r = normalizedScore * maxRadius; + const x = cx + r * Math.cos(rad); + const y = cy + r * Math.sin(rad); + html += ``; + }); + + html += ''; + + // Optional legend + if (showLegend) { + html += '
'; + keys.forEach(key => { + html += '
'; + html += `
`; + html += `${labels[key]}: ${scores[key]} pt`; + html += '
'; + }); + html += '
'; + } + + html += '
'; + return html; +} diff --git a/src/tracker.py b/src/tracker.py index b7b97d5..292ebba 100644 --- a/src/tracker.py +++ b/src/tracker.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from typing import Dict, List, Tuple, Optional +from typing import Dict, Tuple, Optional from collections import defaultdict from datetime import datetime from zoneinfo import ZoneInfo @@ -9,7 +9,20 @@ import urllib.parse from wordlists import get_wordlists from database import get_database, DatabaseManager -from ip_utils import is_local_or_private_ip, is_valid_public_ip + +# Module-level singleton for background task access +_tracker_instance: "AccessTracker | None" = None + + +def get_tracker() -> "AccessTracker | None": + """Get the global AccessTracker singleton (set during app startup).""" + return _tracker_instance + + +def set_tracker(tracker: "AccessTracker"): + """Store the AccessTracker singleton for background task access.""" + global _tracker_instance + _tracker_instance = tracker class AccessTracker: @@ -35,16 +48,6 @@ class AccessTracker: """ self.max_pages_limit = max_pages_limit self.ban_duration_seconds = ban_duration_seconds - self.ip_counts: Dict[str, int] = defaultdict(int) - self.path_counts: Dict[str, int] = defaultdict(int) - self.user_agent_counts: Dict[str, int] = defaultdict(int) - self.access_log: List[Dict] = [] - self.credential_attempts: List[Dict] = [] - - # Memory limits for in-memory lists (prevents unbounded growth) - self.max_access_log_size = 10_000 # Keep only recent 10k accesses - self.max_credential_log_size = 5_000 # Keep only recent 5k attempts - self.max_counter_keys = 100_000 # Max unique IPs/paths/user agents # Track pages visited by each IP (for good crawler limiting) self.ip_page_visits: Dict[str, Dict[str, object]] = defaultdict(dict) @@ -88,13 +91,10 @@ class AccessTracker: "path_traversal": r"\.\.", "sql_injection": r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)", "xss_attempt": r"( self.max_credential_log_size: - self.credential_attempts = 
self.credential_attempts[ - -self.max_credential_log_size : - ] - # Persist to database if self.db: try: @@ -264,11 +247,6 @@ class AccessTracker: if server_ip and ip == server_ip: return - self.ip_counts[ip] += 1 - self.path_counts[path] += 1 - if user_agent: - self.user_agent_counts[user_agent] += 1 - # Path attack type detection attack_findings = self.detect_attack_type(path) @@ -285,27 +263,7 @@ class AccessTracker: ) is_honeypot = self.is_honeypot_path(path) - # Track if this IP accessed a honeypot path - if is_honeypot: - self.honeypot_triggered[ip].append(path) - # In-memory storage for dashboard - self.access_log.append( - { - "ip": ip, - "path": path, - "user_agent": user_agent, - "suspicious": is_suspicious, - "honeypot_triggered": self.is_honeypot_path(path), - "attack_types": attack_findings, - "timestamp": datetime.now().isoformat(), - } - ) - - # Trim if exceeding max size (prevent unbounded growth) - if len(self.access_log) > self.max_access_log_size: - self.access_log = self.access_log[-self.max_access_log_size :] - # Persist to database if self.db: try: @@ -583,54 +541,6 @@ class AccessTracker: except Exception: return 0 - def get_top_ips(self, limit: int = 10) -> List[Tuple[str, int]]: - """Get top N IP addresses by access count (excludes local/private IPs)""" - filtered = [ - (ip, count) - for ip, count in self.ip_counts.items() - if not is_local_or_private_ip(ip) - ] - return sorted(filtered, key=lambda x: x[1], reverse=True)[:limit] - - def get_top_paths(self, limit: int = 10) -> List[Tuple[str, int]]: - """Get top N paths by access count""" - return sorted(self.path_counts.items(), key=lambda x: x[1], reverse=True)[ - :limit - ] - - def get_top_user_agents(self, limit: int = 10) -> List[Tuple[str, int]]: - """Get top N user agents by access count""" - return sorted(self.user_agent_counts.items(), key=lambda x: x[1], reverse=True)[ - :limit - ] - - def get_suspicious_accesses(self, limit: int = 20) -> List[Dict]: - """Get recent suspicious 
accesses (excludes local/private IPs)""" - suspicious = [ - log - for log in self.access_log - if log.get("suspicious", False) - and not is_local_or_private_ip(log.get("ip", "")) - ] - return suspicious[-limit:] - - def get_attack_type_accesses(self, limit: int = 20) -> List[Dict]: - """Get recent accesses with detected attack types (excludes local/private IPs)""" - attacks = [ - log - for log in self.access_log - if log.get("attack_types") and not is_local_or_private_ip(log.get("ip", "")) - ] - return attacks[-limit:] - - def get_honeypot_triggered_ips(self) -> List[Tuple[str, List[str]]]: - """Get IPs that accessed honeypot paths (excludes local/private IPs)""" - return [ - (ip, paths) - for ip, paths in self.honeypot_triggered.items() - if not is_local_or_private_ip(ip) - ] - def get_stats(self) -> Dict: """Get statistics summary from database.""" if not self.db: @@ -654,47 +564,32 @@ class AccessTracker: """ Clean up in-memory structures to prevent unbounded growth. Should be called periodically (e.g., every 5 minutes). 
- - Trimming strategy: - - Keep most recent N entries in logs - - Remove oldest entries when limit exceeded - - Clean expired ban entries from ip_page_visits """ - # Trim access_log to max size (keep most recent) - if len(self.access_log) > self.max_access_log_size: - self.access_log = self.access_log[-self.max_access_log_size :] - - # Trim credential_attempts to max size (keep most recent) - if len(self.credential_attempts) > self.max_credential_log_size: - self.credential_attempts = self.credential_attempts[ - -self.max_credential_log_size : - ] - # Clean expired ban entries from ip_page_visits current_time = datetime.now() - ips_to_clean = [] for ip, data in self.ip_page_visits.items(): ban_timestamp = data.get("ban_timestamp") if ban_timestamp is not None: try: ban_time = datetime.fromisoformat(ban_timestamp) time_diff = (current_time - ban_time).total_seconds() - if time_diff > self.ban_duration_seconds: - # Ban expired, reset the entry + effective_duration = self.ban_duration_seconds * data.get( + "ban_multiplier", 1 + ) + if time_diff > effective_duration: data["count"] = 0 data["ban_timestamp"] = None except (ValueError, TypeError): pass - # Optional: Remove IPs with zero activity (advanced cleanup) - # Comment out to keep indefinite history of zero-activity IPs - # ips_to_remove = [ - # ip - # for ip, data in self.ip_page_visits.items() - # if data.get("count", 0) == 0 and data.get("ban_timestamp") is None - # ] - # for ip in ips_to_remove: - # del self.ip_page_visits[ip] + # Remove IPs with zero activity and no active ban + ips_to_remove = [ + ip + for ip, data in self.ip_page_visits.items() + if data.get("count", 0) == 0 and data.get("ban_timestamp") is None + ] + for ip in ips_to_remove: + del self.ip_page_visits[ip] def get_memory_stats(self) -> Dict[str, int]: """ @@ -704,11 +599,5 @@ class AccessTracker: Dictionary with counts of in-memory items """ return { - "access_log_size": len(self.access_log), - "credential_attempts_size": 
len(self.credential_attempts), - "unique_ips_tracked": len(self.ip_counts), - "unique_paths_tracked": len(self.path_counts), - "unique_user_agents": len(self.user_agent_counts), - "unique_ip_page_visits": len(self.ip_page_visits), - "honeypot_triggered_ips": len(self.honeypot_triggered), + "ip_page_visits": len(self.ip_page_visits), } diff --git a/tests/test_insert_fake_ips.py b/tests/test_insert_fake_ips.py index 5f19530..1eba765 100644 --- a/tests/test_insert_fake_ips.py +++ b/tests/test_insert_fake_ips.py @@ -243,13 +243,13 @@ def fetch_geolocation_from_api(ip: str, app_logger) -> tuple: """ try: geoloc_data = extract_geolocation_from_ip(ip) - + if geoloc_data: country_code = geoloc_data.get("country_code") city = geoloc_data.get("city") asn = geoloc_data.get("asn") asn_org = geoloc_data.get("org") - + return (country_code, city, asn, asn_org) except requests.RequestException as e: app_logger.warning(f"Failed to fetch geolocation for {ip}: {e}") @@ -541,7 +541,7 @@ def generate_fake_data( app_logger.info( "All IPs have API-fetched geolocation with reverse geocoded city names." ) - app_logger.info("Run: python server.py") + app_logger.info("Run: uvicorn app:app --app-dir src") app_logger.info("=" * 60) diff --git a/wordlists.json b/wordlists.json index 129f256..f284aab 100644 --- a/wordlists.json +++ b/wordlists.json @@ -469,7 +469,8 @@ "lfi_rfi": "(file://|php://|expect://|data://|zip://|phar://|/etc/passwd|/etc/shadow|/proc/self|c:\\\\windows)", "xxe_injection": "(