diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml index 9feb01c..d899b13 100644 --- a/.github/workflows/pr-checks.yml +++ b/.github/workflows/pr-checks.yml @@ -20,7 +20,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: '3.11' + python-version: '3.13' cache: 'pip' - name: Install dependencies diff --git a/.github/workflows/security-scan.yml b/.github/workflows/security-scan.yml index 4b471cd..3048e62 100644 --- a/.github/workflows/security-scan.yml +++ b/.github/workflows/security-scan.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: '3.11' + python-version: '3.13' cache: 'pip' - name: Install dependencies @@ -48,12 +48,4 @@ jobs: - name: Safety check for dependencies run: safety check --json || true - - - name: Trivy vulnerability scan - uses: aquasecurity/trivy-action@0.31.0 - with: - scan-type: 'fs' - scan-ref: '.' - format: 'table' - severity: 'CRITICAL,HIGH' - exit-code: '1' + \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index f6caa8b..e93a55c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,15 +1,16 @@ -FROM python:3.11-slim +FROM python:3.13-slim LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl WORKDIR /app # Install gosu for dropping privileges -RUN apt-get update && apt-get install -y --no-install-recommends gosu && \ +RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends gosu && \ rm -rf /var/lib/apt/lists/* COPY requirements.txt /app/ -RUN pip install --no-cache-dir -r requirements.txt +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r requirements.txt COPY src/ /app/src/ COPY wordlists.json /app/ diff --git a/README.md b/README.md index c6ad739..4dc5702 100644 --- a/README.md +++ b/README.md @@ -33,21 +33,25 @@ Helm Chart - -
- -

- What is Krawl? • - Installation • - Honeypot Pages • - Dashboard • - Todo • - Contributing -

- -
+## Table of Contents +- [Demo](#demo) +- [What is Krawl?](#what-is-krawl) +- [Krawl Dashboard](#krawl-dashboard) +- [Installation](#-installation) + - [Docker Run](#docker-run) + - [Docker Compose](#docker-compose) + - [Kubernetes](#kubernetes) +- [Configuration](#configuration) + - [config.yaml](#configuration-via-configyaml) + - [Environment Variables](#configuration-via-enviromental-variables) +- [Ban Malicious IPs](#use-krawl-to-ban-malicious-ips) +- [IP Reputation](#ip-reputation) +- [Forward Server Header](#forward-server-header) +- [Additional Documentation](#additional-documentation) +- [Contributing](#-contributing) + ## Demo Tip: crawl the `robots.txt` paths for additional fun ### Krawl URL: [http://demo.krawlme.com](http://demo.krawlme.com) @@ -67,7 +71,7 @@ It features: - **Fake Login Pages**: WordPress, phpMyAdmin, admin panels - **Honeypot Paths**: Advertised in robots.txt to catch scanners - **Fake Credentials**: Realistic-looking usernames, passwords, API keys -- **[Canary Token](#customizing-the-canary-token) Integration**: External alert triggering +- **[Canary Token](docs/canary-token.md) Integration**: External alert triggering - **Random server headers**: Confuse attacks based on server header and version - **Real-time Dashboard**: Monitor suspicious activity - **Customizable Wordlists**: Easy JSON-based configuration @@ -75,8 +79,28 @@ It features: ![dashboard](img/deception-page.png) + +## Krawl Dashboard + +Krawl provides a comprehensive dashboard, accessible at a **random secret path** generated at startup or at a **custom path** configured via `KRAWL_DASHBOARD_SECRET_PATH`. This keeps the dashboard hidden from attackers scanning your honeypot. + +The dashboard is organized in three main tabs: + +- **Overview** — High-level view of attack activity: an interactive map of IP origins, recent suspicious requests, and top IPs, User-Agents, and paths. 
+ ![geoip](img/geoip_dashboard.png) +- **Attacks** — Detailed breakdown of captured credentials, honeypot triggers, and detected attack types (SQLi, XSS, path traversal, etc.) with charts and tables. + +![attack_types](img/attack_types.png) + +- **IP Insight** — In-depth forensic view of a selected IP: geolocation, ISP/ASN info, reputation flags, behavioral timeline, attack type distribution, and full access history. + +![ipinsight](img/ip_insight_dashboard.png) + +For more details, see the [Dashboard documentation](docs/dashboard.md). + + ## 🚀 Installation ### Docker Run @@ -89,7 +113,7 @@ docker run -d \ -e KRAWL_PORT=5000 \ -e KRAWL_DELAY=100 \ -e KRAWL_DASHBOARD_SECRET_PATH="/my-secret-dashboard" \ - -e KRAWL_DATABASE_RETENTION_DAYS=30 \ + -v krawl-data:/app/data \ --name krawl \ ghcr.io/blessedrebus/krawl:latest ``` @@ -136,64 +160,13 @@ docker-compose down ### Kubernetes **Krawl is also available natively on Kubernetes**. Installation can be done either [via manifest](kubernetes/README.md) or [using the helm chart](helm/README.md). -## Use Krawl to Ban Malicious IPs -Krawl uses a reputation-based system to classify attacker IP addresses. Every five minutes, Krawl exports the identified malicious IPs to a `malicious_ips.txt` file. - -This file can either be mounted from the Docker container into another system or downloaded directly via `curl`: - -```bash -curl https://your-krawl-instance//api/download/malicious_ips.txt -``` - -This file enables automatic blocking of malicious traffic across various platforms. 
You can use it to update firewall rules on: -* [OPNsense and pfSense](https://www.allthingstech.ch/using-opnsense-and-ip-blocklists-to-block-malicious-traffic) -* [RouterOS](https://rentry.co/krawl-routeros) -* [IPtables](plugins/iptables/README.md) and [Nftables](plugins/nftables/README.md) -* [Fail2Ban](plugins/fail2ban/README.md) - -## IP Reputation -Krawl [uses tasks that analyze recent traffic to build and continuously update an IP reputation](src/tasks/analyze_ips.py) score. It runs periodically and evaluates each active IP address based on multiple behavioral indicators to classify it as an attacker, crawler, or regular user. Thresholds are fully customizable. - -![ip reputation](img/ip-reputation.png) - -The analysis includes: -- **Risky HTTP methods usage** (e.g. POST, PUT, DELETE ratios) -- **Robots.txt violations** -- **Request timing anomalies** (bursty or irregular patterns) -- **User-Agent consistency** -- **Attack URL detection** (e.g. SQL injection, XSS patterns) - -Each signal contributes to a weighted scoring model that assigns a reputation category: -- `attacker` -- `bad_crawler` -- `good_crawler` -- `regular_user` -- `unknown` (for insufficient data) - -The resulting scores and metrics are stored in the database and used by Krawl to drive dashboards, reputation tracking, and automated mitigation actions such as IP banning or firewall integration. 
- -## Forward server header -If Krawl is deployed behind a proxy such as NGINX the **server header** should be forwarded using the following configuration in your proxy: - -```bash -location / { - proxy_pass https://your-krawl-instance; - proxy_pass_header Server; -} -``` - -## API -Krawl uses the following APIs -- http://ip-api.com (IP Data) -- https://iprep.lcrawl.com (IP Reputation) -- https://nominatim.openstreetmap.org/reverse (Reverse IP Lookup) -- https://api.ipify.org (Public IP discovery) -- http://ident.me (Public IP discovery) -- https://ifconfig.me (Public IP discovery) ## Configuration Krawl uses a **configuration hierarchy** in which **environment variables take precedence over the configuration file**. This approach is recommended for Docker deployments and quick out-of-the-box customization. +### Configuration via config.yaml +You can use the [config.yaml](config.yaml) file for advanced configurations, such as Docker Compose or Helm chart deployments. + ### Configuration via Enviromental Variables | Environment Variable | Description | Default | @@ -256,153 +229,63 @@ docker run -d \ ghcr.io/blessedrebus/krawl:latest ``` -### Configuration via config.yaml -You can use the [config.yaml](config.yaml) file for more advanced configurations, such as Docker Compose or Helm chart deployments. +## Use Krawl to Ban Malicious IPs +Krawl uses a reputation-based system to classify attacker IP addresses. Every five minutes, Krawl exports the identified malicious IPs to a `malicious_ips.txt` file. -# Honeypot -Below is a complete overview of the Krawl honeypot’s capabilities +This file can either be mounted from the Docker container into another system or downloaded directly via `curl`: -## robots.txt -The actual (juicy) robots.txt configuration [is the following](src/templates/html/robots.txt). 
+```bash +curl https://your-krawl-instance//api/download/malicious_ips.txt +``` -## Honeypot pages +This file enables automatic blocking of malicious traffic across various platforms. You can use it to update firewall rules on: +* [OPNsense and pfSense](https://www.allthingstech.ch/using-opnsense-and-ip-blocklists-to-block-malicious-traffic) +* [RouterOS](https://rentry.co/krawl-routeros) +* [IPtables](plugins/iptables/README.md) and [Nftables](plugins/nftables/README.md) +* [Fail2Ban](plugins/fail2ban/README.md) -### Common Login Attempts -Requests to common admin endpoints (`/admin/`, `/wp-admin/`, `/phpMyAdmin/`) return a fake login page. Any login attempt triggers a 1-second delay to simulate real processing and is fully logged in the dashboard (credentials, IP, headers, timing). +## IP Reputation +Krawl [uses tasks that analyze recent traffic to build and continuously update an IP reputation](src/tasks/analyze_ips.py) score. It runs periodically and evaluates each active IP address based on multiple behavioral indicators to classify it as an attacker, crawler, or regular user. Thresholds are fully customizable. -![admin page](img/admin-page.png) +![ip reputation](img/ip-reputation.png) -### Common Misconfiguration Paths -Requests to paths like `/backup/`, `/config/`, `/database/`, `/private/`, or `/uploads/` return a fake directory listing populated with “interesting” files, each assigned a random file size to look realistic. +The analysis includes: +- **Risky HTTP methods usage** (e.g. POST, PUT, DELETE ratios) +- **Robots.txt violations** +- **Request timing anomalies** (bursty or irregular patterns) +- **User-Agent consistency** +- **Attack URL detection** (e.g. 
SQL injection, XSS patterns) -![directory-page](img/directory-page.png) +Each signal contributes to a weighted scoring model that assigns a reputation category: +- `attacker` +- `bad_crawler` +- `good_crawler` +- `regular_user` +- `unknown` (for insufficient data) -### Environment File Leakage -The `.env` endpoint exposes fake database connection strings, **AWS API keys**, and **Stripe secrets**. It intentionally returns an error due to the `Content-Type` being `application/json` instead of plain text, mimicking a "juicy" misconfiguration that crawlers and scanners often flag as information leakage. +The resulting scores and metrics are stored in the database and used by Krawl to drive dashboards, reputation tracking, and automated mitigation actions such as IP banning or firewall integration. -### Server Error Information -The `/server` page displays randomly generated fake error information for each known server. - -![server and env page](img/server-and-env-page.png) - -### API Endpoints with Sensitive Data -The pages `/api/v1/users` and `/api/v2/secrets` show fake users and random secrets in JSON format - -![users and secrets](img/users-and-secrets.png) - -### Exposed Credential Files -The pages `/credentials.txt` and `/passwords.txt` show fake users and random secrets - -![credentials and passwords](img/credentials-and-passwords.png) - -### SQL Injection and XSS Detection -Pages such as `/users`, `/search`, `/contact`, `/info`, `/input`, and `/feedback`, along with APIs like `/api/sql` and `/api/database`, are designed to lure attackers into performing attacks such as **SQL injection** or **XSS**. - -![sql injection](img/sql_injection.png) - -Automated tools like **SQLMap** will receive a different randomized database error on each request, increasing scan noise and confusing the attacker. All detected attacks are logged and displayed in the dashboard. 
- -### Path Traversal Detection -Krawl detects and responds to **path traversal** attempts targeting common system files like `/etc/passwd`, `/etc/shadow`, or Windows system paths. When an attacker tries to access sensitive files using patterns like `../../../etc/passwd` or encoded variants (`%2e%2e/`, `%252e`), Krawl returns convincing fake file contents with realistic system users, UIDs, GIDs, and shell configurations. This wastes attacker time while logging the full attack pattern. - -### XXE (XML External Entity) Injection -The `/api/xml` and `/api/parser` endpoints accept XML input and are designed to detect **XXE injection** attempts. When attackers try to exploit external entity declarations (`:/` - -The dashboard shows: -- Total and unique accesses -- Suspicious activity and attack detection -- Top IPs, paths, user-agents and GeoIP localization -- Real-time monitoring - -The attackers’ access to the honeypot endpoint and related suspicious activities (such as failed login attempts) are logged. - -Krawl also implements a scoring system designed to distinguish between malicious and legitimate behavior on the website. 
- -![dashboard-1](img/dashboard-1.png) - -The top IP Addresses is shown along with top paths and User Agents - -![dashboard-2](img/dashboard-2.png) - -![dashboard-3](img/dashboard-3.png) +| Topic | Description | +|-------|-------------| +| [API](docs/api.md) | External APIs used by Krawl for IP data, reputation, and geolocation | +| [Honeypot](docs/honeypot.md) | Full overview of honeypot pages: fake logins, directory listings, credential files, SQLi/XSS/XXE/command injection traps, and more | +| [Reverse Proxy](docs/reverse-proxy.md) | How to deploy Krawl behind NGINX or use decoy subdomains | +| [Database Backups](docs/backups.md) | Enable and configure the automatic database dump job | +| [Canary Token](docs/canary-token.md) | Set up external alert triggers via canarytokens.org | +| [Wordlist](docs/wordlist.md) | Customize fake usernames, passwords, and directory listings | +| [Dashboard](docs/dashboard.md) | Access and explore the real-time monitoring dashboard | ## 🤝 Contributing @@ -413,14 +296,9 @@ Contributions welcome! Please: 4. Submit a pull request (explain the changes!) -
- -## ⚠️ Disclaimer - -**This is a deception/honeypot system.** -Deploy in isolated environments and monitor carefully for security events. -Use responsibly and in compliance with applicable laws and regulations. +## Disclaimer +> [!CAUTION] +> This is a deception/honeypot system. Deploy in isolated environments and monitor carefully for security events. Use responsibly and in compliance with applicable laws and regulations. ## Star History -Star History Chart - +Star History Chart \ No newline at end of file diff --git a/ToDo.md b/ToDo.md deleted file mode 100644 index 49e8d68..0000000 --- a/ToDo.md +++ /dev/null @@ -1,5 +0,0 @@ -# Krawl - Todo List - -- Add Prometheus exporter for metrics -- Add POST cresentials information (eg: username and password used) -- Add CloudFlare error pages \ No newline at end of file diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..8d4ab18 --- /dev/null +++ b/docs/api.md @@ -0,0 +1,9 @@ +# API + +Krawl uses the following APIs +- http://ip-api.com (IP Data) +- https://iprep.lcrawl.com (IP Reputation) +- https://nominatim.openstreetmap.org/reverse (Reverse IP Lookup) +- https://api.ipify.org (Public IP discovery) +- http://ident.me (Public IP discovery) +- https://ifconfig.me (Public IP discovery) diff --git a/docs/backups.md b/docs/backups.md new file mode 100644 index 0000000..84bf5db --- /dev/null +++ b/docs/backups.md @@ -0,0 +1,10 @@ +# Enable Database Dump Job for Backups + +To enable the database dump job, set the following variables (*config file example*) + +```yaml +backups: + path: "backups" # where backup will be saved + cron: "*/30 * * * *" # frequency of the cronjob + enabled: true +``` diff --git a/docs/canary-token.md b/docs/canary-token.md new file mode 100644 index 0000000..6e6c314 --- /dev/null +++ b/docs/canary-token.md @@ -0,0 +1,10 @@ +# Customizing the Canary Token + +To create a custom canary token, visit https://canarytokens.org + +and generate a "Web bug" canary token. 
+ +This optional token is triggered when a crawler fully traverses the webpage until it reaches 0. At that point, a URL is returned. When this URL is requested, it sends an alert to the user via email, including the visitor's IP address and user agent. + + +To enable this feature, set the canary token URL [using the environment variable](../README.md#configuration-via-enviromental-variables) `KRAWL_CANARY_TOKEN_URL`. diff --git a/docs/dashboard.md b/docs/dashboard.md new file mode 100644 index 0000000..ace7955 --- /dev/null +++ b/docs/dashboard.md @@ -0,0 +1,21 @@ +# Dashboard + +Access the dashboard at `http://<server-ip>:<port>/<dashboard-secret-path>` + +The dashboard shows: +- Total and unique accesses +- Suspicious activity and attack detection +- Top IPs, paths, user-agents and GeoIP localization +- Real-time monitoring + +The attackers' access to the honeypot endpoint and related suspicious activities (such as failed login attempts) are logged. + +Krawl also implements a scoring system designed to distinguish between malicious and legitimate behavior on the website. + +![dashboard-1](../img/dashboard-1.png) + +The top IP addresses are shown along with the top paths and User-Agents + +![dashboard-2](../img/dashboard-2.png) + +![dashboard-3](../img/dashboard-3.png) diff --git a/docs/honeypot.md b/docs/honeypot.md new file mode 100644 index 0000000..6baffab --- /dev/null +++ b/docs/honeypot.md @@ -0,0 +1,52 @@ +# Honeypot + +Below is a complete overview of the Krawl honeypot's capabilities + +## robots.txt +The actual (juicy) robots.txt configuration [is the following](../src/templates/html/robots.txt). + +## Honeypot pages + +### Common Login Attempts +Requests to common admin endpoints (`/admin/`, `/wp-admin/`, `/phpMyAdmin/`) return a fake login page. Any login attempt triggers a 1-second delay to simulate real processing and is fully logged in the dashboard (credentials, IP, headers, timing). 
+ +![admin page](../img/admin-page.png) + +### Common Misconfiguration Paths +Requests to paths like `/backup/`, `/config/`, `/database/`, `/private/`, or `/uploads/` return a fake directory listing populated with "interesting" files, each assigned a random file size to look realistic. + +![directory-page](../img/directory-page.png) + +### Environment File Leakage +The `.env` endpoint exposes fake database connection strings, **AWS API keys**, and **Stripe secrets**. It intentionally returns an error due to the `Content-Type` being `application/json` instead of plain text, mimicking a "juicy" misconfiguration that crawlers and scanners often flag as information leakage. + +### Server Error Information +The `/server` page displays randomly generated fake error information for each known server. + +![server and env page](../img/server-and-env-page.png) + +### API Endpoints with Sensitive Data +The pages `/api/v1/users` and `/api/v2/secrets` show fake users and random secrets in JSON format + +![users and secrets](../img/users-and-secrets.png) + +### Exposed Credential Files +The pages `/credentials.txt` and `/passwords.txt` show fake users and random secrets + +![credentials and passwords](../img/credentials-and-passwords.png) + +### SQL Injection and XSS Detection +Pages such as `/users`, `/search`, `/contact`, `/info`, `/input`, and `/feedback`, along with APIs like `/api/sql` and `/api/database`, are designed to lure attackers into performing attacks such as **SQL injection** or **XSS**. + +![sql injection](../img/sql_injection.png) + +Automated tools like **SQLMap** will receive a different randomized database error on each request, increasing scan noise and confusing the attacker. All detected attacks are logged and displayed in the dashboard. + +### Path Traversal Detection +Krawl detects and responds to **path traversal** attempts targeting common system files like `/etc/passwd`, `/etc/shadow`, or Windows system paths. 
When an attacker tries to access sensitive files using patterns like `../../../etc/passwd` or encoded variants (`%2e%2e/`, `%252e`), Krawl returns convincing fake file contents with realistic system users, UIDs, GIDs, and shell configurations. This wastes attacker time while logging the full attack pattern. + +### XXE (XML External Entity) Injection +The `/api/xml` and `/api/parser` endpoints accept XML input and are designed to detect **XXE injection** attempts. When attackers try to exploit external entity declarations (`:5000` - -### Add the repository (if applicable) - -```bash -helm repo add krawl https://github.com/BlessedRebuS/Krawl -helm repo update -``` - -### Install from OCI Registry - -```bash -helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 0.2.1 -``` - -Or with a specific namespace: - -```bash -helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 0.2.1 -n krawl --create-namespace -``` - -### Install the chart locally - -```bash -helm install krawl ./helm -``` - -### Install with custom values - -```bash -helm install krawl ./helm -f values.yaml -``` - -### Install in a specific namespace - -```bash -helm install krawl ./helm -n krawl --create-namespace -``` +Then access the deception server at `http://:5000` ## Configuration @@ -221,16 +149,6 @@ The following table lists the main configuration parameters of the Krawl chart a | `resources.requests.cpu` | CPU request | `100m` | | `resources.requests.memory` | Memory request | `64Mi` | -### Autoscaling - -| Parameter | Description | Default | -|-----------|-------------|---------| -| `autoscaling.enabled` | Enable horizontal pod autoscaling | `false` | -| `autoscaling.minReplicas` | Minimum replicas | `1` | -| `autoscaling.maxReplicas` | Maximum replicas | `1` | -| `autoscaling.targetCPUUtilizationPercentage` | Target CPU utilization | `70` | -| `autoscaling.targetMemoryUtilizationPercentage` | Target memory utilization | `80` | - ### Network Policy | Parameter | 
Description | Default | @@ -248,68 +166,24 @@ kubectl get secret krawl-server -n krawl-system \ ## Usage Examples -### Basic Installation +You can override individual values with `--set` without a values file: ```bash -helm install krawl ./helm -``` - -### Installation with Custom Domain - -```bash -helm install krawl ./helm \ - --set ingress.hosts[0].host=honeypot.example.com -``` - -### Enable Canary Tokens - -```bash -helm install krawl ./helm \ +helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.0 \ + --set ingress.hosts[0].host=honeypot.example.com \ --set config.canary.token_url=https://canarytokens.com/your-token ``` -### Configure Custom API Endpoint - -```bash -helm install krawl ./helm \ - --set config.api.server_url=https://api.example.com \ - --set config.api.server_port=443 -``` - -### Create Values Override File - -Create `custom-values.yaml`: - -```yaml -config: - server: - port: 8080 - delay: 500 - canary: - token_url: https://your-canary-token-url - dashboard: - secret_path: /super-secret-path - crawl: - max_pages_limit: 500 - ban_duration_seconds: 3600 -``` - -Then install: - -```bash -helm install krawl ./helm -f custom-values.yaml -``` - ## Upgrading ```bash -helm upgrade krawl ./helm +helm upgrade krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.0 -f values.yaml ``` ## Uninstalling ```bash -helm uninstall krawl +helm uninstall krawl -n krawl-system ``` ## Troubleshooting @@ -348,7 +222,6 @@ kubectl logs -l app.kubernetes.io/name=krawl - `configmap.yaml` - Application configuration - `pvc.yaml` - Persistent volume claim - `ingress.yaml` - Ingress configuration - - `hpa.yaml` - Horizontal pod autoscaler - `network-policy.yaml` - Network policies ## Support diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index 3676817..730f774 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -5,9 +5,7 @@ metadata: labels: {{- include "krawl.labels" . 
| nindent 4 }} spec: - {{- if not .Values.autoscaling.enabled }} replicas: {{ .Values.replicaCount }} - {{- end }} strategy: type: Recreate selector: diff --git a/helm/templates/hpa.yaml b/helm/templates/hpa.yaml deleted file mode 100644 index 0f64b10..0000000 --- a/helm/templates/hpa.yaml +++ /dev/null @@ -1,32 +0,0 @@ -{{- if .Values.autoscaling.enabled }} -apiVersion: autoscaling/v2 -kind: HorizontalPodAutoscaler -metadata: - name: {{ include "krawl.fullname" . }} - labels: - {{- include "krawl.labels" . | nindent 4 }} -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: {{ include "krawl.fullname" . }} - minReplicas: {{ .Values.autoscaling.minReplicas }} - maxReplicas: {{ .Values.autoscaling.maxReplicas }} - metrics: - {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} - - type: Resource - resource: - name: cpu - target: - type: Utilization - averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} - {{- end }} - {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} - - type: Resource - resource: - name: memory - target: - type: Utilization - averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} - {{- end }} -{{- end }} diff --git a/helm/values.yaml b/helm/values.yaml index df4df23..8b4a907 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -3,7 +3,7 @@ replicaCount: 1 image: repository: ghcr.io/blessedrebus/krawl pullPolicy: Always - tag: "1.0.0" + tag: "1.1.0" imagePullSecrets: [] nameOverride: "krawl" @@ -54,13 +54,6 @@ resources: # If not set, container will use its default timezone timezone: "" -autoscaling: - enabled: false - minReplicas: 1 - maxReplicas: 1 - targetCPUUtilizationPercentage: 70 - targetMemoryUtilizationPercentage: 80 - nodeSelector: {} tolerations: [] @@ -579,7 +572,7 @@ wordlists: xxe_injection: "(=2.32.5 # Web framework fastapi>=0.115.0 uvicorn[standard]>=0.30.0 -jinja2>=3.1.0 +jinja2>=3.1.5 python-multipart>=0.0.9 \ No newline at end of file diff 
--git a/src/config.py b/src/config.py index 8344883..cb46bf6 100644 --- a/src/config.py +++ b/src/config.py @@ -94,7 +94,7 @@ class Config: ip = response.text.strip() if ip: break - except Exception: + except requests.RequestException: continue if not ip: diff --git a/src/deception_responses.py b/src/deception_responses.py index e8ec551..6e90ed3 100644 --- a/src/deception_responses.py +++ b/src/deception_responses.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import re -import random +import secrets import logging import json from typing import Optional, Tuple, Dict @@ -9,6 +9,7 @@ from generators import random_username, random_password, random_email from wordlists import get_wordlists logger = logging.getLogger("krawl") +_sysrand = secrets.SystemRandom() def detect_path_traversal(path: str, query: str = "", body: str = "") -> bool: @@ -86,7 +87,7 @@ def generate_fake_passwd() -> str: shells = passwd_config.get("shells", ["/bin/bash"]) fake_users = [ - f"{random_username()}:x:{random.randint(uid_min, uid_max)}:{random.randint(gid_min, gid_max)}::/home/{random_username()}:{random.choice(shells)}" + f"{random_username()}:x:{_sysrand.randint(uid_min, uid_max)}:{_sysrand.randint(gid_min, gid_max)}::/home/{random_username()}:{secrets.choice(shells)}" for _ in range(3) ] @@ -108,7 +109,7 @@ def generate_fake_shadow() -> str: hash_length = shadow_config.get("hash_length", 86) fake_entries = [ - f"{random_username()}:{hash_prefix}{''.join(random.choices('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', k=salt_length))}${''.join(random.choices('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', k=hash_length))}:19000:0:99999:7:::" + f"{random_username()}:{hash_prefix}{''.join(_sysrand.choices('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', k=salt_length))}${''.join(_sysrand.choices('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', k=hash_length))}:19000:0:99999:7:::" for _ in range(3) ] @@ -147,9 +148,9 @@ 
SECRET_TOKEN=fake_secret_token_xyz""", return f"""# Configuration File api_endpoint = https://api.example.com -api_key = fake_key_{random.randint(1000, 9999)} +api_key = fake_key_{_sysrand.randint(1000, 9999)} database_url = mysql://user:fake_pass@localhost/db -secret = fake_secret_{random.randint(10000, 99999)} +secret = fake_secret_{_sysrand.randint(10000, 99999)} """ @@ -167,7 +168,7 @@ def generate_fake_directory_listing(path: str) -> str: directories = [(d["name"], d["size"], d["perms"]) for d in fake_dirs] files = [ - (f["name"], str(random.randint(f["size_min"], f["size_max"])), f["perms"]) + (f["name"], str(_sysrand.randint(f["size_min"], f["size_max"])), f["perms"]) for f in fake_files ] @@ -208,7 +209,7 @@ def generate_path_traversal_response(path: str) -> Tuple[str, str, int]: if "proc/self" in path_lower: logger.debug("Returning fake proc info") - return (f"{random.randint(1000, 9999)}", "text/plain", 200) + return (f"{_sysrand.randint(1000, 9999)}", "text/plain", 200) logger.debug("Returning fake directory listing") return (generate_fake_directory_listing(path), "text/html", 200) @@ -246,7 +247,7 @@ def generate_xxe_response(body: str) -> Tuple[str, str, int]: if xxe_config and "entity_processed" in xxe_config: template = xxe_config["entity_processed"]["template"] entity_values = xxe_config["entity_processed"]["entity_values"] - entity_value = random.choice(entity_values) + entity_value = secrets.choice(entity_values) response = template.replace("{entity_value}", entity_value) else: response = """ @@ -260,7 +261,7 @@ def generate_xxe_response(body: str) -> Tuple[str, str, int]: if xxe_config and "error" in xxe_config: template = xxe_config["error"]["template"] messages = xxe_config["error"]["messages"] - message = random.choice(messages) + message = secrets.choice(messages) response = template.replace("{message}", message) else: response = """ @@ -281,22 +282,22 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int] # id 
command if re.search(r"\bid\b", input_lower): if cmd_config and "id" in cmd_config: - uid = random.randint( + uid = _sysrand.randint( cmd_config.get("uid_min", 1000), cmd_config.get("uid_max", 2000) ) - gid = random.randint( + gid = _sysrand.randint( cmd_config.get("gid_min", 1000), cmd_config.get("gid_max", 2000) ) - template = random.choice(cmd_config["id"]) + template = secrets.choice(cmd_config["id"]) output = template.replace("{uid}", str(uid)).replace("{gid}", str(gid)) else: - output = f"uid={random.randint(1000, 2000)}(www-data) gid={random.randint(1000, 2000)}(www-data) groups={random.randint(1000, 2000)}(www-data)" + output = f"uid={_sysrand.randint(1000, 2000)}(www-data) gid={_sysrand.randint(1000, 2000)}(www-data) groups={_sysrand.randint(1000, 2000)}(www-data)" return (output, "text/plain", 200) # whoami command if re.search(r"\bwhoami\b", input_lower): users = cmd_config.get("whoami", ["www-data"]) if cmd_config else ["www-data"] - return (random.choice(users), "text/plain", 200) + return (secrets.choice(users), "text/plain", 200) # uname command if re.search(r"\buname\b", input_lower): @@ -305,7 +306,7 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int] if cmd_config else ["Linux server 5.4.0 x86_64"] ) - return (random.choice(outputs), "text/plain", 200) + return (secrets.choice(outputs), "text/plain", 200) # pwd command if re.search(r"\bpwd\b", input_lower): @@ -314,16 +315,16 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int] if cmd_config else ["/var/www/html"] ) - return (random.choice(paths), "text/plain", 200) + return (secrets.choice(paths), "text/plain", 200) # ls command if re.search(r"\bls\b", input_lower): if cmd_config and "ls" in cmd_config: - files = random.choice(cmd_config["ls"]) + files = secrets.choice(cmd_config["ls"]) else: files = ["index.php", "config.php", "uploads"] output = "\n".join( - random.sample(files, k=random.randint(3, min(6, len(files)))) + 
_sysrand.sample(files, k=_sysrand.randint(3, min(6, len(files)))) ) return (output, "text/plain", 200) @@ -351,27 +352,27 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int] if any(cmd in input_lower for cmd in ["wget", "curl", "nc", "netcat"]): if cmd_config and "network_commands" in cmd_config: outputs = cmd_config["network_commands"] - output = random.choice(outputs) + output = secrets.choice(outputs) if "{size}" in output: - size = random.randint( + size = _sysrand.randint( cmd_config.get("download_size_min", 100), cmd_config.get("download_size_max", 10000), ) output = output.replace("{size}", str(size)) else: outputs = ["bash: command not found", "Connection timeout"] - output = random.choice(outputs) + output = secrets.choice(outputs) return (output, "text/plain", 200) # generic outputs if cmd_config and "generic" in cmd_config: generic_outputs = cmd_config["generic"] - output = random.choice(generic_outputs) + output = secrets.choice(generic_outputs) if "{num}" in output: - output = output.replace("{num}", str(random.randint(1, 99))) + output = output.replace("{num}", str(_sysrand.randint(1, 99))) else: generic_outputs = ["", "Command executed successfully", "sh: syntax error"] - output = random.choice(generic_outputs) + output = secrets.choice(generic_outputs) return (output, "text/plain", 200) @@ -414,7 +415,7 @@ def get_random_sql_error( return ("Database error occurred", "text/plain") if not db_type: - db_type = random.choice(list(sql_errors.keys())) + db_type = secrets.choice(list(sql_errors.keys())) db_errors = sql_errors.get(db_type, {}) @@ -429,15 +430,15 @@ def get_random_sql_error( all_errors.extend(error_list) errors = all_errors if all_errors else ["Database error occurred"] - error_message = random.choice(errors) if errors else "Database error occurred" + error_message = secrets.choice(errors) if errors else "Database error occurred" if "{table}" in error_message: tables = ["users", "products", "orders", 
"customers", "accounts", "sessions"] - error_message = error_message.replace("{table}", random.choice(tables)) + error_message = error_message.replace("{table}", secrets.choice(tables)) if "{column}" in error_message: columns = ["id", "name", "email", "password", "username", "created_at"] - error_message = error_message.replace("{column}", random.choice(columns)) + error_message = error_message.replace("{column}", secrets.choice(columns)) return (error_message, "text/plain") @@ -455,7 +456,7 @@ def generate_sql_error_response( status_code = 500 - if random.random() < 0.3: + if _sysrand.random() < 0.3: status_code = 200 logger.info(f"SQL injection detected: {injection_type}") @@ -475,9 +476,9 @@ def get_sql_response_with_data(path: str, params: str) -> str: "username": random_username(), "email": random_email(), "password_hash": random_password(), - "role": random.choice(["admin", "user", "moderator"]), + "role": secrets.choice(["admin", "user", "moderator"]), } - for i in range(1, random.randint(2, 5)) + for i in range(1, _sysrand.randint(2, 5)) ], } return json.dumps(data, indent=2) @@ -570,7 +571,7 @@ def generate_server_error() -> Tuple[str, str]: if not server_errors: return ("500 Internal Server Error", "text/html") - server_type = random.choice(list(server_errors.keys())) + server_type = secrets.choice(list(server_errors.keys())) server_config = server_errors[server_type] error_codes = { @@ -583,18 +584,18 @@ def generate_server_error() -> Tuple[str, str]: 503: "Service Unavailable", } - code = random.choice(list(error_codes.keys())) + code = secrets.choice(list(error_codes.keys())) message = error_codes[code] template = server_config.get("template", "") - version = random.choice(server_config.get("versions", ["1.0"])) + version = secrets.choice(server_config.get("versions", ["1.0"])) html = template.replace("{code}", str(code)) html = html.replace("{message}", message) html = html.replace("{version}", version) if server_type == "apache": - os = 
random.choice(server_config.get("os", ["Ubuntu"])) + os = secrets.choice(server_config.get("os", ["Ubuntu"])) html = html.replace("{os}", os) html = html.replace("{host}", "localhost") @@ -611,10 +612,10 @@ def get_server_header(server_type: str = None) -> str: return "nginx/1.18.0" if not server_type: - server_type = random.choice(list(server_errors.keys())) + server_type = secrets.choice(list(server_errors.keys())) server_config = server_errors.get(server_type, {}) - version = random.choice(server_config.get("versions", ["1.0"])) + version = secrets.choice(server_config.get("versions", ["1.0"])) server_headers = { "nginx": f"nginx/{version}", diff --git a/src/tasks/db_retention.py b/src/tasks/db_retention.py index b4feaa7..af803c6 100644 --- a/src/tasks/db_retention.py +++ b/src/tasks/db_retention.py @@ -77,5 +77,5 @@ def main(): finally: try: db.close_session() - except Exception: - pass + except Exception as e: + app_logger.error(f"Error closing DB session after retention cleanup: {e}") diff --git a/src/tracker.py b/src/tracker.py index c52cf24..c5683b7 100644 --- a/src/tracker.py +++ b/src/tracker.py @@ -1,12 +1,15 @@ #!/usr/bin/env python3 from typing import Dict, Tuple, Optional +import logging import re import urllib.parse from wordlists import get_wordlists from database import get_database, DatabaseManager +logger = logging.getLogger("krawl") + # Module-level singleton for background task access _tracker_instance: "AccessTracker | None" = None @@ -103,9 +106,8 @@ class AccessTracker: if self._db_manager is None: try: self._db_manager = get_database() - except Exception: - # Database not initialized, persistence disabled - pass + except Exception as e: + logger.error(f"Failed to initialize database manager: {e}") return self._db_manager def parse_credentials(self, post_data: str) -> Tuple[str, str]: @@ -206,9 +208,8 @@ class AccessTracker: self.db.persist_credential( ip=ip, path=path, username=username, password=password ) - except Exception: - # Don't 
crash if database persistence fails - pass + except Exception as e: + logger.error(f"Failed to persist credential attempt: {e}") def record_access( self, @@ -271,9 +272,8 @@ class AccessTracker: attack_types=attack_findings if attack_findings else None, raw_request=raw_request if raw_request else None, ) - except Exception: - # Don't crash if database persistence fails - pass + except Exception as e: + logger.error(f"Failed to persist access record: {e}") def detect_attack_type(self, data: str) -> list[str]: """ diff --git a/wordlists.json b/wordlists.json index 23082da..c4b247a 100644 --- a/wordlists.json +++ b/wordlists.json @@ -470,7 +470,7 @@ "xxe_injection": "(