Merge pull request #110 from BlessedRebuS/feat/realease-1.1

Feat/realease 1.1
This commit is contained in:
Lorenzo Venerandi
2026-03-01 21:59:29 +01:00
committed by GitHub
27 changed files with 315 additions and 468 deletions

View File

@@ -20,7 +20,7 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: '3.11'
python-version: '3.13'
cache: 'pip'
- name: Install dependencies

View File

@@ -19,7 +19,7 @@ jobs:
- uses: actions/setup-python@v5
with:
python-version: '3.11'
python-version: '3.13'
cache: 'pip'
- name: Install dependencies
@@ -48,12 +48,4 @@ jobs:
- name: Safety check for dependencies
run: safety check --json || true
- name: Trivy vulnerability scan
uses: aquasecurity/trivy-action@0.31.0
with:
scan-type: 'fs'
scan-ref: '.'
format: 'table'
severity: 'CRITICAL,HIGH'
exit-code: '1'

View File

@@ -1,15 +1,16 @@
FROM python:3.11-slim
FROM python:3.13-slim
LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl
WORKDIR /app
# Install gosu for dropping privileges
RUN apt-get update && apt-get install -y --no-install-recommends gosu && \
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends gosu && \
rm -rf /var/lib/apt/lists/*
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt
RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r requirements.txt
COPY src/ /app/src/
COPY wordlists.json /app/

292
README.md
View File

@@ -33,21 +33,25 @@
<img src="https://img.shields.io/badge/helm-chart-0F1689?logo=helm&logoColor=white" alt="Helm Chart">
</a>
</div>
<br>
<p align="center">
<a href="#what-is-krawl">What is Krawl?</a> •
<a href="#-installation">Installation</a> •
<a href="#honeypot-pages">Honeypot Pages</a> •
<a href="#dashboard">Dashboard</a> •
<a href="./ToDo.md">Todo</a> •
<a href="#-contributing">Contributing</a>
</p>
<br>
</div>
## Table of Contents
- [Demo](#demo)
- [What is Krawl?](#what-is-krawl)
- [Krawl Dashboard](#krawl-dashboard)
- [Installation](#-installation)
- [Docker Run](#docker-run)
- [Docker Compose](#docker-compose)
- [Kubernetes](#kubernetes)
- [Configuration](#configuration)
- [config.yaml](#configuration-via-configyaml)
- [Environment Variables](#configuration-via-enviromental-variables)
- [Ban Malicious IPs](#use-krawl-to-ban-malicious-ips)
- [IP Reputation](#ip-reputation)
- [Forward Server Header](#forward-server-header)
- [Additional Documentation](#additional-documentation)
- [Contributing](#-contributing)
## Demo
Tip: crawl the `robots.txt` paths for additional fun
### Krawl URL: [http://demo.krawlme.com](http://demo.krawlme.com)
@@ -67,7 +71,7 @@ It features:
- **Fake Login Pages**: WordPress, phpMyAdmin, admin panels
- **Honeypot Paths**: Advertised in robots.txt to catch scanners
- **Fake Credentials**: Realistic-looking usernames, passwords, API keys
- **[Canary Token](#customizing-the-canary-token) Integration**: External alert triggering
- **[Canary Token](docs/canary-token.md) Integration**: External alert triggering
- **Random server headers**: Confuse attacks based on server header and version
- **Real-time Dashboard**: Monitor suspicious activity
- **Customizable Wordlists**: Easy JSON-based configuration
@@ -75,8 +79,28 @@ It features:
![dashboard](img/deception-page.png)
## Krawl Dashboard
Krawl provides a comprehensive dashboard, accessible at a **random secret path** generated at startup or at a **custom path** configured via `KRAWL_DASHBOARD_SECRET_PATH`. This keeps the dashboard hidden from attackers scanning your honeypot.
The dashboard is organized in three main tabs:
- **Overview** — High-level view of attack activity: an interactive map of IP origins, recent suspicious requests, and top IPs, User-Agents, and paths.
![geoip](img/geoip_dashboard.png)
- **Attacks** — Detailed breakdown of captured credentials, honeypot triggers, and detected attack types (SQLi, XSS, path traversal, etc.) with charts and tables.
![attack_types](img/attack_types.png)
- **IP Insight** — In-depth forensic view of a selected IP: geolocation, ISP/ASN info, reputation flags, behavioral timeline, attack type distribution, and full access history.
![ipinsight](img/ip_insight_dashboard.png)
For more details, see the [Dashboard documentation](docs/dashboard.md).
## 🚀 Installation
### Docker Run
@@ -89,7 +113,7 @@ docker run -d \
-e KRAWL_PORT=5000 \
-e KRAWL_DELAY=100 \
-e KRAWL_DASHBOARD_SECRET_PATH="/my-secret-dashboard" \
-e KRAWL_DATABASE_RETENTION_DAYS=30 \
-v krawl-data:/app/data \
--name krawl \
ghcr.io/blessedrebus/krawl:latest
```
@@ -136,64 +160,13 @@ docker-compose down
### Kubernetes
**Krawl is also available natively on Kubernetes**. Installation can be done either [via manifest](kubernetes/README.md) or [using the helm chart](helm/README.md).
## Use Krawl to Ban Malicious IPs
Krawl uses a reputation-based system to classify attacker IP addresses. Every five minutes, Krawl exports the identified malicious IPs to a `malicious_ips.txt` file.
This file can either be mounted from the Docker container into another system or downloaded directly via `curl`:
```bash
curl https://your-krawl-instance/<DASHBOARD-PATH>/api/download/malicious_ips.txt
```
This file enables automatic blocking of malicious traffic across various platforms. You can use it to update firewall rules on:
* [OPNsense and pfSense](https://www.allthingstech.ch/using-opnsense-and-ip-blocklists-to-block-malicious-traffic)
* [RouterOS](https://rentry.co/krawl-routeros)
* [IPtables](plugins/iptables/README.md) and [Nftables](plugins/nftables/README.md)
* [Fail2Ban](plugins/fail2ban/README.md)
## IP Reputation
Krawl [uses tasks that analyze recent traffic to build and continuously update an IP reputation](src/tasks/analyze_ips.py) score. It runs periodically and evaluates each active IP address based on multiple behavioral indicators to classify it as an attacker, crawler, or regular user. Thresholds are fully customizable.
![ip reputation](img/ip-reputation.png)
The analysis includes:
- **Risky HTTP methods usage** (e.g. POST, PUT, DELETE ratios)
- **Robots.txt violations**
- **Request timing anomalies** (bursty or irregular patterns)
- **User-Agent consistency**
- **Attack URL detection** (e.g. SQL injection, XSS patterns)
Each signal contributes to a weighted scoring model that assigns a reputation category:
- `attacker`
- `bad_crawler`
- `good_crawler`
- `regular_user`
- `unknown` (for insufficient data)
The resulting scores and metrics are stored in the database and used by Krawl to drive dashboards, reputation tracking, and automated mitigation actions such as IP banning or firewall integration.
## Forward server header
If Krawl is deployed behind a proxy such as NGINX the **server header** should be forwarded using the following configuration in your proxy:
```bash
location / {
proxy_pass https://your-krawl-instance;
proxy_pass_header Server;
}
```
## API
Krawl uses the following APIs
- http://ip-api.com (IP Data)
- https://iprep.lcrawl.com (IP Reputation)
- https://nominatim.openstreetmap.org/reverse (Reverse IP Lookup)
- https://api.ipify.org (Public IP discovery)
- http://ident.me (Public IP discovery)
- https://ifconfig.me (Public IP discovery)
## Configuration
Krawl uses a **configuration hierarchy** in which **environment variables take precedence over the configuration file**. This approach is recommended for Docker deployments and quick out-of-the-box customization.
### Configuration via config.yaml
You can use the [config.yaml](config.yaml) file for advanced configurations, such as Docker Compose or Helm chart deployments.
### Configuration via Enviromental Variables
| Environment Variable | Description | Default |
@@ -256,153 +229,63 @@ docker run -d \
ghcr.io/blessedrebus/krawl:latest
```
### Configuration via config.yaml
You can use the [config.yaml](config.yaml) file for more advanced configurations, such as Docker Compose or Helm chart deployments.
## Use Krawl to Ban Malicious IPs
Krawl uses a reputation-based system to classify attacker IP addresses. Every five minutes, Krawl exports the identified malicious IPs to a `malicious_ips.txt` file.
# Honeypot
Below is a complete overview of the Krawl honeypot's capabilities
This file can either be mounted from the Docker container into another system or downloaded directly via `curl`:
## robots.txt
The actual (juicy) robots.txt configuration [is the following](src/templates/html/robots.txt).
```bash
curl https://your-krawl-instance/<DASHBOARD-PATH>/api/download/malicious_ips.txt
```
## Honeypot pages
This file enables automatic blocking of malicious traffic across various platforms. You can use it to update firewall rules on:
* [OPNsense and pfSense](https://www.allthingstech.ch/using-opnsense-and-ip-blocklists-to-block-malicious-traffic)
* [RouterOS](https://rentry.co/krawl-routeros)
* [IPtables](plugins/iptables/README.md) and [Nftables](plugins/nftables/README.md)
* [Fail2Ban](plugins/fail2ban/README.md)
### Common Login Attempts
Requests to common admin endpoints (`/admin/`, `/wp-admin/`, `/phpMyAdmin/`) return a fake login page. Any login attempt triggers a 1-second delay to simulate real processing and is fully logged in the dashboard (credentials, IP, headers, timing).
## IP Reputation
Krawl [uses tasks that analyze recent traffic to build and continuously update an IP reputation](src/tasks/analyze_ips.py) score. It runs periodically and evaluates each active IP address based on multiple behavioral indicators to classify it as an attacker, crawler, or regular user. Thresholds are fully customizable.
![admin page](img/admin-page.png)
![ip reputation](img/ip-reputation.png)
### Common Misconfiguration Paths
Requests to paths like `/backup/`, `/config/`, `/database/`, `/private/`, or `/uploads/` return a fake directory listing populated with “interesting” files, each assigned a random file size to look realistic.
The analysis includes:
- **Risky HTTP methods usage** (e.g. POST, PUT, DELETE ratios)
- **Robots.txt violations**
- **Request timing anomalies** (bursty or irregular patterns)
- **User-Agent consistency**
- **Attack URL detection** (e.g. SQL injection, XSS patterns)
![directory-page](img/directory-page.png)
Each signal contributes to a weighted scoring model that assigns a reputation category:
- `attacker`
- `bad_crawler`
- `good_crawler`
- `regular_user`
- `unknown` (for insufficient data)
### Environment File Leakage
The `.env` endpoint exposes fake database connection strings, **AWS API keys**, and **Stripe secrets**. It intentionally returns an error due to the `Content-Type` being `application/json` instead of plain text, mimicking a "juicy" misconfiguration that crawlers and scanners often flag as information leakage.
The resulting scores and metrics are stored in the database and used by Krawl to drive dashboards, reputation tracking, and automated mitigation actions such as IP banning or firewall integration.
### Server Error Information
The `/server` page displays randomly generated fake error information for each known server.
![server and env page](img/server-and-env-page.png)
### API Endpoints with Sensitive Data
The pages `/api/v1/users` and `/api/v2/secrets` show fake users and random secrets in JSON format
![users and secrets](img/users-and-secrets.png)
### Exposed Credential Files
The pages `/credentials.txt` and `/passwords.txt` show fake users and random secrets
![credentials and passwords](img/credentials-and-passwords.png)
### SQL Injection and XSS Detection
Pages such as `/users`, `/search`, `/contact`, `/info`, `/input`, and `/feedback`, along with APIs like `/api/sql` and `/api/database`, are designed to lure attackers into performing attacks such as **SQL injection** or **XSS**.
![sql injection](img/sql_injection.png)
Automated tools like **SQLMap** will receive a different randomized database error on each request, increasing scan noise and confusing the attacker. All detected attacks are logged and displayed in the dashboard.
### Path Traversal Detection
Krawl detects and responds to **path traversal** attempts targeting common system files like `/etc/passwd`, `/etc/shadow`, or Windows system paths. When an attacker tries to access sensitive files using patterns like `../../../etc/passwd` or encoded variants (`%2e%2e/`, `%252e`), Krawl returns convincing fake file contents with realistic system users, UIDs, GIDs, and shell configurations. This wastes attacker time while logging the full attack pattern.
### XXE (XML External Entity) Injection
The `/api/xml` and `/api/parser` endpoints accept XML input and are designed to detect **XXE injection** attempts. When attackers try to exploit external entity declarations (`<!ENTITY`, `<!DOCTYPE`, `SYSTEM`) or reference entities to access local files, Krawl responds with realistic XML responses that appear to process the entities successfully. The honeypot returns fake file contents, simulated entity values (like `admin_credentials` or `database_connection`), or realistic error messages, making the attack appear successful while fully logging the payload.
### Command Injection Detection
Pages like `/api/exec`, `/api/run`, and `/api/system` simulate command execution endpoints vulnerable to **command injection**. When attackers attempt to inject shell commands using patterns like `; whoami`, `| cat /etc/passwd`, or backticks, Krawl responds with realistic command outputs. For example, `whoami` returns fake usernames like `www-data` or `nginx`, while `uname` returns fake Linux kernel versions. Network commands like `wget` or `curl` simulate downloads or return "command not found" errors, creating believable responses that delay and confuse automated exploitation tools.
## Example usage behind reverse proxy
You can configure a reverse proxy so all web requests land on the Krawl page by default, and hide your real content behind a secret hidden url. For example:
## Forward server header
If Krawl is deployed behind a proxy such as NGINX the **server header** should be forwarded using the following configuration in your proxy:
```bash
location / {
proxy_pass https://your-krawl-instance;
proxy_pass_header Server;
}
location /my-hidden-service {
proxy_pass https://my-hidden-service;
proxy_pass_header Server;
}
```
Alternatively, you can create a bunch of different "interesting" looking domains. For example:
## Additional Documentation
- admin.example.com
- portal.example.com
- sso.example.com
- login.example.com
- ...
Additionally, you may configure your reverse proxy to forward all non-existing subdomains (e.g. nonexistent.example.com) to one of these domains so that any crawlers that are guessing domains at random will automatically end up at your Krawl instance.
## Enable database dump job for backups
To enable the database dump job, set the following variables (*config file example*)
```yaml
backups:
path: "backups" # where backup will be saved
cron: "*/30 * * * *" # frequency of the cronjob
enabled: true
```
## Customizing the Canary Token
To create a custom canary token, visit https://canarytokens.org
and generate a “Web bug” canary token.
This optional token is triggered when a crawler fully traverses the webpage until it reaches 0. At that point, a URL is returned. When this URL is requested, it sends an alert to the user via email, including the visitor's IP address and user agent.
To enable this feature, set the canary token URL [using the environment variable](#configuration-via-enviromental-variables) `KRAWL_CANARY_TOKEN_URL`.
## Customizing the wordlist
Edit `wordlists.json` to customize fake data for your use case
```json
{
"usernames": {
"prefixes": ["admin", "root", "user"],
"suffixes": ["_prod", "_dev", "123"]
},
"passwords": {
"prefixes": ["P@ssw0rd", "Admin"],
"simple": ["test", "password"]
},
"directory_listing": {
"files": ["credentials.txt", "backup.sql"],
"directories": ["admin/", "backup/"]
}
}
```
or **values.yaml** in the case of helm chart installation
## Dashboard
Access the dashboard at `http://<server-ip>:<port>/<dashboard-path>`
The dashboard shows:
- Total and unique accesses
- Suspicious activity and attack detection
- Top IPs, paths, user-agents and GeoIP localization
- Real-time monitoring
The attackers' access to the honeypot endpoint and related suspicious activities (such as failed login attempts) are logged.
Krawl also implements a scoring system designed to distinguish between malicious and legitimate behavior on the website.
![dashboard-1](img/dashboard-1.png)
The top IP addresses are shown along with top paths and User Agents
![dashboard-2](img/dashboard-2.png)
![dashboard-3](img/dashboard-3.png)
| Topic | Description |
|-------|-------------|
| [API](docs/api.md) | External APIs used by Krawl for IP data, reputation, and geolocation |
| [Honeypot](docs/honeypot.md) | Full overview of honeypot pages: fake logins, directory listings, credential files, SQLi/XSS/XXE/command injection traps, and more |
| [Reverse Proxy](docs/reverse-proxy.md) | How to deploy Krawl behind NGINX or use decoy subdomains |
| [Database Backups](docs/backups.md) | Enable and configure the automatic database dump job |
| [Canary Token](docs/canary-token.md) | Set up external alert triggers via canarytokens.org |
| [Wordlist](docs/wordlist.md) | Customize fake usernames, passwords, and directory listings |
| [Dashboard](docs/dashboard.md) | Access and explore the real-time monitoring dashboard |
## 🤝 Contributing
@@ -413,14 +296,9 @@ Contributions welcome! Please:
4. Submit a pull request (explain the changes!)
<div align="center">
## ⚠️ Disclaimer
**This is a deception/honeypot system.**
Deploy in isolated environments and monitor carefully for security events.
Use responsibly and in compliance with applicable laws and regulations.
## Disclaimer
> [!CAUTION]
> This is a deception/honeypot system. Deploy in isolated environments and monitor carefully for security events. Use responsibly and in compliance with applicable laws and regulations.
## Star History
<img src="https://api.star-history.com/svg?repos=BlessedRebuS/Krawl&type=Date" width="600" alt="Star History Chart" />
<img src="https://api.star-history.com/svg?repos=BlessedRebuS/Krawl&type=Date" width="600" alt="Star History Chart" />

View File

@@ -1,5 +0,0 @@
# Krawl - Todo List
- Add Prometheus exporter for metrics
- Add POST credentials information (e.g.: username and password used)
- Add CloudFlare error pages

9
docs/api.md Normal file
View File

@@ -0,0 +1,9 @@
# API
Krawl uses the following APIs
- http://ip-api.com (IP Data)
- https://iprep.lcrawl.com (IP Reputation)
- https://nominatim.openstreetmap.org/reverse (Reverse IP Lookup)
- https://api.ipify.org (Public IP discovery)
- http://ident.me (Public IP discovery)
- https://ifconfig.me (Public IP discovery)

10
docs/backups.md Normal file
View File

@@ -0,0 +1,10 @@
# Enable Database Dump Job for Backups
To enable the database dump job, set the following variables (*config file example*)
```yaml
backups:
path: "backups" # where backup will be saved
cron: "*/30 * * * *" # frequency of the cronjob
enabled: true
```

10
docs/canary-token.md Normal file
View File

@@ -0,0 +1,10 @@
# Customizing the Canary Token
To create a custom canary token, visit https://canarytokens.org
and generate a "Web bug" canary token.
This optional token is triggered when a crawler fully traverses the webpage until it reaches 0. At that point, a URL is returned. When this URL is requested, it sends an alert to the user via email, including the visitor's IP address and user agent.
To enable this feature, set the canary token URL [using the environment variable](../README.md#configuration-via-enviromental-variables) `KRAWL_CANARY_TOKEN_URL`.

21
docs/dashboard.md Normal file
View File

@@ -0,0 +1,21 @@
# Dashboard
Access the dashboard at `http://<server-ip>:<port>/<dashboard-path>`
The dashboard shows:
- Total and unique accesses
- Suspicious activity and attack detection
- Top IPs, paths, user-agents and GeoIP localization
- Real-time monitoring
The attackers' access to the honeypot endpoint and related suspicious activities (such as failed login attempts) are logged.
Krawl also implements a scoring system designed to distinguish between malicious and legitimate behavior on the website.
![dashboard-1](../img/dashboard-1.png)
The top IP addresses are shown along with top paths and User Agents
![dashboard-2](../img/dashboard-2.png)
![dashboard-3](../img/dashboard-3.png)

52
docs/honeypot.md Normal file
View File

@@ -0,0 +1,52 @@
# Honeypot
Below is a complete overview of the Krawl honeypot's capabilities
## robots.txt
The actual (juicy) robots.txt configuration [is the following](../src/templates/html/robots.txt).
## Honeypot pages
### Common Login Attempts
Requests to common admin endpoints (`/admin/`, `/wp-admin/`, `/phpMyAdmin/`) return a fake login page. Any login attempt triggers a 1-second delay to simulate real processing and is fully logged in the dashboard (credentials, IP, headers, timing).
![admin page](../img/admin-page.png)
### Common Misconfiguration Paths
Requests to paths like `/backup/`, `/config/`, `/database/`, `/private/`, or `/uploads/` return a fake directory listing populated with "interesting" files, each assigned a random file size to look realistic.
![directory-page](../img/directory-page.png)
### Environment File Leakage
The `.env` endpoint exposes fake database connection strings, **AWS API keys**, and **Stripe secrets**. It intentionally returns an error due to the `Content-Type` being `application/json` instead of plain text, mimicking a "juicy" misconfiguration that crawlers and scanners often flag as information leakage.
### Server Error Information
The `/server` page displays randomly generated fake error information for each known server.
![server and env page](../img/server-and-env-page.png)
### API Endpoints with Sensitive Data
The pages `/api/v1/users` and `/api/v2/secrets` show fake users and random secrets in JSON format
![users and secrets](../img/users-and-secrets.png)
### Exposed Credential Files
The pages `/credentials.txt` and `/passwords.txt` show fake users and random secrets
![credentials and passwords](../img/credentials-and-passwords.png)
### SQL Injection and XSS Detection
Pages such as `/users`, `/search`, `/contact`, `/info`, `/input`, and `/feedback`, along with APIs like `/api/sql` and `/api/database`, are designed to lure attackers into performing attacks such as **SQL injection** or **XSS**.
![sql injection](../img/sql_injection.png)
Automated tools like **SQLMap** will receive a different randomized database error on each request, increasing scan noise and confusing the attacker. All detected attacks are logged and displayed in the dashboard.
### Path Traversal Detection
Krawl detects and responds to **path traversal** attempts targeting common system files like `/etc/passwd`, `/etc/shadow`, or Windows system paths. When an attacker tries to access sensitive files using patterns like `../../../etc/passwd` or encoded variants (`%2e%2e/`, `%252e`), Krawl returns convincing fake file contents with realistic system users, UIDs, GIDs, and shell configurations. This wastes attacker time while logging the full attack pattern.
### XXE (XML External Entity) Injection
The `/api/xml` and `/api/parser` endpoints accept XML input and are designed to detect **XXE injection** attempts. When attackers try to exploit external entity declarations (`<!ENTITY`, `<!DOCTYPE`, `SYSTEM`) or reference entities to access local files, Krawl responds with realistic XML responses that appear to process the entities successfully. The honeypot returns fake file contents, simulated entity values (like `admin_credentials` or `database_connection`), or realistic error messages, making the attack appear successful while fully logging the payload.
### Command Injection Detection
Pages like `/api/exec`, `/api/run`, and `/api/system` simulate command execution endpoints vulnerable to **command injection**. When attackers attempt to inject shell commands using patterns like `; whoami`, `| cat /etc/passwd`, or backticks, Krawl responds with realistic command outputs. For example, `whoami` returns fake usernames like `www-data` or `nginx`, while `uname` returns fake Linux kernel versions. Network commands like `wget` or `curl` simulate downloads or return "command not found" errors, creating believable responses that delay and confuse automated exploitation tools.

25
docs/reverse-proxy.md Normal file
View File

@@ -0,0 +1,25 @@
# Example Usage Behind Reverse Proxy
You can configure a reverse proxy so all web requests land on the Krawl page by default, and hide your real content behind a secret hidden url. For example:
```bash
location / {
proxy_pass https://your-krawl-instance;
proxy_pass_header Server;
}
location /my-hidden-service {
proxy_pass https://my-hidden-service;
proxy_pass_header Server;
}
```
Alternatively, you can create a bunch of different "interesting" looking domains. For example:
- admin.example.com
- portal.example.com
- sso.example.com
- login.example.com
- ...
Additionally, you may configure your reverse proxy to forward all non-existing subdomains (e.g. nonexistent.example.com) to one of these domains so that any crawlers that are guessing domains at random will automatically end up at your Krawl instance.

22
docs/wordlist.md Normal file
View File

@@ -0,0 +1,22 @@
# Customizing the Wordlist
Edit `wordlists.json` to customize fake data for your use case
```json
{
"usernames": {
"prefixes": ["admin", "root", "user"],
"suffixes": ["_prod", "_dev", "123"]
},
"passwords": {
"prefixes": ["P@ssw0rd", "Admin"],
"simple": ["test", "password"]
},
"directory_listing": {
"files": ["credentials.txt", "backup.sql"],
"directories": ["admin/", "backup/"]
}
}
```
or **values.yaml** in the case of helm chart installation

View File

@@ -2,8 +2,8 @@ apiVersion: v2
name: krawl-chart
description: A Helm chart for Krawl honeypot server
type: application
version: 1.0.10
appVersion: 1.0.10
version: 1.1.0
appVersion: 1.1.0
keywords:
- honeypot
- security

View File

@@ -10,103 +10,31 @@ A Helm chart for deploying the Krawl honeypot application on Kubernetes.
## Installation
### Helm Chart
Install with default values:
### From OCI Registry
```bash
helm install krawl oci://ghcr.io/blessedrebus/krawl-chart \
--version 1.0.0 \
--namespace krawl-system \
--create-namespace
```
Or create a minimal `values.yaml` file:
```yaml
service:
type: LoadBalancer
port: 5000
timezone: "Europe/Rome"
ingress:
enabled: true
className: "traefik"
hosts:
- host: krawl.example.com
paths:
- path: /
pathType: Prefix
config:
server:
port: 5000
delay: 100
dashboard:
secret_path: null # Auto-generated if not set
database:
persistence:
enabled: true
size: 1Gi
```
Install with custom values:
```bash
helm install krawl oci://ghcr.io/blessedrebus/krawl-chart \
--version 0.2.2 \
--version 1.1.0 \
--namespace krawl-system \
--create-namespace \
-f values.yaml
-f values.yaml # optional
```
To access the deception server:
### From local chart
```bash
helm install krawl ./helm -n krawl-system --create-namespace -f values.yaml
```
A minimal [values.yaml](values-minimal.yaml) example is provided in this directory.
Once installed, get your service IP:
```bash
kubectl get svc krawl -n krawl-system
```
Once the EXTERNAL-IP is assigned, access your deception server at `http://<EXTERNAL-IP>:5000`
### Add the repository (if applicable)
```bash
helm repo add krawl https://github.com/BlessedRebuS/Krawl
helm repo update
```
### Install from OCI Registry
```bash
helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 0.2.1
```
Or with a specific namespace:
```bash
helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 0.2.1 -n krawl --create-namespace
```
### Install the chart locally
```bash
helm install krawl ./helm
```
### Install with custom values
```bash
helm install krawl ./helm -f values.yaml
```
### Install in a specific namespace
```bash
helm install krawl ./helm -n krawl --create-namespace
```
Then access the deception server at `http://<EXTERNAL-IP>:5000`
## Configuration
@@ -221,16 +149,6 @@ The following table lists the main configuration parameters of the Krawl chart a
| `resources.requests.cpu` | CPU request | `100m` |
| `resources.requests.memory` | Memory request | `64Mi` |
### Autoscaling
| Parameter | Description | Default |
|-----------|-------------|---------|
| `autoscaling.enabled` | Enable horizontal pod autoscaling | `false` |
| `autoscaling.minReplicas` | Minimum replicas | `1` |
| `autoscaling.maxReplicas` | Maximum replicas | `1` |
| `autoscaling.targetCPUUtilizationPercentage` | Target CPU utilization | `70` |
| `autoscaling.targetMemoryUtilizationPercentage` | Target memory utilization | `80` |
### Network Policy
| Parameter | Description | Default |
@@ -248,68 +166,24 @@ kubectl get secret krawl-server -n krawl-system \
## Usage Examples
### Basic Installation
You can override individual values with `--set` without a values file:
```bash
helm install krawl ./helm
```
### Installation with Custom Domain
```bash
helm install krawl ./helm \
--set ingress.hosts[0].host=honeypot.example.com
```
### Enable Canary Tokens
```bash
helm install krawl ./helm \
helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.0 \
--set ingress.hosts[0].host=honeypot.example.com \
--set config.canary.token_url=https://canarytokens.com/your-token
```
### Configure Custom API Endpoint
```bash
helm install krawl ./helm \
--set config.api.server_url=https://api.example.com \
--set config.api.server_port=443
```
### Create Values Override File
Create `custom-values.yaml`:
```yaml
config:
server:
port: 8080
delay: 500
canary:
token_url: https://your-canary-token-url
dashboard:
secret_path: /super-secret-path
crawl:
max_pages_limit: 500
ban_duration_seconds: 3600
```
Then install:
```bash
helm install krawl ./helm -f custom-values.yaml
```
## Upgrading
```bash
helm upgrade krawl ./helm
helm upgrade krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.0 -f values.yaml
```
## Uninstalling
```bash
helm uninstall krawl
helm uninstall krawl -n krawl-system
```
## Troubleshooting
@@ -348,7 +222,6 @@ kubectl logs -l app.kubernetes.io/name=krawl
- `configmap.yaml` - Application configuration
- `pvc.yaml` - Persistent volume claim
- `ingress.yaml` - Ingress configuration
- `hpa.yaml` - Horizontal pod autoscaler
- `network-policy.yaml` - Network policies
## Support

View File

@@ -5,9 +5,7 @@ metadata:
labels:
{{- include "krawl.labels" . | nindent 4 }}
spec:
{{- if not .Values.autoscaling.enabled }}
replicas: {{ .Values.replicaCount }}
{{- end }}
strategy:
type: Recreate
selector:

View File

@@ -1,32 +0,0 @@
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: {{ include "krawl.fullname" . }}
labels:
{{- include "krawl.labels" . | nindent 4 }}
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: {{ include "krawl.fullname" . }}
minReplicas: {{ .Values.autoscaling.minReplicas }}
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
metrics:
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
{{- end }}
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
{{- end }}
{{- end }}

View File

@@ -3,7 +3,7 @@ replicaCount: 1
image:
repository: ghcr.io/blessedrebus/krawl
pullPolicy: Always
tag: "1.0.0"
tag: "1.1.0"
imagePullSecrets: []
nameOverride: "krawl"
@@ -54,13 +54,6 @@ resources:
# If not set, container will use its default timezone
timezone: ""
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 1
targetCPUUtilizationPercentage: 70
targetMemoryUtilizationPercentage: 80
nodeSelector: {}
tolerations: []
@@ -579,7 +572,7 @@ wordlists:
xxe_injection: "(<!ENTITY|<!DOCTYPE|SYSTEM\\s+[\"']|PUBLIC\\s+[\"']|&\\w+;|file://|php://filter|expect://)"
ldap_injection: "(\\*\\)|\\(\\||\\(&)"
command_injection: "(cmd=|exec=|command=|execute=|system=|ping=|host=|&&|\\|\\||;|\\$\\{|\\$\\(|`|\\bid\\b|\\bwhoami\\b|\\buname\\b|\\bcat\\b|\\bls\\b|\\bpwd\\b|\\becho\\b|\\bwget\\b|\\bcurl\\b|\\bnc\\b|\\bnetcat\\b|\\bbash\\b|\\bsh\\b|\\bps\\b|\\bkill\\b|\\bchmod\\b|\\bchown\\b|\\bcp\\b|\\bmv\\b|\\brm\\b|/bin/bash|/bin/sh|cmd\\.exe|/bin/|/usr/bin/|/sbin/)"
common_probes: "(/admin|/backup|/config|/database|/private|/uploads|/wp-admin|/login|/phpMyAdmin|/phpmyadmin|/users|/search|/contact|/info|/input|/feedback|/server|/api/v1/|/api/v2/|/api/search|/api/sql|/api/database|\\.env|/credentials\\.txt|/passwords\\.txt|\\.git|/backup\\.sql|/db_backup\\.sql)"
common_probes: "(/admin|/wp-admin|/phpMyAdmin|/phpmyadmin|/feedback|\\.env|/credentials\\.txt|/passwords\\.txt|\\.git|/backup\\.sql|/db_backup\\.sql)"
suspicious_patterns:
- sqlmap
- nessus

BIN
img/attack_types.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 97 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 179 KiB

After

Width:  |  Height:  |  Size: 353 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 106 KiB

View File

@@ -10,6 +10,5 @@ resources:
- service.yaml
- network-policy.yaml
- ingress.yaml
- hpa.yaml
namespace: krawl-system

View File

@@ -15,5 +15,5 @@ requests>=2.32.5
# Web framework
fastapi>=0.115.0
uvicorn[standard]>=0.30.0
jinja2>=3.1.0
jinja2>=3.1.5
python-multipart>=0.0.9

View File

@@ -94,7 +94,7 @@ class Config:
ip = response.text.strip()
if ip:
break
except Exception:
except requests.RequestException:
continue
if not ip:

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
import re
import random
import secrets
import logging
import json
from typing import Optional, Tuple, Dict
@@ -9,6 +9,7 @@ from generators import random_username, random_password, random_email
from wordlists import get_wordlists
logger = logging.getLogger("krawl")
_sysrand = secrets.SystemRandom()
def detect_path_traversal(path: str, query: str = "", body: str = "") -> bool:
@@ -86,7 +87,7 @@ def generate_fake_passwd() -> str:
shells = passwd_config.get("shells", ["/bin/bash"])
fake_users = [
f"{random_username()}:x:{random.randint(uid_min, uid_max)}:{random.randint(gid_min, gid_max)}::/home/{random_username()}:{random.choice(shells)}"
f"{random_username()}:x:{_sysrand.randint(uid_min, uid_max)}:{_sysrand.randint(gid_min, gid_max)}::/home/{random_username()}:{secrets.choice(shells)}"
for _ in range(3)
]
@@ -108,7 +109,7 @@ def generate_fake_shadow() -> str:
hash_length = shadow_config.get("hash_length", 86)
fake_entries = [
f"{random_username()}:{hash_prefix}{''.join(random.choices('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', k=salt_length))}${''.join(random.choices('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', k=hash_length))}:19000:0:99999:7:::"
f"{random_username()}:{hash_prefix}{''.join(_sysrand.choices('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', k=salt_length))}${''.join(_sysrand.choices('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', k=hash_length))}:19000:0:99999:7:::"
for _ in range(3)
]
@@ -147,9 +148,9 @@ SECRET_TOKEN=fake_secret_token_xyz""",
return f"""# Configuration File
api_endpoint = https://api.example.com
api_key = fake_key_{random.randint(1000, 9999)}
api_key = fake_key_{_sysrand.randint(1000, 9999)}
database_url = mysql://user:fake_pass@localhost/db
secret = fake_secret_{random.randint(10000, 99999)}
secret = fake_secret_{_sysrand.randint(10000, 99999)}
"""
@@ -167,7 +168,7 @@ def generate_fake_directory_listing(path: str) -> str:
directories = [(d["name"], d["size"], d["perms"]) for d in fake_dirs]
files = [
(f["name"], str(random.randint(f["size_min"], f["size_max"])), f["perms"])
(f["name"], str(_sysrand.randint(f["size_min"], f["size_max"])), f["perms"])
for f in fake_files
]
@@ -208,7 +209,7 @@ def generate_path_traversal_response(path: str) -> Tuple[str, str, int]:
if "proc/self" in path_lower:
logger.debug("Returning fake proc info")
return (f"{random.randint(1000, 9999)}", "text/plain", 200)
return (f"{_sysrand.randint(1000, 9999)}", "text/plain", 200)
logger.debug("Returning fake directory listing")
return (generate_fake_directory_listing(path), "text/html", 200)
@@ -246,7 +247,7 @@ def generate_xxe_response(body: str) -> Tuple[str, str, int]:
if xxe_config and "entity_processed" in xxe_config:
template = xxe_config["entity_processed"]["template"]
entity_values = xxe_config["entity_processed"]["entity_values"]
entity_value = random.choice(entity_values)
entity_value = secrets.choice(entity_values)
response = template.replace("{entity_value}", entity_value)
else:
response = """<?xml version="1.0"?>
@@ -260,7 +261,7 @@ def generate_xxe_response(body: str) -> Tuple[str, str, int]:
if xxe_config and "error" in xxe_config:
template = xxe_config["error"]["template"]
messages = xxe_config["error"]["messages"]
message = random.choice(messages)
message = secrets.choice(messages)
response = template.replace("{message}", message)
else:
response = """<?xml version="1.0"?>
@@ -281,22 +282,22 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int]
# id command
if re.search(r"\bid\b", input_lower):
if cmd_config and "id" in cmd_config:
uid = random.randint(
uid = _sysrand.randint(
cmd_config.get("uid_min", 1000), cmd_config.get("uid_max", 2000)
)
gid = random.randint(
gid = _sysrand.randint(
cmd_config.get("gid_min", 1000), cmd_config.get("gid_max", 2000)
)
template = random.choice(cmd_config["id"])
template = secrets.choice(cmd_config["id"])
output = template.replace("{uid}", str(uid)).replace("{gid}", str(gid))
else:
output = f"uid={random.randint(1000, 2000)}(www-data) gid={random.randint(1000, 2000)}(www-data) groups={random.randint(1000, 2000)}(www-data)"
output = f"uid={_sysrand.randint(1000, 2000)}(www-data) gid={_sysrand.randint(1000, 2000)}(www-data) groups={_sysrand.randint(1000, 2000)}(www-data)"
return (output, "text/plain", 200)
# whoami command
if re.search(r"\bwhoami\b", input_lower):
users = cmd_config.get("whoami", ["www-data"]) if cmd_config else ["www-data"]
return (random.choice(users), "text/plain", 200)
return (secrets.choice(users), "text/plain", 200)
# uname command
if re.search(r"\buname\b", input_lower):
@@ -305,7 +306,7 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int]
if cmd_config
else ["Linux server 5.4.0 x86_64"]
)
return (random.choice(outputs), "text/plain", 200)
return (secrets.choice(outputs), "text/plain", 200)
# pwd command
if re.search(r"\bpwd\b", input_lower):
@@ -314,16 +315,16 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int]
if cmd_config
else ["/var/www/html"]
)
return (random.choice(paths), "text/plain", 200)
return (secrets.choice(paths), "text/plain", 200)
# ls command
if re.search(r"\bls\b", input_lower):
if cmd_config and "ls" in cmd_config:
files = random.choice(cmd_config["ls"])
files = secrets.choice(cmd_config["ls"])
else:
files = ["index.php", "config.php", "uploads"]
output = "\n".join(
random.sample(files, k=random.randint(3, min(6, len(files))))
_sysrand.sample(files, k=_sysrand.randint(3, min(6, len(files))))
)
return (output, "text/plain", 200)
@@ -351,27 +352,27 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int]
if any(cmd in input_lower for cmd in ["wget", "curl", "nc", "netcat"]):
if cmd_config and "network_commands" in cmd_config:
outputs = cmd_config["network_commands"]
output = random.choice(outputs)
output = secrets.choice(outputs)
if "{size}" in output:
size = random.randint(
size = _sysrand.randint(
cmd_config.get("download_size_min", 100),
cmd_config.get("download_size_max", 10000),
)
output = output.replace("{size}", str(size))
else:
outputs = ["bash: command not found", "Connection timeout"]
output = random.choice(outputs)
output = secrets.choice(outputs)
return (output, "text/plain", 200)
# generic outputs
if cmd_config and "generic" in cmd_config:
generic_outputs = cmd_config["generic"]
output = random.choice(generic_outputs)
output = secrets.choice(generic_outputs)
if "{num}" in output:
output = output.replace("{num}", str(random.randint(1, 99)))
output = output.replace("{num}", str(_sysrand.randint(1, 99)))
else:
generic_outputs = ["", "Command executed successfully", "sh: syntax error"]
output = random.choice(generic_outputs)
output = secrets.choice(generic_outputs)
return (output, "text/plain", 200)
@@ -414,7 +415,7 @@ def get_random_sql_error(
return ("Database error occurred", "text/plain")
if not db_type:
db_type = random.choice(list(sql_errors.keys()))
db_type = secrets.choice(list(sql_errors.keys()))
db_errors = sql_errors.get(db_type, {})
@@ -429,15 +430,15 @@ def get_random_sql_error(
all_errors.extend(error_list)
errors = all_errors if all_errors else ["Database error occurred"]
error_message = random.choice(errors) if errors else "Database error occurred"
error_message = secrets.choice(errors) if errors else "Database error occurred"
if "{table}" in error_message:
tables = ["users", "products", "orders", "customers", "accounts", "sessions"]
error_message = error_message.replace("{table}", random.choice(tables))
error_message = error_message.replace("{table}", secrets.choice(tables))
if "{column}" in error_message:
columns = ["id", "name", "email", "password", "username", "created_at"]
error_message = error_message.replace("{column}", random.choice(columns))
error_message = error_message.replace("{column}", secrets.choice(columns))
return (error_message, "text/plain")
@@ -455,7 +456,7 @@ def generate_sql_error_response(
status_code = 500
if random.random() < 0.3:
if _sysrand.random() < 0.3:
status_code = 200
logger.info(f"SQL injection detected: {injection_type}")
@@ -475,9 +476,9 @@ def get_sql_response_with_data(path: str, params: str) -> str:
"username": random_username(),
"email": random_email(),
"password_hash": random_password(),
"role": random.choice(["admin", "user", "moderator"]),
"role": secrets.choice(["admin", "user", "moderator"]),
}
for i in range(1, random.randint(2, 5))
for i in range(1, _sysrand.randint(2, 5))
],
}
return json.dumps(data, indent=2)
@@ -570,7 +571,7 @@ def generate_server_error() -> Tuple[str, str]:
if not server_errors:
return ("500 Internal Server Error", "text/html")
server_type = random.choice(list(server_errors.keys()))
server_type = secrets.choice(list(server_errors.keys()))
server_config = server_errors[server_type]
error_codes = {
@@ -583,18 +584,18 @@ def generate_server_error() -> Tuple[str, str]:
503: "Service Unavailable",
}
code = random.choice(list(error_codes.keys()))
code = secrets.choice(list(error_codes.keys()))
message = error_codes[code]
template = server_config.get("template", "")
version = random.choice(server_config.get("versions", ["1.0"]))
version = secrets.choice(server_config.get("versions", ["1.0"]))
html = template.replace("{code}", str(code))
html = html.replace("{message}", message)
html = html.replace("{version}", version)
if server_type == "apache":
os = random.choice(server_config.get("os", ["Ubuntu"]))
os = secrets.choice(server_config.get("os", ["Ubuntu"]))
html = html.replace("{os}", os)
html = html.replace("{host}", "localhost")
@@ -611,10 +612,10 @@ def get_server_header(server_type: str = None) -> str:
return "nginx/1.18.0"
if not server_type:
server_type = random.choice(list(server_errors.keys()))
server_type = secrets.choice(list(server_errors.keys()))
server_config = server_errors.get(server_type, {})
version = random.choice(server_config.get("versions", ["1.0"]))
version = secrets.choice(server_config.get("versions", ["1.0"]))
server_headers = {
"nginx": f"nginx/{version}",

View File

@@ -77,5 +77,5 @@ def main():
finally:
try:
db.close_session()
except Exception:
pass
except Exception as e:
app_logger.error(f"Error closing DB session after retention cleanup: {e}")

View File

@@ -1,12 +1,15 @@
#!/usr/bin/env python3
from typing import Dict, Tuple, Optional
import logging
import re
import urllib.parse
from wordlists import get_wordlists
from database import get_database, DatabaseManager
logger = logging.getLogger("krawl")
# Module-level singleton for background task access
_tracker_instance: "AccessTracker | None" = None
@@ -103,9 +106,8 @@ class AccessTracker:
if self._db_manager is None:
try:
self._db_manager = get_database()
except Exception:
# Database not initialized, persistence disabled
pass
except Exception as e:
logger.error(f"Failed to initialize database manager: {e}")
return self._db_manager
def parse_credentials(self, post_data: str) -> Tuple[str, str]:
@@ -206,9 +208,8 @@ class AccessTracker:
self.db.persist_credential(
ip=ip, path=path, username=username, password=password
)
except Exception:
# Don't crash if database persistence fails
pass
except Exception as e:
logger.error(f"Failed to persist credential attempt: {e}")
def record_access(
self,
@@ -271,9 +272,8 @@ class AccessTracker:
attack_types=attack_findings if attack_findings else None,
raw_request=raw_request if raw_request else None,
)
except Exception:
# Don't crash if database persistence fails
pass
except Exception as e:
logger.error(f"Failed to persist access record: {e}")
def detect_attack_type(self, data: str) -> list[str]:
"""

View File

@@ -470,7 +470,7 @@
"xxe_injection": "(<!ENTITY|<!DOCTYPE|SYSTEM\\s+[\"']|PUBLIC\\s+[\"']|&\\w+;|file://|php://filter|expect://)",
"ldap_injection": "(\\*\\)|\\(\\||\\(&)",
"command_injection": "(cmd=|exec=|command=|execute=|system=|ping=|host=|&&|\\|\\||;|\\$\\{|\\$\\(|`|\\bid\\b|\\bwhoami\\b|\\buname\\b|\\bcat\\b|\\bls\\b|\\bpwd\\b|\\becho\\b|\\bwget\\b|\\bcurl\\b|\\bnc\\b|\\bnetcat\\b|\\bbash\\b|\\bsh\\b|\\bps\\b|\\bkill\\b|\\bchmod\\b|\\bchown\\b|\\bcp\\b|\\bmv\\b|\\brm\\b|/bin/bash|/bin/sh|cmd\\.exe|/bin/|/usr/bin/|/sbin/)",
"common_probes": "(/admin|/backup|/config|/database|/private|/uploads|/wp-admin|/login|/phpMyAdmin|/phpmyadmin|/users|/search|/contact|/info|/input|/feedback|/server|/api/v1/|/api/v2/|/api/search|/api/sql|/api/database|\\.env|/credentials\\.txt|/passwords\\.txt|\\.git|/backup\\.sql|/db_backup\\.sql)"
"common_probes": "(/admin|/wp-admin|/phpMyAdmin|/phpmyadmin|/feedback|\\.env|/credentials\\.txt|/passwords\\.txt|\\.git|/backup\\.sql|/db_backup\\.sql)"
},
"server_headers": [
"Apache/2.4.41 (Ubuntu)",