Merge pull request #110 from BlessedRebuS/feat/realease-1.1

Feat/realease 1.1
This commit is contained in:
Lorenzo Venerandi
2026-03-01 21:59:29 +01:00
committed by GitHub
27 changed files with 315 additions and 468 deletions

View File

@@ -20,7 +20,7 @@ jobs:
- uses: actions/setup-python@v5 - uses: actions/setup-python@v5
with: with:
python-version: '3.11' python-version: '3.13'
cache: 'pip' cache: 'pip'
- name: Install dependencies - name: Install dependencies

View File

@@ -19,7 +19,7 @@ jobs:
- uses: actions/setup-python@v5 - uses: actions/setup-python@v5
with: with:
python-version: '3.11' python-version: '3.13'
cache: 'pip' cache: 'pip'
- name: Install dependencies - name: Install dependencies
@@ -48,12 +48,4 @@ jobs:
- name: Safety check for dependencies - name: Safety check for dependencies
run: safety check --json || true run: safety check --json || true
- name: Trivy vulnerability scan
uses: aquasecurity/trivy-action@0.31.0
with:
scan-type: 'fs'
scan-ref: '.'
format: 'table'
severity: 'CRITICAL,HIGH'
exit-code: '1'

View File

@@ -1,15 +1,16 @@
FROM python:3.11-slim FROM python:3.13-slim
LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl
WORKDIR /app WORKDIR /app
# Install gosu for dropping privileges # Install gosu for dropping privileges
RUN apt-get update && apt-get install -y --no-install-recommends gosu && \ RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends gosu && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
COPY requirements.txt /app/ COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r requirements.txt
COPY src/ /app/src/ COPY src/ /app/src/
COPY wordlists.json /app/ COPY wordlists.json /app/

292
README.md
View File

@@ -33,21 +33,25 @@
<img src="https://img.shields.io/badge/helm-chart-0F1689?logo=helm&logoColor=white" alt="Helm Chart"> <img src="https://img.shields.io/badge/helm-chart-0F1689?logo=helm&logoColor=white" alt="Helm Chart">
</a> </a>
</div> </div>
<br>
<p align="center">
<a href="#what-is-krawl">What is Krawl?</a> •
<a href="#-installation">Installation</a> •
<a href="#honeypot-pages">Honeypot Pages</a> •
<a href="#dashboard">Dashboard</a> •
<a href="./ToDo.md">Todo</a> •
<a href="#-contributing">Contributing</a>
</p>
<br>
</div> </div>
## Table of Contents
- [Demo](#demo)
- [What is Krawl?](#what-is-krawl)
- [Krawl Dashboard](#krawl-dashboard)
- [Installation](#-installation)
- [Docker Run](#docker-run)
- [Docker Compose](#docker-compose)
- [Kubernetes](#kubernetes)
- [Configuration](#configuration)
- [config.yaml](#configuration-via-configyaml)
- [Environment Variables](#configuration-via-enviromental-variables)
- [Ban Malicious IPs](#use-krawl-to-ban-malicious-ips)
- [IP Reputation](#ip-reputation)
- [Forward Server Header](#forward-server-header)
- [Additional Documentation](#additional-documentation)
- [Contributing](#-contributing)
## Demo ## Demo
Tip: crawl the `robots.txt` paths for additional fun Tip: crawl the `robots.txt` paths for additional fun
### Krawl URL: [http://demo.krawlme.com](http://demo.krawlme.com) ### Krawl URL: [http://demo.krawlme.com](http://demo.krawlme.com)
@@ -67,7 +71,7 @@ It features:
- **Fake Login Pages**: WordPress, phpMyAdmin, admin panels - **Fake Login Pages**: WordPress, phpMyAdmin, admin panels
- **Honeypot Paths**: Advertised in robots.txt to catch scanners - **Honeypot Paths**: Advertised in robots.txt to catch scanners
- **Fake Credentials**: Realistic-looking usernames, passwords, API keys - **Fake Credentials**: Realistic-looking usernames, passwords, API keys
- **[Canary Token](#customizing-the-canary-token) Integration**: External alert triggering - **[Canary Token](docs/canary-token.md) Integration**: External alert triggering
- **Random server headers**: Confuse attacks based on server header and version - **Random server headers**: Confuse attacks based on server header and version
- **Real-time Dashboard**: Monitor suspicious activity - **Real-time Dashboard**: Monitor suspicious activity
- **Customizable Wordlists**: Easy JSON-based configuration - **Customizable Wordlists**: Easy JSON-based configuration
@@ -75,8 +79,28 @@ It features:
![dashboard](img/deception-page.png) ![dashboard](img/deception-page.png)
## Krawl Dashboard
Krawl provides a comprehensive dashboard, accessible at a **random secret path** generated at startup or at a **custom path** configured via `KRAWL_DASHBOARD_SECRET_PATH`. This keeps the dashboard hidden from attackers scanning your honeypot.
The dashboard is organized in three main tabs:
- **Overview** — High-level view of attack activity: an interactive map of IP origins, recent suspicious requests, and top IPs, User-Agents, and paths.
![geoip](img/geoip_dashboard.png) ![geoip](img/geoip_dashboard.png)
- **Attacks** — Detailed breakdown of captured credentials, honeypot triggers, and detected attack types (SQLi, XSS, path traversal, etc.) with charts and tables.
![attack_types](img/attack_types.png)
- **IP Insight** — In-depth forensic view of a selected IP: geolocation, ISP/ASN info, reputation flags, behavioral timeline, attack type distribution, and full access history.
![ipinsight](img/ip_insight_dashboard.png)
For more details, see the [Dashboard documentation](docs/dashboard.md).
## 🚀 Installation ## 🚀 Installation
### Docker Run ### Docker Run
@@ -89,7 +113,7 @@ docker run -d \
-e KRAWL_PORT=5000 \ -e KRAWL_PORT=5000 \
-e KRAWL_DELAY=100 \ -e KRAWL_DELAY=100 \
-e KRAWL_DASHBOARD_SECRET_PATH="/my-secret-dashboard" \ -e KRAWL_DASHBOARD_SECRET_PATH="/my-secret-dashboard" \
-e KRAWL_DATABASE_RETENTION_DAYS=30 \ -v krawl-data:/app/data \
--name krawl \ --name krawl \
ghcr.io/blessedrebus/krawl:latest ghcr.io/blessedrebus/krawl:latest
``` ```
@@ -136,64 +160,13 @@ docker-compose down
### Kubernetes ### Kubernetes
**Krawl is also available natively on Kubernetes**. Installation can be done either [via manifest](kubernetes/README.md) or [using the helm chart](helm/README.md). **Krawl is also available natively on Kubernetes**. Installation can be done either [via manifest](kubernetes/README.md) or [using the helm chart](helm/README.md).
## Use Krawl to Ban Malicious IPs
Krawl uses a reputation-based system to classify attacker IP addresses. Every five minutes, Krawl exports the identified malicious IPs to a `malicious_ips.txt` file.
This file can either be mounted from the Docker container into another system or downloaded directly via `curl`:
```bash
curl https://your-krawl-instance/<DASHBOARD-PATH>/api/download/malicious_ips.txt
```
This file enables automatic blocking of malicious traffic across various platforms. You can use it to update firewall rules on:
* [OPNsense and pfSense](https://www.allthingstech.ch/using-opnsense-and-ip-blocklists-to-block-malicious-traffic)
* [RouterOS](https://rentry.co/krawl-routeros)
* [IPtables](plugins/iptables/README.md) and [Nftables](plugins/nftables/README.md)
* [Fail2Ban](plugins/fail2ban/README.md)
## IP Reputation
Krawl [uses tasks that analyze recent traffic to build and continuously update an IP reputation](src/tasks/analyze_ips.py) score. It runs periodically and evaluates each active IP address based on multiple behavioral indicators to classify it as an attacker, crawler, or regular user. Thresholds are fully customizable.
![ip reputation](img/ip-reputation.png)
The analysis includes:
- **Risky HTTP methods usage** (e.g. POST, PUT, DELETE ratios)
- **Robots.txt violations**
- **Request timing anomalies** (bursty or irregular patterns)
- **User-Agent consistency**
- **Attack URL detection** (e.g. SQL injection, XSS patterns)
Each signal contributes to a weighted scoring model that assigns a reputation category:
- `attacker`
- `bad_crawler`
- `good_crawler`
- `regular_user`
- `unknown` (for insufficient data)
The resulting scores and metrics are stored in the database and used by Krawl to drive dashboards, reputation tracking, and automated mitigation actions such as IP banning or firewall integration.
## Forward server header
If Krawl is deployed behind a proxy such as NGINX the **server header** should be forwarded using the following configuration in your proxy:
```bash
location / {
proxy_pass https://your-krawl-instance;
proxy_pass_header Server;
}
```
## API
Krawl uses the following APIs
- http://ip-api.com (IP Data)
- https://iprep.lcrawl.com (IP Reputation)
- https://nominatim.openstreetmap.org/reverse (Reverse IP Lookup)
- https://api.ipify.org (Public IP discovery)
- http://ident.me (Public IP discovery)
- https://ifconfig.me (Public IP discovery)
## Configuration ## Configuration
Krawl uses a **configuration hierarchy** in which **environment variables take precedence over the configuration file**. This approach is recommended for Docker deployments and quick out-of-the-box customization. Krawl uses a **configuration hierarchy** in which **environment variables take precedence over the configuration file**. This approach is recommended for Docker deployments and quick out-of-the-box customization.
### Configuration via config.yaml
You can use the [config.yaml](config.yaml) file for advanced configurations, such as Docker Compose or Helm chart deployments.
### Configuration via Enviromental Variables ### Configuration via Enviromental Variables
| Environment Variable | Description | Default | | Environment Variable | Description | Default |
@@ -256,153 +229,63 @@ docker run -d \
ghcr.io/blessedrebus/krawl:latest ghcr.io/blessedrebus/krawl:latest
``` ```
### Configuration via config.yaml ## Use Krawl to Ban Malicious IPs
You can use the [config.yaml](config.yaml) file for more advanced configurations, such as Docker Compose or Helm chart deployments. Krawl uses a reputation-based system to classify attacker IP addresses. Every five minutes, Krawl exports the identified malicious IPs to a `malicious_ips.txt` file.
# Honeypot This file can either be mounted from the Docker container into another system or downloaded directly via `curl`:
Below is a complete overview of the Krawl honeypots capabilities
## robots.txt ```bash
The actual (juicy) robots.txt configuration [is the following](src/templates/html/robots.txt). curl https://your-krawl-instance/<DASHBOARD-PATH>/api/download/malicious_ips.txt
```
## Honeypot pages This file enables automatic blocking of malicious traffic across various platforms. You can use it to update firewall rules on:
* [OPNsense and pfSense](https://www.allthingstech.ch/using-opnsense-and-ip-blocklists-to-block-malicious-traffic)
* [RouterOS](https://rentry.co/krawl-routeros)
* [IPtables](plugins/iptables/README.md) and [Nftables](plugins/nftables/README.md)
* [Fail2Ban](plugins/fail2ban/README.md)
### Common Login Attempts ## IP Reputation
Requests to common admin endpoints (`/admin/`, `/wp-admin/`, `/phpMyAdmin/`) return a fake login page. Any login attempt triggers a 1-second delay to simulate real processing and is fully logged in the dashboard (credentials, IP, headers, timing). Krawl [uses tasks that analyze recent traffic to build and continuously update an IP reputation](src/tasks/analyze_ips.py) score. It runs periodically and evaluates each active IP address based on multiple behavioral indicators to classify it as an attacker, crawler, or regular user. Thresholds are fully customizable.
![admin page](img/admin-page.png) ![ip reputation](img/ip-reputation.png)
### Common Misconfiguration Paths The analysis includes:
Requests to paths like `/backup/`, `/config/`, `/database/`, `/private/`, or `/uploads/` return a fake directory listing populated with “interesting” files, each assigned a random file size to look realistic. - **Risky HTTP methods usage** (e.g. POST, PUT, DELETE ratios)
- **Robots.txt violations**
- **Request timing anomalies** (bursty or irregular patterns)
- **User-Agent consistency**
- **Attack URL detection** (e.g. SQL injection, XSS patterns)
![directory-page](img/directory-page.png) Each signal contributes to a weighted scoring model that assigns a reputation category:
- `attacker`
- `bad_crawler`
- `good_crawler`
- `regular_user`
- `unknown` (for insufficient data)
### Environment File Leakage The resulting scores and metrics are stored in the database and used by Krawl to drive dashboards, reputation tracking, and automated mitigation actions such as IP banning or firewall integration.
The `.env` endpoint exposes fake database connection strings, **AWS API keys**, and **Stripe secrets**. It intentionally returns an error due to the `Content-Type` being `application/json` instead of plain text, mimicking a "juicy" misconfiguration that crawlers and scanners often flag as information leakage.
### Server Error Information ## Forward server header
The `/server` page displays randomly generated fake error information for each known server. If Krawl is deployed behind a proxy such as NGINX the **server header** should be forwarded using the following configuration in your proxy:
![server and env page](img/server-and-env-page.png)
### API Endpoints with Sensitive Data
The pages `/api/v1/users` and `/api/v2/secrets` show fake users and random secrets in JSON format
![users and secrets](img/users-and-secrets.png)
### Exposed Credential Files
The pages `/credentials.txt` and `/passwords.txt` show fake users and random secrets
![credentials and passwords](img/credentials-and-passwords.png)
### SQL Injection and XSS Detection
Pages such as `/users`, `/search`, `/contact`, `/info`, `/input`, and `/feedback`, along with APIs like `/api/sql` and `/api/database`, are designed to lure attackers into performing attacks such as **SQL injection** or **XSS**.
![sql injection](img/sql_injection.png)
Automated tools like **SQLMap** will receive a different randomized database error on each request, increasing scan noise and confusing the attacker. All detected attacks are logged and displayed in the dashboard.
### Path Traversal Detection
Krawl detects and responds to **path traversal** attempts targeting common system files like `/etc/passwd`, `/etc/shadow`, or Windows system paths. When an attacker tries to access sensitive files using patterns like `../../../etc/passwd` or encoded variants (`%2e%2e/`, `%252e`), Krawl returns convincing fake file contents with realistic system users, UIDs, GIDs, and shell configurations. This wastes attacker time while logging the full attack pattern.
### XXE (XML External Entity) Injection
The `/api/xml` and `/api/parser` endpoints accept XML input and are designed to detect **XXE injection** attempts. When attackers try to exploit external entity declarations (`<!ENTITY`, `<!DOCTYPE`, `SYSTEM`) or reference entities to access local files, Krawl responds with realistic XML responses that appear to process the entities successfully. The honeypot returns fake file contents, simulated entity values (like `admin_credentials` or `database_connection`), or realistic error messages, making the attack appear successful while fully logging the payload.
### Command Injection Detection
Pages like `/api/exec`, `/api/run`, and `/api/system` simulate command execution endpoints vulnerable to **command injection**. When attackers attempt to inject shell commands using patterns like `; whoami`, `| cat /etc/passwd`, or backticks, Krawl responds with realistic command outputs. For example, `whoami` returns fake usernames like `www-data` or `nginx`, while `uname` returns fake Linux kernel versions. Network commands like `wget` or `curl` simulate downloads or return "command not found" errors, creating believable responses that delay and confuse automated exploitation tools.
## Example usage behind reverse proxy
You can configure a reverse proxy so all web requests land on the Krawl page by default, and hide your real content behind a secret hidden url. For example:
```bash ```bash
location / { location / {
proxy_pass https://your-krawl-instance; proxy_pass https://your-krawl-instance;
proxy_pass_header Server; proxy_pass_header Server;
} }
location /my-hidden-service {
proxy_pass https://my-hidden-service;
proxy_pass_header Server;
}
``` ```
Alternatively, you can create a bunch of different "interesting" looking domains. For example: ## Additional Documentation
- admin.example.com | Topic | Description |
- portal.example.com |-------|-------------|
- sso.example.com | [API](docs/api.md) | External APIs used by Krawl for IP data, reputation, and geolocation |
- login.example.com | [Honeypot](docs/honeypot.md) | Full overview of honeypot pages: fake logins, directory listings, credential files, SQLi/XSS/XXE/command injection traps, and more |
- ... | [Reverse Proxy](docs/reverse-proxy.md) | How to deploy Krawl behind NGINX or use decoy subdomains |
| [Database Backups](docs/backups.md) | Enable and configure the automatic database dump job |
Additionally, you may configure your reverse proxy to forward all non-existing subdomains (e.g. nonexistent.example.com) to one of these domains so that any crawlers that are guessing domains at random will automatically end up at your Krawl instance. | [Canary Token](docs/canary-token.md) | Set up external alert triggers via canarytokens.org |
| [Wordlist](docs/wordlist.md) | Customize fake usernames, passwords, and directory listings |
## Enable database dump job for backups | [Dashboard](docs/dashboard.md) | Access and explore the real-time monitoring dashboard |
To enable the database dump job, set the following variables (*config file example*)
```yaml
backups:
path: "backups" # where backup will be saved
cron: "*/30 * * * *" # frequency of the cronjob
enabled: true
```
## Customizing the Canary Token
To create a custom canary token, visit https://canarytokens.org
and generate a “Web bug” canary token.
This optional token is triggered when a crawler fully traverses the webpage until it reaches 0. At that point, a URL is returned. When this URL is requested, it sends an alert to the user via email, including the visitors IP address and user agent.
To enable this feature, set the canary token URL [using the environment variable](#configuration-via-environment-variables) `KRAWL_CANARY_TOKEN_URL`.
## Customizing the wordlist
Edit `wordlists.json` to customize fake data for your use case
```json
{
"usernames": {
"prefixes": ["admin", "root", "user"],
"suffixes": ["_prod", "_dev", "123"]
},
"passwords": {
"prefixes": ["P@ssw0rd", "Admin"],
"simple": ["test", "password"]
},
"directory_listing": {
"files": ["credentials.txt", "backup.sql"],
"directories": ["admin/", "backup/"]
}
}
```
or **values.yaml** in the case of helm chart installation
## Dashboard
Access the dashboard at `http://<server-ip>:<port>/<dashboard-path>`
The dashboard shows:
- Total and unique accesses
- Suspicious activity and attack detection
- Top IPs, paths, user-agents and GeoIP localization
- Real-time monitoring
The attackers access to the honeypot endpoint and related suspicious activities (such as failed login attempts) are logged.
Krawl also implements a scoring system designed to distinguish between malicious and legitimate behavior on the website.
![dashboard-1](img/dashboard-1.png)
The top IP Addresses is shown along with top paths and User Agents
![dashboard-2](img/dashboard-2.png)
![dashboard-3](img/dashboard-3.png)
## 🤝 Contributing ## 🤝 Contributing
@@ -413,14 +296,9 @@ Contributions welcome! Please:
4. Submit a pull request (explain the changes!) 4. Submit a pull request (explain the changes!)
<div align="center"> ## Disclaimer
> [!CAUTION]
## ⚠️ Disclaimer > This is a deception/honeypot system. Deploy in isolated environments and monitor carefully for security events. Use responsibly and in compliance with applicable laws and regulations.
**This is a deception/honeypot system.**
Deploy in isolated environments and monitor carefully for security events.
Use responsibly and in compliance with applicable laws and regulations.
## Star History ## Star History
<img src="https://api.star-history.com/svg?repos=BlessedRebuS/Krawl&type=Date" width="600" alt="Star History Chart" /> <img src="https://api.star-history.com/svg?repos=BlessedRebuS/Krawl&type=Date" width="600" alt="Star History Chart" />

View File

@@ -1,5 +0,0 @@
# Krawl - Todo List
- Add Prometheus exporter for metrics
- Add POST credentials information (e.g. username and password used)
- Add CloudFlare error pages

9
docs/api.md Normal file
View File

@@ -0,0 +1,9 @@
# API
Krawl uses the following APIs:
- http://ip-api.com (IP Data)
- https://iprep.lcrawl.com (IP Reputation)
- https://nominatim.openstreetmap.org/reverse (Reverse IP Lookup)
- https://api.ipify.org (Public IP discovery)
- http://ident.me (Public IP discovery)
- https://ifconfig.me (Public IP discovery)

10
docs/backups.md Normal file
View File

@@ -0,0 +1,10 @@
# Enable Database Dump Job for Backups
To enable the database dump job, set the following variables (*config file example*)
```yaml
backups:
path: "backups" # where backup will be saved
cron: "*/30 * * * *" # frequency of the cronjob
enabled: true
```

10
docs/canary-token.md Normal file
View File

@@ -0,0 +1,10 @@
# Customizing the Canary Token
To create a custom canary token, visit https://canarytokens.org
and generate a "Web bug" canary token.
This optional token is triggered when a crawler fully traverses the webpage until it reaches 0. At that point, a URL is returned. When this URL is requested, it sends an alert to the user via email, including the visitor's IP address and user agent.
To enable this feature, set the canary token URL [using the environment variable](../README.md#configuration-via-enviromental-variables) `KRAWL_CANARY_TOKEN_URL`.

21
docs/dashboard.md Normal file
View File

@@ -0,0 +1,21 @@
# Dashboard
Access the dashboard at `http://<server-ip>:<port>/<dashboard-path>`
The dashboard shows:
- Total and unique accesses
- Suspicious activity and attack detection
- Top IPs, paths, user-agents and GeoIP localization
- Real-time monitoring
The attackers' access to the honeypot endpoint and related suspicious activities (such as failed login attempts) are logged.
Krawl also implements a scoring system designed to distinguish between malicious and legitimate behavior on the website.
![dashboard-1](../img/dashboard-1.png)
The top IP addresses are shown along with the top paths and User-Agents
![dashboard-2](../img/dashboard-2.png)
![dashboard-3](../img/dashboard-3.png)

52
docs/honeypot.md Normal file
View File

@@ -0,0 +1,52 @@
# Honeypot
Below is a complete overview of the Krawl honeypot's capabilities
## robots.txt
The actual (juicy) robots.txt configuration [is the following](../src/templates/html/robots.txt).
## Honeypot pages
### Common Login Attempts
Requests to common admin endpoints (`/admin/`, `/wp-admin/`, `/phpMyAdmin/`) return a fake login page. Any login attempt triggers a 1-second delay to simulate real processing and is fully logged in the dashboard (credentials, IP, headers, timing).
![admin page](../img/admin-page.png)
### Common Misconfiguration Paths
Requests to paths like `/backup/`, `/config/`, `/database/`, `/private/`, or `/uploads/` return a fake directory listing populated with "interesting" files, each assigned a random file size to look realistic.
![directory-page](../img/directory-page.png)
### Environment File Leakage
The `.env` endpoint exposes fake database connection strings, **AWS API keys**, and **Stripe secrets**. It intentionally returns an error due to the `Content-Type` being `application/json` instead of plain text, mimicking a "juicy" misconfiguration that crawlers and scanners often flag as information leakage.
### Server Error Information
The `/server` page displays randomly generated fake error information for each known server.
![server and env page](../img/server-and-env-page.png)
### API Endpoints with Sensitive Data
The pages `/api/v1/users` and `/api/v2/secrets` show fake users and random secrets in JSON format
![users and secrets](../img/users-and-secrets.png)
### Exposed Credential Files
The pages `/credentials.txt` and `/passwords.txt` show fake users and random secrets
![credentials and passwords](../img/credentials-and-passwords.png)
### SQL Injection and XSS Detection
Pages such as `/users`, `/search`, `/contact`, `/info`, `/input`, and `/feedback`, along with APIs like `/api/sql` and `/api/database`, are designed to lure attackers into performing attacks such as **SQL injection** or **XSS**.
![sql injection](../img/sql_injection.png)
Automated tools like **SQLMap** will receive a different randomized database error on each request, increasing scan noise and confusing the attacker. All detected attacks are logged and displayed in the dashboard.
### Path Traversal Detection
Krawl detects and responds to **path traversal** attempts targeting common system files like `/etc/passwd`, `/etc/shadow`, or Windows system paths. When an attacker tries to access sensitive files using patterns like `../../../etc/passwd` or encoded variants (`%2e%2e/`, `%252e`), Krawl returns convincing fake file contents with realistic system users, UIDs, GIDs, and shell configurations. This wastes attacker time while logging the full attack pattern.
### XXE (XML External Entity) Injection
The `/api/xml` and `/api/parser` endpoints accept XML input and are designed to detect **XXE injection** attempts. When attackers try to exploit external entity declarations (`<!ENTITY`, `<!DOCTYPE`, `SYSTEM`) or reference entities to access local files, Krawl responds with realistic XML responses that appear to process the entities successfully. The honeypot returns fake file contents, simulated entity values (like `admin_credentials` or `database_connection`), or realistic error messages, making the attack appear successful while fully logging the payload.
### Command Injection Detection
Pages like `/api/exec`, `/api/run`, and `/api/system` simulate command execution endpoints vulnerable to **command injection**. When attackers attempt to inject shell commands using patterns like `; whoami`, `| cat /etc/passwd`, or backticks, Krawl responds with realistic command outputs. For example, `whoami` returns fake usernames like `www-data` or `nginx`, while `uname` returns fake Linux kernel versions. Network commands like `wget` or `curl` simulate downloads or return "command not found" errors, creating believable responses that delay and confuse automated exploitation tools.

25
docs/reverse-proxy.md Normal file
View File

@@ -0,0 +1,25 @@
# Example Usage Behind Reverse Proxy
You can configure a reverse proxy so all web requests land on the Krawl page by default, and hide your real content behind a secret hidden URL. For example:
```bash
location / {
proxy_pass https://your-krawl-instance;
proxy_pass_header Server;
}
location /my-hidden-service {
proxy_pass https://my-hidden-service;
proxy_pass_header Server;
}
```
Alternatively, you can create a bunch of different "interesting" looking domains. For example:
- admin.example.com
- portal.example.com
- sso.example.com
- login.example.com
- ...
Additionally, you may configure your reverse proxy to forward all non-existing subdomains (e.g. nonexistent.example.com) to one of these domains so that any crawlers that are guessing domains at random will automatically end up at your Krawl instance.

22
docs/wordlist.md Normal file
View File

@@ -0,0 +1,22 @@
# Customizing the Wordlist
Edit `wordlists.json` to customize fake data for your use case
```json
{
"usernames": {
"prefixes": ["admin", "root", "user"],
"suffixes": ["_prod", "_dev", "123"]
},
"passwords": {
"prefixes": ["P@ssw0rd", "Admin"],
"simple": ["test", "password"]
},
"directory_listing": {
"files": ["credentials.txt", "backup.sql"],
"directories": ["admin/", "backup/"]
}
}
```
or **values.yaml** in the case of helm chart installation

View File

@@ -2,8 +2,8 @@ apiVersion: v2
name: krawl-chart name: krawl-chart
description: A Helm chart for Krawl honeypot server description: A Helm chart for Krawl honeypot server
type: application type: application
version: 1.0.10 version: 1.1.0
appVersion: 1.0.10 appVersion: 1.1.0
keywords: keywords:
- honeypot - honeypot
- security - security

View File

@@ -10,103 +10,31 @@ A Helm chart for deploying the Krawl honeypot application on Kubernetes.
## Installation ## Installation
### From OCI Registry
### Helm Chart
Install with default values:
```bash ```bash
helm install krawl oci://ghcr.io/blessedrebus/krawl-chart \ helm install krawl oci://ghcr.io/blessedrebus/krawl-chart \
--version 1.0.0 \ --version 1.1.0 \
--namespace krawl-system \
--create-namespace
```
Or create a minimal `values.yaml` file:
```yaml
service:
type: LoadBalancer
port: 5000
timezone: "Europe/Rome"
ingress:
enabled: true
className: "traefik"
hosts:
- host: krawl.example.com
paths:
- path: /
pathType: Prefix
config:
server:
port: 5000
delay: 100
dashboard:
secret_path: null # Auto-generated if not set
database:
persistence:
enabled: true
size: 1Gi
```
Install with custom values:
```bash
helm install krawl oci://ghcr.io/blessedrebus/krawl-chart \
--version 0.2.2 \
--namespace krawl-system \ --namespace krawl-system \
--create-namespace \ --create-namespace \
-f values.yaml -f values.yaml # optional
``` ```
To access the deception server: ### From local chart
```bash
helm install krawl ./helm -n krawl-system --create-namespace -f values.yaml
```
A minimal [values.yaml](values-minimal.yaml) example is provided in this directory.
Once installed, get your service IP:
```bash ```bash
kubectl get svc krawl -n krawl-system kubectl get svc krawl -n krawl-system
``` ```
Once the EXTERNAL-IP is assigned, access your deception server at `http://<EXTERNAL-IP>:5000` Then access the deception server at `http://<EXTERNAL-IP>:5000`
### Add the repository (if applicable)
```bash
helm repo add krawl https://github.com/BlessedRebuS/Krawl
helm repo update
```
### Install from OCI Registry
```bash
helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 0.2.1
```
Or with a specific namespace:
```bash
helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 0.2.1 -n krawl --create-namespace
```
### Install the chart locally
```bash
helm install krawl ./helm
```
### Install with custom values
```bash
helm install krawl ./helm -f values.yaml
```
### Install in a specific namespace
```bash
helm install krawl ./helm -n krawl --create-namespace
```
## Configuration ## Configuration
@@ -221,16 +149,6 @@ The following table lists the main configuration parameters of the Krawl chart a
| `resources.requests.cpu` | CPU request | `100m` | | `resources.requests.cpu` | CPU request | `100m` |
| `resources.requests.memory` | Memory request | `64Mi` | | `resources.requests.memory` | Memory request | `64Mi` |
### Autoscaling
| Parameter | Description | Default |
|-----------|-------------|---------|
| `autoscaling.enabled` | Enable horizontal pod autoscaling | `false` |
| `autoscaling.minReplicas` | Minimum replicas | `1` |
| `autoscaling.maxReplicas` | Maximum replicas | `1` |
| `autoscaling.targetCPUUtilizationPercentage` | Target CPU utilization | `70` |
| `autoscaling.targetMemoryUtilizationPercentage` | Target memory utilization | `80` |
### Network Policy ### Network Policy
| Parameter | Description | Default | | Parameter | Description | Default |
@@ -248,68 +166,24 @@ kubectl get secret krawl-server -n krawl-system \
## Usage Examples ## Usage Examples
### Basic Installation You can override individual values with `--set` without a values file:
```bash ```bash
helm install krawl ./helm helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.0 \
``` --set ingress.hosts[0].host=honeypot.example.com \
### Installation with Custom Domain
```bash
helm install krawl ./helm \
--set ingress.hosts[0].host=honeypot.example.com
```
### Enable Canary Tokens
```bash
helm install krawl ./helm \
--set config.canary.token_url=https://canarytokens.com/your-token --set config.canary.token_url=https://canarytokens.com/your-token
``` ```
### Configure Custom API Endpoint
```bash
helm install krawl ./helm \
--set config.api.server_url=https://api.example.com \
--set config.api.server_port=443
```
### Create Values Override File
Create `custom-values.yaml`:
```yaml
config:
server:
port: 8080
delay: 500
canary:
token_url: https://your-canary-token-url
dashboard:
secret_path: /super-secret-path
crawl:
max_pages_limit: 500
ban_duration_seconds: 3600
```
Then install:
```bash
helm install krawl ./helm -f custom-values.yaml
```
## Upgrading ## Upgrading
```bash ```bash
helm upgrade krawl ./helm helm upgrade krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.0 -f values.yaml
``` ```
## Uninstalling ## Uninstalling
```bash ```bash
helm uninstall krawl helm uninstall krawl -n krawl-system
``` ```
## Troubleshooting ## Troubleshooting
@@ -348,7 +222,6 @@ kubectl logs -l app.kubernetes.io/name=krawl
- `configmap.yaml` - Application configuration - `configmap.yaml` - Application configuration
- `pvc.yaml` - Persistent volume claim - `pvc.yaml` - Persistent volume claim
- `ingress.yaml` - Ingress configuration - `ingress.yaml` - Ingress configuration
- `hpa.yaml` - Horizontal pod autoscaler
- `network-policy.yaml` - Network policies - `network-policy.yaml` - Network policies
## Support ## Support

View File

@@ -5,9 +5,7 @@ metadata:
labels: labels:
{{- include "krawl.labels" . | nindent 4 }} {{- include "krawl.labels" . | nindent 4 }}
spec: spec:
{{- if not .Values.autoscaling.enabled }}
replicas: {{ .Values.replicaCount }} replicas: {{ .Values.replicaCount }}
{{- end }}
strategy: strategy:
type: Recreate type: Recreate
selector: selector:

View File

@@ -1,32 +0,0 @@
{{- if .Values.autoscaling.enabled }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
name: {{ include "krawl.fullname" . }}
labels:
{{- include "krawl.labels" . | nindent 4 }}
spec:
scaleTargetRef:
apiVersion: apps/v1
kind: Deployment
name: {{ include "krawl.fullname" . }}
minReplicas: {{ .Values.autoscaling.minReplicas }}
maxReplicas: {{ .Values.autoscaling.maxReplicas }}
metrics:
{{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
- type: Resource
resource:
name: cpu
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
{{- end }}
{{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
- type: Resource
resource:
name: memory
target:
type: Utilization
averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
{{- end }}
{{- end }}

View File

@@ -3,7 +3,7 @@ replicaCount: 1
image: image:
repository: ghcr.io/blessedrebus/krawl repository: ghcr.io/blessedrebus/krawl
pullPolicy: Always pullPolicy: Always
tag: "1.0.0" tag: "1.1.0"
imagePullSecrets: [] imagePullSecrets: []
nameOverride: "krawl" nameOverride: "krawl"
@@ -54,13 +54,6 @@ resources:
# If not set, container will use its default timezone # If not set, container will use its default timezone
timezone: "" timezone: ""
autoscaling:
enabled: false
minReplicas: 1
maxReplicas: 1
targetCPUUtilizationPercentage: 70
targetMemoryUtilizationPercentage: 80
nodeSelector: {} nodeSelector: {}
tolerations: [] tolerations: []
@@ -579,7 +572,7 @@ wordlists:
xxe_injection: "(<!ENTITY|<!DOCTYPE|SYSTEM\\s+[\"']|PUBLIC\\s+[\"']|&\\w+;|file://|php://filter|expect://)" xxe_injection: "(<!ENTITY|<!DOCTYPE|SYSTEM\\s+[\"']|PUBLIC\\s+[\"']|&\\w+;|file://|php://filter|expect://)"
ldap_injection: "(\\*\\)|\\(\\||\\(&)" ldap_injection: "(\\*\\)|\\(\\||\\(&)"
command_injection: "(cmd=|exec=|command=|execute=|system=|ping=|host=|&&|\\|\\||;|\\$\\{|\\$\\(|`|\\bid\\b|\\bwhoami\\b|\\buname\\b|\\bcat\\b|\\bls\\b|\\bpwd\\b|\\becho\\b|\\bwget\\b|\\bcurl\\b|\\bnc\\b|\\bnetcat\\b|\\bbash\\b|\\bsh\\b|\\bps\\b|\\bkill\\b|\\bchmod\\b|\\bchown\\b|\\bcp\\b|\\bmv\\b|\\brm\\b|/bin/bash|/bin/sh|cmd\\.exe|/bin/|/usr/bin/|/sbin/)" command_injection: "(cmd=|exec=|command=|execute=|system=|ping=|host=|&&|\\|\\||;|\\$\\{|\\$\\(|`|\\bid\\b|\\bwhoami\\b|\\buname\\b|\\bcat\\b|\\bls\\b|\\bpwd\\b|\\becho\\b|\\bwget\\b|\\bcurl\\b|\\bnc\\b|\\bnetcat\\b|\\bbash\\b|\\bsh\\b|\\bps\\b|\\bkill\\b|\\bchmod\\b|\\bchown\\b|\\bcp\\b|\\bmv\\b|\\brm\\b|/bin/bash|/bin/sh|cmd\\.exe|/bin/|/usr/bin/|/sbin/)"
common_probes: "(/admin|/backup|/config|/database|/private|/uploads|/wp-admin|/login|/phpMyAdmin|/phpmyadmin|/users|/search|/contact|/info|/input|/feedback|/server|/api/v1/|/api/v2/|/api/search|/api/sql|/api/database|\\.env|/credentials\\.txt|/passwords\\.txt|\\.git|/backup\\.sql|/db_backup\\.sql)" common_probes: "(/admin|/wp-admin|/phpMyAdmin|/phpmyadmin|/feedback|\\.env|/credentials\\.txt|/passwords\\.txt|\\.git|/backup\\.sql|/db_backup\\.sql)"
suspicious_patterns: suspicious_patterns:
- sqlmap - sqlmap
- nessus - nessus

BIN
img/attack_types.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 97 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 179 KiB

After

Width:  |  Height:  |  Size: 353 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 106 KiB

View File

@@ -10,6 +10,5 @@ resources:
- service.yaml - service.yaml
- network-policy.yaml - network-policy.yaml
- ingress.yaml - ingress.yaml
- hpa.yaml
namespace: krawl-system namespace: krawl-system

View File

@@ -15,5 +15,5 @@ requests>=2.32.5
# Web framework # Web framework
fastapi>=0.115.0 fastapi>=0.115.0
uvicorn[standard]>=0.30.0 uvicorn[standard]>=0.30.0
jinja2>=3.1.0 jinja2>=3.1.5
python-multipart>=0.0.9 python-multipart>=0.0.9

View File

@@ -94,7 +94,7 @@ class Config:
ip = response.text.strip() ip = response.text.strip()
if ip: if ip:
break break
except Exception: except requests.RequestException:
continue continue
if not ip: if not ip:

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import re import re
import random import secrets
import logging import logging
import json import json
from typing import Optional, Tuple, Dict from typing import Optional, Tuple, Dict
@@ -9,6 +9,7 @@ from generators import random_username, random_password, random_email
from wordlists import get_wordlists from wordlists import get_wordlists
logger = logging.getLogger("krawl") logger = logging.getLogger("krawl")
_sysrand = secrets.SystemRandom()
def detect_path_traversal(path: str, query: str = "", body: str = "") -> bool: def detect_path_traversal(path: str, query: str = "", body: str = "") -> bool:
@@ -86,7 +87,7 @@ def generate_fake_passwd() -> str:
shells = passwd_config.get("shells", ["/bin/bash"]) shells = passwd_config.get("shells", ["/bin/bash"])
fake_users = [ fake_users = [
f"{random_username()}:x:{random.randint(uid_min, uid_max)}:{random.randint(gid_min, gid_max)}::/home/{random_username()}:{random.choice(shells)}" f"{random_username()}:x:{_sysrand.randint(uid_min, uid_max)}:{_sysrand.randint(gid_min, gid_max)}::/home/{random_username()}:{secrets.choice(shells)}"
for _ in range(3) for _ in range(3)
] ]
@@ -108,7 +109,7 @@ def generate_fake_shadow() -> str:
hash_length = shadow_config.get("hash_length", 86) hash_length = shadow_config.get("hash_length", 86)
fake_entries = [ fake_entries = [
f"{random_username()}:{hash_prefix}{''.join(random.choices('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', k=salt_length))}${''.join(random.choices('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', k=hash_length))}:19000:0:99999:7:::" f"{random_username()}:{hash_prefix}{''.join(_sysrand.choices('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', k=salt_length))}${''.join(_sysrand.choices('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789', k=hash_length))}:19000:0:99999:7:::"
for _ in range(3) for _ in range(3)
] ]
@@ -147,9 +148,9 @@ SECRET_TOKEN=fake_secret_token_xyz""",
return f"""# Configuration File return f"""# Configuration File
api_endpoint = https://api.example.com api_endpoint = https://api.example.com
api_key = fake_key_{random.randint(1000, 9999)} api_key = fake_key_{_sysrand.randint(1000, 9999)}
database_url = mysql://user:fake_pass@localhost/db database_url = mysql://user:fake_pass@localhost/db
secret = fake_secret_{random.randint(10000, 99999)} secret = fake_secret_{_sysrand.randint(10000, 99999)}
""" """
@@ -167,7 +168,7 @@ def generate_fake_directory_listing(path: str) -> str:
directories = [(d["name"], d["size"], d["perms"]) for d in fake_dirs] directories = [(d["name"], d["size"], d["perms"]) for d in fake_dirs]
files = [ files = [
(f["name"], str(random.randint(f["size_min"], f["size_max"])), f["perms"]) (f["name"], str(_sysrand.randint(f["size_min"], f["size_max"])), f["perms"])
for f in fake_files for f in fake_files
] ]
@@ -208,7 +209,7 @@ def generate_path_traversal_response(path: str) -> Tuple[str, str, int]:
if "proc/self" in path_lower: if "proc/self" in path_lower:
logger.debug("Returning fake proc info") logger.debug("Returning fake proc info")
return (f"{random.randint(1000, 9999)}", "text/plain", 200) return (f"{_sysrand.randint(1000, 9999)}", "text/plain", 200)
logger.debug("Returning fake directory listing") logger.debug("Returning fake directory listing")
return (generate_fake_directory_listing(path), "text/html", 200) return (generate_fake_directory_listing(path), "text/html", 200)
@@ -246,7 +247,7 @@ def generate_xxe_response(body: str) -> Tuple[str, str, int]:
if xxe_config and "entity_processed" in xxe_config: if xxe_config and "entity_processed" in xxe_config:
template = xxe_config["entity_processed"]["template"] template = xxe_config["entity_processed"]["template"]
entity_values = xxe_config["entity_processed"]["entity_values"] entity_values = xxe_config["entity_processed"]["entity_values"]
entity_value = random.choice(entity_values) entity_value = secrets.choice(entity_values)
response = template.replace("{entity_value}", entity_value) response = template.replace("{entity_value}", entity_value)
else: else:
response = """<?xml version="1.0"?> response = """<?xml version="1.0"?>
@@ -260,7 +261,7 @@ def generate_xxe_response(body: str) -> Tuple[str, str, int]:
if xxe_config and "error" in xxe_config: if xxe_config and "error" in xxe_config:
template = xxe_config["error"]["template"] template = xxe_config["error"]["template"]
messages = xxe_config["error"]["messages"] messages = xxe_config["error"]["messages"]
message = random.choice(messages) message = secrets.choice(messages)
response = template.replace("{message}", message) response = template.replace("{message}", message)
else: else:
response = """<?xml version="1.0"?> response = """<?xml version="1.0"?>
@@ -281,22 +282,22 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int]
# id command # id command
if re.search(r"\bid\b", input_lower): if re.search(r"\bid\b", input_lower):
if cmd_config and "id" in cmd_config: if cmd_config and "id" in cmd_config:
uid = random.randint( uid = _sysrand.randint(
cmd_config.get("uid_min", 1000), cmd_config.get("uid_max", 2000) cmd_config.get("uid_min", 1000), cmd_config.get("uid_max", 2000)
) )
gid = random.randint( gid = _sysrand.randint(
cmd_config.get("gid_min", 1000), cmd_config.get("gid_max", 2000) cmd_config.get("gid_min", 1000), cmd_config.get("gid_max", 2000)
) )
template = random.choice(cmd_config["id"]) template = secrets.choice(cmd_config["id"])
output = template.replace("{uid}", str(uid)).replace("{gid}", str(gid)) output = template.replace("{uid}", str(uid)).replace("{gid}", str(gid))
else: else:
output = f"uid={random.randint(1000, 2000)}(www-data) gid={random.randint(1000, 2000)}(www-data) groups={random.randint(1000, 2000)}(www-data)" output = f"uid={_sysrand.randint(1000, 2000)}(www-data) gid={_sysrand.randint(1000, 2000)}(www-data) groups={_sysrand.randint(1000, 2000)}(www-data)"
return (output, "text/plain", 200) return (output, "text/plain", 200)
# whoami command # whoami command
if re.search(r"\bwhoami\b", input_lower): if re.search(r"\bwhoami\b", input_lower):
users = cmd_config.get("whoami", ["www-data"]) if cmd_config else ["www-data"] users = cmd_config.get("whoami", ["www-data"]) if cmd_config else ["www-data"]
return (random.choice(users), "text/plain", 200) return (secrets.choice(users), "text/plain", 200)
# uname command # uname command
if re.search(r"\buname\b", input_lower): if re.search(r"\buname\b", input_lower):
@@ -305,7 +306,7 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int]
if cmd_config if cmd_config
else ["Linux server 5.4.0 x86_64"] else ["Linux server 5.4.0 x86_64"]
) )
return (random.choice(outputs), "text/plain", 200) return (secrets.choice(outputs), "text/plain", 200)
# pwd command # pwd command
if re.search(r"\bpwd\b", input_lower): if re.search(r"\bpwd\b", input_lower):
@@ -314,16 +315,16 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int]
if cmd_config if cmd_config
else ["/var/www/html"] else ["/var/www/html"]
) )
return (random.choice(paths), "text/plain", 200) return (secrets.choice(paths), "text/plain", 200)
# ls command # ls command
if re.search(r"\bls\b", input_lower): if re.search(r"\bls\b", input_lower):
if cmd_config and "ls" in cmd_config: if cmd_config and "ls" in cmd_config:
files = random.choice(cmd_config["ls"]) files = secrets.choice(cmd_config["ls"])
else: else:
files = ["index.php", "config.php", "uploads"] files = ["index.php", "config.php", "uploads"]
output = "\n".join( output = "\n".join(
random.sample(files, k=random.randint(3, min(6, len(files)))) _sysrand.sample(files, k=_sysrand.randint(3, min(6, len(files))))
) )
return (output, "text/plain", 200) return (output, "text/plain", 200)
@@ -351,27 +352,27 @@ def generate_command_injection_response(input_text: str) -> Tuple[str, str, int]
if any(cmd in input_lower for cmd in ["wget", "curl", "nc", "netcat"]): if any(cmd in input_lower for cmd in ["wget", "curl", "nc", "netcat"]):
if cmd_config and "network_commands" in cmd_config: if cmd_config and "network_commands" in cmd_config:
outputs = cmd_config["network_commands"] outputs = cmd_config["network_commands"]
output = random.choice(outputs) output = secrets.choice(outputs)
if "{size}" in output: if "{size}" in output:
size = random.randint( size = _sysrand.randint(
cmd_config.get("download_size_min", 100), cmd_config.get("download_size_min", 100),
cmd_config.get("download_size_max", 10000), cmd_config.get("download_size_max", 10000),
) )
output = output.replace("{size}", str(size)) output = output.replace("{size}", str(size))
else: else:
outputs = ["bash: command not found", "Connection timeout"] outputs = ["bash: command not found", "Connection timeout"]
output = random.choice(outputs) output = secrets.choice(outputs)
return (output, "text/plain", 200) return (output, "text/plain", 200)
# generic outputs # generic outputs
if cmd_config and "generic" in cmd_config: if cmd_config and "generic" in cmd_config:
generic_outputs = cmd_config["generic"] generic_outputs = cmd_config["generic"]
output = random.choice(generic_outputs) output = secrets.choice(generic_outputs)
if "{num}" in output: if "{num}" in output:
output = output.replace("{num}", str(random.randint(1, 99))) output = output.replace("{num}", str(_sysrand.randint(1, 99)))
else: else:
generic_outputs = ["", "Command executed successfully", "sh: syntax error"] generic_outputs = ["", "Command executed successfully", "sh: syntax error"]
output = random.choice(generic_outputs) output = secrets.choice(generic_outputs)
return (output, "text/plain", 200) return (output, "text/plain", 200)
@@ -414,7 +415,7 @@ def get_random_sql_error(
return ("Database error occurred", "text/plain") return ("Database error occurred", "text/plain")
if not db_type: if not db_type:
db_type = random.choice(list(sql_errors.keys())) db_type = secrets.choice(list(sql_errors.keys()))
db_errors = sql_errors.get(db_type, {}) db_errors = sql_errors.get(db_type, {})
@@ -429,15 +430,15 @@ def get_random_sql_error(
all_errors.extend(error_list) all_errors.extend(error_list)
errors = all_errors if all_errors else ["Database error occurred"] errors = all_errors if all_errors else ["Database error occurred"]
error_message = random.choice(errors) if errors else "Database error occurred" error_message = secrets.choice(errors) if errors else "Database error occurred"
if "{table}" in error_message: if "{table}" in error_message:
tables = ["users", "products", "orders", "customers", "accounts", "sessions"] tables = ["users", "products", "orders", "customers", "accounts", "sessions"]
error_message = error_message.replace("{table}", random.choice(tables)) error_message = error_message.replace("{table}", secrets.choice(tables))
if "{column}" in error_message: if "{column}" in error_message:
columns = ["id", "name", "email", "password", "username", "created_at"] columns = ["id", "name", "email", "password", "username", "created_at"]
error_message = error_message.replace("{column}", random.choice(columns)) error_message = error_message.replace("{column}", secrets.choice(columns))
return (error_message, "text/plain") return (error_message, "text/plain")
@@ -455,7 +456,7 @@ def generate_sql_error_response(
status_code = 500 status_code = 500
if random.random() < 0.3: if _sysrand.random() < 0.3:
status_code = 200 status_code = 200
logger.info(f"SQL injection detected: {injection_type}") logger.info(f"SQL injection detected: {injection_type}")
@@ -475,9 +476,9 @@ def get_sql_response_with_data(path: str, params: str) -> str:
"username": random_username(), "username": random_username(),
"email": random_email(), "email": random_email(),
"password_hash": random_password(), "password_hash": random_password(),
"role": random.choice(["admin", "user", "moderator"]), "role": secrets.choice(["admin", "user", "moderator"]),
} }
for i in range(1, random.randint(2, 5)) for i in range(1, _sysrand.randint(2, 5))
], ],
} }
return json.dumps(data, indent=2) return json.dumps(data, indent=2)
@@ -570,7 +571,7 @@ def generate_server_error() -> Tuple[str, str]:
if not server_errors: if not server_errors:
return ("500 Internal Server Error", "text/html") return ("500 Internal Server Error", "text/html")
server_type = random.choice(list(server_errors.keys())) server_type = secrets.choice(list(server_errors.keys()))
server_config = server_errors[server_type] server_config = server_errors[server_type]
error_codes = { error_codes = {
@@ -583,18 +584,18 @@ def generate_server_error() -> Tuple[str, str]:
503: "Service Unavailable", 503: "Service Unavailable",
} }
code = random.choice(list(error_codes.keys())) code = secrets.choice(list(error_codes.keys()))
message = error_codes[code] message = error_codes[code]
template = server_config.get("template", "") template = server_config.get("template", "")
version = random.choice(server_config.get("versions", ["1.0"])) version = secrets.choice(server_config.get("versions", ["1.0"]))
html = template.replace("{code}", str(code)) html = template.replace("{code}", str(code))
html = html.replace("{message}", message) html = html.replace("{message}", message)
html = html.replace("{version}", version) html = html.replace("{version}", version)
if server_type == "apache": if server_type == "apache":
os = random.choice(server_config.get("os", ["Ubuntu"])) os = secrets.choice(server_config.get("os", ["Ubuntu"]))
html = html.replace("{os}", os) html = html.replace("{os}", os)
html = html.replace("{host}", "localhost") html = html.replace("{host}", "localhost")
@@ -611,10 +612,10 @@ def get_server_header(server_type: str = None) -> str:
return "nginx/1.18.0" return "nginx/1.18.0"
if not server_type: if not server_type:
server_type = random.choice(list(server_errors.keys())) server_type = secrets.choice(list(server_errors.keys()))
server_config = server_errors.get(server_type, {}) server_config = server_errors.get(server_type, {})
version = random.choice(server_config.get("versions", ["1.0"])) version = secrets.choice(server_config.get("versions", ["1.0"]))
server_headers = { server_headers = {
"nginx": f"nginx/{version}", "nginx": f"nginx/{version}",

View File

@@ -77,5 +77,5 @@ def main():
finally: finally:
try: try:
db.close_session() db.close_session()
except Exception: except Exception as e:
pass app_logger.error(f"Error closing DB session after retention cleanup: {e}")

View File

@@ -1,12 +1,15 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from typing import Dict, Tuple, Optional from typing import Dict, Tuple, Optional
import logging
import re import re
import urllib.parse import urllib.parse
from wordlists import get_wordlists from wordlists import get_wordlists
from database import get_database, DatabaseManager from database import get_database, DatabaseManager
logger = logging.getLogger("krawl")
# Module-level singleton for background task access # Module-level singleton for background task access
_tracker_instance: "AccessTracker | None" = None _tracker_instance: "AccessTracker | None" = None
@@ -103,9 +106,8 @@ class AccessTracker:
if self._db_manager is None: if self._db_manager is None:
try: try:
self._db_manager = get_database() self._db_manager = get_database()
except Exception: except Exception as e:
# Database not initialized, persistence disabled logger.error(f"Failed to initialize database manager: {e}")
pass
return self._db_manager return self._db_manager
def parse_credentials(self, post_data: str) -> Tuple[str, str]: def parse_credentials(self, post_data: str) -> Tuple[str, str]:
@@ -206,9 +208,8 @@ class AccessTracker:
self.db.persist_credential( self.db.persist_credential(
ip=ip, path=path, username=username, password=password ip=ip, path=path, username=username, password=password
) )
except Exception: except Exception as e:
# Don't crash if database persistence fails logger.error(f"Failed to persist credential attempt: {e}")
pass
def record_access( def record_access(
self, self,
@@ -271,9 +272,8 @@ class AccessTracker:
attack_types=attack_findings if attack_findings else None, attack_types=attack_findings if attack_findings else None,
raw_request=raw_request if raw_request else None, raw_request=raw_request if raw_request else None,
) )
except Exception: except Exception as e:
# Don't crash if database persistence fails logger.error(f"Failed to persist access record: {e}")
pass
def detect_attack_type(self, data: str) -> list[str]: def detect_attack_type(self, data: str) -> list[str]:
""" """

View File

@@ -470,7 +470,7 @@
"xxe_injection": "(<!ENTITY|<!DOCTYPE|SYSTEM\\s+[\"']|PUBLIC\\s+[\"']|&\\w+;|file://|php://filter|expect://)", "xxe_injection": "(<!ENTITY|<!DOCTYPE|SYSTEM\\s+[\"']|PUBLIC\\s+[\"']|&\\w+;|file://|php://filter|expect://)",
"ldap_injection": "(\\*\\)|\\(\\||\\(&)", "ldap_injection": "(\\*\\)|\\(\\||\\(&)",
"command_injection": "(cmd=|exec=|command=|execute=|system=|ping=|host=|&&|\\|\\||;|\\$\\{|\\$\\(|`|\\bid\\b|\\bwhoami\\b|\\buname\\b|\\bcat\\b|\\bls\\b|\\bpwd\\b|\\becho\\b|\\bwget\\b|\\bcurl\\b|\\bnc\\b|\\bnetcat\\b|\\bbash\\b|\\bsh\\b|\\bps\\b|\\bkill\\b|\\bchmod\\b|\\bchown\\b|\\bcp\\b|\\bmv\\b|\\brm\\b|/bin/bash|/bin/sh|cmd\\.exe|/bin/|/usr/bin/|/sbin/)", "command_injection": "(cmd=|exec=|command=|execute=|system=|ping=|host=|&&|\\|\\||;|\\$\\{|\\$\\(|`|\\bid\\b|\\bwhoami\\b|\\buname\\b|\\bcat\\b|\\bls\\b|\\bpwd\\b|\\becho\\b|\\bwget\\b|\\bcurl\\b|\\bnc\\b|\\bnetcat\\b|\\bbash\\b|\\bsh\\b|\\bps\\b|\\bkill\\b|\\bchmod\\b|\\bchown\\b|\\bcp\\b|\\bmv\\b|\\brm\\b|/bin/bash|/bin/sh|cmd\\.exe|/bin/|/usr/bin/|/sbin/)",
"common_probes": "(/admin|/backup|/config|/database|/private|/uploads|/wp-admin|/login|/phpMyAdmin|/phpmyadmin|/users|/search|/contact|/info|/input|/feedback|/server|/api/v1/|/api/v2/|/api/search|/api/sql|/api/database|\\.env|/credentials\\.txt|/passwords\\.txt|\\.git|/backup\\.sql|/db_backup\\.sql)" "common_probes": "(/admin|/wp-admin|/phpMyAdmin|/phpmyadmin|/feedback|\\.env|/credentials\\.txt|/passwords\\.txt|\\.git|/backup\\.sql|/db_backup\\.sql)"
}, },
"server_headers": [ "server_headers": [
"Apache/2.4.41 (Ubuntu)", "Apache/2.4.41 (Ubuntu)",