diff --git a/.github/workflows/docker-build-push.yml b/.github/workflows/docker-build-push.yml
index 70fbb36..f76cf11 100644
--- a/.github/workflows/docker-build-push.yml
+++ b/.github/workflows/docker-build-push.yml
@@ -4,7 +4,6 @@ on:
push:
branches:
- main
- - beta
- dev
- github-actions-ci
paths:
@@ -15,10 +14,6 @@ on:
- 'requirements.txt'
- 'entrypoint.sh'
- '.github/workflows/docker-build-push.yml'
- tags:
- - 'v*.*.*'
- release:
- types: [published]
workflow_dispatch:
env:
diff --git a/.github/workflows/helm-package-push.yml b/.github/workflows/helm-package-push.yml
index 9ba9150..0091915 100644
--- a/.github/workflows/helm-package-push.yml
+++ b/.github/workflows/helm-package-push.yml
@@ -4,18 +4,11 @@ on:
push:
branches:
- main
- - beta
- dev
- github-actions-ci
paths:
- 'helm/**'
- '.github/workflows/helm-package-push.yml'
- tags:
- - 'v*'
- release:
- types:
- - published
- - created
workflow_dispatch:
env:
diff --git a/README.md b/README.md
index 4dc5702..6bc2ba5 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,7 @@
- [Docker Run](#docker-run)
- [Docker Compose](#docker-compose)
- [Kubernetes](#kubernetes)
+ - [Local (Python)](#local-python)
- [Configuration](#configuration)
- [config.yaml](#configuration-via-configyaml)
- [Environment Variables](#configuration-via-enviromental-variables)
@@ -63,6 +64,8 @@ Tip: crawl the `robots.txt` paths for additional fun
It creates realistic fake web applications filled with low‑hanging fruit such as admin panels, configuration files, and exposed fake credentials to attract and identify suspicious activity.
+
+
By wasting attacker resources, Krawl helps clearly distinguish malicious behavior from legitimate crawlers.
It features:
@@ -77,8 +80,9 @@ It features:
- **Customizable Wordlists**: Easy JSON-based configuration
- **Random Error Injection**: Mimic real server behavior
-
+You can easily expose Krawl alongside your other services to shield them from web crawlers and malicious users using a reverse proxy. For more details, see the [Reverse Proxy documentation](docs/reverse-proxy.md).
+
## Krawl Dashboard
@@ -160,6 +164,17 @@ docker-compose down
### Kubernetes
**Krawl is also available natively on Kubernetes**. Installation can be done either [via manifest](kubernetes/README.md) or [using the helm chart](helm/README.md).
+### Local (Python)
+
+Run Krawl directly with Python (suggested version 3.13) and uvicorn for local development or testing:
+
+```bash
+pip install -r requirements.txt
+uvicorn app:app --host 0.0.0.0 --port 5000 --app-dir src
+```
+
+Access the server at `http://localhost:5000`
+
## Configuration
Krawl uses a **configuration hierarchy** in which **environment variables take precedence over the configuration file**. This approach is recommended for Docker deployments and quick out-of-the-box customization.
diff --git a/helm/Chart.yaml b/helm/Chart.yaml
index 92d530c..e4e1cee 100644
--- a/helm/Chart.yaml
+++ b/helm/Chart.yaml
@@ -2,8 +2,8 @@ apiVersion: v2
name: krawl-chart
description: A Helm chart for Krawl honeypot server
type: application
-version: 1.1.0
-appVersion: 1.1.0
+version: 1.1.3
+appVersion: 1.1.3
keywords:
- honeypot
- security
diff --git a/helm/README.md b/helm/README.md
index 09c2387..268ca00 100644
--- a/helm/README.md
+++ b/helm/README.md
@@ -14,7 +14,7 @@ A Helm chart for deploying the Krawl honeypot application on Kubernetes.
```bash
helm install krawl oci://ghcr.io/blessedrebus/krawl-chart \
- --version 1.1.0 \
+ --version 1.1.3 \
--namespace krawl-system \
--create-namespace \
-f values.yaml # optional
@@ -169,7 +169,7 @@ kubectl get secret krawl-server -n krawl-system \
You can override individual values with `--set` without a values file:
```bash
-helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.0 \
+helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.3 \
--set ingress.hosts[0].host=honeypot.example.com \
--set config.canary.token_url=https://canarytokens.com/your-token
```
@@ -177,7 +177,7 @@ helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.0 \
## Upgrading
```bash
-helm upgrade krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.0 -f values.yaml
+helm upgrade krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.3 -f values.yaml
```
## Uninstalling
diff --git a/helm/values.yaml b/helm/values.yaml
index 8b4a907..3bdebd9 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -3,7 +3,7 @@ replicaCount: 1
image:
repository: ghcr.io/blessedrebus/krawl
pullPolicy: Always
- tag: "1.1.0"
+ tag: "1.1.3"
imagePullSecrets: []
nameOverride: "krawl"
diff --git a/img/geoip_dashboard.png b/img/geoip_dashboard.png
index 1c354bb..5a4f389 100644
Binary files a/img/geoip_dashboard.png and b/img/geoip_dashboard.png differ
diff --git a/img/use-case.drawio b/img/use-case.drawio
new file mode 100644
index 0000000..e8d0d8f
--- /dev/null
+++ b/img/use-case.drawio
@@ -0,0 +1,120 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/img/use-case.png b/img/use-case.png
new file mode 100644
index 0000000..2346423
Binary files /dev/null and b/img/use-case.png differ
diff --git a/src/database.py b/src/database.py
index cbee4a0..803e7e7 100644
--- a/src/database.py
+++ b/src/database.py
@@ -815,24 +815,25 @@ class DatabaseManager:
def flag_stale_ips_for_reevaluation(self) -> int:
"""
Flag IPs for reevaluation where:
- - last_seen is between 5 and 30 days ago
+        - last_seen is within the configured retention period (newer than the retention cutoff)
- last_analysis is more than 5 days ago
Returns:
Number of IPs flagged for reevaluation
"""
+ from config import get_config
+
session = self.session
try:
now = datetime.now()
- last_seen_lower = now - timedelta(days=30)
- last_seen_upper = now - timedelta(days=5)
+ retention_days = get_config().database_retention_days
+ last_seen_cutoff = now - timedelta(days=retention_days)
last_analysis_cutoff = now - timedelta(days=5)
count = (
session.query(IpStats)
.filter(
- IpStats.last_seen >= last_seen_lower,
- IpStats.last_seen <= last_seen_upper,
+ IpStats.last_seen >= last_seen_cutoff,
IpStats.last_analysis <= last_analysis_cutoff,
IpStats.need_reevaluation == False,
IpStats.manual_category == False,
@@ -882,6 +883,7 @@ class DatabaseManager:
ip_filter: Optional[str] = None,
suspicious_only: bool = False,
since_minutes: Optional[int] = None,
+ sort_order: str = "desc",
) -> Dict[str, Any]:
"""
Retrieve access logs with pagination and optional filtering.
@@ -892,6 +894,7 @@ class DatabaseManager:
ip_filter: Filter by IP address
suspicious_only: Only return suspicious requests
since_minutes: Only return logs from the last N minutes
+ sort_order: Sort direction for timestamp ('asc' or 'desc')
Returns:
List of access log dictionaries
@@ -899,7 +902,12 @@ class DatabaseManager:
session = self.session
try:
offset = (page - 1) * page_size
- query = session.query(AccessLog).order_by(AccessLog.timestamp.desc())
+ order = (
+ AccessLog.timestamp.asc()
+ if sort_order == "asc"
+ else AccessLog.timestamp.desc()
+ )
+ query = session.query(AccessLog).order_by(order)
if ip_filter:
query = query.filter(AccessLog.ip == sanitize_ip(ip_filter))
@@ -1503,6 +1511,7 @@ class DatabaseManager:
"path": log.path,
"user_agent": log.user_agent,
"timestamp": log.timestamp.isoformat(),
+ "log_id": log.id,
}
for log in logs
]
diff --git a/src/routes/htmx.py b/src/routes/htmx.py
index 303bce5..549f044 100644
--- a/src/routes/htmx.py
+++ b/src/routes/htmx.py
@@ -180,7 +180,10 @@ async def htmx_access_logs_by_ip(
):
db = get_db()
result = db.get_access_logs_paginated(
- page=max(1, page), page_size=25, ip_filter=ip_filter
+ page=max(1, page),
+ page_size=25,
+ ip_filter=ip_filter,
+ sort_order=sort_order if sort_order in ("asc", "desc") else "desc",
)
# Normalize pagination key (DB returns total_attackers, template expects total)
diff --git a/src/tasks/db_retention.py b/src/tasks/db_retention.py
index af803c6..ab4af86 100644
--- a/src/tasks/db_retention.py
+++ b/src/tasks/db_retention.py
@@ -7,6 +7,8 @@ Periodically deletes old records based on configured retention_days.
from datetime import datetime, timedelta
+from sqlalchemy import or_
+
from database import get_database
from logger import get_app_logger
@@ -26,12 +28,18 @@ app_logger = get_app_logger()
def main():
"""
- Delete access logs, credential attempts, and attack detections
- older than the configured retention period.
+ Delete old records based on the configured retention period.
+ Keeps suspicious access logs, their attack detections, linked IPs,
+ category history, and all credential attempts.
"""
try:
from config import get_config
- from models import AccessLog, CredentialAttempt, AttackDetection
+ from models import (
+ AccessLog,
+ AttackDetection,
+ IpStats,
+ CategoryHistory,
+ )
config = get_config()
retention_days = config.database_retention_days
@@ -41,35 +49,71 @@ def main():
cutoff = datetime.now() - timedelta(days=retention_days)
- # Delete attack detections linked to old access logs first (FK constraint)
- old_log_ids = session.query(AccessLog.id).filter(AccessLog.timestamp < cutoff)
+ # Delete attack detections linked to old NON-suspicious access logs (FK constraint)
+ old_nonsuspicious_log_ids = session.query(AccessLog.id).filter(
+ AccessLog.timestamp < cutoff,
+ AccessLog.is_suspicious == False,
+ AccessLog.is_honeypot_trigger == False,
+ )
detections_deleted = (
session.query(AttackDetection)
- .filter(AttackDetection.access_log_id.in_(old_log_ids))
+ .filter(AttackDetection.access_log_id.in_(old_nonsuspicious_log_ids))
.delete(synchronize_session=False)
)
- # Delete old access logs
+ # Delete old non-suspicious access logs (keep suspicious ones)
logs_deleted = (
session.query(AccessLog)
- .filter(AccessLog.timestamp < cutoff)
+ .filter(
+ AccessLog.timestamp < cutoff,
+ AccessLog.is_suspicious == False,
+ AccessLog.is_honeypot_trigger == False,
+ )
.delete(synchronize_session=False)
)
- # Delete old credential attempts
- creds_deleted = (
- session.query(CredentialAttempt)
- .filter(CredentialAttempt.timestamp < cutoff)
+ # IPs to preserve: those with any suspicious access logs
+ preserved_ips = (
+ session.query(AccessLog.ip)
+ .filter(
+ or_(
+ AccessLog.is_suspicious == True,
+ AccessLog.is_honeypot_trigger == True,
+ )
+ )
+ .distinct()
+ )
+
+ # Delete stale IPs, but keep those linked to suspicious logs
+ ips_deleted = (
+ session.query(IpStats)
+ .filter(
+ IpStats.last_seen < cutoff,
+ ~IpStats.ip.in_(preserved_ips),
+ )
+ .delete(synchronize_session=False)
+ )
+
+ # Delete old category history, but keep records for preserved IPs
+ history_deleted = (
+ session.query(CategoryHistory)
+ .filter(
+ CategoryHistory.timestamp < cutoff,
+ ~CategoryHistory.ip.in_(preserved_ips),
+ )
.delete(synchronize_session=False)
)
session.commit()
- if logs_deleted or creds_deleted or detections_deleted:
+ total = logs_deleted + detections_deleted + ips_deleted + history_deleted
+ if total:
app_logger.info(
f"DB retention: Deleted {logs_deleted} access logs, "
f"{detections_deleted} attack detections, "
- f"{creds_deleted} credential attempts older than {retention_days} days"
+ f"{ips_deleted} stale IPs, "
+ f"{history_deleted} category history records "
+ f"older than {retention_days} days"
)
except Exception as e:
diff --git a/src/templates/jinja2/dashboard/partials/access_by_ip_table.html b/src/templates/jinja2/dashboard/partials/access_by_ip_table.html
index 5e7bd6c..e260f56 100644
--- a/src/templates/jinja2/dashboard/partials/access_by_ip_table.html
+++ b/src/templates/jinja2/dashboard/partials/access_by_ip_table.html
@@ -45,7 +45,10 @@
{{ log.timestamp | format_ts }} |
{% if log.id %}
-
+
{% endif %}
|
diff --git a/src/templates/jinja2/dashboard/partials/attack_types_table.html b/src/templates/jinja2/dashboard/partials/attack_types_table.html
index 4ac3369..fd80ac1 100644
--- a/src/templates/jinja2/dashboard/partials/attack_types_table.html
+++ b/src/templates/jinja2/dashboard/partials/attack_types_table.html
@@ -62,7 +62,10 @@
{{ attack.timestamp | format_ts }} |
{% if attack.log_id %}
-
+
{% endif %}
|