diff --git a/.gitignore b/.gitignore
index a36748e..70b93e4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,9 +61,12 @@ secrets/
*.log
logs/
-# Database
+# Data and databases
data/
+**/data/
*.db
+*.sqlite
+*.sqlite3
# Temporary files
*.tmp
diff --git a/Dockerfile b/Dockerfile
index adac20f..2c7b954 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,16 +4,25 @@ LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl
WORKDIR /app
+# Install gosu for dropping privileges
+RUN apt-get update && apt-get install -y --no-install-recommends gosu && \
+ rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt /app/
+RUN pip install --no-cache-dir -r requirements.txt
+
COPY src/ /app/src/
COPY wordlists.json /app/
+COPY entrypoint.sh /app/
RUN useradd -m -u 1000 krawl && \
- chown -R krawl:krawl /app
-
-USER krawl
+ mkdir -p /app/logs /app/data && \
+ chown -R krawl:krawl /app && \
+ chmod +x /app/entrypoint.sh
EXPOSE 5000
ENV PYTHONUNBUFFERED=1
+ENTRYPOINT ["/app/entrypoint.sh"]
CMD ["python3", "src/server.py"]
diff --git a/README.md b/README.md
index 7fd0377..f7fe399 100644
--- a/README.md
+++ b/README.md
@@ -48,10 +48,11 @@
-## Star History
-
+## Demo
+Tip: crawl the `robots.txt` paths for additional fun
+### Krawl URL: [http://demo.krawlme.com](http://demo.krawlme.com)
+### View the dashboard [http://demo.krawlme.com/das_dashboard](http://demo.krawlme.com/das_dashboard)
-
## What is Krawl?
**Krawl** is a cloud‑native deception server designed to detect, delay, and analyze malicious web crawlers and automated scanners.
@@ -185,7 +186,7 @@ To customize the deception server installation several **environment variables**
| `CANARY_TOKEN_URL` | External canary token URL | None |
| `DASHBOARD_SECRET_PATH` | Custom dashboard path | Auto-generated |
| `PROBABILITY_ERROR_CODES` | Error response probability (0-100%) | `0` |
-| `SERVER_HEADER` | HTTP Server header for deception, if not set use random server header | |
+| `SERVER_HEADER` | HTTP Server header for deception | `Apache/2.2.22 (Ubuntu)` |
| `TIMEZONE` | IANA timezone for logs and dashboard (e.g., `America/New_York`, `Europe/Rome`) | System timezone |
## robots.txt
@@ -317,3 +318,6 @@ Contributions welcome! Please:
**This is a deception/honeypot system.**
Deploy in isolated environments and monitor carefully for security events.
Use responsibly and in compliance with applicable laws and regulations.
+
+## Star History
+
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..f9825a0
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,38 @@
+# Krawl Honeypot Configuration
+
+server:
+ port: 5000
+ delay: 100 # Response delay in milliseconds
+ timezone: null # e.g., "America/New_York" or null for system default
+
+  # Server header sent in responses; if null, a random one from the wordlists is used.
+ server_header: "Apache/2.2.22 (Ubuntu)"
+
+links:
+ min_length: 5
+ max_length: 15
+ min_per_page: 10
+ max_per_page: 15
+ char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+ max_counter: 10
+
+canary:
+ token_url: null # Optional canary token URL
+ token_tries: 10
+
+dashboard:
+  # Dashboard URL path. If null, a random path is generated at startup.
+  # A leading "/" is added automatically when missing (e.g. "dashboard" -> "/dashboard").
+ secret_path: dashboard
+
+api:
+ server_url: null
+ server_port: 8080
+ server_path: "/api/v2/users"
+
+database:
+ path: "data/krawl.db"
+ retention_days: 30
+
+behavior:
+ probability_error_codes: 0 # 0-100 percentage
\ No newline at end of file
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 6f81a47..02b6ae7 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -10,23 +10,10 @@ services:
- "5000:5000"
volumes:
- ./wordlists.json:/app/wordlists.json:ro
+ - ./config.yaml:/app/config.yaml:ro
+ - ./logs:/app/logs
environment:
- - PORT=5000
- - DELAY=100
- - LINKS_MIN_LENGTH=5
- - LINKS_MAX_LENGTH=15
- - LINKS_MIN_PER_PAGE=10
- - LINKS_MAX_PER_PAGE=15
- - MAX_COUNTER=10
- - CANARY_TOKEN_TRIES=10
- - PROBABILITY_ERROR_CODES=0
- # - SERVER_HEADER=Apache/2.2.22 (Ubuntu)
- # Optional: Set your canary token URL
- # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt
- # Optional: Set custom dashboard path (auto-generated if not set)
- # - DASHBOARD_SECRET_PATH=/my-secret-dashboard
- # Optional: Set timezone for logs and dashboard (e.g., America/New_York, Europe/Rome)
- # - TIMEZONE=UTC
+ - CONFIG_LOCATION=config.yaml
restart: unless-stopped
healthcheck:
test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"]
diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100644
index 0000000..28b5fc0
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+# Container entrypoint: fix ownership of the writable directories, then run
+# the given command as the unprivileged "krawl" user.
+set -e
+
+# gosu can only switch users when started as root. If the container is
+# already running unprivileged (e.g. `docker run --user` or a Kubernetes
+# runAsUser), skip the ownership fix and run the command as the current user.
+if [ "$(id -u)" != "0" ]; then
+    exec "$@"
+fi
+
+# Fix ownership of mounted directories (best effort; failures are ignored)
+chown -R krawl:krawl /app/logs /app/data 2>/dev/null || true
+
+# Drop to krawl user and run the application
+exec gosu krawl "$@"
diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml
index 17cd952..808d9f5 100644
--- a/helm/templates/configmap.yaml
+++ b/helm/templates/configmap.yaml
@@ -5,25 +5,30 @@ metadata:
labels:
{{- include "krawl.labels" . | nindent 4 }}
data:
- PORT: {{ .Values.config.port | quote }}
- DELAY: {{ .Values.config.delay | quote }}
- LINKS_MIN_LENGTH: {{ .Values.config.linksMinLength | quote }}
- LINKS_MAX_LENGTH: {{ .Values.config.linksMaxLength | quote }}
- LINKS_MIN_PER_PAGE: {{ .Values.config.linksMinPerPage | quote }}
- LINKS_MAX_PER_PAGE: {{ .Values.config.linksMaxPerPage | quote }}
- MAX_COUNTER: {{ .Values.config.maxCounter | quote }}
- CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }}
- PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }}
- CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }}
- {{- if .Values.config.dashboardSecretPath }}
- DASHBOARD_SECRET_PATH: {{ .Values.config.dashboardSecretPath | quote }}
- {{- end }}
- {{- if .Values.config.serverHeader }}
- SERVER_HEADER: {{ .Values.config.serverHeader | quote }}
- {{- end }}
- {{- if .Values.config.timezone }}
- TIMEZONE: {{ .Values.config.timezone | quote }}
- {{- end }}
- # Database configuration
- DATABASE_PATH: {{ .Values.database.path | quote }}
- DATABASE_RETENTION_DAYS: {{ .Values.database.retentionDays | quote }}
+ config.yaml: |
+ # Krawl Honeypot Configuration
+ server:
+ port: {{ .Values.config.server.port }}
+ delay: {{ .Values.config.server.delay }}
+ timezone: {{ .Values.config.server.timezone | toYaml }}
+ links:
+ min_length: {{ .Values.config.links.min_length }}
+ max_length: {{ .Values.config.links.max_length }}
+ min_per_page: {{ .Values.config.links.min_per_page }}
+ max_per_page: {{ .Values.config.links.max_per_page }}
+ char_space: {{ .Values.config.links.char_space | quote }}
+ max_counter: {{ .Values.config.links.max_counter }}
+ canary:
+ token_url: {{ .Values.config.canary.token_url | toYaml }}
+ token_tries: {{ .Values.config.canary.token_tries }}
+ dashboard:
+ secret_path: {{ .Values.config.dashboard.secret_path | toYaml }}
+ api:
+ server_url: {{ .Values.config.api.server_url | toYaml }}
+ server_port: {{ .Values.config.api.server_port }}
+ server_path: {{ .Values.config.api.server_path | quote }}
+ database:
+ path: {{ .Values.config.database.path | quote }}
+ retention_days: {{ .Values.config.database.retention_days }}
+ behavior:
+ probability_error_codes: {{ .Values.config.behavior.probability_error_codes }}
diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml
index ecc9655..5635fa3 100644
--- a/helm/templates/deployment.yaml
+++ b/helm/templates/deployment.yaml
@@ -38,18 +38,16 @@ spec:
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- name: http
- containerPort: {{ .Values.config.port }}
+ containerPort: {{ .Values.config.server.port }}
protocol: TCP
- envFrom:
- - configMapRef:
- name: {{ include "krawl.fullname" . }}-config
env:
- - name: DASHBOARD_SECRET_PATH
- valueFrom:
- secretKeyRef:
- name: {{ include "krawl.fullname" . }}
- key: dashboard-path
+ - name: CONFIG_LOCATION
+ value: "config.yaml"
volumeMounts:
+ - name: config
+ mountPath: /app/config.yaml
+ subPath: config.yaml
+ readOnly: true
- name: wordlists
mountPath: /app/wordlists.json
subPath: wordlists.json
@@ -63,6 +61,9 @@ spec:
{{- toYaml . | nindent 12 }}
{{- end }}
volumes:
+ - name: config
+ configMap:
+ name: {{ include "krawl.fullname" . }}-config
- name: wordlists
configMap:
name: {{ include "krawl.fullname" . }}-wordlists
diff --git a/helm/templates/secret.yaml b/helm/templates/secret.yaml
deleted file mode 100644
index 798289c..0000000
--- a/helm/templates/secret.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-{{- $secret := (lookup "v1" "Secret" .Release.Namespace (include "krawl.fullname" .)) -}}
-{{- $dashboardPath := "" -}}
-{{- if and $secret $secret.data -}}
- {{- $dashboardPath = index $secret.data "dashboard-path" | b64dec -}}
-{{- else -}}
- {{- $dashboardPath = printf "/%s" (randAlphaNum 32) -}}
-{{- end -}}
-apiVersion: v1
-kind: Secret
-metadata:
- name: {{ include "krawl.fullname" . }}
- labels:
- {{- include "krawl.labels" . | nindent 4 }}
-type: Opaque
-stringData:
- dashboard-path: {{ $dashboardPath | quote }}
diff --git a/helm/values.yaml b/helm/values.yaml
index c92bc0b..60b1a66 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -62,29 +62,36 @@ tolerations: []
affinity: {}
-# Application configuration
+# Application configuration (config.yaml structure)
config:
- port: 5000
- delay: 100
- linksMinLength: 5
- linksMaxLength: 15
- linksMinPerPage: 10
- linksMaxPerPage: 15
- maxCounter: 10
- canaryTokenTries: 10
- probabilityErrorCodes: 0
-# timezone: "UTC"
-# serverHeader: "Apache/2.2.22 (Ubuntu)"
-# dashboardSecretPath: "/my-secret-dashboard"
-# canaryTokenUrl: set-your-canary-token-url-here
-# timezone: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used.
+ server:
+ port: 5000
+ delay: 100
+ timezone: null # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used.
+ links:
+ min_length: 5
+ max_length: 15
+ min_per_page: 10
+ max_per_page: 15
+ char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+ max_counter: 10
+ canary:
+ token_url: null # Set your canary token URL here
+ token_tries: 10
+ dashboard:
+ secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard"
+ api:
+ server_url: null
+ server_port: 8080
+ server_path: "/api/v2/users"
+ database:
+ path: "data/krawl.db"
+ retention_days: 30
+ behavior:
+ probability_error_codes: 0
-# Database configuration
+# Database persistence configuration
database:
- # Path to the SQLite database file
- path: "data/krawl.db"
- # Number of days to retain access logs and attack data
- retentionDays: 30
# Persistence configuration
persistence:
enabled: true
diff --git a/kubernetes/krawl-all-in-one-deploy.yaml b/kubernetes/krawl-all-in-one-deploy.yaml
index d1a026c..3344260 100644
--- a/kubernetes/krawl-all-in-one-deploy.yaml
+++ b/kubernetes/krawl-all-in-one-deploy.yaml
@@ -10,19 +10,41 @@ metadata:
name: krawl-config
namespace: krawl-system
data:
- PORT: "5000"
- DELAY: "100"
- LINKS_MIN_LENGTH: "5"
- LINKS_MAX_LENGTH: "15"
- LINKS_MIN_PER_PAGE: "10"
- LINKS_MAX_PER_PAGE: "15"
- MAX_COUNTER: "10"
- CANARY_TOKEN_TRIES: "10"
- PROBABILITY_ERROR_CODES: "0"
-# CANARY_TOKEN_URL: set-your-canary-token-url-here
- # Database configuration
- DATABASE_PATH: "data/krawl.db"
- DATABASE_RETENTION_DAYS: "30"
+ config.yaml: |
+ # Krawl Honeypot Configuration
+ server:
+ port: 5000
+ delay: 100
+ timezone: null # e.g., "America/New_York" or null for system default
+
+ links:
+ min_length: 5
+ max_length: 15
+ min_per_page: 10
+ max_per_page: 15
+ char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+ max_counter: 10
+
+ canary:
+ token_url: null # Optional canary token URL
+ token_tries: 10
+
+ dashboard:
+ # Auto-generates random path if null
+ # Can be set to "/dashboard" or similar
+ secret_path: null
+
+ api:
+ server_url: null
+ server_port: 8080
+ server_path: "/api/v2/users"
+
+ database:
+ path: "data/krawl.db"
+ retention_days: 30
+
+ behavior:
+ probability_error_codes: 0 # 0-100 percentage
---
apiVersion: v1
kind: ConfigMap
@@ -227,6 +249,14 @@ data:
500,
502,
503
+ ],
+ "server_headers": [
+ "Apache/2.4.41 (Ubuntu)",
+ "nginx/1.18.0",
+ "Microsoft-IIS/10.0",
+ "cloudflare",
+ "AmazonS3",
+ "gunicorn/20.1.0"
]
}
---
@@ -269,10 +299,14 @@ spec:
- containerPort: 5000
name: http
protocol: TCP
- envFrom:
- - configMapRef:
- name: krawl-config
+ env:
+ - name: CONFIG_LOCATION
+ value: "config.yaml"
volumeMounts:
+ - name: config
+ mountPath: /app/config.yaml
+ subPath: config.yaml
+ readOnly: true
- name: wordlists
mountPath: /app/wordlists.json
subPath: wordlists.json
@@ -287,6 +321,9 @@ spec:
memory: "256Mi"
cpu: "500m"
volumes:
+ - name: config
+ configMap:
+ name: krawl-config
- name: wordlists
configMap:
name: krawl-wordlists
@@ -353,7 +390,7 @@ spec:
- podSelector: {}
- namespaceSelector: {}
- ipBlock:
- cidr: 0.0.0.0/0
+ cidr: 0.0.0.0/0
ports:
- protocol: TCP
port: 5000
diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml
index ef357b0..38a287b 100644
--- a/kubernetes/manifests/configmap.yaml
+++ b/kubernetes/manifests/configmap.yaml
@@ -4,18 +4,38 @@ metadata:
name: krawl-config
namespace: krawl-system
data:
- PORT: "5000"
- DELAY: "100"
- LINKS_MIN_LENGTH: "5"
- LINKS_MAX_LENGTH: "15"
- LINKS_MIN_PER_PAGE: "10"
- LINKS_MAX_PER_PAGE: "15"
- MAX_COUNTER: "10"
- CANARY_TOKEN_TRIES: "10"
- PROBABILITY_ERROR_CODES: "0"
- SERVER_HEADER: "Apache/2.2.22 (Ubuntu)"
-# CANARY_TOKEN_URL: set-your-canary-token-url-here
-# TIMEZONE: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome")
- # Database configuration
- DATABASE_PATH: "data/krawl.db"
- DATABASE_RETENTION_DAYS: "30"
\ No newline at end of file
+ config.yaml: |
+ # Krawl Honeypot Configuration
+ server:
+ port: 5000
+ delay: 100
+ timezone: null # e.g., "America/New_York" or null for system default
+
+ links:
+ min_length: 5
+ max_length: 15
+ min_per_page: 10
+ max_per_page: 15
+ char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+ max_counter: 10
+
+ canary:
+ token_url: null # Optional canary token URL
+ token_tries: 10
+
+ dashboard:
+ # Auto-generates random path if null
+ # Can be set to "/dashboard" or similar
+ secret_path: null
+
+ api:
+ server_url: null
+ server_port: 8080
+ server_path: "/api/v2/users"
+
+ database:
+ path: "data/krawl.db"
+ retention_days: 30
+
+ behavior:
+ probability_error_codes: 0 # 0-100 percentage
diff --git a/kubernetes/manifests/deployment.yaml b/kubernetes/manifests/deployment.yaml
index 1650721..f970625 100644
--- a/kubernetes/manifests/deployment.yaml
+++ b/kubernetes/manifests/deployment.yaml
@@ -23,10 +23,14 @@ spec:
- containerPort: 5000
name: http
protocol: TCP
- envFrom:
- - configMapRef:
- name: krawl-config
+ env:
+ - name: CONFIG_LOCATION
+ value: "config.yaml"
volumeMounts:
+ - name: config
+ mountPath: /app/config.yaml
+ subPath: config.yaml
+ readOnly: true
- name: wordlists
mountPath: /app/wordlists.json
subPath: wordlists.json
@@ -41,6 +45,9 @@ spec:
memory: "256Mi"
cpu: "500m"
volumes:
+ - name: config
+ configMap:
+ name: krawl-config
- name: wordlists
configMap:
name: krawl-wordlists
diff --git a/requirements.txt b/requirements.txt
index 94f74f2..8cb6dc5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,8 @@
# Krawl Honeypot Dependencies
# Install with: pip install -r requirements.txt
+# Configuration
+PyYAML>=6.0
+
# Database ORM
SQLAlchemy>=2.0.0,<3.0.0
diff --git a/src/config.py b/src/config.py
index 87fca1c..d8aa2f2 100644
--- a/src/config.py
+++ b/src/config.py
@@ -1,17 +1,22 @@
#!/usr/bin/env python3
import os
+import sys
from dataclasses import dataclass
+from pathlib import Path
from typing import Optional, Tuple
from zoneinfo import ZoneInfo
import time
+import yaml
+
@dataclass
class Config:
"""Configuration class for the deception server"""
port: int = 5000
delay: int = 100 # milliseconds
+ server_header: str = ""
links_length_range: Tuple[int, int] = (5, 15)
links_per_page_range: Tuple[int, int] = (10, 15)
char_space: str = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
@@ -23,12 +28,12 @@ class Config:
api_server_port: int = 8080
api_server_path: str = "/api/v2/users"
probability_error_codes: int = 0 # Percentage (0-100)
- server_header: Optional[str] = None
+
# Database settings
database_path: str = "data/krawl.db"
database_retention_days: int = 30
timezone: str = None # IANA timezone (e.g., 'America/New_York', 'Europe/Rome')
-
+
@staticmethod
# Try to fetch timezone before if not set
def get_system_timezone() -> str:
@@ -38,16 +43,16 @@ class Config:
tz_path = os.readlink('/etc/localtime')
if 'zoneinfo/' in tz_path:
return tz_path.split('zoneinfo/')[-1]
-
+
local_tz = time.tzname[time.daylight]
if local_tz and local_tz != 'UTC':
return local_tz
except Exception:
pass
-
+
# Default fallback to UTC
return 'UTC'
-
+
def get_timezone(self) -> ZoneInfo:
"""Get configured timezone as ZoneInfo object"""
if self.timezone:
@@ -55,7 +60,7 @@ class Config:
return ZoneInfo(self.timezone)
except Exception:
pass
-
+
system_tz = self.get_system_timezone()
try:
return ZoneInfo(system_tz)
@@ -63,31 +68,76 @@ class Config:
return ZoneInfo('UTC')
@classmethod
- def from_env(cls) -> 'Config':
- """Create configuration from environment variables"""
+    def from_yaml(cls) -> 'Config':
+        """Create configuration from a YAML file.
+
+        The file name is read from the CONFIG_LOCATION environment variable
+        (default: ``config.yaml``) and joined onto the parent of the directory
+        containing this module. Missing keys fall back to the built-in
+        defaults, and a section that is present but empty (e.g. ``server:``
+        with no keys) is treated as an empty mapping.
+
+        Returns:
+            A populated Config instance.
+
+        Exits the process with status 1 when the file is missing, is not
+        valid YAML, or does not contain a mapping at the top level.
+        """
+        config_location = os.getenv('CONFIG_LOCATION', 'config.yaml')
+        config_path = Path(__file__).parent.parent / config_location
+
+        try:
+            with open(config_path, 'r', encoding='utf-8') as f:
+                data = yaml.safe_load(f)
+        except FileNotFoundError:
+            print(f"Error: Configuration file '{config_path}' not found.", file=sys.stderr)
+            print("Please create a config.yaml file or set CONFIG_LOCATION environment variable.", file=sys.stderr)
+            sys.exit(1)
+        except yaml.YAMLError as e:
+            print(f"Error: Invalid YAML in configuration file '{config_path}': {e}", file=sys.stderr)
+            sys.exit(1)
+
+        # An empty file loads as None; anything else must be a mapping
+        if data is None:
+            data = {}
+        elif not isinstance(data, dict):
+            print(f"Error: Configuration file '{config_path}' must contain a YAML mapping at the top level.", file=sys.stderr)
+            sys.exit(1)
+
+        def section(name: str) -> dict:
+            """Return the named section, or {} when it is absent, null or not a mapping."""
+            value = data.get(name)
+            return value if isinstance(value, dict) else {}
+
+        # Extract nested values with defaults
+        server = section('server')
+        links = section('links')
+        canary = section('canary')
+        dashboard = section('dashboard')
+        api = section('api')
+        database = section('database')
+        behavior = section('behavior')
+
+        # Handle dashboard_secret_path - auto-generate if null/empty/not set.
+        # An empty value must not fall through, or the dashboard would be
+        # served at "/" instead of a secret path.
+        dashboard_path = dashboard.get('secret_path')
+        if dashboard_path is None or str(dashboard_path).strip() == "":
+            dashboard_path = f'/{os.urandom(16).hex()}'
+        else:
+            # YAML may yield a non-string scalar (e.g. 12345); normalise first
+            dashboard_path = str(dashboard_path)
+            # ensure the dashboard path starts with a /
+            if not dashboard_path.startswith("/"):
+                dashboard_path = f"/{dashboard_path}"
+
         return cls(
-            port=int(os.getenv('PORT', 5000)),
-            delay=int(os.getenv('DELAY', 100)),
+            port=server.get('port', 5000),
+            delay=server.get('delay', 100),
+            # `or ""` keeps the field a str when the YAML value is null
+            server_header=server.get('server_header') or "",
+            timezone=server.get('timezone'),
             links_length_range=(
-                int(os.getenv('LINKS_MIN_LENGTH', 5)),
-                int(os.getenv('LINKS_MAX_LENGTH', 15))
+                links.get('min_length', 5),
+                links.get('max_length', 15)
             ),
             links_per_page_range=(
-                int(os.getenv('LINKS_MIN_PER_PAGE', 10)),
-                int(os.getenv('LINKS_MAX_PER_PAGE', 15))
+                links.get('min_per_page', 10),
+                links.get('max_per_page', 15)
             ),
-            char_space=os.getenv('CHAR_SPACE', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
-            max_counter=int(os.getenv('MAX_COUNTER', 10)),
-            canary_token_url=os.getenv('CANARY_TOKEN_URL'),
-            canary_token_tries=int(os.getenv('CANARY_TOKEN_TRIES', 10)),
-            dashboard_secret_path=os.getenv('DASHBOARD_SECRET_PATH', f'/{os.urandom(16).hex()}'),
-            api_server_url=os.getenv('API_SERVER_URL'),
-            api_server_port=int(os.getenv('API_SERVER_PORT', 8080)),
-            api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'),
-            probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 0)),
-            server_header=os.getenv('SERVER_HEADER'),
-            database_path=os.getenv('DATABASE_PATH', 'data/krawl.db'),
-            database_retention_days=int(os.getenv('DATABASE_RETENTION_DAYS', 30)),
-            timezone=os.getenv('TIMEZONE')  # If not set, will use system timezone
-
+            char_space=links.get('char_space', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
+            max_counter=links.get('max_counter', 10),
+            canary_token_url=canary.get('token_url'),
+            canary_token_tries=canary.get('token_tries', 10),
+            dashboard_secret_path=dashboard_path,
+            api_server_url=api.get('server_url'),
+            api_server_port=api.get('server_port', 8080),
+            api_server_path=api.get('server_path', '/api/v2/users'),
+            probability_error_codes=behavior.get('probability_error_codes', 0),
+            database_path=database.get('path', 'data/krawl.db'),
+            database_retention_days=database.get('retention_days', 30),
         )
+
+
+_config_instance = None
+
+
+def get_config() -> Config:
+    """Return the shared Config, loading it from YAML on the first call."""
+    global _config_instance
+    if _config_instance is not None:
+        return _config_instance
+    _config_instance = Config.from_yaml()
+    return _config_instance
diff --git a/src/generators.py b/src/generators.py
index 6e24ba8..92eb590 100644
--- a/src/generators.py
+++ b/src/generators.py
@@ -9,8 +9,7 @@ import string
import json
from templates import html_templates
from wordlists import get_wordlists
-from config import Config
-from logger import get_app_logger
+from config import get_config
def random_username() -> str:
"""Generate random username"""
@@ -38,15 +37,12 @@ def random_email(username: str = None) -> str:
return f"{username}@{random.choice(wl.email_domains)}"
def random_server_header() -> str:
- """Generate random server header"""
-
- if Config.from_env().server_header:
- server_header = Config.from_env().server_header
- else:
- wl = get_wordlists()
- server_header = random.choice(wl.server_headers)
-
- return server_header
+ """Generate random server header from wordlists"""
+ config = get_config()
+ if config.server_header:
+ return config.server_header
+ wl = get_wordlists()
+ return random.choice(wl.server_headers)
def random_api_key() -> str:
"""Generate random API key"""
diff --git a/src/handler.py b/src/handler.py
index a45661d..846f2d2 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -6,6 +6,7 @@ import time
from datetime import datetime
from http.server import BaseHTTPRequestHandler
from typing import Optional, List
+from urllib.parse import urlparse, parse_qs
from config import Config
from tracker import AccessTracker
@@ -16,6 +17,9 @@ from generators import (
api_response, directory_listing, random_server_header
)
from wordlists import get_wordlists
+from sql_errors import generate_sql_error_response, get_sql_response_with_data
+from xss_detector import detect_xss_pattern, generate_xss_response
+from server_errors import generate_server_error
class Handler(BaseHTTPRequestHandler):
@@ -67,6 +71,67 @@ class Handler(BaseHTTPRequestHandler):
if not error_codes:
error_codes = [400, 401, 403, 404, 500, 502, 503]
return random.choice(error_codes)
+
+    def _parse_query_string(self) -> str:
+        """Return the query component of the current request path."""
+        return urlparse(self.path).query
+
+    def _handle_sql_endpoint(self, path: str) -> bool:
+        """
+        Handle SQL injection honeypot endpoints.
+
+        Args:
+            path: Request path; only its path component is matched against
+                the honeypot endpoints.
+
+        Returns:
+            True if the path is one of the honeypot endpoints (a response was
+            attempted, even if sending it failed), False otherwise.
+        """
+        # SQL-vulnerable endpoints
+        sql_endpoints = ['/api/search', '/api/sql', '/api/database']
+
+        base_path = urlparse(path).path
+        if base_path not in sql_endpoints:
+            return False
+
+        try:
+            # The query string is read from the request line (self.path)
+            query_string = self._parse_query_string()
+            client_ip = self._get_client_ip()
+
+            # Only the first 100 characters of the query are written to the log
+            query_excerpt = query_string[:100] if query_string else 'empty'
+
+            # Always check for SQL injection patterns
+            error_msg, content_type, status_code = generate_sql_error_response(query_string or "")
+
+            if error_msg:
+                # SQL injection detected - log and return error
+                self.access_logger.warning(f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_excerpt}")
+                self.send_response(status_code)
+                self.send_header('Content-type', content_type)
+                self.end_headers()
+                self.wfile.write(error_msg.encode())
+            else:
+                # No injection detected - return fake data
+                self.access_logger.info(f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {query_excerpt}")
+                self.send_response(200)
+                self.send_header('Content-type', 'application/json')
+                self.end_headers()
+                response_data = get_sql_response_with_data(base_path, query_string or "")
+                self.wfile.write(response_data.encode())
+
+            return True
+
+        except BrokenPipeError:
+            # Client disconnected
+            return True
+        except Exception as e:
+            self.app_logger.error(f"Error handling SQL endpoint {path}: {str(e)}")
+            # Still send a response even on error
+            try:
+                self.send_response(500)
+                self.send_header('Content-type', 'application/json')
+                self.end_headers()
+                self.wfile.write(b'{"error": "Internal server error"}')
+            except Exception:
+                # Best effort only; KeyboardInterrupt/SystemExit still propagate
+                pass
+            return True
def generate_page(self, seed: str) -> str:
"""Generate a webpage containing random links or canary token"""
@@ -207,6 +272,68 @@ class Handler(BaseHTTPRequestHandler):
user_agent = self._get_user_agent()
post_data = ""
+ from urllib.parse import urlparse
+ base_path = urlparse(self.path).path
+
+ if base_path in ['/api/search', '/api/sql', '/api/database']:
+ content_length = int(self.headers.get('Content-Length', 0))
+ if content_length > 0:
+ post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
+
+ self.access_logger.info(f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}")
+
+ error_msg, content_type, status_code = generate_sql_error_response(post_data)
+
+ try:
+ if error_msg:
+ self.access_logger.warning(f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}")
+ self.send_response(status_code)
+ self.send_header('Content-type', content_type)
+ self.end_headers()
+ self.wfile.write(error_msg.encode())
+ else:
+ self.send_response(200)
+ self.send_header('Content-type', 'application/json')
+ self.end_headers()
+ response_data = get_sql_response_with_data(base_path, post_data)
+ self.wfile.write(response_data.encode())
+ except BrokenPipeError:
+ pass
+ except Exception as e:
+ self.app_logger.error(f"Error in SQL POST handler: {str(e)}")
+ return
+
+ if base_path == '/api/contact':
+ content_length = int(self.headers.get('Content-Length', 0))
+ if content_length > 0:
+ post_data = self.rfile.read(content_length).decode('utf-8', errors="replace")
+
+ parsed_data = {}
+ for pair in post_data.split('&'):
+ if '=' in pair:
+ key, value = pair.split('=', 1)
+ from urllib.parse import unquote_plus
+ parsed_data[unquote_plus(key)] = unquote_plus(value)
+
+ xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values())
+
+ if xss_detected:
+ self.access_logger.warning(f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}")
+ else:
+ self.access_logger.info(f"[XSS ENDPOINT POST] {client_ip} - {base_path}")
+
+ try:
+ self.send_response(200)
+ self.send_header('Content-type', 'text/html')
+ self.end_headers()
+ response_html = generate_xss_response(parsed_data)
+ self.wfile.write(response_html.encode())
+ except BrokenPipeError:
+ pass
+ except Exception as e:
+ self.app_logger.error(f"Error in XSS POST handler: {str(e)}")
+ return
+
self.access_logger.warning(f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}")
content_length = int(self.headers.get('Content-Length', 0))
@@ -248,6 +375,10 @@ class Handler(BaseHTTPRequestHandler):
def serve_special_path(self, path: str) -> bool:
"""Serve special paths like robots.txt, API endpoints, etc."""
+ # Check SQL injection honeypot endpoints first
+ if self._handle_sql_endpoint(path):
+ return True
+
try:
if path == '/robots.txt':
self.send_response(200)
@@ -285,7 +416,28 @@ class Handler(BaseHTTPRequestHandler):
self.wfile.write(html_templates.login_form().encode())
return True
- # WordPress login page
+ if path in ['/users', '/user', '/database', '/db', '/search']:
+ self.send_response(200)
+ self.send_header('Content-type', 'text/html')
+ self.end_headers()
+ self.wfile.write(html_templates.product_search().encode())
+ return True
+
+ if path in ['/info', '/input', '/contact', '/feedback', '/comment']:
+ self.send_response(200)
+ self.send_header('Content-type', 'text/html')
+ self.end_headers()
+ self.wfile.write(html_templates.input_form().encode())
+ return True
+
+ if path == '/server':
+ error_html, content_type = generate_server_error()
+ self.send_response(500)
+ self.send_header('Content-type', content_type)
+ self.end_headers()
+ self.wfile.write(error_html.encode())
+ return True
+
if path in ['/wp-login.php', '/wp-login', '/wp-admin', '/wp-admin/']:
self.send_response(200)
self.send_header('Content-type', 'text/html')
diff --git a/src/server.py b/src/server.py
index 06b7c82..7a59c73 100644
--- a/src/server.py
+++ b/src/server.py
@@ -8,7 +8,7 @@ Run this file to start the server.
import sys
from http.server import HTTPServer
-from config import Config
+from config import get_config
from tracker import AccessTracker
from handler import Handler
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
@@ -20,24 +20,29 @@ def print_usage():
print(f'Usage: {sys.argv[0]} [FILE]\n')
print('FILE is file containing a list of webpage names to serve, one per line.')
print('If no file is provided, random links will be generated.\n')
- print('Environment Variables:')
- print(' PORT - Server port (default: 5000)')
- print(' DELAY - Response delay in ms (default: 100)')
- print(' LINKS_MIN_LENGTH - Min link length (default: 5)')
- print(' LINKS_MAX_LENGTH - Max link length (default: 15)')
- print(' LINKS_MIN_PER_PAGE - Min links per page (default: 10)')
- print(' LINKS_MAX_PER_PAGE - Max links per page (default: 15)')
- print(' MAX_COUNTER - Max counter value (default: 10)')
- print(' CANARY_TOKEN_URL - Canary token URL to display')
- print(' CANARY_TOKEN_TRIES - Number of tries before showing token (default: 10)')
- print(' DASHBOARD_SECRET_PATH - Secret path for dashboard (auto-generated if not set)')
- print(' PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)')
- print(' CHAR_SPACE - Characters for random links')
- print(' SERVER_HEADER - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))')
- print(' DATABASE_PATH - Path to SQLite database (default: data/krawl.db)')
- print(' DATABASE_RETENTION_DAYS - Days to retain database records (default: 30)')
- print(' TIMEZONE - IANA timezone for logs/dashboard (e.g., America/New_York, Europe/Rome)')
- print(' If not set, system timezone will be used')
+ print('Configuration:')
+ print(' Configuration is loaded from a YAML file (default: config.yaml)')
+ print(' Set CONFIG_LOCATION environment variable to use a different file.\n')
+ print(' Example config.yaml structure:')
+ print(' server:')
+ print(' port: 5000')
+ print(' delay: 100')
+ print(' timezone: null # or "America/New_York"')
+ print(' links:')
+ print(' min_length: 5')
+ print(' max_length: 15')
+ print(' min_per_page: 10')
+ print(' max_per_page: 15')
+ print(' canary:')
+ print(' token_url: null')
+ print(' token_tries: 10')
+ print(' dashboard:')
+ print(' secret_path: null # auto-generated if not set')
+ print(' database:')
+ print(' path: "data/krawl.db"')
+ print(' retention_days: 30')
+ print(' behavior:')
+ print(' probability_error_codes: 0')
def main():
@@ -46,19 +51,17 @@ def main():
print_usage()
exit(0)
- config = Config.from_env()
-
+ config = get_config()
+
# Get timezone configuration
tz = config.get_timezone()
-
+
# Initialize logging with timezone
initialize_logging(timezone=tz)
app_logger = get_app_logger()
access_logger = get_access_logger()
credential_logger = get_credential_logger()
- config = Config.from_env()
-
# Initialize database for persistent storage
try:
initialize_database(config.database_path)
diff --git a/src/server_errors.py b/src/server_errors.py
new file mode 100644
index 0000000..7591c64
--- /dev/null
+++ b/src/server_errors.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+
+import random
+from wordlists import get_wordlists
+
+
+def generate_server_error() -> tuple[str, str]:
+    """Build a fake server error page imitating a randomly chosen server.
+
+    Returns:
+        A ``(body, content_type)`` tuple. The body is the chosen server's
+        template with its placeholders filled in, or a plain fallback
+        string when the wordlists define no server errors.
+    """
+    server_errors = get_wordlists().server_errors
+    if not server_errors:
+        return ("500 Internal Server Error", "text/html")
+
+    server_type = random.choice(list(server_errors.keys()))
+    server_config = server_errors[server_type]
+
+    error_codes = {
+        400: "Bad Request",
+        401: "Unauthorized",
+        403: "Forbidden",
+        404: "Not Found",
+        500: "Internal Server Error",
+        502: "Bad Gateway",
+        503: "Service Unavailable",
+    }
+    code = random.choice(list(error_codes.keys()))
+
+    # `or` also covers a key that is present but empty, which would
+    # otherwise make random.choice() raise IndexError.
+    version = random.choice(server_config.get('versions') or ['1.0'])
+    html = server_config.get('template', '').replace('{code}', str(code))
+    html = html.replace('{message}', error_codes[code])
+    html = html.replace('{version}', version)
+    if server_type == 'apache':
+        os_name = random.choice(server_config.get('os') or ['Ubuntu'])
+        html = html.replace('{os}', os_name).replace('{host}', 'localhost')
+    return (html, "text/html")
+
+
+def get_server_header(server_type: str = None) -> str:
+    """Return a fake ``Server`` header value for *server_type*.
+
+    A random configured server type is used when *server_type* is not
+    given; unknown types and missing wordlists yield "nginx/1.18.0".
+    """
+    server_errors = get_wordlists().server_errors
+    if not server_errors:
+        return "nginx/1.18.0"
+    if not server_type:
+        server_type = random.choice(list(server_errors.keys()))
+    # `or` guards against an empty or missing version list (IndexError).
+    versions = server_errors.get(server_type, {}).get('versions') or ['1.0']
+    version = random.choice(versions)
+    server_headers = {
+        'nginx': f"nginx/{version}",
+        'apache': f"Apache/{version}",
+        'iis': f"Microsoft-IIS/{version}",
+        'tomcat': "Apache-Coyote/1.1",
+    }
+    return server_headers.get(server_type, "nginx/1.18.0")
diff --git a/src/sql_errors.py b/src/sql_errors.py
new file mode 100644
index 0000000..dc84886
--- /dev/null
+++ b/src/sql_errors.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+
+import random
+import re
+from typing import Optional, Tuple
+from wordlists import get_wordlists
+
+
+def detect_sql_injection_pattern(query_string: str) -> Optional[str]:
+    """Classify *query_string* by the first SQL-injection pattern it hits.
+
+    Matching is case-insensitive and categories are tried in declaration
+    order, so earlier categories win. Returns None when nothing matches.
+    """
+    if not query_string:
+        return None
+    lowered = query_string.lower()
+    signatures = (
+        ('quote', (r"'", r'"', r'`')),
+        ('comment', (r'--', r'#', r'/\*', r'\*/')),
+        ('union', (r'\bunion\b', r'\bunion\s+select\b')),
+        ('boolean', (r'\bor\b.*=.*', r'\band\b.*=.*', r"'.*or.*'.*=.*'")),
+        ('time_based', (r'\bsleep\b', r'\bwaitfor\b', r'\bdelay\b', r'\bbenchmark\b')),
+        ('stacked', (r';.*select', r';.*drop', r';.*insert', r';.*update', r';.*delete')),
+        ('command', (r'\bexec\b', r'\bexecute\b', r'\bxp_cmdshell\b')),
+        ('info_schema', (r'information_schema', r'table_schema', r'table_name')),
+    )
+    for label, regexes in signatures:
+        if any(re.search(rx, lowered) for rx in regexes):
+            return label
+    return None
+
+
+def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tuple[str, str]:
+    """Pick a fake SQL error message from the wordlists.
+
+    Args:
+        db_type: Key into the SQL error wordlist; random when falsy.
+        injection_type: Preferred error category for that database.
+
+    Returns:
+        ``(message, "text/plain")``. Falls back to the database's
+        'generic' errors, then to every list it defines, then to a
+        fixed "Database error occurred" message.
+    """
+    fallback = "Database error occurred"
+    sql_errors = get_wordlists().sql_errors
+    if not sql_errors:
+        return (fallback, "text/plain")
+    chosen_db = db_type or random.choice(list(sql_errors.keys()))
+    db_errors = sql_errors.get(chosen_db, {})
+    if injection_type and injection_type in db_errors:
+        candidates = db_errors[injection_type]
+    elif 'generic' in db_errors:
+        candidates = db_errors['generic']
+    else:
+        lists = (v for v in db_errors.values() if isinstance(v, list))
+        candidates = [m for v in lists for m in v] or [fallback]
+    message = random.choice(candidates) if candidates else fallback
+    placeholders = {
+        '{table}': ['users', 'products', 'orders', 'customers', 'accounts', 'sessions'],
+        '{column}': ['id', 'name', 'email', 'password', 'username', 'created_at'],
+    }
+    for token, choices in placeholders.items():
+        if token in message:
+            message = message.replace(token, random.choice(choices))
+    return (message, "text/plain")
+
+
+def generate_sql_error_response(
+    query_string: str, db_type: str = None
+) -> Tuple[Optional[str], Optional[str], Optional[int]]:
+    """Return ``(message, content_type, status)`` for a suspicious query.
+
+    All three items are None when no injection pattern is detected.
+    """
+    injection_type = detect_sql_injection_pattern(query_string)
+    if not injection_type:
+        return (None, None, None)
+    error_message, content_type = get_random_sql_error(db_type, injection_type)
+    # Answer 200 roughly 30% of the time, 500 otherwise.
+    status_code = 200 if random.random() < 0.3 else 500
+    return (error_message, content_type, status_code)
+
+
+def get_sql_response_with_data(path: str, params: str) -> str:
+    """Return a JSON body posing as the result of an executed SQL query.
+
+    When *params* is classified as a 'union', 'boolean' or 'stacked'
+    injection, the body lists one to four fabricated user records;
+    otherwise it reports success with no results. *path* is not used.
+    """
+    import json
+    from generators import random_username, random_email, random_password
+
+    payload = {
+        "success": True,
+        "message": "Query executed successfully",
+        "results": []
+    }
+    if detect_sql_injection_pattern(params) in ('union', 'boolean', 'stacked'):
+        def fake_user(row_id):
+            return {
+                "id": row_id,
+                "username": random_username(),
+                "email": random_email(),
+                "password_hash": random_password(),
+                "role": random.choice(["admin", "user", "moderator"])
+            }
+        rows = [fake_user(i) for i in range(1, random.randint(2, 5))]
+        payload = {"success": True, "results": rows}
+    return json.dumps(payload, indent=2)
diff --git a/src/templates/html/generic_search.html b/src/templates/html/generic_search.html
new file mode 100644
index 0000000..90171bc
--- /dev/null
+++ b/src/templates/html/generic_search.html
@@ -0,0 +1,66 @@
+
+
+
{key}: {value}
") + + if xss_detected: + html = f""" + + + +We have received your information:
+ {''.join(reflected_content)} +We will get back to you shortly.
+Your message has been received and we will respond soon.
+Sorry, the page you are looking for is currently unavailable.
\nPlease try again later.
If you are the system administrator of this resource then you should check the error log for details.
\nFaithfully yours, nginx/{version}.
\n\n" + }, + "apache": { + "versions": ["2.4.41", "2.4.52", "2.4.54", "2.4.57"], + "os": ["Ubuntu", "Debian", "CentOS"], + "template": "\n\nThe requested URL was not found on this server.
\nType Status Report
Description The server encountered an internal error that prevented it from fulfilling this request.