Merge branch 'feat/config-yaml' into chore/fix-merge-conflicts

This commit is contained in:
Phillip Tarrant
2026-01-03 14:45:32 -06:00
14 changed files with 256 additions and 161 deletions

View File

@@ -4,20 +4,25 @@ LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl
WORKDIR /app

# Install gosu so the entrypoint can drop from root to the krawl user.
# update, install and list cleanup share one layer so the apt lists never
# persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends gosu && \
    rm -rf /var/lib/apt/lists/*

# Install Python dependencies before copying the source so this layer stays
# cached until requirements.txt changes.
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

COPY src/ /app/src/
COPY wordlists.json /app/
COPY entrypoint.sh /app/

# Create the unprivileged runtime user (fixed UID 1000), the writable
# directories the container mounts, and make the entrypoint executable.
RUN useradd -m -u 1000 krawl && \
    mkdir -p /app/logs /app/data && \
    chown -R krawl:krawl /app && \
    chmod +x /app/entrypoint.sh

EXPOSE 5000

ENV PYTHONUNBUFFERED=1

# No USER instruction on purpose: entrypoint.sh has to start as root so it can
# chown the mounted /app/logs and /app/data, and it then execs the command
# below as the krawl user via gosu.
ENTRYPOINT ["/app/entrypoint.sh"]
CMD ["python3", "src/server.py"]

38
config.yaml Normal file
View File

@@ -0,0 +1,38 @@
# Krawl Honeypot Configuration
#
# Loaded from config.yaml by default; set the CONFIG_LOCATION environment
# variable to use a different file.

server:
  port: 5000
  delay: 100  # Response delay in milliseconds
  timezone: null  # e.g., "America/New_York" or null for system default

  # Manually set the Server header; if null, a random one will be used.
  server_header: "Apache/2.2.22 (Ubuntu)"

links:
  min_length: 5
  max_length: 15
  min_per_page: 10
  max_per_page: 15
  char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
  max_counter: 10

canary:
  token_url: null  # Optional canary token URL
  token_tries: 10

dashboard:
  # If null, a random dashboard path is auto-generated.
  # Otherwise set it to "/dashboard" or similar; the value MUST start with a
  # forward slash.
  secret_path: "/dashboard"

api:
  server_url: null
  server_port: 8080
  server_path: "/api/v2/users"

database:
  path: "data/krawl.db"
  retention_days: 30

behavior:
  probability_error_codes: 0  # 0-100 percentage

View File

@@ -10,24 +10,10 @@ services:
- "5000:5000"
volumes:
- ./wordlists.json:/app/wordlists.json:ro
- ./data:/app/data
- ./config.yaml:/app/config.yaml:ro
- ./logs:/app/logs
environment:
- PORT=5000
- DELAY=100
- LINKS_MIN_LENGTH=5
- LINKS_MAX_LENGTH=15
- LINKS_MIN_PER_PAGE=10
- LINKS_MAX_PER_PAGE=15
- MAX_COUNTER=10
- CANARY_TOKEN_TRIES=10
- PROBABILITY_ERROR_CODES=0
- SERVER_HEADER=Apache/2.2.22 (Ubuntu)
# Optional: Set your canary token URL
# - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt
# Optional: Set custom dashboard path (auto-generated if not set)
# - DASHBOARD_SECRET_PATH=/my-secret-dashboard
# Optional: Set timezone for logs and dashboard (e.g., America/New_York, Europe/Rome)
# - TIMEZONE=UTC
- CONFIG_LOCATION=config.yaml
restart: unless-stopped
healthcheck:
test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"]

8
entrypoint.sh Normal file
View File

@@ -0,0 +1,8 @@
#!/bin/sh
# Container entrypoint: repair ownership of the writable directories while
# still running as root, then hand over to the requested command as the
# unprivileged krawl user.
set -e

run_as="krawl"

# Mounted directories can arrive owned by a different user. Re-own them on a
# best-effort basis: error output is discarded and a failed chown does not
# abort startup.
for dir in /app/logs /app/data; do
    chown -R "${run_as}:${run_as}" "$dir" 2>/dev/null || true
done

# exec replaces this shell, so the application is what receives signals.
exec gosu "$run_as" "$@"

View File

@@ -5,26 +5,30 @@ metadata:
labels:
{{- include "krawl.labels" . | nindent 4 }}
data:
PORT: {{ .Values.config.port | quote }}
DELAY: {{ .Values.config.delay | quote }}
LINKS_MIN_LENGTH: {{ .Values.config.linksMinLength | quote }}
LINKS_MAX_LENGTH: {{ .Values.config.linksMaxLength | quote }}
LINKS_MIN_PER_PAGE: {{ .Values.config.linksMinPerPage | quote }}
LINKS_MAX_PER_PAGE: {{ .Values.config.linksMaxPerPage | quote }}
MAX_COUNTER: {{ .Values.config.maxCounter | quote }}
CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }}
PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }}
SERVER_HEADER: {{ .Values.config.serverHeader | quote }}
CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }}
{{- if .Values.config.dashboardSecretPath }}
DASHBOARD_SECRET_PATH: {{ .Values.config.dashboardSecretPath | quote }}
{{- end }}
{{- if .Values.config.serverHeader }}
SERVER_HEADER: {{ .Values.config.serverHeader | quote }}
{{- end }}
{{- if .Values.config.timezone }}
TIMEZONE: {{ .Values.config.timezone | quote }}
{{- end }}
# Database configuration
DATABASE_PATH: {{ .Values.database.path | quote }}
DATABASE_RETENTION_DAYS: {{ .Values.database.retentionDays | quote }}
config.yaml: |
# Krawl Honeypot Configuration
server:
port: {{ .Values.config.server.port }}
delay: {{ .Values.config.server.delay }}
timezone: {{ .Values.config.server.timezone | toYaml }}
links:
min_length: {{ .Values.config.links.min_length }}
max_length: {{ .Values.config.links.max_length }}
min_per_page: {{ .Values.config.links.min_per_page }}
max_per_page: {{ .Values.config.links.max_per_page }}
char_space: {{ .Values.config.links.char_space | quote }}
max_counter: {{ .Values.config.links.max_counter }}
canary:
token_url: {{ .Values.config.canary.token_url | toYaml }}
token_tries: {{ .Values.config.canary.token_tries }}
dashboard:
secret_path: {{ .Values.config.dashboard.secret_path | toYaml }}
api:
server_url: {{ .Values.config.api.server_url | toYaml }}
server_port: {{ .Values.config.api.server_port }}
server_path: {{ .Values.config.api.server_path | quote }}
database:
path: {{ .Values.config.database.path | quote }}
retention_days: {{ .Values.config.database.retention_days }}
behavior:
probability_error_codes: {{ .Values.config.behavior.probability_error_codes }}

View File

@@ -38,18 +38,16 @@ spec:
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- name: http
containerPort: {{ .Values.config.port }}
containerPort: {{ .Values.config.server.port }}
protocol: TCP
envFrom:
- configMapRef:
name: {{ include "krawl.fullname" . }}-config
env:
- name: DASHBOARD_SECRET_PATH
valueFrom:
secretKeyRef:
name: {{ include "krawl.fullname" . }}
key: dashboard-path
- name: CONFIG_LOCATION
value: "config.yaml"
volumeMounts:
- name: config
mountPath: /app/config.yaml
subPath: config.yaml
readOnly: true
- name: wordlists
mountPath: /app/wordlists.json
subPath: wordlists.json
@@ -63,6 +61,9 @@ spec:
{{- toYaml . | nindent 12 }}
{{- end }}
volumes:
- name: config
configMap:
name: {{ include "krawl.fullname" . }}-config
- name: wordlists
configMap:
name: {{ include "krawl.fullname" . }}-wordlists

View File

@@ -1,16 +0,0 @@
{{- $secret := (lookup "v1" "Secret" .Release.Namespace (include "krawl.fullname" .)) -}}
{{- $dashboardPath := "" -}}
{{- if and $secret $secret.data -}}
{{- $dashboardPath = index $secret.data "dashboard-path" | b64dec -}}
{{- else -}}
{{- $dashboardPath = printf "/%s" (randAlphaNum 32) -}}
{{- end -}}
apiVersion: v1
kind: Secret
metadata:
name: {{ include "krawl.fullname" . }}
labels:
{{- include "krawl.labels" . | nindent 4 }}
type: Opaque
stringData:
dashboard-path: {{ $dashboardPath | quote }}

View File

@@ -62,30 +62,36 @@ tolerations: []
affinity: {}
# Application configuration
# Application configuration (config.yaml structure)
config:
port: 5000
delay: 100
linksMinLength: 5
linksMaxLength: 15
linksMinPerPage: 10
linksMaxPerPage: 15
maxCounter: 10
canaryTokenTries: 10
probabilityErrorCodes: 0
serverHeader: "Apache/2.2.22 (Ubuntu)"
# timezone: "UTC"
# serverHeader: "Apache/2.2.22 (Ubuntu)"
# dashboardSecretPath: "/my-secret-dashboard"
# canaryTokenUrl: set-your-canary-token-url-here
# timezone: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used.
server:
port: 5000
delay: 100
timezone: null # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used.
links:
min_length: 5
max_length: 15
min_per_page: 10
max_per_page: 15
char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
max_counter: 10
canary:
token_url: null # Set your canary token URL here
token_tries: 10
dashboard:
secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard"
api:
server_url: null
server_port: 8080
server_path: "/api/v2/users"
database:
path: "data/krawl.db"
retention_days: 30
behavior:
probability_error_codes: 0
# Database configuration
# Database persistence configuration
database:
# Path to the SQLite database file
path: "data/krawl.db"
# Number of days to retain access logs and attack data
retentionDays: 30
# Persistence configuration
persistence:
enabled: true

View File

@@ -10,19 +10,41 @@ metadata:
name: krawl-config
namespace: krawl-system
data:
PORT: "5000"
DELAY: "100"
LINKS_MIN_LENGTH: "5"
LINKS_MAX_LENGTH: "15"
LINKS_MIN_PER_PAGE: "10"
LINKS_MAX_PER_PAGE: "15"
MAX_COUNTER: "10"
CANARY_TOKEN_TRIES: "10"
PROBABILITY_ERROR_CODES: "0"
# CANARY_TOKEN_URL: set-your-canary-token-url-here
# Database configuration
DATABASE_PATH: "data/krawl.db"
DATABASE_RETENTION_DAYS: "30"
config.yaml: |
# Krawl Honeypot Configuration
server:
port: 5000
delay: 100
timezone: null # e.g., "America/New_York" or null for system default
links:
min_length: 5
max_length: 15
min_per_page: 10
max_per_page: 15
char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
max_counter: 10
canary:
token_url: null # Optional canary token URL
token_tries: 10
dashboard:
# Auto-generates random path if null
# Can be set to "/dashboard" or similar
secret_path: null
api:
server_url: null
server_port: 8080
server_path: "/api/v2/users"
database:
path: "data/krawl.db"
retention_days: 30
behavior:
probability_error_codes: 0 # 0-100 percentage
---
apiVersion: v1
kind: ConfigMap
@@ -227,6 +249,14 @@ data:
500,
502,
503
],
"server_headers": [
"Apache/2.4.41 (Ubuntu)",
"nginx/1.18.0",
"Microsoft-IIS/10.0",
"cloudflare",
"AmazonS3",
"gunicorn/20.1.0"
]
}
---
@@ -269,10 +299,14 @@ spec:
- containerPort: 5000
name: http
protocol: TCP
envFrom:
- configMapRef:
name: krawl-config
env:
- name: CONFIG_LOCATION
value: "config.yaml"
volumeMounts:
- name: config
mountPath: /app/config.yaml
subPath: config.yaml
readOnly: true
- name: wordlists
mountPath: /app/wordlists.json
subPath: wordlists.json
@@ -287,6 +321,9 @@ spec:
memory: "256Mi"
cpu: "500m"
volumes:
- name: config
configMap:
name: krawl-config
- name: wordlists
configMap:
name: krawl-wordlists
@@ -353,7 +390,7 @@ spec:
- podSelector: {}
- namespaceSelector: {}
- ipBlock:
cidr: 0.0.0.0/0
cidr: 0.0.0.0/0
ports:
- protocol: TCP
port: 5000

View File

@@ -4,18 +4,38 @@ metadata:
name: krawl-config
namespace: krawl-system
data:
PORT: "5000"
DELAY: "100"
LINKS_MIN_LENGTH: "5"
LINKS_MAX_LENGTH: "15"
LINKS_MIN_PER_PAGE: "10"
LINKS_MAX_PER_PAGE: "15"
MAX_COUNTER: "10"
CANARY_TOKEN_TRIES: "10"
PROBABILITY_ERROR_CODES: "0"
SERVER_HEADER: "Apache/2.2.22 (Ubuntu)"
# CANARY_TOKEN_URL: set-your-canary-token-url-here
# TIMEZONE: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome")
# Database configuration
DATABASE_PATH: "data/krawl.db"
DATABASE_RETENTION_DAYS: "30"
config.yaml: |
# Krawl Honeypot Configuration
server:
port: 5000
delay: 100
timezone: null # e.g., "America/New_York" or null for system default
links:
min_length: 5
max_length: 15
min_per_page: 10
max_per_page: 15
char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
max_counter: 10
canary:
token_url: null # Optional canary token URL
token_tries: 10
dashboard:
# Auto-generates random path if null
# Can be set to "/dashboard" or similar
secret_path: null
api:
server_url: null
server_port: 8080
server_path: "/api/v2/users"
database:
path: "data/krawl.db"
retention_days: 30
behavior:
probability_error_codes: 0 # 0-100 percentage

View File

@@ -23,10 +23,14 @@ spec:
- containerPort: 5000
name: http
protocol: TCP
envFrom:
- configMapRef:
name: krawl-config
env:
- name: CONFIG_LOCATION
value: "config.yaml"
volumeMounts:
- name: config
mountPath: /app/config.yaml
subPath: config.yaml
readOnly: true
- name: wordlists
mountPath: /app/wordlists.json
subPath: wordlists.json
@@ -41,6 +45,9 @@ spec:
memory: "256Mi"
cpu: "500m"
volumes:
- name: config
configMap:
name: krawl-config
- name: wordlists
configMap:
name: krawl-wordlists

View File

@@ -140,4 +140,4 @@ def get_config() -> Config:
global _config_instance
if _config_instance is None:
_config_instance = Config.from_yaml()
return _config_instance
return _config_instance

View File

@@ -9,8 +9,7 @@ import string
import json
from templates import html_templates
from wordlists import get_wordlists
from config import Config
from logger import get_app_logger
from config import get_config
def random_username() -> str:
"""Generate random username"""
@@ -38,15 +37,12 @@ def random_email(username: str = None) -> str:
return f"{username}@{random.choice(wl.email_domains)}"
def random_server_header() -> str:
    """Return the value to send in the HTTP ``Server`` header.

    Uses ``server_header`` from the shared configuration returned by
    ``get_config()`` when it is set (truthy). Otherwise a header is picked at
    random from the wordlists' ``server_headers`` entries.

    Returns:
        The configured header string, or a randomly chosen one.
    """
    config = get_config()
    if config.server_header:
        return config.server_header
    # No fixed header configured: choose a fresh one on every call.
    wordlists = get_wordlists()
    return random.choice(wordlists.server_headers)
def random_api_key() -> str:
"""Generate random API key"""

View File

@@ -8,7 +8,7 @@ Run this file to start the server.
import sys
from http.server import HTTPServer
from config import Config
from config import get_config
from tracker import AccessTracker
from handler import Handler
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
@@ -20,24 +20,29 @@ def print_usage():
print(f'Usage: {sys.argv[0]} [FILE]\n')
print('FILE is file containing a list of webpage names to serve, one per line.')
print('If no file is provided, random links will be generated.\n')
print('Environment Variables:')
print(' PORT - Server port (default: 5000)')
print(' DELAY - Response delay in ms (default: 100)')
print(' LINKS_MIN_LENGTH - Min link length (default: 5)')
print(' LINKS_MAX_LENGTH - Max link length (default: 15)')
print(' LINKS_MIN_PER_PAGE - Min links per page (default: 10)')
print(' LINKS_MAX_PER_PAGE - Max links per page (default: 15)')
print(' MAX_COUNTER - Max counter value (default: 10)')
print(' CANARY_TOKEN_URL - Canary token URL to display')
print(' CANARY_TOKEN_TRIES - Number of tries before showing token (default: 10)')
print(' DASHBOARD_SECRET_PATH - Secret path for dashboard (auto-generated if not set)')
print(' PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)')
print(' CHAR_SPACE - Characters for random links')
print(' SERVER_HEADER - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))')
print(' DATABASE_PATH - Path to SQLite database (default: data/krawl.db)')
print(' DATABASE_RETENTION_DAYS - Days to retain database records (default: 30)')
print(' TIMEZONE - IANA timezone for logs/dashboard (e.g., America/New_York, Europe/Rome)')
print(' If not set, system timezone will be used')
print('Configuration:')
print(' Configuration is loaded from a YAML file (default: config.yaml)')
print(' Set CONFIG_LOCATION environment variable to use a different file.\n')
print(' Example config.yaml structure:')
print(' server:')
print(' port: 5000')
print(' delay: 100')
print(' timezone: null # or "America/New_York"')
print(' links:')
print(' min_length: 5')
print(' max_length: 15')
print(' min_per_page: 10')
print(' max_per_page: 15')
print(' canary:')
print(' token_url: null')
print(' token_tries: 10')
print(' dashboard:')
print(' secret_path: null # auto-generated if not set')
print(' database:')
print(' path: "data/krawl.db"')
print(' retention_days: 30')
print(' behavior:')
print(' probability_error_codes: 0')
def main():
@@ -46,19 +51,17 @@ def main():
print_usage()
exit(0)
config = Config.from_env()
config = get_config()
# Get timezone configuration
tz = config.get_timezone()
# Initialize logging with timezone
initialize_logging(timezone=tz)
app_logger = get_app_logger()
access_logger = get_access_logger()
credential_logger = get_credential_logger()
config = Config.from_env()
# Initialize database for persistent storage
try:
initialize_database(config.database_path)