Migrate configuration from environment variables to YAML file

- Add YAML-based configuration loaded from config.yaml (CONFIG_LOCATION env var)
  - Add PyYAML dependency and install requirements in Dockerfile
  - Replace Config.from_env() with get_config() singleton pattern
  - Remove server_header from config (now randomized from wordlists only)
  - Update docker-compose.yaml to mount config.yaml read-only
  - Update Helm chart: restructure values.yaml, generate config.yaml in ConfigMap
  - Update Kubernetes manifests: ConfigMap now contains config.yaml, deployments mount it
  - Remove Helm secret.yaml (dashboard path now auto-generated in config.yaml)
This commit is contained in:
Phillip Tarrant
2026-01-02 13:39:54 -06:00
parent 5a00e374e6
commit d458eb471d
14 changed files with 307 additions and 181 deletions

View File

@@ -4,6 +4,9 @@ LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl
WORKDIR /app
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt
COPY src/ /app/src/
COPY wordlists.json /app/

35
config.yaml Normal file
View File

@@ -0,0 +1,35 @@
# Krawl Honeypot Configuration

server:
  port: 5000
  delay: 100  # Response delay in milliseconds
  timezone: null  # e.g., "America/New_York" or null for system default

links:
  min_length: 5
  max_length: 15
  min_per_page: 10
  max_per_page: 15
  char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
  max_counter: 10

canary:
  token_url: null  # Optional canary token URL
  token_tries: 10

dashboard:
  # Set to null to auto-generate a random path at startup,
  # or set a fixed path such as "dashboard".
  secret_path: dashboard

api:
  server_url: null
  server_port: 8080
  server_path: "/api/v2/users"

database:
  path: "data/krawl.db"
  retention_days: 30

behavior:
  probability_error_codes: 0  # 0-100 percentage

View File

@@ -10,23 +10,9 @@ services:
- "5000:5000"
volumes:
- ./wordlists.json:/app/wordlists.json:ro
- ./config.yaml:/app/config.yaml:ro
environment:
- PORT=5000
- DELAY=100
- LINKS_MIN_LENGTH=5
- LINKS_MAX_LENGTH=15
- LINKS_MIN_PER_PAGE=10
- LINKS_MAX_PER_PAGE=15
- MAX_COUNTER=10
- CANARY_TOKEN_TRIES=10
- PROBABILITY_ERROR_CODES=0
# - SERVER_HEADER=Apache/2.2.22 (Ubuntu)
# Optional: Set your canary token URL
# - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt
# Optional: Set custom dashboard path (auto-generated if not set)
# - DASHBOARD_SECRET_PATH=/my-secret-dashboard
# Optional: Set timezone for logs and dashboard (e.g., America/New_York, Europe/Rome)
# - TIMEZONE=UTC
- CONFIG_LOCATION=config.yaml
restart: unless-stopped
healthcheck:
test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"]

View File

@@ -5,25 +5,30 @@ metadata:
labels:
{{- include "krawl.labels" . | nindent 4 }}
data:
PORT: {{ .Values.config.port | quote }}
DELAY: {{ .Values.config.delay | quote }}
LINKS_MIN_LENGTH: {{ .Values.config.linksMinLength | quote }}
LINKS_MAX_LENGTH: {{ .Values.config.linksMaxLength | quote }}
LINKS_MIN_PER_PAGE: {{ .Values.config.linksMinPerPage | quote }}
LINKS_MAX_PER_PAGE: {{ .Values.config.linksMaxPerPage | quote }}
MAX_COUNTER: {{ .Values.config.maxCounter | quote }}
CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }}
PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }}
CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }}
{{- if .Values.config.dashboardSecretPath }}
DASHBOARD_SECRET_PATH: {{ .Values.config.dashboardSecretPath | quote }}
{{- end }}
{{- if .Values.config.serverHeader }}
SERVER_HEADER: {{ .Values.config.serverHeader | quote }}
{{- end }}
{{- if .Values.config.timezone }}
TIMEZONE: {{ .Values.config.timezone | quote }}
{{- end }}
# Database configuration
DATABASE_PATH: {{ .Values.database.path | quote }}
DATABASE_RETENTION_DAYS: {{ .Values.database.retentionDays | quote }}
config.yaml: |
# Krawl Honeypot Configuration
server:
port: {{ .Values.config.server.port }}
delay: {{ .Values.config.server.delay }}
timezone: {{ .Values.config.server.timezone | toYaml }}
links:
min_length: {{ .Values.config.links.min_length }}
max_length: {{ .Values.config.links.max_length }}
min_per_page: {{ .Values.config.links.min_per_page }}
max_per_page: {{ .Values.config.links.max_per_page }}
char_space: {{ .Values.config.links.char_space | quote }}
max_counter: {{ .Values.config.links.max_counter }}
canary:
token_url: {{ .Values.config.canary.token_url | toYaml }}
token_tries: {{ .Values.config.canary.token_tries }}
dashboard:
secret_path: {{ .Values.config.dashboard.secret_path | toYaml }}
api:
server_url: {{ .Values.config.api.server_url | toYaml }}
server_port: {{ .Values.config.api.server_port }}
server_path: {{ .Values.config.api.server_path | quote }}
database:
path: {{ .Values.config.database.path | quote }}
retention_days: {{ .Values.config.database.retention_days }}
behavior:
probability_error_codes: {{ .Values.config.behavior.probability_error_codes }}

View File

@@ -38,18 +38,16 @@ spec:
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- name: http
containerPort: {{ .Values.config.port }}
containerPort: {{ .Values.config.server.port }}
protocol: TCP
envFrom:
- configMapRef:
name: {{ include "krawl.fullname" . }}-config
env:
- name: DASHBOARD_SECRET_PATH
valueFrom:
secretKeyRef:
name: {{ include "krawl.fullname" . }}
key: dashboard-path
- name: CONFIG_LOCATION
value: "config.yaml"
volumeMounts:
- name: config
mountPath: /app/config.yaml
subPath: config.yaml
readOnly: true
- name: wordlists
mountPath: /app/wordlists.json
subPath: wordlists.json
@@ -63,6 +61,9 @@ spec:
{{- toYaml . | nindent 12 }}
{{- end }}
volumes:
- name: config
configMap:
name: {{ include "krawl.fullname" . }}-config
- name: wordlists
configMap:
name: {{ include "krawl.fullname" . }}-wordlists

View File

@@ -1,16 +0,0 @@
{{- $secret := (lookup "v1" "Secret" .Release.Namespace (include "krawl.fullname" .)) -}}
{{- $dashboardPath := "" -}}
{{- if and $secret $secret.data -}}
{{- $dashboardPath = index $secret.data "dashboard-path" | b64dec -}}
{{- else -}}
{{- $dashboardPath = printf "/%s" (randAlphaNum 32) -}}
{{- end -}}
apiVersion: v1
kind: Secret
metadata:
name: {{ include "krawl.fullname" . }}
labels:
{{- include "krawl.labels" . | nindent 4 }}
type: Opaque
stringData:
dashboard-path: {{ $dashboardPath | quote }}

View File

@@ -62,29 +62,36 @@ tolerations: []
affinity: {}
# Application configuration
# Application configuration (config.yaml structure)
config:
port: 5000
delay: 100
linksMinLength: 5
linksMaxLength: 15
linksMinPerPage: 10
linksMaxPerPage: 15
maxCounter: 10
canaryTokenTries: 10
probabilityErrorCodes: 0
# timezone: "UTC"
# serverHeader: "Apache/2.2.22 (Ubuntu)"
# dashboardSecretPath: "/my-secret-dashboard"
# canaryTokenUrl: set-your-canary-token-url-here
# timezone: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used.
server:
port: 5000
delay: 100
timezone: null # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used.
links:
min_length: 5
max_length: 15
min_per_page: 10
max_per_page: 15
char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
max_counter: 10
canary:
token_url: null # Set your canary token URL here
token_tries: 10
dashboard:
secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard"
api:
server_url: null
server_port: 8080
server_path: "/api/v2/users"
database:
path: "data/krawl.db"
retention_days: 30
behavior:
probability_error_codes: 0
# Database configuration
# Database persistence configuration
database:
# Path to the SQLite database file
path: "data/krawl.db"
# Number of days to retain access logs and attack data
retentionDays: 30
# Persistence configuration
persistence:
enabled: true

View File

@@ -10,19 +10,41 @@ metadata:
name: krawl-config
namespace: krawl-system
data:
PORT: "5000"
DELAY: "100"
LINKS_MIN_LENGTH: "5"
LINKS_MAX_LENGTH: "15"
LINKS_MIN_PER_PAGE: "10"
LINKS_MAX_PER_PAGE: "15"
MAX_COUNTER: "10"
CANARY_TOKEN_TRIES: "10"
PROBABILITY_ERROR_CODES: "0"
# CANARY_TOKEN_URL: set-your-canary-token-url-here
# Database configuration
DATABASE_PATH: "data/krawl.db"
DATABASE_RETENTION_DAYS: "30"
config.yaml: |
# Krawl Honeypot Configuration
server:
port: 5000
delay: 100
timezone: null # e.g., "America/New_York" or null for system default
links:
min_length: 5
max_length: 15
min_per_page: 10
max_per_page: 15
char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
max_counter: 10
canary:
token_url: null # Optional canary token URL
token_tries: 10
dashboard:
# Auto-generates random path if null
# Can be set to "/dashboard" or similar
secret_path: null
api:
server_url: null
server_port: 8080
server_path: "/api/v2/users"
database:
path: "data/krawl.db"
retention_days: 30
behavior:
probability_error_codes: 0 # 0-100 percentage
---
apiVersion: v1
kind: ConfigMap
@@ -227,6 +249,14 @@ data:
500,
502,
503
],
"server_headers": [
"Apache/2.4.41 (Ubuntu)",
"nginx/1.18.0",
"Microsoft-IIS/10.0",
"cloudflare",
"AmazonS3",
"gunicorn/20.1.0"
]
}
---
@@ -269,10 +299,14 @@ spec:
- containerPort: 5000
name: http
protocol: TCP
envFrom:
- configMapRef:
name: krawl-config
env:
- name: CONFIG_LOCATION
value: "config.yaml"
volumeMounts:
- name: config
mountPath: /app/config.yaml
subPath: config.yaml
readOnly: true
- name: wordlists
mountPath: /app/wordlists.json
subPath: wordlists.json
@@ -287,6 +321,9 @@ spec:
memory: "256Mi"
cpu: "500m"
volumes:
- name: config
configMap:
name: krawl-config
- name: wordlists
configMap:
name: krawl-wordlists
@@ -353,7 +390,7 @@ spec:
- podSelector: {}
- namespaceSelector: {}
- ipBlock:
cidr: 0.0.0.0/0
cidr: 0.0.0.0/0
ports:
- protocol: TCP
port: 5000

View File

@@ -4,18 +4,38 @@ metadata:
name: krawl-config
namespace: krawl-system
data:
PORT: "5000"
DELAY: "100"
LINKS_MIN_LENGTH: "5"
LINKS_MAX_LENGTH: "15"
LINKS_MIN_PER_PAGE: "10"
LINKS_MAX_PER_PAGE: "15"
MAX_COUNTER: "10"
CANARY_TOKEN_TRIES: "10"
PROBABILITY_ERROR_CODES: "0"
SERVER_HEADER: "Apache/2.2.22 (Ubuntu)"
# CANARY_TOKEN_URL: set-your-canary-token-url-here
# TIMEZONE: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome")
# Database configuration
DATABASE_PATH: "data/krawl.db"
DATABASE_RETENTION_DAYS: "30"
config.yaml: |
# Krawl Honeypot Configuration
server:
port: 5000
delay: 100
timezone: null # e.g., "America/New_York" or null for system default
links:
min_length: 5
max_length: 15
min_per_page: 10
max_per_page: 15
char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
max_counter: 10
canary:
token_url: null # Optional canary token URL
token_tries: 10
dashboard:
# Auto-generates random path if null
# Can be set to "/dashboard" or similar
secret_path: null
api:
server_url: null
server_port: 8080
server_path: "/api/v2/users"
database:
path: "data/krawl.db"
retention_days: 30
behavior:
probability_error_codes: 0 # 0-100 percentage

View File

@@ -23,10 +23,14 @@ spec:
- containerPort: 5000
name: http
protocol: TCP
envFrom:
- configMapRef:
name: krawl-config
env:
- name: CONFIG_LOCATION
value: "config.yaml"
volumeMounts:
- name: config
mountPath: /app/config.yaml
subPath: config.yaml
readOnly: true
- name: wordlists
mountPath: /app/wordlists.json
subPath: wordlists.json
@@ -41,6 +45,9 @@ spec:
memory: "256Mi"
cpu: "500m"
volumes:
- name: config
configMap:
name: krawl-config
- name: wordlists
configMap:
name: krawl-wordlists

View File

@@ -1,5 +1,8 @@
# Krawl Honeypot Dependencies
# Install with: pip install -r requirements.txt
# Configuration
PyYAML>=6.0
# Database ORM
SQLAlchemy>=2.0.0,<3.0.0

View File

@@ -1,11 +1,15 @@
#!/usr/bin/env python3
import os
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Tuple
from zoneinfo import ZoneInfo
import time
import yaml
@dataclass
class Config:
@@ -23,12 +27,11 @@ class Config:
api_server_port: int = 8080
api_server_path: str = "/api/v2/users"
probability_error_codes: int = 0 # Percentage (0-100)
server_header: Optional[str] = None
# Database settings
database_path: str = "data/krawl.db"
database_retention_days: int = 30
timezone: str = None # IANA timezone (e.g., 'America/New_York', 'Europe/Rome')
@staticmethod
# Detect the system timezone (used as the fallback when none is configured)
def get_system_timezone() -> str:
@@ -38,16 +41,16 @@ class Config:
tz_path = os.readlink('/etc/localtime')
if 'zoneinfo/' in tz_path:
return tz_path.split('zoneinfo/')[-1]
local_tz = time.tzname[time.daylight]
if local_tz and local_tz != 'UTC':
return local_tz
except Exception:
pass
# Default fallback to UTC
return 'UTC'
def get_timezone(self) -> ZoneInfo:
"""Get configured timezone as ZoneInfo object"""
if self.timezone:
@@ -55,7 +58,7 @@ class Config:
return ZoneInfo(self.timezone)
except Exception:
pass
system_tz = self.get_system_timezone()
try:
return ZoneInfo(system_tz)
@@ -63,31 +66,71 @@ class Config:
return ZoneInfo('UTC')
@classmethod
def from_yaml(cls) -> 'Config':
    """Create configuration from a YAML file.

    The file name comes from the CONFIG_LOCATION environment variable
    (default: 'config.yaml') and is resolved against the parent of the
    directory containing this module.

    Missing keys fall back to the defaults written below. A section that
    is absent or null (e.g. a bare ``server:``) is treated as empty.

    Returns:
        A Config populated from the file.

    Exits the process with status 1 (after printing to stderr) when the
    file is missing, is not valid YAML, or does not have the expected
    mapping structure.
    """
    config_location = os.getenv('CONFIG_LOCATION', 'config.yaml')
    config_path = Path(__file__).parent.parent / config_location

    try:
        with open(config_path, 'r') as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        print(f"Error: Configuration file '{config_path}' not found.", file=sys.stderr)
        print("Please create a config.yaml file or set CONFIG_LOCATION environment variable.", file=sys.stderr)
        sys.exit(1)
    except yaml.YAMLError as e:
        print(f"Error: Invalid YAML in configuration file '{config_path}': {e}", file=sys.stderr)
        sys.exit(1)

    # An empty file loads as None.
    if data is None:
        data = {}
    if not isinstance(data, dict):
        print(f"Error: Configuration file '{config_path}' must contain a mapping at the top level.", file=sys.stderr)
        sys.exit(1)

    def section(name: str) -> dict:
        # `data.get(name, {})` would return None for a key that is present
        # but empty (`server:`), and the later `.get` calls would crash.
        value = data.get(name)
        if value is None:
            return {}
        if not isinstance(value, dict):
            print(f"Error: Section '{name}' in '{config_path}' must be a mapping.", file=sys.stderr)
            sys.exit(1)
        return value

    # Extract nested sections
    server = section('server')
    links = section('links')
    canary = section('canary')
    dashboard = section('dashboard')
    api = section('api')
    database = section('database')
    behavior = section('behavior')

    # Dashboard path: auto-generate when null/empty/not set. An empty string
    # is treated as unset so that prefixing it cannot produce "/".
    dashboard_path = dashboard.get('secret_path')
    if dashboard_path is None or dashboard_path == '':
        dashboard_path = f'/{os.urandom(16).hex()}'
    else:
        dashboard_path = str(dashboard_path)
        # NOTE(review): assumes consumers compare this against request paths,
        # which begin with '/', as the auto-generated value does - confirm
        # against the handler.
        if not dashboard_path.startswith('/'):
            dashboard_path = f'/{dashboard_path}'

    return cls(
        port=server.get('port', 5000),
        delay=server.get('delay', 100),
        timezone=server.get('timezone'),
        links_length_range=(
            links.get('min_length', 5),
            links.get('max_length', 15)
        ),
        links_per_page_range=(
            links.get('min_per_page', 10),
            links.get('max_per_page', 15)
        ),
        char_space=links.get('char_space', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
        max_counter=links.get('max_counter', 10),
        canary_token_url=canary.get('token_url'),
        canary_token_tries=canary.get('token_tries', 10),
        dashboard_secret_path=dashboard_path,
        api_server_url=api.get('server_url'),
        api_server_port=api.get('server_port', 8080),
        api_server_path=api.get('server_path', '/api/v2/users'),
        probability_error_codes=behavior.get('probability_error_codes', 0),
        database_path=database.get('path', 'data/krawl.db'),
        database_retention_days=database.get('retention_days', 30),
    )
# Module-level cache for the Config built on first use.
_config_instance = None


def get_config() -> Config:
    """Return the shared Config, building it via Config.from_yaml() on first call."""
    global _config_instance
    if _config_instance is not None:
        return _config_instance
    _config_instance = Config.from_yaml()
    return _config_instance

View File

@@ -9,8 +9,6 @@ import string
import json
from templates import html_templates
from wordlists import get_wordlists
from config import Config
from logger import get_app_logger
def random_username() -> str:
"""Generate random username"""
@@ -38,15 +36,9 @@ def random_email(username: str = None) -> str:
return f"{username}@{random.choice(wl.email_domains)}"
def random_server_header() -> str:
    """Generate random server header from wordlists.

    Returns:
        One entry chosen at random from the wordlists' ``server_headers``.
    """
    # The header is no longer configurable; it always comes from the wordlists.
    wl = get_wordlists()
    return random.choice(wl.server_headers)
def random_api_key() -> str:
"""Generate random API key"""

View File

@@ -8,7 +8,7 @@ Run this file to start the server.
import sys
from http.server import HTTPServer
from config import Config
from config import get_config
from tracker import AccessTracker
from handler import Handler
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
@@ -20,24 +20,29 @@ def print_usage():
print(f'Usage: {sys.argv[0]} [FILE]\n')
print('FILE is file containing a list of webpage names to serve, one per line.')
print('If no file is provided, random links will be generated.\n')
print('Environment Variables:')
print(' PORT - Server port (default: 5000)')
print(' DELAY - Response delay in ms (default: 100)')
print(' LINKS_MIN_LENGTH - Min link length (default: 5)')
print(' LINKS_MAX_LENGTH - Max link length (default: 15)')
print(' LINKS_MIN_PER_PAGE - Min links per page (default: 10)')
print(' LINKS_MAX_PER_PAGE - Max links per page (default: 15)')
print(' MAX_COUNTER - Max counter value (default: 10)')
print(' CANARY_TOKEN_URL - Canary token URL to display')
print(' CANARY_TOKEN_TRIES - Number of tries before showing token (default: 10)')
print(' DASHBOARD_SECRET_PATH - Secret path for dashboard (auto-generated if not set)')
print(' PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)')
print(' CHAR_SPACE - Characters for random links')
print(' SERVER_HEADER - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))')
print(' DATABASE_PATH - Path to SQLite database (default: data/krawl.db)')
print(' DATABASE_RETENTION_DAYS - Days to retain database records (default: 30)')
print(' TIMEZONE - IANA timezone for logs/dashboard (e.g., America/New_York, Europe/Rome)')
print(' If not set, system timezone will be used')
print('Configuration:')
print(' Configuration is loaded from a YAML file (default: config.yaml)')
print(' Set CONFIG_LOCATION environment variable to use a different file.\n')
print(' Example config.yaml structure:')
print(' server:')
print(' port: 5000')
print(' delay: 100')
print(' timezone: null # or "America/New_York"')
print(' links:')
print(' min_length: 5')
print(' max_length: 15')
print(' min_per_page: 10')
print(' max_per_page: 15')
print(' canary:')
print(' token_url: null')
print(' token_tries: 10')
print(' dashboard:')
print(' secret_path: null # auto-generated if not set')
print(' database:')
print(' path: "data/krawl.db"')
print(' retention_days: 30')
print(' behavior:')
print(' probability_error_codes: 0')
def main():
@@ -46,19 +51,17 @@ def main():
print_usage()
exit(0)
config = Config.from_env()
config = get_config()
# Get timezone configuration
tz = config.get_timezone()
# Initialize logging with timezone
initialize_logging(timezone=tz)
app_logger = get_app_logger()
access_logger = get_access_logger()
credential_logger = get_credential_logger()
config = Config.from_env()
# Initialize database for persistent storage
try:
initialize_database(config.database_path)