Migrate configuration from environment variables to YAML file

- Add YAML-based configuration loaded from config.yaml (CONFIG_LOCATION env var)
  - Add PyYAML dependency and install requirements in Dockerfile
  - Replace Config.from_env() with get_config() singleton pattern
  - Remove server_header from config (now randomized from wordlists only)
  - Update docker-compose.yaml to mount config.yaml read-only
  - Update Helm chart: restructure values.yaml, generate config.yaml in ConfigMap
  - Update Kubernetes manifests: ConfigMap now contains config.yaml, deployments mount it
  - Remove Helm secret.yaml (dashboard path now auto-generated in config.yaml)
This commit is contained in:
Phillip Tarrant
2026-01-02 13:39:54 -06:00
parent 5a00e374e6
commit d458eb471d
14 changed files with 307 additions and 181 deletions

View File

@@ -1,11 +1,15 @@
#!/usr/bin/env python3
import os
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import Optional, Tuple
from zoneinfo import ZoneInfo
import time
import yaml
@dataclass
class Config:
@@ -23,12 +27,11 @@ class Config:
api_server_port: int = 8080
api_server_path: str = "/api/v2/users"
probability_error_codes: int = 0 # Percentage (0-100)
server_header: Optional[str] = None
# Database settings
database_path: str = "data/krawl.db"
database_retention_days: int = 30
timezone: str = None # IANA timezone (e.g., 'America/New_York', 'Europe/Rome')
@staticmethod
# Detect the system timezone; used as a fallback when no timezone is configured
def get_system_timezone() -> str:
@@ -38,16 +41,16 @@ class Config:
tz_path = os.readlink('/etc/localtime')
if 'zoneinfo/' in tz_path:
return tz_path.split('zoneinfo/')[-1]
local_tz = time.tzname[time.daylight]
if local_tz and local_tz != 'UTC':
return local_tz
except Exception:
pass
# Default fallback to UTC
return 'UTC'
def get_timezone(self) -> ZoneInfo:
"""Get configured timezone as ZoneInfo object"""
if self.timezone:
@@ -55,7 +58,7 @@ class Config:
return ZoneInfo(self.timezone)
except Exception:
pass
system_tz = self.get_system_timezone()
try:
return ZoneInfo(system_tz)
@@ -63,31 +66,71 @@ class Config:
return ZoneInfo('UTC')
@classmethod
def from_yaml(cls) -> 'Config':
    """Create configuration from a YAML file.

    The file location is read from the CONFIG_LOCATION environment
    variable (default: 'config.yaml') and joined onto the parent of the
    directory that contains this module. An absolute CONFIG_LOCATION
    replaces that base, per pathlib join semantics.

    Settings are read from the top-level sections 'server', 'links',
    'canary', 'dashboard', 'api', 'database' and 'behavior'. A missing
    section, a section set to null, a missing key, or a key set to null
    all fall back to the built-in default. When 'dashboard.secret_path'
    is not set, a random path is generated.

    Returns:
        A Config populated from the file.

    Exits the process with status 1 (after printing to stderr) when the
    file does not exist, is not valid YAML, or when the document or one
    of its sections is not a mapping.
    """
    config_location = os.getenv('CONFIG_LOCATION', 'config.yaml')
    config_path = Path(__file__).parent.parent / config_location

    try:
        with open(config_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)
    except FileNotFoundError:
        print(f"Error: Configuration file '{config_path}' not found.", file=sys.stderr)
        print("Please create a config.yaml file or set CONFIG_LOCATION environment variable.", file=sys.stderr)
        sys.exit(1)
    except yaml.YAMLError as e:
        print(f"Error: Invalid YAML in configuration file '{config_path}': {e}", file=sys.stderr)
        sys.exit(1)

    # An empty file parses to None; treat it as "use every default".
    if data is None:
        data = {}
    if not isinstance(data, dict):
        print(f"Error: Configuration file '{config_path}' must contain a mapping at the top level.", file=sys.stderr)
        sys.exit(1)

    def section(name):
        """Return the named section as a dict; absent or null sections are empty."""
        value = data.get(name)
        if value is None:
            return {}
        if not isinstance(value, dict):
            print(f"Error: Section '{name}' in configuration file '{config_path}' must be a mapping.", file=sys.stderr)
            sys.exit(1)
        return value

    def setting(values, key, default=None):
        """Return values[key], using the default when the key is absent or null."""
        value = values.get(key)
        return default if value is None else value

    server = section('server')
    links = section('links')
    canary = section('canary')
    dashboard = section('dashboard')
    api = section('api')
    database = section('database')
    behavior = section('behavior')

    # Handle dashboard_secret_path - auto-generate if null/not set
    dashboard_path = setting(dashboard, 'secret_path')
    if dashboard_path is None:
        dashboard_path = f'/{os.urandom(16).hex()}'

    return cls(
        port=setting(server, 'port', 5000),
        delay=setting(server, 'delay', 100),
        # None means "not configured"; get_timezone() then uses the system timezone.
        timezone=setting(server, 'timezone'),
        links_length_range=(
            setting(links, 'min_length', 5),
            setting(links, 'max_length', 15),
        ),
        links_per_page_range=(
            setting(links, 'min_per_page', 10),
            setting(links, 'max_per_page', 15),
        ),
        char_space=setting(links, 'char_space', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
        max_counter=setting(links, 'max_counter', 10),
        canary_token_url=setting(canary, 'token_url'),
        canary_token_tries=setting(canary, 'token_tries', 10),
        dashboard_secret_path=dashboard_path,
        api_server_url=setting(api, 'server_url'),
        api_server_port=setting(api, 'server_port', 8080),
        api_server_path=setting(api, 'server_path', '/api/v2/users'),
        probability_error_codes=setting(behavior, 'probability_error_codes', 0),
        database_path=setting(database, 'path', 'data/krawl.db'),
        database_retention_days=setting(database, 'retention_days', 30),
    )
_config_instance = None
def get_config() -> Config:
    """Return the shared Config, building it from YAML on the first call.

    The instance is cached in the module-level ``_config_instance`` so
    later calls return the same object without calling
    Config.from_yaml() again.
    """
    global _config_instance
    if _config_instance is not None:
        return _config_instance
    _config_instance = Config.from_yaml()
    return _config_instance

View File

@@ -9,8 +9,6 @@ import string
import json
from templates import html_templates
from wordlists import get_wordlists
from config import Config
from logger import get_app_logger
def random_username() -> str:
"""Generate random username"""
@@ -38,15 +36,9 @@ def random_email(username: str = None) -> str:
return f"{username}@{random.choice(wl.email_domains)}"
def random_server_header() -> str:
    """Generate a random server header from the wordlists.

    Returns:
        One entry chosen at random from the ``server_headers`` attribute
        of the object returned by get_wordlists().
    """
    # No configuration lookup here: the header comes only from the wordlists.
    wl = get_wordlists()
    return random.choice(wl.server_headers)
def random_api_key() -> str:
"""Generate random API key"""

View File

@@ -8,7 +8,7 @@ Run this file to start the server.
import sys
from http.server import HTTPServer
from config import Config
from config import get_config
from tracker import AccessTracker
from handler import Handler
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
@@ -20,24 +20,29 @@ def print_usage():
print(f'Usage: {sys.argv[0]} [FILE]\n')
print('FILE is file containing a list of webpage names to serve, one per line.')
print('If no file is provided, random links will be generated.\n')
print('Environment Variables:')
print(' PORT - Server port (default: 5000)')
print(' DELAY - Response delay in ms (default: 100)')
print(' LINKS_MIN_LENGTH - Min link length (default: 5)')
print(' LINKS_MAX_LENGTH - Max link length (default: 15)')
print(' LINKS_MIN_PER_PAGE - Min links per page (default: 10)')
print(' LINKS_MAX_PER_PAGE - Max links per page (default: 15)')
print(' MAX_COUNTER - Max counter value (default: 10)')
print(' CANARY_TOKEN_URL - Canary token URL to display')
print(' CANARY_TOKEN_TRIES - Number of tries before showing token (default: 10)')
print(' DASHBOARD_SECRET_PATH - Secret path for dashboard (auto-generated if not set)')
print(' PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)')
print(' CHAR_SPACE - Characters for random links')
print(' SERVER_HEADER - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))')
print(' DATABASE_PATH - Path to SQLite database (default: data/krawl.db)')
print(' DATABASE_RETENTION_DAYS - Days to retain database records (default: 30)')
print(' TIMEZONE - IANA timezone for logs/dashboard (e.g., America/New_York, Europe/Rome)')
print(' If not set, system timezone will be used')
print('Configuration:')
print(' Configuration is loaded from a YAML file (default: config.yaml)')
print(' Set CONFIG_LOCATION environment variable to use a different file.\n')
print(' Example config.yaml structure:')
print(' server:')
print(' port: 5000')
print(' delay: 100')
print(' timezone: null # or "America/New_York"')
print(' links:')
print(' min_length: 5')
print(' max_length: 15')
print(' min_per_page: 10')
print(' max_per_page: 15')
print(' canary:')
print(' token_url: null')
print(' token_tries: 10')
print(' dashboard:')
print(' secret_path: null # auto-generated if not set')
print(' database:')
print(' path: "data/krawl.db"')
print(' retention_days: 30')
print(' behavior:')
print(' probability_error_codes: 0')
def main():
@@ -46,19 +51,17 @@ def main():
print_usage()
exit(0)
config = Config.from_env()
config = get_config()
# Get timezone configuration
tz = config.get_timezone()
# Initialize logging with timezone
initialize_logging(timezone=tz)
app_logger = get_app_logger()
access_logger = get_access_logger()
credential_logger = get_credential_logger()
config = Config.from_env()
# Initialize database for persistent storage
try:
initialize_database(config.database_path)