From d458eb471db47ffae2ce6b72ff15228c790017e8 Mon Sep 17 00:00:00 2001 From: Phillip Tarrant Date: Fri, 2 Jan 2026 13:39:54 -0600 Subject: [PATCH] Migrate configuration from environment variables to YAML file - Add YAML-based configuration loaded from config.yaml (CONFIG_LOCATION env var) - Add PyYAML dependency and install requirements in Dockerfile - Replace Config.from_env() with get_config() singleton pattern - Remove server_header from config (now randomized from wordlists only) - Update docker-compose.yaml to mount config.yaml read-only - Update Helm chart: restructure values.yaml, generate config.yaml in ConfigMap - Update Kubernetes manifests: ConfigMap now contains config.yaml, deployments mount it - Remove Helm secret.yaml (dashboard path now auto-generated in config.yaml) --- Dockerfile | 3 + config.yaml | 35 +++++++++ docker-compose.yaml | 18 +---- helm/templates/configmap.yaml | 49 ++++++------ helm/templates/deployment.yaml | 19 ++--- helm/templates/secret.yaml | 16 ---- helm/values.yaml | 47 +++++++----- kubernetes/krawl-all-in-one-deploy.yaml | 71 +++++++++++++----- kubernetes/manifests/configmap.yaml | 50 +++++++++---- kubernetes/manifests/deployment.yaml | 13 +++- requirements.txt | 3 + src/config.py | 99 ++++++++++++++++++------- src/generators.py | 14 +--- src/server.py | 51 +++++++------ 14 files changed, 307 insertions(+), 181 deletions(-) create mode 100644 config.yaml delete mode 100644 helm/templates/secret.yaml diff --git a/Dockerfile b/Dockerfile index adac20f..e0fb6af 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,6 +4,9 @@ LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl WORKDIR /app +COPY requirements.txt /app/ +RUN pip install --no-cache-dir -r requirements.txt + COPY src/ /app/src/ COPY wordlists.json /app/ diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..c4faa8f --- /dev/null +++ b/config.yaml @@ -0,0 +1,35 @@ +# Krawl Honeypot Configuration + +server: + port: 5000 + delay: 100 # 
Response delay in milliseconds + timezone: null # e.g., "America/New_York" or null for system default + +links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + +canary: + token_url: null # Optional canary token URL + token_tries: 10 + +dashboard: + # Auto-generates a random path if set to null or left unset + # Can be set to "dashboard" or similar + secret_path: dashboard + +api: + server_url: null + server_port: 8080 + server_path: "/api/v2/users" + +database: + path: "data/krawl.db" + retention_days: 30 + +behavior: + probability_error_codes: 0 # 0-100 percentage \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index 6f81a47..776e919 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -10,23 +10,9 @@ services: - "5000:5000" volumes: - ./wordlists.json:/app/wordlists.json:ro + - ./config.yaml:/app/config.yaml:ro environment: - - PORT=5000 - - DELAY=100 - - LINKS_MIN_LENGTH=5 - - LINKS_MAX_LENGTH=15 - - LINKS_MIN_PER_PAGE=10 - - LINKS_MAX_PER_PAGE=15 - - MAX_COUNTER=10 - - CANARY_TOKEN_TRIES=10 - - PROBABILITY_ERROR_CODES=0 - # - SERVER_HEADER=Apache/2.2.22 (Ubuntu) - # Optional: Set your canary token URL - # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt - # Optional: Set custom dashboard path (auto-generated if not set) - # - DASHBOARD_SECRET_PATH=/my-secret-dashboard - # Optional: Set timezone for logs and dashboard (e.g., America/New_York, Europe/Rome) - # - TIMEZONE=UTC + - CONFIG_LOCATION=config.yaml restart: unless-stopped healthcheck: test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"] diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index 17cd952..808d9f5 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -5,25 +5,30 @@ metadata: labels: {{- include "krawl.labels" 
. | nindent 4 }} data: - PORT: {{ .Values.config.port | quote }} - DELAY: {{ .Values.config.delay | quote }} - LINKS_MIN_LENGTH: {{ .Values.config.linksMinLength | quote }} - LINKS_MAX_LENGTH: {{ .Values.config.linksMaxLength | quote }} - LINKS_MIN_PER_PAGE: {{ .Values.config.linksMinPerPage | quote }} - LINKS_MAX_PER_PAGE: {{ .Values.config.linksMaxPerPage | quote }} - MAX_COUNTER: {{ .Values.config.maxCounter | quote }} - CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }} - PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }} - CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }} - {{- if .Values.config.dashboardSecretPath }} - DASHBOARD_SECRET_PATH: {{ .Values.config.dashboardSecretPath | quote }} - {{- end }} - {{- if .Values.config.serverHeader }} - SERVER_HEADER: {{ .Values.config.serverHeader | quote }} - {{- end }} - {{- if .Values.config.timezone }} - TIMEZONE: {{ .Values.config.timezone | quote }} - {{- end }} - # Database configuration - DATABASE_PATH: {{ .Values.database.path | quote }} - DATABASE_RETENTION_DAYS: {{ .Values.database.retentionDays | quote }} + config.yaml: | + # Krawl Honeypot Configuration + server: + port: {{ .Values.config.server.port }} + delay: {{ .Values.config.server.delay }} + timezone: {{ .Values.config.server.timezone | toYaml }} + links: + min_length: {{ .Values.config.links.min_length }} + max_length: {{ .Values.config.links.max_length }} + min_per_page: {{ .Values.config.links.min_per_page }} + max_per_page: {{ .Values.config.links.max_per_page }} + char_space: {{ .Values.config.links.char_space | quote }} + max_counter: {{ .Values.config.links.max_counter }} + canary: + token_url: {{ .Values.config.canary.token_url | toYaml }} + token_tries: {{ .Values.config.canary.token_tries }} + dashboard: + secret_path: {{ .Values.config.dashboard.secret_path | toYaml }} + api: + server_url: {{ .Values.config.api.server_url | toYaml }} + server_port: {{ .Values.config.api.server_port }} + 
server_path: {{ .Values.config.api.server_path | quote }} + database: + path: {{ .Values.config.database.path | quote }} + retention_days: {{ .Values.config.database.retention_days }} + behavior: + probability_error_codes: {{ .Values.config.behavior.probability_error_codes }} diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index ecc9655..5635fa3 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -38,18 +38,16 @@ spec: imagePullPolicy: {{ .Values.image.pullPolicy }} ports: - name: http - containerPort: {{ .Values.config.port }} + containerPort: {{ .Values.config.server.port }} protocol: TCP - envFrom: - - configMapRef: - name: {{ include "krawl.fullname" . }}-config env: - - name: DASHBOARD_SECRET_PATH - valueFrom: - secretKeyRef: - name: {{ include "krawl.fullname" . }} - key: dashboard-path + - name: CONFIG_LOCATION + value: "config.yaml" volumeMounts: + - name: config + mountPath: /app/config.yaml + subPath: config.yaml + readOnly: true - name: wordlists mountPath: /app/wordlists.json subPath: wordlists.json @@ -63,6 +61,9 @@ spec: {{- toYaml . | nindent 12 }} {{- end }} volumes: + - name: config + configMap: + name: {{ include "krawl.fullname" . }}-config - name: wordlists configMap: name: {{ include "krawl.fullname" . }}-wordlists diff --git a/helm/templates/secret.yaml b/helm/templates/secret.yaml deleted file mode 100644 index 798289c..0000000 --- a/helm/templates/secret.yaml +++ /dev/null @@ -1,16 +0,0 @@ -{{- $secret := (lookup "v1" "Secret" .Release.Namespace (include "krawl.fullname" .)) -}} -{{- $dashboardPath := "" -}} -{{- if and $secret $secret.data -}} - {{- $dashboardPath = index $secret.data "dashboard-path" | b64dec -}} -{{- else -}} - {{- $dashboardPath = printf "/%s" (randAlphaNum 32) -}} -{{- end -}} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "krawl.fullname" . }} - labels: - {{- include "krawl.labels" . 
| nindent 4 }} -type: Opaque -stringData: - dashboard-path: {{ $dashboardPath | quote }} diff --git a/helm/values.yaml b/helm/values.yaml index c92bc0b..60b1a66 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -62,29 +62,36 @@ tolerations: [] affinity: {} -# Application configuration +# Application configuration (config.yaml structure) config: - port: 5000 - delay: 100 - linksMinLength: 5 - linksMaxLength: 15 - linksMinPerPage: 10 - linksMaxPerPage: 15 - maxCounter: 10 - canaryTokenTries: 10 - probabilityErrorCodes: 0 -# timezone: "UTC" -# serverHeader: "Apache/2.2.22 (Ubuntu)" -# dashboardSecretPath: "/my-secret-dashboard" -# canaryTokenUrl: set-your-canary-token-url-here -# timezone: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used. + server: + port: 5000 + delay: 100 + timezone: null # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used. + links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + canary: + token_url: null # Set your canary token URL here + token_tries: 10 + dashboard: + secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard" + api: + server_url: null + server_port: 8080 + server_path: "/api/v2/users" + database: + path: "data/krawl.db" + retention_days: 30 + behavior: + probability_error_codes: 0 -# Database configuration +# Database persistence configuration database: - # Path to the SQLite database file - path: "data/krawl.db" - # Number of days to retain access logs and attack data - retentionDays: 30 # Persistence configuration persistence: enabled: true diff --git a/kubernetes/krawl-all-in-one-deploy.yaml b/kubernetes/krawl-all-in-one-deploy.yaml index d1a026c..3344260 100644 --- a/kubernetes/krawl-all-in-one-deploy.yaml +++ b/kubernetes/krawl-all-in-one-deploy.yaml @@ -10,19 +10,41 @@ metadata: name: 
krawl-config namespace: krawl-system data: - PORT: "5000" - DELAY: "100" - LINKS_MIN_LENGTH: "5" - LINKS_MAX_LENGTH: "15" - LINKS_MIN_PER_PAGE: "10" - LINKS_MAX_PER_PAGE: "15" - MAX_COUNTER: "10" - CANARY_TOKEN_TRIES: "10" - PROBABILITY_ERROR_CODES: "0" -# CANARY_TOKEN_URL: set-your-canary-token-url-here - # Database configuration - DATABASE_PATH: "data/krawl.db" - DATABASE_RETENTION_DAYS: "30" + config.yaml: | + # Krawl Honeypot Configuration + server: + port: 5000 + delay: 100 + timezone: null # e.g., "America/New_York" or null for system default + + links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + + canary: + token_url: null # Optional canary token URL + token_tries: 10 + + dashboard: + # Auto-generates random path if null + # Can be set to "/dashboard" or similar + secret_path: null + + api: + server_url: null + server_port: 8080 + server_path: "/api/v2/users" + + database: + path: "data/krawl.db" + retention_days: 30 + + behavior: + probability_error_codes: 0 # 0-100 percentage --- apiVersion: v1 kind: ConfigMap @@ -227,6 +249,14 @@ data: 500, 502, 503 + ], + "server_headers": [ + "Apache/2.4.41 (Ubuntu)", + "nginx/1.18.0", + "Microsoft-IIS/10.0", + "cloudflare", + "AmazonS3", + "gunicorn/20.1.0" ] } --- @@ -269,10 +299,14 @@ spec: - containerPort: 5000 name: http protocol: TCP - envFrom: - - configMapRef: - name: krawl-config + env: + - name: CONFIG_LOCATION + value: "config.yaml" volumeMounts: + - name: config + mountPath: /app/config.yaml + subPath: config.yaml + readOnly: true - name: wordlists mountPath: /app/wordlists.json subPath: wordlists.json @@ -287,6 +321,9 @@ spec: memory: "256Mi" cpu: "500m" volumes: + - name: config + configMap: + name: krawl-config - name: wordlists configMap: name: krawl-wordlists @@ -353,7 +390,7 @@ spec: - podSelector: {} - namespaceSelector: {} - ipBlock: - cidr: 0.0.0.0/0 + cidr: 0.0.0.0/0 ports: 
- protocol: TCP port: 5000 diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml index ef357b0..38a287b 100644 --- a/kubernetes/manifests/configmap.yaml +++ b/kubernetes/manifests/configmap.yaml @@ -4,18 +4,38 @@ metadata: name: krawl-config namespace: krawl-system data: - PORT: "5000" - DELAY: "100" - LINKS_MIN_LENGTH: "5" - LINKS_MAX_LENGTH: "15" - LINKS_MIN_PER_PAGE: "10" - LINKS_MAX_PER_PAGE: "15" - MAX_COUNTER: "10" - CANARY_TOKEN_TRIES: "10" - PROBABILITY_ERROR_CODES: "0" - SERVER_HEADER: "Apache/2.2.22 (Ubuntu)" -# CANARY_TOKEN_URL: set-your-canary-token-url-here -# TIMEZONE: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome") - # Database configuration - DATABASE_PATH: "data/krawl.db" - DATABASE_RETENTION_DAYS: "30" \ No newline at end of file + config.yaml: | + # Krawl Honeypot Configuration + server: + port: 5000 + delay: 100 + timezone: null # e.g., "America/New_York" or null for system default + + links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + + canary: + token_url: null # Optional canary token URL + token_tries: 10 + + dashboard: + # Auto-generates random path if null + # Can be set to "/dashboard" or similar + secret_path: null + + api: + server_url: null + server_port: 8080 + server_path: "/api/v2/users" + + database: + path: "data/krawl.db" + retention_days: 30 + + behavior: + probability_error_codes: 0 # 0-100 percentage diff --git a/kubernetes/manifests/deployment.yaml b/kubernetes/manifests/deployment.yaml index 1650721..f970625 100644 --- a/kubernetes/manifests/deployment.yaml +++ b/kubernetes/manifests/deployment.yaml @@ -23,10 +23,14 @@ spec: - containerPort: 5000 name: http protocol: TCP - envFrom: - - configMapRef: - name: krawl-config + env: + - name: CONFIG_LOCATION + value: "config.yaml" volumeMounts: + - name: config + mountPath: /app/config.yaml + subPath: 
config.yaml + readOnly: true - name: wordlists mountPath: /app/wordlists.json subPath: wordlists.json @@ -41,6 +45,9 @@ spec: memory: "256Mi" cpu: "500m" volumes: + - name: config + configMap: + name: krawl-config - name: wordlists configMap: name: krawl-wordlists diff --git a/requirements.txt b/requirements.txt index 94f74f2..8cb6dc5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,8 @@ # Krawl Honeypot Dependencies # Install with: pip install -r requirements.txt +# Configuration +PyYAML>=6.0 + # Database ORM SQLAlchemy>=2.0.0,<3.0.0 diff --git a/src/config.py b/src/config.py index 87fca1c..fb679b4 100644 --- a/src/config.py +++ b/src/config.py @@ -1,11 +1,15 @@ #!/usr/bin/env python3 import os +import sys from dataclasses import dataclass +from pathlib import Path from typing import Optional, Tuple from zoneinfo import ZoneInfo import time +import yaml + @dataclass class Config: @@ -23,12 +27,11 @@ class Config: api_server_port: int = 8080 api_server_path: str = "/api/v2/users" probability_error_codes: int = 0 # Percentage (0-100) - server_header: Optional[str] = None # Database settings database_path: str = "data/krawl.db" database_retention_days: int = 30 timezone: str = None # IANA timezone (e.g., 'America/New_York', 'Europe/Rome') - + @staticmethod # Try to fetch timezone before if not set def get_system_timezone() -> str: @@ -38,16 +41,16 @@ class Config: tz_path = os.readlink('/etc/localtime') if 'zoneinfo/' in tz_path: return tz_path.split('zoneinfo/')[-1] - + local_tz = time.tzname[time.daylight] if local_tz and local_tz != 'UTC': return local_tz except Exception: pass - + # Default fallback to UTC return 'UTC' - + def get_timezone(self) -> ZoneInfo: """Get configured timezone as ZoneInfo object""" if self.timezone: @@ -55,7 +58,7 @@ class Config: return ZoneInfo(self.timezone) except Exception: pass - + system_tz = self.get_system_timezone() try: return ZoneInfo(system_tz) @@ -63,31 +66,71 @@ class Config: return ZoneInfo('UTC') 
@classmethod - def from_env(cls) -> 'Config': - """Create configuration from environment variables""" + def from_yaml(cls) -> 'Config': + """Create configuration from YAML file""" + config_location = os.getenv('CONFIG_LOCATION', 'config.yaml') + config_path = Path(__file__).parent.parent / config_location + + try: + with open(config_path, 'r') as f: + data = yaml.safe_load(f) + except FileNotFoundError: + print(f"Error: Configuration file '{config_path}' not found.", file=sys.stderr) + print(f"Please create a config.yaml file or set CONFIG_LOCATION environment variable.", file=sys.stderr) + sys.exit(1) + except yaml.YAMLError as e: + print(f"Error: Invalid YAML in configuration file '{config_path}': {e}", file=sys.stderr) + sys.exit(1) + + if data is None: + data = {} + + # Extract nested values with defaults + server = data.get('server', {}) + links = data.get('links', {}) + canary = data.get('canary', {}) + dashboard = data.get('dashboard', {}) + api = data.get('api', {}) + database = data.get('database', {}) + behavior = data.get('behavior', {}) + + # Handle dashboard_secret_path - auto-generate if null/not set + dashboard_path = dashboard.get('secret_path') + if dashboard_path is None: + dashboard_path = f'/{os.urandom(16).hex()}' + return cls( - port=int(os.getenv('PORT', 5000)), - delay=int(os.getenv('DELAY', 100)), + port=server.get('port', 5000), + delay=server.get('delay', 100), + timezone=server.get('timezone'), links_length_range=( - int(os.getenv('LINKS_MIN_LENGTH', 5)), - int(os.getenv('LINKS_MAX_LENGTH', 15)) + links.get('min_length', 5), + links.get('max_length', 15) ), links_per_page_range=( - int(os.getenv('LINKS_MIN_PER_PAGE', 10)), - int(os.getenv('LINKS_MAX_PER_PAGE', 15)) + links.get('min_per_page', 10), + links.get('max_per_page', 15) ), - char_space=os.getenv('CHAR_SPACE', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'), - max_counter=int(os.getenv('MAX_COUNTER', 10)), - canary_token_url=os.getenv('CANARY_TOKEN_URL'), - 
canary_token_tries=int(os.getenv('CANARY_TOKEN_TRIES', 10)), - dashboard_secret_path=os.getenv('DASHBOARD_SECRET_PATH', f'/{os.urandom(16).hex()}'), - api_server_url=os.getenv('API_SERVER_URL'), - api_server_port=int(os.getenv('API_SERVER_PORT', 8080)), - api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'), - probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 0)), - server_header=os.getenv('SERVER_HEADER'), - database_path=os.getenv('DATABASE_PATH', 'data/krawl.db'), - database_retention_days=int(os.getenv('DATABASE_RETENTION_DAYS', 30)), - timezone=os.getenv('TIMEZONE') # If not set, will use system timezone - + char_space=links.get('char_space', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'), + max_counter=links.get('max_counter', 10), + canary_token_url=canary.get('token_url'), + canary_token_tries=canary.get('token_tries', 10), + dashboard_secret_path=dashboard_path, + api_server_url=api.get('server_url'), + api_server_port=api.get('server_port', 8080), + api_server_path=api.get('server_path', '/api/v2/users'), + probability_error_codes=behavior.get('probability_error_codes', 0), + database_path=database.get('path', 'data/krawl.db'), + database_retention_days=database.get('retention_days', 30), ) + + +_config_instance = None + + +def get_config() -> Config: + """Get the singleton Config instance""" + global _config_instance + if _config_instance is None: + _config_instance = Config.from_yaml() + return _config_instance diff --git a/src/generators.py b/src/generators.py index 6e24ba8..6eca9fd 100644 --- a/src/generators.py +++ b/src/generators.py @@ -9,8 +9,6 @@ import string import json from templates import html_templates from wordlists import get_wordlists -from config import Config -from logger import get_app_logger def random_username() -> str: """Generate random username""" @@ -38,15 +36,9 @@ def random_email(username: str = None) -> str: return f"{username}@{random.choice(wl.email_domains)}" def 
random_server_header() -> str: - """Generate random server header""" - - if Config.from_env().server_header: - server_header = Config.from_env().server_header - else: - wl = get_wordlists() - server_header = random.choice(wl.server_headers) - - return server_header + """Generate random server header from wordlists""" + wl = get_wordlists() + return random.choice(wl.server_headers) def random_api_key() -> str: """Generate random API key""" diff --git a/src/server.py b/src/server.py index 06b7c82..7a59c73 100644 --- a/src/server.py +++ b/src/server.py @@ -8,7 +8,7 @@ Run this file to start the server. import sys from http.server import HTTPServer -from config import Config +from config import get_config from tracker import AccessTracker from handler import Handler from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger @@ -20,24 +20,29 @@ def print_usage(): print(f'Usage: {sys.argv[0]} [FILE]\n') print('FILE is file containing a list of webpage names to serve, one per line.') print('If no file is provided, random links will be generated.\n') - print('Environment Variables:') - print(' PORT - Server port (default: 5000)') - print(' DELAY - Response delay in ms (default: 100)') - print(' LINKS_MIN_LENGTH - Min link length (default: 5)') - print(' LINKS_MAX_LENGTH - Max link length (default: 15)') - print(' LINKS_MIN_PER_PAGE - Min links per page (default: 10)') - print(' LINKS_MAX_PER_PAGE - Max links per page (default: 15)') - print(' MAX_COUNTER - Max counter value (default: 10)') - print(' CANARY_TOKEN_URL - Canary token URL to display') - print(' CANARY_TOKEN_TRIES - Number of tries before showing token (default: 10)') - print(' DASHBOARD_SECRET_PATH - Secret path for dashboard (auto-generated if not set)') - print(' PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)') - print(' CHAR_SPACE - Characters for random links') - print(' SERVER_HEADER - HTTP Server header for deception (default: 
Apache/2.2.22 (Ubuntu))') - print(' DATABASE_PATH - Path to SQLite database (default: data/krawl.db)') - print(' DATABASE_RETENTION_DAYS - Days to retain database records (default: 30)') - print(' TIMEZONE - IANA timezone for logs/dashboard (e.g., America/New_York, Europe/Rome)') - print(' If not set, system timezone will be used') + print('Configuration:') + print(' Configuration is loaded from a YAML file (default: config.yaml)') + print(' Set CONFIG_LOCATION environment variable to use a different file.\n') + print(' Example config.yaml structure:') + print(' server:') + print(' port: 5000') + print(' delay: 100') + print(' timezone: null # or "America/New_York"') + print(' links:') + print(' min_length: 5') + print(' max_length: 15') + print(' min_per_page: 10') + print(' max_per_page: 15') + print(' canary:') + print(' token_url: null') + print(' token_tries: 10') + print(' dashboard:') + print(' secret_path: null # auto-generated if not set') + print(' database:') + print(' path: "data/krawl.db"') + print(' retention_days: 30') + print(' behavior:') + print(' probability_error_codes: 0') def main(): @@ -46,19 +51,17 @@ def main(): print_usage() exit(0) - config = Config.from_env() - + config = get_config() + # Get timezone configuration tz = config.get_timezone() - + # Initialize logging with timezone initialize_logging(timezone=tz) app_logger = get_app_logger() access_logger = get_access_logger() credential_logger = get_credential_logger() - config = Config.from_env() - # Initialize database for persistent storage try: initialize_database(config.database_path)