diff --git a/Dockerfile b/Dockerfile index 63d90bf..2c7b954 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,20 +4,25 @@ LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl WORKDIR /app -# Install Python dependencies +# Install gosu for dropping privileges +RUN apt-get update && apt-get install -y --no-install-recommends gosu && \ + rm -rf /var/lib/apt/lists/* + COPY requirements.txt /app/ RUN pip install --no-cache-dir -r requirements.txt COPY src/ /app/src/ COPY wordlists.json /app/ +COPY entrypoint.sh /app/ RUN useradd -m -u 1000 krawl && \ - chown -R krawl:krawl /app - -USER krawl + mkdir -p /app/logs /app/data && \ + chown -R krawl:krawl /app && \ + chmod +x /app/entrypoint.sh EXPOSE 5000 ENV PYTHONUNBUFFERED=1 +ENTRYPOINT ["/app/entrypoint.sh"] CMD ["python3", "src/server.py"] diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..f9825a0 --- /dev/null +++ b/config.yaml @@ -0,0 +1,38 @@ +# Krawl Honeypot Configuration + +server: + port: 5000 + delay: 100 # Response delay in milliseconds + timezone: null # e.g., "America/New_York" or null for system default + + # manually set the server header, if null a random one will be used. 
+ server_header: "Apache/2.2.22 (Ubuntu)" + +links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + +canary: + token_url: null # Optional canary token URL + token_tries: 10 + +dashboard: + # if set to null, a random path is auto-generated + # can be set to "/dashboard" or similar <-- note this MUST include a forward slash + secret_path: dashboard + +api: + server_url: null + server_port: 8080 + server_path: "/api/v2/users" + +database: + path: "data/krawl.db" + retention_days: 30 + +behavior: + probability_error_codes: 0 # 0-100 percentage \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index 7d519ab..02b6ae7 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -10,24 +10,10 @@ services: - "5000:5000" volumes: - ./wordlists.json:/app/wordlists.json:ro - - ./data:/app/data + - ./config.yaml:/app/config.yaml:ro + - ./logs:/app/logs environment: - - PORT=5000 - - DELAY=100 - - LINKS_MIN_LENGTH=5 - - LINKS_MAX_LENGTH=15 - - LINKS_MIN_PER_PAGE=10 - - LINKS_MAX_PER_PAGE=15 - - MAX_COUNTER=10 - - CANARY_TOKEN_TRIES=10 - - PROBABILITY_ERROR_CODES=0 - - SERVER_HEADER=Apache/2.2.22 (Ubuntu) - # Optional: Set your canary token URL - # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt - # Optional: Set custom dashboard path (auto-generated if not set) - # - DASHBOARD_SECRET_PATH=/my-secret-dashboard - # Optional: Set timezone for logs and dashboard (e.g., America/New_York, Europe/Rome) - # - TIMEZONE=UTC + - CONFIG_LOCATION=config.yaml restart: unless-stopped healthcheck: test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"] diff --git a/entrypoint.sh b/entrypoint.sh new file mode 100644 index 0000000..28b5fc0 --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,8 @@ +#!/bin/sh +set -e + +# Fix ownership of mounted directories +chown -R 
krawl:krawl /app/logs /app/data 2>/dev/null || true + +# Drop to krawl user and run the application +exec gosu krawl "$@" diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index 504b2a1..808d9f5 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -5,26 +5,30 @@ metadata: labels: {{- include "krawl.labels" . | nindent 4 }} data: - PORT: {{ .Values.config.port | quote }} - DELAY: {{ .Values.config.delay | quote }} - LINKS_MIN_LENGTH: {{ .Values.config.linksMinLength | quote }} - LINKS_MAX_LENGTH: {{ .Values.config.linksMaxLength | quote }} - LINKS_MIN_PER_PAGE: {{ .Values.config.linksMinPerPage | quote }} - LINKS_MAX_PER_PAGE: {{ .Values.config.linksMaxPerPage | quote }} - MAX_COUNTER: {{ .Values.config.maxCounter | quote }} - CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }} - PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }} - SERVER_HEADER: {{ .Values.config.serverHeader | quote }} - CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }} - {{- if .Values.config.dashboardSecretPath }} - DASHBOARD_SECRET_PATH: {{ .Values.config.dashboardSecretPath | quote }} - {{- end }} - {{- if .Values.config.serverHeader }} - SERVER_HEADER: {{ .Values.config.serverHeader | quote }} - {{- end }} - {{- if .Values.config.timezone }} - TIMEZONE: {{ .Values.config.timezone | quote }} - {{- end }} - # Database configuration - DATABASE_PATH: {{ .Values.database.path | quote }} - DATABASE_RETENTION_DAYS: {{ .Values.database.retentionDays | quote }} + config.yaml: | + # Krawl Honeypot Configuration + server: + port: {{ .Values.config.server.port }} + delay: {{ .Values.config.server.delay }} + timezone: {{ .Values.config.server.timezone | toYaml }} + links: + min_length: {{ .Values.config.links.min_length }} + max_length: {{ .Values.config.links.max_length }} + min_per_page: {{ .Values.config.links.min_per_page }} + max_per_page: {{ .Values.config.links.max_per_page }} + char_space: {{ 
.Values.config.links.char_space | quote }} + max_counter: {{ .Values.config.links.max_counter }} + canary: + token_url: {{ .Values.config.canary.token_url | toYaml }} + token_tries: {{ .Values.config.canary.token_tries }} + dashboard: + secret_path: {{ .Values.config.dashboard.secret_path | toYaml }} + api: + server_url: {{ .Values.config.api.server_url | toYaml }} + server_port: {{ .Values.config.api.server_port }} + server_path: {{ .Values.config.api.server_path | quote }} + database: + path: {{ .Values.config.database.path | quote }} + retention_days: {{ .Values.config.database.retention_days }} + behavior: + probability_error_codes: {{ .Values.config.behavior.probability_error_codes }} diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index ecc9655..5635fa3 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -38,18 +38,16 @@ spec: imagePullPolicy: {{ .Values.image.pullPolicy }} ports: - name: http - containerPort: {{ .Values.config.port }} + containerPort: {{ .Values.config.server.port }} protocol: TCP - envFrom: - - configMapRef: - name: {{ include "krawl.fullname" . }}-config env: - - name: DASHBOARD_SECRET_PATH - valueFrom: - secretKeyRef: - name: {{ include "krawl.fullname" . }} - key: dashboard-path + - name: CONFIG_LOCATION + value: "config.yaml" volumeMounts: + - name: config + mountPath: /app/config.yaml + subPath: config.yaml + readOnly: true - name: wordlists mountPath: /app/wordlists.json subPath: wordlists.json @@ -63,6 +61,9 @@ spec: {{- toYaml . | nindent 12 }} {{- end }} volumes: + - name: config + configMap: + name: {{ include "krawl.fullname" . }}-config - name: wordlists configMap: name: {{ include "krawl.fullname" . 
}}-wordlists diff --git a/helm/templates/secret.yaml b/helm/templates/secret.yaml deleted file mode 100644 index 798289c..0000000 --- a/helm/templates/secret.yaml +++ /dev/null @@ -1,16 +0,0 @@ -{{- $secret := (lookup "v1" "Secret" .Release.Namespace (include "krawl.fullname" .)) -}} -{{- $dashboardPath := "" -}} -{{- if and $secret $secret.data -}} - {{- $dashboardPath = index $secret.data "dashboard-path" | b64dec -}} -{{- else -}} - {{- $dashboardPath = printf "/%s" (randAlphaNum 32) -}} -{{- end -}} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "krawl.fullname" . }} - labels: - {{- include "krawl.labels" . | nindent 4 }} -type: Opaque -stringData: - dashboard-path: {{ $dashboardPath | quote }} diff --git a/helm/values.yaml b/helm/values.yaml index 5c05d7e..60b1a66 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -62,30 +62,36 @@ tolerations: [] affinity: {} -# Application configuration +# Application configuration (config.yaml structure) config: - port: 5000 - delay: 100 - linksMinLength: 5 - linksMaxLength: 15 - linksMinPerPage: 10 - linksMaxPerPage: 15 - maxCounter: 10 - canaryTokenTries: 10 - probabilityErrorCodes: 0 - serverHeader: "Apache/2.2.22 (Ubuntu)" -# timezone: "UTC" -# serverHeader: "Apache/2.2.22 (Ubuntu)" -# dashboardSecretPath: "/my-secret-dashboard" -# canaryTokenUrl: set-your-canary-token-url-here -# timezone: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used. + server: + port: 5000 + delay: 100 + timezone: null # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used. 
+ links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + canary: + token_url: null # Set your canary token URL here + token_tries: 10 + dashboard: + secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard" + api: + server_url: null + server_port: 8080 + server_path: "/api/v2/users" + database: + path: "data/krawl.db" + retention_days: 30 + behavior: + probability_error_codes: 0 -# Database configuration +# Database persistence configuration database: - # Path to the SQLite database file - path: "data/krawl.db" - # Number of days to retain access logs and attack data - retentionDays: 30 # Persistence configuration persistence: enabled: true diff --git a/kubernetes/krawl-all-in-one-deploy.yaml b/kubernetes/krawl-all-in-one-deploy.yaml index d1a026c..3344260 100644 --- a/kubernetes/krawl-all-in-one-deploy.yaml +++ b/kubernetes/krawl-all-in-one-deploy.yaml @@ -10,19 +10,41 @@ metadata: name: krawl-config namespace: krawl-system data: - PORT: "5000" - DELAY: "100" - LINKS_MIN_LENGTH: "5" - LINKS_MAX_LENGTH: "15" - LINKS_MIN_PER_PAGE: "10" - LINKS_MAX_PER_PAGE: "15" - MAX_COUNTER: "10" - CANARY_TOKEN_TRIES: "10" - PROBABILITY_ERROR_CODES: "0" -# CANARY_TOKEN_URL: set-your-canary-token-url-here - # Database configuration - DATABASE_PATH: "data/krawl.db" - DATABASE_RETENTION_DAYS: "30" + config.yaml: | + # Krawl Honeypot Configuration + server: + port: 5000 + delay: 100 + timezone: null # e.g., "America/New_York" or null for system default + + links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + + canary: + token_url: null # Optional canary token URL + token_tries: 10 + + dashboard: + # Auto-generates random path if null + # Can be set to "/dashboard" or similar + secret_path: null + + api: + server_url: null 
+ server_port: 8080 + server_path: "/api/v2/users" + + database: + path: "data/krawl.db" + retention_days: 30 + + behavior: + probability_error_codes: 0 # 0-100 percentage --- apiVersion: v1 kind: ConfigMap @@ -227,6 +249,14 @@ data: 500, 502, 503 + ], + "server_headers": [ + "Apache/2.4.41 (Ubuntu)", + "nginx/1.18.0", + "Microsoft-IIS/10.0", + "cloudflare", + "AmazonS3", + "gunicorn/20.1.0" ] } --- @@ -269,10 +299,14 @@ spec: - containerPort: 5000 name: http protocol: TCP - envFrom: - - configMapRef: - name: krawl-config + env: + - name: CONFIG_LOCATION + value: "config.yaml" volumeMounts: + - name: config + mountPath: /app/config.yaml + subPath: config.yaml + readOnly: true - name: wordlists mountPath: /app/wordlists.json subPath: wordlists.json @@ -287,6 +321,9 @@ spec: memory: "256Mi" cpu: "500m" volumes: + - name: config + configMap: + name: krawl-config - name: wordlists configMap: name: krawl-wordlists @@ -353,7 +390,7 @@ spec: - podSelector: {} - namespaceSelector: {} - ipBlock: - cidr: 0.0.0.0/0 + cidr: 0.0.0.0/0 ports: - protocol: TCP port: 5000 diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml index 4f5d878..38a287b 100644 --- a/kubernetes/manifests/configmap.yaml +++ b/kubernetes/manifests/configmap.yaml @@ -4,18 +4,38 @@ metadata: name: krawl-config namespace: krawl-system data: - PORT: "5000" - DELAY: "100" - LINKS_MIN_LENGTH: "5" - LINKS_MAX_LENGTH: "15" - LINKS_MIN_PER_PAGE: "10" - LINKS_MAX_PER_PAGE: "15" - MAX_COUNTER: "10" - CANARY_TOKEN_TRIES: "10" - PROBABILITY_ERROR_CODES: "0" - SERVER_HEADER: "Apache/2.2.22 (Ubuntu)" -# CANARY_TOKEN_URL: set-your-canary-token-url-here -# TIMEZONE: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome") - # Database configuration - DATABASE_PATH: "data/krawl.db" - DATABASE_RETENTION_DAYS: "30" + config.yaml: | + # Krawl Honeypot Configuration + server: + port: 5000 + delay: 100 + timezone: null # e.g., "America/New_York" or null for system default + + links: + 
min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + + canary: + token_url: null # Optional canary token URL + token_tries: 10 + + dashboard: + # Auto-generates random path if null + # Can be set to "/dashboard" or similar + secret_path: null + + api: + server_url: null + server_port: 8080 + server_path: "/api/v2/users" + + database: + path: "data/krawl.db" + retention_days: 30 + + behavior: + probability_error_codes: 0 # 0-100 percentage diff --git a/kubernetes/manifests/deployment.yaml b/kubernetes/manifests/deployment.yaml index 1650721..f970625 100644 --- a/kubernetes/manifests/deployment.yaml +++ b/kubernetes/manifests/deployment.yaml @@ -23,10 +23,14 @@ spec: - containerPort: 5000 name: http protocol: TCP - envFrom: - - configMapRef: - name: krawl-config + env: + - name: CONFIG_LOCATION + value: "config.yaml" volumeMounts: + - name: config + mountPath: /app/config.yaml + subPath: config.yaml + readOnly: true - name: wordlists mountPath: /app/wordlists.json subPath: wordlists.json @@ -41,6 +45,9 @@ spec: memory: "256Mi" cpu: "500m" volumes: + - name: config + configMap: + name: krawl-config - name: wordlists configMap: name: krawl-wordlists diff --git a/src/config.py b/src/config.py index 5ce04ee..d8aa2f2 100644 --- a/src/config.py +++ b/src/config.py @@ -140,4 +140,4 @@ def get_config() -> Config: global _config_instance if _config_instance is None: _config_instance = Config.from_yaml() - return _config_instance \ No newline at end of file + return _config_instance diff --git a/src/generators.py b/src/generators.py index 6e24ba8..92eb590 100644 --- a/src/generators.py +++ b/src/generators.py @@ -9,8 +9,7 @@ import string import json from templates import html_templates from wordlists import get_wordlists -from config import Config -from logger import get_app_logger +from config import get_config def random_username() -> str: """Generate random 
username""" @@ -38,15 +37,12 @@ def random_email(username: str = None) -> str: return f"{username}@{random.choice(wl.email_domains)}" def random_server_header() -> str: - """Generate random server header""" - - if Config.from_env().server_header: - server_header = Config.from_env().server_header - else: - wl = get_wordlists() - server_header = random.choice(wl.server_headers) - - return server_header + """Generate random server header from wordlists""" + config = get_config() + if config.server_header: + return config.server_header + wl = get_wordlists() + return random.choice(wl.server_headers) def random_api_key() -> str: """Generate random API key""" diff --git a/src/server.py b/src/server.py index 06b7c82..7a59c73 100644 --- a/src/server.py +++ b/src/server.py @@ -8,7 +8,7 @@ Run this file to start the server. import sys from http.server import HTTPServer -from config import Config +from config import get_config from tracker import AccessTracker from handler import Handler from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger @@ -20,24 +20,29 @@ def print_usage(): print(f'Usage: {sys.argv[0]} [FILE]\n') print('FILE is file containing a list of webpage names to serve, one per line.') print('If no file is provided, random links will be generated.\n') - print('Environment Variables:') - print(' PORT - Server port (default: 5000)') - print(' DELAY - Response delay in ms (default: 100)') - print(' LINKS_MIN_LENGTH - Min link length (default: 5)') - print(' LINKS_MAX_LENGTH - Max link length (default: 15)') - print(' LINKS_MIN_PER_PAGE - Min links per page (default: 10)') - print(' LINKS_MAX_PER_PAGE - Max links per page (default: 15)') - print(' MAX_COUNTER - Max counter value (default: 10)') - print(' CANARY_TOKEN_URL - Canary token URL to display') - print(' CANARY_TOKEN_TRIES - Number of tries before showing token (default: 10)') - print(' DASHBOARD_SECRET_PATH - Secret path for dashboard (auto-generated if not set)') - 
print(' PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)') - print(' CHAR_SPACE - Characters for random links') - print(' SERVER_HEADER - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))') - print(' DATABASE_PATH - Path to SQLite database (default: data/krawl.db)') - print(' DATABASE_RETENTION_DAYS - Days to retain database records (default: 30)') - print(' TIMEZONE - IANA timezone for logs/dashboard (e.g., America/New_York, Europe/Rome)') - print(' If not set, system timezone will be used') + print('Configuration:') + print(' Configuration is loaded from a YAML file (default: config.yaml)') + print(' Set CONFIG_LOCATION environment variable to use a different file.\n') + print(' Example config.yaml structure:') + print(' server:') + print(' port: 5000') + print(' delay: 100') + print(' timezone: null # or "America/New_York"') + print(' links:') + print(' min_length: 5') + print(' max_length: 15') + print(' min_per_page: 10') + print(' max_per_page: 15') + print(' canary:') + print(' token_url: null') + print(' token_tries: 10') + print(' dashboard:') + print(' secret_path: null # auto-generated if not set') + print(' database:') + print(' path: "data/krawl.db"') + print(' retention_days: 30') + print(' behavior:') + print(' probability_error_codes: 0') def main(): @@ -46,19 +51,17 @@ def main(): print_usage() exit(0) - config = Config.from_env() - + config = get_config() + # Get timezone configuration tz = config.get_timezone() - + # Initialize logging with timezone initialize_logging(timezone=tz) app_logger = get_app_logger() access_logger = get_access_logger() credential_logger = get_credential_logger() - config = Config.from_env() - # Initialize database for persistent storage try: initialize_database(config.database_path)