Merge branch 'feat/config-yaml' into chore/fix-merge-conflicts

This commit is contained in:
Phillip Tarrant
2026-01-03 14:45:32 -06:00
14 changed files with 256 additions and 161 deletions

View File

@@ -4,20 +4,25 @@ LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl
WORKDIR /app WORKDIR /app
# Install Python dependencies # Install gosu for dropping privileges
RUN apt-get update && apt-get install -y --no-install-recommends gosu && \
rm -rf /var/lib/apt/lists/*
COPY requirements.txt /app/ COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt RUN pip install --no-cache-dir -r requirements.txt
COPY src/ /app/src/ COPY src/ /app/src/
COPY wordlists.json /app/ COPY wordlists.json /app/
COPY entrypoint.sh /app/
RUN useradd -m -u 1000 krawl && \ RUN useradd -m -u 1000 krawl && \
chown -R krawl:krawl /app mkdir -p /app/logs /app/data && \
chown -R krawl:krawl /app && \
USER krawl chmod +x /app/entrypoint.sh
EXPOSE 5000 EXPOSE 5000
ENV PYTHONUNBUFFERED=1 ENV PYTHONUNBUFFERED=1
ENTRYPOINT ["/app/entrypoint.sh"]
CMD ["python3", "src/server.py"] CMD ["python3", "src/server.py"]

38
config.yaml Normal file
View File

@@ -0,0 +1,38 @@
# Krawl Honeypot Configuration
server:
port: 5000
delay: 100 # Response delay in milliseconds
timezone: null # e.g., "America/New_York" or null for system default
# Manually set the Server response header; if null, a random one is used.
server_header: "Apache/2.2.22 (Ubuntu)"
links:
min_length: 5
max_length: 15
min_per_page: 10
max_per_page: 15
char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
max_counter: 10
canary:
token_url: null # Optional canary token URL
token_tries: 10
dashboard:
  # If null, a random path is auto-generated.
  # Otherwise set a path such as "/dashboard" -- it MUST start with a forward slash.
  secret_path: "/dashboard"
api:
server_url: null
server_port: 8080
server_path: "/api/v2/users"
database:
path: "data/krawl.db"
retention_days: 30
behavior:
probability_error_codes: 0 # 0-100 percentage

View File

@@ -10,24 +10,10 @@ services:
- "5000:5000" - "5000:5000"
volumes: volumes:
- ./wordlists.json:/app/wordlists.json:ro - ./wordlists.json:/app/wordlists.json:ro
- ./data:/app/data - ./config.yaml:/app/config.yaml:ro
- ./logs:/app/logs
environment: environment:
- PORT=5000 - CONFIG_LOCATION=config.yaml
- DELAY=100
- LINKS_MIN_LENGTH=5
- LINKS_MAX_LENGTH=15
- LINKS_MIN_PER_PAGE=10
- LINKS_MAX_PER_PAGE=15
- MAX_COUNTER=10
- CANARY_TOKEN_TRIES=10
- PROBABILITY_ERROR_CODES=0
- SERVER_HEADER=Apache/2.2.22 (Ubuntu)
# Optional: Set your canary token URL
# - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt
# Optional: Set custom dashboard path (auto-generated if not set)
# - DASHBOARD_SECRET_PATH=/my-secret-dashboard
# Optional: Set timezone for logs and dashboard (e.g., America/New_York, Europe/Rome)
# - TIMEZONE=UTC
restart: unless-stopped restart: unless-stopped
healthcheck: healthcheck:
test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"] test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"]

8
entrypoint.sh Normal file
View File

@@ -0,0 +1,8 @@
#!/bin/sh
# Container entrypoint: fix ownership of the log/data directories, then run
# the given command ("$@", i.e. the image CMD or the user's override) as the
# unprivileged krawl user.
set -e

if [ "$(id -u)" = "0" ]; then
    # Fix ownership of mounted directories.
    # Best-effort on purpose: errors are discarded and ignored so that a
    # failing chown does not stop the container from starting under `set -e`.
    chown -R krawl:krawl /app/logs /app/data 2>/dev/null || true

    # Drop to krawl user and run the application. `exec` replaces this shell
    # so no wrapper process is left between the runtime and the application.
    exec gosu krawl "$@"
fi

# Already running unprivileged (e.g. `docker run --user` or a Kubernetes
# runAsUser): gosu needs root to switch users, so run the command as-is
# instead of failing at startup.
exec "$@"

View File

@@ -5,26 +5,30 @@ metadata:
labels: labels:
{{- include "krawl.labels" . | nindent 4 }} {{- include "krawl.labels" . | nindent 4 }}
data: data:
PORT: {{ .Values.config.port | quote }} config.yaml: |
DELAY: {{ .Values.config.delay | quote }} # Krawl Honeypot Configuration
LINKS_MIN_LENGTH: {{ .Values.config.linksMinLength | quote }} server:
LINKS_MAX_LENGTH: {{ .Values.config.linksMaxLength | quote }} port: {{ .Values.config.server.port }}
LINKS_MIN_PER_PAGE: {{ .Values.config.linksMinPerPage | quote }} delay: {{ .Values.config.server.delay }}
LINKS_MAX_PER_PAGE: {{ .Values.config.linksMaxPerPage | quote }} timezone: {{ .Values.config.server.timezone | toYaml }}
MAX_COUNTER: {{ .Values.config.maxCounter | quote }} links:
CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }} min_length: {{ .Values.config.links.min_length }}
PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }} max_length: {{ .Values.config.links.max_length }}
SERVER_HEADER: {{ .Values.config.serverHeader | quote }} min_per_page: {{ .Values.config.links.min_per_page }}
CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }} max_per_page: {{ .Values.config.links.max_per_page }}
{{- if .Values.config.dashboardSecretPath }} char_space: {{ .Values.config.links.char_space | quote }}
DASHBOARD_SECRET_PATH: {{ .Values.config.dashboardSecretPath | quote }} max_counter: {{ .Values.config.links.max_counter }}
{{- end }} canary:
{{- if .Values.config.serverHeader }} token_url: {{ .Values.config.canary.token_url | toYaml }}
SERVER_HEADER: {{ .Values.config.serverHeader | quote }} token_tries: {{ .Values.config.canary.token_tries }}
{{- end }} dashboard:
{{- if .Values.config.timezone }} secret_path: {{ .Values.config.dashboard.secret_path | toYaml }}
TIMEZONE: {{ .Values.config.timezone | quote }} api:
{{- end }} server_url: {{ .Values.config.api.server_url | toYaml }}
# Database configuration server_port: {{ .Values.config.api.server_port }}
DATABASE_PATH: {{ .Values.database.path | quote }} server_path: {{ .Values.config.api.server_path | quote }}
DATABASE_RETENTION_DAYS: {{ .Values.database.retentionDays | quote }} database:
path: {{ .Values.config.database.path | quote }}
retention_days: {{ .Values.config.database.retention_days }}
behavior:
probability_error_codes: {{ .Values.config.behavior.probability_error_codes }}

View File

@@ -38,18 +38,16 @@ spec:
imagePullPolicy: {{ .Values.image.pullPolicy }} imagePullPolicy: {{ .Values.image.pullPolicy }}
ports: ports:
- name: http - name: http
containerPort: {{ .Values.config.port }} containerPort: {{ .Values.config.server.port }}
protocol: TCP protocol: TCP
envFrom:
- configMapRef:
name: {{ include "krawl.fullname" . }}-config
env: env:
- name: DASHBOARD_SECRET_PATH - name: CONFIG_LOCATION
valueFrom: value: "config.yaml"
secretKeyRef:
name: {{ include "krawl.fullname" . }}
key: dashboard-path
volumeMounts: volumeMounts:
- name: config
mountPath: /app/config.yaml
subPath: config.yaml
readOnly: true
- name: wordlists - name: wordlists
mountPath: /app/wordlists.json mountPath: /app/wordlists.json
subPath: wordlists.json subPath: wordlists.json
@@ -63,6 +61,9 @@ spec:
{{- toYaml . | nindent 12 }} {{- toYaml . | nindent 12 }}
{{- end }} {{- end }}
volumes: volumes:
- name: config
configMap:
name: {{ include "krawl.fullname" . }}-config
- name: wordlists - name: wordlists
configMap: configMap:
name: {{ include "krawl.fullname" . }}-wordlists name: {{ include "krawl.fullname" . }}-wordlists

View File

@@ -1,16 +0,0 @@
{{- $secret := (lookup "v1" "Secret" .Release.Namespace (include "krawl.fullname" .)) -}}
{{- $dashboardPath := "" -}}
{{- if and $secret $secret.data -}}
{{- $dashboardPath = index $secret.data "dashboard-path" | b64dec -}}
{{- else -}}
{{- $dashboardPath = printf "/%s" (randAlphaNum 32) -}}
{{- end -}}
apiVersion: v1
kind: Secret
metadata:
name: {{ include "krawl.fullname" . }}
labels:
{{- include "krawl.labels" . | nindent 4 }}
type: Opaque
stringData:
dashboard-path: {{ $dashboardPath | quote }}

View File

@@ -62,30 +62,36 @@ tolerations: []
affinity: {} affinity: {}
# Application configuration # Application configuration (config.yaml structure)
config: config:
port: 5000 server:
delay: 100 port: 5000
linksMinLength: 5 delay: 100
linksMaxLength: 15 timezone: null # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used.
linksMinPerPage: 10 links:
linksMaxPerPage: 15 min_length: 5
maxCounter: 10 max_length: 15
canaryTokenTries: 10 min_per_page: 10
probabilityErrorCodes: 0 max_per_page: 15
serverHeader: "Apache/2.2.22 (Ubuntu)" char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
# timezone: "UTC" max_counter: 10
# serverHeader: "Apache/2.2.22 (Ubuntu)" canary:
# dashboardSecretPath: "/my-secret-dashboard" token_url: null # Set your canary token URL here
# canaryTokenUrl: set-your-canary-token-url-here token_tries: 10
# timezone: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used. dashboard:
secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard"
api:
server_url: null
server_port: 8080
server_path: "/api/v2/users"
database:
path: "data/krawl.db"
retention_days: 30
behavior:
probability_error_codes: 0
# Database configuration # Database persistence configuration
database: database:
# Path to the SQLite database file
path: "data/krawl.db"
# Number of days to retain access logs and attack data
retentionDays: 30
# Persistence configuration # Persistence configuration
persistence: persistence:
enabled: true enabled: true

View File

@@ -10,19 +10,41 @@ metadata:
name: krawl-config name: krawl-config
namespace: krawl-system namespace: krawl-system
data: data:
PORT: "5000" config.yaml: |
DELAY: "100" # Krawl Honeypot Configuration
LINKS_MIN_LENGTH: "5" server:
LINKS_MAX_LENGTH: "15" port: 5000
LINKS_MIN_PER_PAGE: "10" delay: 100
LINKS_MAX_PER_PAGE: "15" timezone: null # e.g., "America/New_York" or null for system default
MAX_COUNTER: "10"
CANARY_TOKEN_TRIES: "10" links:
PROBABILITY_ERROR_CODES: "0" min_length: 5
# CANARY_TOKEN_URL: set-your-canary-token-url-here max_length: 15
# Database configuration min_per_page: 10
DATABASE_PATH: "data/krawl.db" max_per_page: 15
DATABASE_RETENTION_DAYS: "30" char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
max_counter: 10
canary:
token_url: null # Optional canary token URL
token_tries: 10
dashboard:
# Auto-generates random path if null
# Can be set to "/dashboard" or similar
secret_path: null
api:
server_url: null
server_port: 8080
server_path: "/api/v2/users"
database:
path: "data/krawl.db"
retention_days: 30
behavior:
probability_error_codes: 0 # 0-100 percentage
--- ---
apiVersion: v1 apiVersion: v1
kind: ConfigMap kind: ConfigMap
@@ -227,6 +249,14 @@ data:
500, 500,
502, 502,
503 503
],
"server_headers": [
"Apache/2.4.41 (Ubuntu)",
"nginx/1.18.0",
"Microsoft-IIS/10.0",
"cloudflare",
"AmazonS3",
"gunicorn/20.1.0"
] ]
} }
--- ---
@@ -269,10 +299,14 @@ spec:
- containerPort: 5000 - containerPort: 5000
name: http name: http
protocol: TCP protocol: TCP
envFrom: env:
- configMapRef: - name: CONFIG_LOCATION
name: krawl-config value: "config.yaml"
volumeMounts: volumeMounts:
- name: config
mountPath: /app/config.yaml
subPath: config.yaml
readOnly: true
- name: wordlists - name: wordlists
mountPath: /app/wordlists.json mountPath: /app/wordlists.json
subPath: wordlists.json subPath: wordlists.json
@@ -287,6 +321,9 @@ spec:
memory: "256Mi" memory: "256Mi"
cpu: "500m" cpu: "500m"
volumes: volumes:
- name: config
configMap:
name: krawl-config
- name: wordlists - name: wordlists
configMap: configMap:
name: krawl-wordlists name: krawl-wordlists

View File

@@ -4,18 +4,38 @@ metadata:
name: krawl-config name: krawl-config
namespace: krawl-system namespace: krawl-system
data: data:
PORT: "5000" config.yaml: |
DELAY: "100" # Krawl Honeypot Configuration
LINKS_MIN_LENGTH: "5" server:
LINKS_MAX_LENGTH: "15" port: 5000
LINKS_MIN_PER_PAGE: "10" delay: 100
LINKS_MAX_PER_PAGE: "15" timezone: null # e.g., "America/New_York" or null for system default
MAX_COUNTER: "10"
CANARY_TOKEN_TRIES: "10" links:
PROBABILITY_ERROR_CODES: "0" min_length: 5
SERVER_HEADER: "Apache/2.2.22 (Ubuntu)" max_length: 15
# CANARY_TOKEN_URL: set-your-canary-token-url-here min_per_page: 10
# TIMEZONE: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome") max_per_page: 15
# Database configuration char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
DATABASE_PATH: "data/krawl.db" max_counter: 10
DATABASE_RETENTION_DAYS: "30"
canary:
token_url: null # Optional canary token URL
token_tries: 10
dashboard:
# Auto-generates random path if null
# Can be set to "/dashboard" or similar
secret_path: null
api:
server_url: null
server_port: 8080
server_path: "/api/v2/users"
database:
path: "data/krawl.db"
retention_days: 30
behavior:
probability_error_codes: 0 # 0-100 percentage

View File

@@ -23,10 +23,14 @@ spec:
- containerPort: 5000 - containerPort: 5000
name: http name: http
protocol: TCP protocol: TCP
envFrom: env:
- configMapRef: - name: CONFIG_LOCATION
name: krawl-config value: "config.yaml"
volumeMounts: volumeMounts:
- name: config
mountPath: /app/config.yaml
subPath: config.yaml
readOnly: true
- name: wordlists - name: wordlists
mountPath: /app/wordlists.json mountPath: /app/wordlists.json
subPath: wordlists.json subPath: wordlists.json
@@ -41,6 +45,9 @@ spec:
memory: "256Mi" memory: "256Mi"
cpu: "500m" cpu: "500m"
volumes: volumes:
- name: config
configMap:
name: krawl-config
- name: wordlists - name: wordlists
configMap: configMap:
name: krawl-wordlists name: krawl-wordlists

View File

@@ -9,8 +9,7 @@ import string
import json import json
from templates import html_templates from templates import html_templates
from wordlists import get_wordlists from wordlists import get_wordlists
from config import Config from config import get_config
from logger import get_app_logger
def random_username() -> str: def random_username() -> str:
"""Generate random username""" """Generate random username"""
@@ -38,15 +37,12 @@ def random_email(username: str = None) -> str:
return f"{username}@{random.choice(wl.email_domains)}" return f"{username}@{random.choice(wl.email_domains)}"
def random_server_header() -> str: def random_server_header() -> str:
"""Generate random server header""" """Generate random server header from wordlists"""
config = get_config()
if Config.from_env().server_header: if config.server_header:
server_header = Config.from_env().server_header return config.server_header
else: wl = get_wordlists()
wl = get_wordlists() return random.choice(wl.server_headers)
server_header = random.choice(wl.server_headers)
return server_header
def random_api_key() -> str: def random_api_key() -> str:
"""Generate random API key""" """Generate random API key"""

View File

@@ -8,7 +8,7 @@ Run this file to start the server.
import sys import sys
from http.server import HTTPServer from http.server import HTTPServer
from config import Config from config import get_config
from tracker import AccessTracker from tracker import AccessTracker
from handler import Handler from handler import Handler
from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger from logger import initialize_logging, get_app_logger, get_access_logger, get_credential_logger
@@ -20,24 +20,29 @@ def print_usage():
print(f'Usage: {sys.argv[0]} [FILE]\n') print(f'Usage: {sys.argv[0]} [FILE]\n')
print('FILE is file containing a list of webpage names to serve, one per line.') print('FILE is file containing a list of webpage names to serve, one per line.')
print('If no file is provided, random links will be generated.\n') print('If no file is provided, random links will be generated.\n')
print('Environment Variables:') print('Configuration:')
print(' PORT - Server port (default: 5000)') print(' Configuration is loaded from a YAML file (default: config.yaml)')
print(' DELAY - Response delay in ms (default: 100)') print(' Set CONFIG_LOCATION environment variable to use a different file.\n')
print(' LINKS_MIN_LENGTH - Min link length (default: 5)') print(' Example config.yaml structure:')
print(' LINKS_MAX_LENGTH - Max link length (default: 15)') print(' server:')
print(' LINKS_MIN_PER_PAGE - Min links per page (default: 10)') print(' port: 5000')
print(' LINKS_MAX_PER_PAGE - Max links per page (default: 15)') print(' delay: 100')
print(' MAX_COUNTER - Max counter value (default: 10)') print(' timezone: null # or "America/New_York"')
print(' CANARY_TOKEN_URL - Canary token URL to display') print(' links:')
print(' CANARY_TOKEN_TRIES - Number of tries before showing token (default: 10)') print(' min_length: 5')
print(' DASHBOARD_SECRET_PATH - Secret path for dashboard (auto-generated if not set)') print(' max_length: 15')
print(' PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)') print(' min_per_page: 10')
print(' CHAR_SPACE - Characters for random links') print(' max_per_page: 15')
print(' SERVER_HEADER - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))') print(' canary:')
print(' DATABASE_PATH - Path to SQLite database (default: data/krawl.db)') print(' token_url: null')
print(' DATABASE_RETENTION_DAYS - Days to retain database records (default: 30)') print(' token_tries: 10')
print(' TIMEZONE - IANA timezone for logs/dashboard (e.g., America/New_York, Europe/Rome)') print(' dashboard:')
print(' If not set, system timezone will be used') print(' secret_path: null # auto-generated if not set')
print(' database:')
print(' path: "data/krawl.db"')
print(' retention_days: 30')
print(' behavior:')
print(' probability_error_codes: 0')
def main(): def main():
@@ -46,7 +51,7 @@ def main():
print_usage() print_usage()
exit(0) exit(0)
config = Config.from_env() config = get_config()
# Get timezone configuration # Get timezone configuration
tz = config.get_timezone() tz = config.get_timezone()
@@ -57,8 +62,6 @@ def main():
access_logger = get_access_logger() access_logger = get_access_logger()
credential_logger = get_credential_logger() credential_logger = get_credential_logger()
config = Config.from_env()
# Initialize database for persistent storage # Initialize database for persistent storage
try: try:
initialize_database(config.database_path) initialize_database(config.database_path)