diff --git a/.gitignore b/.gitignore index a36748e..70b93e4 100644 --- a/.gitignore +++ b/.gitignore @@ -61,9 +61,12 @@ secrets/ *.log logs/ -# Database +# Data and databases data/ +**/data/ *.db +*.sqlite +*.sqlite3 # Temporary files *.tmp diff --git a/Dockerfile b/Dockerfile index adac20f..2c7b954 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,16 +4,25 @@ LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl WORKDIR /app +# Install gosu for dropping privileges +RUN apt-get update && apt-get install -y --no-install-recommends gosu && \ + rm -rf /var/lib/apt/lists/* + +COPY requirements.txt /app/ +RUN pip install --no-cache-dir -r requirements.txt + COPY src/ /app/src/ COPY wordlists.json /app/ +COPY entrypoint.sh /app/ RUN useradd -m -u 1000 krawl && \ - chown -R krawl:krawl /app - -USER krawl + mkdir -p /app/logs /app/data && \ + chown -R krawl:krawl /app && \ + chmod +x /app/entrypoint.sh EXPOSE 5000 ENV PYTHONUNBUFFERED=1 +ENTRYPOINT ["/app/entrypoint.sh"] CMD ["python3", "src/server.py"] diff --git a/README.md b/README.md index 7fd0377..f7fe399 100644 --- a/README.md +++ b/README.md @@ -48,10 +48,11 @@
-## Star History -Star History Chart +## Demo +Tip: crawl the `robots.txt` paths for additional fun +### Krawl URL: [http://demo.krawlme.com](http://demo.krawlme.com) +### View the dashboard [http://demo.krawlme.com/das_dashboard](http://demo.krawlme.com/das_dashboard) - ## What is Krawl? **Krawl** is a cloud‑native deception server designed to detect, delay, and analyze malicious web crawlers and automated scanners. @@ -185,7 +186,7 @@ To customize the deception server installation several **environment variables** | `CANARY_TOKEN_URL` | External canary token URL | None | | `DASHBOARD_SECRET_PATH` | Custom dashboard path | Auto-generated | | `PROBABILITY_ERROR_CODES` | Error response probability (0-100%) | `0` | -| `SERVER_HEADER` | HTTP Server header for deception, if not set use random server header | | +| `SERVER_HEADER` | HTTP Server header for deception | `Apache/2.2.22 (Ubuntu)` | | `TIMEZONE` | IANA timezone for logs and dashboard (e.g., `America/New_York`, `Europe/Rome`) | System timezone | ## robots.txt @@ -317,3 +318,6 @@ Contributions welcome! Please: **This is a deception/honeypot system.** Deploy in isolated environments and monitor carefully for security events. Use responsibly and in compliance with applicable laws and regulations. + +## Star History +Star History Chart diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..987588c --- /dev/null +++ b/config.yaml @@ -0,0 +1,46 @@ +# Krawl Honeypot Configuration + +server: + port: 5000 + delay: 100 # Response delay in milliseconds + timezone: null # e.g., "America/New_York" or null for system default + + # manually set the server header, if null a random one will be used. 
+ server_header: "Apache/2.2.22 (Ubuntu)" + +links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + +canary: + token_url: null # Optional canary token URL + token_tries: 10 + +dashboard: + # If null (or omitted), a random path is auto-generated when the config is loaded + # Can be set to "/dashboard" or similar; a leading "/" is prepended if it is missing + secret_path: dashboard + +api: + server_url: null + server_port: 8080 + server_path: "/api/v2/users" + +database: + path: "data/krawl.db" + retention_days: 30 + +behavior: + probability_error_codes: 0 # 0-100 percentage + +analyzer: + http_risky_methods_threshold: 0.1 + violated_robots_threshold: 0.1 + uneven_request_timing_threshold: 5 + uneven_request_timing_time_window_seconds: 300 + user_agents_used_threshold: 1 + attack_urls_threshold: 1 \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index 6f81a47..02b6ae7 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -10,23 +10,10 @@ services: - "5000:5000" volumes: - ./wordlists.json:/app/wordlists.json:ro + - ./config.yaml:/app/config.yaml:ro + - ./logs:/app/logs environment: - - PORT=5000 - - DELAY=100 - - LINKS_MIN_LENGTH=5 - - LINKS_MAX_LENGTH=15 - - LINKS_MIN_PER_PAGE=10 - - LINKS_MAX_PER_PAGE=15 - - MAX_COUNTER=10 - - CANARY_TOKEN_TRIES=10 - - PROBABILITY_ERROR_CODES=0 - # - SERVER_HEADER=Apache/2.2.22 (Ubuntu) - # Optional: Set your canary token URL - # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt - # Optional: Set custom dashboard path (auto-generated if not set) - # - DASHBOARD_SECRET_PATH=/my-secret-dashboard - # Optional: Set timezone for logs and dashboard (e.g., America/New_York, Europe/Rome) - # - TIMEZONE=UTC + - CONFIG_LOCATION=config.yaml restart: unless-stopped healthcheck: test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"] diff 
--git a/entrypoint.sh b/entrypoint.sh new file mode 100644 index 0000000..28b5fc0 --- /dev/null +++ b/entrypoint.sh @@ -0,0 +1,8 @@ +#!/bin/sh +set -e + +# Fix ownership of mounted directories +chown -R krawl:krawl /app/logs /app/data 2>/dev/null || true + +# Drop to krawl user and run the application +exec gosu krawl "$@" diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index 17cd952..808d9f5 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -5,25 +5,30 @@ metadata: labels: {{- include "krawl.labels" . | nindent 4 }} data: - PORT: {{ .Values.config.port | quote }} - DELAY: {{ .Values.config.delay | quote }} - LINKS_MIN_LENGTH: {{ .Values.config.linksMinLength | quote }} - LINKS_MAX_LENGTH: {{ .Values.config.linksMaxLength | quote }} - LINKS_MIN_PER_PAGE: {{ .Values.config.linksMinPerPage | quote }} - LINKS_MAX_PER_PAGE: {{ .Values.config.linksMaxPerPage | quote }} - MAX_COUNTER: {{ .Values.config.maxCounter | quote }} - CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }} - PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }} - CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }} - {{- if .Values.config.dashboardSecretPath }} - DASHBOARD_SECRET_PATH: {{ .Values.config.dashboardSecretPath | quote }} - {{- end }} - {{- if .Values.config.serverHeader }} - SERVER_HEADER: {{ .Values.config.serverHeader | quote }} - {{- end }} - {{- if .Values.config.timezone }} - TIMEZONE: {{ .Values.config.timezone | quote }} - {{- end }} - # Database configuration - DATABASE_PATH: {{ .Values.database.path | quote }} - DATABASE_RETENTION_DAYS: {{ .Values.database.retentionDays | quote }} + config.yaml: | + # Krawl Honeypot Configuration + server: + port: {{ .Values.config.server.port }} + delay: {{ .Values.config.server.delay }} + timezone: {{ .Values.config.server.timezone | toYaml }} + links: + min_length: {{ .Values.config.links.min_length }} + max_length: {{ 
.Values.config.links.max_length }} + min_per_page: {{ .Values.config.links.min_per_page }} + max_per_page: {{ .Values.config.links.max_per_page }} + char_space: {{ .Values.config.links.char_space | quote }} + max_counter: {{ .Values.config.links.max_counter }} + canary: + token_url: {{ .Values.config.canary.token_url | toYaml }} + token_tries: {{ .Values.config.canary.token_tries }} + dashboard: + secret_path: {{ .Values.config.dashboard.secret_path | toYaml }} + api: + server_url: {{ .Values.config.api.server_url | toYaml }} + server_port: {{ .Values.config.api.server_port }} + server_path: {{ .Values.config.api.server_path | quote }} + database: + path: {{ .Values.config.database.path | quote }} + retention_days: {{ .Values.config.database.retention_days }} + behavior: + probability_error_codes: {{ .Values.config.behavior.probability_error_codes }} diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index ecc9655..5635fa3 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -38,18 +38,16 @@ spec: imagePullPolicy: {{ .Values.image.pullPolicy }} ports: - name: http - containerPort: {{ .Values.config.port }} + containerPort: {{ .Values.config.server.port }} protocol: TCP - envFrom: - - configMapRef: - name: {{ include "krawl.fullname" . }}-config env: - - name: DASHBOARD_SECRET_PATH - valueFrom: - secretKeyRef: - name: {{ include "krawl.fullname" . }} - key: dashboard-path + - name: CONFIG_LOCATION + value: "config.yaml" volumeMounts: + - name: config + mountPath: /app/config.yaml + subPath: config.yaml + readOnly: true - name: wordlists mountPath: /app/wordlists.json subPath: wordlists.json @@ -63,6 +61,9 @@ spec: {{- toYaml . | nindent 12 }} {{- end }} volumes: + - name: config + configMap: + name: {{ include "krawl.fullname" . }}-config - name: wordlists configMap: name: {{ include "krawl.fullname" . 
}}-wordlists diff --git a/helm/templates/secret.yaml b/helm/templates/secret.yaml deleted file mode 100644 index 798289c..0000000 --- a/helm/templates/secret.yaml +++ /dev/null @@ -1,16 +0,0 @@ -{{- $secret := (lookup "v1" "Secret" .Release.Namespace (include "krawl.fullname" .)) -}} -{{- $dashboardPath := "" -}} -{{- if and $secret $secret.data -}} - {{- $dashboardPath = index $secret.data "dashboard-path" | b64dec -}} -{{- else -}} - {{- $dashboardPath = printf "/%s" (randAlphaNum 32) -}} -{{- end -}} -apiVersion: v1 -kind: Secret -metadata: - name: {{ include "krawl.fullname" . }} - labels: - {{- include "krawl.labels" . | nindent 4 }} -type: Opaque -stringData: - dashboard-path: {{ $dashboardPath | quote }} diff --git a/helm/values.yaml b/helm/values.yaml index c92bc0b..60b1a66 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -62,29 +62,36 @@ tolerations: [] affinity: {} -# Application configuration +# Application configuration (config.yaml structure) config: - port: 5000 - delay: 100 - linksMinLength: 5 - linksMaxLength: 15 - linksMinPerPage: 10 - linksMaxPerPage: 15 - maxCounter: 10 - canaryTokenTries: 10 - probabilityErrorCodes: 0 -# timezone: "UTC" -# serverHeader: "Apache/2.2.22 (Ubuntu)" -# dashboardSecretPath: "/my-secret-dashboard" -# canaryTokenUrl: set-your-canary-token-url-here -# timezone: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used. + server: + port: 5000 + delay: 100 + timezone: null # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used. 
+ links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + canary: + token_url: null # Set your canary token URL here + token_tries: 10 + dashboard: + secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard" + api: + server_url: null + server_port: 8080 + server_path: "/api/v2/users" + database: + path: "data/krawl.db" + retention_days: 30 + behavior: + probability_error_codes: 0 -# Database configuration +# Database persistence configuration database: - # Path to the SQLite database file - path: "data/krawl.db" - # Number of days to retain access logs and attack data - retentionDays: 30 # Persistence configuration persistence: enabled: true diff --git a/kubernetes/krawl-all-in-one-deploy.yaml b/kubernetes/krawl-all-in-one-deploy.yaml index d1a026c..3344260 100644 --- a/kubernetes/krawl-all-in-one-deploy.yaml +++ b/kubernetes/krawl-all-in-one-deploy.yaml @@ -10,19 +10,41 @@ metadata: name: krawl-config namespace: krawl-system data: - PORT: "5000" - DELAY: "100" - LINKS_MIN_LENGTH: "5" - LINKS_MAX_LENGTH: "15" - LINKS_MIN_PER_PAGE: "10" - LINKS_MAX_PER_PAGE: "15" - MAX_COUNTER: "10" - CANARY_TOKEN_TRIES: "10" - PROBABILITY_ERROR_CODES: "0" -# CANARY_TOKEN_URL: set-your-canary-token-url-here - # Database configuration - DATABASE_PATH: "data/krawl.db" - DATABASE_RETENTION_DAYS: "30" + config.yaml: | + # Krawl Honeypot Configuration + server: + port: 5000 + delay: 100 + timezone: null # e.g., "America/New_York" or null for system default + + links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + + canary: + token_url: null # Optional canary token URL + token_tries: 10 + + dashboard: + # Auto-generates random path if null + # Can be set to "/dashboard" or similar + secret_path: null + + api: + server_url: null 
+ server_port: 8080 + server_path: "/api/v2/users" + + database: + path: "data/krawl.db" + retention_days: 30 + + behavior: + probability_error_codes: 0 # 0-100 percentage --- apiVersion: v1 kind: ConfigMap @@ -227,6 +249,14 @@ data: 500, 502, 503 + ], + "server_headers": [ + "Apache/2.4.41 (Ubuntu)", + "nginx/1.18.0", + "Microsoft-IIS/10.0", + "cloudflare", + "AmazonS3", + "gunicorn/20.1.0" ] } --- @@ -269,10 +299,14 @@ spec: - containerPort: 5000 name: http protocol: TCP - envFrom: - - configMapRef: - name: krawl-config + env: + - name: CONFIG_LOCATION + value: "config.yaml" volumeMounts: + - name: config + mountPath: /app/config.yaml + subPath: config.yaml + readOnly: true - name: wordlists mountPath: /app/wordlists.json subPath: wordlists.json @@ -287,6 +321,9 @@ spec: memory: "256Mi" cpu: "500m" volumes: + - name: config + configMap: + name: krawl-config - name: wordlists configMap: name: krawl-wordlists @@ -353,7 +390,7 @@ spec: - podSelector: {} - namespaceSelector: {} - ipBlock: - cidr: 0.0.0.0/0 + cidr: 0.0.0.0/0 ports: - protocol: TCP port: 5000 diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml index ef357b0..38a287b 100644 --- a/kubernetes/manifests/configmap.yaml +++ b/kubernetes/manifests/configmap.yaml @@ -4,18 +4,38 @@ metadata: name: krawl-config namespace: krawl-system data: - PORT: "5000" - DELAY: "100" - LINKS_MIN_LENGTH: "5" - LINKS_MAX_LENGTH: "15" - LINKS_MIN_PER_PAGE: "10" - LINKS_MAX_PER_PAGE: "15" - MAX_COUNTER: "10" - CANARY_TOKEN_TRIES: "10" - PROBABILITY_ERROR_CODES: "0" - SERVER_HEADER: "Apache/2.2.22 (Ubuntu)" -# CANARY_TOKEN_URL: set-your-canary-token-url-here -# TIMEZONE: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome") - # Database configuration - DATABASE_PATH: "data/krawl.db" - DATABASE_RETENTION_DAYS: "30" \ No newline at end of file + config.yaml: | + # Krawl Honeypot Configuration + server: + port: 5000 + delay: 100 + timezone: null # e.g., "America/New_York" or null for 
system default + + links: + min_length: 5 + max_length: 15 + min_per_page: 10 + max_per_page: 15 + char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + max_counter: 10 + + canary: + token_url: null # Optional canary token URL + token_tries: 10 + + dashboard: + # Auto-generates random path if null + # Can be set to "/dashboard" or similar + secret_path: null + + api: + server_url: null + server_port: 8080 + server_path: "/api/v2/users" + + database: + path: "data/krawl.db" + retention_days: 30 + + behavior: + probability_error_codes: 0 # 0-100 percentage diff --git a/kubernetes/manifests/deployment.yaml b/kubernetes/manifests/deployment.yaml index 1650721..f970625 100644 --- a/kubernetes/manifests/deployment.yaml +++ b/kubernetes/manifests/deployment.yaml @@ -23,10 +23,14 @@ spec: - containerPort: 5000 name: http protocol: TCP - envFrom: - - configMapRef: - name: krawl-config + env: + - name: CONFIG_LOCATION + value: "config.yaml" volumeMounts: + - name: config + mountPath: /app/config.yaml + subPath: config.yaml + readOnly: true - name: wordlists mountPath: /app/wordlists.json subPath: wordlists.json @@ -41,6 +45,9 @@ spec: memory: "256Mi" cpu: "500m" volumes: + - name: config + configMap: + name: krawl-config - name: wordlists configMap: name: krawl-wordlists diff --git a/requirements.txt b/requirements.txt index 94f74f2..8cb6dc5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,8 @@ # Krawl Honeypot Dependencies # Install with: pip install -r requirements.txt +# Configuration +PyYAML>=6.0 + # Database ORM SQLAlchemy>=2.0.0,<3.0.0 diff --git a/src/analyzer.py b/src/analyzer.py index 8ebef62..48c5fad 100644 --- a/src/analyzer.py +++ b/src/analyzer.py @@ -7,7 +7,7 @@ from pathlib import Path from datetime import datetime, timedelta import re from wordlists import get_wordlists - +from config import get_config """ Functions for user activity analysis """ @@ -47,6 +47,17 @@ class Analyzer: def infer_user_category(self, ip: str) 
-> str: + config = get_config() + + http_risky_methods_threshold = config.http_risky_methods_threshold + violated_robots_threshold = config.violated_robots_threshold + uneven_request_timing_threshold = config.uneven_request_timing_threshold + user_agents_used_threshold = config.user_agents_used_threshold + attack_urls_threshold = config.attack_urls_threshold + uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds + + print(f"http_risky_methods_threshold: {http_risky_methods_threshold}") + score = {} score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False} @@ -104,14 +115,13 @@ class Analyzer: #print(f"TOTAL: {total_accesses_count} - GET: {get_accesses_count} - POST: {post_accesses_count}") - #if >5% attacker or bad crawler - if total_accesses_count > 0: + if total_accesses_count > http_risky_methods_threshold: http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count else: http_method_attacker_score = 0 #print(f"HTTP Method attacker score: {http_method_attacker_score}") - if http_method_attacker_score > 0.2: + if http_method_attacker_score >= http_risky_methods_threshold: score["attacker"]["risky_http_methods"] = True score["good_crawler"]["risky_http_methods"] = False score["bad_crawler"]["risky_http_methods"] = True @@ -150,33 +160,28 @@ class Analyzer: else: violated_robots_ratio = 0 - if violated_robots_ratio > 0.10: + if violated_robots_ratio >= violated_robots_threshold: score["attacker"]["robots_violations"] = True score["good_crawler"]["robots_violations"] = False score["bad_crawler"]["robots_violations"] = True 
score["regular_user"]["robots_violations"] = False else: - score["attacker"]["robots_violations"] = True + score["attacker"]["robots_violations"] = False score["good_crawler"]["robots_violations"] = False - score["bad_crawler"]["robots_violations"] = True + score["bad_crawler"]["robots_violations"] = False score["regular_user"]["robots_violations"] = False #--------------------- Requests Timing --------------------- #Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses] - print(f"Timestamps #: {len(timestamps)}") - timestamps = [ts for ts in timestamps if datetime.utcnow() - ts <= timedelta(minutes=5)] - print(f"Timestamps #: {len(timestamps)}") + timestamps = [ts for ts in timestamps if datetime.utcnow() - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)] timestamps = sorted(timestamps, reverse=True) - print(f"Timestamps #: {len(timestamps)}") time_diffs = [] for i in range(0, len(timestamps)-1): diff = (timestamps[i] - timestamps[i+1]).total_seconds() time_diffs.append(diff) - print(f"Time diffs: {time_diffs}") - mean = 0 variance = 0 std = 0 @@ -186,17 +191,17 @@ class Analyzer: variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs) std = variance ** 0.5 cv = std/mean - print(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}") + #print(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}") - if mean > 4: + if mean >= uneven_request_timing_threshold: score["attacker"]["uneven_request_timing"] = True score["good_crawler"]["uneven_request_timing"] = False score["bad_crawler"]["uneven_request_timing"] = False score["regular_user"]["uneven_request_timing"] = True else: - score["attacker"]["uneven_request_timing"] = True + score["attacker"]["uneven_request_timing"] = False 
score["good_crawler"]["uneven_request_timing"] = False - score["bad_crawler"]["uneven_request_timing"] = True + score["bad_crawler"]["uneven_request_timing"] = False score["regular_user"]["uneven_request_timing"] = False @@ -206,39 +211,31 @@ class Analyzer: user_agents_used = list(dict.fromkeys(user_agents_used)) #print(f"User agents used: {user_agents_used}") - if len(user_agents_used)> 4: + if len(user_agents_used) >= user_agents_used_threshold: score["attacker"]["different_user_agents"] = True score["good_crawler"]["different_user_agents"] = False score["bad_crawler"]["different_user_agentss"] = True score["regular_user"]["different_user_agents"] = False else: - score["attacker"]["different_user_agents"] = True + score["attacker"]["different_user_agents"] = False score["good_crawler"]["different_user_agents"] = False - score["bad_crawler"]["different_user_agents"] = True + score["bad_crawler"]["different_user_agents"] = False score["regular_user"]["different_user_agents"] = False #--------------------- Attack URLs --------------------- - attack_url_found = False - # attack_types = { - # 'path_traversal': r'\.\.', - # 'sql_injection': r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)", - # 'xss_attempt': r'( attack_urls_threshold: score["attacker"]["attack_url"] = True score["good_crawler"]["attack_url"] = False score["bad_crawler"]["attack_url"] = False @@ -275,12 +272,12 @@ class Analyzer: regular_user_score = regular_user_score + score["regular_user"]["different_user_agents"] * weights["regular_user"]["different_user_agents"] regular_user_score = regular_user_score + score["regular_user"]["attack_url"] * weights["regular_user"]["attack_url"] - #print(f"Attacker score: {attacker_score}") - #print(f"Good Crawler score: {good_crawler_score}") - #print(f"Bad Crawler score: {bad_crawler_score}") - #print(f"Regular User score: {regular_user_score}") + print(f"Attacker score: {attacker_score}") + print(f"Good Crawler score: {good_crawler_score}") + print(f"Bad Crawler 
score: {bad_crawler_score}") + print(f"Regular User score: {regular_user_score}") - analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_url_found} + analyzed_metrics = {"risky_http_methods": http_method_attacker_score, "robots_violations": violated_robots_ratio, "uneven_request_timing": mean, "different_user_agents": user_agents_used, "attack_url": attack_urls_found_list} category_scores = {"attacker": attacker_score, "good_crawler": good_crawler_score, "bad_crawler": bad_crawler_score, "regular_user": regular_user_score} category = max(category_scores, key=category_scores.get) last_analysis = datetime.utcnow() diff --git a/src/config.py b/src/config.py index 87fca1c..815a8ca 100644 --- a/src/config.py +++ b/src/config.py @@ -1,17 +1,22 @@ #!/usr/bin/env python3 import os +import sys from dataclasses import dataclass +from pathlib import Path from typing import Optional, Tuple from zoneinfo import ZoneInfo import time +import yaml + @dataclass class Config: """Configuration class for the deception server""" port: int = 5000 delay: int = 100 # milliseconds + server_header: str = "" links_length_range: Tuple[int, int] = (5, 15) links_per_page_range: Tuple[int, int] = (10, 15) char_space: str = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789' @@ -23,12 +28,20 @@ class Config: api_server_port: int = 8080 api_server_path: str = "/api/v2/users" probability_error_codes: int = 0 # Percentage (0-100) - server_header: Optional[str] = None + # Database settings database_path: str = "data/krawl.db" database_retention_days: int = 30 timezone: str = None # IANA timezone (e.g., 'America/New_York', 'Europe/Rome') - + + # Analyzer settings + http_risky_methods_threshold: float = None + violated_robots_threshold: float = None + uneven_request_timing_threshold: float = None + 
uneven_request_timing_time_window_seconds: float = None + user_agents_used_threshold: float = None + attack_urls_threshold: float = None + @staticmethod # Try to fetch timezone before if not set def get_system_timezone() -> str: @@ -38,16 +51,16 @@ class Config: tz_path = os.readlink('/etc/localtime') if 'zoneinfo/' in tz_path: return tz_path.split('zoneinfo/')[-1] - + local_tz = time.tzname[time.daylight] if local_tz and local_tz != 'UTC': return local_tz except Exception: pass - + # Default fallback to UTC return 'UTC' - + def get_timezone(self) -> ZoneInfo: """Get configured timezone as ZoneInfo object""" if self.timezone: @@ -55,7 +68,7 @@ class Config: return ZoneInfo(self.timezone) except Exception: pass - + system_tz = self.get_system_timezone() try: return ZoneInfo(system_tz) @@ -63,31 +76,83 @@ class Config: return ZoneInfo('UTC') @classmethod - def from_env(cls) -> 'Config': - """Create configuration from environment variables""" + def from_yaml(cls) -> 'Config': + """Create configuration from YAML file""" + config_location = os.getenv('CONFIG_LOCATION', 'config.yaml') + config_path = Path(__file__).parent.parent / config_location + + try: + with open(config_path, 'r') as f: + data = yaml.safe_load(f) + except FileNotFoundError: + print(f"Error: Configuration file '{config_path}' not found.", file=sys.stderr) + print(f"Please create a config.yaml file or set CONFIG_LOCATION environment variable.", file=sys.stderr) + sys.exit(1) + except yaml.YAMLError as e: + print(f"Error: Invalid YAML in configuration file '{config_path}': {e}", file=sys.stderr) + sys.exit(1) + + if data is None: + data = {} + + # Extract nested values with defaults + server = data.get('server', {}) + links = data.get('links', {}) + canary = data.get('canary', {}) + dashboard = data.get('dashboard', {}) + api = data.get('api', {}) + database = data.get('database', {}) + behavior = data.get('behavior', {}) + analyzer = data.get('analyzer', {}) + + # Handle dashboard_secret_path - 
auto-generate if null/not set + dashboard_path = dashboard.get('secret_path') + if dashboard_path is None: + dashboard_path = f'/{os.urandom(16).hex()}' + else: + # ensure the dashboard path starts with a / + if dashboard_path[:1] != "/": + dashboard_path = f"/{dashboard_path}" + return cls( - port=int(os.getenv('PORT', 5000)), - delay=int(os.getenv('DELAY', 100)), + port=server.get('port', 5000), + delay=server.get('delay', 100), + server_header=server.get('server_header',""), + timezone=server.get('timezone'), links_length_range=( - int(os.getenv('LINKS_MIN_LENGTH', 5)), - int(os.getenv('LINKS_MAX_LENGTH', 15)) + links.get('min_length', 5), + links.get('max_length', 15) ), links_per_page_range=( - int(os.getenv('LINKS_MIN_PER_PAGE', 10)), - int(os.getenv('LINKS_MAX_PER_PAGE', 15)) + links.get('min_per_page', 10), + links.get('max_per_page', 15) ), - char_space=os.getenv('CHAR_SPACE', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'), - max_counter=int(os.getenv('MAX_COUNTER', 10)), - canary_token_url=os.getenv('CANARY_TOKEN_URL'), - canary_token_tries=int(os.getenv('CANARY_TOKEN_TRIES', 10)), - dashboard_secret_path=os.getenv('DASHBOARD_SECRET_PATH', f'/{os.urandom(16).hex()}'), - api_server_url=os.getenv('API_SERVER_URL'), - api_server_port=int(os.getenv('API_SERVER_PORT', 8080)), - api_server_path=os.getenv('API_SERVER_PATH', '/api/v2/users'), - probability_error_codes=int(os.getenv('PROBABILITY_ERROR_CODES', 0)), - server_header=os.getenv('SERVER_HEADER'), - database_path=os.getenv('DATABASE_PATH', 'data/krawl.db'), - database_retention_days=int(os.getenv('DATABASE_RETENTION_DAYS', 30)), - timezone=os.getenv('TIMEZONE') # If not set, will use system timezone - + char_space=links.get('char_space', 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'), + max_counter=links.get('max_counter', 10), + canary_token_url=canary.get('token_url'), + canary_token_tries=canary.get('token_tries', 10), + dashboard_secret_path=dashboard_path, + 
api_server_url=api.get('server_url'), + api_server_port=api.get('server_port', 8080), + api_server_path=api.get('server_path', '/api/v2/users'), + probability_error_codes=behavior.get('probability_error_codes', 0), + database_path=database.get('path', 'data/krawl.db'), + database_retention_days=database.get('retention_days', 30), + http_risky_methods_threshold=analyzer.get('http_risky_methods_threshold', 0.1), + violated_robots_threshold=analyzer.get('violated_robots_threshold', 0.1), + uneven_request_timing_threshold=analyzer.get('uneven_request_timing_threshold', 5), + uneven_request_timing_time_window_seconds=analyzer.get('uneven_request_timing_time_window_seconds', 300), + user_agents_used_threshold=analyzer.get('user_agents_used_threshold', 1), + attack_urls_threshold=analyzer.get('attack_urls_threshold', 1) ) + + +_config_instance = None + + +def get_config() -> Config: + """Get the singleton Config instance""" + global _config_instance + if _config_instance is None: + _config_instance = Config.from_yaml() + return _config_instance diff --git a/src/generators.py b/src/generators.py index 6e24ba8..92eb590 100644 --- a/src/generators.py +++ b/src/generators.py @@ -9,8 +9,7 @@ import string import json from templates import html_templates from wordlists import get_wordlists -from config import Config -from logger import get_app_logger +from config import get_config def random_username() -> str: """Generate random username""" @@ -38,15 +37,12 @@ def random_email(username: str = None) -> str: return f"{username}@{random.choice(wl.email_domains)}" def random_server_header() -> str: - """Generate random server header""" - - if Config.from_env().server_header: - server_header = Config.from_env().server_header - else: - wl = get_wordlists() - server_header = random.choice(wl.server_headers) - - return server_header + """Generate random server header from wordlists""" + config = get_config() + if config.server_header: + return config.server_header + wl = 
get_wordlists() + return random.choice(wl.server_headers) def random_api_key() -> str: """Generate random API key""" diff --git a/src/handler.py b/src/handler.py index 1dd6a45..bbc87ea 100644 --- a/src/handler.py +++ b/src/handler.py @@ -6,6 +6,7 @@ import time from datetime import datetime from http.server import BaseHTTPRequestHandler from typing import Optional, List +from urllib.parse import urlparse, parse_qs from config import Config from tracker import AccessTracker @@ -17,6 +18,9 @@ from generators import ( api_response, directory_listing, random_server_header ) from wordlists import get_wordlists +from sql_errors import generate_sql_error_response, get_sql_response_with_data +from xss_detector import detect_xss_pattern, generate_xss_response +from server_errors import generate_server_error class Handler(BaseHTTPRequestHandler): @@ -69,6 +73,67 @@ class Handler(BaseHTTPRequestHandler): if not error_codes: error_codes = [400, 401, 403, 404, 500, 502, 503] return random.choice(error_codes) + + def _parse_query_string(self) -> str: + """Extract query string from the request path""" + parsed = urlparse(self.path) + return parsed.query + + def _handle_sql_endpoint(self, path: str) -> bool: + """ + Handle SQL injection honeypot endpoints. + Returns True if the path was handled, False otherwise. 
+ """ + # SQL-vulnerable endpoints + sql_endpoints = ['/api/search', '/api/sql', '/api/database'] + + base_path = urlparse(path).path + if base_path not in sql_endpoints: + return False + + try: + # Get query parameters + query_string = self._parse_query_string() + + # Log SQL injection attempt + client_ip = self._get_client_ip() + user_agent = self._get_user_agent() + + # Always check for SQL injection patterns + error_msg, content_type, status_code = generate_sql_error_response(query_string or "") + + if error_msg: + # SQL injection detected - log and return error + self.access_logger.warning(f"[SQL INJECTION DETECTED] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}") + self.send_response(status_code) + self.send_header('Content-type', content_type) + self.end_headers() + self.wfile.write(error_msg.encode()) + else: + # No injection detected - return fake data + self.access_logger.info(f"[SQL ENDPOINT] {client_ip} - {base_path} - Query: {query_string[:100] if query_string else 'empty'}") + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + response_data = get_sql_response_with_data(base_path, query_string or "") + self.wfile.write(response_data.encode()) + + return True + + except BrokenPipeError: + # Client disconnected + return True + except Exception as e: + self.app_logger.error(f"Error handling SQL endpoint {path}: {str(e)}") + # Still send a response even on error + try: + self.send_response(500) + self.send_header('Content-type', 'application/json') + self.end_headers() + self.wfile.write(b'{"error": "Internal server error"}') + except: + pass + return True def generate_page(self, seed: str) -> str: """Generate a webpage containing random links or canary token""" @@ -209,6 +274,68 @@ class Handler(BaseHTTPRequestHandler): user_agent = self._get_user_agent() post_data = "" + from urllib.parse import urlparse + base_path = urlparse(self.path).path + + if base_path in 
['/api/search', '/api/sql', '/api/database']: + content_length = int(self.headers.get('Content-Length', 0)) + if content_length > 0: + post_data = self.rfile.read(content_length).decode('utf-8', errors="replace") + + self.access_logger.info(f"[SQL ENDPOINT POST] {client_ip} - {base_path} - Data: {post_data[:100] if post_data else 'empty'}") + + error_msg, content_type, status_code = generate_sql_error_response(post_data) + + try: + if error_msg: + self.access_logger.warning(f"[SQL INJECTION DETECTED POST] {client_ip} - {base_path}") + self.send_response(status_code) + self.send_header('Content-type', content_type) + self.end_headers() + self.wfile.write(error_msg.encode()) + else: + self.send_response(200) + self.send_header('Content-type', 'application/json') + self.end_headers() + response_data = get_sql_response_with_data(base_path, post_data) + self.wfile.write(response_data.encode()) + except BrokenPipeError: + pass + except Exception as e: + self.app_logger.error(f"Error in SQL POST handler: {str(e)}") + return + + if base_path == '/api/contact': + content_length = int(self.headers.get('Content-Length', 0)) + if content_length > 0: + post_data = self.rfile.read(content_length).decode('utf-8', errors="replace") + + parsed_data = {} + for pair in post_data.split('&'): + if '=' in pair: + key, value = pair.split('=', 1) + from urllib.parse import unquote_plus + parsed_data[unquote_plus(key)] = unquote_plus(value) + + xss_detected = any(detect_xss_pattern(v) for v in parsed_data.values()) + + if xss_detected: + self.access_logger.warning(f"[XSS ATTEMPT DETECTED] {client_ip} - {base_path} - Data: {post_data[:200]}") + else: + self.access_logger.info(f"[XSS ENDPOINT POST] {client_ip} - {base_path}") + + try: + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + response_html = generate_xss_response(parsed_data) + self.wfile.write(response_html.encode()) + except BrokenPipeError: + pass + except Exception as e: + 
self.app_logger.error(f"Error in XSS POST handler: {str(e)}") + return + self.access_logger.warning(f"[LOGIN ATTEMPT] {client_ip} - {self.path} - {user_agent[:50]}") content_length = int(self.headers.get('Content-Length', 0)) @@ -250,6 +377,10 @@ class Handler(BaseHTTPRequestHandler): def serve_special_path(self, path: str) -> bool: """Serve special paths like robots.txt, API endpoints, etc.""" + # Check SQL injection honeypot endpoints first + if self._handle_sql_endpoint(path): + return True + try: if path == '/robots.txt': self.send_response(200) @@ -287,7 +418,28 @@ class Handler(BaseHTTPRequestHandler): self.wfile.write(html_templates.login_form().encode()) return True - # WordPress login page + if path in ['/users', '/user', '/database', '/db', '/search']: + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + self.wfile.write(html_templates.product_search().encode()) + return True + + if path in ['/info', '/input', '/contact', '/feedback', '/comment']: + self.send_response(200) + self.send_header('Content-type', 'text/html') + self.end_headers() + self.wfile.write(html_templates.input_form().encode()) + return True + + if path == '/server': + error_html, content_type = generate_server_error() + self.send_response(500) + self.send_header('Content-type', content_type) + self.end_headers() + self.wfile.write(error_html.encode()) + return True + if path in ['/wp-login.php', '/wp-login', '/wp-admin', '/wp-admin/']: self.send_response(200) self.send_header('Content-type', 'text/html') diff --git a/src/server.py b/src/server.py index 4431d55..59244c5 100644 --- a/src/server.py +++ b/src/server.py @@ -8,7 +8,7 @@ Run this file to start the server. 
import sys from http.server import HTTPServer -from config import Config +from config import get_config from tracker import AccessTracker from analyzer import Analyzer from handler import Handler @@ -21,24 +21,29 @@ def print_usage(): print(f'Usage: {sys.argv[0]} [FILE]\n') print('FILE is file containing a list of webpage names to serve, one per line.') print('If no file is provided, random links will be generated.\n') - print('Environment Variables:') - print(' PORT - Server port (default: 5000)') - print(' DELAY - Response delay in ms (default: 100)') - print(' LINKS_MIN_LENGTH - Min link length (default: 5)') - print(' LINKS_MAX_LENGTH - Max link length (default: 15)') - print(' LINKS_MIN_PER_PAGE - Min links per page (default: 10)') - print(' LINKS_MAX_PER_PAGE - Max links per page (default: 15)') - print(' MAX_COUNTER - Max counter value (default: 10)') - print(' CANARY_TOKEN_URL - Canary token URL to display') - print(' CANARY_TOKEN_TRIES - Number of tries before showing token (default: 10)') - print(' DASHBOARD_SECRET_PATH - Secret path for dashboard (auto-generated if not set)') - print(' PROBABILITY_ERROR_CODES - Probability (0-100) to return HTTP error codes (default: 0)') - print(' CHAR_SPACE - Characters for random links') - print(' SERVER_HEADER - HTTP Server header for deception (default: Apache/2.2.22 (Ubuntu))') - print(' DATABASE_PATH - Path to SQLite database (default: data/krawl.db)') - print(' DATABASE_RETENTION_DAYS - Days to retain database records (default: 30)') - print(' TIMEZONE - IANA timezone for logs/dashboard (e.g., America/New_York, Europe/Rome)') - print(' If not set, system timezone will be used') + print('Configuration:') + print(' Configuration is loaded from a YAML file (default: config.yaml)') + print(' Set CONFIG_LOCATION environment variable to use a different file.\n') + print(' Example config.yaml structure:') + print(' server:') + print(' port: 5000') + print(' delay: 100') + print(' timezone: null # or "America/New_York"') 
+ print(' links:') + print(' min_length: 5') + print(' max_length: 15') + print(' min_per_page: 10') + print(' max_per_page: 15') + print(' canary:') + print(' token_url: null') + print(' token_tries: 10') + print(' dashboard:') + print(' secret_path: null # auto-generated if not set') + print(' database:') + print(' path: "data/krawl.db"') + print(' retention_days: 30') + print(' behavior:') + print(' probability_error_codes: 0') def main(): @@ -47,19 +52,17 @@ def main(): print_usage() exit(0) - config = Config.from_env() - + config = get_config() + # Get timezone configuration tz = config.get_timezone() - + # Initialize logging with timezone initialize_logging(timezone=tz) app_logger = get_app_logger() access_logger = get_access_logger() credential_logger = get_credential_logger() - config = Config.from_env() - # Initialize database for persistent storage try: initialize_database(config.database_path) diff --git a/src/server_errors.py b/src/server_errors.py new file mode 100644 index 0000000..7591c64 --- /dev/null +++ b/src/server_errors.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 + +import random +from wordlists import get_wordlists + + +def generate_server_error() -> tuple[str, str]: + wl = get_wordlists() + server_errors = wl.server_errors + + if not server_errors: + return ("500 Internal Server Error", "text/html") + + server_type = random.choice(list(server_errors.keys())) + server_config = server_errors[server_type] + + error_codes = { + 400: "Bad Request", + 401: "Unauthorized", + 403: "Forbidden", + 404: "Not Found", + 500: "Internal Server Error", + 502: "Bad Gateway", + 503: "Service Unavailable" + } + + code = random.choice(list(error_codes.keys())) + message = error_codes[code] + + template = server_config.get('template', '') + version = random.choice(server_config.get('versions', ['1.0'])) + + html = template.replace('{code}', str(code)) + html = html.replace('{message}', message) + html = html.replace('{version}', version) + + if server_type == 
'apache': + os = random.choice(server_config.get('os', ['Ubuntu'])) + html = html.replace('{os}', os) + html = html.replace('{host}', 'localhost') + + return (html, "text/html") + + +def get_server_header(server_type: str = None) -> str: + wl = get_wordlists() + server_errors = wl.server_errors + + if not server_errors: + return "nginx/1.18.0" + + if not server_type: + server_type = random.choice(list(server_errors.keys())) + + server_config = server_errors.get(server_type, {}) + version = random.choice(server_config.get('versions', ['1.0'])) + + server_headers = { + 'nginx': f"nginx/{version}", + 'apache': f"Apache/{version}", + 'iis': f"Microsoft-IIS/{version}", + 'tomcat': f"Apache-Coyote/1.1" + } + + return server_headers.get(server_type, "nginx/1.18.0") diff --git a/src/sql_errors.py b/src/sql_errors.py new file mode 100644 index 0000000..dc84886 --- /dev/null +++ b/src/sql_errors.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python3 + +import random +import re +from typing import Optional, Tuple +from wordlists import get_wordlists + + +def detect_sql_injection_pattern(query_string: str) -> Optional[str]: + if not query_string: + return None + + query_lower = query_string.lower() + + patterns = { + 'quote': [r"'", r'"', r'`'], + 'comment': [r'--', r'#', r'/\*', r'\*/'], + 'union': [r'\bunion\b', r'\bunion\s+select\b'], + 'boolean': [r'\bor\b.*=.*', r'\band\b.*=.*', r"'.*or.*'.*=.*'"], + 'time_based': [r'\bsleep\b', r'\bwaitfor\b', r'\bdelay\b', r'\bbenchmark\b'], + 'stacked': [r';.*select', r';.*drop', r';.*insert', r';.*update', r';.*delete'], + 'command': [r'\bexec\b', r'\bexecute\b', r'\bxp_cmdshell\b'], + 'info_schema': [r'information_schema', r'table_schema', r'table_name'], + } + + for injection_type, pattern_list in patterns.items(): + for pattern in pattern_list: + if re.search(pattern, query_lower): + return injection_type + + return None + + +def get_random_sql_error(db_type: str = None, injection_type: str = None) -> Tuple[str, str]: + wl = get_wordlists() 
+ sql_errors = wl.sql_errors + + if not sql_errors: + return ("Database error occurred", "text/plain") + + if not db_type: + db_type = random.choice(list(sql_errors.keys())) + + db_errors = sql_errors.get(db_type, {}) + + if injection_type and injection_type in db_errors: + errors = db_errors[injection_type] + elif 'generic' in db_errors: + errors = db_errors['generic'] + else: + all_errors = [] + for error_list in db_errors.values(): + if isinstance(error_list, list): + all_errors.extend(error_list) + errors = all_errors if all_errors else ["Database error occurred"] + + error_message = random.choice(errors) if errors else "Database error occurred" + + if '{table}' in error_message: + tables = ['users', 'products', 'orders', 'customers', 'accounts', 'sessions'] + error_message = error_message.replace('{table}', random.choice(tables)) + + if '{column}' in error_message: + columns = ['id', 'name', 'email', 'password', 'username', 'created_at'] + error_message = error_message.replace('{column}', random.choice(columns)) + + return (error_message, "text/plain") + + +def generate_sql_error_response(query_string: str, db_type: str = None) -> Tuple[str, str, int]: + injection_type = detect_sql_injection_pattern(query_string) + + if not injection_type: + return (None, None, None) + + error_message, content_type = get_random_sql_error(db_type, injection_type) + + status_code = 500 + + if random.random() < 0.3: + status_code = 200 + + return (error_message, content_type, status_code) + + +def get_sql_response_with_data(path: str, params: str) -> str: + import json + from generators import random_username, random_email, random_password + + injection_type = detect_sql_injection_pattern(params) + + if injection_type in ['union', 'boolean', 'stacked']: + data = { + "success": True, + "results": [ + { + "id": i, + "username": random_username(), + "email": random_email(), + "password_hash": random_password(), + "role": random.choice(["admin", "user", "moderator"]) + } + for i in 
range(1, random.randint(2, 5)) + ] + } + return json.dumps(data, indent=2) + + return json.dumps({ + "success": True, + "message": "Query executed successfully", + "results": [] + }, indent=2) diff --git a/src/templates/html/generic_search.html b/src/templates/html/generic_search.html new file mode 100644 index 0000000..90171bc --- /dev/null +++ b/src/templates/html/generic_search.html @@ -0,0 +1,66 @@ + + + + Search + + + +

Search

+
+ + +
+
+ + + + diff --git a/src/templates/html/input_form.html b/src/templates/html/input_form.html new file mode 100644 index 0000000..c03b1a8 --- /dev/null +++ b/src/templates/html/input_form.html @@ -0,0 +1,74 @@ + + + + Contact + + + +

Contact

+
+ + + + +
+
+ + + + diff --git a/src/templates/html/robots.txt b/src/templates/html/robots.txt index 2bae8ca..3618937 100644 --- a/src/templates/html/robots.txt +++ b/src/templates/html/robots.txt @@ -11,8 +11,18 @@ Disallow: /login/ Disallow: /admin/login Disallow: /phpMyAdmin/ Disallow: /admin/login.php +Disallow: /users +Disallow: /search +Disallow: /contact +Disallow: /info +Disallow: /input +Disallow: /feedback +Disallow: /server Disallow: /api/v1/users Disallow: /api/v2/secrets +Disallow: /api/search +Disallow: /api/sql +Disallow: /api/database Disallow: /.env Disallow: /credentials.txt Disallow: /passwords.txt diff --git a/src/templates/html_templates.py b/src/templates/html_templates.py index c6ad09a..a7cefbc 100644 --- a/src/templates/html_templates.py +++ b/src/templates/html_templates.py @@ -50,3 +50,13 @@ def directory_listing(path: str, dirs: list, files: list) -> str: rows += row_template.format(href=f, name=f, date="2024-12-01 14:22", size=size) return load_template("directory_listing", path=path, rows=rows) + + +def product_search() -> str: + """Generate product search page with SQL injection honeypot""" + return load_template("generic_search") + + +def input_form() -> str: + """Generate input form page for XSS honeypot""" + return load_template("input_form") diff --git a/src/tracker.py b/src/tracker.py index 4c89c0b..cd8a187 100644 --- a/src/tracker.py +++ b/src/tracker.py @@ -6,7 +6,7 @@ from datetime import datetime from zoneinfo import ZoneInfo import re import urllib.parse - +from wordlists import get_wordlists from database import get_database, DatabaseManager @@ -37,14 +37,19 @@ class AccessTracker: 'burp', 'zap', 'w3af', 'metasploit', 'nuclei', 'gobuster', 'dirbuster' ] - # Common attack types such as xss, shell injection, probes - self.attack_types = { - 'path_traversal': r'\.\.', - 'sql_injection': r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)", - 'xss_attempt': r'( bool: + if not input_string: + return False + + wl = get_wordlists() + xss_pattern = 
wl.attack_patterns.get('xss_attempt', '') + + if not xss_pattern: + xss_pattern = r'( str: + xss_detected = False + reflected_content = [] + + for key, value in input_data.items(): + if detect_xss_pattern(value): + xss_detected = True + reflected_content.append(f"

{key}: {value}

") + + if xss_detected: + html = f""" + + + + Submission Received + + + +
+

Thank you for your submission!

+

We have received your information:

+ {''.join(reflected_content)} +

We will get back to you shortly.

+
+ + +""" + return html + + return """ + + + + Submission Received + + + +
+

Thank you for your submission!

+

Your message has been received and we will respond soon.

+
+ + +""" diff --git a/tests/sim_attacks.sh b/tests/sim_attacks.sh index d4a72b2..3502c3a 100755 --- a/tests/sim_attacks.sh +++ b/tests/sim_attacks.sh @@ -17,4 +17,4 @@ curl -s "$TARGET/wp-admin/" echo -e "\n=== Testing Shell Injection ===" curl -s -X POST "$TARGET/ping" -d "host=127.0.0.1; cat /etc/passwd" -echo -e "\n=== Done ===" \ No newline at end of file +echo -e "\n=== Done ===" diff --git a/tests/test_sql_injection.sh b/tests/test_sql_injection.sh new file mode 100644 index 0000000..e178b3c --- /dev/null +++ b/tests/test_sql_injection.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +# Test script for SQL injection honeypot endpoints + +BASE_URL="http://localhost:5000" + +echo "=========================================" +echo "Testing SQL Injection Honeypot Endpoints" +echo "=========================================" +echo "" + +# Test 1: Normal query +echo "Test 1: Normal GET request to /api/search" +curl -s "${BASE_URL}/api/search?q=test" | head -20 +echo "" +echo "---" +echo "" + +# Test 2: SQL injection with single quote +echo "Test 2: SQL injection with single quote" +curl -s "${BASE_URL}/api/search?id=1'" | head -20 +echo "" +echo "---" +echo "" + +# Test 3: UNION-based injection +echo "Test 3: UNION-based SQL injection" +curl -s "${BASE_URL}/api/search?id=1%20UNION%20SELECT%20*" | head -20 +echo "" +echo "---" +echo "" + +# Test 4: Boolean-based injection +echo "Test 4: Boolean-based SQL injection" +curl -s "${BASE_URL}/api/sql?user=admin'%20OR%201=1--" | head -20 +echo "" +echo "---" +echo "" + +# Test 5: Comment-based injection +echo "Test 5: Comment-based SQL injection" +curl -s "${BASE_URL}/api/database?q=test'--" | head -20 +echo "" +echo "---" +echo "" + +# Test 6: Time-based injection +echo "Test 6: Time-based SQL injection" +curl -s "${BASE_URL}/api/search?id=1%20AND%20SLEEP(5)" | head -20 +echo "" +echo "---" +echo "" + +# Test 7: POST request with SQL injection +echo "Test 7: POST request with SQL injection" +curl -s -X POST "${BASE_URL}/api/search" -d 
"username=admin'%20OR%201=1--&password=test" | head -20 +echo "" +echo "---" +echo "" + +# Test 8: Information schema query +echo "Test 8: Information schema injection" +curl -s "${BASE_URL}/api/sql?table=information_schema.tables" | head -20 +echo "" +echo "---" +echo "" + +# Test 9: Stacked queries +echo "Test 9: Stacked queries injection" +curl -s "${BASE_URL}/api/database?id=1;DROP%20TABLE%20users" | head -20 +echo "" +echo "---" +echo "" + +echo "=========================================" +echo "Tests completed!" +echo "Check logs for detailed attack detection" +echo "=========================================" diff --git a/wordlists.json b/wordlists.json index 39ab698..833f1eb 100644 --- a/wordlists.json +++ b/wordlists.json @@ -194,6 +194,171 @@ 502, 503 ], + "server_errors": { + "nginx": { + "versions": ["1.18.0", "1.20.1", "1.22.0", "1.24.0"], + "template": "\n\n\n{code} {message}\n\n\n\n

An error occurred.

\n

Sorry, the page you are looking for is currently unavailable.
\nPlease try again later.

\n

If you are the system administrator of this resource then you should check the error log for details.

\n

Faithfully yours, nginx/{version}.

\n\n" + }, + "apache": { + "versions": ["2.4.41", "2.4.52", "2.4.54", "2.4.57"], + "os": ["Ubuntu", "Debian", "CentOS"], + "template": "\n\n{code} {message}\n\n

{message}

\n

The requested URL was not found on this server.

\n
\n
Apache/{version} ({os}) Server at {host} Port 80
\n" + }, + "iis": { + "versions": ["10.0", "8.5", "8.0"], + "template": "\n\n\n\n{code} - {message}\n\n\n\n

Server Error

\n
\n
\n

{code} - {message}

\n

The page cannot be displayed because an internal server error has occurred.

\n
\n
\n\n" + }, + "tomcat": { + "versions": ["9.0.65", "10.0.27", "10.1.5"], + "template": "HTTP Status {code} - {message}

HTTP Status {code} - {message}


Type Status Report

Description The server encountered an internal error that prevented it from fulfilling this request.


Apache Tomcat/{version}

" + } + }, + "sql_errors": { + "mysql": { + "generic": [ + "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ''1'' at line 1", + "Unknown column '{column}' in 'where clause'", + "Table '{table}' doesn't exist", + "Operand should contain 1 column(s)", + "Subquery returns more than 1 row", + "Duplicate entry 'admin' for key 'PRIMARY'" + ], + "quote": [ + "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near ''''' at line 1", + "Unclosed quotation mark after the character string ''", + "You have an error in your SQL syntax near '\\'' LIMIT 0,30'" + ], + "union": [ + "The used SELECT statements have a different number of columns", + "Operand should contain 1 column(s)", + "Mixing of GROUP columns (MIN(),MAX(),COUNT(),...) with no GROUP columns is illegal" + ], + "boolean": [ + "You have an error in your SQL syntax near 'OR 1=1' at line 1", + "Unknown column '1' in 'where clause'" + ], + "time_based": [ + "Query execution was interrupted", + "Lock wait timeout exceeded; try restarting transaction" + ], + "comment": [ + "You have an error in your SQL syntax; check the manual that corresponds to your MySQL server version for the right syntax to use near '--' at line 1" + ] + }, + "postgresql": { + "generic": [ + "ERROR: syntax error at or near \"1\"", + "ERROR: column \"{column}\" does not exist", + "ERROR: relation \"{table}\" does not exist", + "ERROR: operator does not exist: integer = text", + "ERROR: invalid input syntax for type integer: \"admin\"" + ], + "quote": [ + "ERROR: unterminated quoted string at or near \"'\"", + "ERROR: syntax error at or near \"'\"", + "ERROR: unterminated quoted identifier at or near \"'\"" + ], + "union": [ + "ERROR: each UNION query must have the same number of columns", + "ERROR: UNION types integer and text cannot be matched" + ], + "boolean": [ + "ERROR: syntax error 
at or near \"OR\"", + "ERROR: invalid input syntax for type boolean: \"1=1\"" + ], + "time_based": [ + "ERROR: canceling statement due to user request", + "ERROR: function pg_sleep(integer) does not exist" + ], + "info_schema": [ + "ERROR: permission denied for table {table}", + "ERROR: permission denied for schema information_schema" + ] + }, + "mssql": { + "generic": [ + "Msg 102, Level 15, State 1, Line 1\nIncorrect syntax near '1'.", + "Msg 207, Level 16, State 1, Line 1\nInvalid column name '{column}'.", + "Msg 208, Level 16, State 1, Line 1\nInvalid object name '{table}'.", + "Msg 245, Level 16, State 1, Line 1\nConversion failed when converting the varchar value 'admin' to data type int." + ], + "quote": [ + "Msg 105, Level 15, State 1, Line 1\nUnclosed quotation mark after the character string ''.", + "Msg 102, Level 15, State 1, Line 1\nIncorrect syntax near '''." + ], + "union": [ + "Msg 205, Level 16, State 1, Line 1\nAll queries combined using a UNION, INTERSECT or EXCEPT operator must have an equal number of expressions in their target lists.", + "Msg 8167, Level 16, State 1, Line 1\nThe type of column \"{column}\" conflicts with the type of other columns specified in the UNION, INTERSECT, or EXCEPT list." + ], + "boolean": [ + "Msg 102, Level 15, State 1, Line 1\nIncorrect syntax near 'OR'." 
+ ], + "command": [ + "Msg 15281, Level 16, State 1, Procedure xp_cmdshell, Line 1\nSQL Server blocked access to procedure 'sys.xp_cmdshell' of component 'xp_cmdshell'" + ] + }, + "oracle": { + "generic": [ + "ORA-00933: SQL command not properly ended", + "ORA-00904: \"{column}\": invalid identifier", + "ORA-00942: table or view \"{table}\" does not exist", + "ORA-01722: invalid number", + "ORA-01756: quoted string not properly terminated" + ], + "quote": [ + "ORA-01756: quoted string not properly terminated", + "ORA-00933: SQL command not properly ended" + ], + "union": [ + "ORA-01789: query block has incorrect number of result columns", + "ORA-01790: expression must have same datatype as corresponding expression" + ], + "boolean": [ + "ORA-00933: SQL command not properly ended", + "ORA-00920: invalid relational operator" + ] + }, + "sqlite": { + "generic": [ + "near \"1\": syntax error", + "no such column: {column}", + "no such table: {table}", + "unrecognized token: \"'\"", + "incomplete input" + ], + "quote": [ + "unrecognized token: \"'\"", + "incomplete input", + "near \"'\": syntax error" + ], + "union": [ + "SELECTs to the left and right of UNION do not have the same number of result columns" + ] + }, + "mongodb": { + "generic": [ + "MongoError: Can't canonicalize query: BadValue unknown operator: $where", + "MongoError: Failed to parse: { $where: \"this.{column} == '1'\" }", + "SyntaxError: unterminated string literal", + "MongoError: exception: invalid operator: $gt" + ], + "quote": [ + "SyntaxError: unterminated string literal", + "SyntaxError: missing } after property list" + ], + "command": [ + "MongoError: $where is not allowed in this context", + "MongoError: can't eval: security" + ] + } + }, + "attack_patterns": { + "path_traversal": "\\.\\.", + "sql_injection": 
"('|\"|`|--|#|/\\*|\\*/|\\bunion\\b|\\bunion\\s+select\\b|\\bor\\b.*=.*|\\band\\b.*=.*|'.*or.*'.*=.*'|\\bsleep\\b|\\bwaitfor\\b|\\bdelay\\b|\\bbenchmark\\b|;.*select|;.*drop|;.*insert|;.*update|;.*delete|\\bexec\\b|\\bexecute\\b|\\bxp_cmdshell\\b|information_schema|table_schema|table_name)", + "xss_attempt": "(