diff --git a/.gitignore b/.gitignore
index a36748e..70b93e4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,9 +61,12 @@ secrets/
*.log
logs/
-# Database
+# Data and databases
data/
+**/data/
*.db
+*.sqlite
+*.sqlite3
# Temporary files
*.tmp
diff --git a/Dockerfile b/Dockerfile
index adac20f..2c7b954 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,16 +4,25 @@ LABEL org.opencontainers.image.source=https://github.com/BlessedRebuS/Krawl
WORKDIR /app
+# Install gosu for dropping privileges
+RUN apt-get update && apt-get install -y --no-install-recommends gosu && \
+ rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt /app/
+RUN pip install --no-cache-dir -r requirements.txt
+
COPY src/ /app/src/
COPY wordlists.json /app/
+COPY entrypoint.sh /app/
RUN useradd -m -u 1000 krawl && \
- chown -R krawl:krawl /app
-
-USER krawl
+ mkdir -p /app/logs /app/data && \
+ chown -R krawl:krawl /app && \
+ chmod +x /app/entrypoint.sh
EXPOSE 5000
ENV PYTHONUNBUFFERED=1
+ENTRYPOINT ["/app/entrypoint.sh"]
CMD ["python3", "src/server.py"]
diff --git a/README.md b/README.md
index 7fd0377..f7fe399 100644
--- a/README.md
+++ b/README.md
@@ -48,10 +48,11 @@
-## Star History
-
+## Demo
+Tip: crawl the `robots.txt` paths for additional fun
+### Krawl URL: [http://demo.krawlme.com](http://demo.krawlme.com)
+### View the dashboard [http://demo.krawlme.com/das_dashboard](http://demo.krawlme.com/das_dashboard)
-
## What is Krawl?
**Krawl** is a cloud‑native deception server designed to detect, delay, and analyze malicious web crawlers and automated scanners.
@@ -185,7 +186,7 @@ To customize the deception server installation several **environment variables**
| `CANARY_TOKEN_URL` | External canary token URL | None |
| `DASHBOARD_SECRET_PATH` | Custom dashboard path | Auto-generated |
| `PROBABILITY_ERROR_CODES` | Error response probability (0-100%) | `0` |
-| `SERVER_HEADER` | HTTP Server header for deception, if not set use random server header | |
+| `SERVER_HEADER` | HTTP Server header for deception | `Apache/2.2.22 (Ubuntu)` |
| `TIMEZONE` | IANA timezone for logs and dashboard (e.g., `America/New_York`, `Europe/Rome`) | System timezone |
## robots.txt
@@ -317,3 +318,6 @@ Contributions welcome! Please:
**This is a deception/honeypot system.**
Deploy in isolated environments and monitor carefully for security events.
Use responsibly and in compliance with applicable laws and regulations.
+
+## Star History
+
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..987588c
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,46 @@
+# Krawl Honeypot Configuration
+
+server:
+ port: 5000
+ delay: 100 # Response delay in milliseconds
+ timezone: null # e.g., "America/New_York" or null for system default
+
+ # Manually set the server header; if null, a random one will be used.
+ server_header: "Apache/2.2.22 (Ubuntu)"
+
+links:
+ min_length: 5
+ max_length: 15
+ min_per_page: 10
+ max_per_page: 15
+ char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+ max_counter: 10
+
+canary:
+ token_url: null # Optional canary token URL
+ token_tries: 10
+
+dashboard:
+ # If set to null, a random path will be auto-generated.
+ # Can be set to "/dashboard" or similar; note this MUST include a leading forward slash.
+ secret_path: dashboard
+
+api:
+ server_url: null
+ server_port: 8080
+ server_path: "/api/v2/users"
+
+database:
+ path: "data/krawl.db"
+ retention_days: 30
+
+behavior:
+ probability_error_codes: 0 # 0-100 percentage
+
+analyzer:
+ http_risky_methods_threshold: 0.1
+ violated_robots_threshold: 0.1
+ uneven_request_timing_threshold: 5
+ uneven_request_timing_time_window_seconds: 300
+ user_agents_used_threshold: 1
+ attack_urls_threshold: 1
\ No newline at end of file
diff --git a/docker-compose.yaml b/docker-compose.yaml
index 6f81a47..02b6ae7 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -10,23 +10,10 @@ services:
- "5000:5000"
volumes:
- ./wordlists.json:/app/wordlists.json:ro
+ - ./config.yaml:/app/config.yaml:ro
+ - ./logs:/app/logs
environment:
- - PORT=5000
- - DELAY=100
- - LINKS_MIN_LENGTH=5
- - LINKS_MAX_LENGTH=15
- - LINKS_MIN_PER_PAGE=10
- - LINKS_MAX_PER_PAGE=15
- - MAX_COUNTER=10
- - CANARY_TOKEN_TRIES=10
- - PROBABILITY_ERROR_CODES=0
- # - SERVER_HEADER=Apache/2.2.22 (Ubuntu)
- # Optional: Set your canary token URL
- # - CANARY_TOKEN_URL=http://canarytokens.com/api/users/YOUR_TOKEN/passwords.txt
- # Optional: Set custom dashboard path (auto-generated if not set)
- # - DASHBOARD_SECRET_PATH=/my-secret-dashboard
- # Optional: Set timezone for logs and dashboard (e.g., America/New_York, Europe/Rome)
- # - TIMEZONE=UTC
+ - CONFIG_LOCATION=config.yaml
restart: unless-stopped
healthcheck:
test: ["CMD", "python3", "-c", "import requests; requests.get('http://localhost:5000')"]
diff --git a/entrypoint.sh b/entrypoint.sh
new file mode 100644
index 0000000..28b5fc0
--- /dev/null
+++ b/entrypoint.sh
@@ -0,0 +1,8 @@
+#!/bin/sh
+set -e
+
+# Fix ownership of mounted directories
+chown -R krawl:krawl /app/logs /app/data 2>/dev/null || true
+
+# Drop to krawl user and run the application
+exec gosu krawl "$@"
diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml
index 17cd952..808d9f5 100644
--- a/helm/templates/configmap.yaml
+++ b/helm/templates/configmap.yaml
@@ -5,25 +5,30 @@ metadata:
labels:
{{- include "krawl.labels" . | nindent 4 }}
data:
- PORT: {{ .Values.config.port | quote }}
- DELAY: {{ .Values.config.delay | quote }}
- LINKS_MIN_LENGTH: {{ .Values.config.linksMinLength | quote }}
- LINKS_MAX_LENGTH: {{ .Values.config.linksMaxLength | quote }}
- LINKS_MIN_PER_PAGE: {{ .Values.config.linksMinPerPage | quote }}
- LINKS_MAX_PER_PAGE: {{ .Values.config.linksMaxPerPage | quote }}
- MAX_COUNTER: {{ .Values.config.maxCounter | quote }}
- CANARY_TOKEN_TRIES: {{ .Values.config.canaryTokenTries | quote }}
- PROBABILITY_ERROR_CODES: {{ .Values.config.probabilityErrorCodes | quote }}
- CANARY_TOKEN_URL: {{ .Values.config.canaryTokenUrl | quote }}
- {{- if .Values.config.dashboardSecretPath }}
- DASHBOARD_SECRET_PATH: {{ .Values.config.dashboardSecretPath | quote }}
- {{- end }}
- {{- if .Values.config.serverHeader }}
- SERVER_HEADER: {{ .Values.config.serverHeader | quote }}
- {{- end }}
- {{- if .Values.config.timezone }}
- TIMEZONE: {{ .Values.config.timezone | quote }}
- {{- end }}
- # Database configuration
- DATABASE_PATH: {{ .Values.database.path | quote }}
- DATABASE_RETENTION_DAYS: {{ .Values.database.retentionDays | quote }}
+ config.yaml: |
+ # Krawl Honeypot Configuration
+ server:
+ port: {{ .Values.config.server.port }}
+ delay: {{ .Values.config.server.delay }}
+ timezone: {{ .Values.config.server.timezone | toYaml }}
+ links:
+ min_length: {{ .Values.config.links.min_length }}
+ max_length: {{ .Values.config.links.max_length }}
+ min_per_page: {{ .Values.config.links.min_per_page }}
+ max_per_page: {{ .Values.config.links.max_per_page }}
+ char_space: {{ .Values.config.links.char_space | quote }}
+ max_counter: {{ .Values.config.links.max_counter }}
+ canary:
+ token_url: {{ .Values.config.canary.token_url | toYaml }}
+ token_tries: {{ .Values.config.canary.token_tries }}
+ dashboard:
+ secret_path: {{ .Values.config.dashboard.secret_path | toYaml }}
+ api:
+ server_url: {{ .Values.config.api.server_url | toYaml }}
+ server_port: {{ .Values.config.api.server_port }}
+ server_path: {{ .Values.config.api.server_path | quote }}
+ database:
+ path: {{ .Values.config.database.path | quote }}
+ retention_days: {{ .Values.config.database.retention_days }}
+ behavior:
+ probability_error_codes: {{ .Values.config.behavior.probability_error_codes }}
diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml
index ecc9655..5635fa3 100644
--- a/helm/templates/deployment.yaml
+++ b/helm/templates/deployment.yaml
@@ -38,18 +38,16 @@ spec:
imagePullPolicy: {{ .Values.image.pullPolicy }}
ports:
- name: http
- containerPort: {{ .Values.config.port }}
+ containerPort: {{ .Values.config.server.port }}
protocol: TCP
- envFrom:
- - configMapRef:
- name: {{ include "krawl.fullname" . }}-config
env:
- - name: DASHBOARD_SECRET_PATH
- valueFrom:
- secretKeyRef:
- name: {{ include "krawl.fullname" . }}
- key: dashboard-path
+ - name: CONFIG_LOCATION
+ value: "config.yaml"
volumeMounts:
+ - name: config
+ mountPath: /app/config.yaml
+ subPath: config.yaml
+ readOnly: true
- name: wordlists
mountPath: /app/wordlists.json
subPath: wordlists.json
@@ -63,6 +61,9 @@ spec:
{{- toYaml . | nindent 12 }}
{{- end }}
volumes:
+ - name: config
+ configMap:
+ name: {{ include "krawl.fullname" . }}-config
- name: wordlists
configMap:
name: {{ include "krawl.fullname" . }}-wordlists
diff --git a/helm/templates/secret.yaml b/helm/templates/secret.yaml
deleted file mode 100644
index 798289c..0000000
--- a/helm/templates/secret.yaml
+++ /dev/null
@@ -1,16 +0,0 @@
-{{- $secret := (lookup "v1" "Secret" .Release.Namespace (include "krawl.fullname" .)) -}}
-{{- $dashboardPath := "" -}}
-{{- if and $secret $secret.data -}}
- {{- $dashboardPath = index $secret.data "dashboard-path" | b64dec -}}
-{{- else -}}
- {{- $dashboardPath = printf "/%s" (randAlphaNum 32) -}}
-{{- end -}}
-apiVersion: v1
-kind: Secret
-metadata:
- name: {{ include "krawl.fullname" . }}
- labels:
- {{- include "krawl.labels" . | nindent 4 }}
-type: Opaque
-stringData:
- dashboard-path: {{ $dashboardPath | quote }}
diff --git a/helm/values.yaml b/helm/values.yaml
index c92bc0b..60b1a66 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -62,29 +62,36 @@ tolerations: []
affinity: {}
-# Application configuration
+# Application configuration (config.yaml structure)
config:
- port: 5000
- delay: 100
- linksMinLength: 5
- linksMaxLength: 15
- linksMinPerPage: 10
- linksMaxPerPage: 15
- maxCounter: 10
- canaryTokenTries: 10
- probabilityErrorCodes: 0
-# timezone: "UTC"
-# serverHeader: "Apache/2.2.22 (Ubuntu)"
-# dashboardSecretPath: "/my-secret-dashboard"
-# canaryTokenUrl: set-your-canary-token-url-here
-# timezone: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used.
+ server:
+ port: 5000
+ delay: 100
+ timezone: null # IANA timezone (e.g., "America/New_York", "Europe/Rome"). If not set, system timezone is used.
+ links:
+ min_length: 5
+ max_length: 15
+ min_per_page: 10
+ max_per_page: 15
+ char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+ max_counter: 10
+ canary:
+ token_url: null # Set your canary token URL here
+ token_tries: 10
+ dashboard:
+ secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard"
+ api:
+ server_url: null
+ server_port: 8080
+ server_path: "/api/v2/users"
+ database:
+ path: "data/krawl.db"
+ retention_days: 30
+ behavior:
+ probability_error_codes: 0
-# Database configuration
+# Database persistence configuration
database:
- # Path to the SQLite database file
- path: "data/krawl.db"
- # Number of days to retain access logs and attack data
- retentionDays: 30
# Persistence configuration
persistence:
enabled: true
diff --git a/kubernetes/krawl-all-in-one-deploy.yaml b/kubernetes/krawl-all-in-one-deploy.yaml
index d1a026c..3344260 100644
--- a/kubernetes/krawl-all-in-one-deploy.yaml
+++ b/kubernetes/krawl-all-in-one-deploy.yaml
@@ -10,19 +10,41 @@ metadata:
name: krawl-config
namespace: krawl-system
data:
- PORT: "5000"
- DELAY: "100"
- LINKS_MIN_LENGTH: "5"
- LINKS_MAX_LENGTH: "15"
- LINKS_MIN_PER_PAGE: "10"
- LINKS_MAX_PER_PAGE: "15"
- MAX_COUNTER: "10"
- CANARY_TOKEN_TRIES: "10"
- PROBABILITY_ERROR_CODES: "0"
-# CANARY_TOKEN_URL: set-your-canary-token-url-here
- # Database configuration
- DATABASE_PATH: "data/krawl.db"
- DATABASE_RETENTION_DAYS: "30"
+ config.yaml: |
+ # Krawl Honeypot Configuration
+ server:
+ port: 5000
+ delay: 100
+ timezone: null # e.g., "America/New_York" or null for system default
+
+ links:
+ min_length: 5
+ max_length: 15
+ min_per_page: 10
+ max_per_page: 15
+ char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+ max_counter: 10
+
+ canary:
+ token_url: null # Optional canary token URL
+ token_tries: 10
+
+ dashboard:
+ # A random path is auto-generated if null
+ # Can be set to "/dashboard" or similar
+ secret_path: null
+
+ api:
+ server_url: null
+ server_port: 8080
+ server_path: "/api/v2/users"
+
+ database:
+ path: "data/krawl.db"
+ retention_days: 30
+
+ behavior:
+ probability_error_codes: 0 # 0-100 percentage
---
apiVersion: v1
kind: ConfigMap
@@ -227,6 +249,14 @@ data:
500,
502,
503
+ ],
+ "server_headers": [
+ "Apache/2.4.41 (Ubuntu)",
+ "nginx/1.18.0",
+ "Microsoft-IIS/10.0",
+ "cloudflare",
+ "AmazonS3",
+ "gunicorn/20.1.0"
]
}
---
@@ -269,10 +299,14 @@ spec:
- containerPort: 5000
name: http
protocol: TCP
- envFrom:
- - configMapRef:
- name: krawl-config
+ env:
+ - name: CONFIG_LOCATION
+ value: "config.yaml"
volumeMounts:
+ - name: config
+ mountPath: /app/config.yaml
+ subPath: config.yaml
+ readOnly: true
- name: wordlists
mountPath: /app/wordlists.json
subPath: wordlists.json
@@ -287,6 +321,9 @@ spec:
memory: "256Mi"
cpu: "500m"
volumes:
+ - name: config
+ configMap:
+ name: krawl-config
- name: wordlists
configMap:
name: krawl-wordlists
@@ -353,7 +390,7 @@ spec:
- podSelector: {}
- namespaceSelector: {}
- ipBlock:
- cidr: 0.0.0.0/0
+ cidr: 0.0.0.0/0
ports:
- protocol: TCP
port: 5000
diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml
index ef357b0..38a287b 100644
--- a/kubernetes/manifests/configmap.yaml
+++ b/kubernetes/manifests/configmap.yaml
@@ -4,18 +4,38 @@ metadata:
name: krawl-config
namespace: krawl-system
data:
- PORT: "5000"
- DELAY: "100"
- LINKS_MIN_LENGTH: "5"
- LINKS_MAX_LENGTH: "15"
- LINKS_MIN_PER_PAGE: "10"
- LINKS_MAX_PER_PAGE: "15"
- MAX_COUNTER: "10"
- CANARY_TOKEN_TRIES: "10"
- PROBABILITY_ERROR_CODES: "0"
- SERVER_HEADER: "Apache/2.2.22 (Ubuntu)"
-# CANARY_TOKEN_URL: set-your-canary-token-url-here
-# TIMEZONE: "UTC" # IANA timezone (e.g., "America/New_York", "Europe/Rome")
- # Database configuration
- DATABASE_PATH: "data/krawl.db"
- DATABASE_RETENTION_DAYS: "30"
\ No newline at end of file
+ config.yaml: |
+ # Krawl Honeypot Configuration
+ server:
+ port: 5000
+ delay: 100
+ timezone: null # e.g., "America/New_York" or null for system default
+
+ links:
+ min_length: 5
+ max_length: 15
+ min_per_page: 10
+ max_per_page: 15
+ char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
+ max_counter: 10
+
+ canary:
+ token_url: null # Optional canary token URL
+ token_tries: 10
+
+ dashboard:
+ # A random path is auto-generated if null
+ # Can be set to "/dashboard" or similar
+ secret_path: null
+
+ api:
+ server_url: null
+ server_port: 8080
+ server_path: "/api/v2/users"
+
+ database:
+ path: "data/krawl.db"
+ retention_days: 30
+
+ behavior:
+ probability_error_codes: 0 # 0-100 percentage
diff --git a/kubernetes/manifests/deployment.yaml b/kubernetes/manifests/deployment.yaml
index 1650721..f970625 100644
--- a/kubernetes/manifests/deployment.yaml
+++ b/kubernetes/manifests/deployment.yaml
@@ -23,10 +23,14 @@ spec:
- containerPort: 5000
name: http
protocol: TCP
- envFrom:
- - configMapRef:
- name: krawl-config
+ env:
+ - name: CONFIG_LOCATION
+ value: "config.yaml"
volumeMounts:
+ - name: config
+ mountPath: /app/config.yaml
+ subPath: config.yaml
+ readOnly: true
- name: wordlists
mountPath: /app/wordlists.json
subPath: wordlists.json
@@ -41,6 +45,9 @@ spec:
memory: "256Mi"
cpu: "500m"
volumes:
+ - name: config
+ configMap:
+ name: krawl-config
- name: wordlists
configMap:
name: krawl-wordlists
diff --git a/requirements.txt b/requirements.txt
index 94f74f2..8cb6dc5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,8 @@
# Krawl Honeypot Dependencies
# Install with: pip install -r requirements.txt
+# Configuration
+PyYAML>=6.0
+
# Database ORM
SQLAlchemy>=2.0.0,<3.0.0
diff --git a/src/analyzer.py b/src/analyzer.py
index 8ebef62..48c5fad 100644
--- a/src/analyzer.py
+++ b/src/analyzer.py
@@ -7,7 +7,7 @@ from pathlib import Path
from datetime import datetime, timedelta
import re
from wordlists import get_wordlists
-
+from config import get_config
"""
Functions for user activity analysis
"""
@@ -47,6 +47,17 @@ class Analyzer:
def infer_user_category(self, ip: str) -> str:
+ config = get_config()
+
+ http_risky_methods_threshold = config.http_risky_methods_threshold
+ violated_robots_threshold = config.violated_robots_threshold
+ uneven_request_timing_threshold = config.uneven_request_timing_threshold
+ user_agents_used_threshold = config.user_agents_used_threshold
+ attack_urls_threshold = config.attack_urls_threshold
+ uneven_request_timing_time_window_seconds = config.uneven_request_timing_time_window_seconds
+
+ print(f"http_risky_methods_threshold: {http_risky_methods_threshold}")
+
score = {}
score["attacker"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
score["good_crawler"] = {"risky_http_methods": False, "robots_violations": False, "uneven_request_timing": False, "different_user_agents": False, "attack_url": False}
@@ -104,14 +115,13 @@ class Analyzer:
#print(f"TOTAL: {total_accesses_count} - GET: {get_accesses_count} - POST: {post_accesses_count}")
- #if >5% attacker or bad crawler
- if total_accesses_count > 0:
+ if total_accesses_count > http_risky_methods_threshold:
http_method_attacker_score = (post_accesses_count + put_accesses_count + delete_accesses_count + options_accesses_count + patch_accesses_count) / total_accesses_count
else:
http_method_attacker_score = 0
#print(f"HTTP Method attacker score: {http_method_attacker_score}")
- if http_method_attacker_score > 0.2:
+ if http_method_attacker_score >= http_risky_methods_threshold:
score["attacker"]["risky_http_methods"] = True
score["good_crawler"]["risky_http_methods"] = False
score["bad_crawler"]["risky_http_methods"] = True
@@ -150,33 +160,28 @@ class Analyzer:
else:
violated_robots_ratio = 0
- if violated_robots_ratio > 0.10:
+ if violated_robots_ratio >= violated_robots_threshold:
score["attacker"]["robots_violations"] = True
score["good_crawler"]["robots_violations"] = False
score["bad_crawler"]["robots_violations"] = True
score["regular_user"]["robots_violations"] = False
else:
- score["attacker"]["robots_violations"] = True
+ score["attacker"]["robots_violations"] = False
score["good_crawler"]["robots_violations"] = False
- score["bad_crawler"]["robots_violations"] = True
+ score["bad_crawler"]["robots_violations"] = False
score["regular_user"]["robots_violations"] = False
#--------------------- Requests Timing ---------------------
#Request rate and timing: steady, throttled, polite vs attackers' bursty, aggressive, or oddly rhythmic behavior
timestamps = [datetime.fromisoformat(item["timestamp"]) for item in accesses]
- print(f"Timestamps #: {len(timestamps)}")
- timestamps = [ts for ts in timestamps if datetime.utcnow() - ts <= timedelta(minutes=5)]
- print(f"Timestamps #: {len(timestamps)}")
+ timestamps = [ts for ts in timestamps if datetime.utcnow() - ts <= timedelta(seconds=uneven_request_timing_time_window_seconds)]
timestamps = sorted(timestamps, reverse=True)
- print(f"Timestamps #: {len(timestamps)}")
time_diffs = []
for i in range(0, len(timestamps)-1):
diff = (timestamps[i] - timestamps[i+1]).total_seconds()
time_diffs.append(diff)
- print(f"Time diffs: {time_diffs}")
-
mean = 0
variance = 0
std = 0
@@ -186,17 +191,17 @@ class Analyzer:
variance = sum((x - mean) ** 2 for x in time_diffs) / len(time_diffs)
std = variance ** 0.5
cv = std/mean
- print(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
+ #print(f"Mean: {mean} - Variance {variance} - Standard Deviation {std} - Coefficient of Variation: {cv}")
- if mean > 4:
+ if mean >= uneven_request_timing_threshold:
score["attacker"]["uneven_request_timing"] = True
score["good_crawler"]["uneven_request_timing"] = False
score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = True
else:
- score["attacker"]["uneven_request_timing"] = True
+ score["attacker"]["uneven_request_timing"] = False
score["good_crawler"]["uneven_request_timing"] = False
- score["bad_crawler"]["uneven_request_timing"] = True
+ score["bad_crawler"]["uneven_request_timing"] = False
score["regular_user"]["uneven_request_timing"] = False
@@ -206,39 +211,31 @@ class Analyzer:
user_agents_used = list(dict.fromkeys(user_agents_used))
#print(f"User agents used: {user_agents_used}")
- if len(user_agents_used)> 4:
+ if len(user_agents_used) >= user_agents_used_threshold:
score["attacker"]["different_user_agents"] = True
score["good_crawler"]["different_user_agents"] = False
score["bad_crawler"]["different_user_agentss"] = True
score["regular_user"]["different_user_agents"] = False
else:
- score["attacker"]["different_user_agents"] = True
+ score["attacker"]["different_user_agents"] = False
score["good_crawler"]["different_user_agents"] = False
- score["bad_crawler"]["different_user_agents"] = True
+ score["bad_crawler"]["different_user_agents"] = False
score["regular_user"]["different_user_agents"] = False
#--------------------- Attack URLs ---------------------
- attack_url_found = False
- # attack_types = {
- # 'path_traversal': r'\.\.',
- # 'sql_injection': r"('|--|;|\bOR\b|\bUNION\b|\bSELECT\b|\bDROP\b)",
- # 'xss_attempt': r'(
+