From 8c76f6c84742e6d08d313c47fac4b39750456bff Mon Sep 17 00:00:00 2001
From: Lorenzo Venerandi <68255980+Lore09@users.noreply.github.com>
Date: Mon, 26 Jan 2026 12:36:22 +0100
Subject: [PATCH] Feat/deployment update (#56)
* feat: update analyzer thresholds and add crawl configuration options
* feat: update Helm chart version and add README for installation instructions
* feat: update installation instructions in README and add Docker support
* feat: update deployment manifests and configuration for improved service handling and analyzer settings
* feat: add API endpoint for paginated IP retrieval and enhance dashboard visualization with category filters
* feat: update configuration for Krawl service to use external config file
* feat: refactor code for improved readability and consistency across multiple files
* feat: remove Flake8, Pylint, and test steps from PR checks workflow
---
.github/workflows/pr-checks.yml | 9 -
README.md | 185 +++++++----
config.yaml | 7 +-
helm/Chart.yaml | 5 +-
helm/README.md | 286 ++++++++++++++++++
helm/templates/configmap.yaml | 4 +
helm/values.yaml | 6 +-
kubernetes/krawl-all-in-one-deploy.yaml | 46 +--
kubernetes/manifests/configmap.yaml | 25 +-
kubernetes/manifests/ingress.yaml | 4 +-
kubernetes/manifests/service.yaml | 5 +
kubernetes/manifests/wordlists-configmap.yaml | 12 +
src/config.py | 8 +-
src/database.py | 286 ++++++++++++++----
src/handler.py | 102 ++++++-
src/ip_utils.py | 12 +-
src/tasks/memory_cleanup.py | 9 +-
src/tasks/top_attacking_ips.py | 7 +-
src/templates/dashboard_template.py | 264 +++++++++++-----
src/tracker.py | 12 +-
20 files changed, 1025 insertions(+), 269 deletions(-)
create mode 100644 helm/README.md
diff --git a/.github/workflows/pr-checks.yml b/.github/workflows/pr-checks.yml
index 0259795..9feb01c 100644
--- a/.github/workflows/pr-checks.yml
+++ b/.github/workflows/pr-checks.yml
@@ -37,15 +37,6 @@ jobs:
exit 1
fi
- - name: Flake8 lint
- run: flake8 src/ --max-line-length=120 --extend-ignore=E203,W503
-
- - name: Pylint check
- run: pylint src/ --fail-under=7.0 || true
-
- - name: Run tests
- run: pytest tests/ -v || true
-
build-docker:
name: Build Docker
runs-on: ubuntu-latest
diff --git a/README.md b/README.md
index 1d0e8a5..8b5011c 100644
--- a/README.md
+++ b/README.md
@@ -38,7 +38,7 @@
What is Krawl? •
- Quick Start •
+ Installation •
Honeypot Pages •
Dashboard •
Todo •
@@ -74,100 +74,155 @@ It features:

-## 🚀 Quick Start
-## Helm Chart
+## 🚀 Installation
-Install with default values
+### Docker Run
-```bash
-helm install krawl oci://ghcr.io/blessedrebus/krawl-chart \
- --namespace krawl-system \
- --create-namespace
-```
-
-Install with custom [canary token](#customizing-the-canary-token)
-
-```bash
-helm install krawl oci://ghcr.io/blessedrebus/krawl-chart \
- --namespace krawl-system \
- --create-namespace \
- --set config.canaryTokenUrl="http://your-canary-token-url"
-```
-
-To access the deception server
-
-```bash
-kubectl get svc krawl -n krawl-system
-```
-
-Once the EXTERNAL-IP is assigned, access your deception server at:
-
-```
-http://:5000
-```
-
-## Kubernetes / Kustomize
-Apply all manifests with
-
-```bash
-kubectl apply -f https://raw.githubusercontent.com/BlessedRebuS/Krawl/refs/heads/main/manifests/krawl-all-in-one-deploy.yaml
-```
-
-Retrieve dashboard path with
-```bash
-kubectl get secret krawl-server -n krawl-system -o jsonpath='{.data.dashboard-path}' | base64 -d
-```
-
-Or clone the repo and apply the `manifest` folder with
-
-```bash
-kubectl apply -k manifests
-```
-
-## Docker
-Run Krawl as a docker container with
+Run Krawl with the latest image:
```bash
docker run -d \
-p 5000:5000 \
- -e CANARY_TOKEN_URL="http://your-canary-token-url" \
+ -e KRAWL_PORT=5000 \
+ -e KRAWL_DELAY=100 \
+ -e KRAWL_DASHBOARD_SECRET_PATH="/my-secret-dashboard" \
+ -e KRAWL_DATABASE_RETENTION_DAYS=30 \
--name krawl \
ghcr.io/blessedrebus/krawl:latest
```
-## Docker Compose
-Run Krawl with docker-compose in the project folder with
+Access the server at `http://localhost:5000`
+
+### Docker Compose
+
+Create a `docker-compose.yaml` file:
+
+```yaml
+services:
+ krawl:
+ image: ghcr.io/blessedrebus/krawl:latest
+ container_name: krawl-server
+ ports:
+ - "5000:5000"
+ environment:
+ - CONFIG_LOCATION=config.yaml
+ volumes:
+ - ./config.yaml:/app/config.yaml:ro
+ - krawl-data:/app/data
+ restart: unless-stopped
+
+volumes:
+ krawl-data:
+```
+
+Run with:
```bash
docker-compose up -d
```
-Stop it with
+Stop with:
```bash
docker-compose down
```
-## Python 3.11+
+### Helm Chart
-Clone the repository
+Install with default values:
+
+```bash
+helm install krawl oci://ghcr.io/blessedrebus/krawl-chart \
+ --version 0.2.0 \
+ --namespace krawl-system \
+ --create-namespace
+```
+
+Or create a minimal `values.yaml` file:
+
+```yaml
+service:
+ type: LoadBalancer
+ port: 5000
+
+ingress:
+ enabled: true
+ className: "traefik"
+ hosts:
+ - host: krawl.example.com
+ paths:
+ - path: /
+ pathType: Prefix
+
+config:
+ server:
+ port: 5000
+ delay: 100
+ dashboard:
+ secret_path: null # Auto-generated if not set
+
+database:
+ persistence:
+ enabled: true
+ size: 1Gi
+```
+
+Install with custom values:
+
+```bash
+helm install krawl oci://ghcr.io/blessedrebus/krawl-chart \
+ --version 2.0.0 \
+ --namespace krawl-system \
+ --create-namespace \
+ -f values.yaml
+```
+
+To access the deception server:
+
+```bash
+kubectl get svc krawl -n krawl-system
+```
+
+Once the EXTERNAL-IP is assigned, access your deception server at `http://<EXTERNAL-IP>:5000`
+
+### Kubernetes
+
+Apply all manifests with:
+
+```bash
+kubectl apply -f https://raw.githubusercontent.com/BlessedRebuS/Krawl/refs/heads/main/kubernetes/krawl-all-in-one-deploy.yaml
+```
+
+Or clone the repo and apply the manifest:
+
+```bash
+kubectl apply -f kubernetes/krawl-all-in-one-deploy.yaml
+```
+
+Access the deception server:
+
+```bash
+kubectl get svc krawl-server -n krawl-system
+```
+
+Once the EXTERNAL-IP is assigned, access your deception server at `http://<EXTERNAL-IP>:5000`
+
+### From Source (Python 3.11+)
+
+Clone the repository:
```bash
git clone https://github.com/blessedrebus/krawl.git
cd krawl/src
```
-Run the server
+
+Run the server:
+
```bash
python3 server.py
```
-Visit
-
-`http://localhost:5000`
-
-To access the dashboard
-
-`http://localhost:5000/`
+Visit `http://localhost:5000` and access the dashboard at `http://localhost:5000/<dashboard-secret-path>`
## Configuration via Environment Variables
diff --git a/config.yaml b/config.yaml
index 388b694..3e1d644 100644
--- a/config.yaml
+++ b/config.yaml
@@ -39,7 +39,12 @@ behavior:
analyzer:
http_risky_methods_threshold: 0.1
violated_robots_threshold: 0.1
- uneven_request_timing_threshold: 2
+ uneven_request_timing_threshold: 0.5
uneven_request_timing_time_window_seconds: 300
user_agents_used_threshold: 2
attack_urls_threshold: 1
+
+crawl:
+ infinite_pages_for_malicious: true
+ max_pages_limit: 250
+ ban_duration_seconds: 600
\ No newline at end of file
diff --git a/helm/Chart.yaml b/helm/Chart.yaml
index 938bfa3..b5bc6f2 100644
--- a/helm/Chart.yaml
+++ b/helm/Chart.yaml
@@ -2,8 +2,8 @@ apiVersion: v2
name: krawl-chart
description: A Helm chart for Krawl honeypot server
type: application
-version: 0.1.5
-appVersion: 0.1.8
+version: 0.2.0
+appVersion: 0.2.0
keywords:
- honeypot
- security
@@ -13,3 +13,4 @@ maintainers:
home: https://github.com/blessedrebus/krawl
sources:
- https://github.com/blessedrebus/krawl
+icon: https://raw.githubusercontent.com/blessedrebus/krawl/main/docs/images/krawl-logo.png
\ No newline at end of file
diff --git a/helm/README.md b/helm/README.md
new file mode 100644
index 0000000..0882c3d
--- /dev/null
+++ b/helm/README.md
@@ -0,0 +1,286 @@
+# Krawl Helm Chart
+
+A Helm chart for deploying the Krawl honeypot application on Kubernetes.
+
+## Prerequisites
+
+- Kubernetes 1.19+
+- Helm 3.0+
+- Persistent Volume provisioner (optional, for database persistence)
+
+## Installation
+
+### Add the repository (if applicable)
+
+```bash
+helm repo add krawl https://github.com/BlessedRebuS/Krawl
+helm repo update
+```
+
+### Install from OCI Registry
+
+```bash
+helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 0.2.0
+```
+
+Or with a specific namespace:
+
+```bash
+helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 0.2.0 -n krawl --create-namespace
+```
+
+### Install the chart locally
+
+```bash
+helm install krawl ./helm
+```
+
+### Install with custom values
+
+```bash
+helm install krawl ./helm -f values.yaml
+```
+
+### Install in a specific namespace
+
+```bash
+helm install krawl ./helm -n krawl --create-namespace
+```
+
+## Configuration
+
+The following table lists the main configuration parameters of the Krawl chart and their default values.
+
+### Global Settings
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `replicaCount` | Number of pod replicas | `1` |
+| `image.repository` | Image repository | `ghcr.io/blessedrebus/krawl` |
+| `image.tag` | Image tag | `latest` |
+| `image.pullPolicy` | Image pull policy | `Always` |
+
+### Service Configuration
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `service.type` | Service type | `LoadBalancer` |
+| `service.port` | Service port | `5000` |
+| `service.externalTrafficPolicy` | External traffic policy | `Local` |
+
+### Ingress Configuration
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `ingress.enabled` | Enable ingress | `true` |
+| `ingress.className` | Ingress class name | `traefik` |
+| `ingress.hosts[0].host` | Ingress hostname | `krawl.example.com` |
+
+### Server Configuration
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `config.server.port` | Server port | `5000` |
+| `config.server.delay` | Response delay in milliseconds | `100` |
+| `config.server.timezone` | IANA timezone (e.g., "America/New_York") | `null` |
+
+### Links Configuration
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `config.links.min_length` | Minimum link length | `5` |
+| `config.links.max_length` | Maximum link length | `15` |
+| `config.links.min_per_page` | Minimum links per page | `10` |
+| `config.links.max_per_page` | Maximum links per page | `15` |
+| `config.links.char_space` | Character space for link generation | `abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789` |
+| `config.links.max_counter` | Maximum counter value | `10` |
+
+### Canary Configuration
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `config.canary.token_url` | Canary token URL | `null` |
+| `config.canary.token_tries` | Number of canary token tries | `10` |
+
+### Dashboard Configuration
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `config.dashboard.secret_path` | Secret dashboard path (auto-generated if null) | `null` |
+
+### API Configuration
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `config.api.server_url` | API server URL | `null` |
+| `config.api.server_port` | API server port | `8080` |
+| `config.api.server_path` | API server path | `/api/v2/users` |
+
+### Database Configuration
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `config.database.path` | Database file path | `data/krawl.db` |
+| `config.database.retention_days` | Data retention in days | `30` |
+| `database.persistence.enabled` | Enable persistent volume | `true` |
+| `database.persistence.size` | Persistent volume size | `1Gi` |
+| `database.persistence.accessMode` | Access mode | `ReadWriteOnce` |
+
+### Behavior Configuration
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `config.behavior.probability_error_codes` | Error code probability (0-100) | `0` |
+
+### Analyzer Configuration
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `config.analyzer.http_risky_methods_threshold` | HTTP risky methods threshold | `0.1` |
+| `config.analyzer.violated_robots_threshold` | Violated robots.txt threshold | `0.1` |
+| `config.analyzer.uneven_request_timing_threshold` | Uneven request timing threshold | `0.5` |
+| `config.analyzer.uneven_request_timing_time_window_seconds` | Time window for request timing analysis | `300` |
+| `config.analyzer.user_agents_used_threshold` | User agents threshold | `2` |
+| `config.analyzer.attack_urls_threshold` | Attack URLs threshold | `1` |
+
+### Crawl Configuration
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `config.crawl.infinite_pages_for_malicious` | Infinite pages for malicious crawlers | `true` |
+| `config.crawl.max_pages_limit` | Maximum pages limit for legitimate crawlers | `250` |
+| `config.crawl.ban_duration_seconds` | IP ban duration in seconds | `600` |
+
+### Resource Limits
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `resources.limits.cpu` | CPU limit | `500m` |
+| `resources.limits.memory` | Memory limit | `256Mi` |
+| `resources.requests.cpu` | CPU request | `100m` |
+| `resources.requests.memory` | Memory request | `64Mi` |
+
+### Autoscaling
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `autoscaling.enabled` | Enable horizontal pod autoscaling | `false` |
+| `autoscaling.minReplicas` | Minimum replicas | `1` |
+| `autoscaling.maxReplicas` | Maximum replicas | `1` |
+| `autoscaling.targetCPUUtilizationPercentage` | Target CPU utilization | `70` |
+| `autoscaling.targetMemoryUtilizationPercentage` | Target memory utilization | `80` |
+
+### Network Policy
+
+| Parameter | Description | Default |
+|-----------|-------------|---------|
+| `networkPolicy.enabled` | Enable network policy | `true` |
+
+## Usage Examples
+
+### Basic Installation
+
+```bash
+helm install krawl ./helm
+```
+
+### Installation with Custom Domain
+
+```bash
+helm install krawl ./helm \
+ --set ingress.hosts[0].host=honeypot.example.com
+```
+
+### Enable Canary Tokens
+
+```bash
+helm install krawl ./helm \
+ --set config.canary.token_url=https://canarytokens.com/your-token
+```
+
+### Configure Custom API Endpoint
+
+```bash
+helm install krawl ./helm \
+ --set config.api.server_url=https://api.example.com \
+ --set config.api.server_port=443
+```
+
+### Create Values Override File
+
+Create `custom-values.yaml`:
+
+```yaml
+config:
+ server:
+ port: 8080
+ delay: 500
+ canary:
+ token_url: https://your-canary-token-url
+ dashboard:
+ secret_path: /super-secret-path
+ crawl:
+ max_pages_limit: 500
+ ban_duration_seconds: 3600
+```
+
+Then install:
+
+```bash
+helm install krawl ./helm -f custom-values.yaml
+```
+
+## Upgrading
+
+```bash
+helm upgrade krawl ./helm
+```
+
+## Uninstalling
+
+```bash
+helm uninstall krawl
+```
+
+## Troubleshooting
+
+### Check chart syntax
+
+```bash
+helm lint ./helm
+```
+
+### Dry run to verify values
+
+```bash
+helm install krawl ./helm --dry-run --debug
+```
+
+### Check deployed configuration
+
+```bash
+kubectl get configmap krawl-config -o yaml
+```
+
+### View pod logs
+
+```bash
+kubectl logs -l app.kubernetes.io/name=krawl
+```
+
+## Chart Files
+
+- `Chart.yaml` - Chart metadata
+- `values.yaml` - Default configuration values
+- `templates/` - Kubernetes resource templates
+ - `deployment.yaml` - Krawl deployment
+ - `service.yaml` - Service configuration
+ - `configmap.yaml` - Application configuration
+ - `pvc.yaml` - Persistent volume claim
+ - `ingress.yaml` - Ingress configuration
+ - `hpa.yaml` - Horizontal pod autoscaler
+ - `network-policy.yaml` - Network policies
+
+## Support
+
+For issues and questions, please visit the [Krawl GitHub repository](https://github.com/BlessedRebuS/Krawl).
diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml
index d6e5f5c..f6efdf4 100644
--- a/helm/templates/configmap.yaml
+++ b/helm/templates/configmap.yaml
@@ -39,3 +39,7 @@ data:
uneven_request_timing_time_window_seconds: {{ .Values.config.analyzer.uneven_request_timing_time_window_seconds }}
user_agents_used_threshold: {{ .Values.config.analyzer.user_agents_used_threshold }}
attack_urls_threshold: {{ .Values.config.analyzer.attack_urls_threshold }}
+ crawl:
+ infinite_pages_for_malicious: {{ .Values.config.crawl.infinite_pages_for_malicious }}
+ max_pages_limit: {{ .Values.config.crawl.max_pages_limit }}
+ ban_duration_seconds: {{ .Values.config.crawl.ban_duration_seconds }}
diff --git a/helm/values.yaml b/helm/values.yaml
index 0b83892..35add96 100644
--- a/helm/values.yaml
+++ b/helm/values.yaml
@@ -92,10 +92,14 @@ config:
analyzer:
http_risky_methods_threshold: 0.1
violated_robots_threshold: 0.1
- uneven_request_timing_threshold: 2
+ uneven_request_timing_threshold: 0.5
uneven_request_timing_time_window_seconds: 300
user_agents_used_threshold: 2
attack_urls_threshold: 1
+ crawl:
+ infinite_pages_for_malicious: true
+ max_pages_limit: 250
+ ban_duration_seconds: 600
# Database persistence configuration
database:
diff --git a/kubernetes/krawl-all-in-one-deploy.yaml b/kubernetes/krawl-all-in-one-deploy.yaml
index 3344260..b49d070 100644
--- a/kubernetes/krawl-all-in-one-deploy.yaml
+++ b/kubernetes/krawl-all-in-one-deploy.yaml
@@ -15,8 +15,7 @@ data:
server:
port: 5000
delay: 100
- timezone: null # e.g., "America/New_York" or null for system default
-
+ timezone: null
links:
min_length: 5
max_length: 15
@@ -24,27 +23,31 @@ data:
max_per_page: 15
char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
max_counter: 10
-
canary:
- token_url: null # Optional canary token URL
+ token_url: null
token_tries: 10
-
dashboard:
- # Auto-generates random path if null
- # Can be set to "/dashboard" or similar
secret_path: null
-
api:
server_url: null
server_port: 8080
server_path: "/api/v2/users"
-
database:
path: "data/krawl.db"
retention_days: 30
-
behavior:
- probability_error_codes: 0 # 0-100 percentage
+ probability_error_codes: 0
+ analyzer:
+ http_risky_methods_threshold: 0.1
+ violated_robots_threshold: 0.1
+ uneven_request_timing_threshold: 0.5
+ uneven_request_timing_time_window_seconds: 300
+ user_agents_used_threshold: 2
+ attack_urls_threshold: 1
+ crawl:
+ infinite_pages_for_malicious: true
+ max_pages_limit: 250
+ ban_duration_seconds: 600
---
apiVersion: v1
kind: ConfigMap
@@ -251,12 +254,16 @@ data:
503
],
"server_headers": [
- "Apache/2.4.41 (Ubuntu)",
+ "Apache/2.2.22 (Ubuntu)",
"nginx/1.18.0",
"Microsoft-IIS/10.0",
- "cloudflare",
- "AmazonS3",
- "gunicorn/20.1.0"
+ "LiteSpeed",
+ "Caddy",
+ "Gunicorn/20.0.4",
+ "uvicorn/0.13.4",
+ "Express",
+ "Flask/1.1.2",
+ "Django/3.1"
]
}
---
@@ -340,6 +347,11 @@ metadata:
app: krawl-server
spec:
type: LoadBalancer
+ externalTrafficPolicy: Local
+ sessionAffinity: ClientIP
+ sessionAffinityConfig:
+ clientIP:
+ timeoutSeconds: 10800
ports:
- port: 5000
targetPort: 5000
@@ -353,10 +365,8 @@ kind: Ingress
metadata:
name: krawl-ingress
namespace: krawl-system
- annotations:
- nginx.ingress.kubernetes.io/rewrite-target: /
spec:
- ingressClassName: nginx
+ ingressClassName: traefik
rules:
- host: krawl.example.com # Change to your domain
http:
diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml
index 38a287b..d03e1c3 100644
--- a/kubernetes/manifests/configmap.yaml
+++ b/kubernetes/manifests/configmap.yaml
@@ -9,8 +9,7 @@ data:
server:
port: 5000
delay: 100
- timezone: null # e.g., "America/New_York" or null for system default
-
+ timezone: null
links:
min_length: 5
max_length: 15
@@ -18,24 +17,28 @@ data:
max_per_page: 15
char_space: "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
max_counter: 10
-
canary:
- token_url: null # Optional canary token URL
+ token_url: null
token_tries: 10
-
dashboard:
- # Auto-generates random path if null
- # Can be set to "/dashboard" or similar
secret_path: null
-
api:
server_url: null
server_port: 8080
server_path: "/api/v2/users"
-
database:
path: "data/krawl.db"
retention_days: 30
-
behavior:
- probability_error_codes: 0 # 0-100 percentage
+ probability_error_codes: 0
+ analyzer:
+ http_risky_methods_threshold: 0.1
+ violated_robots_threshold: 0.1
+ uneven_request_timing_threshold: 0.5
+ uneven_request_timing_time_window_seconds: 300
+ user_agents_used_threshold: 2
+ attack_urls_threshold: 1
+ crawl:
+ infinite_pages_for_malicious: true
+ max_pages_limit: 250
+ ban_duration_seconds: 600
diff --git a/kubernetes/manifests/ingress.yaml b/kubernetes/manifests/ingress.yaml
index f5a6efc..52cea39 100644
--- a/kubernetes/manifests/ingress.yaml
+++ b/kubernetes/manifests/ingress.yaml
@@ -3,10 +3,8 @@ kind: Ingress
metadata:
name: krawl-ingress
namespace: krawl-system
- annotations:
- nginx.ingress.kubernetes.io/rewrite-target: /
spec:
- ingressClassName: nginx
+ ingressClassName: traefik
rules:
- host: krawl.example.com # Change to your domain
http:
diff --git a/kubernetes/manifests/service.yaml b/kubernetes/manifests/service.yaml
index 8db65b4..0f9291a 100644
--- a/kubernetes/manifests/service.yaml
+++ b/kubernetes/manifests/service.yaml
@@ -7,6 +7,11 @@ metadata:
app: krawl-server
spec:
type: LoadBalancer
+ externalTrafficPolicy: Local
+ sessionAffinity: ClientIP
+ sessionAffinityConfig:
+ clientIP:
+ timeoutSeconds: 10800
ports:
- port: 5000
targetPort: 5000
diff --git a/kubernetes/manifests/wordlists-configmap.yaml b/kubernetes/manifests/wordlists-configmap.yaml
index 4ff0b5d..cc541c6 100644
--- a/kubernetes/manifests/wordlists-configmap.yaml
+++ b/kubernetes/manifests/wordlists-configmap.yaml
@@ -201,5 +201,17 @@ data:
500,
502,
503
+ ],
+ "server_headers": [
+ "Apache/2.2.22 (Ubuntu)",
+ "nginx/1.18.0",
+ "Microsoft-IIS/10.0",
+ "LiteSpeed",
+ "Caddy",
+ "Gunicorn/20.0.4",
+ "uvicorn/0.13.4",
+ "Express",
+ "Flask/1.1.2",
+ "Django/3.1"
]
}
diff --git a/src/config.py b/src/config.py
index 1e96e09..71cef0e 100644
--- a/src/config.py
+++ b/src/config.py
@@ -76,10 +76,10 @@ class Config:
# Try multiple external IP detection services (fallback chain)
ip_detection_services = [
"https://api.ipify.org", # Plain text response
- "http://ident.me", # Plain text response
- "https://ifconfig.me", # Plain text response
+ "http://ident.me", # Plain text response
+ "https://ifconfig.me", # Plain text response
]
-
+
ip = None
for service_url in ip_detection_services:
try:
@@ -90,7 +90,7 @@ class Config:
break
except Exception:
continue
-
+
if not ip:
get_app_logger().warning(
"Could not determine server IP from external services. "
diff --git a/src/database.py b/src/database.py
index 80eb194..b88497e 100644
--- a/src/database.py
+++ b/src/database.py
@@ -587,7 +587,9 @@ class DatabaseManager:
"analyzed_metrics": s.analyzed_metrics,
"category": s.category,
"manual_category": s.manual_category,
- "last_analysis": s.last_analysis.isoformat() if s.last_analysis else None,
+ "last_analysis": (
+ s.last_analysis.isoformat() if s.last_analysis else None
+ ),
}
for s in stats
]
@@ -638,7 +640,13 @@ class DatabaseManager:
finally:
self.close_session()
- def get_attackers_paginated(self, page: int = 1, page_size: int = 25, sort_by: str = "total_requests", sort_order: str = "desc") -> Dict[str, Any]:
+ def get_attackers_paginated(
+ self,
+ page: int = 1,
+ page_size: int = 25,
+ sort_by: str = "total_requests",
+ sort_order: str = "desc",
+ ) -> Dict[str, Any]:
"""
Retrieve paginated list of attacker IPs ordered by specified field.
@@ -658,29 +666,35 @@ class DatabaseManager:
# Validate sort parameters
valid_sort_fields = {"total_requests", "first_seen", "last_seen"}
sort_by = sort_by if sort_by in valid_sort_fields else "total_requests"
- sort_order = sort_order.lower() if sort_order.lower() in {"asc", "desc"} else "desc"
+ sort_order = (
+ sort_order.lower() if sort_order.lower() in {"asc", "desc"} else "desc"
+ )
# Get total count of attackers
total_attackers = (
- session.query(IpStats)
- .filter(IpStats.category == "attacker")
- .count()
+ session.query(IpStats).filter(IpStats.category == "attacker").count()
)
# Build query with sorting
query = session.query(IpStats).filter(IpStats.category == "attacker")
-
+
if sort_by == "total_requests":
query = query.order_by(
- IpStats.total_requests.desc() if sort_order == "desc" else IpStats.total_requests.asc()
+ IpStats.total_requests.desc()
+ if sort_order == "desc"
+ else IpStats.total_requests.asc()
)
elif sort_by == "first_seen":
query = query.order_by(
- IpStats.first_seen.desc() if sort_order == "desc" else IpStats.first_seen.asc()
+ IpStats.first_seen.desc()
+ if sort_order == "desc"
+ else IpStats.first_seen.asc()
)
elif sort_by == "last_seen":
query = query.order_by(
- IpStats.last_seen.desc() if sort_order == "desc" else IpStats.last_seen.asc()
+ IpStats.last_seen.desc()
+ if sort_order == "desc"
+ else IpStats.last_seen.asc()
)
# Get paginated attackers
@@ -693,7 +707,9 @@ class DatabaseManager:
{
"ip": a.ip,
"total_requests": a.total_requests,
- "first_seen": a.first_seen.isoformat() if a.first_seen else None,
+ "first_seen": (
+ a.first_seen.isoformat() if a.first_seen else None
+ ),
"last_seen": a.last_seen.isoformat() if a.last_seen else None,
"country_code": a.country_code,
"city": a.city,
@@ -716,6 +732,101 @@ class DatabaseManager:
finally:
self.close_session()
+ def get_all_ips_paginated(
+ self,
+ page: int = 1,
+ page_size: int = 25,
+ sort_by: str = "total_requests",
+ sort_order: str = "desc",
+ categories: Optional[List[str]] = None,
+ ) -> Dict[str, Any]:
+ """
+ Retrieve paginated list of all IPs (or filtered by categories) ordered by specified field.
+
+ Args:
+ page: Page number (1-indexed)
+ page_size: Number of results per page
+ sort_by: Field to sort by (total_requests, first_seen, last_seen)
+ sort_order: Sort order (asc or desc)
+ categories: Optional list of categories to filter by
+
+ Returns:
+ Dictionary with IPs list and pagination info
+ """
+ session = self.session
+ try:
+ offset = (page - 1) * page_size
+
+ # Validate sort parameters
+ valid_sort_fields = {"total_requests", "first_seen", "last_seen"}
+ sort_by = sort_by if sort_by in valid_sort_fields else "total_requests"
+ sort_order = (
+ sort_order.lower() if sort_order.lower() in {"asc", "desc"} else "desc"
+ )
+
+ # Build query with optional category filter
+ query = session.query(IpStats)
+ if categories:
+ query = query.filter(IpStats.category.in_(categories))
+
+ # Get total count
+ total_ips = query.count()
+
+ # Apply sorting
+ if sort_by == "total_requests":
+ query = query.order_by(
+ IpStats.total_requests.desc()
+ if sort_order == "desc"
+ else IpStats.total_requests.asc()
+ )
+ elif sort_by == "first_seen":
+ query = query.order_by(
+ IpStats.first_seen.desc()
+ if sort_order == "desc"
+ else IpStats.first_seen.asc()
+ )
+ elif sort_by == "last_seen":
+ query = query.order_by(
+ IpStats.last_seen.desc()
+ if sort_order == "desc"
+ else IpStats.last_seen.asc()
+ )
+
+ # Get paginated IPs
+ ips = query.offset(offset).limit(page_size).all()
+
+ total_pages = (total_ips + page_size - 1) // page_size
+
+ return {
+ "ips": [
+ {
+ "ip": ip.ip,
+ "total_requests": ip.total_requests,
+ "first_seen": (
+ ip.first_seen.isoformat() if ip.first_seen else None
+ ),
+ "last_seen": ip.last_seen.isoformat() if ip.last_seen else None,
+ "country_code": ip.country_code,
+ "city": ip.city,
+ "asn": ip.asn,
+ "asn_org": ip.asn_org,
+ "reputation_score": ip.reputation_score,
+ "reputation_source": ip.reputation_source,
+ "category": ip.category,
+ "category_scores": ip.category_scores or {},
+ }
+ for ip in ips
+ ],
+ "pagination": {
+ "page": page,
+ "page_size": page_size,
+ "total": total_ips,
+ "total_pages": total_pages,
+ },
+ }
+ finally:
+ self.close_session()
+
def get_dashboard_counts(self) -> Dict[str, int]:
"""
Get aggregate statistics for the dashboard (excludes local/private IPs and server IP).
@@ -728,28 +839,34 @@ class DatabaseManager:
try:
# Get server IP to filter it out
from config import get_config
+
config = get_config()
server_ip = config.get_server_ip()
-
+
# Get all accesses first, then filter out local IPs and server IP
all_accesses = session.query(AccessLog).all()
-
+
# Filter out local/private IPs and server IP
public_accesses = [
- log for log in all_accesses
- if is_valid_public_ip(log.ip, server_ip)
+ log for log in all_accesses if is_valid_public_ip(log.ip, server_ip)
]
-
+
# Calculate counts from filtered data
total_accesses = len(public_accesses)
unique_ips = len(set(log.ip for log in public_accesses))
unique_paths = len(set(log.path for log in public_accesses))
suspicious_accesses = sum(1 for log in public_accesses if log.is_suspicious)
- honeypot_triggered = sum(1 for log in public_accesses if log.is_honeypot_trigger)
- honeypot_ips = len(set(log.ip for log in public_accesses if log.is_honeypot_trigger))
-
+ honeypot_triggered = sum(
+ 1 for log in public_accesses if log.is_honeypot_trigger
+ )
+ honeypot_ips = len(
+ set(log.ip for log in public_accesses if log.is_honeypot_trigger)
+ )
+
# Count unique attackers from IpStats (matching the "Attackers by Total Requests" table)
- unique_attackers = session.query(IpStats).filter(IpStats.category == "attacker").count()
+ unique_attackers = (
+ session.query(IpStats).filter(IpStats.category == "attacker").count()
+ )
return {
"total_accesses": total_accesses,
@@ -777,9 +894,10 @@ class DatabaseManager:
try:
# Get server IP to filter it out
from config import get_config
+
config = get_config()
server_ip = config.get_server_ip()
-
+
results = (
session.query(AccessLog.ip, func.count(AccessLog.id).label("count"))
.group_by(AccessLog.ip)
@@ -862,9 +980,10 @@ class DatabaseManager:
try:
# Get server IP to filter it out
from config import get_config
+
config = get_config()
server_ip = config.get_server_ip()
-
+
logs = (
session.query(AccessLog)
.filter(AccessLog.is_suspicious == True)
@@ -874,8 +993,7 @@ class DatabaseManager:
# Filter out local/private IPs and server IP
filtered_logs = [
- log for log in logs
- if is_valid_public_ip(log.ip, server_ip)
+ log for log in logs if is_valid_public_ip(log.ip, server_ip)
]
return [
@@ -902,9 +1020,10 @@ class DatabaseManager:
try:
# Get server IP to filter it out
from config import get_config
+
config = get_config()
server_ip = config.get_server_ip()
-
+
# Get all honeypot triggers grouped by IP
results = (
session.query(AccessLog.ip, AccessLog.path)
@@ -961,7 +1080,13 @@ class DatabaseManager:
finally:
self.close_session()
- def get_honeypot_paginated(self, page: int = 1, page_size: int = 5, sort_by: str = "count", sort_order: str = "desc") -> Dict[str, Any]:
+ def get_honeypot_paginated(
+ self,
+ page: int = 1,
+ page_size: int = 5,
+ sort_by: str = "count",
+ sort_order: str = "desc",
+ ) -> Dict[str, Any]:
"""
Retrieve paginated list of honeypot-triggered IPs with their paths.
@@ -977,6 +1102,7 @@ class DatabaseManager:
session = self.session
try:
from config import get_config
+
config = get_config()
server_ip = config.get_server_ip()
@@ -1007,17 +1133,15 @@ class DatabaseManager:
if sort_by == "count":
honeypot_list.sort(
- key=lambda x: x["count"],
- reverse=(sort_order == "desc")
+ key=lambda x: x["count"], reverse=(sort_order == "desc")
)
else: # sort by ip
honeypot_list.sort(
- key=lambda x: x["ip"],
- reverse=(sort_order == "desc")
+ key=lambda x: x["ip"], reverse=(sort_order == "desc")
)
total_honeypots = len(honeypot_list)
- paginated = honeypot_list[offset:offset + page_size]
+ paginated = honeypot_list[offset : offset + page_size]
total_pages = (total_honeypots + page_size - 1) // page_size
return {
@@ -1032,7 +1156,13 @@ class DatabaseManager:
finally:
self.close_session()
- def get_credentials_paginated(self, page: int = 1, page_size: int = 5, sort_by: str = "timestamp", sort_order: str = "desc") -> Dict[str, Any]:
+ def get_credentials_paginated(
+ self,
+ page: int = 1,
+ page_size: int = 5,
+ sort_by: str = "timestamp",
+ sort_order: str = "desc",
+ ) -> Dict[str, Any]:
"""
Retrieve paginated list of credential attempts.
@@ -1052,7 +1182,9 @@ class DatabaseManager:
# Validate sort parameters
valid_sort_fields = {"timestamp", "ip", "username"}
sort_by = sort_by if sort_by in valid_sort_fields else "timestamp"
- sort_order = sort_order.lower() if sort_order.lower() in {"asc", "desc"} else "desc"
+ sort_order = (
+ sort_order.lower() if sort_order.lower() in {"asc", "desc"} else "desc"
+ )
total_credentials = session.query(CredentialAttempt).count()
@@ -1061,15 +1193,21 @@ class DatabaseManager:
if sort_by == "timestamp":
query = query.order_by(
- CredentialAttempt.timestamp.desc() if sort_order == "desc" else CredentialAttempt.timestamp.asc()
+ CredentialAttempt.timestamp.desc()
+ if sort_order == "desc"
+ else CredentialAttempt.timestamp.asc()
)
elif sort_by == "ip":
query = query.order_by(
- CredentialAttempt.ip.desc() if sort_order == "desc" else CredentialAttempt.ip.asc()
+ CredentialAttempt.ip.desc()
+ if sort_order == "desc"
+ else CredentialAttempt.ip.asc()
)
elif sort_by == "username":
query = query.order_by(
- CredentialAttempt.username.desc() if sort_order == "desc" else CredentialAttempt.username.asc()
+ CredentialAttempt.username.desc()
+ if sort_order == "desc"
+ else CredentialAttempt.username.asc()
)
credentials = query.offset(offset).limit(page_size).all()
@@ -1096,7 +1234,13 @@ class DatabaseManager:
finally:
self.close_session()
- def get_top_ips_paginated(self, page: int = 1, page_size: int = 5, sort_by: str = "count", sort_order: str = "desc") -> Dict[str, Any]:
+ def get_top_ips_paginated(
+ self,
+ page: int = 1,
+ page_size: int = 5,
+ sort_by: str = "count",
+ sort_order: str = "desc",
+ ) -> Dict[str, Any]:
"""
Retrieve paginated list of top IP addresses by access count.
@@ -1112,6 +1256,7 @@ class DatabaseManager:
session = self.session
try:
from config import get_config
+
config = get_config()
server_ip = config.get_server_ip()
@@ -1136,7 +1281,7 @@ class DatabaseManager:
filtered.sort(key=lambda x: x["ip"], reverse=(sort_order == "desc"))
total_ips = len(filtered)
- paginated = filtered[offset:offset + page_size]
+ paginated = filtered[offset : offset + page_size]
total_pages = (total_ips + page_size - 1) // page_size
return {
@@ -1151,7 +1296,13 @@ class DatabaseManager:
finally:
self.close_session()
- def get_top_paths_paginated(self, page: int = 1, page_size: int = 5, sort_by: str = "count", sort_order: str = "desc") -> Dict[str, Any]:
+ def get_top_paths_paginated(
+ self,
+ page: int = 1,
+ page_size: int = 5,
+ sort_by: str = "count",
+ sort_order: str = "desc",
+ ) -> Dict[str, Any]:
"""
Retrieve paginated list of top paths by access count.
@@ -1175,18 +1326,17 @@ class DatabaseManager:
)
# Create list and sort
- paths_list = [
- {"path": row.path, "count": row.count}
- for row in results
- ]
+ paths_list = [{"path": row.path, "count": row.count} for row in results]
if sort_by == "count":
- paths_list.sort(key=lambda x: x["count"], reverse=(sort_order == "desc"))
+ paths_list.sort(
+ key=lambda x: x["count"], reverse=(sort_order == "desc")
+ )
else: # sort by path
paths_list.sort(key=lambda x: x["path"], reverse=(sort_order == "desc"))
total_paths = len(paths_list)
- paginated = paths_list[offset:offset + page_size]
+ paginated = paths_list[offset : offset + page_size]
total_pages = (total_paths + page_size - 1) // page_size
return {
@@ -1201,7 +1351,13 @@ class DatabaseManager:
finally:
self.close_session()
- def get_top_user_agents_paginated(self, page: int = 1, page_size: int = 5, sort_by: str = "count", sort_order: str = "desc") -> Dict[str, Any]:
+ def get_top_user_agents_paginated(
+ self,
+ page: int = 1,
+ page_size: int = 5,
+ sort_by: str = "count",
+ sort_order: str = "desc",
+ ) -> Dict[str, Any]:
"""
Retrieve paginated list of top user agents by access count.
@@ -1219,7 +1375,9 @@ class DatabaseManager:
offset = (page - 1) * page_size
results = (
- session.query(AccessLog.user_agent, func.count(AccessLog.id).label("count"))
+ session.query(
+ AccessLog.user_agent, func.count(AccessLog.id).label("count")
+ )
.filter(AccessLog.user_agent.isnot(None), AccessLog.user_agent != "")
.group_by(AccessLog.user_agent)
.all()
@@ -1227,17 +1385,18 @@ class DatabaseManager:
# Create list and sort
ua_list = [
- {"user_agent": row.user_agent, "count": row.count}
- for row in results
+ {"user_agent": row.user_agent, "count": row.count} for row in results
]
if sort_by == "count":
ua_list.sort(key=lambda x: x["count"], reverse=(sort_order == "desc"))
else: # sort by user_agent
- ua_list.sort(key=lambda x: x["user_agent"], reverse=(sort_order == "desc"))
+ ua_list.sort(
+ key=lambda x: x["user_agent"], reverse=(sort_order == "desc")
+ )
total_uas = len(ua_list)
- paginated = ua_list[offset:offset + page_size]
+ paginated = ua_list[offset : offset + page_size]
total_pages = (total_uas + page_size - 1) // page_size
return {
@@ -1252,7 +1411,13 @@ class DatabaseManager:
finally:
self.close_session()
- def get_attack_types_paginated(self, page: int = 1, page_size: int = 5, sort_by: str = "timestamp", sort_order: str = "desc") -> Dict[str, Any]:
+ def get_attack_types_paginated(
+ self,
+ page: int = 1,
+ page_size: int = 5,
+ sort_by: str = "timestamp",
+ sort_order: str = "desc",
+ ) -> Dict[str, Any]:
"""
Retrieve paginated list of detected attack types with access logs.
@@ -1272,17 +1437,18 @@ class DatabaseManager:
# Validate sort parameters
valid_sort_fields = {"timestamp", "ip", "attack_type"}
sort_by = sort_by if sort_by in valid_sort_fields else "timestamp"
- sort_order = sort_order.lower() if sort_order.lower() in {"asc", "desc"} else "desc"
+ sort_order = (
+ sort_order.lower() if sort_order.lower() in {"asc", "desc"} else "desc"
+ )
# Get all access logs with attack detections
- query = (
- session.query(AccessLog)
- .join(AttackDetection)
- )
+ query = session.query(AccessLog).join(AttackDetection)
if sort_by == "timestamp":
query = query.order_by(
- AccessLog.timestamp.desc() if sort_order == "desc" else AccessLog.timestamp.asc()
+ AccessLog.timestamp.desc()
+ if sort_order == "desc"
+ else AccessLog.timestamp.asc()
)
elif sort_by == "ip":
query = query.order_by(
@@ -1307,11 +1473,11 @@ class DatabaseManager:
if sort_by == "attack_type":
attack_list.sort(
key=lambda x: x["attack_types"][0] if x["attack_types"] else "",
- reverse=(sort_order == "desc")
+ reverse=(sort_order == "desc"),
)
total_attacks = len(attack_list)
- paginated = attack_list[offset:offset + page_size]
+ paginated = attack_list[offset : offset + page_size]
total_pages = (total_attacks + page_size - 1) // page_size
return {
diff --git a/src/handler.py b/src/handler.py
index df04465..b3c76e7 100644
--- a/src/handler.py
+++ b/src/handler.py
@@ -511,7 +511,10 @@ class Handler(BaseHTTPRequestHandler):
return
# API endpoint for fetching all IP statistics
- if self.config.dashboard_secret_path and self.path == f"{self.config.dashboard_secret_path}/api/all-ip-stats":
+ if (
+ self.config.dashboard_secret_path
+ and self.path == f"{self.config.dashboard_secret_path}/api/all-ip-stats"
+ ):
self.send_response(200)
self.send_header("Content-type", "application/json")
self.send_header("Access-Control-Allow-Origin", "*")
@@ -554,7 +557,7 @@ class Handler(BaseHTTPRequestHandler):
from urllib.parse import urlparse, parse_qs
db = get_database()
-
+
# Parse query parameters
parsed_url = urlparse(self.path)
query_params = parse_qs(parsed_url.query)
@@ -567,7 +570,12 @@ class Handler(BaseHTTPRequestHandler):
page = max(1, page)
page_size = min(max(1, page_size), 100) # Max 100 per page
- result = db.get_attackers_paginated(page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order)
+ result = db.get_attackers_paginated(
+ page=page,
+ page_size=page_size,
+ sort_by=sort_by,
+ sort_order=sort_order,
+ )
self.wfile.write(json.dumps(result).encode())
except BrokenPipeError:
pass
@@ -576,6 +584,52 @@ class Handler(BaseHTTPRequestHandler):
self.wfile.write(json.dumps({"error": str(e)}).encode())
return
+ # API endpoint for fetching all IPs (all categories)
+ if self.config.dashboard_secret_path and self.path.startswith(
+ f"{self.config.dashboard_secret_path}/api/all-ips"
+ ):
+ self.send_response(200)
+ self.send_header("Content-type", "application/json")
+ self.send_header("Access-Control-Allow-Origin", "*")
+ self.send_header(
+ "Cache-Control", "no-store, no-cache, must-revalidate, max-age=0"
+ )
+ self.send_header("Pragma", "no-cache")
+ self.send_header("Expires", "0")
+ self.end_headers()
+ try:
+ from database import get_database
+ import json
+ from urllib.parse import urlparse, parse_qs
+
+ db = get_database()
+
+ # Parse query parameters
+ parsed_url = urlparse(self.path)
+ query_params = parse_qs(parsed_url.query)
+ page = int(query_params.get("page", ["1"])[0])
+ page_size = int(query_params.get("page_size", ["25"])[0])
+ sort_by = query_params.get("sort_by", ["total_requests"])[0]
+ sort_order = query_params.get("sort_order", ["desc"])[0]
+
+ # Ensure valid parameters
+ page = max(1, page)
+ page_size = min(max(1, page_size), 100) # Max 100 per page
+
+ result = db.get_all_ips_paginated(
+ page=page,
+ page_size=page_size,
+ sort_by=sort_by,
+ sort_order=sort_order,
+ )
+ self.wfile.write(json.dumps(result).encode())
+ except BrokenPipeError:
+ pass
+ except Exception as e:
+ self.app_logger.error(f"Error fetching all IPs: {e}")
+ self.wfile.write(json.dumps({"error": str(e)}).encode())
+ return
+
# API endpoint for fetching IP stats
if self.config.dashboard_secret_path and self.path.startswith(
f"{self.config.dashboard_secret_path}/api/ip-stats/"
@@ -639,7 +693,12 @@ class Handler(BaseHTTPRequestHandler):
page = max(1, page)
page_size = min(max(1, page_size), 100)
- result = db.get_honeypot_paginated(page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order)
+ result = db.get_honeypot_paginated(
+ page=page,
+ page_size=page_size,
+ sort_by=sort_by,
+ sort_order=sort_order,
+ )
self.wfile.write(json.dumps(result).encode())
except BrokenPipeError:
pass
@@ -677,7 +736,12 @@ class Handler(BaseHTTPRequestHandler):
page = max(1, page)
page_size = min(max(1, page_size), 100)
- result = db.get_credentials_paginated(page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order)
+ result = db.get_credentials_paginated(
+ page=page,
+ page_size=page_size,
+ sort_by=sort_by,
+ sort_order=sort_order,
+ )
self.wfile.write(json.dumps(result).encode())
except BrokenPipeError:
pass
@@ -715,7 +779,12 @@ class Handler(BaseHTTPRequestHandler):
page = max(1, page)
page_size = min(max(1, page_size), 100)
- result = db.get_top_ips_paginated(page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order)
+ result = db.get_top_ips_paginated(
+ page=page,
+ page_size=page_size,
+ sort_by=sort_by,
+ sort_order=sort_order,
+ )
self.wfile.write(json.dumps(result).encode())
except BrokenPipeError:
pass
@@ -753,7 +822,12 @@ class Handler(BaseHTTPRequestHandler):
page = max(1, page)
page_size = min(max(1, page_size), 100)
- result = db.get_top_paths_paginated(page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order)
+ result = db.get_top_paths_paginated(
+ page=page,
+ page_size=page_size,
+ sort_by=sort_by,
+ sort_order=sort_order,
+ )
self.wfile.write(json.dumps(result).encode())
except BrokenPipeError:
pass
@@ -791,7 +865,12 @@ class Handler(BaseHTTPRequestHandler):
page = max(1, page)
page_size = min(max(1, page_size), 100)
- result = db.get_top_user_agents_paginated(page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order)
+ result = db.get_top_user_agents_paginated(
+ page=page,
+ page_size=page_size,
+ sort_by=sort_by,
+ sort_order=sort_order,
+ )
self.wfile.write(json.dumps(result).encode())
except BrokenPipeError:
pass
@@ -829,7 +908,12 @@ class Handler(BaseHTTPRequestHandler):
page = max(1, page)
page_size = min(max(1, page_size), 100)
- result = db.get_attack_types_paginated(page=page, page_size=page_size, sort_by=sort_by, sort_order=sort_order)
+ result = db.get_attack_types_paginated(
+ page=page,
+ page_size=page_size,
+ sort_by=sort_by,
+ sort_order=sort_order,
+ )
self.wfile.write(json.dumps(result).encode())
except BrokenPipeError:
pass
diff --git a/src/ip_utils.py b/src/ip_utils.py
index 35504c8..1eab6b8 100644
--- a/src/ip_utils.py
+++ b/src/ip_utils.py
@@ -12,7 +12,7 @@ from typing import Optional
def is_local_or_private_ip(ip_str: str) -> bool:
"""
Check if an IP address is local, private, or reserved.
-
+
Filters out:
- 127.0.0.1 (localhost)
- 127.0.0.0/8 (loopback)
@@ -22,10 +22,10 @@ def is_local_or_private_ip(ip_str: str) -> bool:
- 0.0.0.0/8 (this network)
- ::1 (IPv6 localhost)
- ::ffff:127.0.0.0/104 (IPv6-mapped IPv4 loopback)
-
+
Args:
ip_str: IP address string
-
+
Returns:
True if IP is local/private/reserved, False if it's public
"""
@@ -46,15 +46,15 @@ def is_local_or_private_ip(ip_str: str) -> bool:
def is_valid_public_ip(ip: str, server_ip: Optional[str] = None) -> bool:
"""
Check if an IP is public and not the server's own IP.
-
+
Returns True only if:
- IP is not in local/private ranges AND
- IP is not the server's own public IP (if server_ip provided)
-
+
Args:
ip: IP address string to check
server_ip: Server's public IP (optional). If provided, filters out this IP too.
-
+
Returns:
True if IP is a valid public IP to track, False otherwise
"""
diff --git a/src/tasks/memory_cleanup.py b/src/tasks/memory_cleanup.py
index ba1ace5..38a27a2 100644
--- a/src/tasks/memory_cleanup.py
+++ b/src/tasks/memory_cleanup.py
@@ -45,8 +45,13 @@ def main():
stats_after = Handler.tracker.get_memory_stats()
# Log changes
- access_log_reduced = stats_before["access_log_size"] - stats_after["access_log_size"]
- cred_reduced = stats_before["credential_attempts_size"] - stats_after["credential_attempts_size"]
+ access_log_reduced = (
+ stats_before["access_log_size"] - stats_after["access_log_size"]
+ )
+ cred_reduced = (
+ stats_before["credential_attempts_size"]
+ - stats_after["credential_attempts_size"]
+ )
if access_log_reduced > 0 or cred_reduced > 0:
app_logger.info(
diff --git a/src/tasks/top_attacking_ips.py b/src/tasks/top_attacking_ips.py
index 1648c93..73a135c 100644
--- a/src/tasks/top_attacking_ips.py
+++ b/src/tasks/top_attacking_ips.py
@@ -71,11 +71,8 @@ def main():
# Filter out local/private IPs and the server's own IP
config = get_config()
server_ip = config.get_server_ip()
-
- public_ips = [
- ip for (ip,) in results
- if is_valid_public_ip(ip, server_ip)
- ]
+
+ public_ips = [ip for (ip,) in results if is_valid_public_ip(ip, server_ip)]
# Ensure exports directory exists
os.makedirs(EXPORTS_DIR, exist_ok=True)
diff --git a/src/templates/dashboard_template.py b/src/templates/dashboard_template.py
index 8babb4d..3ef693f 100644
--- a/src/templates/dashboard_template.py
+++ b/src/templates/dashboard_template.py
@@ -548,10 +548,7 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
background: #161b22;
border-top: 6px solid #30363d;
}}
- .attacker-marker {{
- width: 20px;
- height: 20px;
- background: #f85149;
+ .ip-marker {{
border: 2px solid #fff;
border-radius: 50%;
display: flex;
@@ -560,20 +557,27 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
font-size: 10px;
font-weight: bold;
color: white;
- box-shadow: 0 0 8px rgba(248, 81, 73, 0.8), inset 0 0 4px rgba(248, 81, 73, 0.5);
cursor: pointer;
}}
- .attacker-marker-cluster {{
- background: #f85149 !important;
- border: 2px solid #fff !important;
- background-clip: padding-box !important;
+ .marker-attacker {{
+ background: #f85149;
+ box-shadow: 0 0 8px rgba(248, 81, 73, 0.8), inset 0 0 4px rgba(248, 81, 73, 0.5);
}}
- .attacker-marker-cluster div {{
- background: #f85149 !important;
+ .marker-bad_crawler {{
+ background: #f0883e;
+ box-shadow: 0 0 8px rgba(240, 136, 62, 0.8), inset 0 0 4px rgba(240, 136, 62, 0.5);
}}
- .attacker-marker-cluster span {{
- color: white !important;
- font-weight: bold !important;
+ .marker-good_crawler {{
+ background: #3fb950;
+ box-shadow: 0 0 8px rgba(63, 185, 80, 0.8), inset 0 0 4px rgba(63, 185, 80, 0.5);
+ }}
+ .marker-regular_user {{
+ background: #58a6ff;
+ box-shadow: 0 0 8px rgba(88, 166, 255, 0.8), inset 0 0 4px rgba(88, 166, 255, 0.5);
+ }}
+ .marker-unknown {{
+ background: #8b949e;
+ box-shadow: 0 0 8px rgba(139, 148, 158, 0.8), inset 0 0 4px rgba(139, 148, 158, 0.5);
}}
.leaflet-bottom.leaflet-right {{
display: none !important;
@@ -734,7 +738,31 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
-
Attacker Origins Map
+
@@ -1862,9 +1890,20 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
`;
document.head.appendChild(style);
- // Attacker Map Visualization
+ // IP Map Visualization
let attackerMap = null;
+ let allIps = [];
let mapMarkers = [];
+ let markerLayers = {{}};
+ let circleLayers = {{}};
+
+ const categoryColors = {{
+ attacker: '#f85149',
+ bad_crawler: '#f0883e',
+ good_crawler: '#3fb950',
+ regular_user: '#58a6ff',
+ unknown: '#8b949e'
+ }};
async function initializeAttackerMap() {{
const mapContainer = document.getElementById('attacker-map');
@@ -1884,8 +1923,8 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
]
}});
- // Fetch all attackers
- const response = await fetch(DASHBOARD_PATH + '/api/attackers?page=1&page_size=100&sort_by=total_requests&sort_order=desc', {{
+ // Fetch all IPs (not just attackers)
+ const response = await fetch(DASHBOARD_PATH + '/api/all-ips?page=1&page_size=100&sort_by=total_requests&sort_order=desc', {{
cache: 'no-store',
headers: {{
'Cache-Control': 'no-cache',
@@ -1893,18 +1932,18 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
}}
}});
- if (!response.ok) throw new Error('Failed to fetch attackers');
-
- const data = await response.json();
- const attackers = data.attackers || [];
+ if (!response.ok) throw new Error('Failed to fetch IPs');
- if (attackers.length === 0) {{
- mapContainer.innerHTML = '
No attacker location data available
';
+ const data = await response.json();
+ allIps = data.ips || [];
+
+ if (allIps.length === 0) {{
+ mapContainer.innerHTML = '
No IP location data available
';
return;
}}
// Get max request count for scaling
- const maxRequests = Math.max(...attackers.map(a => a.total_requests || 0));
+ const maxRequests = Math.max(...allIps.map(ip => ip.total_requests || 0));
// Create a map of country locations (approximate country centers)
const countryCoordinates = {{
@@ -1922,22 +1961,40 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
'FI': [61.9, 25.8], 'NO': [60.5, 8.5], 'GR': [39.1, 21.8], 'PT': [39.4, -8.2]
}};
- // Add markers for each attacker
- const markerGroup = L.featureGroup();
+ // Create layer groups for each category
+ markerLayers = {{
+ attacker: L.featureGroup(),
+ bad_crawler: L.featureGroup(),
+ good_crawler: L.featureGroup(),
+ regular_user: L.featureGroup(),
+ unknown: L.featureGroup()
+ }};
- attackers.slice(0, 50).forEach(attacker => {{
- if (!attacker.country_code) return;
+ circleLayers = {{
+ attacker: L.featureGroup(),
+ bad_crawler: L.featureGroup(),
+ good_crawler: L.featureGroup(),
+ regular_user: L.featureGroup(),
+ unknown: L.featureGroup()
+ }};
- const coords = countryCoordinates[attacker.country_code];
+ // Add markers for each IP
+ allIps.slice(0, 100).forEach(ip => {{
+ if (!ip.country_code || !ip.category) return;
+
+ const coords = countryCoordinates[ip.country_code];
if (!coords) return;
+ const category = ip.category.toLowerCase();
+ if (!markerLayers[category]) return;
+
// Calculate marker size based on request count
- const sizeRatio = (attacker.total_requests / maxRequests) * 0.7 + 0.3;
+ const sizeRatio = (ip.total_requests / maxRequests) * 0.7 + 0.3;
const markerSize = Math.max(15, Math.min(40, 20 * sizeRatio));
- // Create custom marker element
+ // Create custom marker element with category-specific class
const markerElement = document.createElement('div');
- markerElement.className = 'attacker-marker';
+ markerElement.className = `ip-marker marker-${{category}}`;
markerElement.style.width = markerSize + 'px';
markerElement.style.height = markerSize + 'px';
markerElement.style.fontSize = (markerSize * 0.5) + 'px';
@@ -1947,62 +2004,89 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
icon: L.divIcon({{
html: markerElement,
iconSize: [markerSize, markerSize],
- className: 'attacker-custom-marker'
+ className: `ip-custom-marker category-${{category}}`
}})
}});
- // Create popup content
+ // Create popup content with category badge
+ const categoryColor = categoryColors[category] || '#8b949e';
+ const categoryLabels = {{
+ attacker: 'Attacker',
+ bad_crawler: 'Bad Crawler',
+ good_crawler: 'Good Crawler',
+ regular_user: 'Regular User',
+ unknown: 'Unknown'
+ }};
+
const popupContent = `
-
-
${{attacker.ip}}
+
+
+ ${{ip.ip}}
+
+ ${{categoryLabels[category]}}
+
+
- ${{attacker.city || ''}}${{attacker.city && attacker.country_code ? ', ' : ''}}${{attacker.country_code || 'Unknown'}}
+ ${{ip.city || ''}}${{ip.city && ip.country_code ? ', ' : ''}}${{ip.country_code || 'Unknown'}}
-
Requests: ${{attacker.total_requests}}
-
First Seen: ${{formatTimestamp(attacker.first_seen)}}
-
Last Seen: ${{formatTimestamp(attacker.last_seen)}}
+
Requests: ${{ip.total_requests}}
+
First Seen: ${{formatTimestamp(ip.first_seen)}}
+
Last Seen: ${{formatTimestamp(ip.last_seen)}}
`;
marker.bindPopup(popupContent);
- markerGroup.addLayer(marker);
- mapMarkers.push(marker);
+ markerLayers[category].addLayer(marker);
}});
- // Add cluster circle effect
- const circleGroup = L.featureGroup();
- const countryAttackerCount = {{}};
-
- attackers.forEach(attacker => {{
- if (attacker.country_code) {{
- countryAttackerCount[attacker.country_code] = (countryAttackerCount[attacker.country_code] || 0) + 1;
+ // Add cluster circles for each category
+ const categoryCountryCounts = {{}};
+
+ allIps.forEach(ip => {{
+ if (ip.country_code && ip.category) {{
+ const category = ip.category.toLowerCase();
+ if (!categoryCountryCounts[category]) {{
+ categoryCountryCounts[category] = {{}};
+ }}
+ categoryCountryCounts[category][ip.country_code] =
+ (categoryCountryCounts[category][ip.country_code] || 0) + 1;
}}
}});
- Object.entries(countryAttackerCount).forEach(([country, count]) => {{
- const coords = countryCoordinates[country];
- if (coords) {{
- const circle = L.circle(coords, {{
- radius: 100000 + (count * 150000),
- color: '#f85149',
- fillColor: '#f85149',
- fillOpacity: 0.15,
- weight: 1,
- opacity: 0.4,
- dashArray: '3'
- }});
- circleGroup.addLayer(circle);
- }}
+ Object.entries(categoryCountryCounts).forEach(([category, countryCounts]) => {{
+ Object.entries(countryCounts).forEach(([country, count]) => {{
+ const coords = countryCoordinates[country];
+ if (coords && circleLayers[category]) {{
+ const color = categoryColors[category] || '#8b949e';
+ const circle = L.circle(coords, {{
+ radius: 100000 + (count * 150000),
+ color: color,
+ fillColor: color,
+ fillOpacity: 0.15,
+ weight: 1,
+ opacity: 0.4,
+ dashArray: '3'
+ }});
+ circleLayers[category].addLayer(circle);
+ }}
+ }});
}});
- attackerMap.addLayer(circleGroup);
- markerGroup.addTo(attackerMap);
-
- // Fit map to markers
- if (markerGroup.getLayers().length > 0) {{
- attackerMap.fitBounds(markerGroup.getBounds(), {{ padding: [50, 50] }});
+ // Add all layers to map initially
+ Object.values(circleLayers).forEach(layer => attackerMap.addLayer(layer));
+ Object.values(markerLayers).forEach(layer => attackerMap.addLayer(layer));
+
+ // Fit map to all markers
+ const allMarkers = Object.values(markerLayers).reduce((acc, layer) => {{
+ acc.push(...layer.getLayers());
+ return acc;
+ }}, []);
+
+ if (allMarkers.length > 0) {{
+ const bounds = L.featureGroup(allMarkers).getBounds();
+ attackerMap.fitBounds(bounds, {{ padding: [50, 50] }});
}}
}} catch (err) {{
@@ -2011,6 +2095,46 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
}}
}}
+ // Update map filters based on checkbox selection
+ function updateMapFilters() {{
+ if (!attackerMap) return;
+
+ const filters = {{
+ attacker: document.getElementById('filter-attacker').checked,
+ bad_crawler: document.getElementById('filter-bad-crawler').checked,
+ good_crawler: document.getElementById('filter-good-crawler').checked,
+ regular_user: document.getElementById('filter-regular-user').checked,
+ unknown: document.getElementById('filter-unknown').checked
+ }};
+
+ // Update marker and circle layers visibility
+ Object.entries(filters).forEach(([category, show]) => {{
+ if (markerLayers[category]) {{
+ if (show) {{
+ if (!attackerMap.hasLayer(markerLayers[category])) {{
+ attackerMap.addLayer(markerLayers[category]);
+ }}
+ }} else {{
+ if (attackerMap.hasLayer(markerLayers[category])) {{
+ attackerMap.removeLayer(markerLayers[category]);
+ }}
+ }}
+ }}
+
+ if (circleLayers[category]) {{
+ if (show) {{
+ if (!attackerMap.hasLayer(circleLayers[category])) {{
+ attackerMap.addLayer(circleLayers[category]);
+ }}
+ }} else {{
+ if (attackerMap.hasLayer(circleLayers[category])) {{
+ attackerMap.removeLayer(circleLayers[category]);
+ }}
+ }}
+ }}
+ }});
+ }}
+
// Initialize map when Attacks tab is opened
const originalSwitchTab = window.switchTab;
let attackTypesChartLoaded = false;
diff --git a/src/tracker.py b/src/tracker.py
index 0706e82..60e05f0 100644
--- a/src/tracker.py
+++ b/src/tracker.py
@@ -173,6 +173,7 @@ class AccessTracker:
"""
# Skip if this is the server's own IP
from config import get_config
+
config = get_config()
server_ip = config.get_server_ip()
if server_ip and ip == server_ip:
@@ -228,6 +229,7 @@ class AccessTracker:
"""
# Skip if this is the server's own IP
from config import get_config
+
config = get_config()
server_ip = config.get_server_ip()
if server_ip and ip == server_ip:
@@ -397,6 +399,7 @@ class AccessTracker:
"""
# Skip if this is the server's own IP
from config import get_config
+
config = get_config()
server_ip = config.get_server_ip()
if server_ip and client_ip == server_ip:
@@ -429,7 +432,9 @@ class AccessTracker:
self.ip_page_visits[client_ip]["ban_multiplier"] = 2 ** (violations - 1)
# Set ban timestamp
- self.ip_page_visits[client_ip]["ban_timestamp"] = datetime.now().isoformat()
+ self.ip_page_visits[client_ip][
+ "ban_timestamp"
+ ] = datetime.now().isoformat()
return self.ip_page_visits[client_ip]["count"]
@@ -572,7 +577,8 @@ class AccessTracker:
suspicious = [
log
for log in self.access_log
- if log.get("suspicious", False) and not is_local_or_private_ip(log.get("ip", ""))
+ if log.get("suspicious", False)
+ and not is_local_or_private_ip(log.get("ip", ""))
]
return suspicious[-limit:]
@@ -624,7 +630,7 @@ class AccessTracker:
"""
# Trim access_log to max size (keep most recent)
if len(self.access_log) > self.max_access_log_size:
- self.access_log = self.access_log[-self.max_access_log_size:]
+ self.access_log = self.access_log[-self.max_access_log_size :]
# Trim credential_attempts to max size (keep most recent)
if len(self.credential_attempts) > self.max_credential_log_size: