diff --git a/.github/workflows/docker-build-push.yml b/.github/workflows/docker-build-push.yml index 70fbb36..f76cf11 100644 --- a/.github/workflows/docker-build-push.yml +++ b/.github/workflows/docker-build-push.yml @@ -4,7 +4,6 @@ on: push: branches: - main - - beta - dev - github-actions-ci paths: @@ -15,10 +14,6 @@ on: - 'requirements.txt' - 'entrypoint.sh' - '.github/workflows/docker-build-push.yml' - tags: - - 'v*.*.*' - release: - types: [published] workflow_dispatch: env: diff --git a/.github/workflows/helm-package-push.yml b/.github/workflows/helm-package-push.yml index 9ba9150..0091915 100644 --- a/.github/workflows/helm-package-push.yml +++ b/.github/workflows/helm-package-push.yml @@ -4,18 +4,11 @@ on: push: branches: - main - - beta - dev - github-actions-ci paths: - 'helm/**' - '.github/workflows/helm-package-push.yml' - tags: - - 'v*' - release: - types: - - published - - created workflow_dispatch: env: diff --git a/README.md b/README.md index 4dc5702..6bc2ba5 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ - [Docker Run](#docker-run) - [Docker Compose](#docker-compose) - [Kubernetes](#kubernetes) + - [Local (Python)](#local-python) - [Configuration](#configuration) - [config.yaml](#configuration-via-configyaml) - [Environment Variables](#configuration-via-enviromental-variables) @@ -63,6 +64,8 @@ Tip: crawl the `robots.txt` paths for additional fun It creates realistic fake web applications filled with low‑hanging fruit such as admin panels, configuration files, and exposed fake credentials to attract and identify suspicious activity. +![dashboard](img/deception-page.png) + By wasting attacker resources, Krawl helps clearly distinguish malicious behavior from legitimate crawlers. 
It features: @@ -77,8 +80,9 @@ It features: - **Customizable Wordlists**: Easy JSON-based configuration - **Random Error Injection**: Mimic real server behavior -![dashboard](img/deception-page.png) +You can easily expose Krawl alongside your other services to shield them from web crawlers and malicious users using a reverse proxy. For more details, see the [Reverse Proxy documentation](docs/reverse-proxy.md). +![use case](img/use-case.png) ## Krawl Dashboard @@ -160,6 +164,17 @@ docker-compose down ### Kubernetes **Krawl is also available natively on Kubernetes**. Installation can be done either [via manifest](kubernetes/README.md) or [using the helm chart](helm/README.md). +### Local (Python) + +Run Krawl directly with Python (version 3.13 suggested) and uvicorn for local development or testing: + +```bash +pip install -r requirements.txt +uvicorn app:app --host 0.0.0.0 --port 5000 --app-dir src +``` + +Access the server at `http://localhost:5000` + ## Configuration Krawl uses a **configuration hierarchy** in which **environment variables take precedence over the configuration file**. This approach is recommended for Docker deployments and quick out-of-the-box customization. diff --git a/helm/Chart.yaml b/helm/Chart.yaml index 92d530c..e4e1cee 100644 --- a/helm/Chart.yaml +++ b/helm/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: krawl-chart description: A Helm chart for Krawl honeypot server type: application -version: 1.1.0 -appVersion: 1.1.0 +version: 1.1.3 +appVersion: 1.1.3 keywords: - honeypot - security diff --git a/helm/README.md b/helm/README.md index 09c2387..268ca00 100644 --- a/helm/README.md +++ b/helm/README.md @@ -14,7 +14,7 @@ A Helm chart for deploying the Krawl honeypot application on Kubernetes. 
```bash helm install krawl oci://ghcr.io/blessedrebus/krawl-chart \ - --version 1.1.0 \ + --version 1.1.3 \ --namespace krawl-system \ --create-namespace \ -f values.yaml # optional @@ -169,7 +169,7 @@ kubectl get secret krawl-server -n krawl-system \ You can override individual values with `--set` without a values file: ```bash -helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.0 \ +helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.3 \ --set ingress.hosts[0].host=honeypot.example.com \ --set config.canary.token_url=https://canarytokens.com/your-token ``` @@ -177,7 +177,7 @@ helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.0 \ ## Upgrading ```bash -helm upgrade krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.0 -f values.yaml +helm upgrade krawl oci://ghcr.io/blessedrebus/krawl-chart --version 1.1.3 -f values.yaml ``` ## Uninstalling diff --git a/helm/values.yaml b/helm/values.yaml index 8b4a907..3bdebd9 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -3,7 +3,7 @@ replicaCount: 1 image: repository: ghcr.io/blessedrebus/krawl pullPolicy: Always - tag: "1.1.0" + tag: "1.1.3" imagePullSecrets: [] nameOverride: "krawl" diff --git a/img/geoip_dashboard.png b/img/geoip_dashboard.png index 1c354bb..5a4f389 100644 Binary files a/img/geoip_dashboard.png and b/img/geoip_dashboard.png differ diff --git a/img/use-case.drawio b/img/use-case.drawio new file mode 100644 index 0000000..e8d0d8f --- /dev/null +++ b/img/use-case.drawio @@ -0,0 +1,120 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/img/use-case.png b/img/use-case.png new file mode 100644 index 0000000..2346423 Binary files /dev/null and b/img/use-case.png differ diff --git a/src/database.py b/src/database.py index cbee4a0..803e7e7 
100644 --- a/src/database.py +++ b/src/database.py @@ -815,24 +815,25 @@ class DatabaseManager: def flag_stale_ips_for_reevaluation(self) -> int: """ Flag IPs for reevaluation where: - - last_seen is between 5 and 30 days ago + - last_seen is newer than the configured retention period - last_analysis is more than 5 days ago Returns: Number of IPs flagged for reevaluation """ + from config import get_config + session = self.session try: now = datetime.now() - last_seen_lower = now - timedelta(days=30) - last_seen_upper = now - timedelta(days=5) + retention_days = get_config().database_retention_days + last_seen_cutoff = now - timedelta(days=retention_days) last_analysis_cutoff = now - timedelta(days=5) count = ( session.query(IpStats) .filter( - IpStats.last_seen >= last_seen_lower, - IpStats.last_seen <= last_seen_upper, + IpStats.last_seen >= last_seen_cutoff, IpStats.last_analysis <= last_analysis_cutoff, IpStats.need_reevaluation == False, IpStats.manual_category == False, @@ -882,6 +883,7 @@ class DatabaseManager: ip_filter: Optional[str] = None, suspicious_only: bool = False, since_minutes: Optional[int] = None, + sort_order: str = "desc", ) -> Dict[str, Any]: """ Retrieve access logs with pagination and optional filtering. 
@@ -892,6 +894,7 @@ class DatabaseManager: ip_filter: Filter by IP address suspicious_only: Only return suspicious requests since_minutes: Only return logs from the last N minutes + sort_order: Sort direction for timestamp ('asc' or 'desc') Returns: List of access log dictionaries @@ -899,7 +902,12 @@ class DatabaseManager: session = self.session try: offset = (page - 1) * page_size - query = session.query(AccessLog).order_by(AccessLog.timestamp.desc()) + order = ( + AccessLog.timestamp.asc() + if sort_order == "asc" + else AccessLog.timestamp.desc() + ) + query = session.query(AccessLog).order_by(order) if ip_filter: query = query.filter(AccessLog.ip == sanitize_ip(ip_filter)) @@ -1503,6 +1511,7 @@ class DatabaseManager: "path": log.path, "user_agent": log.user_agent, "timestamp": log.timestamp.isoformat(), + "log_id": log.id, } for log in logs ] diff --git a/src/routes/htmx.py b/src/routes/htmx.py index 303bce5..549f044 100644 --- a/src/routes/htmx.py +++ b/src/routes/htmx.py @@ -180,7 +180,10 @@ async def htmx_access_logs_by_ip( ): db = get_db() result = db.get_access_logs_paginated( - page=max(1, page), page_size=25, ip_filter=ip_filter + page=max(1, page), + page_size=25, + ip_filter=ip_filter, + sort_order=sort_order if sort_order in ("asc", "desc") else "desc", ) # Normalize pagination key (DB returns total_attackers, template expects total) diff --git a/src/tasks/db_retention.py b/src/tasks/db_retention.py index af803c6..ab4af86 100644 --- a/src/tasks/db_retention.py +++ b/src/tasks/db_retention.py @@ -7,6 +7,8 @@ Periodically deletes old records based on configured retention_days. from datetime import datetime, timedelta +from sqlalchemy import or_ + from database import get_database from logger import get_app_logger @@ -26,12 +28,18 @@ app_logger = get_app_logger() def main(): """ - Delete access logs, credential attempts, and attack detections - older than the configured retention period. + Delete old records based on the configured retention period. 
+ Keeps suspicious access logs, their attack detections, linked IPs, + category history, and all credential attempts. """ try: from config import get_config - from models import AccessLog, CredentialAttempt, AttackDetection + from models import ( + AccessLog, + AttackDetection, + IpStats, + CategoryHistory, + ) config = get_config() retention_days = config.database_retention_days @@ -41,35 +49,71 @@ def main(): cutoff = datetime.now() - timedelta(days=retention_days) - # Delete attack detections linked to old access logs first (FK constraint) - old_log_ids = session.query(AccessLog.id).filter(AccessLog.timestamp < cutoff) + # Delete attack detections linked to old NON-suspicious access logs (FK constraint) + old_nonsuspicious_log_ids = session.query(AccessLog.id).filter( + AccessLog.timestamp < cutoff, + AccessLog.is_suspicious == False, + AccessLog.is_honeypot_trigger == False, + ) detections_deleted = ( session.query(AttackDetection) - .filter(AttackDetection.access_log_id.in_(old_log_ids)) + .filter(AttackDetection.access_log_id.in_(old_nonsuspicious_log_ids)) .delete(synchronize_session=False) ) - # Delete old access logs + # Delete old non-suspicious access logs (keep suspicious ones) logs_deleted = ( session.query(AccessLog) - .filter(AccessLog.timestamp < cutoff) + .filter( + AccessLog.timestamp < cutoff, + AccessLog.is_suspicious == False, + AccessLog.is_honeypot_trigger == False, + ) .delete(synchronize_session=False) ) - # Delete old credential attempts - creds_deleted = ( - session.query(CredentialAttempt) - .filter(CredentialAttempt.timestamp < cutoff) + # IPs to preserve: those with any suspicious access logs + preserved_ips = ( + session.query(AccessLog.ip) + .filter( + or_( + AccessLog.is_suspicious == True, + AccessLog.is_honeypot_trigger == True, + ) + ) + .distinct() + ) + + # Delete stale IPs, but keep those linked to suspicious logs + ips_deleted = ( + session.query(IpStats) + .filter( + IpStats.last_seen < cutoff, + 
~IpStats.ip.in_(preserved_ips), + ) + .delete(synchronize_session=False) + ) + + # Delete old category history, but keep records for preserved IPs + history_deleted = ( + session.query(CategoryHistory) + .filter( + CategoryHistory.timestamp < cutoff, + ~CategoryHistory.ip.in_(preserved_ips), + ) .delete(synchronize_session=False) ) session.commit() - if logs_deleted or creds_deleted or detections_deleted: + total = logs_deleted + detections_deleted + ips_deleted + history_deleted + if total: app_logger.info( f"DB retention: Deleted {logs_deleted} access logs, " f"{detections_deleted} attack detections, " - f"{creds_deleted} credential attempts older than {retention_days} days" + f"{ips_deleted} stale IPs, " + f"{history_deleted} category history records " + f"older than {retention_days} days" ) except Exception as e: diff --git a/src/templates/jinja2/dashboard/partials/access_by_ip_table.html b/src/templates/jinja2/dashboard/partials/access_by_ip_table.html index 5e7bd6c..e260f56 100644 --- a/src/templates/jinja2/dashboard/partials/access_by_ip_table.html +++ b/src/templates/jinja2/dashboard/partials/access_by_ip_table.html @@ -45,7 +45,10 @@ {{ log.timestamp | format_ts }} {% if log.id %} - + {% endif %} diff --git a/src/templates/jinja2/dashboard/partials/attack_types_table.html b/src/templates/jinja2/dashboard/partials/attack_types_table.html index 4ac3369..fd80ac1 100644 --- a/src/templates/jinja2/dashboard/partials/attack_types_table.html +++ b/src/templates/jinja2/dashboard/partials/attack_types_table.html @@ -62,7 +62,10 @@ {{ attack.timestamp | format_ts }} {% if attack.log_id %} - + {% endif %} + + diff --git a/src/templates/jinja2/dashboard/partials/search_results.html b/src/templates/jinja2/dashboard/partials/search_results.html index 1ae0d41..70d8429 100644 --- a/src/templates/jinja2/dashboard/partials/search_results.html +++ b/src/templates/jinja2/dashboard/partials/search_results.html @@ -118,7 +118,10 @@ {{ attack.timestamp | format_ts }} {% if 
attack.log_id %} - + {% endif %} diff --git a/src/templates/jinja2/dashboard/partials/suspicious_table.html b/src/templates/jinja2/dashboard/partials/suspicious_table.html index 333e8df..0ceb72b 100644 --- a/src/templates/jinja2/dashboard/partials/suspicious_table.html +++ b/src/templates/jinja2/dashboard/partials/suspicious_table.html @@ -8,7 +8,7 @@ Path User-Agent Time - + @@ -24,7 +24,13 @@ {{ activity.path | e }} {{ (activity.user_agent | default(''))[:80] | e }} {{ activity.timestamp | format_ts(time_only=True) }} - + + {% if activity.log_id %} + + {% endif %} diff --git a/src/templates/static/css/dashboard.css b/src/templates/static/css/dashboard.css index 5074528..7297e89 100644 --- a/src/templates/static/css/dashboard.css +++ b/src/templates/static/css/dashboard.css @@ -1108,20 +1108,47 @@ tbody { border-top: 1px solid #30363d; border-radius: 0 0 6px 6px; text-align: right; + display: flex; + justify-content: flex-end; + gap: 8px; } -.raw-request-download-btn { - padding: 8px 16px; - background: #238636; - color: #ffffff; - border: none; +.raw-request-icon-btn { + position: relative; + display: inline-flex; + align-items: center; + justify-content: center; + width: 36px; + height: 36px; + background: #21262d; + color: #8b949e; + border: 1px solid #30363d; border-radius: 6px; - font-weight: 500; - font-size: 13px; cursor: pointer; - transition: background 0.2s; + transition: all 0.2s; } -.raw-request-download-btn:hover { - background: #2ea043; +.raw-request-icon-btn:hover { + background: #30363d; + color: #58a6ff; + border-color: #58a6ff; +} +.raw-request-icon-tooltip { + position: absolute; + bottom: calc(100% + 6px); + left: 50%; + transform: translateX(-50%); + padding: 4px 8px; + background: #1c2128; + color: #e6edf3; + border: 1px solid #30363d; + border-radius: 4px; + font-size: 11px; + white-space: nowrap; + opacity: 0; + pointer-events: none; + transition: opacity 0.15s; +} +.raw-request-icon-btn:hover .raw-request-icon-tooltip { + opacity: 1; } /* 
Attack Types Cell Styling */ @@ -1576,19 +1603,45 @@ tbody { /* Dynamically injected button styles (previously in JS) */ .view-btn { - padding: 4px 10px; - background: #21262d; + position: relative; + display: inline-flex; + align-items: center; + justify-content: center; + padding: 4px; + background: none; + color: #8b949e; + border: none; + border-radius: 4px; + cursor: pointer; + transition: color 0.2s, background 0.2s; +} +.view-btn:hover { color: #58a6ff; + background: rgba(88, 166, 255, 0.1); +} +.view-btn svg { + width: 18px; + height: 18px; + fill: currentColor; +} +.view-btn-tooltip { + position: absolute; + bottom: calc(100% + 6px); + left: 50%; + transform: translateX(-50%); + padding: 4px 8px; + background: #1c2128; + color: #e6edf3; border: 1px solid #30363d; border-radius: 4px; font-size: 11px; - cursor: pointer; - transition: all 0.2s; white-space: nowrap; + opacity: 0; + pointer-events: none; + transition: opacity 0.15s; } -.view-btn:hover { - background: #30363d; - border-color: #58a6ff; +.view-btn:hover .view-btn-tooltip { + opacity: 1; } .inspect-btn { display: inline-flex; @@ -1603,8 +1656,8 @@ tbody { transition: color 0.2s, background 0.2s; } .inspect-btn svg { - width: 16px; - height: 16px; + width: 18px; + height: 18px; fill: currentColor; } .inspect-btn:hover { diff --git a/src/templates/static/js/dashboard.js b/src/templates/static/js/dashboard.js index e6e848b..329e1e4 100644 --- a/src/templates/static/js/dashboard.js +++ b/src/templates/static/js/dashboard.js @@ -111,6 +111,20 @@ document.addEventListener('alpine:init', () => { this.rawModal.logId = null; }, + async copyRawRequest(event) { + if (!this.rawModal.content) return; + const btn = event.currentTarget; + const originalHTML = btn.innerHTML; + const checkIcon = ''; + try { + await navigator.clipboard.writeText(this.rawModal.content); + btn.innerHTML = checkIcon; + } catch { + btn.style.color = '#f85149'; + } + setTimeout(() => { btn.innerHTML = originalHTML; btn.style.color = ''; 
}, 1500); + }, + downloadRawRequest() { if (!this.rawModal.content) return; const blob = new Blob([this.rawModal.content], { type: 'text/plain' });