diff --git a/.github/workflows/security-scan.yml b/.github/workflows/security-scan.yml index 732b1b7..4b471cd 100644 --- a/.github/workflows/security-scan.yml +++ b/.github/workflows/security-scan.yml @@ -50,7 +50,7 @@ jobs: run: safety check --json || true - name: Trivy vulnerability scan - uses: aquasecurity/trivy-action@master + uses: aquasecurity/trivy-action@0.31.0 with: scan-type: 'fs' scan-ref: '.' diff --git a/config.yaml b/config.yaml index 9d736e5..dd61720 100644 --- a/config.yaml +++ b/config.yaml @@ -33,6 +33,9 @@ backups: exports: path: "exports" +logging: + level: "DEBUG" # DEBUG, INFO, WARNING, ERROR, CRITICAL + database: path: "data/krawl.db" retention_days: 30 diff --git a/helm/Chart.yaml b/helm/Chart.yaml index 26ce1ef..15ffe7c 100644 --- a/helm/Chart.yaml +++ b/helm/Chart.yaml @@ -2,8 +2,8 @@ apiVersion: v2 name: krawl-chart description: A Helm chart for Krawl honeypot server type: application -version: 1.0.9 -appVersion: 1.0.9 +version: 1.0.10 +appVersion: 1.0.10 keywords: - honeypot - security diff --git a/helm/templates/configmap.yaml b/helm/templates/configmap.yaml index ed38d8d..73ffbb5 100644 --- a/helm/templates/configmap.yaml +++ b/helm/templates/configmap.yaml @@ -28,6 +28,8 @@ data: enabled: {{ .Values.config.backups.enabled }} exports: path: {{ .Values.config.exports.path | quote }} + logging: + level: {{ .Values.config.logging.level | quote }} database: path: {{ .Values.config.database.path | quote }} retention_days: {{ .Values.config.database.retention_days }} diff --git a/helm/templates/deployment.yaml b/helm/templates/deployment.yaml index f24261c..3676817 100644 --- a/helm/templates/deployment.yaml +++ b/helm/templates/deployment.yaml @@ -8,6 +8,8 @@ spec: {{- if not .Values.autoscaling.enabled }} replicas: {{ .Values.replicaCount }} {{- end }} + strategy: + type: Recreate selector: matchLabels: {{- include "krawl.selectorLabels" . | nindent 6 }} @@ -29,7 +31,7 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} containers: - - name: {{ .Chart.Name }} + - name: krawl {{- with .Values.securityContext }} securityContext: {{- toYaml . | nindent 12 }} diff --git a/helm/values.yaml b/helm/values.yaml index 20e7b3f..df4df23 100644 --- a/helm/values.yaml +++ b/helm/values.yaml @@ -86,10 +86,12 @@ config: secret_path: null # Auto-generated if not set, or set to "/my-secret-dashboard" backups: path: "backups" - enabled: true + enabled: false cron: "*/30 * * * *" exports: path: "exports" + logging: + level: "INFO" # DEBUG, INFO, WARNING, ERROR, CRITICAL database: path: "data/krawl.db" retention_days: 30 diff --git a/kubernetes/krawl-all-in-one-deploy.yaml b/kubernetes/krawl-all-in-one-deploy.yaml index 767c080..38e9c9b 100644 --- a/kubernetes/krawl-all-in-one-deploy.yaml +++ b/kubernetes/krawl-all-in-one-deploy.yaml @@ -68,6 +68,14 @@ data: token_tries: 10 dashboard: secret_path: null + backups: + path: "backups" + cron: "*/30 * * * *" + enabled: false + exports: + path: "exports" + logging: + level: "INFO" database: path: "data/krawl.db" retention_days: 30 @@ -154,6 +162,8 @@ metadata: app.kubernetes.io/version: "1.0.0" spec: replicas: 1 + strategy: + type: Recreate selector: matchLabels: app.kubernetes.io/name: krawl diff --git a/kubernetes/manifests/configmap.yaml b/kubernetes/manifests/configmap.yaml index cdf6f1b..7782c9a 100644 --- a/kubernetes/manifests/configmap.yaml +++ b/kubernetes/manifests/configmap.yaml @@ -26,6 +26,14 @@ data: token_tries: 10 dashboard: secret_path: null + backups: + path: "backups" + cron: "*/30 * * * *" + enabled: false + exports: + path: "exports" + logging: + level: "INFO" database: path: "data/krawl.db" retention_days: 30 diff --git a/kubernetes/manifests/deployment.yaml b/kubernetes/manifests/deployment.yaml index 4c87a73..aff7469 100644 --- a/kubernetes/manifests/deployment.yaml +++ b/kubernetes/manifests/deployment.yaml @@ -10,6 +10,8 @@ metadata: app.kubernetes.io/version: "1.0.0" spec: replicas: 1 + strategy: + type: Recreate selector: matchLabels: app.kubernetes.io/name: krawl diff --git a/src/app.py b/src/app.py index 788bcf2..2b2df92 100644 --- a/src/app.py +++ b/src/app.py @@ -26,7 +26,7 @@ async def lifespan(app: FastAPI): config = get_config() # Initialize logging - initialize_logging() + initialize_logging(log_level=config.log_level) app_logger = get_app_logger() # Initialize database and run pending migrations before accepting traffic diff --git a/src/config.py b/src/config.py index 3bdf7e5..8344883 100644 --- a/src/config.py +++ b/src/config.py @@ -56,6 +56,8 @@ class Config: user_agents_used_threshold: float = None attack_urls_threshold: float = None + log_level: str = "INFO" + _server_ip: Optional[str] = None _server_ip_cache_time: float = 0 _ip_cache_ttl: int = 300 @@ -163,6 +165,7 @@ class Config: behavior = data.get("behavior", {}) analyzer = data.get("analyzer") or {} crawl = data.get("crawl", {}) + logging_cfg = data.get("logging", {}) # Handle dashboard_secret_path - auto-generate if null/not set dashboard_path = dashboard.get("secret_path") @@ -217,6 +220,9 @@ class Config: ), max_pages_limit=crawl.get("max_pages_limit", 250), ban_duration_seconds=crawl.get("ban_duration_seconds", 600), + log_level=os.getenv( + "KRAWL_LOG_LEVEL", logging_cfg.get("level", "INFO") + ).upper(), ) diff --git a/src/database.py b/src/database.py index 9daca49..cbee4a0 100644 --- a/src/database.py +++ b/src/database.py @@ -815,8 +815,8 @@ class DatabaseManager: def flag_stale_ips_for_reevaluation(self) -> int: """ Flag IPs for reevaluation where: - - last_seen is between 15 and 30 days ago - - last_analysis is more than 10 days ago (or never analyzed) + - last_seen is between 5 and 30 days ago + - last_analysis is more than 5 days ago Returns: Number of IPs flagged for reevaluation @@ -825,18 +825,15 @@ class DatabaseManager: try: now = datetime.now() last_seen_lower = now - timedelta(days=30) - last_seen_upper = now - timedelta(days=15) - last_analysis_cutoff = now - timedelta(days=10) + last_seen_upper = now - timedelta(days=5) + last_analysis_cutoff = now - timedelta(days=5) count = ( session.query(IpStats) .filter( IpStats.last_seen >= last_seen_lower, IpStats.last_seen <= last_seen_upper, - or_( - IpStats.last_analysis <= last_analysis_cutoff, - IpStats.last_analysis.is_(None), - ), + IpStats.last_analysis <= last_analysis_cutoff, IpStats.need_reevaluation == False, IpStats.manual_category == False, ) @@ -850,6 +847,99 @@ class DatabaseManager: except Exception as e: session.rollback() raise + + def flag_all_ips_for_reevaluation(self) -> int: + """ + Flag ALL IPs for reevaluation, regardless of staleness. + Skips IPs that have a manual category set. + + Returns: + Number of IPs flagged for reevaluation + """ + session = self.session + try: + count = ( + session.query(IpStats) + .filter( + IpStats.need_reevaluation == False, + IpStats.manual_category == False, + ) + .update( + {IpStats.need_reevaluation: True}, + synchronize_session=False, + ) + ) + session.commit() + return count + except Exception as e: + session.rollback() + raise + + def get_access_logs_paginated( + self, + page: int = 1, + page_size: int = 25, + ip_filter: Optional[str] = None, + suspicious_only: bool = False, + since_minutes: Optional[int] = None, + ) -> Dict[str, Any]: + """ + Retrieve access logs with pagination and optional filtering. + + Args: + page: Page to retrieve + page_size: Number of records for page + ip_filter: Filter by IP address + suspicious_only: Only return suspicious requests + since_minutes: Only return logs from the last N minutes + + Returns: + List of access log dictionaries + """ + session = self.session + try: + offset = (page - 1) * page_size + query = session.query(AccessLog).order_by(AccessLog.timestamp.desc()) + + if ip_filter: + query = query.filter(AccessLog.ip == sanitize_ip(ip_filter)) + if suspicious_only: + query = query.filter(AccessLog.is_suspicious == True) + if since_minutes is not None: + cutoff_time = datetime.now() - timedelta(minutes=since_minutes) + query = query.filter(AccessLog.timestamp >= cutoff_time) + + logs = query.offset(offset).limit(page_size).all() + # Get total count of attackers + total_access_logs = ( + session.query(AccessLog) + .filter(AccessLog.ip == sanitize_ip(ip_filter)) + .count() + ) + total_pages = (total_access_logs + page_size - 1) // page_size + + return { + "access_logs": [ + { + "id": log.id, + "ip": log.ip, + "path": log.path, + "user_agent": log.user_agent, + "method": log.method, + "is_suspicious": log.is_suspicious, + "is_honeypot_trigger": log.is_honeypot_trigger, + "timestamp": log.timestamp.isoformat(), + "attack_types": [d.attack_type for d in log.attack_detections], + } + for log in logs + ], + "pagination": { + "page": page, + "page_size": page_size, + "total_logs": total_access_logs, + "total_pages": total_pages, + }, + } finally: self.close_session() @@ -1018,6 +1108,8 @@ class DatabaseManager: "region": stat.region, "region_name": stat.region_name, "timezone": stat.timezone, + "latitude": stat.latitude, + "longitude": stat.longitude, "isp": stat.isp, "reverse": stat.reverse, "asn": stat.asn, @@ -1316,26 +1408,16 @@ class DatabaseManager: """ session = self.session try: - # Get server IP to filter it out from config import get_config config = get_config() server_ip = config.get_server_ip() - results = ( - session.query(AccessLog.ip, func.count(AccessLog.id).label("count")) - .group_by(AccessLog.ip) - .order_by(func.count(AccessLog.id).desc()) - .all() - ) + query = session.query(IpStats.ip, IpStats.total_requests) + query = self._public_ip_filter(query, IpStats.ip, server_ip) + results = query.order_by(IpStats.total_requests.desc()).limit(limit).all() - # Filter out local/private IPs and server IP, then limit results - filtered = [ - (row.ip, row.count) - for row in results - if is_valid_public_ip(row.ip, server_ip) - ] - return filtered[:limit] + return [(row.ip, row.total_requests) for row in results] finally: self.close_session() @@ -1402,23 +1484,18 @@ class DatabaseManager: """ session = self.session try: - # Get server IP to filter it out from config import get_config config = get_config() server_ip = config.get_server_ip() - logs = ( + query = ( session.query(AccessLog) .filter(AccessLog.is_suspicious == True) .order_by(AccessLog.timestamp.desc()) - .all() ) - - # Filter out local/private IPs and server IP - filtered_logs = [ - log for log in logs if is_valid_public_ip(log.ip, server_ip) - ] + query = self._public_ip_filter(query, AccessLog.ip, server_ip) + logs = query.limit(limit).all() return [ { @@ -1427,7 +1504,7 @@ class DatabaseManager: "user_agent": log.user_agent, "timestamp": log.timestamp.isoformat(), } - for log in filtered_logs[:limit] + for log in logs ] finally: self.close_session() @@ -1532,44 +1609,59 @@ class DatabaseManager: offset = (page - 1) * page_size - # Get honeypot triggers grouped by IP - results = ( - session.query(AccessLog.ip, AccessLog.path) - .filter(AccessLog.is_honeypot_trigger == True) - .all() + # Count distinct paths per IP using SQL GROUP BY + count_col = func.count(distinct(AccessLog.path)).label("path_count") + base_query = session.query(AccessLog.ip, count_col).filter( + AccessLog.is_honeypot_trigger == True + ) + base_query = self._public_ip_filter(base_query, AccessLog.ip, server_ip) + base_query = base_query.group_by(AccessLog.ip) + + # Get total count of distinct honeypot IPs + total_honeypots = base_query.count() + + # Apply sorting + if sort_by == "count": + order_expr = ( + count_col.desc() if sort_order == "desc" else count_col.asc() + ) + else: + order_expr = ( + AccessLog.ip.desc() if sort_order == "desc" else AccessLog.ip.asc() + ) + + ip_rows = ( + base_query.order_by(order_expr).offset(offset).limit(page_size).all() ) - # Group paths by IP, filtering out invalid IPs - ip_paths: Dict[str, List[str]] = {} - for row in results: - if not is_valid_public_ip(row.ip, server_ip): - continue - if row.ip not in ip_paths: - ip_paths[row.ip] = [] - if row.path not in ip_paths[row.ip]: - ip_paths[row.ip].append(row.path) - - # Create list and sort - honeypot_list = [ - {"ip": ip, "paths": paths, "count": len(paths)} - for ip, paths in ip_paths.items() - ] - - if sort_by == "count": - honeypot_list.sort( - key=lambda x: x["count"], reverse=(sort_order == "desc") - ) - else: # sort by ip - honeypot_list.sort( - key=lambda x: x["ip"], reverse=(sort_order == "desc") + # Fetch distinct paths only for the paginated IPs + paginated_ips = [row.ip for row in ip_rows] + honeypot_list = [] + if paginated_ips: + path_rows = ( + session.query(AccessLog.ip, AccessLog.path) + .filter( + AccessLog.is_honeypot_trigger == True, + AccessLog.ip.in_(paginated_ips), + ) + .distinct(AccessLog.ip, AccessLog.path) + .all() ) + ip_paths: Dict[str, List[str]] = {} + for row in path_rows: + ip_paths.setdefault(row.ip, []).append(row.path) - total_honeypots = len(honeypot_list) - paginated = honeypot_list[offset : offset + page_size] - total_pages = (total_honeypots + page_size - 1) // page_size + # Preserve the order from the sorted query + for row in ip_rows: + paths = ip_paths.get(row.ip, []) + honeypot_list.append( + {"ip": row.ip, "paths": paths, "count": row.path_count} + ) + + total_pages = max(1, (total_honeypots + page_size - 1) // page_size) return { - "honeypots": paginated, + "honeypots": honeypot_list, "pagination": { "page": page, "page_size": page_size, @@ -1668,6 +1760,9 @@ class DatabaseManager: """ Retrieve paginated list of top IP addresses by access count. + Uses the IpStats table (which already stores total_requests per IP) + instead of doing a costly GROUP BY on the large access_logs table. + Args: page: Page number (1-indexed) page_size: Number of results per page @@ -1686,30 +1781,34 @@ class DatabaseManager: offset = (page - 1) * page_size - results = ( - session.query(AccessLog.ip, func.count(AccessLog.id).label("count")) - .group_by(AccessLog.ip) - .all() - ) + base_query = session.query(IpStats) + base_query = self._public_ip_filter(base_query, IpStats.ip, server_ip) - # Filter out local/private IPs and server IP, then sort - filtered = [ - {"ip": row.ip, "count": row.count} - for row in results - if is_valid_public_ip(row.ip, server_ip) - ] + total_ips = base_query.count() if sort_by == "count": - filtered.sort(key=lambda x: x["count"], reverse=(sort_order == "desc")) - else: # sort by ip - filtered.sort(key=lambda x: x["ip"], reverse=(sort_order == "desc")) + order_col = IpStats.total_requests + else: + order_col = IpStats.ip - total_ips = len(filtered) - paginated = filtered[offset : offset + page_size] - total_pages = (total_ips + page_size - 1) // page_size + if sort_order == "desc": + base_query = base_query.order_by(order_col.desc()) + else: + base_query = base_query.order_by(order_col.asc()) + + results = base_query.offset(offset).limit(page_size).all() + + total_pages = max(1, (total_ips + page_size - 1) // page_size) return { - "ips": paginated, + "ips": [ + { + "ip": row.ip, + "count": row.total_requests, + "category": row.category or "unknown", + } + for row in results + ], "pagination": { "page": page, "page_size": page_size, @@ -1743,28 +1842,32 @@ class DatabaseManager: try: offset = (page - 1) * page_size - results = ( - session.query(AccessLog.path, func.count(AccessLog.id).label("count")) - .group_by(AccessLog.path) - .all() + count_col = func.count(AccessLog.id).label("count") + + # Get total number of distinct paths + total_paths = ( + session.query(func.count(distinct(AccessLog.path))).scalar() or 0 ) - # Create list and sort - paths_list = [{"path": row.path, "count": row.count} for row in results] + # Build query with SQL-level sorting and pagination + query = session.query(AccessLog.path, count_col).group_by(AccessLog.path) if sort_by == "count": - paths_list.sort( - key=lambda x: x["count"], reverse=(sort_order == "desc") + order_expr = ( + count_col.desc() if sort_order == "desc" else count_col.asc() + ) + else: + order_expr = ( + AccessLog.path.desc() + if sort_order == "desc" + else AccessLog.path.asc() ) - else: # sort by path - paths_list.sort(key=lambda x: x["path"], reverse=(sort_order == "desc")) - total_paths = len(paths_list) - paginated = paths_list[offset : offset + page_size] - total_pages = (total_paths + page_size - 1) // page_size + results = query.order_by(order_expr).offset(offset).limit(page_size).all() + total_pages = max(1, (total_paths + page_size - 1) // page_size) return { - "paths": paginated, + "paths": [{"path": row.path, "count": row.count} for row in results], "pagination": { "page": page, "page_size": page_size, @@ -1798,33 +1901,44 @@ class DatabaseManager: try: offset = (page - 1) * page_size - results = ( - session.query( - AccessLog.user_agent, func.count(AccessLog.id).label("count") - ) - .filter(AccessLog.user_agent.isnot(None), AccessLog.user_agent != "") - .group_by(AccessLog.user_agent) - .all() + count_col = func.count(AccessLog.id).label("count") + + base_filter = [AccessLog.user_agent.isnot(None), AccessLog.user_agent != ""] + + # Get total number of distinct user agents + total_uas = ( + session.query(func.count(distinct(AccessLog.user_agent))) + .filter(*base_filter) + .scalar() + or 0 ) - # Create list and sort - ua_list = [ - {"user_agent": row.user_agent, "count": row.count} for row in results - ] + # Build query with SQL-level sorting and pagination + query = ( + session.query(AccessLog.user_agent, count_col) + .filter(*base_filter) + .group_by(AccessLog.user_agent) + ) if sort_by == "count": - ua_list.sort(key=lambda x: x["count"], reverse=(sort_order == "desc")) - else: # sort by user_agent - ua_list.sort( - key=lambda x: x["user_agent"], reverse=(sort_order == "desc") + order_expr = ( + count_col.desc() if sort_order == "desc" else count_col.asc() + ) + else: + order_expr = ( + AccessLog.user_agent.desc() + if sort_order == "desc" + else AccessLog.user_agent.asc() ) - total_uas = len(ua_list) - paginated = ua_list[offset : offset + page_size] - total_pages = (total_uas + page_size - 1) // page_size + results = query.order_by(order_expr).offset(offset).limit(page_size).all() + total_pages = max(1, (total_uas + page_size - 1) // page_size) return { - "user_agents": paginated, + "user_agents": [ + {"user_agent": row.user_agent, "count": row.count} + for row in results + ], "pagination": { "page": page, "page_size": page_size, @@ -1841,6 +1955,7 @@ class DatabaseManager: page_size: int = 5, sort_by: str = "timestamp", sort_order: str = "desc", + ip_filter: Optional[str] = None, ) -> Dict[str, Any]: """ Retrieve paginated list of detected attack types with access logs. @@ -1850,6 +1965,7 @@ class DatabaseManager: page_size: Number of results per page sort_by: Field to sort by (timestamp, ip, attack_type) sort_order: Sort order (asc or desc) + ip_filter: Optional IP address to filter results Returns: Dictionary with attacks list and pagination info @@ -1865,18 +1981,22 @@ class DatabaseManager: sort_order.lower() if sort_order.lower() in {"asc", "desc"} else "desc" ) + # Base query filter + base_filters = [] + if ip_filter: + base_filters.append(AccessLog.ip == ip_filter) + # Count total unique access logs with attack detections - total_attacks = ( - session.query(AccessLog) - .join(AttackDetection) - .distinct(AccessLog.id) - .count() - ) + count_query = session.query(AccessLog).join(AttackDetection) + if base_filters: + count_query = count_query.filter(*base_filters) + total_attacks = count_query.distinct(AccessLog.id).count() # Get paginated access logs with attack detections - query = ( - session.query(AccessLog).join(AttackDetection).distinct(AccessLog.id) - ) + query = session.query(AccessLog).join(AttackDetection) + if base_filters: + query = query.filter(*base_filters) + query = query.distinct(AccessLog.id) if sort_by == "timestamp": query = query.order_by( @@ -1939,12 +2059,15 @@ class DatabaseManager: finally: self.close_session() - def get_attack_types_stats(self, limit: int = 20) -> Dict[str, Any]: + def get_attack_types_stats( + self, limit: int = 20, ip_filter: str | None = None + ) -> Dict[str, Any]: """ Get aggregated statistics for attack types (efficient for large datasets). Args: limit: Maximum number of attack types to return + ip_filter: Optional IP address to filter results for Returns: Dictionary with attack type counts @@ -1954,12 +2077,18 @@ class DatabaseManager: from sqlalchemy import func # Aggregate attack types with count + query = session.query( + AttackDetection.attack_type, + func.count(AttackDetection.id).label("count"), + ) + + if ip_filter: + query = query.join( + AccessLog, AttackDetection.access_log_id == AccessLog.id + ).filter(AccessLog.ip == ip_filter) + results = ( - session.query( - AttackDetection.attack_type, - func.count(AttackDetection.id).label("count"), - ) - .group_by(AttackDetection.attack_type) + query.group_by(AttackDetection.attack_type) .order_by(func.count(AttackDetection.id).desc()) .limit(limit) .all() @@ -1973,6 +2102,126 @@ class DatabaseManager: finally: self.close_session() + def search_attacks_and_ips( + self, + query: str, + page: int = 1, + page_size: int = 20, + ) -> Dict[str, Any]: + """ + Search attacks and IPs matching a query string. + + Searches across AttackDetection (attack_type, matched_pattern), + AccessLog (ip, path), and IpStats (ip, city, country, isp, asn_org). + + Args: + query: Search term (partial match) + page: Page number (1-indexed) + page_size: Results per page + + Returns: + Dictionary with matching attacks, ips, and pagination info + """ + session = self.session + try: + offset = (page - 1) * page_size + like_q = f"%{query}%" + + # --- Search attacks (AccessLog + AttackDetection) --- + attack_query = ( + session.query(AccessLog) + .join(AttackDetection) + .filter( + or_( + AccessLog.ip.ilike(like_q), + AccessLog.path.ilike(like_q), + AttackDetection.attack_type.ilike(like_q), + AttackDetection.matched_pattern.ilike(like_q), + ) + ) + .distinct(AccessLog.id) + ) + + total_attacks = attack_query.count() + attack_logs = ( + attack_query.order_by(AccessLog.timestamp.desc()) + .offset(offset) + .limit(page_size) + .all() + ) + + attacks = [ + { + "id": log.id, + "ip": log.ip, + "path": log.path, + "user_agent": log.user_agent, + "timestamp": log.timestamp.isoformat() if log.timestamp else None, + "attack_types": [d.attack_type for d in log.attack_detections], + "log_id": log.id, + } + for log in attack_logs + ] + + # --- Search IPs (IpStats) --- + ip_query = session.query(IpStats).filter( + or_( + IpStats.ip.ilike(like_q), + IpStats.city.ilike(like_q), + IpStats.country.ilike(like_q), + IpStats.country_code.ilike(like_q), + IpStats.isp.ilike(like_q), + IpStats.asn_org.ilike(like_q), + IpStats.reverse.ilike(like_q), + ) + ) + + total_ips = ip_query.count() + ips = ( + ip_query.order_by(IpStats.total_requests.desc()) + .offset(offset) + .limit(page_size) + .all() + ) + + ip_results = [ + { + "ip": stat.ip, + "total_requests": stat.total_requests, + "first_seen": ( + stat.first_seen.isoformat() if stat.first_seen else None + ), + "last_seen": stat.last_seen.isoformat() if stat.last_seen else None, + "country_code": stat.country_code, + "city": stat.city, + "category": stat.category, + "isp": stat.isp, + "asn_org": stat.asn_org, + } + for stat in ips + ] + + total = total_attacks + total_ips + total_pages = max( + 1, (max(total_attacks, total_ips) + page_size - 1) // page_size + ) + + return { + "attacks": attacks, + "ips": ip_results, + "query": query, + "pagination": { + "page": page, + "page_size": page_size, + "total_attacks": total_attacks, + "total_ips": total_ips, + "total": total, + "total_pages": total_pages, + }, + } + finally: + self.close_session() + # Module-level singleton instance _db_manager = DatabaseManager() diff --git a/src/dependencies.py b/src/dependencies.py index a713738..e1f908f 100644 --- a/src/dependencies.py +++ b/src/dependencies.py @@ -30,7 +30,7 @@ def get_templates() -> Jinja2Templates: return _templates -def _format_ts(value): +def _format_ts(value, time_only=False): """Custom Jinja2 filter for formatting ISO timestamps.""" if not value: return "N/A" @@ -39,6 +39,8 @@ def _format_ts(value): value = datetime.fromisoformat(value) except (ValueError, TypeError): return value + if time_only: + return value.strftime("%H:%M:%S") if value.date() == datetime.now().date(): return value.strftime("%H:%M:%S") return value.strftime("%m/%d/%Y %H:%M:%S") diff --git a/src/logger.py b/src/logger.py index 9762002..d556684 100644 --- a/src/logger.py +++ b/src/logger.py @@ -36,12 +36,13 @@ class LoggerManager: cls._instance._initialized = False return cls._instance - def initialize(self, log_dir: str = "logs") -> None: + def initialize(self, log_dir: str = "logs", log_level: str = "INFO") -> None: """ Initialize the logging system with rotating file handlers.loggers Args: log_dir: Directory for log files (created if not exists) + log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) """ if self._initialized: return @@ -59,9 +60,11 @@ class LoggerManager: max_bytes = 1048576 # 1MB backup_count = 5 + level = getattr(logging, log_level.upper(), logging.INFO) + # Setup application logger self._app_logger = logging.getLogger("krawl.app") - self._app_logger.setLevel(logging.INFO) + self._app_logger.setLevel(level) self._app_logger.handlers.clear() app_file_handler = RotatingFileHandler( @@ -78,7 +81,7 @@ class LoggerManager: # Setup access logger self._access_logger = logging.getLogger("krawl.access") - self._access_logger.setLevel(logging.INFO) + self._access_logger.setLevel(level) self._access_logger.handlers.clear() access_file_handler = RotatingFileHandler( @@ -95,7 +98,7 @@ class LoggerManager: # Setup credential logger (special format, no stream handler) self._credential_logger = logging.getLogger("krawl.credentials") - self._credential_logger.setLevel(logging.INFO) + self._credential_logger.setLevel(level) self._credential_logger.handlers.clear() # Credential logger uses a simple format: timestamp|ip|username|password|path @@ -152,6 +155,6 @@ def get_credential_logger() -> logging.Logger: return _logger_manager.credentials -def initialize_logging(log_dir: str = "logs") -> None: +def initialize_logging(log_dir: str = "logs", log_level: str = "INFO") -> None: """Initialize the logging system.""" - _logger_manager.initialize(log_dir) + _logger_manager.initialize(log_dir, log_level) diff --git a/src/routes/api.py b/src/routes/api.py index 02b52dc..d94b3b6 100644 --- a/src/routes/api.py +++ b/src/routes/api.py @@ -7,7 +7,6 @@ All endpoints are prefixed with the secret dashboard path. """ import os -import json from fastapi import APIRouter, Request, Response, Query from fastapi.responses import JSONResponse, PlainTextResponse @@ -215,12 +214,13 @@ async def top_user_agents( async def attack_types_stats( request: Request, limit: int = Query(20), + ip_filter: str = Query(None), ): db = get_db() limit = min(max(1, limit), 100) try: - result = db.get_attack_types_stats(limit=limit) + result = db.get_attack_types_stats(limit=limit, ip_filter=ip_filter) return JSONResponse(content=result, headers=_no_cache_headers()) except Exception as e: get_app_logger().error(f"Error fetching attack types stats: {e}") diff --git a/src/routes/dashboard.py b/src/routes/dashboard.py index 6f5773b..081336c 100644 --- a/src/routes/dashboard.py +++ b/src/routes/dashboard.py @@ -6,6 +6,8 @@ Renders the main dashboard page with server-side data for initial load. """ from fastapi import APIRouter, Request +from fastapi.responses import JSONResponse +from logger import get_app_logger from dependencies import get_db, get_templates @@ -21,7 +23,7 @@ async def dashboard_page(request: Request): # Get initial data for server-rendered sections stats = db.get_dashboard_counts() - suspicious = db.get_recent_suspicious(limit=20) + suspicious = db.get_recent_suspicious(limit=10) # Get credential count for the stats card cred_result = db.get_credentials_paginated(page=1, page_size=1) @@ -37,3 +39,36 @@ async def dashboard_page(request: Request): "suspicious_activities": suspicious, }, ) + + +@router.get("/ip/{ip_address:path}") +async def ip_page(ip_address: str, request: Request): + db = get_db() + try: + stats = db.get_ip_stats_by_ip(ip_address) + config = request.app.state.config + dashboard_path = "/" + config.dashboard_secret_path.lstrip("/") + + if stats: + # Transform fields for template compatibility + list_on = stats.get("list_on") or {} + stats["blocklist_memberships"] = list(list_on.keys()) if list_on else [] + stats["reverse_dns"] = stats.get("reverse") + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/ip.html", + { + "request": request, + "dashboard_path": dashboard_path, + "stats": stats, + "ip_address": ip_address, + }, + ) + else: + return JSONResponse( + content={"error": "IP not found"}, + ) + except Exception as e: + get_app_logger().error(f"Error fetching IP stats: {e}") + return JSONResponse(content={"error": str(e)}) diff --git a/src/routes/htmx.py b/src/routes/htmx.py index 4013ce5..303bce5 100644 --- a/src/routes/htmx.py +++ b/src/routes/htmx.py @@ -2,7 +2,7 @@ """ HTMX fragment endpoints. -Server-rendered HTML partials for table pagination, sorting, and IP details. +Server-rendered HTML partials for table pagination, sorting, IP details, and search. """ from fastapi import APIRouter, Request, Response, Query @@ -58,7 +58,7 @@ async def htmx_top_ips( ): db = get_db() result = db.get_top_ips_paginated( - page=max(1, page), page_size=5, sort_by=sort_by, sort_order=sort_order + page=max(1, page), page_size=8, sort_by=sort_by, sort_order=sort_order ) templates = get_templates() @@ -167,6 +167,42 @@ async def htmx_attackers( ) +# ── Access logs by ip ──────────────────────────────────────────────────────── + + +@router.get("/htmx/access-logs") +async def htmx_access_logs_by_ip( + request: Request, + page: int = Query(1), + sort_by: str = Query("total_requests"), + sort_order: str = Query("desc"), + ip_filter: str = Query("ip_filter"), +): + db = get_db() + result = db.get_access_logs_paginated( + page=max(1, page), page_size=25, ip_filter=ip_filter + ) + + # Normalize pagination key (DB returns total_attackers, template expects total) + pagination = result["pagination"] + if "total_access_logs" in pagination and "total" not in pagination: + pagination["total"] = pagination["total_access_logs"] + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/access_by_ip_table.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "items": result["access_logs"], + "pagination": pagination, + "sort_by": sort_by, + "sort_order": sort_order, + "ip_filter": ip_filter, + }, + ) + + # ── Credentials ────────────────────────────────────────────────────── @@ -205,10 +241,15 @@ async def htmx_attacks( page: int = Query(1), sort_by: str = Query("timestamp"), sort_order: str = Query("desc"), + ip_filter: str = Query(None), ): db = get_db() result = db.get_attack_types_paginated( - page=max(1, page), page_size=5, sort_by=sort_by, sort_order=sort_order + page=max(1, page), + page_size=5, + sort_by=sort_by, + sort_order=sort_order, + ip_filter=ip_filter, ) # Transform attack data for template (join attack_types list, map id to log_id) @@ -235,6 +276,7 @@ async def htmx_attacks( "pagination": result["pagination"], "sort_by": sort_by, "sort_order": sort_order, + "ip_filter": ip_filter or "", }, ) @@ -280,6 +322,34 @@ async def htmx_patterns( ) +# ── IP Insight (full IP page as partial) ───────────────────────────── + + +@router.get("/htmx/ip-insight/{ip_address:path}") +async def htmx_ip_insight(ip_address: str, request: Request): + db = get_db() + stats = db.get_ip_stats_by_ip(ip_address) + + if not stats: + stats = {"ip": ip_address, "total_requests": "N/A"} + + # Transform fields for template compatibility + list_on = stats.get("list_on") or {} + stats["blocklist_memberships"] = list(list_on.keys()) if list_on else [] + stats["reverse_dns"] = stats.get("reverse") + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/ip_insight.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "stats": stats, + "ip_address": ip_address, + }, + ) + + # ── IP Detail ──────────────────────────────────────────────────────── @@ -305,3 +375,33 @@ async def htmx_ip_detail(ip_address: str, request: Request): "stats": stats, }, ) + + +# ── Search ─────────────────────────────────────────────────────────── + + +@router.get("/htmx/search") +async def htmx_search( + request: Request, + q: str = Query(""), + page: int = Query(1), +): + q = q.strip() + if not q: + return Response(content="", media_type="text/html") + + db = get_db() + result = db.search_attacks_and_ips(query=q, page=max(1, page), page_size=20) + + templates = get_templates() + return templates.TemplateResponse( + "dashboard/partials/search_results.html", + { + "request": request, + "dashboard_path": _dashboard_path(request), + "attacks": result["attacks"], + "ips": result["ips"], + "query": q, + "pagination": result["pagination"], + }, + ) diff --git a/src/tasks/analyze_ips.py b/src/tasks/analyze_ips.py index 295cd92..7095a13 100644 --- a/src/tasks/analyze_ips.py +++ b/src/tasks/analyze_ips.py @@ -70,7 +70,7 @@ def main(): "risky_http_methods": 6, "robots_violations": 4, "uneven_request_timing": 3, - "different_user_agents": 8, + "different_user_agents": 2, "attack_url": 15, }, "good_crawler": { @@ -84,7 +84,7 @@ def main(): "risky_http_methods": 2, "robots_violations": 7, "uneven_request_timing": 0, - "different_user_agents": 5, + "different_user_agents": 7, "attack_url": 5, }, "regular_user": { diff --git a/src/tasks/flag_stale_ips.py b/src/tasks/flag_stale_ips.py index a9e8e01..0428e15 100644 --- a/src/tasks/flag_stale_ips.py +++ b/src/tasks/flag_stale_ips.py @@ -9,24 +9,37 @@ TASK_CONFIG = { "name": "flag-stale-ips", "cron": "0 2 * * *", # Run daily at 2 AM "enabled": True, - "run_when_loaded": False, + "run_when_loaded": True, } +# Set to True to force all IPs to be flagged for reevaluation on next run. +# Resets to False automatically after execution. +FORCE_IP_RESCAN = False + def main(): + global FORCE_IP_RESCAN + app_logger = get_app_logger() db = get_database() try: - count = db.flag_stale_ips_for_reevaluation() - if count > 0: + if FORCE_IP_RESCAN: + count = db.flag_all_ips_for_reevaluation() + FORCE_IP_RESCAN = False app_logger.info( - f"[Background Task] flag-stale-ips: Flagged {count} stale IPs for reevaluation" + f"[Background Task] flag-stale-ips: FORCE RESCAN - Flagged {count} IPs for reevaluation" ) else: - app_logger.debug( - "[Background Task] flag-stale-ips: No stale IPs found to flag" - ) + count = db.flag_stale_ips_for_reevaluation() + if count > 0: + app_logger.info( + f"[Background Task] flag-stale-ips: Flagged {count} stale IPs for reevaluation" + ) + else: + app_logger.debug( + "[Background Task] flag-stale-ips: No stale IPs found to flag" + ) except Exception as e: app_logger.error( f"[Background Task] flag-stale-ips: Error flagging stale IPs: {e}" diff --git a/src/templates/jinja2/base.html b/src/templates/jinja2/base.html index 1ba2af5..22105c4 100644 --- a/src/templates/jinja2/base.html +++ b/src/templates/jinja2/base.html @@ -5,15 +5,15 @@
Select an IP address from any table to view detailed insights.
++ {{ stats.city | default('') }}{% if stats.city and stats.country %}, {% endif %}{{ stats.country | default(stats.country_code | default('')) }} +
+ {% endif %} +| # | +Path | +User-Agent | ++ Time + | ++ |
|---|---|---|---|---|
| {{ loop.index + (pagination.page - 1) * pagination.page_size }} | +
+
+ {{ log.path | e }}
+ {% if log.path | length > 30 %}
+
+ {{ log.path | e }}
+ {% endif %}
+ |
+ {{ (log.user_agent | default(''))[:50] | e }} | +{{ log.timestamp | format_ts }} | ++ {% if log.id %} + + {% endif %} + | +
| No logs detected | ||||
| # | +IP Address | +Requests | +Category | +Location | +ISP / ASN | +Last Seen | ++ |
|---|---|---|---|---|---|---|---|
| {{ loop.index + (pagination.page - 1) * pagination.page_size }} | ++ {{ ip.ip | e }} + | +{{ ip.total_requests }} | ++ {% if ip.category %} + + {{ ip.category | e }} + + {% else %} + unknown + {% endif %} + | +{{ ip.city | default('') | e }}{% if ip.city and ip.country_code %}, {% endif %}{{ ip.country_code | default('N/A') | e }} | +{{ ip.isp | default(ip.asn_org | default('N/A')) | e }} | +{{ ip.last_seen | format_ts }} | ++ + | +
| # | +IP Address | +Path | +Attack Types | +User-Agent | +Time | +Actions | +
|---|---|---|---|---|---|---|
| {{ loop.index + (pagination.page - 1) * pagination.page_size }} | ++ {{ attack.ip | e }} + | +
+
+ {{ attack.path | e }}
+ {% if attack.path | length > 30 %}
+
+ {{ attack.path | e }}
+ {% endif %}
+ |
+
+
+ {% set types_str = attack.attack_types | join(', ') %}
+ {{ types_str | e }}
+ {% if types_str | length > 30 %}
+
+ {{ types_str | e }}
+ {% endif %}
+ |
+ {{ (attack.user_agent | default(''))[:50] | e }} | +{{ attack.timestamp | format_ts }} | ++ {% if attack.log_id %} + + {% endif %} + | +