Feat/attack map improvement (#57)

* feat: enhance IP reputation management with city data and geolocation integration

* feat: enhance dashboard with city coordinates and improved marker handling

* feat: update chart version to 0.2.1 in Chart.yaml, README.md, and values.yaml

* feat: update logo format and size in README.md

* feat: improve location display logic in dashboard for attackers and IPs
This commit is contained in:
Lorenzo Venerandi
2026-01-27 16:56:34 +01:00
committed by GitHub
parent 8c76f6c847
commit 5aca684df9
8 changed files with 428 additions and 95 deletions

View File

@@ -1,10 +1,10 @@
<h1 align="center">🕷️ Krawl</h1>
<h1 align="center">Krawl</h1>
<h3 align="center">
<a name="readme-top"></a>
<img
src="img/krawl-logo.jpg"
height="200"
src="img/krawl-svg.svg"
height="250"
>
</h3>
<div align="center">

View File

@@ -2,8 +2,8 @@ apiVersion: v2
name: krawl-chart
description: A Helm chart for Krawl honeypot server
type: application
version: 0.2.0
appVersion: 0.2.0
version: 0.2.1
appVersion: 0.2.1
keywords:
- honeypot
- security

View File

@@ -20,13 +20,13 @@ helm repo update
### Install from OCI Registry
```bash
helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 0.1.5-dev
helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 0.2.1
```
Or with a specific namespace:
```bash
helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 0.1.5-dev -n krawl --create-namespace
helm install krawl oci://ghcr.io/blessedrebus/krawl-chart --version 0.2.1 -n krawl --create-namespace
```
### Install the chart locally

View File

@@ -3,7 +3,7 @@ replicaCount: 1
image:
repository: ghcr.io/blessedrebus/krawl
pullPolicy: Always
tag: "latest"
tag: "0.2.1"
imagePullSecrets: []
nameOverride: "krawl"

View File

@@ -398,6 +398,7 @@ class DatabaseManager:
asn: str,
asn_org: str,
list_on: Dict[str, str],
city: Optional[str] = None,
) -> None:
"""
Update IP rep stats
@@ -408,6 +409,7 @@ class DatabaseManager:
asn: IP address ASN
asn_org: IP address ASN ORG
list_on: public lists containing the IP address
city: City name (optional)
"""
session = self.session
@@ -419,6 +421,8 @@ class DatabaseManager:
ip_stats.asn = asn
ip_stats.asn_org = asn_org
ip_stats.list_on = list_on
if city:
ip_stats.city = city
session.commit()
except Exception as e:
session.rollback()

View File

@@ -34,14 +34,17 @@ def main():
if payload.get("results"):
data = payload["results"][0]
country_iso_code = data["geoip_data"]["country_iso_code"]
asn = data["geoip_data"]["asn_autonomous_system_number"]
asn_org = data["geoip_data"]["asn_autonomous_system_organization"]
geoip_data = data["geoip_data"]
country_iso_code = geoip_data.get("country_iso_code")
asn = geoip_data.get("asn_autonomous_system_number")
asn_org = geoip_data.get("asn_autonomous_system_organization")
city = geoip_data.get("city_name") # Extract city name from API
list_on = data["list_on"]
sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
sanitized_asn = sanitize_for_storage(asn, 100)
sanitized_asn_org = sanitize_for_storage(asn_org, 100)
sanitized_city = sanitize_for_storage(city, 100) if city else None
sanitized_list_on = sanitize_dict(list_on, 100000)
db_manager.update_ip_rep_infos(
@@ -50,6 +53,7 @@ def main():
sanitized_asn,
sanitized_asn_org,
sanitized_list_on,
sanitized_city, # Pass city to database
)
except requests.RequestException as e:
app_logger.warning(f"Failed to fetch IP rep for {ip}: {e}")

View File

@@ -548,6 +548,11 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
background: #161b22;
border-top: 6px solid #30363d;
}}
/* Remove the default leaflet icon background */
.ip-custom-marker {{
background: none !important;
border: none !important;
}}
.ip-marker {{
border: 2px solid #fff;
border-radius: 50%;
@@ -558,27 +563,46 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
font-weight: bold;
color: white;
cursor: pointer;
transition: transform 0.2s, box-shadow 0.2s;
}}
.ip-marker:hover {{
transform: scale(1.15);
}}
.marker-attacker {{
background: #f85149;
box-shadow: 0 0 8px rgba(248, 81, 73, 0.8), inset 0 0 4px rgba(248, 81, 73, 0.5);
}}
.marker-attacker:hover {{
box-shadow: 0 0 15px rgba(248, 81, 73, 1), inset 0 0 6px rgba(248, 81, 73, 0.7);
}}
.marker-bad_crawler {{
background: #f0883e;
box-shadow: 0 0 8px rgba(240, 136, 62, 0.8), inset 0 0 4px rgba(240, 136, 62, 0.5);
}}
.marker-bad_crawler:hover {{
box-shadow: 0 0 15px rgba(240, 136, 62, 1), inset 0 0 6px rgba(240, 136, 62, 0.7);
}}
.marker-good_crawler {{
background: #3fb950;
box-shadow: 0 0 8px rgba(63, 185, 80, 0.8), inset 0 0 4px rgba(63, 185, 80, 0.5);
}}
.marker-good_crawler:hover {{
box-shadow: 0 0 15px rgba(63, 185, 80, 1), inset 0 0 6px rgba(63, 185, 80, 0.7);
}}
.marker-regular_user {{
background: #58a6ff;
box-shadow: 0 0 8px rgba(88, 166, 255, 0.8), inset 0 0 4px rgba(88, 166, 255, 0.5);
}}
.marker-regular_user:hover {{
box-shadow: 0 0 15px rgba(88, 166, 255, 1), inset 0 0 6px rgba(88, 166, 255, 0.7);
}}
.marker-unknown {{
background: #8b949e;
box-shadow: 0 0 8px rgba(139, 148, 158, 0.8), inset 0 0 4px rgba(139, 148, 158, 0.5);
}}
.marker-unknown:hover {{
box-shadow: 0 0 15px rgba(139, 148, 158, 1), inset 0 0 6px rgba(139, 148, 158, 0.7);
}}
.leaflet-bottom.leaflet-right {{
display: none !important;
}}
@@ -1011,7 +1035,7 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
if (stats.country_code || stats.city) {{
html += '<div class="stat-row">';
html += '<span class="stat-label-sm">Location:</span>';
html += `<span class="stat-value-sm">${{stats.city || ''}}${{stats.city && stats.country_code ? ', ' : ''}}${{stats.country_code || 'Unknown'}}</span>`;
html += `<span class="stat-value-sm">${{stats.city ? (stats.country_code ? `${{stats.city}}, ${{stats.country_code}}` : stats.city) : (stats.country_code || 'Unknown')}}</span>`;
html += '</div>';
}}
@@ -1345,7 +1369,7 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
<td>${{attacker.total_requests}}</td>
<td>${{formatTimestamp(attacker.first_seen)}}</td>
<td>${{formatTimestamp(attacker.last_seen)}}</td>
<td>${{attacker.city || 'Unknown'}}${{attacker.city && attacker.country_code ? ', ' : ''}}${{attacker.country_code || ''}}</td>
<td>${{attacker.city ? (attacker.country_code ? `${{attacker.city}}, ${{attacker.country_code}}` : attacker.city) : (attacker.country_code || 'Unknown')}}</td>
</tr>
<tr class="ip-stats-row" id="stats-row-${{attacker.ip.replace('.', '-')}}" style="display: none;">
<td colspan="6" class="ip-stats-cell">
@@ -1895,7 +1919,6 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
let allIps = [];
let mapMarkers = [];
let markerLayers = {{}};
let circleLayers = {{}};
const categoryColors = {{
attacker: '#f85149',
@@ -1945,7 +1968,60 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
// Get max request count for scaling
const maxRequests = Math.max(...allIps.map(ip => ip.total_requests || 0));
// Create a map of country locations (approximate country centers)
// City coordinates database (major cities worldwide)
const cityCoordinates = {{
// United States
'New York': [40.7128, -74.0060], 'Los Angeles': [34.0522, -118.2437],
'San Francisco': [37.7749, -122.4194], 'Chicago': [41.8781, -87.6298],
'Seattle': [47.6062, -122.3321], 'Miami': [25.7617, -80.1918],
'Boston': [42.3601, -71.0589], 'Atlanta': [33.7490, -84.3880],
'Dallas': [32.7767, -96.7970], 'Houston': [29.7604, -95.3698],
'Denver': [39.7392, -104.9903], 'Phoenix': [33.4484, -112.0740],
// Europe
'London': [51.5074, -0.1278], 'Paris': [48.8566, 2.3522],
'Berlin': [52.5200, 13.4050], 'Amsterdam': [52.3676, 4.9041],
'Moscow': [55.7558, 37.6173], 'Rome': [41.9028, 12.4964],
'Madrid': [40.4168, -3.7038], 'Barcelona': [41.3874, 2.1686],
'Milan': [45.4642, 9.1900], 'Vienna': [48.2082, 16.3738],
'Stockholm': [59.3293, 18.0686], 'Oslo': [59.9139, 10.7522],
'Copenhagen': [55.6761, 12.5683], 'Warsaw': [52.2297, 21.0122],
'Prague': [50.0755, 14.4378], 'Budapest': [47.4979, 19.0402],
'Athens': [37.9838, 23.7275], 'Lisbon': [38.7223, -9.1393],
'Brussels': [50.8503, 4.3517], 'Dublin': [53.3498, -6.2603],
'Zurich': [47.3769, 8.5417], 'Geneva': [46.2044, 6.1432],
'Helsinki': [60.1699, 24.9384], 'Bucharest': [44.4268, 26.1025],
'Saint Petersburg': [59.9343, 30.3351], 'Manchester': [53.4808, -2.2426],
'Roubaix': [50.6942, 3.1746], 'Frankfurt': [50.1109, 8.6821],
'Munich': [48.1351, 11.5820], 'Hamburg': [53.5511, 9.9937],
// Asia
'Tokyo': [35.6762, 139.6503], 'Beijing': [39.9042, 116.4074],
'Shanghai': [31.2304, 121.4737], 'Singapore': [1.3521, 103.8198],
'Mumbai': [19.0760, 72.8777], 'Delhi': [28.7041, 77.1025],
'Bangalore': [12.9716, 77.5946], 'Seoul': [37.5665, 126.9780],
'Hong Kong': [22.3193, 114.1694], 'Bangkok': [13.7563, 100.5018],
'Jakarta': [6.2088, 106.8456], 'Manila': [14.5995, 120.9842],
'Hanoi': [21.0285, 105.8542], 'Ho Chi Minh City': [10.8231, 106.6297],
'Taipei': [25.0330, 121.5654], 'Kuala Lumpur': [3.1390, 101.6869],
'Karachi': [24.8607, 67.0011], 'Islamabad': [33.6844, 73.0479],
'Dhaka': [23.8103, 90.4125], 'Colombo': [6.9271, 79.8612],
// South America
'São Paulo': [-23.5505, -46.6333], 'Rio de Janeiro': [-22.9068, -43.1729],
'Buenos Aires': [-34.6037, -58.3816], 'Bogotá': [4.7110, -74.0721],
'Lima': [-12.0464, -77.0428], 'Santiago': [-33.4489, -70.6693],
// Middle East & Africa
'Cairo': [30.0444, 31.2357], 'Dubai': [25.2048, 55.2708],
'Istanbul': [41.0082, 28.9784], 'Tel Aviv': [32.0853, 34.7818],
'Johannesburg': [26.2041, 28.0473], 'Lagos': [6.5244, 3.3792],
'Nairobi': [-1.2921, 36.8219], 'Cape Town': [-33.9249, 18.4241],
// Australia & Oceania
'Sydney': [-33.8688, 151.2093], 'Melbourne': [-37.8136, 144.9631],
'Brisbane': [-27.4698, 153.0251], 'Perth': [-31.9505, 115.8605],
'Auckland': [-36.8485, 174.7633],
// Additional cities
'Unknown': null
}};
// Country center coordinates (fallback when city not found)
const countryCoordinates = {{
'US': [37.1, -95.7], 'GB': [55.4, -3.4], 'CN': [35.9, 104.1], 'RU': [61.5, 105.3],
'JP': [36.2, 138.3], 'DE': [51.2, 10.5], 'FR': [46.6, 2.2], 'IN': [20.6, 78.96],
@@ -1958,9 +2034,51 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
'TR': [38.9, 35.2], 'IR': [32.4, 53.7], 'AE': [23.4, 53.8], 'KZ': [48.0, 66.9],
'UA': [48.4, 31.2], 'BG': [42.7, 25.5], 'RO': [45.9, 24.97], 'CZ': [49.8, 15.5],
'HU': [47.2, 19.5], 'AT': [47.5, 14.6], 'BE': [50.5, 4.5], 'DK': [56.3, 9.5],
'FI': [61.9, 25.8], 'NO': [60.5, 8.5], 'GR': [39.1, 21.8], 'PT': [39.4, -8.2]
'FI': [61.9, 25.8], 'NO': [60.5, 8.5], 'GR': [39.1, 21.8], 'PT': [39.4, -8.2],
'AR': [-38.4161, -63.6167], 'CO': [4.5709, -74.2973], 'CL': [-35.6751, -71.5430],
'PE': [-9.1900, -75.0152], 'VE': [6.4238, -66.5897], 'LS': [40.0, -100.0]
}};
// Helper function to resolve map coordinates for an IP record.
// Looks up ip.city in cityCoordinates first, then ip.country_code in
// countryCoordinates. Returns a [lat, lng] array, or null when neither matches.
function getIPCoordinates(ip) {{
    // Array.isArray (rather than a truthiness test) rejects both the null
    // sentinel and inherited Object.prototype members (e.g. a city string of
    // "constructor"), so only real [lat, lng] pairs are ever returned.
    const cityCoords = ip.city ? cityCoordinates[ip.city] : null;
    if (Array.isArray(cityCoords)) {{
        return cityCoords;
    }}
    // Fall back to the country centre
    const countryCoords = ip.country_code ? countryCoordinates[ip.country_code] : null;
    if (Array.isArray(countryCoords)) {{
        return countryCoords;
    }}
    return null;
}}
// Per-location usage counter: maps a "lat,lng" key (rounded to 4 decimals)
// to the number of markers already requested at that location.
const usedCoordinates = {{}};
// Returns the coordinates to use for the next marker at baseCoords ([lat, lng]).
// The first marker at a location gets baseCoords unchanged; each later one is
// shifted by a deterministic offset so stacked markers stay individually visible.
function getUniqueCoordinates(baseCoords) {{
const key = `${{baseCoords[0].toFixed(4)}},${{baseCoords[1].toFixed(4)}}`;
if (!usedCoordinates[key]) {{
usedCoordinates[key] = 0;
}}
usedCoordinates[key]++;
// If this is the first marker at this location, use exact coordinates
if (usedCoordinates[key] === 1) {{
return baseCoords;
}}
// Deterministic (not random) offset for subsequent markers: the n-th marker
// is rotated by n * 137.5 degrees and pushed out by 0.05 * sqrt(n) degrees,
// producing a spiral-like spread around the base point
const angle = (usedCoordinates[key] * 137.5) % 360; // ~golden angle in degrees, spreads successive markers evenly around the centre
const distance = 0.05 * Math.sqrt(usedCoordinates[key]); // offset in degrees; grows with the number of markers at this location
const latOffset = distance * Math.cos(angle * Math.PI / 180);
const lngOffset = distance * Math.sin(angle * Math.PI / 180);
return [
baseCoords[0] + latOffset,
baseCoords[1] + lngOffset
];
}}
// Create layer groups for each category
markerLayers = {{
attacker: L.featureGroup(),
@@ -1970,20 +2088,16 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
unknown: L.featureGroup()
}};
circleLayers = {{
attacker: L.featureGroup(),
bad_crawler: L.featureGroup(),
good_crawler: L.featureGroup(),
regular_user: L.featureGroup(),
unknown: L.featureGroup()
}};
// Add markers for each IP
allIps.slice(0, 100).forEach(ip => {{
if (!ip.country_code || !ip.category) return;
const coords = countryCoordinates[ip.country_code];
if (!coords) return;
// Get coordinates (city first, then country)
const baseCoords = getIPCoordinates(ip);
if (!baseCoords) return;
// Get unique coordinates with offset to prevent overlap
const coords = getUniqueCoordinates(baseCoords);
const category = ip.category.toLowerCase();
if (!markerLayers[category]) return;
@@ -2002,7 +2116,7 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
const marker = L.marker(coords, {{
icon: L.divIcon({{
html: markerElement,
html: markerElement.outerHTML,
iconSize: [markerSize, markerSize],
className: `ip-custom-marker category-${{category}}`
}})
@@ -2027,7 +2141,7 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
</span>
</div>
<span style="color: #8b949e; font-size: 12px;">
${{ip.city || ''}}${{ip.city && ip.country_code ? ', ' : ''}}${{ip.country_code || 'Unknown'}}
${{ip.city ? (ip.country_code ? `${{ip.city}}, ${{ip.country_code}}` : ip.city) : (ip.country_code || 'Unknown')}}
</span><br/>
<div style="margin-top: 8px; border-top: 1px solid #30363d; padding-top: 8px;">
<div><span style="color: #8b949e;">Requests:</span> <span style="color: ${{categoryColor}}; font-weight: bold;">${{ip.total_requests}}</span></div>
@@ -2041,41 +2155,7 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
markerLayers[category].addLayer(marker);
}});
// Add cluster circles for each category
const categoryCountryCounts = {{}};
allIps.forEach(ip => {{
if (ip.country_code && ip.category) {{
const category = ip.category.toLowerCase();
if (!categoryCountryCounts[category]) {{
categoryCountryCounts[category] = {{}};
}}
categoryCountryCounts[category][ip.country_code] =
(categoryCountryCounts[category][ip.country_code] || 0) + 1;
}}
}});
Object.entries(categoryCountryCounts).forEach(([category, countryCounts]) => {{
Object.entries(countryCounts).forEach(([country, count]) => {{
const coords = countryCoordinates[country];
if (coords && circleLayers[category]) {{
const color = categoryColors[category] || '#8b949e';
const circle = L.circle(coords, {{
radius: 100000 + (count * 150000),
color: color,
fillColor: color,
fillOpacity: 0.15,
weight: 1,
opacity: 0.4,
dashArray: '3'
}});
circleLayers[category].addLayer(circle);
}}
}});
}});
// Add all layers to map initially
Object.values(circleLayers).forEach(layer => attackerMap.addLayer(layer));
// Add all marker layers to map initially
Object.values(markerLayers).forEach(layer => attackerMap.addLayer(layer));
// Fit map to all markers
@@ -2120,18 +2200,6 @@ def generate_dashboard(stats: dict, dashboard_path: str = "") -> str:
}}
}}
}}
if (circleLayers[category]) {{
if (show) {{
if (!attackerMap.hasLayer(circleLayers[category])) {{
attackerMap.addLayer(circleLayers[category]);
}}
}} else {{
if (attackerMap.hasLayer(circleLayers[category])) {{
attackerMap.removeLayer(circleLayers[category]);
}}
}}
}}
}});
}}

View File

@@ -2,8 +2,21 @@
"""
Test script to insert fake external IPs into the database for testing the dashboard.
This generates realistic-looking test data including access logs, credential attempts, and attack detections.
Also triggers category behavior changes to demonstrate the timeline feature.
This generates realistic-looking test data including:
- Access logs with various suspicious activities
- Credential attempts
- Attack detections (SQL injection, XSS, etc.)
- Category behavior changes for timeline demonstration
- Real good crawler IPs (Googlebot, Bingbot, etc.) with API-fetched geolocation
Usage:
python test_insert_fake_ips.py [num_ips] [logs_per_ip] [credentials_per_ip] [--no-cleanup]
Examples:
python test_insert_fake_ips.py # Generate 20 IPs with defaults, cleanup DB first
python test_insert_fake_ips.py 30 # Generate 30 IPs with defaults
python test_insert_fake_ips.py 30 20 5 # Generate 30 IPs, 20 logs each, 5 credentials each
python test_insert_fake_ips.py --no-cleanup # Generate data without cleaning DB first
"""
import random
@@ -12,6 +25,7 @@ import sys
from datetime import datetime, timedelta
from zoneinfo import ZoneInfo
from pathlib import Path
import requests
# Add parent src directory to path so we can import database and logger
sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
@@ -23,14 +37,81 @@ from logger import get_app_logger
# TEST DATA GENERATORS
# ----------------------
FAKE_IPS = [
"203.0.113.45", # Regular attacker IP
"198.51.100.89", # Credential harvester IP
"192.0.2.120", # Bot IP
"205.32.180.65", # Another attacker
"210.45.67.89", # Suspicious IP
"175.23.45.67", # International IP
"182.91.102.45", # Another suspicious IP
# Fake IPs with geolocation data: (ip, country_code, city, ASN org)
# The dashboard map places each one at its city when that city is in the
# dashboard's coordinate table, otherwise at the centre of its country_code
FAKE_IPS_WITH_GEO = [
# United States
("45.142.120.10", "US", "New York", "AS14061 DigitalOcean"),
("107.189.10.143", "US", "Los Angeles", "AS20473 Vultr"),
("162.243.175.23", "US", "San Francisco", "AS14061 DigitalOcean"),
("198.51.100.89", "US", "Chicago", "AS16509 Amazon"),
# Europe
("185.220.101.45", "DE", "Berlin", "AS24940 Hetzner"),
("195.154.133.20", "FR", "Paris", "AS12876 Scaleway"),
("178.128.83.165", "GB", "London", "AS14061 DigitalOcean"),
("87.251.67.90", "NL", "Amsterdam", "AS49453 GlobalConnect"),
("91.203.5.165", "RU", "Moscow", "AS51115 HLL LLC"),
("46.105.57.169", "FR", "Roubaix", "AS16276 OVH"),
("217.182.143.207", "RU", "Saint Petersburg", "AS51570 JSC ER-Telecom"),
("188.166.123.45", "GB", "Manchester", "AS14061 DigitalOcean"),
# Asia
("103.253.145.36", "CN", "Beijing", "AS4134 Chinanet"),
("42.112.28.216", "CN", "Shanghai", "AS4134 Chinanet"),
("118.163.74.160", "JP", "Tokyo", "AS2516 KDDI"),
("43.229.53.35", "SG", "Singapore", "AS23969 TOT"),
("115.78.208.140", "IN", "Mumbai", "AS9829 BSNL"),
("14.139.56.18", "IN", "Bangalore", "AS4755 TATA"),
("61.19.25.207", "TW", "Taipei", "AS3462 HiNet"),
("121.126.219.198", "KR", "Seoul", "AS4766 Korea Telecom"),
("202.134.4.212", "ID", "Jakarta", "AS7597 TELKOMNET"),
("171.244.140.134", "VN", "Hanoi", "AS7552 Viettel"),
# South America
("177.87.169.20", "BR", "São Paulo", "AS28573 Claro"),
("200.21.19.58", "BR", "Rio de Janeiro", "AS7738 Telemar"),
("181.13.140.98", "AR", "Buenos Aires", "AS7303 Telecom Argentina"),
("190.150.24.34", "CO", "Bogotá", "AS3816 Colombia Telecomunicaciones"),
# Middle East & Africa
("41.223.53.141", "EG", "Cairo", "AS8452 TE-Data"),
("196.207.35.152", "ZA", "Johannesburg", "AS37271 Workonline"),
("5.188.62.214", "TR", "Istanbul", "AS51115 HLL LLC"),
("37.48.93.125", "AE", "Dubai", "AS5384 Emirates Telecom"),
("102.66.137.29", "NG", "Lagos", "AS29465 MTN Nigeria"),
# Australia & Oceania
("103.28.248.110", "AU", "Sydney", "AS4739 Internode"),
("202.168.45.33", "AU", "Melbourne", "AS1221 Telstra"),
# Additional European IPs
("94.102.49.190", "PL", "Warsaw", "AS12912 T-Mobile"),
("213.32.93.140", "ES", "Madrid", "AS3352 Telefónica"),
("79.137.79.167", "IT", "Rome", "AS3269 Telecom Italia"),
("37.9.169.146", "SE", "Stockholm", "AS3301 Telia"),
("188.92.80.123", "RO", "Bucharest", "AS8708 RCS & RDS"),
("80.240.25.198", "CZ", "Prague", "AS6830 UPC"),
]
# Plain list of the IP addresses, kept for code that only needs the address
FAKE_IPS = [address for address, *_ in FAKE_IPS_WITH_GEO]
# Lookup table: IP address -> (country_code, city, asn_org)
FAKE_GEO_DATA = {
    address: (country, city_name, org)
    for address, country, city_name, org in FAKE_IPS_WITH_GEO
}
# Real good crawler IPs (Googlebot, Bingbot, etc.) - geolocation will be fetched from API
GOOD_CRAWLER_IPS = [
"66.249.66.1", # Googlebot
"66.249.79.23", # Googlebot
"40.77.167.52", # Bingbot
"157.55.39.145", # Bingbot
"17.58.98.100", # Applebot
"199.59.150.39", # Twitterbot
"54.236.1.15", # Amazon Bot
]
FAKE_PATHS = [
@@ -79,11 +160,11 @@ ATTACK_TYPES = [
]
CATEGORIES = [
"ATTACKER",
"BAD_CRAWLER",
"GOOD_CRAWLER",
"REGULAR_USER",
"UNKNOWN",
"attacker",
"bad_crawler",
"good_crawler",
"regular_user",
"unknown",
]
@@ -109,14 +190,92 @@ def generate_analyzed_metrics():
}
def generate_fake_data(num_ips: int = 45, logs_per_ip: int = 15, credentials_per_ip: int = 3):
def cleanup_database(db_manager, app_logger):
    """
    Remove every row from the tables this test script populates.

    Rows are deleted in one transaction in the order AttackDetection,
    AccessLog, CredentialAttempt, CategoryHistory, IpStats. The per-table
    row counts are logged after the commit. On any error the transaction is
    rolled back, the error is logged and the exception is re-raised. The
    session is closed in every case.

    Args:
        db_manager: Database manager instance (provides ``session`` and ``close_session()``)
        app_logger: Logger instance
    """
    from models import AccessLog, CredentialAttempt, AttackDetection, IpStats, CategoryHistory

    banner = "=" * 60
    app_logger.info(banner)
    app_logger.info("Cleaning up existing database data")
    app_logger.info(banner)

    session = db_manager.session
    try:
        # NOTE(review): deletion order is kept exactly as written; presumably
        # attack detections must go before the access logs they relate to.
        # Verify against the models.
        removed = {}
        for model in (AttackDetection, AccessLog, CredentialAttempt, CategoryHistory, IpStats):
            removed[model] = session.query(model).delete()
        session.commit()

        # Reporting order differs from deletion order on purpose (matches the log layout)
        report = (
            (AccessLog, "access logs"),
            (AttackDetection, "attack detections"),
            (CredentialAttempt, "credential attempts"),
            (CategoryHistory, "category history records"),
            (IpStats, "IP statistics"),
        )
        for model, label in report:
            app_logger.info(f"Deleted {removed[model]} {label}")
        app_logger.info("✓ Database cleanup complete")
    except Exception as e:
        session.rollback()
        app_logger.error(f"Error during database cleanup: {e}")
        raise
    finally:
        db_manager.close_session()
# The return annotation is quoted so it is never evaluated at runtime and
# needs no extra import.
def fetch_geolocation_from_api(ip: str, app_logger) -> "tuple | None":
    """
    Fetch geolocation data for an IP from the IP reputation API.

    Args:
        ip: IP address to look up (sent as the ``cidr`` query parameter)
        app_logger: Logger instance used to report failures

    Returns:
        Tuple of (country_code, city, asn, asn_org), or None when the request
        fails, the API answers with a non-200 status, or the response carries
        no results. Missing country/city/asn_org keys are reported as
        "Unknown"; a missing ASN is None.
    """
    api_url = "https://iprep.lcrawl.com/api/iprep/"
    params = {"cidr": ip}
    headers = {"Content-Type": "application/json"}

    try:
        response = requests.get(api_url, headers=headers, params=params, timeout=10)
        if response.status_code != 200:
            # Previously a silent None; log it so a failing lookup can be diagnosed
            app_logger.warning(
                f"Geolocation lookup for {ip} returned HTTP {response.status_code}"
            )
            return None

        payload = response.json()
        results = payload.get("results")
        if not results:
            app_logger.warning(f"No geolocation results returned for {ip}")
            return None

        geoip_data = results[0].get("geoip_data", {})
        country_code = geoip_data.get("country_iso_code", "Unknown")
        city = geoip_data.get("city_name", "Unknown")
        asn = geoip_data.get("asn_autonomous_system_number")
        asn_org = geoip_data.get("asn_autonomous_system_organization", "Unknown")
        return (country_code, city, asn, asn_org)
    except requests.RequestException as e:
        app_logger.warning(f"Failed to fetch geolocation for {ip}: {e}")
    except Exception as e:
        # Best-effort helper: malformed payloads are logged, not propagated
        app_logger.error(f"Error processing geolocation for {ip}: {e}")

    return None
def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per_ip: int = 3, include_good_crawlers: bool = True, cleanup: bool = True):
"""
Generate and insert fake test data into the database.
Args:
num_ips: Number of unique fake IPs to generate (default: 5)
num_ips: Number of unique fake IPs to generate (default: 20)
logs_per_ip: Number of access logs per IP (default: 15)
credentials_per_ip: Number of credential attempts per IP (default: 3)
include_good_crawlers: Whether to add real good crawler IPs with API-fetched geolocation (default: True)
cleanup: Whether to clean up existing database data before generating new data (default: True)
"""
db_manager = get_database()
app_logger = get_app_logger()
@@ -125,6 +284,11 @@ def generate_fake_data(num_ips: int = 45, logs_per_ip: int = 15, credentials_per
if not db_manager._initialized:
db_manager.initialize()
# Clean up existing data if requested
if cleanup:
cleanup_database(db_manager, app_logger)
print() # Add blank line for readability
app_logger.info("=" * 60)
app_logger.info("Starting fake IP data generation for testing")
app_logger.info("=" * 60)
@@ -186,6 +350,28 @@ def generate_fake_data(num_ips: int = 45, logs_per_ip: int = 15, credentials_per
app_logger.info(f" ✓ Generated {logs_per_ip} access logs")
app_logger.info(f" ✓ Generated {credentials_per_ip} credential attempts")
# Add geolocation data if available for this IP
if ip in FAKE_GEO_DATA:
country_code, city, asn_org = FAKE_GEO_DATA[ip]
# Extract ASN number from ASN string (e.g., "AS12345 Name" -> 12345)
asn_number = None
if asn_org and asn_org.startswith("AS"):
try:
asn_number = int(asn_org.split()[0][2:]) # Remove "AS" prefix and get number
except (ValueError, IndexError):
asn_number = 12345 # Fallback
# Update IP reputation info including geolocation and city
db_manager.update_ip_rep_infos(
ip=ip,
country_code=country_code,
asn=asn_number or 12345,
asn_org=asn_org,
list_on={},
city=city # Now passing city to the function
)
app_logger.info(f" 📍 Added geolocation: {city}, {country_code} ({asn_org})")
# Trigger behavior/category changes to demonstrate timeline feature
# First analysis
initial_category = random.choice(CATEGORIES)
@@ -232,11 +418,79 @@ def generate_fake_data(num_ips: int = 45, logs_per_ip: int = 15, credentials_per
)
total_category_changes += 1
# Add good crawler IPs with real geolocation from API
total_good_crawlers = 0
if include_good_crawlers:
app_logger.info("\n" + "=" * 60)
app_logger.info("Adding Good Crawler IPs with API-fetched geolocation")
app_logger.info("=" * 60)
for crawler_ip in GOOD_CRAWLER_IPS:
app_logger.info(f"\nProcessing Good Crawler: {crawler_ip}")
# Fetch real geolocation from API
geo_data = fetch_geolocation_from_api(crawler_ip, app_logger)
# Don't generate access logs for good crawlers to prevent re-categorization
# We'll just create the IP stats entry with the category set
app_logger.info(f" ✓ Adding as good crawler (no logs to prevent re-categorization)")
# First, we need to create the IP in the database via persist_access
# (but we'll only create one minimal log entry)
db_manager.persist_access(
ip=crawler_ip,
path="/robots.txt", # Minimal, normal crawler behavior
user_agent="Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
method="GET",
is_suspicious=False,
is_honeypot_trigger=False,
attack_types=None,
)
# Add geolocation if API fetch was successful
if geo_data:
country_code, city, asn, asn_org = geo_data
db_manager.update_ip_rep_infos(
ip=crawler_ip,
country_code=country_code,
asn=asn if asn else 12345,
asn_org=asn_org,
list_on={},
city=city
)
app_logger.info(f" 📍 API-fetched geolocation: {city}, {country_code} ({asn_org})")
else:
app_logger.warning(f" ⚠ Could not fetch geolocation for {crawler_ip}")
# Set category to good_crawler - this sets manual_category=True to prevent re-analysis
db_manager.update_ip_stats_analysis(
ip=crawler_ip,
analyzed_metrics={
"request_frequency": 0.1, # Very low frequency
"suspicious_patterns": 0,
"credential_attempts": 0,
"attack_diversity": 0.0,
},
category="good_crawler",
category_scores={
"attacker": 0,
"good_crawler": 100,
"bad_crawler": 0,
"regular_user": 0,
"unknown": 0,
},
last_analysis=datetime.now(tz=ZoneInfo('UTC'))
)
total_good_crawlers += 1
time.sleep(0.5) # Small delay between API calls
# Print summary
app_logger.info("\n" + "=" * 60)
app_logger.info("Test Data Generation Complete!")
app_logger.info("=" * 60)
app_logger.info(f"Total IPs created: {len(selected_ips)}")
app_logger.info(f"Total IPs created: {len(selected_ips) + total_good_crawlers}")
app_logger.info(f" - Attackers/Mixed: {len(selected_ips)}")
app_logger.info(f" - Good Crawlers: {total_good_crawlers}")
app_logger.info(f"Total access logs: {total_logs}")
app_logger.info(f"Total attack detections: {total_attacks}")
app_logger.info(f"Total credential attempts: {total_credentials}")
@@ -244,6 +498,7 @@ def generate_fake_data(num_ips: int = 45, logs_per_ip: int = 15, credentials_per
app_logger.info("=" * 60)
app_logger.info("\nYou can now view the dashboard with this test data.")
app_logger.info("The 'Behavior Timeline' will show category transitions for each IP.")
app_logger.info("The map will show good crawlers with real geolocation from API.")
app_logger.info("Run: python server.py")
app_logger.info("=" * 60)
@@ -252,8 +507,10 @@ if __name__ == "__main__":
import sys

# Allow command-line arguments for customization.
# Option flags (e.g. --no-cleanup) are filtered out before the positional
# values are parsed; otherwise `test_insert_fake_ips.py --no-cleanup` would
# try int("--no-cleanup") and crash with ValueError.
positional_args = [arg for arg in sys.argv[1:] if not arg.startswith("--")]
num_ips = int(positional_args[0]) if len(positional_args) > 0 else 20
logs_per_ip = int(positional_args[1]) if len(positional_args) > 1 else 15
credentials_per_ip = int(positional_args[2]) if len(positional_args) > 2 else 3
# --no-cleanup flag skips the database cleanup step
cleanup = "--no-cleanup" not in sys.argv
generate_fake_data(num_ips, logs_per_ip, credentials_per_ip, include_good_crawlers=True, cleanup=cleanup)