Feat/attack map improvement (#58)

* Enhance geolocation functionality and improve unenriched IP retrieval logic

* Refactor test_insert_fake_ips.py to enhance geolocation data handling and improve IP data structure

* Refactor code for improved readability and consistency in database and geolocation utilities
This commit is contained in:
Lorenzo Venerandi
2026-01-27 17:32:38 +01:00
committed by GitHub
parent 5aca684df9
commit 39d9d62247
4 changed files with 277 additions and 116 deletions

View File

@@ -11,7 +11,7 @@ from datetime import datetime, timedelta
from typing import Optional, List, Dict, Any
from zoneinfo import ZoneInfo
from sqlalchemy import create_engine, func, distinct, case, event, or_
from sqlalchemy.orm import sessionmaker, scoped_session, Session
from sqlalchemy.engine import Engine
@@ -432,21 +432,22 @@ class DatabaseManager:
def get_unenriched_ips(self, limit: int = 100) -> List[str]: def get_unenriched_ips(self, limit: int = 100) -> List[str]:
""" """
Get IPs that don't have reputation data yet. Get IPs that don't have complete reputation data yet.
Returns IPs without country_code OR without city data.
Excludes RFC1918 private addresses and other non-routable IPs. Excludes RFC1918 private addresses and other non-routable IPs.
Args: Args:
limit: Maximum number of IPs to return limit: Maximum number of IPs to return
Returns: Returns:
List of IP addresses without reputation data List of IP addresses without complete reputation data
""" """
session = self.session session = self.session
try: try:
ips = ( ips = (
session.query(IpStats.ip) session.query(IpStats.ip)
.filter( .filter(
IpStats.country_code.is_(None), or_(IpStats.country_code.is_(None), IpStats.city.is_(None)),
~IpStats.ip.like("10.%"), ~IpStats.ip.like("10.%"),
~IpStats.ip.like("172.16.%"), ~IpStats.ip.like("172.16.%"),
~IpStats.ip.like("172.17.%"), ~IpStats.ip.like("172.17.%"),

113
src/geo_utils.py Normal file
View File

@@ -0,0 +1,113 @@
#!/usr/bin/env python3
"""
Geolocation utilities for reverse geocoding and city lookups.
"""
import requests
from typing import Optional, Tuple
from logger import get_app_logger
# Module-level logger shared by all helpers in this file.
app_logger = get_app_logger()
# Simple city name cache to avoid repeated API calls
# NOTE(review): in-process and unbounded — entries are never evicted; keys are
# "lat,lon" strings and values are city names (or None for misses). Fine for
# short-lived jobs; confirm acceptable for long-running daemons.
_city_cache = {}
def reverse_geocode_city(latitude: float, longitude: float) -> Optional[str]:
    """
    Reverse geocode coordinates to get city name using Nominatim (OpenStreetMap).

    Results (including misses) are memoized in the module-level ``_city_cache``
    so repeated lookups for the same location do not hit the API again.

    Args:
        latitude: Latitude coordinate
        longitude: Longitude coordinate

    Returns:
        City name or None if not found or the request failed
    """
    # Round to 4 decimal places (~11 m) when building the cache key so that
    # nearly-identical coordinates share one entry instead of each triggering
    # a fresh API call. At zoom 10 (city level) this precision loss is moot.
    cache_key = f"{round(latitude, 4)},{round(longitude, 4)}"
    if cache_key in _city_cache:
        return _city_cache[cache_key]

    try:
        # Use Nominatim reverse geocoding API (free, no API key required)
        url = "https://nominatim.openstreetmap.org/reverse"
        params = {
            "lat": latitude,
            "lon": longitude,
            "format": "json",
            "zoom": 10,  # City level
            "addressdetails": 1,
        }
        headers = {"User-Agent": "Krawl-Honeypot/1.0"}  # Required by Nominatim ToS

        response = requests.get(url, params=params, headers=headers, timeout=5)
        response.raise_for_status()

        data = response.json()
        address = data.get("address", {})

        # Nominatim names the locality differently depending on place type;
        # fall through from most to least specific field.
        city = (
            address.get("city")
            or address.get("town")
            or address.get("village")
            or address.get("municipality")
            or address.get("county")
        )

        # Cache the result — including None, to avoid re-querying known misses.
        _city_cache[cache_key] = city

        if city:
            app_logger.debug(f"Reverse geocoded {latitude},{longitude} to {city}")
        return city

    except requests.RequestException as e:
        # Network / HTTP-level failure; not cached so a later retry can succeed.
        app_logger.warning(f"Reverse geocoding failed for {latitude},{longitude}: {e}")
        return None
    except Exception as e:
        # Covers JSON decoding errors and unexpected payload shapes.
        app_logger.error(f"Error in reverse geocoding: {e}")
        return None
def get_most_recent_geoip_data(results: list) -> Optional[dict]:
    """
    Extract the most recent geoip_data from API results.

    Results are assumed to be sorted by record_added (most recent first),
    so the head of the list is the entry we want.

    Args:
        results: List of result dictionaries from IP reputation API

    Returns:
        Most recent geoip_data dict or None
    """
    # Empty (or None) result lists yield None; otherwise read the first entry.
    return results[0].get("geoip_data") if results else None
def extract_city_from_coordinates(geoip_data: dict) -> Optional[str]:
    """
    Extract city name from geoip_data using reverse geocoding.

    Args:
        geoip_data: Dictionary containing location_latitude and location_longitude

    Returns:
        City name or None
    """
    if not geoip_data:
        return None

    coords = (
        geoip_data.get("location_latitude"),
        geoip_data.get("location_longitude"),
    )
    # 0.0 is a legitimate coordinate, so compare against None explicitly
    # rather than relying on truthiness.
    if any(value is None for value in coords):
        return None

    return reverse_geocode_city(*coords)

View File

@@ -2,6 +2,7 @@ from database import get_database
from logger import get_app_logger from logger import get_app_logger
import requests import requests
from sanitizer import sanitize_for_storage, sanitize_dict from sanitizer import sanitize_for_storage, sanitize_dict
from geo_utils import get_most_recent_geoip_data, extract_city_from_coordinates
# ---------------------- # ----------------------
# TASK CONFIG # TASK CONFIG
@@ -33,13 +34,20 @@ def main():
payload = response.json() payload = response.json()
if payload.get("results"): if payload.get("results"):
data = payload["results"][0] results = payload["results"]
geoip_data = data["geoip_data"]
# Get the most recent result (first in list, sorted by record_added)
most_recent = results[0]
geoip_data = most_recent.get("geoip_data", {})
list_on = most_recent.get("list_on", {})
# Extract standard fields
country_iso_code = geoip_data.get("country_iso_code") country_iso_code = geoip_data.get("country_iso_code")
asn = geoip_data.get("asn_autonomous_system_number") asn = geoip_data.get("asn_autonomous_system_number")
asn_org = geoip_data.get("asn_autonomous_system_organization") asn_org = geoip_data.get("asn_autonomous_system_organization")
city = geoip_data.get("city_name") # Extract city name from API
list_on = data["list_on"] # Extract city from coordinates using reverse geocoding
city = extract_city_from_coordinates(geoip_data)
sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3) sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
sanitized_asn = sanitize_for_storage(asn, 100) sanitized_asn = sanitize_for_storage(asn, 100)
@@ -53,7 +61,7 @@ def main():
sanitized_asn, sanitized_asn,
sanitized_asn_org, sanitized_asn_org,
sanitized_list_on, sanitized_list_on,
sanitized_city, # Pass city to database sanitized_city,
) )
except requests.RequestException as e: except requests.RequestException as e:
app_logger.warning(f"Failed to fetch IP rep for {ip}: {e}") app_logger.warning(f"Failed to fetch IP rep for {ip}: {e}")

View File

@@ -7,7 +7,8 @@ This generates realistic-looking test data including:
- Credential attempts - Credential attempts
- Attack detections (SQL injection, XSS, etc.) - Attack detections (SQL injection, XSS, etc.)
- Category behavior changes for timeline demonstration - Category behavior changes for timeline demonstration
- Real good crawler IPs (Googlebot, Bingbot, etc.) with API-fetched geolocation - Geolocation data fetched from API with reverse geocoded city names
- Real good crawler IPs (Googlebot, Bingbot, etc.)
Usage: Usage:
python test_insert_fake_ips.py [num_ips] [logs_per_ip] [credentials_per_ip] [--no-cleanup] python test_insert_fake_ips.py [num_ips] [logs_per_ip] [credentials_per_ip] [--no-cleanup]
@@ -17,6 +18,8 @@ Examples:
python test_insert_fake_ips.py 30 # Generate 30 IPs with defaults python test_insert_fake_ips.py 30 # Generate 30 IPs with defaults
python test_insert_fake_ips.py 30 20 5 # Generate 30 IPs, 20 logs each, 5 credentials each python test_insert_fake_ips.py 30 20 5 # Generate 30 IPs, 20 logs each, 5 credentials each
python test_insert_fake_ips.py --no-cleanup # Generate data without cleaning DB first python test_insert_fake_ips.py --no-cleanup # Generate data without cleaning DB first
Note: This script will make API calls to fetch geolocation data, so it may take a while.
""" """
import random import random
@@ -32,86 +35,72 @@ sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
from database import get_database from database import get_database
from logger import get_app_logger from logger import get_app_logger
from geo_utils import extract_city_from_coordinates
# ---------------------- # ----------------------
# TEST DATA GENERATORS # TEST DATA GENERATORS
# ---------------------- # ----------------------
# Fake IPs with geolocation data (country_code, city, ASN org) # Fake IPs for testing - geolocation data will be fetched from API
# These will appear on the map based on their country_code # These are real public IPs from various locations around the world
FAKE_IPS_WITH_GEO = [ FAKE_IPS = [
# United States # United States
("45.142.120.10", "US", "New York", "AS14061 DigitalOcean"), "45.142.120.10",
("107.189.10.143", "US", "Los Angeles", "AS20473 Vultr"), "107.189.10.143",
("162.243.175.23", "US", "San Francisco", "AS14061 DigitalOcean"), "162.243.175.23",
("198.51.100.89", "US", "Chicago", "AS16509 Amazon"), "198.51.100.89",
# Europe # Europe
("185.220.101.45", "DE", "Berlin", "AS24940 Hetzner"), "185.220.101.45",
("195.154.133.20", "FR", "Paris", "AS12876 Scaleway"), "195.154.133.20",
("178.128.83.165", "GB", "London", "AS14061 DigitalOcean"), "178.128.83.165",
("87.251.67.90", "NL", "Amsterdam", "AS49453 GlobalConnect"), "87.251.67.90",
("91.203.5.165", "RU", "Moscow", "AS51115 HLL LLC"), "91.203.5.165",
("46.105.57.169", "FR", "Roubaix", "AS16276 OVH"), "46.105.57.169",
("217.182.143.207", "RU", "Saint Petersburg", "AS51570 JSC ER-Telecom"), "217.182.143.207",
("188.166.123.45", "GB", "Manchester", "AS14061 DigitalOcean"), "188.166.123.45",
# Asia # Asia
("103.253.145.36", "CN", "Beijing", "AS4134 Chinanet"), "103.253.145.36",
("42.112.28.216", "CN", "Shanghai", "AS4134 Chinanet"), "42.112.28.216",
("118.163.74.160", "JP", "Tokyo", "AS2516 KDDI"), "118.163.74.160",
("43.229.53.35", "SG", "Singapore", "AS23969 TOT"), "43.229.53.35",
("115.78.208.140", "IN", "Mumbai", "AS9829 BSNL"), "115.78.208.140",
("14.139.56.18", "IN", "Bangalore", "AS4755 TATA"), "14.139.56.18",
("61.19.25.207", "TW", "Taipei", "AS3462 HiNet"), "61.19.25.207",
("121.126.219.198", "KR", "Seoul", "AS4766 Korea Telecom"), "121.126.219.198",
("202.134.4.212", "ID", "Jakarta", "AS7597 TELKOMNET"), "202.134.4.212",
("171.244.140.134", "VN", "Hanoi", "AS7552 Viettel"), "171.244.140.134",
# South America # South America
("177.87.169.20", "BR", "São Paulo", "AS28573 Claro"), "177.87.169.20",
("200.21.19.58", "BR", "Rio de Janeiro", "AS7738 Telemar"), "200.21.19.58",
("181.13.140.98", "AR", "Buenos Aires", "AS7303 Telecom Argentina"), "181.13.140.98",
("190.150.24.34", "CO", "Bogotá", "AS3816 Colombia Telecomunicaciones"), "190.150.24.34",
# Middle East & Africa # Middle East & Africa
("41.223.53.141", "EG", "Cairo", "AS8452 TE-Data"), "41.223.53.141",
("196.207.35.152", "ZA", "Johannesburg", "AS37271 Workonline"), "196.207.35.152",
("5.188.62.214", "TR", "Istanbul", "AS51115 HLL LLC"), "5.188.62.214",
("37.48.93.125", "AE", "Dubai", "AS5384 Emirates Telecom"), "37.48.93.125",
("102.66.137.29", "NG", "Lagos", "AS29465 MTN Nigeria"), "102.66.137.29",
# Australia & Oceania # Australia & Oceania
("103.28.248.110", "AU", "Sydney", "AS4739 Internode"), "103.28.248.110",
("202.168.45.33", "AU", "Melbourne", "AS1221 Telstra"), "202.168.45.33",
# Additional European IPs # Additional European IPs
("94.102.49.190", "PL", "Warsaw", "AS12912 T-Mobile"), "94.102.49.190",
("213.32.93.140", "ES", "Madrid", "AS3352 Telefónica"), "213.32.93.140",
("79.137.79.167", "IT", "Rome", "AS3269 Telecom Italia"), "79.137.79.167",
("37.9.169.146", "SE", "Stockholm", "AS3301 Telia"), "37.9.169.146",
("188.92.80.123", "RO", "Bucharest", "AS8708 RCS & RDS"), "188.92.80.123",
("80.240.25.198", "CZ", "Prague", "AS6830 UPC"), "80.240.25.198",
] ]
# Extract just IPs for backward compatibility
FAKE_IPS = [ip_data[0] for ip_data in FAKE_IPS_WITH_GEO]
# Create geo data dictionary
FAKE_GEO_DATA = {
ip_data[0]: (ip_data[1], ip_data[2], ip_data[3])
for ip_data in FAKE_IPS_WITH_GEO
}
# Real good crawler IPs (Googlebot, Bingbot, etc.) - geolocation will be fetched from API # Real good crawler IPs (Googlebot, Bingbot, etc.) - geolocation will be fetched from API
GOOD_CRAWLER_IPS = [ GOOD_CRAWLER_IPS = [
"66.249.66.1", # Googlebot "66.249.66.1", # Googlebot
"66.249.79.23", # Googlebot "66.249.79.23", # Googlebot
"40.77.167.52", # Bingbot "40.77.167.52", # Bingbot
"157.55.39.145", # Bingbot "157.55.39.145", # Bingbot
"17.58.98.100", # Applebot "17.58.98.100", # Applebot
"199.59.150.39", # Twitterbot "199.59.150.39", # Twitterbot
"54.236.1.15", # Amazon Bot "54.236.1.15", # Amazon Bot
] ]
FAKE_PATHS = [ FAKE_PATHS = [
@@ -198,7 +187,13 @@ def cleanup_database(db_manager, app_logger):
db_manager: Database manager instance db_manager: Database manager instance
app_logger: Logger instance app_logger: Logger instance
""" """
from models import AccessLog, CredentialAttempt, AttackDetection, IpStats, CategoryHistory from models import (
AccessLog,
CredentialAttempt,
AttackDetection,
IpStats,
CategoryHistory,
)
app_logger.info("=" * 60) app_logger.info("=" * 60)
app_logger.info("Cleaning up existing database data") app_logger.info("Cleaning up existing database data")
@@ -232,6 +227,7 @@ def cleanup_database(db_manager, app_logger):
def fetch_geolocation_from_api(ip: str, app_logger) -> tuple: def fetch_geolocation_from_api(ip: str, app_logger) -> tuple:
""" """
Fetch geolocation data from the IP reputation API. Fetch geolocation data from the IP reputation API.
Uses the most recent result and extracts city from coordinates.
Args: Args:
ip: IP address to lookup ip: IP address to lookup
@@ -249,13 +245,18 @@ def fetch_geolocation_from_api(ip: str, app_logger) -> tuple:
if response.status_code == 200: if response.status_code == 200:
payload = response.json() payload = response.json()
if payload.get("results"): if payload.get("results"):
data = payload["results"][0] results = payload["results"]
geoip_data = data.get("geoip_data", {})
country_code = geoip_data.get("country_iso_code", "Unknown") # Get the most recent result (first in list, sorted by record_added)
city = geoip_data.get("city_name", "Unknown") most_recent = results[0]
geoip_data = most_recent.get("geoip_data", {})
country_code = geoip_data.get("country_iso_code")
asn = geoip_data.get("asn_autonomous_system_number") asn = geoip_data.get("asn_autonomous_system_number")
asn_org = geoip_data.get("asn_autonomous_system_organization", "Unknown") asn_org = geoip_data.get("asn_autonomous_system_organization")
# Extract city from coordinates using reverse geocoding
city = extract_city_from_coordinates(geoip_data)
return (country_code, city, asn, asn_org) return (country_code, city, asn, asn_org)
except requests.RequestException as e: except requests.RequestException as e:
@@ -266,7 +267,13 @@ def fetch_geolocation_from_api(ip: str, app_logger) -> tuple:
return None return None
def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per_ip: int = 3, include_good_crawlers: bool = True, cleanup: bool = True): def generate_fake_data(
num_ips: int = 20,
logs_per_ip: int = 15,
credentials_per_ip: int = 3,
include_good_crawlers: bool = True,
cleanup: bool = True,
):
""" """
Generate and insert fake test data into the database. Generate and insert fake test data into the database.
@@ -308,8 +315,12 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
for _ in range(logs_per_ip): for _ in range(logs_per_ip):
path = random.choice(FAKE_PATHS) path = random.choice(FAKE_PATHS)
user_agent = random.choice(FAKE_USER_AGENTS) user_agent = random.choice(FAKE_USER_AGENTS)
is_suspicious = random.choice([True, False, False]) # 33% chance of suspicious is_suspicious = random.choice(
is_honeypot = random.choice([True, False, False, False]) # 25% chance of honeypot trigger [True, False, False]
) # 33% chance of suspicious
is_honeypot = random.choice(
[True, False, False, False]
) # 25% chance of honeypot trigger
# Randomly decide if this log has attack detections # Randomly decide if this log has attack detections
attack_types = None attack_types = None
@@ -350,39 +361,45 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
app_logger.info(f" ✓ Generated {logs_per_ip} access logs") app_logger.info(f" ✓ Generated {logs_per_ip} access logs")
app_logger.info(f" ✓ Generated {credentials_per_ip} credential attempts") app_logger.info(f" ✓ Generated {credentials_per_ip} credential attempts")
# Add geolocation data if available for this IP # Fetch geolocation data from API
if ip in FAKE_GEO_DATA: app_logger.info(f" 🌍 Fetching geolocation from API...")
country_code, city, asn_org = FAKE_GEO_DATA[ip] geo_data = fetch_geolocation_from_api(ip, app_logger)
# Extract ASN number from ASN string (e.g., "AS12345 Name" -> 12345)
asn_number = None
if asn_org and asn_org.startswith("AS"):
try:
asn_number = int(asn_org.split()[0][2:]) # Remove "AS" prefix and get number
except (ValueError, IndexError):
asn_number = 12345 # Fallback
# Update IP reputation info including geolocation and city if geo_data:
country_code, city, asn, asn_org = geo_data
db_manager.update_ip_rep_infos( db_manager.update_ip_rep_infos(
ip=ip, ip=ip,
country_code=country_code, country_code=country_code,
asn=asn_number or 12345, asn=asn if asn else 12345,
asn_org=asn_org, asn_org=asn_org or "Unknown",
list_on={}, list_on={},
city=city # Now passing city to the function city=city,
) )
app_logger.info(f" 📍 Added geolocation: {city}, {country_code} ({asn_org})") location_display = (
f"{city}, {country_code}" if city else country_code or "Unknown"
)
app_logger.info(
f" 📍 API-fetched geolocation: {location_display} ({asn_org or 'Unknown'})"
)
else:
app_logger.warning(f" ⚠ Could not fetch geolocation for {ip}")
# Small delay to be nice to the API
time.sleep(0.5)
# Trigger behavior/category changes to demonstrate timeline feature # Trigger behavior/category changes to demonstrate timeline feature
# First analysis # First analysis
initial_category = random.choice(CATEGORIES) initial_category = random.choice(CATEGORIES)
app_logger.info(f" ⟳ Analyzing behavior - Initial category: {initial_category}") app_logger.info(
f" ⟳ Analyzing behavior - Initial category: {initial_category}"
)
db_manager.update_ip_stats_analysis( db_manager.update_ip_stats_analysis(
ip=ip, ip=ip,
analyzed_metrics=generate_analyzed_metrics(), analyzed_metrics=generate_analyzed_metrics(),
category=initial_category, category=initial_category,
category_scores=generate_category_scores(), category_scores=generate_category_scores(),
last_analysis=datetime.now(tz=ZoneInfo('UTC')) last_analysis=datetime.now(tz=ZoneInfo("UTC")),
) )
total_category_changes += 1 total_category_changes += 1
@@ -391,30 +408,38 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
# Second analysis with potential category change (70% chance) # Second analysis with potential category change (70% chance)
if random.random() < 0.7: if random.random() < 0.7:
new_category = random.choice([c for c in CATEGORIES if c != initial_category]) new_category = random.choice(
app_logger.info(f" ⟳ Behavior change detected: {initial_category}{new_category}") [c for c in CATEGORIES if c != initial_category]
)
app_logger.info(
f" ⟳ Behavior change detected: {initial_category}{new_category}"
)
db_manager.update_ip_stats_analysis( db_manager.update_ip_stats_analysis(
ip=ip, ip=ip,
analyzed_metrics=generate_analyzed_metrics(), analyzed_metrics=generate_analyzed_metrics(),
category=new_category, category=new_category,
category_scores=generate_category_scores(), category_scores=generate_category_scores(),
last_analysis=datetime.now(tz=ZoneInfo('UTC')) last_analysis=datetime.now(tz=ZoneInfo("UTC")),
) )
total_category_changes += 1 total_category_changes += 1
# Optional third change (40% chance) # Optional third change (40% chance)
if random.random() < 0.4: if random.random() < 0.4:
final_category = random.choice([c for c in CATEGORIES if c != new_category]) final_category = random.choice(
app_logger.info(f" ⟳ Another behavior change: {new_category}{final_category}") [c for c in CATEGORIES if c != new_category]
)
app_logger.info(
f" ⟳ Another behavior change: {new_category}{final_category}"
)
time.sleep(0.1) time.sleep(0.1)
db_manager.update_ip_stats_analysis( db_manager.update_ip_stats_analysis(
ip=ip, ip=ip,
analyzed_metrics=generate_analyzed_metrics(), analyzed_metrics=generate_analyzed_metrics(),
category=final_category, category=final_category,
category_scores=generate_category_scores(), category_scores=generate_category_scores(),
last_analysis=datetime.now(tz=ZoneInfo('UTC')) last_analysis=datetime.now(tz=ZoneInfo("UTC")),
) )
total_category_changes += 1 total_category_changes += 1
@@ -433,7 +458,9 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
# Don't generate access logs for good crawlers to prevent re-categorization # Don't generate access logs for good crawlers to prevent re-categorization
# We'll just create the IP stats entry with the category set # We'll just create the IP stats entry with the category set
app_logger.info(f" ✓ Adding as good crawler (no logs to prevent re-categorization)") app_logger.info(
f" ✓ Adding as good crawler (no logs to prevent re-categorization)"
)
# First, we need to create the IP in the database via persist_access # First, we need to create the IP in the database via persist_access
# (but we'll only create one minimal log entry) # (but we'll only create one minimal log entry)
@@ -456,9 +483,11 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
asn=asn if asn else 12345, asn=asn if asn else 12345,
asn_org=asn_org, asn_org=asn_org,
list_on={}, list_on={},
city=city city=city,
)
app_logger.info(
f" 📍 API-fetched geolocation: {city}, {country_code} ({asn_org})"
) )
app_logger.info(f" 📍 API-fetched geolocation: {city}, {country_code} ({asn_org})")
else: else:
app_logger.warning(f" ⚠ Could not fetch geolocation for {crawler_ip}") app_logger.warning(f" ⚠ Could not fetch geolocation for {crawler_ip}")
@@ -479,7 +508,7 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
"regular_user": 0, "regular_user": 0,
"unknown": 0, "unknown": 0,
}, },
last_analysis=datetime.now(tz=ZoneInfo('UTC')) last_analysis=datetime.now(tz=ZoneInfo("UTC")),
) )
total_good_crawlers += 1 total_good_crawlers += 1
time.sleep(0.5) # Small delay between API calls time.sleep(0.5) # Small delay between API calls
@@ -497,8 +526,12 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
app_logger.info(f"Total category changes: {total_category_changes}") app_logger.info(f"Total category changes: {total_category_changes}")
app_logger.info("=" * 60) app_logger.info("=" * 60)
app_logger.info("\nYou can now view the dashboard with this test data.") app_logger.info("\nYou can now view the dashboard with this test data.")
app_logger.info("The 'Behavior Timeline' will show category transitions for each IP.") app_logger.info(
app_logger.info("The map will show good crawlers with real geolocation from API.") "The 'Behavior Timeline' will show category transitions for each IP."
)
app_logger.info(
"All IPs have API-fetched geolocation with reverse geocoded city names."
)
app_logger.info("Run: python server.py") app_logger.info("Run: python server.py")
app_logger.info("=" * 60) app_logger.info("=" * 60)
@@ -513,4 +546,10 @@ if __name__ == "__main__":
# Add --no-cleanup flag to skip database cleanup # Add --no-cleanup flag to skip database cleanup
cleanup = "--no-cleanup" not in sys.argv cleanup = "--no-cleanup" not in sys.argv
generate_fake_data(num_ips, logs_per_ip, credentials_per_ip, include_good_crawlers=True, cleanup=cleanup) generate_fake_data(
num_ips,
logs_per_ip,
credentials_per_ip,
include_good_crawlers=True,
cleanup=cleanup,
)