Feat/attack map improvement (#58)
* Enhance geolocation functionality and improve unenriched IP retrieval logic * Refactor test_insert_fake_ips.py to enhance geolocation data handling and improve IP data structure * Refactor code for improved readability and consistency in database and geolocation utilities
This commit is contained in:
committed by
GitHub
parent
5aca684df9
commit
39d9d62247
@@ -11,7 +11,7 @@ from datetime import datetime, timedelta
|
|||||||
from typing import Optional, List, Dict, Any
|
from typing import Optional, List, Dict, Any
|
||||||
from zoneinfo import ZoneInfo
|
from zoneinfo import ZoneInfo
|
||||||
|
|
||||||
from sqlalchemy import create_engine, func, distinct, case, event
|
from sqlalchemy import create_engine, func, distinct, case, event, or_
|
||||||
from sqlalchemy.orm import sessionmaker, scoped_session, Session
|
from sqlalchemy.orm import sessionmaker, scoped_session, Session
|
||||||
from sqlalchemy.engine import Engine
|
from sqlalchemy.engine import Engine
|
||||||
|
|
||||||
@@ -432,21 +432,22 @@ class DatabaseManager:
|
|||||||
|
|
||||||
def get_unenriched_ips(self, limit: int = 100) -> List[str]:
|
def get_unenriched_ips(self, limit: int = 100) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Get IPs that don't have reputation data yet.
|
Get IPs that don't have complete reputation data yet.
|
||||||
|
Returns IPs without country_code OR without city data.
|
||||||
Excludes RFC1918 private addresses and other non-routable IPs.
|
Excludes RFC1918 private addresses and other non-routable IPs.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
limit: Maximum number of IPs to return
|
limit: Maximum number of IPs to return
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List of IP addresses without reputation data
|
List of IP addresses without complete reputation data
|
||||||
"""
|
"""
|
||||||
session = self.session
|
session = self.session
|
||||||
try:
|
try:
|
||||||
ips = (
|
ips = (
|
||||||
session.query(IpStats.ip)
|
session.query(IpStats.ip)
|
||||||
.filter(
|
.filter(
|
||||||
IpStats.country_code.is_(None),
|
or_(IpStats.country_code.is_(None), IpStats.city.is_(None)),
|
||||||
~IpStats.ip.like("10.%"),
|
~IpStats.ip.like("10.%"),
|
||||||
~IpStats.ip.like("172.16.%"),
|
~IpStats.ip.like("172.16.%"),
|
||||||
~IpStats.ip.like("172.17.%"),
|
~IpStats.ip.like("172.17.%"),
|
||||||
|
|||||||
113
src/geo_utils.py
Normal file
113
src/geo_utils.py
Normal file
@@ -0,0 +1,113 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Geolocation utilities for reverse geocoding and city lookups.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from typing import Optional, Tuple
|
||||||
|
from logger import get_app_logger
|
||||||
|
|
||||||
|
app_logger = get_app_logger()
|
||||||
|
|
||||||
|
# Simple city name cache to avoid repeated API calls
|
||||||
|
_city_cache = {}
|
||||||
|
|
||||||
|
|
||||||
|
def reverse_geocode_city(latitude: float, longitude: float) -> Optional[str]:
    """
    Reverse geocode coordinates to get city name using Nominatim (OpenStreetMap).

    Answers from the API (including "no city found") are stored in the
    module-level ``_city_cache`` so the same coordinates are only looked up
    once. Failed requests are not cached and are retried on the next call.

    Args:
        latitude: Latitude coordinate in decimal degrees (-90 to 90)
        longitude: Longitude coordinate in decimal degrees (-180 to 180)

    Returns:
        City name, or None if no city was found, the coordinates are not
        valid in-range numbers, or the lookup failed
    """
    # Normalise the coordinates before doing anything else: the API can only
    # answer for real numbers, and a normalised value gives a stable cache key
    # (1 and 1.0 and "1.0" all map to the same entry).
    try:
        lat = float(latitude)
        lon = float(longitude)
    except (TypeError, ValueError, OverflowError):
        return None

    # Chained comparisons are False for NaN and infinities, so those are
    # rejected here too without a pointless network round-trip.
    if not (-90.0 <= lat <= 90.0 and -180.0 <= lon <= 180.0):
        return None

    # Check cache first
    cache_key = f"{lat},{lon}"
    if cache_key in _city_cache:
        return _city_cache[cache_key]

    try:
        # Use Nominatim reverse geocoding API (free, no API key required)
        # NOTE(review): the public Nominatim instance has a usage policy with
        # a request-rate limit - confirm callers throttle bulk lookups.
        url = "https://nominatim.openstreetmap.org/reverse"
        params = {
            "lat": lat,
            "lon": lon,
            "format": "json",
            "zoom": 10,  # City level
            "addressdetails": 1,
        }
        headers = {"User-Agent": "Krawl-Honeypot/1.0"}  # Required by Nominatim ToS

        response = requests.get(url, params=params, headers=headers, timeout=5)
        response.raise_for_status()

        data = response.json()

        # Tolerate unexpected payload shapes (non-object JSON, null address)
        # and treat them as "no city" instead of raising.
        address = data.get("address") if isinstance(data, dict) else None
        if not isinstance(address, dict):
            address = {}

        # Try to get city from various possible fields, most specific first
        city = None
        for field in ("city", "town", "village", "municipality", "county"):
            candidate = address.get(field)
            if candidate:
                city = candidate
                break

        # Cache the result (None is cached too, so misses are not re-queried)
        _city_cache[cache_key] = city

        if city:
            app_logger.debug(f"Reverse geocoded {latitude},{longitude} to {city}")

        return city

    except requests.RequestException as e:
        app_logger.warning(f"Reverse geocoding failed for {latitude},{longitude}: {e}")
        return None
    except Exception as e:
        # Last-resort guard (e.g. invalid JSON body): geocoding is best-effort
        # and must never break the caller.
        app_logger.error(f"Error in reverse geocoding: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def get_most_recent_geoip_data(results: list) -> Optional[dict]:
    """
    Extract the most recent geoip_data from API results.

    Results are assumed to be sorted by record_added (most recent first);
    this function does not sort them, it simply reads the first entry.

    Args:
        results: List of result dictionaries from IP reputation API

    Returns:
        The "geoip_data" value of the first result, or None when the list is
        empty/missing, the first entry is not a dictionary, or it has no
        "geoip_data" key
    """
    if not results:
        return None

    # The first result is the most recent (sorted by record_added).
    # Indexing is guarded so that unexpected payload shapes yield None
    # rather than an exception.
    try:
        most_recent = results[0]
    except (IndexError, KeyError, TypeError):
        return None

    # A null or otherwise malformed entry has no usable geolocation data.
    if not isinstance(most_recent, dict):
        return None

    return most_recent.get("geoip_data")
|
||||||
|
|
||||||
|
|
||||||
|
def extract_city_from_coordinates(geoip_data: dict) -> Optional[str]:
    """
    Resolve a city name for a geoip_data record via reverse geocoding.

    Args:
        geoip_data: Dictionary containing location_latitude and location_longitude

    Returns:
        City name as returned by reverse_geocode_city, or None when the
        record is empty/missing or either coordinate is absent
    """
    if geoip_data:
        lat = geoip_data.get("location_latitude")
        lon = geoip_data.get("location_longitude")

        # Both coordinates are required; 0.0 is a valid value, so compare
        # against None explicitly instead of relying on truthiness.
        if lat is not None and lon is not None:
            return reverse_geocode_city(lat, lon)

    return None
|
||||||
@@ -2,6 +2,7 @@ from database import get_database
|
|||||||
from logger import get_app_logger
|
from logger import get_app_logger
|
||||||
import requests
|
import requests
|
||||||
from sanitizer import sanitize_for_storage, sanitize_dict
|
from sanitizer import sanitize_for_storage, sanitize_dict
|
||||||
|
from geo_utils import get_most_recent_geoip_data, extract_city_from_coordinates
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# TASK CONFIG
|
# TASK CONFIG
|
||||||
@@ -33,13 +34,20 @@ def main():
|
|||||||
payload = response.json()
|
payload = response.json()
|
||||||
|
|
||||||
if payload.get("results"):
|
if payload.get("results"):
|
||||||
data = payload["results"][0]
|
results = payload["results"]
|
||||||
geoip_data = data["geoip_data"]
|
|
||||||
|
# Get the most recent result (first in list, sorted by record_added)
|
||||||
|
most_recent = results[0]
|
||||||
|
geoip_data = most_recent.get("geoip_data", {})
|
||||||
|
list_on = most_recent.get("list_on", {})
|
||||||
|
|
||||||
|
# Extract standard fields
|
||||||
country_iso_code = geoip_data.get("country_iso_code")
|
country_iso_code = geoip_data.get("country_iso_code")
|
||||||
asn = geoip_data.get("asn_autonomous_system_number")
|
asn = geoip_data.get("asn_autonomous_system_number")
|
||||||
asn_org = geoip_data.get("asn_autonomous_system_organization")
|
asn_org = geoip_data.get("asn_autonomous_system_organization")
|
||||||
city = geoip_data.get("city_name") # Extract city name from API
|
|
||||||
list_on = data["list_on"]
|
# Extract city from coordinates using reverse geocoding
|
||||||
|
city = extract_city_from_coordinates(geoip_data)
|
||||||
|
|
||||||
sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
|
sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
|
||||||
sanitized_asn = sanitize_for_storage(asn, 100)
|
sanitized_asn = sanitize_for_storage(asn, 100)
|
||||||
@@ -53,7 +61,7 @@ def main():
|
|||||||
sanitized_asn,
|
sanitized_asn,
|
||||||
sanitized_asn_org,
|
sanitized_asn_org,
|
||||||
sanitized_list_on,
|
sanitized_list_on,
|
||||||
sanitized_city, # Pass city to database
|
sanitized_city,
|
||||||
)
|
)
|
||||||
except requests.RequestException as e:
|
except requests.RequestException as e:
|
||||||
app_logger.warning(f"Failed to fetch IP rep for {ip}: {e}")
|
app_logger.warning(f"Failed to fetch IP rep for {ip}: {e}")
|
||||||
|
|||||||
@@ -7,7 +7,8 @@ This generates realistic-looking test data including:
|
|||||||
- Credential attempts
|
- Credential attempts
|
||||||
- Attack detections (SQL injection, XSS, etc.)
|
- Attack detections (SQL injection, XSS, etc.)
|
||||||
- Category behavior changes for timeline demonstration
|
- Category behavior changes for timeline demonstration
|
||||||
- Real good crawler IPs (Googlebot, Bingbot, etc.) with API-fetched geolocation
|
- Geolocation data fetched from API with reverse geocoded city names
|
||||||
|
- Real good crawler IPs (Googlebot, Bingbot, etc.)
|
||||||
|
|
||||||
Usage:
|
Usage:
|
||||||
python test_insert_fake_ips.py [num_ips] [logs_per_ip] [credentials_per_ip] [--no-cleanup]
|
python test_insert_fake_ips.py [num_ips] [logs_per_ip] [credentials_per_ip] [--no-cleanup]
|
||||||
@@ -17,6 +18,8 @@ Examples:
|
|||||||
python test_insert_fake_ips.py 30 # Generate 30 IPs with defaults
|
python test_insert_fake_ips.py 30 # Generate 30 IPs with defaults
|
||||||
python test_insert_fake_ips.py 30 20 5 # Generate 30 IPs, 20 logs each, 5 credentials each
|
python test_insert_fake_ips.py 30 20 5 # Generate 30 IPs, 20 logs each, 5 credentials each
|
||||||
python test_insert_fake_ips.py --no-cleanup # Generate data without cleaning DB first
|
python test_insert_fake_ips.py --no-cleanup # Generate data without cleaning DB first
|
||||||
|
|
||||||
|
Note: This script will make API calls to fetch geolocation data, so it may take a while.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import random
|
import random
|
||||||
@@ -32,86 +35,72 @@ sys.path.insert(0, str(Path(__file__).parent.parent / "src"))
|
|||||||
|
|
||||||
from database import get_database
|
from database import get_database
|
||||||
from logger import get_app_logger
|
from logger import get_app_logger
|
||||||
|
from geo_utils import extract_city_from_coordinates
|
||||||
|
|
||||||
# ----------------------
|
# ----------------------
|
||||||
# TEST DATA GENERATORS
|
# TEST DATA GENERATORS
|
||||||
# ----------------------
|
# ----------------------
|
||||||
|
|
||||||
# Fake IPs with geolocation data (country_code, city, ASN org)
|
# Fake IPs for testing - geolocation data will be fetched from API
|
||||||
# These will appear on the map based on their country_code
|
# These are real public IPs from various locations around the world
|
||||||
FAKE_IPS_WITH_GEO = [
|
FAKE_IPS = [
|
||||||
# United States
|
# United States
|
||||||
("45.142.120.10", "US", "New York", "AS14061 DigitalOcean"),
|
"45.142.120.10",
|
||||||
("107.189.10.143", "US", "Los Angeles", "AS20473 Vultr"),
|
"107.189.10.143",
|
||||||
("162.243.175.23", "US", "San Francisco", "AS14061 DigitalOcean"),
|
"162.243.175.23",
|
||||||
("198.51.100.89", "US", "Chicago", "AS16509 Amazon"),
|
"198.51.100.89",
|
||||||
|
|
||||||
# Europe
|
# Europe
|
||||||
("185.220.101.45", "DE", "Berlin", "AS24940 Hetzner"),
|
"185.220.101.45",
|
||||||
("195.154.133.20", "FR", "Paris", "AS12876 Scaleway"),
|
"195.154.133.20",
|
||||||
("178.128.83.165", "GB", "London", "AS14061 DigitalOcean"),
|
"178.128.83.165",
|
||||||
("87.251.67.90", "NL", "Amsterdam", "AS49453 GlobalConnect"),
|
"87.251.67.90",
|
||||||
("91.203.5.165", "RU", "Moscow", "AS51115 HLL LLC"),
|
"91.203.5.165",
|
||||||
("46.105.57.169", "FR", "Roubaix", "AS16276 OVH"),
|
"46.105.57.169",
|
||||||
("217.182.143.207", "RU", "Saint Petersburg", "AS51570 JSC ER-Telecom"),
|
"217.182.143.207",
|
||||||
("188.166.123.45", "GB", "Manchester", "AS14061 DigitalOcean"),
|
"188.166.123.45",
|
||||||
|
|
||||||
# Asia
|
# Asia
|
||||||
("103.253.145.36", "CN", "Beijing", "AS4134 Chinanet"),
|
"103.253.145.36",
|
||||||
("42.112.28.216", "CN", "Shanghai", "AS4134 Chinanet"),
|
"42.112.28.216",
|
||||||
("118.163.74.160", "JP", "Tokyo", "AS2516 KDDI"),
|
"118.163.74.160",
|
||||||
("43.229.53.35", "SG", "Singapore", "AS23969 TOT"),
|
"43.229.53.35",
|
||||||
("115.78.208.140", "IN", "Mumbai", "AS9829 BSNL"),
|
"115.78.208.140",
|
||||||
("14.139.56.18", "IN", "Bangalore", "AS4755 TATA"),
|
"14.139.56.18",
|
||||||
("61.19.25.207", "TW", "Taipei", "AS3462 HiNet"),
|
"61.19.25.207",
|
||||||
("121.126.219.198", "KR", "Seoul", "AS4766 Korea Telecom"),
|
"121.126.219.198",
|
||||||
("202.134.4.212", "ID", "Jakarta", "AS7597 TELKOMNET"),
|
"202.134.4.212",
|
||||||
("171.244.140.134", "VN", "Hanoi", "AS7552 Viettel"),
|
"171.244.140.134",
|
||||||
|
|
||||||
# South America
|
# South America
|
||||||
("177.87.169.20", "BR", "São Paulo", "AS28573 Claro"),
|
"177.87.169.20",
|
||||||
("200.21.19.58", "BR", "Rio de Janeiro", "AS7738 Telemar"),
|
"200.21.19.58",
|
||||||
("181.13.140.98", "AR", "Buenos Aires", "AS7303 Telecom Argentina"),
|
"181.13.140.98",
|
||||||
("190.150.24.34", "CO", "Bogotá", "AS3816 Colombia Telecomunicaciones"),
|
"190.150.24.34",
|
||||||
|
|
||||||
# Middle East & Africa
|
# Middle East & Africa
|
||||||
("41.223.53.141", "EG", "Cairo", "AS8452 TE-Data"),
|
"41.223.53.141",
|
||||||
("196.207.35.152", "ZA", "Johannesburg", "AS37271 Workonline"),
|
"196.207.35.152",
|
||||||
("5.188.62.214", "TR", "Istanbul", "AS51115 HLL LLC"),
|
"5.188.62.214",
|
||||||
("37.48.93.125", "AE", "Dubai", "AS5384 Emirates Telecom"),
|
"37.48.93.125",
|
||||||
("102.66.137.29", "NG", "Lagos", "AS29465 MTN Nigeria"),
|
"102.66.137.29",
|
||||||
|
|
||||||
# Australia & Oceania
|
# Australia & Oceania
|
||||||
("103.28.248.110", "AU", "Sydney", "AS4739 Internode"),
|
"103.28.248.110",
|
||||||
("202.168.45.33", "AU", "Melbourne", "AS1221 Telstra"),
|
"202.168.45.33",
|
||||||
|
|
||||||
# Additional European IPs
|
# Additional European IPs
|
||||||
("94.102.49.190", "PL", "Warsaw", "AS12912 T-Mobile"),
|
"94.102.49.190",
|
||||||
("213.32.93.140", "ES", "Madrid", "AS3352 Telefónica"),
|
"213.32.93.140",
|
||||||
("79.137.79.167", "IT", "Rome", "AS3269 Telecom Italia"),
|
"79.137.79.167",
|
||||||
("37.9.169.146", "SE", "Stockholm", "AS3301 Telia"),
|
"37.9.169.146",
|
||||||
("188.92.80.123", "RO", "Bucharest", "AS8708 RCS & RDS"),
|
"188.92.80.123",
|
||||||
("80.240.25.198", "CZ", "Prague", "AS6830 UPC"),
|
"80.240.25.198",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Extract just IPs for backward compatibility
|
|
||||||
FAKE_IPS = [ip_data[0] for ip_data in FAKE_IPS_WITH_GEO]
|
|
||||||
|
|
||||||
# Create geo data dictionary
|
|
||||||
FAKE_GEO_DATA = {
|
|
||||||
ip_data[0]: (ip_data[1], ip_data[2], ip_data[3])
|
|
||||||
for ip_data in FAKE_IPS_WITH_GEO
|
|
||||||
}
|
|
||||||
|
|
||||||
# Real good crawler IPs (Googlebot, Bingbot, etc.) - geolocation will be fetched from API
|
# Real good crawler IPs (Googlebot, Bingbot, etc.) - geolocation will be fetched from API
|
||||||
GOOD_CRAWLER_IPS = [
|
GOOD_CRAWLER_IPS = [
|
||||||
"66.249.66.1", # Googlebot
|
"66.249.66.1", # Googlebot
|
||||||
"66.249.79.23", # Googlebot
|
"66.249.79.23", # Googlebot
|
||||||
"40.77.167.52", # Bingbot
|
"40.77.167.52", # Bingbot
|
||||||
"157.55.39.145", # Bingbot
|
"157.55.39.145", # Bingbot
|
||||||
"17.58.98.100", # Applebot
|
"17.58.98.100", # Applebot
|
||||||
"199.59.150.39", # Twitterbot
|
"199.59.150.39", # Twitterbot
|
||||||
"54.236.1.15", # Amazon Bot
|
"54.236.1.15", # Amazon Bot
|
||||||
]
|
]
|
||||||
|
|
||||||
FAKE_PATHS = [
|
FAKE_PATHS = [
|
||||||
@@ -198,7 +187,13 @@ def cleanup_database(db_manager, app_logger):
|
|||||||
db_manager: Database manager instance
|
db_manager: Database manager instance
|
||||||
app_logger: Logger instance
|
app_logger: Logger instance
|
||||||
"""
|
"""
|
||||||
from models import AccessLog, CredentialAttempt, AttackDetection, IpStats, CategoryHistory
|
from models import (
|
||||||
|
AccessLog,
|
||||||
|
CredentialAttempt,
|
||||||
|
AttackDetection,
|
||||||
|
IpStats,
|
||||||
|
CategoryHistory,
|
||||||
|
)
|
||||||
|
|
||||||
app_logger.info("=" * 60)
|
app_logger.info("=" * 60)
|
||||||
app_logger.info("Cleaning up existing database data")
|
app_logger.info("Cleaning up existing database data")
|
||||||
@@ -232,6 +227,7 @@ def cleanup_database(db_manager, app_logger):
|
|||||||
def fetch_geolocation_from_api(ip: str, app_logger) -> tuple:
|
def fetch_geolocation_from_api(ip: str, app_logger) -> tuple:
|
||||||
"""
|
"""
|
||||||
Fetch geolocation data from the IP reputation API.
|
Fetch geolocation data from the IP reputation API.
|
||||||
|
Uses the most recent result and extracts city from coordinates.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
ip: IP address to lookup
|
ip: IP address to lookup
|
||||||
@@ -249,13 +245,18 @@ def fetch_geolocation_from_api(ip: str, app_logger) -> tuple:
|
|||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
payload = response.json()
|
payload = response.json()
|
||||||
if payload.get("results"):
|
if payload.get("results"):
|
||||||
data = payload["results"][0]
|
results = payload["results"]
|
||||||
geoip_data = data.get("geoip_data", {})
|
|
||||||
|
|
||||||
country_code = geoip_data.get("country_iso_code", "Unknown")
|
# Get the most recent result (first in list, sorted by record_added)
|
||||||
city = geoip_data.get("city_name", "Unknown")
|
most_recent = results[0]
|
||||||
|
geoip_data = most_recent.get("geoip_data", {})
|
||||||
|
|
||||||
|
country_code = geoip_data.get("country_iso_code")
|
||||||
asn = geoip_data.get("asn_autonomous_system_number")
|
asn = geoip_data.get("asn_autonomous_system_number")
|
||||||
asn_org = geoip_data.get("asn_autonomous_system_organization", "Unknown")
|
asn_org = geoip_data.get("asn_autonomous_system_organization")
|
||||||
|
|
||||||
|
# Extract city from coordinates using reverse geocoding
|
||||||
|
city = extract_city_from_coordinates(geoip_data)
|
||||||
|
|
||||||
return (country_code, city, asn, asn_org)
|
return (country_code, city, asn, asn_org)
|
||||||
except requests.RequestException as e:
|
except requests.RequestException as e:
|
||||||
@@ -266,7 +267,13 @@ def fetch_geolocation_from_api(ip: str, app_logger) -> tuple:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per_ip: int = 3, include_good_crawlers: bool = True, cleanup: bool = True):
|
def generate_fake_data(
|
||||||
|
num_ips: int = 20,
|
||||||
|
logs_per_ip: int = 15,
|
||||||
|
credentials_per_ip: int = 3,
|
||||||
|
include_good_crawlers: bool = True,
|
||||||
|
cleanup: bool = True,
|
||||||
|
):
|
||||||
"""
|
"""
|
||||||
Generate and insert fake test data into the database.
|
Generate and insert fake test data into the database.
|
||||||
|
|
||||||
@@ -308,8 +315,12 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
|
|||||||
for _ in range(logs_per_ip):
|
for _ in range(logs_per_ip):
|
||||||
path = random.choice(FAKE_PATHS)
|
path = random.choice(FAKE_PATHS)
|
||||||
user_agent = random.choice(FAKE_USER_AGENTS)
|
user_agent = random.choice(FAKE_USER_AGENTS)
|
||||||
is_suspicious = random.choice([True, False, False]) # 33% chance of suspicious
|
is_suspicious = random.choice(
|
||||||
is_honeypot = random.choice([True, False, False, False]) # 25% chance of honeypot trigger
|
[True, False, False]
|
||||||
|
) # 33% chance of suspicious
|
||||||
|
is_honeypot = random.choice(
|
||||||
|
[True, False, False, False]
|
||||||
|
) # 25% chance of honeypot trigger
|
||||||
|
|
||||||
# Randomly decide if this log has attack detections
|
# Randomly decide if this log has attack detections
|
||||||
attack_types = None
|
attack_types = None
|
||||||
@@ -350,39 +361,45 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
|
|||||||
app_logger.info(f" ✓ Generated {logs_per_ip} access logs")
|
app_logger.info(f" ✓ Generated {logs_per_ip} access logs")
|
||||||
app_logger.info(f" ✓ Generated {credentials_per_ip} credential attempts")
|
app_logger.info(f" ✓ Generated {credentials_per_ip} credential attempts")
|
||||||
|
|
||||||
# Add geolocation data if available for this IP
|
# Fetch geolocation data from API
|
||||||
if ip in FAKE_GEO_DATA:
|
app_logger.info(f" 🌍 Fetching geolocation from API...")
|
||||||
country_code, city, asn_org = FAKE_GEO_DATA[ip]
|
geo_data = fetch_geolocation_from_api(ip, app_logger)
|
||||||
# Extract ASN number from ASN string (e.g., "AS12345 Name" -> 12345)
|
|
||||||
asn_number = None
|
|
||||||
if asn_org and asn_org.startswith("AS"):
|
|
||||||
try:
|
|
||||||
asn_number = int(asn_org.split()[0][2:]) # Remove "AS" prefix and get number
|
|
||||||
except (ValueError, IndexError):
|
|
||||||
asn_number = 12345 # Fallback
|
|
||||||
|
|
||||||
# Update IP reputation info including geolocation and city
|
if geo_data:
|
||||||
|
country_code, city, asn, asn_org = geo_data
|
||||||
db_manager.update_ip_rep_infos(
|
db_manager.update_ip_rep_infos(
|
||||||
ip=ip,
|
ip=ip,
|
||||||
country_code=country_code,
|
country_code=country_code,
|
||||||
asn=asn_number or 12345,
|
asn=asn if asn else 12345,
|
||||||
asn_org=asn_org,
|
asn_org=asn_org or "Unknown",
|
||||||
list_on={},
|
list_on={},
|
||||||
city=city # Now passing city to the function
|
city=city,
|
||||||
)
|
)
|
||||||
app_logger.info(f" 📍 Added geolocation: {city}, {country_code} ({asn_org})")
|
location_display = (
|
||||||
|
f"{city}, {country_code}" if city else country_code or "Unknown"
|
||||||
|
)
|
||||||
|
app_logger.info(
|
||||||
|
f" 📍 API-fetched geolocation: {location_display} ({asn_org or 'Unknown'})"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
app_logger.warning(f" ⚠ Could not fetch geolocation for {ip}")
|
||||||
|
|
||||||
|
# Small delay to be nice to the API
|
||||||
|
time.sleep(0.5)
|
||||||
|
|
||||||
# Trigger behavior/category changes to demonstrate timeline feature
|
# Trigger behavior/category changes to demonstrate timeline feature
|
||||||
# First analysis
|
# First analysis
|
||||||
initial_category = random.choice(CATEGORIES)
|
initial_category = random.choice(CATEGORIES)
|
||||||
app_logger.info(f" ⟳ Analyzing behavior - Initial category: {initial_category}")
|
app_logger.info(
|
||||||
|
f" ⟳ Analyzing behavior - Initial category: {initial_category}"
|
||||||
|
)
|
||||||
|
|
||||||
db_manager.update_ip_stats_analysis(
|
db_manager.update_ip_stats_analysis(
|
||||||
ip=ip,
|
ip=ip,
|
||||||
analyzed_metrics=generate_analyzed_metrics(),
|
analyzed_metrics=generate_analyzed_metrics(),
|
||||||
category=initial_category,
|
category=initial_category,
|
||||||
category_scores=generate_category_scores(),
|
category_scores=generate_category_scores(),
|
||||||
last_analysis=datetime.now(tz=ZoneInfo('UTC'))
|
last_analysis=datetime.now(tz=ZoneInfo("UTC")),
|
||||||
)
|
)
|
||||||
total_category_changes += 1
|
total_category_changes += 1
|
||||||
|
|
||||||
@@ -391,22 +408,30 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
|
|||||||
|
|
||||||
# Second analysis with potential category change (70% chance)
|
# Second analysis with potential category change (70% chance)
|
||||||
if random.random() < 0.7:
|
if random.random() < 0.7:
|
||||||
new_category = random.choice([c for c in CATEGORIES if c != initial_category])
|
new_category = random.choice(
|
||||||
app_logger.info(f" ⟳ Behavior change detected: {initial_category} → {new_category}")
|
[c for c in CATEGORIES if c != initial_category]
|
||||||
|
)
|
||||||
|
app_logger.info(
|
||||||
|
f" ⟳ Behavior change detected: {initial_category} → {new_category}"
|
||||||
|
)
|
||||||
|
|
||||||
db_manager.update_ip_stats_analysis(
|
db_manager.update_ip_stats_analysis(
|
||||||
ip=ip,
|
ip=ip,
|
||||||
analyzed_metrics=generate_analyzed_metrics(),
|
analyzed_metrics=generate_analyzed_metrics(),
|
||||||
category=new_category,
|
category=new_category,
|
||||||
category_scores=generate_category_scores(),
|
category_scores=generate_category_scores(),
|
||||||
last_analysis=datetime.now(tz=ZoneInfo('UTC'))
|
last_analysis=datetime.now(tz=ZoneInfo("UTC")),
|
||||||
)
|
)
|
||||||
total_category_changes += 1
|
total_category_changes += 1
|
||||||
|
|
||||||
# Optional third change (40% chance)
|
# Optional third change (40% chance)
|
||||||
if random.random() < 0.4:
|
if random.random() < 0.4:
|
||||||
final_category = random.choice([c for c in CATEGORIES if c != new_category])
|
final_category = random.choice(
|
||||||
app_logger.info(f" ⟳ Another behavior change: {new_category} → {final_category}")
|
[c for c in CATEGORIES if c != new_category]
|
||||||
|
)
|
||||||
|
app_logger.info(
|
||||||
|
f" ⟳ Another behavior change: {new_category} → {final_category}"
|
||||||
|
)
|
||||||
|
|
||||||
time.sleep(0.1)
|
time.sleep(0.1)
|
||||||
db_manager.update_ip_stats_analysis(
|
db_manager.update_ip_stats_analysis(
|
||||||
@@ -414,7 +439,7 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
|
|||||||
analyzed_metrics=generate_analyzed_metrics(),
|
analyzed_metrics=generate_analyzed_metrics(),
|
||||||
category=final_category,
|
category=final_category,
|
||||||
category_scores=generate_category_scores(),
|
category_scores=generate_category_scores(),
|
||||||
last_analysis=datetime.now(tz=ZoneInfo('UTC'))
|
last_analysis=datetime.now(tz=ZoneInfo("UTC")),
|
||||||
)
|
)
|
||||||
total_category_changes += 1
|
total_category_changes += 1
|
||||||
|
|
||||||
@@ -433,7 +458,9 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
|
|||||||
|
|
||||||
# Don't generate access logs for good crawlers to prevent re-categorization
|
# Don't generate access logs for good crawlers to prevent re-categorization
|
||||||
# We'll just create the IP stats entry with the category set
|
# We'll just create the IP stats entry with the category set
|
||||||
app_logger.info(f" ✓ Adding as good crawler (no logs to prevent re-categorization)")
|
app_logger.info(
|
||||||
|
f" ✓ Adding as good crawler (no logs to prevent re-categorization)"
|
||||||
|
)
|
||||||
|
|
||||||
# First, we need to create the IP in the database via persist_access
|
# First, we need to create the IP in the database via persist_access
|
||||||
# (but we'll only create one minimal log entry)
|
# (but we'll only create one minimal log entry)
|
||||||
@@ -456,9 +483,11 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
|
|||||||
asn=asn if asn else 12345,
|
asn=asn if asn else 12345,
|
||||||
asn_org=asn_org,
|
asn_org=asn_org,
|
||||||
list_on={},
|
list_on={},
|
||||||
city=city
|
city=city,
|
||||||
|
)
|
||||||
|
app_logger.info(
|
||||||
|
f" 📍 API-fetched geolocation: {city}, {country_code} ({asn_org})"
|
||||||
)
|
)
|
||||||
app_logger.info(f" 📍 API-fetched geolocation: {city}, {country_code} ({asn_org})")
|
|
||||||
else:
|
else:
|
||||||
app_logger.warning(f" ⚠ Could not fetch geolocation for {crawler_ip}")
|
app_logger.warning(f" ⚠ Could not fetch geolocation for {crawler_ip}")
|
||||||
|
|
||||||
@@ -479,7 +508,7 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
|
|||||||
"regular_user": 0,
|
"regular_user": 0,
|
||||||
"unknown": 0,
|
"unknown": 0,
|
||||||
},
|
},
|
||||||
last_analysis=datetime.now(tz=ZoneInfo('UTC'))
|
last_analysis=datetime.now(tz=ZoneInfo("UTC")),
|
||||||
)
|
)
|
||||||
total_good_crawlers += 1
|
total_good_crawlers += 1
|
||||||
time.sleep(0.5) # Small delay between API calls
|
time.sleep(0.5) # Small delay between API calls
|
||||||
@@ -497,8 +526,12 @@ def generate_fake_data(num_ips: int = 20, logs_per_ip: int = 15, credentials_per
|
|||||||
app_logger.info(f"Total category changes: {total_category_changes}")
|
app_logger.info(f"Total category changes: {total_category_changes}")
|
||||||
app_logger.info("=" * 60)
|
app_logger.info("=" * 60)
|
||||||
app_logger.info("\nYou can now view the dashboard with this test data.")
|
app_logger.info("\nYou can now view the dashboard with this test data.")
|
||||||
app_logger.info("The 'Behavior Timeline' will show category transitions for each IP.")
|
app_logger.info(
|
||||||
app_logger.info("The map will show good crawlers with real geolocation from API.")
|
"The 'Behavior Timeline' will show category transitions for each IP."
|
||||||
|
)
|
||||||
|
app_logger.info(
|
||||||
|
"All IPs have API-fetched geolocation with reverse geocoded city names."
|
||||||
|
)
|
||||||
app_logger.info("Run: python server.py")
|
app_logger.info("Run: python server.py")
|
||||||
app_logger.info("=" * 60)
|
app_logger.info("=" * 60)
|
||||||
|
|
||||||
@@ -513,4 +546,10 @@ if __name__ == "__main__":
|
|||||||
# Add --no-cleanup flag to skip database cleanup
|
# Add --no-cleanup flag to skip database cleanup
|
||||||
cleanup = "--no-cleanup" not in sys.argv
|
cleanup = "--no-cleanup" not in sys.argv
|
||||||
|
|
||||||
generate_fake_data(num_ips, logs_per_ip, credentials_per_ip, include_good_crawlers=True, cleanup=cleanup)
|
generate_fake_data(
|
||||||
|
num_ips,
|
||||||
|
logs_per_ip,
|
||||||
|
credentials_per_ip,
|
||||||
|
include_good_crawlers=True,
|
||||||
|
cleanup=cleanup,
|
||||||
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user