# Python · 99 lines · 3.9 KiB
from database import get_database
|
|
from logger import get_app_logger
|
|
import requests
|
|
from sanitizer import sanitize_for_storage, sanitize_dict
|
|
from geo_utils import extract_geolocation_from_ip, fetch_blocklist_data
|
|
|
|
# ----------------------
|
|
# TASK CONFIG
|
|
# ----------------------
|
|
|
|
# Metadata describing this task.
# NOTE(review): presumably consumed by whatever scheduler loads this module;
# confirm the expected keys against that loader.
TASK_CONFIG = {
    "name": "fetch-ip-rep",
    # Standard five-field cron expression: every 5 minutes.
    "cron": "*/5 * * * *",
    "enabled": True,
    # NOTE(review): looks like a request to run once as soon as the task is
    # loaded, in addition to the cron schedule -- confirm.
    "run_when_loaded": True,
}
|
|
|
|
|
|
def main():
    """Enrich stored IPs that do not have reputation data yet.

    Asks the database for up to 50 unenriched IPs. For each one it looks up
    geolocation data and blocklist data, sanitizes the string fields, and
    writes the result back with ``update_ip_rep_infos``.

    Errors are handled per IP: a ``requests.RequestException`` is logged as
    a warning, any other exception as an error, and processing continues
    with the next IP. An IP for which no geolocation data is returned is
    logged and skipped without being updated.
    """
    db_manager = get_database()
    app_logger = get_app_logger()

    # Only get IPs that haven't been enriched yet
    unenriched_ips = db_manager.get_unenriched_ips(limit=50)
    app_logger.info(
        f"{len(unenriched_ips)} IP's need to be have reputation enrichment."
    )

    for ip in unenriched_ips:
        try:
            # Fetch geolocation data using ip-api.com
            geoloc_data = extract_geolocation_from_ip(ip)
            if not geoloc_data:
                # Nothing is written without geolocation data, so skip the
                # blocklist request too and make the skip visible in the log.
                app_logger.warning(
                    f"No geolocation data for {ip}; skipping enrichment."
                )
                continue

            # Fetch blocklist data from lcrawl API
            blocklist_data = fetch_blocklist_data(ip)

            # Extract fields from the new API response
            country_iso_code = geoloc_data.get("country_code")
            country = geoloc_data.get("country")
            region = geoloc_data.get("region")
            region_name = geoloc_data.get("region_name")
            city = geoloc_data.get("city")
            timezone = geoloc_data.get("timezone")
            isp = geoloc_data.get("isp")
            reverse = geoloc_data.get("reverse")
            asn = geoloc_data.get("asn")
            asn_org = geoloc_data.get("org")
            latitude = geoloc_data.get("latitude")
            longitude = geoloc_data.get("longitude")
            is_proxy = geoloc_data.get("is_proxy", False)
            is_hosting = geoloc_data.get("is_hosting", False)

            # Use blocklist data if available, otherwise start from an empty
            # dict. Work on a shallow copy so adding the flags below never
            # mutates the object handed back by fetch_blocklist_data().
            list_on = dict(blocklist_data) if blocklist_data else {}

            # Add flags to list_on
            list_on["is_proxy"] = is_proxy
            list_on["is_hosting"] = is_hosting

            # The second argument of sanitize_for_storage is a per-field
            # limit (NOTE(review): presumably a maximum length -- confirm).
            sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
            sanitized_country = sanitize_for_storage(country, 100)
            sanitized_region = sanitize_for_storage(region, 2)
            sanitized_region_name = sanitize_for_storage(region_name, 100)
            sanitized_asn = sanitize_for_storage(asn, 100)
            sanitized_asn_org = sanitize_for_storage(asn_org, 100)
            # city and reverse are stored as None when missing or empty.
            sanitized_city = sanitize_for_storage(city, 100) if city else None
            sanitized_timezone = sanitize_for_storage(timezone, 50)
            sanitized_isp = sanitize_for_storage(isp, 100)
            sanitized_reverse = (
                sanitize_for_storage(reverse, 255) if reverse else None
            )
            sanitized_list_on = sanitize_dict(list_on, 100000)

            # latitude, longitude and the two flags are passed through
            # unsanitized, exactly as returned by the geolocation lookup.
            db_manager.update_ip_rep_infos(
                ip,
                sanitized_country_iso_code,
                sanitized_asn,
                sanitized_asn_org,
                sanitized_list_on,
                city=sanitized_city,
                latitude=latitude,
                longitude=longitude,
                country=sanitized_country,
                region=sanitized_region,
                region_name=sanitized_region_name,
                timezone=sanitized_timezone,
                isp=sanitized_isp,
                reverse=sanitized_reverse,
                is_proxy=is_proxy,
                is_hosting=is_hosting,
            )
        except requests.RequestException as e:
            app_logger.warning(f"Failed to fetch geolocation for {ip}: {e}")
        except Exception as e:
            # Per-IP boundary: log and move on so one bad IP cannot abort
            # the rest of the batch.
            app_logger.error(f"Error processing IP {ip}: {e}")
|