Fix SQLite "database is locked" errors in fetch_ip_rep task

- Add missing session.commit() and cleanup to update_ip_rep_infos() **bugfix**
- Enable SQLite WAL mode and 30s busy timeout for better concurrency **race condition prevention**
- Add get_unenriched_ips() method to only fetch IPs needing enrichment **don't enrich what's already done**
- Rewrite fetch_ip_rep task to process only unenriched IPs (limit 50) **API kindness**
- Change task frequency from every 1 minute to every 5 minutes **API kindness**
- Add request timeout (10s) and proper error handling **can update to longer if needed**
This commit is contained in:
Phillip Tarrant
2026-01-10 14:59:15 -06:00
parent edaafb0263
commit f7b9ee54e3
2 changed files with 75 additions and 48 deletions

View File

@@ -1,13 +1,4 @@
from sqlalchemy import select
from typing import Optional
from database import get_database, DatabaseManager
from zoneinfo import ZoneInfo
from pathlib import Path
from datetime import datetime, timedelta
import re
import urllib.parse
from wordlists import get_wordlists
from config import get_config
from database import get_database
from logger import get_app_logger
import requests
from sanitizer import sanitize_for_storage, sanitize_dict
@@ -18,42 +9,44 @@ from sanitizer import sanitize_for_storage, sanitize_dict
# Task metadata (presumably read by the task loader/scheduler; confirm
# against the loader). The cron expression fires every five minutes. Each key
# appears exactly once: a repeated key in a dict literal is silently
# overridden by its last occurrence, which hides the effective value.
TASK_CONFIG = {
    "name": "fetch-ip-rep",
    "cron": "*/5 * * * *",
    "enabled": True,
    "run_when_loaded": True,
}
def main():
    """Enrich a bounded batch of not-yet-enriched IPs with reputation data.

    Asks the database manager for up to 50 IPs that have not been enriched,
    queries the IP reputation API once per IP, sanitizes the returned
    country/ASN fields and the ``list_on`` mapping, and stores them through
    ``update_ip_rep_infos``. A failure for one IP is logged and does not
    stop the rest of the batch.
    """
    db_manager = get_database()
    app_logger = get_app_logger()

    # Loop-invariant request settings.
    api_url = "https://iprep.lcrawl.com/api/iprep/"
    headers = {"Content-Type": "application/json"}

    # Only get IPs that haven't been enriched yet
    unenriched_ips = db_manager.get_unenriched_ips(limit=50)

    for ip in unenriched_ips:
        try:
            response = requests.get(
                api_url,
                headers=headers,
                params={"cidr": ip},
                timeout=10,
            )
            # Surface 4xx/5xx responses as RequestException (HTTPError) so
            # they are logged as fetch failures rather than parsed as data.
            response.raise_for_status()
            payload = response.json()

            results = payload.get("results")
            if not results:
                # NOTE(review): an IP with no results is left untouched, so
                # it will presumably be selected again on the next run.
                continue

            data = results[0]
            geoip_data = data["geoip_data"]
            country_iso_code = geoip_data["country_iso_code"]
            asn = geoip_data["asn_autonomous_system_number"]
            asn_org = geoip_data["asn_autonomous_system_organization"]
            list_on = data["list_on"]

            sanitized_country_iso_code = sanitize_for_storage(country_iso_code, 3)
            sanitized_asn = sanitize_for_storage(asn, 100)
            sanitized_asn_org = sanitize_for_storage(asn_org, 100)
            sanitized_list_on = sanitize_dict(list_on, 100000)

            db_manager.update_ip_rep_infos(
                ip,
                sanitized_country_iso_code,
                sanitized_asn,
                sanitized_asn_org,
                sanitized_list_on,
            )
        except requests.RequestException as e:
            # Network errors, timeouts and HTTP error statuses.
            app_logger.warning(f"Failed to fetch IP rep for {ip}: {e}")
        except Exception as e:
            # Per-IP boundary: malformed payloads or storage errors must not
            # abort the remaining IPs in this batch.
            app_logger.error(f"Error processing IP {ip}: {e}")