Update badbots.py

parsing fixes
This commit is contained in:
fab 2025-01-01 12:56:12 +01:00 committed by GitHub
parent 0c9e469587
commit 9f1266f81f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -76,18 +76,26 @@ def parse_bot_list(url: str, response: requests.Response) -> list:
json_data = response.json() json_data = response.json()
if isinstance(json_data, list): if isinstance(json_data, list):
for entry in json_data: for entry in json_data:
bot_patterns.add(entry.get('pattern', entry.get('ua', ''))) user_agent = entry.get('pattern') or entry.get('ua', '')
if user_agent and not user_agent.startswith("#"):
bot_patterns.add(user_agent)
elif isinstance(json_data, dict): elif isinstance(json_data, dict):
for entry in json_data.get('test_cases', []): for entry in json_data.get('test_cases', []):
bot_patterns.add(entry.get('user_agent_string', '')) user_agent = entry.get('user_agent_string', '')
if user_agent and not user_agent.startswith("#"):
bot_patterns.add(user_agent)
else: else:
bot_patterns.update(response.text.splitlines()) for line in response.text.splitlines():
# Exclude comments, empty lines, and non-UA strings
if line and not line.startswith("#") and len(line) > 3 and "Mozilla" in line:
bot_patterns.add(line)
except (ValueError, json.JSONDecodeError) as e: except (ValueError, json.JSONDecodeError) as e:
logging.warning(f"Error parsing {url}: {e}") logging.warning(f"Error parsing {url}: {e}")
return list(bot_patterns) return list(bot_patterns)
def fetch_bot_list(): def fetch_bot_list():
bot_patterns = set() bot_patterns = set()