mirror of
https://github.com/fabriziosalmi/patterns.git
synced 2025-12-17 17:55:48 +00:00
Update badbots.py
parsing fixes
This commit is contained in:
parent
0c9e469587
commit
9f1266f81f
14
badbots.py
14
badbots.py
@ -76,18 +76,26 @@ def parse_bot_list(url: str, response: requests.Response) -> list:
|
|||||||
json_data = response.json()
|
json_data = response.json()
|
||||||
if isinstance(json_data, list):
|
if isinstance(json_data, list):
|
||||||
for entry in json_data:
|
for entry in json_data:
|
||||||
bot_patterns.add(entry.get('pattern', entry.get('ua', '')))
|
user_agent = entry.get('pattern') or entry.get('ua', '')
|
||||||
|
if user_agent and not user_agent.startswith("#"):
|
||||||
|
bot_patterns.add(user_agent)
|
||||||
elif isinstance(json_data, dict):
|
elif isinstance(json_data, dict):
|
||||||
for entry in json_data.get('test_cases', []):
|
for entry in json_data.get('test_cases', []):
|
||||||
bot_patterns.add(entry.get('user_agent_string', ''))
|
user_agent = entry.get('user_agent_string', '')
|
||||||
|
if user_agent and not user_agent.startswith("#"):
|
||||||
|
bot_patterns.add(user_agent)
|
||||||
else:
|
else:
|
||||||
bot_patterns.update(response.text.splitlines())
|
for line in response.text.splitlines():
|
||||||
|
# Exclude comments, empty lines, and non-UA strings
|
||||||
|
if line and not line.startswith("#") and len(line) > 3 and "Mozilla" in line:
|
||||||
|
bot_patterns.add(line)
|
||||||
except (ValueError, json.JSONDecodeError) as e:
|
except (ValueError, json.JSONDecodeError) as e:
|
||||||
logging.warning(f"Error parsing {url}: {e}")
|
logging.warning(f"Error parsing {url}: {e}")
|
||||||
|
|
||||||
return list(bot_patterns)
|
return list(bot_patterns)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def fetch_bot_list():
|
def fetch_bot_list():
|
||||||
bot_patterns = set()
|
bot_patterns = set()
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user