Update badbots.py

Improved error handling, logging, and code structure: added docstrings and explanatory comments, and removed the Caddy configuration generator.
This commit is contained in:
fab 2025-01-03 13:20:01 +01:00 committed by GitHub
parent 2356cdcff0
commit 61e1a856c9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -13,7 +13,6 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(
# Constants and Configuration # Constants and Configuration
OUTPUT_DIRS = { OUTPUT_DIRS = {
"nginx": "waf_patterns/nginx/", "nginx": "waf_patterns/nginx/",
"caddy": "waf_patterns/caddy/",
"apache": "waf_patterns/apache/", "apache": "waf_patterns/apache/",
"traefik": "waf_patterns/traefik/", "traefik": "waf_patterns/traefik/",
"haproxy": "waf_patterns/haproxy/" "haproxy": "waf_patterns/haproxy/"
@ -36,6 +35,9 @@ GITHUB_TOKEN = os.getenv("GITHUB_TOKEN")
def fetch_with_retries(url: str) -> list: def fetch_with_retries(url: str) -> list:
"""
Fetch bot patterns from a URL with retries and rate-limiting handling.
"""
retries = 0 retries = 0
headers = {} headers = {}
@ -70,6 +72,9 @@ def fetch_with_retries(url: str) -> list:
def parse_bot_list(url: str, response: requests.Response) -> list: def parse_bot_list(url: str, response: requests.Response) -> list:
"""
Parse bot patterns from the fetched response (JSON or plain text).
"""
bot_patterns = set() bot_patterns = set()
try: try:
if url.endswith(".json"): if url.endswith(".json"):
@ -95,8 +100,10 @@ def parse_bot_list(url: str, response: requests.Response) -> list:
return list(bot_patterns) return list(bot_patterns)
def fetch_bot_list(): def fetch_bot_list():
"""
Fetch bot patterns from all sources using a thread pool.
"""
bot_patterns = set() bot_patterns = set()
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor: with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
@ -115,6 +122,9 @@ def fetch_bot_list():
def write_to_file(path: Path, content: str): def write_to_file(path: Path, content: str):
"""
Write content to a file at the specified path.
"""
try: try:
with path.open("w") as f: with path.open("w") as f:
f.write(content) f.write(content)
@ -124,6 +134,9 @@ def write_to_file(path: Path, content: str):
def generate_nginx_conf(bots): def generate_nginx_conf(bots):
"""
Generate Nginx WAF configuration for blocking bots.
"""
path = Path(OUTPUT_DIRS['nginx'], "bots.conf") path = Path(OUTPUT_DIRS['nginx'], "bots.conf")
content = "map $http_user_agent $bad_bot {\n" content = "map $http_user_agent $bad_bot {\n"
for bot in bots: for bot in bots:
@ -132,16 +145,10 @@ def generate_nginx_conf(bots):
write_to_file(path, content) write_to_file(path, content)
def generate_caddy_conf(bots):
path = Path(OUTPUT_DIRS['caddy'], "bots.conf")
content = "@bad_bot {\n"
for bot in bots:
content += f' header User-Agent *{bot}*\n'
content += "}\nrespond @bad_bot 403\n"
write_to_file(path, content)
def generate_apache_conf(bots): def generate_apache_conf(bots):
"""
Generate Apache WAF configuration for blocking bots.
"""
path = Path(OUTPUT_DIRS['apache'], "bots.conf") path = Path(OUTPUT_DIRS['apache'], "bots.conf")
content = "SecRuleEngine On\n" content = "SecRuleEngine On\n"
for bot in bots: for bot in bots:
@ -150,6 +157,9 @@ def generate_apache_conf(bots):
def generate_traefik_conf(bots): def generate_traefik_conf(bots):
"""
Generate Traefik WAF configuration for blocking bots.
"""
path = Path(OUTPUT_DIRS['traefik'], "bots.toml") path = Path(OUTPUT_DIRS['traefik'], "bots.toml")
content = "[http.middlewares]\n[http.middlewares.bad_bot_block]\n [http.middlewares.bad_bot_block.plugin.badbot]\n userAgent = [\n" content = "[http.middlewares]\n[http.middlewares.bad_bot_block]\n [http.middlewares.bad_bot_block.plugin.badbot]\n userAgent = [\n"
for bot in bots: for bot in bots:
@ -159,6 +169,9 @@ def generate_traefik_conf(bots):
def generate_haproxy_conf(bots): def generate_haproxy_conf(bots):
"""
Generate HAProxy WAF configuration for blocking bots.
"""
path = Path(OUTPUT_DIRS['haproxy'], "bots.acl") path = Path(OUTPUT_DIRS['haproxy'], "bots.acl")
content = "# HAProxy WAF - Bad Bot Blocker\n" content = "# HAProxy WAF - Bad Bot Blocker\n"
for bot in bots: for bot in bots:
@ -168,13 +181,15 @@ def generate_haproxy_conf(bots):
if __name__ == "__main__": if __name__ == "__main__":
# Ensure output directories exist
for output_dir in OUTPUT_DIRS.values(): for output_dir in OUTPUT_DIRS.values():
Path(output_dir).mkdir(parents=True, exist_ok=True) Path(output_dir).mkdir(parents=True, exist_ok=True)
# Fetch bot patterns
bots = fetch_bot_list() bots = fetch_bot_list()
# Generate WAF configurations
generate_nginx_conf(bots) generate_nginx_conf(bots)
generate_caddy_conf(bots)
generate_apache_conf(bots) generate_apache_conf(bots)
generate_traefik_conf(bots) generate_traefik_conf(bots)
generate_haproxy_conf(bots) generate_haproxy_conf(bots)