"""Generate bad-bot blocking rules for Nginx, Caddy and Apache (ModSecurity).

The script downloads a list of bad-bot User-Agent patterns from the first
reachable entry in ``BOT_LIST_SOURCES`` and writes one ``bots.conf`` per web
server into the directories listed in ``OUTPUT_DIRS``.
"""

import logging
import os
import sys

import requests

# Logging setup
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# Output directories
OUTPUT_DIRS = {
    "nginx": "waf_patterns/nginx/",
    "caddy": "waf_patterns/caddy/",
    "apache": "waf_patterns/apache/",
}

# Primary and fallback bot lists
BOT_LIST_SOURCES = [
    "https://raw.githubusercontent.com/mitchellkrogza/nginx-ultimate-bad-bot-blocker/master/_generator_lists/bad-user-agents.list",  # Primary
    "https://raw.githubusercontent.com/atmire/COUNTER-Robots/master/COUNTER_Robots_list.txt",  # Fallback 1
    "https://raw.githubusercontent.com/monperrus/crawler-user-agents/master/crawler-user-agents.json",  # Fallback 2 (JSON)
]

# Id of the first generated ModSecurity rule; each following rule gets the
# next integer because ModSecurity requires rule ids to be unique.
APACHE_RULE_ID_BASE = 3000


def _clean_bot_list(raw_bots):
    """Return the usable entries of *raw_bots*, in order and without duplicates.

    Entries are stripped of surrounding whitespace. Non-string entries, blank
    entries and lines starting with ``#`` are dropped: an empty pattern would
    otherwise be written as a rule that matches every User-Agent.
    """
    cleaned = {}
    for entry in raw_bots:
        if not isinstance(entry, str):
            continue
        entry = entry.strip()
        if not entry or entry.startswith("#"):
            continue
        # dict keys keep insertion order, giving an ordered de-duplication.
        cleaned.setdefault(entry, None)
    return list(cleaned)


def fetch_bot_list():
    """Download the bad-bot list from the first source that yields entries.

    Sources are tried in the order given by ``BOT_LIST_SOURCES``. A source
    whose URL ends in ``.json`` is parsed as a JSON array of objects with a
    ``pattern`` key; any other source is read as one entry per line.

    Returns:
        A list of non-empty, de-duplicated bot pattern strings.

    Raises:
        SystemExit: with status 1 when no source returns a usable list.
    """
    for source in BOT_LIST_SOURCES:
        try:
            logging.info(f"Fetching bad bot list from {source}...")
            response = requests.get(source, timeout=10)
            response.raise_for_status()

            # Handle JSON fallback source
            if source.endswith(".json"):
                raw_bots = [item["pattern"] for item in response.json()]
            else:
                raw_bots = response.text.splitlines()
        except (requests.RequestException, ValueError, KeyError, TypeError) as e:
            # KeyError/TypeError cover a JSON payload of unexpected shape, so
            # a malformed source falls through to the next one.
            logging.warning(f"Failed to fetch from {source}. Reason: {e}")
            continue

        bots = _clean_bot_list(raw_bots)
        if not bots:
            # An empty list would produce rule files that block nothing.
            logging.warning(f"Failed to fetch from {source}. Reason: no usable entries")
            continue

        logging.info(f"Successfully fetched {len(bots)} bots from {source}")
        return bots

    logging.error("❌ All bot lists failed to fetch. Exiting...")
    # sys.exit instead of the interactive-only ``exit`` helper from ``site``.
    sys.exit(1)


def generate_nginx_conf(bots):
    """Write the Nginx ``map``/``if`` bot blocker to ``bots.conf``.

    Each entry of *bots* becomes a case-insensitive regex key (``~*``) that
    maps ``$http_user_agent`` to ``$bad_bot = 1``.
    """
    # NOTE(review): nginx documents ``map`` as valid only in the ``http``
    # context and ``if`` only in ``server``/``location``; the two parts of
    # this file may need to be included separately. Output layout is left
    # unchanged here.
    path = os.path.join(OUTPUT_DIRS["nginx"], "bots.conf")
    with open(path, "w", encoding="utf-8") as f:
        f.write("# Nginx WAF - Bad Bot Blocker\n")
        f.write("map $http_user_agent $bad_bot {\n")
        f.writelines(f'    "~*{bot}" 1;\n' for bot in bots if bot)
        f.write("    default 0;\n}\n")
        f.write("if ($bad_bot) {\n    return 403;\n}\n")
    logging.info(f"[+] Generated Nginx bot blocker: {path}")


def generate_caddy_conf(bots):
    """Write the Caddy named matcher ``@bad_bot`` and its 403 response.

    Each entry of *bots* becomes one ``header User-Agent *<bot>*`` line
    inside the matcher block.
    """
    path = os.path.join(OUTPUT_DIRS["caddy"], "bots.conf")
    with open(path, "w", encoding="utf-8") as f:
        f.write("# Caddy WAF - Bad Bot Blocker\n")
        f.write("@bad_bot {\n")
        f.writelines(f"    header User-Agent *{bot}*\n" for bot in bots if bot)
        f.write("}\nrespond @bad_bot 403\n")
    logging.info(f"[+] Generated Caddy bot blocker: {path}")


def generate_apache_conf(bots):
    """Write one ModSecurity ``SecRule`` per bot to ``bots.conf``.

    Rule ids start at ``APACHE_RULE_ID_BASE`` and increase by one per rule,
    so every generated rule has a distinct id.
    """
    path = os.path.join(OUTPUT_DIRS["apache"], "bots.conf")
    usable_bots = [bot for bot in bots if bot]
    with open(path, "w", encoding="utf-8") as f:
        f.write("# Apache ModSecurity - Bad Bot Blocker\n")
        f.write("SecRuleEngine On\n")
        for rule_id, bot in enumerate(usable_bots, start=APACHE_RULE_ID_BASE):
            f.write(
                f'SecRule REQUEST_HEADERS:User-Agent "@contains {bot}" '
                f'"id:{rule_id},phase:1,deny,status:403,log,msg:\'Bad Bot Blocked\'"\n'
            )
    logging.info(f"[+] Generated Apache bot blocker: {path}")


def main():
    """Create the output directories, fetch the bot list and write all configs."""
    for directory in OUTPUT_DIRS.values():
        os.makedirs(directory, exist_ok=True)

    bots = fetch_bot_list()
    generate_nginx_conf(bots)
    generate_caddy_conf(bots)
    generate_apache_conf(bots)
    logging.info("[✔] Bot blocking configurations generated.")


if __name__ == "__main__":
    main()