mirror of
https://github.com/fabriziosalmi/patterns.git
synced 2025-12-17 17:55:48 +00:00
Update json2traefik.py
This commit is contained in:
parent
8277d8bc13
commit
df23dd0d04
208
json2traefik.py
208
json2traefik.py
@ -1,114 +1,154 @@
|
|||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
from pathlib import Path
|
import re
|
||||||
from typing import List, Dict, Set
|
|
||||||
import logging
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import List, Dict, Set, Tuple, Optional
|
||||||
|
from functools import lru_cache
|
||||||
|
|
||||||
# Configure logging
|
# --- Configuration ---
|
||||||
logging.basicConfig(
|
LOG_LEVEL = logging.INFO # DEBUG, INFO, WARNING, ERROR
|
||||||
level=logging.INFO,
|
INPUT_FILE = Path(os.getenv("INPUT_FILE", "owasp_rules.json"))
|
||||||
format="%(asctime)s - %(levelname)s - %(message)s",
|
OUTPUT_DIR = Path(os.getenv("OUTPUT_DIR", "waf_patterns/traefik"))
|
||||||
handlers=[logging.StreamHandler()],
|
MIDDLEWARE_FILE = OUTPUT_DIR / "middleware.toml"
|
||||||
)
|
|
||||||
|
|
||||||
# Constants
|
# Unsupported patterns (for Traefik's badbot plugin, which uses regex)
|
||||||
OUTPUT_DIR = Path("waf_patterns/traefik/") # Output directory for Traefik configs
|
UNSUPPORTED_PATTERNS = [
|
||||||
|
"@pmFromFile", # No file lookups
|
||||||
|
# Add other unsupported operators/patterns here.
|
||||||
|
]
|
||||||
|
|
||||||
|
# --- Logging Setup ---
|
||||||
|
logging.basicConfig(level=LOG_LEVEL, format="%(asctime)s - %(levelname)s - %(message)s")
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
def load_owasp_rules(file_path: Path) -> List[Dict]:
|
@lru_cache(maxsize=256)
|
||||||
"""
|
def validate_regex(pattern: str) -> bool:
|
||||||
Load OWASP rules from a JSON file.
|
"""Validates a regex pattern."""
|
||||||
|
|
||||||
Args:
|
|
||||||
file_path (Path): Path to the JSON file containing OWASP rules.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
List[Dict]: List of OWASP rules.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
SystemExit: If the file is not found or contains invalid JSON.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
with open(file_path, "r") as f:
|
re.compile(pattern)
|
||||||
return json.load(f)
|
return True
|
||||||
except FileNotFoundError:
|
except re.error as e:
|
||||||
logging.error(f"[-] Error: File '{file_path}' not found.")
|
logger.warning(f"Invalid regex: {pattern} - {e}")
|
||||||
exit(1)
|
return False
|
||||||
except json.JSONDecodeError:
|
|
||||||
logging.error(f"[-] Error: Invalid JSON in '{file_path}'.")
|
def _sanitize_pattern(pattern: str) -> str:
|
||||||
exit(1)
|
"""Internal helper for pattern sanitization."""
|
||||||
except Exception as e:
|
pattern = pattern.replace("@rx ", "").strip()
|
||||||
logging.error(f"[-] Unexpected error loading OWASP rules: {e}")
|
pattern = re.sub(r"\(\?i\)", "", pattern) # Remove case-insensitive flag
|
||||||
exit(1)
|
|
||||||
|
# Convert $ to \$
|
||||||
|
pattern = pattern.replace("$", r"\$")
|
||||||
|
|
||||||
|
# Convert the HTML entities &lbrace; / &lcub; to { and &rbrace; / &rcub; to }
|
||||||
|
pattern = re.sub(r"&l(?:brace|cub);?", r"{", pattern)
|
||||||
|
pattern = re.sub(r"&r(?:brace|cub);?", r"}", pattern)
|
||||||
|
|
||||||
|
# Remove unnecessary \.*
|
||||||
|
pattern = re.sub(r"\\\.\*", r"\.*", pattern)
|
||||||
|
pattern = re.sub(r"(?<!\\)\.(?![\w])", r"\.", pattern) # Escape dots
|
||||||
|
|
||||||
|
# Replace non-capturing groups (?:...) with capturing groups (...)
|
||||||
|
pattern = re.sub(r"\(\?:", "(", pattern)
|
||||||
|
|
||||||
|
return pattern
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_pattern(pattern: str) -> Optional[str]:
|
||||||
|
"""Sanitizes a pattern for use with Traefik's badbot plugin."""
|
||||||
|
for unsupported in UNSUPPORTED_PATTERNS:
|
||||||
|
if unsupported in pattern:
|
||||||
|
logger.warning(f"Skipping unsupported pattern: {pattern}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# if it is not a string comparison we use regex
|
||||||
|
if not any(op in pattern for op in ["@streq", "@contains", "!@eq", "!@within", "@lt", "@ge", "@gt", "@eq", "@ipMatch", "@endsWith"]):
|
||||||
|
return _sanitize_pattern(pattern) # return the regex
|
||||||
|
else: # if it is not a regex
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def generate_traefik_conf(rules: List[Dict]) -> None:
|
def generate_traefik_conf(rules: List[Dict]) -> None:
|
||||||
"""
|
"""Generates the Traefik middleware configuration (middleware.toml)."""
|
||||||
Generate Traefik middleware configuration from OWASP rules.
|
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
Args:
|
|
||||||
rules (List[Dict]): List of OWASP rules.
|
|
||||||
|
|
||||||
Raises:
|
|
||||||
SystemExit: If there is an error writing to the output file.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
# Ensure the output directory exists
|
with open(MIDDLEWARE_FILE, "w", encoding="utf-8") as f:
|
||||||
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
|
||||||
config_file = OUTPUT_DIR / "middleware.toml"
|
|
||||||
|
|
||||||
with open(config_file, "w") as f:
|
|
||||||
f.write("[http.middlewares]\n\n")
|
f.write("[http.middlewares]\n\n")
|
||||||
|
|
||||||
# Group rules by category
|
# Group rules by category AND location. This is important!
|
||||||
grouped_rules: Dict[str, List[Dict]] = {}
|
categorized_rules: Dict[str, Dict[str, Set[str]]] = {}
|
||||||
|
|
||||||
for rule in rules:
|
for rule in rules:
|
||||||
category = rule.get("category", "default")
|
rule_id = rule.get("id", "no_id")
|
||||||
if category not in grouped_rules:
|
category = rule.get("category", "generic").lower()
|
||||||
grouped_rules[category] = []
|
location = rule.get("location", "user-agent").lower() # default value!
|
||||||
grouped_rules[category].append(rule)
|
pattern = rule["pattern"]
|
||||||
|
severity = rule.get("severity", "medium").lower() # default
|
||||||
|
|
||||||
# Write grouped rules to the TOML file
|
# Sanitize, but *only* if the location is User-Agent.
|
||||||
for category, rules_in_category in grouped_rules.items():
|
# We *don't* want to apply regexes to other locations here.
|
||||||
f.write(f"[http.middlewares.bad_bot_block_{category}]\n")
|
if location == "user-agent":
|
||||||
f.write(f" [http.middlewares.bad_bot_block_{category}.plugin.badbot]\n")
|
sanitized_pattern = sanitize_pattern(pattern)
|
||||||
|
if not sanitized_pattern or not validate_regex(sanitized_pattern):
|
||||||
|
continue # skip
|
||||||
|
else:
|
||||||
|
logger.warning(f"Skipping rule with unsupported location '{location}' for Traefik: {rule_id}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Initialize category/location if needed
|
||||||
|
if category not in categorized_rules:
|
||||||
|
categorized_rules[category] = {}
|
||||||
|
if location not in categorized_rules[category]:
|
||||||
|
categorized_rules[category][location] = set() # Use a set
|
||||||
|
|
||||||
|
# Add the *escaped* pattern to the set.
|
||||||
|
categorized_rules[category][location].add(sanitized_pattern)
|
||||||
|
|
||||||
|
# Write the configuration
|
||||||
|
for category, location_rules in categorized_rules.items():
|
||||||
|
for location, patterns in location_rules.items():
|
||||||
|
# Create a unique middleware name
|
||||||
|
middleware_name = f"waf_{category}_{location}".replace("-", "_")
|
||||||
|
f.write(f"[http.middlewares.{middleware_name}]\n")
|
||||||
|
f.write(f" [http.middlewares.{middleware_name}.plugin.badbot]\n")
|
||||||
f.write(" userAgent = [\n")
|
f.write(" userAgent = [\n")
|
||||||
|
# Properly escape for TOML (and for regex within the string)
|
||||||
# Use a set to deduplicate rules
|
for pattern in patterns:
|
||||||
unique_rules: Set[str] = set()
|
# No extra escape for TOML, because we write the full regex
|
||||||
for rule in rules_in_category:
|
f.write(f' "{pattern}",\n')
|
||||||
# Escape special characters in the pattern
|
|
||||||
pattern = rule["pattern"].replace('"', '\\"').replace("\\", "\\\\")
|
|
||||||
unique_rules.add(f' "{pattern}"')
|
|
||||||
|
|
||||||
f.write(",\n".join(unique_rules) + "\n")
|
|
||||||
f.write(" ]\n\n")
|
f.write(" ]\n\n")
|
||||||
|
|
||||||
logging.info(f"[+] Traefik WAF rules generated at {config_file}")
|
logger.info(f"Generated Traefik middleware file: {MIDDLEWARE_FILE}")
|
||||||
except IOError as e:
|
|
||||||
logging.error(f"[-] Error writing to file: {e}")
|
except OSError as e:
|
||||||
exit(1)
|
logger.error(f"Error writing to {MIDDLEWARE_FILE}: {e}")
|
||||||
except Exception as e:
|
raise
|
||||||
logging.error(f"[-] Unexpected error generating Traefik config: {e}")
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
|
|
||||||
def main() -> None:
|
def load_owasp_rules(file_path: Path) -> List[Dict]:
|
||||||
"""
|
"""Loads OWASP rules from a JSON file."""
|
||||||
Main function to execute the script.
|
|
||||||
"""
|
|
||||||
try:
|
try:
|
||||||
logging.info("[*] Loading OWASP rules...")
|
with open(file_path, "r", encoding="utf-8") as f:
|
||||||
owasp_rules = load_owasp_rules(Path("owasp_rules.json"))
|
return json.load(f)
|
||||||
|
except (FileNotFoundError, json.JSONDecodeError, OSError) as e:
|
||||||
|
logger.error(f"Error loading rules from {file_path}: {e}")
|
||||||
|
raise
|
||||||
|
|
||||||
logging.info(f"[*] Generating Traefik WAF configs from {len(owasp_rules)} rules...")
|
def main():
|
||||||
|
"""Main function."""
|
||||||
|
try:
|
||||||
|
logger.info("Loading OWASP rules...")
|
||||||
|
owasp_rules = load_owasp_rules(INPUT_FILE)
|
||||||
|
logger.info(f"Loaded {len(owasp_rules)} rules.")
|
||||||
|
|
||||||
|
logger.info("Generating Traefik WAF configuration...")
|
||||||
generate_traefik_conf(owasp_rules)
|
generate_traefik_conf(owasp_rules)
|
||||||
|
logger.info("Traefik WAF generation complete.")
|
||||||
|
|
||||||
logging.info("[✔] Traefik WAF configurations generated successfully.")
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.critical(f"[!] Script failed: {e}")
|
logger.critical(f"Script failed: {e}")
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user