# patterns/json2apache.py
import json
import logging
import os
import re
from collections import defaultdict
from functools import lru_cache
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple
# --- Configuration ---
LOG_LEVEL = logging.INFO  # Adjust as needed (DEBUG, INFO, WARNING, ERROR)
INPUT_FILE = Path(os.getenv("INPUT_FILE", "owasp_rules.json"))
OUTPUT_DIR = Path(os.getenv("OUTPUT_DIR", "waf_patterns/apache"))

# ModSecurity Rule Templates (more flexible)
MODSEC_RULE_TEMPLATE = (
    'SecRule {variables} "{pattern}" '
    '"id:{rule_id},phase:{phase},t:none,{actions},msg:\'{category} attack detected\',severity:{severity}"\n'
)

# Default Actions
DEFAULT_ACTIONS = "deny,status:403,log"

# Unsupported ModSecurity directives (expand as needed)
UNSUPPORTED_PATTERNS = [
    "@pmFromFile",  # File lookups not directly supported
    # Some of these could be handled later with ctl:ruleRemoveTargetById.
]

# Supported ModSecurity operators and their rough translations (for logging/info)
SUPPORTED_OPERATORS = {
    "@rx": "Regular Expression",
    "@streq": "String Equals",
    "@contains": "Contains String",
    "@beginsWith": "Begins With",
    "@endsWith": "Ends With",
    "@within": "Contained Within",
    "@ipMatch": "IP Address Match",
    # ... add more as needed
}

# --- Logging Setup ---
logging.basicConfig(level=LOG_LEVEL, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)
# --- Utility Functions ---
@lru_cache(maxsize=None)
def validate_regex(pattern: str) -> bool:
    """Validates a regex pattern (basic check).

    Results are memoized, so repeated patterns compile only once.
    """
    try:
        re.compile(pattern)
    except re.error as e:
        logger.warning(f"Invalid regex: {pattern} - {e}")
        return False
    return True
def _sanitize_pattern(pattern: str) -> str:
"""Internal helper to perform basic pattern sanitization."""
# Remove @rx prefix, if present
pattern = pattern.replace("@rx ", "").strip()
# You *could* add basic escaping here if needed, but be *very* careful
# not to break valid regexes. It's generally better to handle this
# in the `owasp2json.py` script.
return pattern
2025-02-28 11:17:52 +01:00
def _determine_variables(location: str) -> str:
"""Maps the 'location' field to ModSecurity variables."""
location = location.lower() # Normalize to lowercase
if location == "request-uri":
return "REQUEST_URI"
elif location == "query-string":
return "ARGS" # Or ARGS_GET, depending on your needs
elif location == "user-agent":
return "REQUEST_HEADERS:User-Agent"
elif location == "host":
return "REQUEST_HEADERS:Host"
elif location == "referer":
return "REQUEST_HEADERS:Referer"
elif location == "content-type":
return "REQUEST_HEADERS:Content-Type"
# Add other location mappings as needed
else:
logger.warning(f"Unknown location '{location}', defaulting to REQUEST_URI")
2025-02-28 11:26:45 +01:00
return "REQUEST_URI" # Default variable
2025-02-28 11:17:52 +01:00
def generate_apache_waf(rules: List[Dict]) -> None:
    """Generates Apache ModSecurity configuration files.

    Rules are grouped by category and written to one ``<category>.conf``
    file each under OUTPUT_DIR.  Invalid, malformed, or unsupported rules
    are skipped with a log message instead of aborting the whole run.

    Args:
        rules: Parsed rule dicts; recognized keys are ``id``, ``category``,
            ``pattern``, ``location`` and ``severity``.
    """
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
    # Group rules by category; sets silently drop exact duplicates.
    categorized_rules: Dict[str, Set[str]] = defaultdict(set)
    # Start with a high ID range (OWASP CRS convention for local rules).
    next_generated_id = 9000000

    for rule in rules:
        pattern = rule.get("pattern")
        if not pattern:
            # BUG FIX: rule["pattern"] used to raise KeyError on malformed input.
            logger.warning(f"Skipping rule without a pattern: {rule}")
            continue

        rule_id, next_generated_id = _resolve_rule_id(rule, next_generated_id)
        category = rule.get("category", "generic").lower()
        location = rule.get("location", "REQUEST_URI")  # Default variable source
        severity = rule.get("severity", "CRITICAL").upper()  # CRITICAL, ERROR, WARNING, NOTICE

        # Informational only: report which known operator the pattern uses.
        for op, description in SUPPORTED_OPERATORS.items():
            if pattern.startswith(op):
                logger.debug(f"Pattern uses operator {op} ({description})")
                break  # Stop after the *first* matching operator.

        # Skip patterns relying on directives we cannot translate.
        if any(unsupported in pattern for unsupported in UNSUPPORTED_PATTERNS):
            logger.info(f"[!] Skipping unsupported pattern: {pattern}")
            continue

        sanitized_pattern = _sanitize_pattern(pattern)
        if not sanitized_pattern or not validate_regex(sanitized_pattern):
            continue  # Skip empty or invalid regexes.

        # Determine ModSecurity variables based on 'location'.
        variables = _determine_variables(location)

        # BUG FIX: the pattern was previously passed through re.escape(),
        # which turned the validated regex into a literal string match and
        # defeated the point of validate_regex().  Only double quotes are
        # escaped now, so the quoted SecRule directive still parses.
        rule_str = MODSEC_RULE_TEMPLATE.format(
            variables=variables,
            pattern=sanitized_pattern.replace('"', '\\"'),
            rule_id=rule_id,
            category=category.upper(),  # Use uppercase for category
            severity=severity,
            phase=2,  # Phase 2 (request body processing) is common; adjust if needed.
            actions=DEFAULT_ACTIONS,
        )
        categorized_rules[category].add(rule_str)

    _write_category_files(categorized_rules)


def _resolve_rule_id(rule: Dict, next_generated_id: int) -> Tuple[int, int]:
    """Return ``(rule_id, updated_counter)`` for *rule*.

    Uses the rule's own integer id when present; otherwise tries to extract
    ``id:NNN`` from a string id; otherwise falls back to the generated-ID
    counter (which is then advanced).
    """
    rule_id = rule.get("id", "no_id")
    if isinstance(rule_id, int):
        return rule_id, next_generated_id
    # Coerce to str: JSON ids may be null/float, and re.search requires text.
    match = re.search(r'id:(\d+)', str(rule_id))
    if match:
        try:
            return int(match.group(1)), next_generated_id
        except ValueError:
            logger.warning(f"Invalid rule ID '{match.group(1)}' in rule: {rule}. Using generated ID.")
    return next_generated_id, next_generated_id + 1


def _write_category_files(categorized_rules: Dict[str, Set[str]]) -> None:
    """Write each category's rule set to ``OUTPUT_DIR/<category>.conf``."""
    for category, rule_set in categorized_rules.items():
        output_file = OUTPUT_DIR / f"{category}.conf"
        try:
            with open(output_file, "w") as f:
                f.write(f"# ModSecurity Rules for Category: {category.upper()}\n")
                f.write("SecRuleEngine On\n\n")  # Enable the rule engine
                f.writelines(rule_set)
            logger.info(f"Generated {output_file} ({len(rule_set)} rules)")
        except IOError as e:
            # Log and keep going: one unwritable file should not abort the rest.
            logger.error(f"Error writing to {output_file}: {e}")
def load_owasp_rules(file_path: Path) -> List[Dict]:
    """Loads OWASP rules from the JSON file.

    Args:
        file_path: Path to a JSON file containing a list of rule dicts.

    Returns:
        The parsed list of rule dictionaries.

    Raises:
        OSError: If the file cannot be opened or read.
        json.JSONDecodeError: If the file is not valid JSON.
        TypeError: If the JSON root is not a list.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            rules = json.load(f)
    except (OSError, json.JSONDecodeError) as e:
        # Narrowed from a blanket (FileNotFoundError, JSONDecodeError,
        # Exception) catch: only file and parse errors are expected here;
        # anything else should propagate without being mislabeled.
        logger.error(f"Error loading rules from {file_path}: {e}")
        raise
    if not isinstance(rules, list):
        raise TypeError(f"Expected a JSON list of rules in {file_path}, got {type(rules).__name__}")
    return rules
def main() -> None:
    """Load the OWASP rule set and emit Apache ModSecurity configs.

    Terminates the process with exit status 1 on any failure so shell
    callers can detect errors.
    """
    try:
        rules = load_owasp_rules(INPUT_FILE)
        generate_apache_waf(rules)
        logger.info("Apache ModSecurity configuration generated successfully.")
    except Exception as e:
        logger.critical(f"Script failed: {e}")
        # BUG FIX: use SystemExit instead of the site-provided exit() helper,
        # which is not guaranteed to exist (e.g. when run with python -S).
        raise SystemExit(1)


if __name__ == "__main__":
    main()