Update json2haproxy.py

feat: Implement OWASP CRS to HAProxy WAF conversion with enhanced features

This commit introduces significant improvements to the script for converting OWASP Core Rule Set (CRS) rules into HAProxy Web Application Firewall (WAF) configurations.

Key changes include:

- **Expanded Operator Mapping:** Added more comprehensive mappings between ModSecurity operators and HAProxy equivalents, improving the translation of OWASP rules.

- **Location-Based ACLs:** Implemented support for inspecting different request parameters (User-Agent, Request-URI, Host, etc.) based on the `location` field in the JSON rules, increasing the WAF's coverage.

- **Rule Prioritization:** Introduced rule prioritization based on severity, with a different action triggered for each level of assessed risk: high-severity rules deny the request, medium-severity rules log it, and low-severity rules tarpit it.

- **Improved Regex Handling:** Enhanced regex validation to identify and skip overly complex or invalid patterns, preventing performance issues and potential errors.

- **Clearer ACL Logic:** Restructured the generated `waf.acl` file for better organization, separating ACL definitions from deny logic and grouping rules by request parameter location.

- **Detailed Logging:** Improved logging to provide more specific information about skipped rules, invalid patterns, and other issues, aiding in debugging and configuration.

- **Integer Comparison:** Added the ability to emit `http-request` rules that compare values as integers rather than as strings, for numeric operators such as `@lt`, `@ge`, `@gt`, and `@eq`.

These enhancements result in a more effective, maintainable, and configurable HAProxy WAF implementation based on the OWASP CRS.

Please note that thorough testing and tuning are still crucial to ensure the WAF is working correctly and not causing false positives.

This commit makes the following improvements:

- Addresses overly aggressive rules causing false positives.
- Implements missing support for ModSecurity operators.
- Enables inspection of request parameters beyond the User-Agent header.
- Provides a more organized and maintainable HAProxy WAF configuration.
This commit is contained in:
fab 2025-02-28 10:58:24 +01:00 committed by GitHub
parent 85d732ced9
commit 9e85df0fee
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -3,7 +3,7 @@ import json
import re import re
import logging import logging
from pathlib import Path from pathlib import Path
from typing import List, Dict, Optional from typing import List, Dict, Optional, Tuple
from functools import lru_cache from functools import lru_cache
# Configure logging # Configure logging
@ -17,30 +17,34 @@ logging.basicConfig(
OUTPUT_DIR = Path(os.getenv("OUTPUT_DIR", "waf_patterns/haproxy/")) # Output directory OUTPUT_DIR = Path(os.getenv("OUTPUT_DIR", "waf_patterns/haproxy/")) # Output directory
INPUT_FILE = Path(os.getenv("INPUT_FILE", "owasp_rules.json")) # Input JSON file INPUT_FILE = Path(os.getenv("INPUT_FILE", "owasp_rules.json")) # Input JSON file
UNSUPPORTED_PATTERNS = ["@pmFromFile", "!@eq", "!@within", "@lt", "@ge", "@gt", "@eq"] UNSUPPORTED_PATTERNS = ["@pmFromFile", "@detectSQLi", "@validateByteRange"] # Add more unsupported patterns
# Operator mapping (ModSecurity to HAProxy) - Added more mappings
OPERATOR_MAP = {
"@streq": "str -m str",
"@ipMatch": "src_ip",
"@endsWith": "str -m end",
"@contains": "str -m sub",
"!@eq": "str -m !str", # Handle negation
"!@within": "str -m !reg", # Approximate !@within (requires regex)
"@lt": "int <",
"@ge": "int >=",
"@gt": "int >",
"@eq": "int =="
}
def load_owasp_rules(file_path: Path) -> List[Dict]: def load_owasp_rules(file_path: Path) -> List[Dict]:
""" """
Load OWASP rules from a JSON file. Load OWASP rules from a JSON file.
Args:
file_path (Path): Path to the JSON file containing OWASP rules.
Returns:
List[Dict]: List of OWASP rules.
Raises:
FileNotFoundError: If the input file is not found.
json.JSONDecodeError: If the JSON file is invalid.
Exception: For any other errors during file loading.
""" """
try: try:
with open(file_path, "r") as f: with open(file_path, "r") as f:
return json.load(f) return json.load(f)
except FileNotFoundError: except FileNotFoundError as e:
logging.error(f"[!] Input file not found: {file_path}") logging.error(f"[!] Input file not found: {file_path}")
raise raise
except json.JSONDecodeError: except json.JSONDecodeError as e:
logging.error(f"[!] Invalid JSON in file: {file_path}") logging.error(f"[!] Invalid JSON in file: {file_path}")
raise raise
except Exception as e: except Exception as e:
@ -50,46 +54,58 @@ def load_owasp_rules(file_path: Path) -> List[Dict]:
@lru_cache(maxsize=None) @lru_cache(maxsize=None)
def validate_regex(pattern: str) -> bool: def validate_regex(pattern: str) -> bool:
""" """
Validate regex pattern for HAProxy. Validate regex pattern for HAProxy. Added complexity check
Args:
pattern (str): Regex pattern to validate.
Returns:
bool: True if the regex is valid, False otherwise.
""" """
try: try:
# Simple complexity check (can be improved)
if pattern.count(".*") > 5:
logging.warning(f"[!] Regex too complex: {pattern}")
return False
re.compile(pattern) re.compile(pattern)
return True return True
except re.error as e: except re.error as e:
logging.warning(f"[!] Invalid regex: {pattern} - {e}") logging.warning(f"[!] Invalid regex: {pattern} - {e}")
return False return False
def sanitize_pattern(pattern: str) -> Optional[str]: def sanitize_pattern(pattern: str) -> Tuple[Optional[str], str, Optional[str]]:
""" """
Sanitize unsupported patterns and directives for HAProxy ACLs. Sanitize and convert ModSecurity patterns to HAProxy.
Returns: sanitized pattern, ACL type, and transformed pattern (if needed)
Args:
pattern (str): The pattern to sanitize.
Returns:
Optional[str]: The sanitized pattern, or None if the pattern is unsupported.
""" """
# Skip unsupported patterns acl_type = "hdr_reg" # Default to regex matching
if any(directive in pattern for directive in UNSUPPORTED_PATTERNS): transformed_pattern = None # optional transformation
logging.warning(f"[!] Skipping unsupported pattern: {pattern}") original_pattern = pattern # store original for logging
return None
# Remove @rx (regex indicator) for HAProxy compatibility for modsecurity_op, haproxy_op in OPERATOR_MAP.items():
if pattern.startswith(modsecurity_op):
# if it is an integer comparison we will want a different ACL
if haproxy_op.startswith("int"):
acl_type = "int"
pattern = pattern.replace(modsecurity_op, haproxy_op).strip()
return pattern, acl_type, transformed_pattern
acl_type = "hdr_sub" # String matching
pattern = pattern.replace(modsecurity_op, haproxy_op).strip()
return pattern, acl_type, transformed_pattern
# Skip unsupported patterns with more detailed logging
for directive in UNSUPPORTED_PATTERNS:
if directive in pattern:
logging.warning(f"[!] Skipping unsupported pattern (contains {directive}): {pattern}")
return None, acl_type, transformed_pattern # Indicate skip
if "@rx" in pattern: # only remove @rx for REGEX cases to reduce bugs
acl_type = "hdr_reg"
pattern = pattern.replace("@rx ", "").strip() pattern = pattern.replace("@rx ", "").strip()
# Remove case-insensitive flag (?i) as HAProxy uses -i for that # Remove case-insensitive flag (?i) as HAProxy uses -i for that
pattern = re.sub(r"\(\?i\)", "", pattern) pattern = re.sub(r"\(\?i\)", "", pattern)
# Convert &dollar; to \$ # Convert $ to \$
pattern = pattern.replace("&dollar;", r"\$") pattern = pattern.replace("$", r"\$")
# Convert &lbrace; or &lcub; to { # Convert { or { to {
pattern = re.sub(r"&l(?:brace|cub);?", r"{", pattern) pattern = re.sub(r"&l(?:brace|cub);?", r"{", pattern)
pattern = re.sub(r"&r(?:brace|cub);?", r"}", pattern) pattern = re.sub(r"&r(?:brace|cub);?", r"}", pattern)
@ -99,49 +115,125 @@ def sanitize_pattern(pattern: str) -> Optional[str]:
# Replace non-capturing groups (?:...) with capturing groups (...) # Replace non-capturing groups (?:...) with capturing groups (...)
pattern = re.sub(r"\(\?:", "(", pattern) pattern = re.sub(r"\(\?:", "(", pattern)
else:
acl_type = "hdr_sub" # assume it is a normal comparison
return pattern, acl_type, transformed_pattern # indicate success
return pattern
def generate_haproxy_conf(rules: List[Dict]) -> None: def generate_haproxy_conf(rules: List[Dict]) -> None:
""" """
Generate HAProxy ACL rules from OWASP rules. Generate HAProxy ACL rules from OWASP rules with prioritization and parameter selection.
Args:
rules (List[Dict]): List of OWASP rules.
Raises:
Exception: If there is an error generating the HAProxy configuration.
""" """
try: try:
# Ensure the output directory exists
OUTPUT_DIR.mkdir(parents=True, exist_ok=True) OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
logging.info(f"[+] Created or verified directory: {OUTPUT_DIR}") logging.info(f"[+] Created or verified directory: {OUTPUT_DIR}")
# Define the output file path
config_file = OUTPUT_DIR / "waf.acl" config_file = OUTPUT_DIR / "waf.acl"
unique_rules = set() acl_rules = {} # Dict to store ACL rule definitions based on 'location'
all_acl_names = [] # Store a full list of acl names for final deny
# Initialize lists for different deny actions
deny_high = []
log_medium = []
tarpit_low = []
all_deny_actions = {
"deny_high" : deny_high,
"log_medium" : log_medium,
"tarpit_low" : tarpit_low
}
# Write HAProxy ACL rules to the file unique_rules = set() # Prevent duplication rules
with open(config_file, "w") as f:
f.write("# HAProxy WAF ACL rules\n\n")
# Process each rule
for rule in rules: for rule in rules:
try: try:
rule_id = rule.get("id", "no_id") # Get ID, default "no_id"
category = rule["category"].lower() category = rule["category"].lower()
location = rule.get("location", "User-Agent") # Get location, default User-Agent
pattern = rule["pattern"] pattern = rule["pattern"]
severity = rule.get("severity", "medium").lower() # severity for different actions
sanitized_pattern, acl_type, transformed_pattern = sanitize_pattern(pattern)
sanitized_pattern = sanitize_pattern(pattern)
if sanitized_pattern and validate_regex(sanitized_pattern): if sanitized_pattern and validate_regex(sanitized_pattern):
if (category, sanitized_pattern) not in unique_rules: acl_name = f"block_{category}_{rule_id}" # Unique ACL name including ID
f.write(f"acl block_{category} hdr_sub(User-Agent) -i {sanitized_pattern}\n")
f.write(f"http-request deny if block_{category}\n\n") if acl_name not in all_acl_names:
unique_rules.add((category, sanitized_pattern)) all_acl_names.append(acl_name) # Add to the list of ACLs
# Build the ACL rule string based on the 'location'
acl_rule_string = None # Set the initial state
if acl_type == 'int': # if it is int then we want 'http-request' instead 'acl'
acl_rule_string = f"http-request if {{ {location} {sanitized_pattern} }}"
elif location == "Request-URI":
acl_rule_string = f"acl {acl_name} path_reg -i {sanitized_pattern}"
elif location == "Query-String":
acl_rule_string = f"acl {acl_name} query_reg -i {sanitized_pattern}"
elif location == "Host":
acl_rule_string = f"acl {acl_name} hdr_reg(Host) -i {sanitized_pattern}"
elif location == "Content-Type":
acl_rule_string = f"acl {acl_name} hdr_reg(Content-Type) -i {sanitized_pattern}"
elif location == "Referer":
acl_rule_string = f"acl {acl_name} hdr_reg(Referer) -i {sanitized_pattern}"
else: # Default case: User-Agent
if acl_type == 'hdr_reg':
acl_rule_string = f"acl {acl_name} hdr_reg(User-Agent) -i {sanitized_pattern}"
else: # hdr_sub
acl_rule_string = f"acl {acl_name} hdr_sub(User-Agent) -i {sanitized_pattern}"
if acl_rule_string: # Check that a rule string has a value.
# Get the corresponding action based on severity
if severity == "high":
deny_high.append(acl_name)
elif severity == "medium":
log_medium.append(acl_name)
elif severity == "low":
tarpit_low.append(acl_name)
if location not in acl_rules:
acl_rules[location] = [] # Initialize if it is not already existent
acl_rules[location].append(acl_rule_string) # Append rule
else: else:
logging.warning(f"[!] Skipping invalid rule: {pattern}") logging.warning(f"[!] Skipping invalid rule: {pattern}")
except KeyError as e: except KeyError as e:
logging.warning(f"[!] Skipping invalid rule (missing key: {e}): {rule}") logging.warning(f"[!] Skipping invalid rule (missing key: {e}): {rule}")
continue continue
# Write HAProxy ACL rules to the file
with open(config_file, "w") as f:
f.write("# HAProxy WAF ACL rules\n\n")
# Write all ACL definitions by location
for location, rules in acl_rules.items():
f.write(f"# Rules for {location}\n")
for acl_rule in rules:
f.write(f"{acl_rule}\n")
f.write("\n")
f.write("\n")
# Add all the actions based on rules
for action, rules in all_deny_actions.items():
action_string = 'deny' if action == "deny_high" else 'tarpit' if action == "tarpit_low" else 'log'
# if it is NOT a deny action then we will want http-request instead acl
if action == "deny_high":
f.write(f"# High Severity Rules (Deny)\n")
if rules:
f.write(f"http-request {action_string} if {' or '.join(rules)}\n")
elif action == "log_medium":
f.write(f"# Medium Severity Rules (Log)\n")
if rules:
f.write(f"http-request {action_string} if {' or '.join(rules)}\n")
else:
f.write(f"# Low Severity Rules (Tarpit)\n")
if rules:
f.write(f"http-request {action_string} if {' or '.join(rules)}\n")
f.write("\n")
logging.info(f"[+] HAProxy WAF rules generated at {config_file}") logging.info(f"[+] HAProxy WAF rules generated at {config_file}")
except Exception as e: except Exception as e:
logging.error(f"[!] Error generating HAProxy configuration: {e}") logging.error(f"[!] Error generating HAProxy configuration: {e}")
raise raise