Merge pull request #12 from fabriziosalmi/codeflash/optimize-sanitize_pattern-m6xp6cvk

⚡️ Speed up function `sanitize_pattern` by 11,547%
This commit is contained in:
fab 2025-02-09 15:14:24 +01:00 committed by GitHub
commit fb284892f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -4,6 +4,7 @@ import re
import logging import logging
from pathlib import Path from pathlib import Path
from collections import defaultdict from collections import defaultdict
from functools import lru_cache
# Configure logging # Configure logging
logging.basicConfig( logging.basicConfig(
@ -39,34 +40,15 @@ def load_owasp_rules(file_path):
def validate_regex(pattern): def validate_regex(pattern):
"""Validate if a pattern is a valid regex.""" """Validate if a pattern is a valid regex."""
try: try:
re.compile(pattern) _compile_pattern(pattern)
return True return True
except re.error: except re.error:
return False return False
def sanitize_pattern(pattern): def sanitize_pattern(pattern):
"""Sanitize and validate OWASP patterns for Nginx compatibility.""" """Wrapper function to use caching for patterns."""
if any( return _sanitize_pattern(pattern)
keyword in pattern
for keyword in ["@pmFromFile", "!@eq", "!@within", "@lt"]
):
logging.warning(f"Skipping unsupported pattern: {pattern}")
return None
if pattern.startswith("@rx "):
sanitized_pattern = pattern.replace("@rx ", "").strip()
if validate_regex(sanitized_pattern):
return re.escape(sanitized_pattern).replace(r'\@', '@')
else:
logging.warning(f"Invalid regex in pattern: {sanitized_pattern}")
return None
if validate_regex(pattern):
return re.escape(pattern).replace(r'\@', '@')
else:
logging.warning(f"Invalid regex in pattern: {pattern}")
return None
def generate_nginx_waf(rules): def generate_nginx_waf(rules):
@ -168,6 +150,29 @@ def main():
logging.critical(f"Script failed: {e}") logging.critical(f"Script failed: {e}")
exit(1) exit(1)
@lru_cache(maxsize=128)
def _compile_pattern(pattern):
    """Return the compiled regex object for ``pattern``.

    Up to 128 distinct patterns are memoised by ``lru_cache`` so repeated
    requests for the same pattern reuse the earlier result. An invalid
    pattern propagates ``re.error`` from ``re.compile``.
    """
    compiled = re.compile(pattern)
    return compiled
# Operator keywords whose presence marks a pattern as unsupported.
_UNSUPPORTED_OPERATORS = ("@pmFromFile", "!@eq", "!@within", "@lt")
# Explicit regex-operator prefix that may precede the actual expression.
_RX_PREFIX = "@rx "


@lru_cache(maxsize=128)
def _sanitize_pattern(pattern):
    """Sanitize and validate OWASP patterns for Nginx compatibility.

    Args:
        pattern: Raw pattern string, optionally prefixed with ``"@rx "``.

    Returns:
        The pattern passed through ``re.escape`` with ``@`` left unescaped,
        or ``None`` when the pattern contains an unsupported operator
        keyword or does not compile as a regular expression.

    Note:
        Results are memoised by ``lru_cache``, so the warnings below are
        logged only when a pattern is actually evaluated, not on cache hits.
    """
    if any(keyword in pattern for keyword in _UNSUPPORTED_OPERATORS):
        logging.warning(f"Skipping unsupported pattern: {pattern}")
        return None

    # Drop only the leading "@rx " operator. A blanket str.replace would also
    # delete any later "@rx " occurrence and silently alter the expression.
    if pattern.startswith(_RX_PREFIX):
        candidate = pattern[len(_RX_PREFIX):].strip()
    else:
        candidate = pattern

    if not validate_regex(candidate):
        logging.warning(f"Invalid regex in pattern: {candidate}")
        return None

    # re.escape may emit "\@" (Python < 3.7 escapes every non-alphanumeric
    # character); restore the bare "@" in the result.
    return re.escape(candidate).replace(r'\@', '@')
if __name__ == "__main__": if __name__ == "__main__":
main() main()