SuperClaude/Framework-Hooks/hooks/shared/compression_engine.py

662 lines
26 KiB
Python
Raw Normal View History

"""
Compression Engine for SuperClaude-Lite
Intelligent token optimization implementing MODE_Token_Efficiency.md algorithms
with adaptive compression, symbol systems, and quality-gated validation.
"""
import re
import json
import hashlib
from typing import Dict, Any, List, Optional, Tuple, Set
from dataclasses import dataclass
from enum import Enum
from yaml_loader import config_loader
class CompressionLevel(Enum):
"""Compression levels from MODE_Token_Efficiency.md."""
MINIMAL = "minimal" # 0-40% compression
EFFICIENT = "efficient" # 40-70% compression
COMPRESSED = "compressed" # 70-85% compression
CRITICAL = "critical" # 85-95% compression
EMERGENCY = "emergency" # 95%+ compression
class ContentType(Enum):
"""Types of content for selective compression."""
FRAMEWORK_CONTENT = "framework" # SuperClaude framework - EXCLUDE
SESSION_DATA = "session" # Session metadata - COMPRESS
USER_CONTENT = "user" # User project files - PRESERVE
WORKING_ARTIFACTS = "artifacts" # Analysis results - COMPRESS
@dataclass
class CompressionResult:
"""Result of compression operation."""
original_length: int
compressed_length: int
compression_ratio: float
quality_score: float # 0.0 to 1.0
techniques_used: List[str]
preservation_score: float # Information preservation
processing_time_ms: float
@dataclass
class CompressionStrategy:
"""Strategy configuration for compression."""
level: CompressionLevel
symbol_systems_enabled: bool
abbreviation_systems_enabled: bool
structural_optimization: bool
selective_preservation: Dict[str, bool]
quality_threshold: float
class CompressionEngine:
"""
Intelligent token optimization engine implementing MODE_Token_Efficiency.md.
Features:
- 5-level adaptive compression (minimal to emergency)
- Symbol systems for mathematical and logical relationships
- Abbreviation systems for technical domains
- Selective compression with framework/user content protection
- Quality-gated validation with 95% information preservation
- Real-time compression effectiveness monitoring
"""
def __init__(self):
feat: Implement YAML-first declarative intelligence architecture Revolutionary transformation from hardcoded Python intelligence to hot-reloadable YAML patterns, enabling dynamic configuration without code changes. ## Phase 1: Foundation Intelligence Complete ### YAML Intelligence Patterns (6 files) - intelligence_patterns.yaml: Multi-dimensional pattern recognition with adaptive learning - mcp_orchestration.yaml: Server selection decision trees with load balancing - hook_coordination.yaml: Parallel execution patterns with dependency resolution - performance_intelligence.yaml: Resource zones and auto-optimization triggers - validation_intelligence.yaml: Health scoring and proactive diagnostic patterns - user_experience.yaml: Project detection and smart UX adaptations ### Python Infrastructure Enhanced (4 components) - intelligence_engine.py: Generic YAML pattern interpreter with hot-reload - learning_engine.py: Enhanced with YAML intelligence integration - yaml_loader.py: Added intelligence configuration helper methods - validate_system.py: New YAML-driven validation with health scoring ### Key Features Implemented - Hot-reload intelligence: Update patterns without code changes or restarts - Declarative configuration: All intelligence logic expressed in YAML - Graceful fallbacks: System works correctly even with missing YAML files - Multi-pattern coordination: Intelligent recommendations from multiple sources - Health scoring: Component-weighted validation with predictive diagnostics - Generic architecture: Single engine consumes all intelligence pattern types ### Testing Results ✅ All components integrate correctly ✅ Hot-reload mechanism functional ✅ Graceful error handling verified ✅ YAML-driven validation operational ✅ Health scoring system working (detected real system issues) This enables users to modify intelligence behavior by editing YAML files, add new pattern types without coding, and hot-reload improvements in real-time. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-06 13:26:04 +02:00
try:
self.config = config_loader.load_config('compression')
except Exception as e:
# Fallback to default configuration if config loading fails
self.config = {'compression_levels': {}, 'selective_compression': {}}
self.symbol_mappings = self._load_symbol_mappings()
self.abbreviation_mappings = self._load_abbreviation_mappings()
self.compression_cache = {}
self.performance_metrics = {}
def _load_symbol_mappings(self) -> Dict[str, str]:
"""Load symbol system mappings from configuration."""
return {
# Core Logic & Flow
'leads to': '',
'implies': '',
'transforms to': '',
'converts to': '',
'rollback': '',
'reverse': '',
'bidirectional': '',
'sync': '',
'and': '&',
'combine': '&',
'separator': '|',
'or': '|',
'define': ':',
'specify': ':',
'sequence': '»',
'then': '»',
'therefore': '',
'because': '',
'equivalent': '',
'approximately': '',
'not equal': '',
# Status & Progress
'completed': '',
'passed': '',
'failed': '',
'error': '',
'warning': '⚠️',
'information': '',
'in progress': '🔄',
'processing': '🔄',
'waiting': '',
'pending': '',
'critical': '🚨',
'urgent': '🚨',
'target': '🎯',
'goal': '🎯',
'metrics': '📊',
'data': '📊',
'insight': '💡',
'learning': '💡',
# Technical Domains
'performance': '',
'optimization': '',
'analysis': '🔍',
'investigation': '🔍',
'configuration': '🔧',
'setup': '🔧',
'security': '🛡️',
'protection': '🛡️',
'deployment': '📦',
'package': '📦',
'design': '🎨',
'frontend': '🎨',
'network': '🌐',
'connectivity': '🌐',
'mobile': '📱',
'responsive': '📱',
'architecture': '🏗️',
'system structure': '🏗️',
'components': '🧩',
'modular': '🧩'
}
def _load_abbreviation_mappings(self) -> Dict[str, str]:
"""Load abbreviation system mappings from configuration."""
return {
# System & Architecture
'configuration': 'cfg',
'settings': 'cfg',
'implementation': 'impl',
'code structure': 'impl',
'architecture': 'arch',
'system design': 'arch',
'performance': 'perf',
'optimization': 'perf',
'operations': 'ops',
'deployment': 'ops',
'environment': 'env',
'runtime context': 'env',
# Development Process
'requirements': 'req',
'dependencies': 'deps',
'packages': 'deps',
'validation': 'val',
'verification': 'val',
'testing': 'test',
'quality assurance': 'test',
'documentation': 'docs',
'guides': 'docs',
'standards': 'std',
'conventions': 'std',
# Quality & Analysis
'quality': 'qual',
'maintainability': 'qual',
'security': 'sec',
'safety measures': 'sec',
'error': 'err',
'exception handling': 'err',
'recovery': 'rec',
'resilience': 'rec',
'severity': 'sev',
'priority level': 'sev',
'optimization': 'opt',
'improvement': 'opt'
}
def determine_compression_level(self, context: Dict[str, Any]) -> CompressionLevel:
"""
Determine appropriate compression level based on context.
Args:
context: Session context including resource usage, conversation length, etc.
Returns:
Appropriate CompressionLevel for the situation
"""
resource_usage = context.get('resource_usage_percent', 0)
conversation_length = context.get('conversation_length', 0)
user_requests_brevity = context.get('user_requests_brevity', False)
complexity_score = context.get('complexity_score', 0.0)
# Emergency compression for critical resource constraints
if resource_usage >= 95:
return CompressionLevel.EMERGENCY
# Critical compression for high resource usage
if resource_usage >= 85 or conversation_length > 200:
return CompressionLevel.CRITICAL
# Compressed level for moderate constraints
if resource_usage >= 70 or conversation_length > 100 or user_requests_brevity:
return CompressionLevel.COMPRESSED
# Efficient level for mild constraints or complex operations
if resource_usage >= 40 or complexity_score > 0.6:
return CompressionLevel.EFFICIENT
# Minimal compression for normal operations
return CompressionLevel.MINIMAL
def classify_content(self, content: str, metadata: Dict[str, Any]) -> ContentType:
"""
Classify content type for selective compression.
Args:
content: Content to classify
metadata: Metadata about the content (file paths, context, etc.)
Returns:
ContentType for compression decision making
"""
file_path = metadata.get('file_path', '')
context_type = metadata.get('context_type', '')
# Framework content - complete exclusion
framework_patterns = [
'~/.claude/',
'.claude/',
'SuperClaude/',
'CLAUDE.md',
'FLAGS.md',
'PRINCIPLES.md',
'ORCHESTRATOR.md',
'MCP_',
'MODE_',
'SESSION_LIFECYCLE.md'
]
for pattern in framework_patterns:
if pattern in file_path or pattern in content:
return ContentType.FRAMEWORK_CONTENT
# Session data - apply compression
if context_type in ['session_metadata', 'checkpoint_data', 'cache_content']:
return ContentType.SESSION_DATA
# Working artifacts - apply compression
if context_type in ['analysis_results', 'processing_data', 'working_artifacts']:
return ContentType.WORKING_ARTIFACTS
# User content - preserve with minimal compression only
user_patterns = [
'project_files',
'user_documentation',
'source_code',
'configuration_files',
'custom_content'
]
for pattern in user_patterns:
if pattern in context_type or pattern in file_path:
return ContentType.USER_CONTENT
# Default to user content preservation
return ContentType.USER_CONTENT
def compress_content(self,
content: str,
context: Dict[str, Any],
metadata: Dict[str, Any] = None) -> CompressionResult:
"""
Compress content with intelligent optimization.
Args:
content: Content to compress
context: Session context for compression level determination
metadata: Content metadata for selective compression
Returns:
CompressionResult with metrics and compressed content
"""
import time
start_time = time.time()
if metadata is None:
metadata = {}
# Classify content type
content_type = self.classify_content(content, metadata)
# Framework content - no compression
if content_type == ContentType.FRAMEWORK_CONTENT:
return CompressionResult(
original_length=len(content),
compressed_length=len(content),
compression_ratio=0.0,
quality_score=1.0,
techniques_used=['framework_exclusion'],
preservation_score=1.0,
processing_time_ms=(time.time() - start_time) * 1000
)
# User content - minimal compression only
if content_type == ContentType.USER_CONTENT:
compression_level = CompressionLevel.MINIMAL
else:
compression_level = self.determine_compression_level(context)
# Create compression strategy
strategy = self._create_compression_strategy(compression_level, content_type)
# Apply compression techniques
compressed_content = content
techniques_used = []
if strategy.symbol_systems_enabled:
compressed_content, symbol_techniques = self._apply_symbol_systems(compressed_content)
techniques_used.extend(symbol_techniques)
if strategy.abbreviation_systems_enabled:
compressed_content, abbrev_techniques = self._apply_abbreviation_systems(compressed_content)
techniques_used.extend(abbrev_techniques)
if strategy.structural_optimization:
compressed_content, struct_techniques = self._apply_structural_optimization(
compressed_content, compression_level
)
techniques_used.extend(struct_techniques)
# Calculate metrics
original_length = len(content)
compressed_length = len(compressed_content)
compression_ratio = (original_length - compressed_length) / original_length if original_length > 0 else 0.0
# Quality validation
quality_score = self._validate_compression_quality(content, compressed_content, strategy)
preservation_score = self._calculate_information_preservation(content, compressed_content)
processing_time = (time.time() - start_time) * 1000
# Cache result for performance
cache_key = hashlib.md5(content.encode()).hexdigest()
self.compression_cache[cache_key] = compressed_content
return CompressionResult(
original_length=original_length,
compressed_length=compressed_length,
compression_ratio=compression_ratio,
quality_score=quality_score,
techniques_used=techniques_used,
preservation_score=preservation_score,
processing_time_ms=processing_time
)
def _create_compression_strategy(self, level: CompressionLevel, content_type: ContentType) -> CompressionStrategy:
"""Create compression strategy based on level and content type."""
level_configs = {
CompressionLevel.MINIMAL: {
feat: Implement YAML-first declarative intelligence architecture Revolutionary transformation from hardcoded Python intelligence to hot-reloadable YAML patterns, enabling dynamic configuration without code changes. ## Phase 1: Foundation Intelligence Complete ### YAML Intelligence Patterns (6 files) - intelligence_patterns.yaml: Multi-dimensional pattern recognition with adaptive learning - mcp_orchestration.yaml: Server selection decision trees with load balancing - hook_coordination.yaml: Parallel execution patterns with dependency resolution - performance_intelligence.yaml: Resource zones and auto-optimization triggers - validation_intelligence.yaml: Health scoring and proactive diagnostic patterns - user_experience.yaml: Project detection and smart UX adaptations ### Python Infrastructure Enhanced (4 components) - intelligence_engine.py: Generic YAML pattern interpreter with hot-reload - learning_engine.py: Enhanced with YAML intelligence integration - yaml_loader.py: Added intelligence configuration helper methods - validate_system.py: New YAML-driven validation with health scoring ### Key Features Implemented - Hot-reload intelligence: Update patterns without code changes or restarts - Declarative configuration: All intelligence logic expressed in YAML - Graceful fallbacks: System works correctly even with missing YAML files - Multi-pattern coordination: Intelligent recommendations from multiple sources - Health scoring: Component-weighted validation with predictive diagnostics - Generic architecture: Single engine consumes all intelligence pattern types ### Testing Results ✅ All components integrate correctly ✅ Hot-reload mechanism functional ✅ Graceful error handling verified ✅ YAML-driven validation operational ✅ Health scoring system working (detected real system issues) This enables users to modify intelligence behavior by editing YAML files, add new pattern types without coding, and hot-reload improvements in real-time. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-06 13:26:04 +02:00
'symbol_systems': True, # Changed: Enable basic optimizations even for minimal
'abbreviations': False,
feat: Implement YAML-first declarative intelligence architecture Revolutionary transformation from hardcoded Python intelligence to hot-reloadable YAML patterns, enabling dynamic configuration without code changes. ## Phase 1: Foundation Intelligence Complete ### YAML Intelligence Patterns (6 files) - intelligence_patterns.yaml: Multi-dimensional pattern recognition with adaptive learning - mcp_orchestration.yaml: Server selection decision trees with load balancing - hook_coordination.yaml: Parallel execution patterns with dependency resolution - performance_intelligence.yaml: Resource zones and auto-optimization triggers - validation_intelligence.yaml: Health scoring and proactive diagnostic patterns - user_experience.yaml: Project detection and smart UX adaptations ### Python Infrastructure Enhanced (4 components) - intelligence_engine.py: Generic YAML pattern interpreter with hot-reload - learning_engine.py: Enhanced with YAML intelligence integration - yaml_loader.py: Added intelligence configuration helper methods - validate_system.py: New YAML-driven validation with health scoring ### Key Features Implemented - Hot-reload intelligence: Update patterns without code changes or restarts - Declarative configuration: All intelligence logic expressed in YAML - Graceful fallbacks: System works correctly even with missing YAML files - Multi-pattern coordination: Intelligent recommendations from multiple sources - Health scoring: Component-weighted validation with predictive diagnostics - Generic architecture: Single engine consumes all intelligence pattern types ### Testing Results ✅ All components integrate correctly ✅ Hot-reload mechanism functional ✅ Graceful error handling verified ✅ YAML-driven validation operational ✅ Health scoring system working (detected real system issues) This enables users to modify intelligence behavior by editing YAML files, add new pattern types without coding, and hot-reload improvements in real-time. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-06 13:26:04 +02:00
'structural': True, # Changed: Enable basic structural optimization
'quality_threshold': 0.98
},
CompressionLevel.EFFICIENT: {
'symbol_systems': True,
'abbreviations': False,
'structural': True,
'quality_threshold': 0.95
},
CompressionLevel.COMPRESSED: {
'symbol_systems': True,
'abbreviations': True,
'structural': True,
'quality_threshold': 0.90
},
CompressionLevel.CRITICAL: {
'symbol_systems': True,
'abbreviations': True,
'structural': True,
'quality_threshold': 0.85
},
CompressionLevel.EMERGENCY: {
'symbol_systems': True,
'abbreviations': True,
'structural': True,
'quality_threshold': 0.80
}
}
config = level_configs[level]
# Adjust for content type
if content_type == ContentType.USER_CONTENT:
# More conservative for user content
config['quality_threshold'] = min(config['quality_threshold'] + 0.1, 1.0)
return CompressionStrategy(
level=level,
symbol_systems_enabled=config['symbol_systems'],
abbreviation_systems_enabled=config['abbreviations'],
structural_optimization=config['structural'],
selective_preservation={},
quality_threshold=config['quality_threshold']
)
def _apply_symbol_systems(self, content: str) -> Tuple[str, List[str]]:
"""Apply symbol system replacements."""
feat: Implement YAML-first declarative intelligence architecture Revolutionary transformation from hardcoded Python intelligence to hot-reloadable YAML patterns, enabling dynamic configuration without code changes. ## Phase 1: Foundation Intelligence Complete ### YAML Intelligence Patterns (6 files) - intelligence_patterns.yaml: Multi-dimensional pattern recognition with adaptive learning - mcp_orchestration.yaml: Server selection decision trees with load balancing - hook_coordination.yaml: Parallel execution patterns with dependency resolution - performance_intelligence.yaml: Resource zones and auto-optimization triggers - validation_intelligence.yaml: Health scoring and proactive diagnostic patterns - user_experience.yaml: Project detection and smart UX adaptations ### Python Infrastructure Enhanced (4 components) - intelligence_engine.py: Generic YAML pattern interpreter with hot-reload - learning_engine.py: Enhanced with YAML intelligence integration - yaml_loader.py: Added intelligence configuration helper methods - validate_system.py: New YAML-driven validation with health scoring ### Key Features Implemented - Hot-reload intelligence: Update patterns without code changes or restarts - Declarative configuration: All intelligence logic expressed in YAML - Graceful fallbacks: System works correctly even with missing YAML files - Multi-pattern coordination: Intelligent recommendations from multiple sources - Health scoring: Component-weighted validation with predictive diagnostics - Generic architecture: Single engine consumes all intelligence pattern types ### Testing Results ✅ All components integrate correctly ✅ Hot-reload mechanism functional ✅ Graceful error handling verified ✅ YAML-driven validation operational ✅ Health scoring system working (detected real system issues) This enables users to modify intelligence behavior by editing YAML files, add new pattern types without coding, and hot-reload improvements in real-time. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-06 13:26:04 +02:00
if not content or not isinstance(content, str):
return content or "", []
compressed = content
techniques = []
feat: Implement YAML-first declarative intelligence architecture Revolutionary transformation from hardcoded Python intelligence to hot-reloadable YAML patterns, enabling dynamic configuration without code changes. ## Phase 1: Foundation Intelligence Complete ### YAML Intelligence Patterns (6 files) - intelligence_patterns.yaml: Multi-dimensional pattern recognition with adaptive learning - mcp_orchestration.yaml: Server selection decision trees with load balancing - hook_coordination.yaml: Parallel execution patterns with dependency resolution - performance_intelligence.yaml: Resource zones and auto-optimization triggers - validation_intelligence.yaml: Health scoring and proactive diagnostic patterns - user_experience.yaml: Project detection and smart UX adaptations ### Python Infrastructure Enhanced (4 components) - intelligence_engine.py: Generic YAML pattern interpreter with hot-reload - learning_engine.py: Enhanced with YAML intelligence integration - yaml_loader.py: Added intelligence configuration helper methods - validate_system.py: New YAML-driven validation with health scoring ### Key Features Implemented - Hot-reload intelligence: Update patterns without code changes or restarts - Declarative configuration: All intelligence logic expressed in YAML - Graceful fallbacks: System works correctly even with missing YAML files - Multi-pattern coordination: Intelligent recommendations from multiple sources - Health scoring: Component-weighted validation with predictive diagnostics - Generic architecture: Single engine consumes all intelligence pattern types ### Testing Results ✅ All components integrate correctly ✅ Hot-reload mechanism functional ✅ Graceful error handling verified ✅ YAML-driven validation operational ✅ Health scoring system working (detected real system issues) This enables users to modify intelligence behavior by editing YAML files, add new pattern types without coding, and hot-reload improvements in real-time. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-06 13:26:04 +02:00
try:
# Apply symbol mappings with word boundary protection
for phrase, symbol in self.symbol_mappings.items():
if not phrase or not symbol:
continue
pattern = r'\b' + re.escape(phrase) + r'\b'
if re.search(pattern, compressed, re.IGNORECASE):
compressed = re.sub(pattern, symbol, compressed, flags=re.IGNORECASE)
techniques.append(f"symbol_{phrase.replace(' ', '_')}")
except Exception as e:
# If regex fails, return original content
return content, []
return compressed, techniques
def _apply_abbreviation_systems(self, content: str) -> Tuple[str, List[str]]:
"""Apply abbreviation system replacements."""
feat: Implement YAML-first declarative intelligence architecture Revolutionary transformation from hardcoded Python intelligence to hot-reloadable YAML patterns, enabling dynamic configuration without code changes. ## Phase 1: Foundation Intelligence Complete ### YAML Intelligence Patterns (6 files) - intelligence_patterns.yaml: Multi-dimensional pattern recognition with adaptive learning - mcp_orchestration.yaml: Server selection decision trees with load balancing - hook_coordination.yaml: Parallel execution patterns with dependency resolution - performance_intelligence.yaml: Resource zones and auto-optimization triggers - validation_intelligence.yaml: Health scoring and proactive diagnostic patterns - user_experience.yaml: Project detection and smart UX adaptations ### Python Infrastructure Enhanced (4 components) - intelligence_engine.py: Generic YAML pattern interpreter with hot-reload - learning_engine.py: Enhanced with YAML intelligence integration - yaml_loader.py: Added intelligence configuration helper methods - validate_system.py: New YAML-driven validation with health scoring ### Key Features Implemented - Hot-reload intelligence: Update patterns without code changes or restarts - Declarative configuration: All intelligence logic expressed in YAML - Graceful fallbacks: System works correctly even with missing YAML files - Multi-pattern coordination: Intelligent recommendations from multiple sources - Health scoring: Component-weighted validation with predictive diagnostics - Generic architecture: Single engine consumes all intelligence pattern types ### Testing Results ✅ All components integrate correctly ✅ Hot-reload mechanism functional ✅ Graceful error handling verified ✅ YAML-driven validation operational ✅ Health scoring system working (detected real system issues) This enables users to modify intelligence behavior by editing YAML files, add new pattern types without coding, and hot-reload improvements in real-time. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-06 13:26:04 +02:00
if not content or not isinstance(content, str):
return content or "", []
compressed = content
techniques = []
feat: Implement YAML-first declarative intelligence architecture Revolutionary transformation from hardcoded Python intelligence to hot-reloadable YAML patterns, enabling dynamic configuration without code changes. ## Phase 1: Foundation Intelligence Complete ### YAML Intelligence Patterns (6 files) - intelligence_patterns.yaml: Multi-dimensional pattern recognition with adaptive learning - mcp_orchestration.yaml: Server selection decision trees with load balancing - hook_coordination.yaml: Parallel execution patterns with dependency resolution - performance_intelligence.yaml: Resource zones and auto-optimization triggers - validation_intelligence.yaml: Health scoring and proactive diagnostic patterns - user_experience.yaml: Project detection and smart UX adaptations ### Python Infrastructure Enhanced (4 components) - intelligence_engine.py: Generic YAML pattern interpreter with hot-reload - learning_engine.py: Enhanced with YAML intelligence integration - yaml_loader.py: Added intelligence configuration helper methods - validate_system.py: New YAML-driven validation with health scoring ### Key Features Implemented - Hot-reload intelligence: Update patterns without code changes or restarts - Declarative configuration: All intelligence logic expressed in YAML - Graceful fallbacks: System works correctly even with missing YAML files - Multi-pattern coordination: Intelligent recommendations from multiple sources - Health scoring: Component-weighted validation with predictive diagnostics - Generic architecture: Single engine consumes all intelligence pattern types ### Testing Results ✅ All components integrate correctly ✅ Hot-reload mechanism functional ✅ Graceful error handling verified ✅ YAML-driven validation operational ✅ Health scoring system working (detected real system issues) This enables users to modify intelligence behavior by editing YAML files, add new pattern types without coding, and hot-reload improvements in real-time. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-06 13:26:04 +02:00
try:
# Apply abbreviation mappings with context awareness
for phrase, abbrev in self.abbreviation_mappings.items():
if not phrase or not abbrev:
continue
pattern = r'\b' + re.escape(phrase) + r'\b'
if re.search(pattern, compressed, re.IGNORECASE):
compressed = re.sub(pattern, abbrev, compressed, flags=re.IGNORECASE)
techniques.append(f"abbrev_{phrase.replace(' ', '_')}")
except Exception as e:
# If regex fails, return original content
return content, []
return compressed, techniques
def _apply_structural_optimization(self, content: str, level: CompressionLevel) -> Tuple[str, List[str]]:
"""Apply structural optimizations for token efficiency."""
feat: Implement YAML-first declarative intelligence architecture Revolutionary transformation from hardcoded Python intelligence to hot-reloadable YAML patterns, enabling dynamic configuration without code changes. ## Phase 1: Foundation Intelligence Complete ### YAML Intelligence Patterns (6 files) - intelligence_patterns.yaml: Multi-dimensional pattern recognition with adaptive learning - mcp_orchestration.yaml: Server selection decision trees with load balancing - hook_coordination.yaml: Parallel execution patterns with dependency resolution - performance_intelligence.yaml: Resource zones and auto-optimization triggers - validation_intelligence.yaml: Health scoring and proactive diagnostic patterns - user_experience.yaml: Project detection and smart UX adaptations ### Python Infrastructure Enhanced (4 components) - intelligence_engine.py: Generic YAML pattern interpreter with hot-reload - learning_engine.py: Enhanced with YAML intelligence integration - yaml_loader.py: Added intelligence configuration helper methods - validate_system.py: New YAML-driven validation with health scoring ### Key Features Implemented - Hot-reload intelligence: Update patterns without code changes or restarts - Declarative configuration: All intelligence logic expressed in YAML - Graceful fallbacks: System works correctly even with missing YAML files - Multi-pattern coordination: Intelligent recommendations from multiple sources - Health scoring: Component-weighted validation with predictive diagnostics - Generic architecture: Single engine consumes all intelligence pattern types ### Testing Results ✅ All components integrate correctly ✅ Hot-reload mechanism functional ✅ Graceful error handling verified ✅ YAML-driven validation operational ✅ Health scoring system working (detected real system issues) This enables users to modify intelligence behavior by editing YAML files, add new pattern types without coding, and hot-reload improvements in real-time. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-06 13:26:04 +02:00
if not content or not isinstance(content, str):
return content or "", []
compressed = content
techniques = []
feat: Implement YAML-first declarative intelligence architecture Revolutionary transformation from hardcoded Python intelligence to hot-reloadable YAML patterns, enabling dynamic configuration without code changes. ## Phase 1: Foundation Intelligence Complete ### YAML Intelligence Patterns (6 files) - intelligence_patterns.yaml: Multi-dimensional pattern recognition with adaptive learning - mcp_orchestration.yaml: Server selection decision trees with load balancing - hook_coordination.yaml: Parallel execution patterns with dependency resolution - performance_intelligence.yaml: Resource zones and auto-optimization triggers - validation_intelligence.yaml: Health scoring and proactive diagnostic patterns - user_experience.yaml: Project detection and smart UX adaptations ### Python Infrastructure Enhanced (4 components) - intelligence_engine.py: Generic YAML pattern interpreter with hot-reload - learning_engine.py: Enhanced with YAML intelligence integration - yaml_loader.py: Added intelligence configuration helper methods - validate_system.py: New YAML-driven validation with health scoring ### Key Features Implemented - Hot-reload intelligence: Update patterns without code changes or restarts - Declarative configuration: All intelligence logic expressed in YAML - Graceful fallbacks: System works correctly even with missing YAML files - Multi-pattern coordination: Intelligent recommendations from multiple sources - Health scoring: Component-weighted validation with predictive diagnostics - Generic architecture: Single engine consumes all intelligence pattern types ### Testing Results ✅ All components integrate correctly ✅ Hot-reload mechanism functional ✅ Graceful error handling verified ✅ YAML-driven validation operational ✅ Health scoring system working (detected real system issues) This enables users to modify intelligence behavior by editing YAML files, add new pattern types without coding, and hot-reload improvements in real-time. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-06 13:26:04 +02:00
try:
# Always remove redundant whitespace for any level
if re.search(r'\s{2,}|\n\s*\n', compressed):
compressed = re.sub(r'\s+', ' ', compressed)
compressed = re.sub(r'\n\s*\n', '\n', compressed)
techniques.append('whitespace_optimization')
# Phrase simplification for compressed levels and above
if level in [CompressionLevel.COMPRESSED, CompressionLevel.CRITICAL, CompressionLevel.EMERGENCY]:
# Simplify common phrases FIRST
phrase_simplifications = {
r'in order to': 'to',
r'it is important to note that': 'note:',
r'please be aware that': 'note:',
r'it should be noted that': 'note:',
r'for the purpose of': 'for',
r'with regard to': 'regarding',
r'in relation to': 'regarding'
}
for pattern, replacement in phrase_simplifications.items():
if re.search(pattern, compressed, re.IGNORECASE):
compressed = re.sub(pattern, replacement, compressed, flags=re.IGNORECASE)
techniques.append('phrase_simplification')
# Remove redundant words AFTER phrase simplification
if re.search(r'\b(the|a|an)\s+', compressed, re.IGNORECASE):
compressed = re.sub(r'\b(the|a|an)\s+', '', compressed, flags=re.IGNORECASE)
techniques.append('article_removal')
except Exception as e:
# If regex fails, return original content
return content, []
return compressed, techniques
def _validate_compression_quality(self, original: str, compressed: str, strategy: CompressionStrategy) -> float:
"""Validate compression quality against thresholds."""
# Simple quality heuristics (real implementation would be more sophisticated)
# Check if key information is preserved
original_words = set(re.findall(r'\b\w+\b', original.lower()))
compressed_words = set(re.findall(r'\b\w+\b', compressed.lower()))
# Word preservation ratio
word_preservation = len(compressed_words & original_words) / len(original_words) if original_words else 1.0
# Length efficiency (not too aggressive)
length_ratio = len(compressed) / len(original) if original else 1.0
# Penalize over-compression
if length_ratio < 0.3:
word_preservation *= 0.8
quality_score = (word_preservation * 0.7) + (min(length_ratio * 2, 1.0) * 0.3)
return min(quality_score, 1.0)
def _calculate_information_preservation(self, original: str, compressed: str) -> float:
"""Calculate information preservation score."""
feat: Implement YAML-first declarative intelligence architecture Revolutionary transformation from hardcoded Python intelligence to hot-reloadable YAML patterns, enabling dynamic configuration without code changes. ## Phase 1: Foundation Intelligence Complete ### YAML Intelligence Patterns (6 files) - intelligence_patterns.yaml: Multi-dimensional pattern recognition with adaptive learning - mcp_orchestration.yaml: Server selection decision trees with load balancing - hook_coordination.yaml: Parallel execution patterns with dependency resolution - performance_intelligence.yaml: Resource zones and auto-optimization triggers - validation_intelligence.yaml: Health scoring and proactive diagnostic patterns - user_experience.yaml: Project detection and smart UX adaptations ### Python Infrastructure Enhanced (4 components) - intelligence_engine.py: Generic YAML pattern interpreter with hot-reload - learning_engine.py: Enhanced with YAML intelligence integration - yaml_loader.py: Added intelligence configuration helper methods - validate_system.py: New YAML-driven validation with health scoring ### Key Features Implemented - Hot-reload intelligence: Update patterns without code changes or restarts - Declarative configuration: All intelligence logic expressed in YAML - Graceful fallbacks: System works correctly even with missing YAML files - Multi-pattern coordination: Intelligent recommendations from multiple sources - Health scoring: Component-weighted validation with predictive diagnostics - Generic architecture: Single engine consumes all intelligence pattern types ### Testing Results ✅ All components integrate correctly ✅ Hot-reload mechanism functional ✅ Graceful error handling verified ✅ YAML-driven validation operational ✅ Health scoring system working (detected real system issues) This enables users to modify intelligence behavior by editing YAML files, add new pattern types without coding, and hot-reload improvements in real-time. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-06 13:26:04 +02:00
# Enhanced preservation metric based on multiple factors
# Extract key concepts (capitalized words, technical terms, file extensions)
original_concepts = set(re.findall(r'\b[A-Z][a-z]+\b|\b\w+\.(js|py|md|yaml|json)\b|\b\w*[A-Z]\w*\b', original))
compressed_concepts = set(re.findall(r'\b[A-Z][a-z]+\b|\b\w+\.(js|py|md|yaml|json)\b|\b\w*[A-Z]\w*\b', compressed))
# Also check for symbols that represent preserved concepts
symbol_mappings = {
'': ['leads', 'implies', 'transforms', 'converts'],
'': ['performance', 'optimization', 'speed'],
'🛡️': ['security', 'protection', 'safety'],
'': ['error', 'failed', 'exception'],
'⚠️': ['warning', 'caution'],
'🔍': ['analysis', 'investigation', 'search'],
'🔧': ['configuration', 'setup', 'tools'],
'📦': ['deployment', 'package', 'bundle'],
'🎨': ['design', 'frontend', 'ui'],
'🌐': ['network', 'web', 'connectivity'],
'📱': ['mobile', 'responsive'],
'🏗️': ['architecture', 'structure'],
'🧩': ['components', 'modular']
}
feat: Implement YAML-first declarative intelligence architecture Revolutionary transformation from hardcoded Python intelligence to hot-reloadable YAML patterns, enabling dynamic configuration without code changes. ## Phase 1: Foundation Intelligence Complete ### YAML Intelligence Patterns (6 files) - intelligence_patterns.yaml: Multi-dimensional pattern recognition with adaptive learning - mcp_orchestration.yaml: Server selection decision trees with load balancing - hook_coordination.yaml: Parallel execution patterns with dependency resolution - performance_intelligence.yaml: Resource zones and auto-optimization triggers - validation_intelligence.yaml: Health scoring and proactive diagnostic patterns - user_experience.yaml: Project detection and smart UX adaptations ### Python Infrastructure Enhanced (4 components) - intelligence_engine.py: Generic YAML pattern interpreter with hot-reload - learning_engine.py: Enhanced with YAML intelligence integration - yaml_loader.py: Added intelligence configuration helper methods - validate_system.py: New YAML-driven validation with health scoring ### Key Features Implemented - Hot-reload intelligence: Update patterns without code changes or restarts - Declarative configuration: All intelligence logic expressed in YAML - Graceful fallbacks: System works correctly even with missing YAML files - Multi-pattern coordination: Intelligent recommendations from multiple sources - Health scoring: Component-weighted validation with predictive diagnostics - Generic architecture: Single engine consumes all intelligence pattern types ### Testing Results ✅ All components integrate correctly ✅ Hot-reload mechanism functional ✅ Graceful error handling verified ✅ YAML-driven validation operational ✅ Health scoring system working (detected real system issues) This enables users to modify intelligence behavior by editing YAML files, add new pattern types without coding, and hot-reload improvements in real-time. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-08-06 13:26:04 +02:00
# Count preserved concepts through symbols
symbol_preserved_concepts = set()
for symbol, related_words in symbol_mappings.items():
if symbol in compressed:
for word in related_words:
if word in original.lower():
symbol_preserved_concepts.add(word)
# Extract important words (longer than 4 characters, not common words)
common_words = {'this', 'that', 'with', 'have', 'will', 'been', 'from', 'they',
'know', 'want', 'good', 'much', 'some', 'time', 'very', 'when',
'come', 'here', 'just', 'like', 'long', 'make', 'many', 'over',
'such', 'take', 'than', 'them', 'well', 'were', 'through'}
original_words = set(word.lower() for word in re.findall(r'\b\w{4,}\b', original)
if word.lower() not in common_words)
compressed_words = set(word.lower() for word in re.findall(r'\b\w{4,}\b', compressed)
if word.lower() not in common_words)
# Add symbol-preserved concepts to compressed words
compressed_words.update(symbol_preserved_concepts)
# Calculate concept preservation
if original_concepts:
concept_preservation = len(compressed_concepts & original_concepts) / len(original_concepts)
else:
concept_preservation = 1.0
# Calculate important word preservation
if original_words:
word_preservation = len(compressed_words & original_words) / len(original_words)
else:
word_preservation = 1.0
# Weight concept preservation more heavily, but be more generous
total_preservation = (concept_preservation * 0.6) + (word_preservation * 0.4)
# Bonus for symbol usage that preserves meaning
symbol_bonus = min(len(symbol_preserved_concepts) * 0.05, 0.15)
total_preservation += symbol_bonus
# Apply length penalty for over-compression
length_ratio = len(compressed) / len(original) if len(original) > 0 else 1.0
if length_ratio < 0.2: # Heavily penalize extreme over-compression
total_preservation *= 0.6
elif length_ratio < 0.4: # Penalize significant over-compression
total_preservation *= 0.8
elif length_ratio < 0.5: # Moderate penalty for over-compression
total_preservation *= 0.9
return min(total_preservation, 1.0)
def get_compression_recommendations(self, context: Dict[str, Any]) -> Dict[str, Any]:
"""Get recommendations for optimizing compression."""
recommendations = []
current_level = self.determine_compression_level(context)
resource_usage = context.get('resource_usage_percent', 0)
# Resource-based recommendations
if resource_usage > 85:
recommendations.append("Enable emergency compression mode for critical resource constraints")
elif resource_usage > 70:
recommendations.append("Consider compressed mode for better resource efficiency")
elif resource_usage < 40:
recommendations.append("Resource usage low - minimal compression sufficient")
# Performance recommendations
if context.get('processing_time_ms', 0) > 500:
recommendations.append("Compression processing time high - consider caching strategies")
return {
'current_level': current_level.value,
'recommendations': recommendations,
'estimated_savings': self._estimate_compression_savings(current_level),
'quality_impact': self._estimate_quality_impact(current_level),
'performance_metrics': self.performance_metrics
}
def _estimate_compression_savings(self, level: CompressionLevel) -> Dict[str, float]:
"""Estimate compression savings for a given level."""
savings_map = {
CompressionLevel.MINIMAL: {'token_reduction': 0.15, 'time_savings': 0.05},
CompressionLevel.EFFICIENT: {'token_reduction': 0.40, 'time_savings': 0.15},
CompressionLevel.COMPRESSED: {'token_reduction': 0.60, 'time_savings': 0.25},
CompressionLevel.CRITICAL: {'token_reduction': 0.75, 'time_savings': 0.35},
CompressionLevel.EMERGENCY: {'token_reduction': 0.85, 'time_savings': 0.45}
}
return savings_map.get(level, {'token_reduction': 0.0, 'time_savings': 0.0})
def _estimate_quality_impact(self, level: CompressionLevel) -> float:
"""Estimate quality preservation for a given level."""
quality_map = {
CompressionLevel.MINIMAL: 0.98,
CompressionLevel.EFFICIENT: 0.95,
CompressionLevel.COMPRESSED: 0.90,
CompressionLevel.CRITICAL: 0.85,
CompressionLevel.EMERGENCY: 0.80
}
return quality_map.get(level, 0.95)