771 lines
32 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
SuperClaude-Lite Pre-Compact Hook
Implements MODE_Token_Efficiency.md compression algorithms for intelligent context optimization.
Performance target: <150ms execution time.
This hook runs before context compaction and provides:
- Intelligent compression strategy selection
- Selective content preservation with framework exclusion
- Symbol systems and abbreviation optimization
- Quality-gated compression with 95% information preservation
- Adaptive compression based on resource constraints
"""
import sys
import json
import time
import os
from pathlib import Path
from typing import Dict, Any, List, Optional, Tuple
# Add shared modules to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "shared"))
from framework_logic import FrameworkLogic
from pattern_detection import PatternDetector
from mcp_intelligence import MCPIntelligence
from compression_engine import (
CompressionEngine, CompressionLevel, ContentType, CompressionResult, CompressionStrategy
)
from learning_engine import LearningEngine, LearningType, AdaptationScope
from yaml_loader import config_loader
from logger import log_hook_start, log_hook_end, log_decision, log_error
class PreCompactHook:
"""
Pre-compact hook implementing SuperClaude token efficiency intelligence.
Responsibilities:
- Analyze context for compression opportunities
- Apply selective compression with framework protection
- Implement symbol systems and abbreviation optimization
- Maintain 95% information preservation quality
- Adapt compression strategy based on resource constraints
- Learn from compression effectiveness and user preferences
"""
def __init__(self):
start_time = time.time()
# Initialize core components
self.framework_logic = FrameworkLogic()
self.pattern_detector = PatternDetector()
self.mcp_intelligence = MCPIntelligence()
self.compression_engine = CompressionEngine()
# Initialize learning engine
cache_dir = Path("cache")
self.learning_engine = LearningEngine(cache_dir)
# Load hook-specific configuration from SuperClaude config
self.hook_config = config_loader.get_hook_config('pre_compact')
# Load compression configuration (from YAML if exists, otherwise use hook config)
try:
self.compression_config = config_loader.load_config('compression')
except FileNotFoundError:
# Fall back to hook configuration if YAML file not found
self.compression_config = self.hook_config.get('configuration', {})
# Performance tracking using configuration
self.initialization_time = (time.time() - start_time) * 1000
self.performance_target_ms = config_loader.get_hook_config('pre_compact', 'performance_target_ms', 150)
def process_pre_compact(self, compact_request: dict) -> dict:
"""
Process pre-compact request with intelligent compression.
Args:
compact_request: Context compaction request from Claude Code
Returns:
Compression configuration and optimized content strategy
"""
start_time = time.time()
# Log hook start
log_hook_start("pre_compact", {
"session_id": compact_request.get('session_id', ''),
"content_size": len(compact_request.get('content', '')),
"resource_state": compact_request.get('resource_state', {}),
"triggers": compact_request.get('triggers', [])
})
try:
# Extract compression context
context = self._extract_compression_context(compact_request)
# Analyze content for compression strategy
content_analysis = self._analyze_content_for_compression(context)
# Determine optimal compression strategy
compression_strategy = self._determine_compression_strategy(context, content_analysis)
# Log compression strategy decision
log_decision(
"pre_compact",
"compression_strategy",
compression_strategy.level.value,
f"Based on resource usage: {context.get('token_usage_percent', 0)}%, content type: {content_analysis['content_type'].value}"
)
# Apply selective compression with framework protection
compression_results = self._apply_selective_compression(
context, compression_strategy, content_analysis
)
# Validate compression quality
quality_validation = self._validate_compression_quality(
compression_results, compression_strategy
)
# Log quality validation results
if not quality_validation['overall_quality_met']:
log_decision(
"pre_compact",
"quality_validation",
"failed",
f"Preservation score: {quality_validation['preservation_score']:.2f}, Issues: {', '.join(quality_validation['quality_issues'])}"
)
# Record learning events
self._record_compression_learning(context, compression_results, quality_validation)
# Generate compression configuration
compression_config = self._generate_compression_config(
context, compression_strategy, compression_results, quality_validation
)
# Performance tracking
execution_time = (time.time() - start_time) * 1000
compression_config['performance_metrics'] = {
'compression_time_ms': execution_time,
'target_met': execution_time < self.performance_target_ms,
'efficiency_score': self._calculate_compression_efficiency(context, execution_time)
}
# Log compression results
log_decision(
"pre_compact",
"compression_results",
f"{compression_config['results']['compression_ratio']:.1%}",
f"Saved {compression_config['optimization']['estimated_token_savings']} tokens"
)
# Log hook end
log_hook_end(
"pre_compact",
int(execution_time),
True,
{
"compression_ratio": compression_config['results']['compression_ratio'],
"preservation_score": compression_config['quality']['preservation_score'],
"token_savings": compression_config['optimization']['estimated_token_savings'],
"performance_target_met": execution_time < self.performance_target_ms
}
)
return compression_config
except Exception as e:
# Log error
log_error("pre_compact", str(e), {"request": compact_request})
# Log hook end with failure
log_hook_end("pre_compact", int((time.time() - start_time) * 1000), False)
# Graceful fallback on error
return self._create_fallback_compression_config(compact_request, str(e))
def _extract_compression_context(self, compact_request: dict) -> dict:
"""Extract and enrich compression context."""
context = {
'session_id': compact_request.get('session_id', ''),
'content_to_compress': compact_request.get('content', ''),
'content_metadata': compact_request.get('metadata', {}),
'resource_constraints': compact_request.get('resource_state', {}),
'user_preferences': compact_request.get('user_preferences', {}),
'compression_triggers': compact_request.get('triggers', []),
'previous_compressions': compact_request.get('compression_history', []),
'session_context': compact_request.get('session_context', {}),
'timestamp': time.time()
}
# Analyze content characteristics
context.update(self._analyze_content_characteristics(context))
# Extract resource state
context.update(self._extract_resource_state(context))
return context
def _analyze_content_characteristics(self, context: dict) -> dict:
"""Analyze content characteristics for compression decisions."""
content = context.get('content_to_compress', '')
metadata = context.get('content_metadata', {})
characteristics = {
'content_length': len(content),
'content_complexity': 0.0,
'repetition_factor': 0.0,
'technical_density': 0.0,
'framework_content_ratio': 0.0,
'user_content_ratio': 0.0,
'compressibility_score': 0.0
}
if not content:
return characteristics
# Content complexity analysis
lines = content.split('\n')
characteristics['content_complexity'] = self._calculate_content_complexity(content, lines)
# Repetition analysis
characteristics['repetition_factor'] = self._calculate_repetition_factor(content, lines)
# Technical density
characteristics['technical_density'] = self._calculate_technical_density(content)
# Framework vs user content ratio
framework_ratio, user_ratio = self._analyze_content_sources(content, metadata)
characteristics['framework_content_ratio'] = framework_ratio
characteristics['user_content_ratio'] = user_ratio
# Overall compressibility score
characteristics['compressibility_score'] = self._calculate_compressibility_score(characteristics)
return characteristics
def _calculate_content_complexity(self, content: str, lines: List[str]) -> float:
"""Calculate content complexity score (0.0 to 1.0)."""
complexity_indicators = [
len([line for line in lines if len(line) > 100]) / max(len(lines), 1), # Long lines
len([char for char in content if char in '{}[]()']) / max(len(content), 1), # Structural chars
len(set(content.split())) / max(len(content.split()), 1), # Vocabulary richness
]
return min(sum(complexity_indicators) / len(complexity_indicators), 1.0)
def _calculate_repetition_factor(self, content: str, lines: List[str]) -> float:
"""Calculate repetition factor for compression potential."""
if not lines:
return 0.0
# Line repetition
unique_lines = len(set(lines))
line_repetition = 1.0 - (unique_lines / len(lines))
# Word repetition
words = content.split()
if words:
unique_words = len(set(words))
word_repetition = 1.0 - (unique_words / len(words))
else:
word_repetition = 0.0
return (line_repetition + word_repetition) / 2
def _calculate_technical_density(self, content: str) -> float:
"""Calculate technical density for compression strategy."""
technical_patterns = [
r'\b[A-Z][a-zA-Z]*\b', # CamelCase
r'\b\w+\.\w+\b', # Dotted notation
r'\b\d+\.\d+\.\d+\b', # Version numbers
r'\b[a-z]+_[a-z]+\b', # Snake_case
r'\b[A-Z]{2,}\b', # CONSTANTS
]
import re
technical_matches = 0
for pattern in technical_patterns:
technical_matches += len(re.findall(pattern, content))
total_words = len(content.split())
return min(technical_matches / max(total_words, 1), 1.0)
def _analyze_content_sources(self, content: str, metadata: dict) -> Tuple[float, float]:
"""Analyze ratio of framework vs user content."""
# Framework content indicators
framework_indicators = [
'SuperClaude', 'CLAUDE.md', 'FLAGS.md', 'PRINCIPLES.md',
'ORCHESTRATOR.md', 'MCP_', 'MODE_', 'SESSION_LIFECYCLE'
]
# User content indicators
user_indicators = [
'project_files', 'user_documentation', 'source_code',
'configuration_files', 'custom_content'
]
framework_score = 0
user_score = 0
# Check content text
content_lower = content.lower()
for indicator in framework_indicators:
if indicator.lower() in content_lower:
framework_score += 1
for indicator in user_indicators:
if indicator.lower() in content_lower:
user_score += 1
# Check metadata
content_type = metadata.get('content_type', '')
file_path = metadata.get('file_path', '')
if any(pattern in file_path for pattern in ['/SuperClaude/', '/.claude/', 'framework']):
framework_score += 3
if any(pattern in content_type for pattern in user_indicators):
user_score += 3
total_score = framework_score + user_score
if total_score == 0:
return 0.5, 0.5 # Unknown, assume mixed
return framework_score / total_score, user_score / total_score
def _calculate_compressibility_score(self, characteristics: dict) -> float:
"""Calculate overall compressibility score."""
# Higher repetition = higher compressibility
repetition_contribution = characteristics['repetition_factor'] * 0.4
# Higher technical density = better compression with abbreviations
technical_contribution = characteristics['technical_density'] * 0.3
# Framework content is not compressed (exclusion)
framework_penalty = characteristics['framework_content_ratio'] * 0.5
# Content complexity affects compression effectiveness
complexity_factor = 1.0 - (characteristics['content_complexity'] * 0.2)
score = (repetition_contribution + technical_contribution) * complexity_factor - framework_penalty
return max(min(score, 1.0), 0.0)
def _extract_resource_state(self, context: dict) -> dict:
"""Extract resource state for compression decisions."""
resource_constraints = context.get('resource_constraints', {})
return {
'memory_usage_percent': resource_constraints.get('memory_usage', 0),
'token_usage_percent': resource_constraints.get('token_usage', 0),
'conversation_length': resource_constraints.get('conversation_length', 0),
'resource_pressure': resource_constraints.get('pressure_level', 'normal'),
'user_requests_compression': resource_constraints.get('user_compression_request', False)
}
def _analyze_content_for_compression(self, context: dict) -> dict:
"""Analyze content to determine compression approach."""
content = context.get('content_to_compress', '')
metadata = context.get('content_metadata', {})
# Classify content type
content_type = self.compression_engine.classify_content(content, metadata)
# Analyze compression opportunities
analysis = {
'content_type': content_type,
'compression_opportunities': [],
'preservation_requirements': [],
'optimization_techniques': []
}
# Framework content - complete exclusion
if content_type == ContentType.FRAMEWORK_CONTENT:
analysis['preservation_requirements'].append('complete_exclusion')
analysis['compression_opportunities'] = []
log_decision(
"pre_compact",
"content_classification",
"framework_content",
"Complete exclusion from compression - framework protection"
)
return analysis
# User content - minimal compression only
if content_type == ContentType.USER_CONTENT:
analysis['preservation_requirements'].extend([
'high_fidelity_preservation',
'minimal_compression_only'
])
analysis['compression_opportunities'].append('whitespace_optimization')
log_decision(
"pre_compact",
"content_classification",
"user_content",
"Minimal compression only - user content preservation"
)
return analysis
# Session/working data - full compression applicable
compressibility = context.get('compressibility_score', 0.0)
if compressibility > 0.7:
analysis['compression_opportunities'].extend([
'symbol_systems',
'abbreviation_systems',
'structural_optimization',
'redundancy_removal'
])
elif compressibility > 0.4:
analysis['compression_opportunities'].extend([
'symbol_systems',
'structural_optimization'
])
else:
analysis['compression_opportunities'].append('minimal_optimization')
# Technical content optimization
if context.get('technical_density', 0) > 0.6:
analysis['optimization_techniques'].append('technical_abbreviations')
# Repetitive content optimization
if context.get('repetition_factor', 0) > 0.5:
analysis['optimization_techniques'].append('pattern_compression')
return analysis
def _determine_compression_strategy(self, context: dict, content_analysis: dict) -> CompressionStrategy:
"""Determine optimal compression strategy."""
# Determine compression level based on resource state
compression_level = self.compression_engine.determine_compression_level({
'resource_usage_percent': context.get('token_usage_percent', 0),
'conversation_length': context.get('conversation_length', 0),
'user_requests_brevity': context.get('user_requests_compression', False),
'complexity_score': context.get('content_complexity', 0.0)
})
# Adjust for content type
content_type = content_analysis['content_type']
if content_type == ContentType.FRAMEWORK_CONTENT:
compression_level = CompressionLevel.MINIMAL # Actually no compression
elif content_type == ContentType.USER_CONTENT:
compression_level = CompressionLevel.MINIMAL
# Create strategy
strategy = self.compression_engine._create_compression_strategy(compression_level, content_type)
# Customize based on content analysis
opportunities = content_analysis.get('compression_opportunities', [])
if 'symbol_systems' not in opportunities:
strategy.symbol_systems_enabled = False
if 'abbreviation_systems' not in opportunities:
strategy.abbreviation_systems_enabled = False
if 'structural_optimization' not in opportunities:
strategy.structural_optimization = False
return strategy
def _apply_selective_compression(self, context: dict, strategy: CompressionStrategy,
content_analysis: dict) -> Dict[str, CompressionResult]:
"""Apply selective compression with framework protection."""
content = context.get('content_to_compress', '')
metadata = context.get('content_metadata', {})
# Split content into sections for selective processing
content_sections = self._split_content_into_sections(content, metadata)
compression_results = {}
for section_name, section_data in content_sections.items():
section_content = section_data['content']
section_metadata = section_data['metadata']
# Apply compression to each section
result = self.compression_engine.compress_content(
section_content,
context,
section_metadata
)
compression_results[section_name] = result
return compression_results
def _split_content_into_sections(self, content: str, metadata: dict) -> dict:
"""Split content into sections for selective compression."""
sections = {}
# Simple splitting strategy - can be enhanced
lines = content.split('\n')
# Detect different content types within the text
current_section = 'default'
current_content = []
for line in lines:
# Framework content detection
if any(indicator in line for indicator in ['SuperClaude', 'CLAUDE.md', 'FLAGS.md']):
if current_content and current_section != 'framework':
sections[current_section] = {
'content': '\n'.join(current_content),
'metadata': {**metadata, 'content_type': current_section}
}
current_content = []
current_section = 'framework'
# User code detection
elif any(indicator in line for indicator in ['def ', 'class ', 'function', 'import ']):
if current_content and current_section != 'user_code':
sections[current_section] = {
'content': '\n'.join(current_content),
'metadata': {**metadata, 'content_type': current_section}
}
current_content = []
current_section = 'user_code'
# Session data detection
elif any(indicator in line for indicator in ['session_', 'checkpoint_', 'cache_']):
if current_content and current_section != 'session_data':
sections[current_section] = {
'content': '\n'.join(current_content),
'metadata': {**metadata, 'content_type': current_section}
}
current_content = []
current_section = 'session_data'
current_content.append(line)
# Add final section
if current_content:
sections[current_section] = {
'content': '\n'.join(current_content),
'metadata': {**metadata, 'content_type': current_section}
}
# If no sections detected, treat as single section
if not sections:
sections['default'] = {
'content': content,
'metadata': metadata
}
return sections
def _validate_compression_quality(self, compression_results: Dict[str, CompressionResult],
strategy: CompressionStrategy) -> dict:
"""Validate compression quality against standards."""
validation = {
'overall_quality_met': True,
'preservation_score': 0.0,
'compression_efficiency': 0.0,
'quality_issues': [],
'quality_warnings': []
}
if not compression_results:
return validation
# Calculate overall metrics
total_original = sum(result.original_length for result in compression_results.values())
total_compressed = sum(result.compressed_length for result in compression_results.values())
total_preservation = sum(result.preservation_score for result in compression_results.values())
if total_original > 0:
validation['compression_efficiency'] = (total_original - total_compressed) / total_original
validation['preservation_score'] = total_preservation / len(compression_results)
# Quality threshold validation
if validation['preservation_score'] < strategy.quality_threshold:
validation['overall_quality_met'] = False
validation['quality_issues'].append(
f"Preservation score {validation['preservation_score']:.2f} below threshold {strategy.quality_threshold}"
)
# Individual section validation
for section_name, result in compression_results.items():
if result.quality_score < 0.8:
validation['quality_warnings'].append(
f"Section '{section_name}' quality score low: {result.quality_score:.2f}"
)
if result.compression_ratio > 0.9: # Over 90% compression might be too aggressive
validation['quality_warnings'].append(
f"Section '{section_name}' compression ratio very high: {result.compression_ratio:.2f}"
)
return validation
def _record_compression_learning(self, context: dict, compression_results: Dict[str, CompressionResult],
quality_validation: dict):
"""Record compression learning for future optimization."""
overall_effectiveness = quality_validation['preservation_score'] * quality_validation['compression_efficiency']
# Record compression effectiveness
self.learning_engine.record_learning_event(
LearningType.PERFORMANCE_OPTIMIZATION,
AdaptationScope.USER,
context,
{
'compression_level': self.compression_engine.determine_compression_level(context).value,
'techniques_used': list(set().union(*[result.techniques_used for result in compression_results.values()])),
'preservation_score': quality_validation['preservation_score'],
'compression_efficiency': quality_validation['compression_efficiency']
},
overall_effectiveness,
0.9, # High confidence in compression metrics
{'hook': 'pre_compact', 'compression_learning': True}
)
# Record user preference if compression was requested
if context.get('user_requests_compression'):
self.learning_engine.record_learning_event(
LearningType.USER_PREFERENCE,
AdaptationScope.USER,
context,
{'compression_preference': 'enabled', 'user_satisfaction': overall_effectiveness},
overall_effectiveness,
0.8,
{'user_initiated_compression': True}
)
def _calculate_compression_efficiency(self, context: dict, execution_time_ms: float) -> float:
"""Calculate compression processing efficiency."""
content_length = context.get('content_length', 1)
# Efficiency based on processing speed per character
chars_per_ms = content_length / max(execution_time_ms, 1)
# Target: 100 chars per ms for good efficiency
target_chars_per_ms = 100
efficiency = min(chars_per_ms / target_chars_per_ms, 1.0)
return efficiency
def _generate_compression_config(self, context: dict, strategy: CompressionStrategy,
compression_results: Dict[str, CompressionResult],
quality_validation: dict) -> dict:
"""Generate comprehensive compression configuration."""
total_original = sum(result.original_length for result in compression_results.values())
total_compressed = sum(result.compressed_length for result in compression_results.values())
config = {
'compression_enabled': True,
'compression_level': strategy.level.value,
'selective_compression': True,
'strategy': {
'symbol_systems_enabled': strategy.symbol_systems_enabled,
'abbreviation_systems_enabled': strategy.abbreviation_systems_enabled,
'structural_optimization': strategy.structural_optimization,
'quality_threshold': strategy.quality_threshold
},
'results': {
'original_length': total_original,
'compressed_length': total_compressed,
'compression_ratio': (total_original - total_compressed) / max(total_original, 1),
'sections_processed': len(compression_results),
'techniques_used': list(set().union(*[result.techniques_used for result in compression_results.values()]))
},
'quality': {
'preservation_score': quality_validation['preservation_score'],
'quality_met': quality_validation['overall_quality_met'],
'issues': quality_validation['quality_issues'],
'warnings': quality_validation['quality_warnings']
},
'framework_protection': {
'framework_content_excluded': True,
'user_content_preserved': True,
'selective_processing_enabled': True
},
'optimization': {
'estimated_token_savings': int((total_original - total_compressed) * 0.7), # Rough estimate
'processing_efficiency': quality_validation['compression_efficiency'],
'recommendation': self._get_compression_recommendation(context, quality_validation)
},
'metadata': {
'hook_version': 'pre_compact_1.0',
'compression_timestamp': context['timestamp'],
'content_classification': 'selective_compression_applied'
}
}
return config
def _get_compression_recommendation(self, context: dict, quality_validation: dict) -> str:
"""Get compression recommendation based on results."""
if not quality_validation['overall_quality_met']:
return "Reduce compression level to maintain quality"
elif quality_validation['compression_efficiency'] > 0.7:
return "Excellent compression efficiency achieved"
elif quality_validation['compression_efficiency'] > 0.4:
return "Good compression efficiency, consider slight optimization"
else:
return "Low compression efficiency, consider alternative strategies"
def _create_fallback_compression_config(self, compact_request: dict, error: str) -> dict:
"""Create fallback compression configuration on error."""
return {
'compression_enabled': False,
'fallback_mode': True,
'error': error,
'strategy': {
'symbol_systems_enabled': False,
'abbreviation_systems_enabled': False,
'structural_optimization': False,
'quality_threshold': 1.0
},
'results': {
'original_length': len(compact_request.get('content', '')),
'compressed_length': len(compact_request.get('content', '')),
'compression_ratio': 0.0,
'sections_processed': 0,
'techniques_used': []
},
'quality': {
'preservation_score': 1.0,
'quality_met': False,
'issues': [f"Compression hook error: {error}"],
'warnings': []
},
'performance_metrics': {
'compression_time_ms': 0,
'target_met': False,
'error_occurred': True
}
}
def main():
"""Main hook execution function."""
try:
# Read compact request from stdin
compact_request = json.loads(sys.stdin.read())
# Initialize and run hook
hook = PreCompactHook()
result = hook.process_pre_compact(compact_request)
# Output result as JSON
print(json.dumps(result, indent=2))
except Exception as e:
# Output error as JSON
error_result = {
'compression_enabled': False,
'error': str(e),
'fallback_mode': True
}
print(json.dumps(error_result, indent=2))
sys.exit(1)
if __name__ == "__main__":
main()