mirror of
https://github.com/SuperClaude-Org/SuperClaude_Framework.git
synced 2025-12-17 17:56:46 +00:00
771 lines
32 KiB
Python
#!/usr/bin/env python3
"""
SuperClaude-Lite Pre-Compact Hook

Implements MODE_Token_Efficiency.md compression algorithms for intelligent context optimization.
Performance target: <150ms execution time.

This hook runs before context compaction and provides:
- Intelligent compression strategy selection
- Selective content preservation with framework exclusion
- Symbol systems and abbreviation optimization
- Quality-gated compression with ≥95% information preservation
- Adaptive compression based on resource constraints
"""

import sys
import json
import time
import os
import re
from pathlib import Path
from typing import Dict, Any, List, Optional, Tuple

# Add shared modules to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "shared"))

from framework_logic import FrameworkLogic
from pattern_detection import PatternDetector
from mcp_intelligence import MCPIntelligence
from compression_engine import (
    CompressionEngine, CompressionLevel, ContentType, CompressionResult, CompressionStrategy
)
from learning_engine import LearningEngine, LearningType, AdaptationScope
from yaml_loader import config_loader
from logger import log_hook_start, log_hook_end, log_decision, log_error


class PreCompactHook:
    """
    Pre-compact hook implementing SuperClaude token efficiency intelligence.

    Responsibilities:
    - Analyze context for compression opportunities
    - Apply selective compression with framework protection
    - Implement symbol systems and abbreviation optimization
    - Maintain ≥95% information preservation quality
    - Adapt compression strategy based on resource constraints
    - Learn from compression effectiveness and user preferences
    """

    def __init__(self):
        start_time = time.time()

        # Initialize core components
        self.framework_logic = FrameworkLogic()
        self.pattern_detector = PatternDetector()
        self.mcp_intelligence = MCPIntelligence()
        self.compression_engine = CompressionEngine()

        # Initialize learning engine
        cache_dir = Path("cache")
        self.learning_engine = LearningEngine(cache_dir)

        # Load hook-specific configuration from the SuperClaude config
        self.hook_config = config_loader.get_hook_config('pre_compact')

        # Load compression configuration (from YAML if it exists, otherwise from the hook config)
        try:
            self.compression_config = config_loader.load_config('compression')
        except FileNotFoundError:
            # Fall back to the hook configuration if the YAML file is not found
            self.compression_config = self.hook_config.get('configuration', {})

        # Performance tracking using configuration
        self.initialization_time = (time.time() - start_time) * 1000
        self.performance_target_ms = config_loader.get_hook_config('pre_compact', 'performance_target_ms', 150)

    def process_pre_compact(self, compact_request: dict) -> dict:
        """
        Process pre-compact request with intelligent compression.

        Args:
            compact_request: Context compaction request from Claude Code

        Returns:
            Compression configuration and optimized content strategy
        """
        start_time = time.time()

        # Log hook start
        log_hook_start("pre_compact", {
            "session_id": compact_request.get('session_id', ''),
            "content_size": len(compact_request.get('content', '')),
            "resource_state": compact_request.get('resource_state', {}),
            "triggers": compact_request.get('triggers', [])
        })

        try:
            # Extract compression context
            context = self._extract_compression_context(compact_request)

            # Analyze content for compression strategy
            content_analysis = self._analyze_content_for_compression(context)

            # Determine optimal compression strategy
            compression_strategy = self._determine_compression_strategy(context, content_analysis)

            # Log compression strategy decision
            log_decision(
                "pre_compact",
                "compression_strategy",
                compression_strategy.level.value,
                f"Based on resource usage: {context.get('token_usage_percent', 0)}%, "
                f"content type: {content_analysis['content_type'].value}"
            )

            # Apply selective compression with framework protection
            compression_results = self._apply_selective_compression(
                context, compression_strategy, content_analysis
            )

            # Validate compression quality
            quality_validation = self._validate_compression_quality(
                compression_results, compression_strategy
            )

            # Log quality validation results
            if not quality_validation['overall_quality_met']:
                log_decision(
                    "pre_compact",
                    "quality_validation",
                    "failed",
                    f"Preservation score: {quality_validation['preservation_score']:.2f}, "
                    f"Issues: {', '.join(quality_validation['quality_issues'])}"
                )

            # Record learning events
            self._record_compression_learning(context, compression_results, quality_validation)

            # Generate compression configuration
            compression_config = self._generate_compression_config(
                context, compression_strategy, compression_results, quality_validation
            )

            # Performance tracking
            execution_time = (time.time() - start_time) * 1000
            compression_config['performance_metrics'] = {
                'compression_time_ms': execution_time,
                'target_met': execution_time < self.performance_target_ms,
                'efficiency_score': self._calculate_compression_efficiency(context, execution_time)
            }

            # Log compression results
            log_decision(
                "pre_compact",
                "compression_results",
                f"{compression_config['results']['compression_ratio']:.1%}",
                f"Saved {compression_config['optimization']['estimated_token_savings']} tokens"
            )

            # Log hook end
            log_hook_end(
                "pre_compact",
                int(execution_time),
                True,
                {
                    "compression_ratio": compression_config['results']['compression_ratio'],
                    "preservation_score": compression_config['quality']['preservation_score'],
                    "token_savings": compression_config['optimization']['estimated_token_savings'],
                    "performance_target_met": execution_time < self.performance_target_ms
                }
            )

            return compression_config

        except Exception as e:
            # Log error
            log_error("pre_compact", str(e), {"request": compact_request})

            # Log hook end with failure
            log_hook_end("pre_compact", int((time.time() - start_time) * 1000), False)

            # Graceful fallback on error
            return self._create_fallback_compression_config(compact_request, str(e))

    def _extract_compression_context(self, compact_request: dict) -> dict:
        """Extract and enrich compression context."""
        context = {
            'session_id': compact_request.get('session_id', ''),
            'content_to_compress': compact_request.get('content', ''),
            'content_metadata': compact_request.get('metadata', {}),
            'resource_constraints': compact_request.get('resource_state', {}),
            'user_preferences': compact_request.get('user_preferences', {}),
            'compression_triggers': compact_request.get('triggers', []),
            'previous_compressions': compact_request.get('compression_history', []),
            'session_context': compact_request.get('session_context', {}),
            'timestamp': time.time()
        }

        # Analyze content characteristics
        context.update(self._analyze_content_characteristics(context))

        # Extract resource state
        context.update(self._extract_resource_state(context))

        return context
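
    # Illustrative compact_request payload (shape inferred from the keys read
    # above and in process_pre_compact; not an official schema, and the field
    # values are made up):
    #
    #   {
    #     "session_id": "s1",
    #     "content": "...text to compact...",
    #     "metadata": {"content_type": "session_data", "file_path": "..."},
    #     "resource_state": {"token_usage": 82, "pressure_level": "high"},
    #     "user_preferences": {},
    #     "triggers": ["token_threshold"],
    #     "compression_history": [],
    #     "session_context": {}
    #   }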

    def _analyze_content_characteristics(self, context: dict) -> dict:
        """Analyze content characteristics for compression decisions."""
        content = context.get('content_to_compress', '')
        metadata = context.get('content_metadata', {})

        characteristics = {
            'content_length': len(content),
            'content_complexity': 0.0,
            'repetition_factor': 0.0,
            'technical_density': 0.0,
            'framework_content_ratio': 0.0,
            'user_content_ratio': 0.0,
            'compressibility_score': 0.0
        }

        if not content:
            return characteristics

        # Content complexity analysis
        lines = content.split('\n')
        characteristics['content_complexity'] = self._calculate_content_complexity(content, lines)

        # Repetition analysis
        characteristics['repetition_factor'] = self._calculate_repetition_factor(content, lines)

        # Technical density
        characteristics['technical_density'] = self._calculate_technical_density(content)

        # Framework vs. user content ratio
        framework_ratio, user_ratio = self._analyze_content_sources(content, metadata)
        characteristics['framework_content_ratio'] = framework_ratio
        characteristics['user_content_ratio'] = user_ratio

        # Overall compressibility score
        characteristics['compressibility_score'] = self._calculate_compressibility_score(characteristics)

        return characteristics

    def _calculate_content_complexity(self, content: str, lines: List[str]) -> float:
        """Calculate content complexity score (0.0 to 1.0)."""
        complexity_indicators = [
            len([line for line in lines if len(line) > 100]) / max(len(lines), 1),       # Long lines
            len([char for char in content if char in '{}[]()']) / max(len(content), 1),  # Structural chars
            len(set(content.split())) / max(len(content.split()), 1),                    # Vocabulary richness
        ]

        return min(sum(complexity_indicators) / len(complexity_indicators), 1.0)

    def _calculate_repetition_factor(self, content: str, lines: List[str]) -> float:
        """Calculate repetition factor for compression potential."""
        if not lines:
            return 0.0

        # Line repetition
        unique_lines = len(set(lines))
        line_repetition = 1.0 - (unique_lines / len(lines))

        # Word repetition
        words = content.split()
        if words:
            unique_words = len(set(words))
            word_repetition = 1.0 - (unique_words / len(words))
        else:
            word_repetition = 0.0

        return (line_repetition + word_repetition) / 2
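
    # Illustrative: for lines ["x = 1", "x = 1", "y = 2"] the line repetition is
    # 1 - 2/3 ≈ 0.33 and the word repetition is 1 - 5/9 ≈ 0.44 (9 words, 5
    # unique), giving a repetition factor of ≈ 0.39.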

    def _calculate_technical_density(self, content: str) -> float:
        """Calculate technical density for compression strategy."""
        technical_patterns = [
            r'\b[A-Z][a-zA-Z]*\b',   # Capitalized / CamelCase identifiers
            r'\b\w+\.\w+\b',         # Dotted notation
            r'\b\d+\.\d+\.\d+\b',    # Version numbers
            r'\b[a-z]+_[a-z]+\b',    # snake_case
            r'\b[A-Z]{2,}\b',        # CONSTANTS
        ]

        technical_matches = 0
        for pattern in technical_patterns:
            technical_matches += len(re.findall(pattern, content))

        total_words = len(content.split())
        return min(technical_matches / max(total_words, 1), 1.0)

    def _analyze_content_sources(self, content: str, metadata: dict) -> Tuple[float, float]:
        """Analyze the ratio of framework vs. user content."""
        # Framework content indicators
        framework_indicators = [
            'SuperClaude', 'CLAUDE.md', 'FLAGS.md', 'PRINCIPLES.md',
            'ORCHESTRATOR.md', 'MCP_', 'MODE_', 'SESSION_LIFECYCLE'
        ]

        # User content indicators
        user_indicators = [
            'project_files', 'user_documentation', 'source_code',
            'configuration_files', 'custom_content'
        ]

        framework_score = 0
        user_score = 0

        # Check content text
        content_lower = content.lower()
        for indicator in framework_indicators:
            if indicator.lower() in content_lower:
                framework_score += 1

        for indicator in user_indicators:
            if indicator.lower() in content_lower:
                user_score += 1

        # Check metadata
        content_type = metadata.get('content_type', '')
        file_path = metadata.get('file_path', '')

        if any(pattern in file_path for pattern in ['/SuperClaude/', '/.claude/', 'framework']):
            framework_score += 3

        if any(pattern in content_type for pattern in user_indicators):
            user_score += 3

        total_score = framework_score + user_score
        if total_score == 0:
            return 0.5, 0.5  # Unknown; assume mixed

        return framework_score / total_score, user_score / total_score

    def _calculate_compressibility_score(self, characteristics: dict) -> float:
        """Calculate overall compressibility score."""
        # Higher repetition = higher compressibility
        repetition_contribution = characteristics['repetition_factor'] * 0.4

        # Higher technical density = better compression with abbreviations
        technical_contribution = characteristics['technical_density'] * 0.3

        # Framework content is not compressed (exclusion)
        framework_penalty = characteristics['framework_content_ratio'] * 0.5

        # Content complexity affects compression effectiveness
        complexity_factor = 1.0 - (characteristics['content_complexity'] * 0.2)

        score = (repetition_contribution + technical_contribution) * complexity_factor - framework_penalty

        return max(min(score, 1.0), 0.0)
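
    # Worked example (illustrative): repetition_factor=0.5, technical_density=0.4,
    # framework_content_ratio=0.2, content_complexity=0.5 give
    # (0.5*0.4 + 0.4*0.3) * (1 - 0.5*0.2) - 0.2*0.5 = 0.32 * 0.9 - 0.10 ≈ 0.19.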

    def _extract_resource_state(self, context: dict) -> dict:
        """Extract resource state for compression decisions."""
        resource_constraints = context.get('resource_constraints', {})

        return {
            'memory_usage_percent': resource_constraints.get('memory_usage', 0),
            'token_usage_percent': resource_constraints.get('token_usage', 0),
            'conversation_length': resource_constraints.get('conversation_length', 0),
            'resource_pressure': resource_constraints.get('pressure_level', 'normal'),
            'user_requests_compression': resource_constraints.get('user_compression_request', False)
        }

    def _analyze_content_for_compression(self, context: dict) -> dict:
        """Analyze content to determine the compression approach."""
        content = context.get('content_to_compress', '')
        metadata = context.get('content_metadata', {})

        # Classify content type
        content_type = self.compression_engine.classify_content(content, metadata)

        # Analyze compression opportunities
        analysis = {
            'content_type': content_type,
            'compression_opportunities': [],
            'preservation_requirements': [],
            'optimization_techniques': []
        }

        # Framework content - complete exclusion
        if content_type == ContentType.FRAMEWORK_CONTENT:
            analysis['preservation_requirements'].append('complete_exclusion')
            analysis['compression_opportunities'] = []
            log_decision(
                "pre_compact",
                "content_classification",
                "framework_content",
                "Complete exclusion from compression - framework protection"
            )
            return analysis

        # User content - minimal compression only
        if content_type == ContentType.USER_CONTENT:
            analysis['preservation_requirements'].extend([
                'high_fidelity_preservation',
                'minimal_compression_only'
            ])
            analysis['compression_opportunities'].append('whitespace_optimization')
            log_decision(
                "pre_compact",
                "content_classification",
                "user_content",
                "Minimal compression only - user content preservation"
            )
            return analysis

        # Session/working data - full compression applicable
        compressibility = context.get('compressibility_score', 0.0)

        if compressibility > 0.7:
            analysis['compression_opportunities'].extend([
                'symbol_systems',
                'abbreviation_systems',
                'structural_optimization',
                'redundancy_removal'
            ])
        elif compressibility > 0.4:
            analysis['compression_opportunities'].extend([
                'symbol_systems',
                'structural_optimization'
            ])
        else:
            analysis['compression_opportunities'].append('minimal_optimization')

        # Technical content optimization
        if context.get('technical_density', 0) > 0.6:
            analysis['optimization_techniques'].append('technical_abbreviations')

        # Repetitive content optimization
        if context.get('repetition_factor', 0) > 0.5:
            analysis['optimization_techniques'].append('pattern_compression')

        return analysis

    def _determine_compression_strategy(self, context: dict, content_analysis: dict) -> CompressionStrategy:
        """Determine the optimal compression strategy."""
        # Determine the compression level based on resource state
        compression_level = self.compression_engine.determine_compression_level({
            'resource_usage_percent': context.get('token_usage_percent', 0),
            'conversation_length': context.get('conversation_length', 0),
            'user_requests_brevity': context.get('user_requests_compression', False),
            'complexity_score': context.get('content_complexity', 0.0)
        })

        # Adjust for content type
        content_type = content_analysis['content_type']
        if content_type == ContentType.FRAMEWORK_CONTENT:
            compression_level = CompressionLevel.MINIMAL  # Effectively no compression
        elif content_type == ContentType.USER_CONTENT:
            compression_level = CompressionLevel.MINIMAL

        # Create strategy
        strategy = self.compression_engine._create_compression_strategy(compression_level, content_type)

        # Customize based on content analysis
        opportunities = content_analysis.get('compression_opportunities', [])

        if 'symbol_systems' not in opportunities:
            strategy.symbol_systems_enabled = False
        if 'abbreviation_systems' not in opportunities:
            strategy.abbreviation_systems_enabled = False
        if 'structural_optimization' not in opportunities:
            strategy.structural_optimization = False

        return strategy

    def _apply_selective_compression(self, context: dict, strategy: CompressionStrategy,
                                     content_analysis: dict) -> Dict[str, CompressionResult]:
        """Apply selective compression with framework protection."""
        content = context.get('content_to_compress', '')
        metadata = context.get('content_metadata', {})

        # Split content into sections for selective processing
        content_sections = self._split_content_into_sections(content, metadata)

        compression_results = {}

        for section_name, section_data in content_sections.items():
            section_content = section_data['content']
            section_metadata = section_data['metadata']

            # Apply compression to each section
            result = self.compression_engine.compress_content(
                section_content,
                context,
                section_metadata
            )

            compression_results[section_name] = result

        return compression_results

    def _split_content_into_sections(self, content: str, metadata: dict) -> dict:
        """Split content into sections for selective compression."""
        sections = {}

        def flush(section: str, buffered: List[str]):
            """Store buffered lines, appending if the section was seen before."""
            if not buffered:
                return
            text = '\n'.join(buffered)
            if section in sections:
                sections[section]['content'] += '\n' + text
            else:
                sections[section] = {
                    'content': text,
                    'metadata': {**metadata, 'content_type': section}
                }

        # Simple splitting strategy - can be enhanced
        lines = content.split('\n')

        # Detect different content types within the text
        current_section = 'default'
        current_content = []

        for line in lines:
            # Framework content detection
            if any(indicator in line for indicator in ['SuperClaude', 'CLAUDE.md', 'FLAGS.md']):
                if current_content and current_section != 'framework':
                    flush(current_section, current_content)
                    current_content = []
                current_section = 'framework'

            # User code detection
            elif any(indicator in line for indicator in ['def ', 'class ', 'function', 'import ']):
                if current_content and current_section != 'user_code':
                    flush(current_section, current_content)
                    current_content = []
                current_section = 'user_code'

            # Session data detection
            elif any(indicator in line for indicator in ['session_', 'checkpoint_', 'cache_']):
                if current_content and current_section != 'session_data':
                    flush(current_section, current_content)
                    current_content = []
                current_section = 'session_data'

            current_content.append(line)

        # Store the final section
        flush(current_section, current_content)

        # If no sections were detected, treat the content as a single section
        if not sections:
            sections['default'] = {
                'content': content,
                'metadata': metadata
            }

        return sections
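
    # Illustrative split: a blob whose first lines mention CLAUDE.md, followed by
    # "def handler():" code, then "session_checkpoint" records, yields the
    # sections 'framework', 'user_code', and 'session_data', each tagged via
    # metadata['content_type'].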

    def _validate_compression_quality(self, compression_results: Dict[str, CompressionResult],
                                      strategy: CompressionStrategy) -> dict:
        """Validate compression quality against standards."""
        validation = {
            'overall_quality_met': True,
            'preservation_score': 0.0,
            'compression_efficiency': 0.0,
            'quality_issues': [],
            'quality_warnings': []
        }

        if not compression_results:
            return validation

        # Calculate overall metrics
        total_original = sum(result.original_length for result in compression_results.values())
        total_compressed = sum(result.compressed_length for result in compression_results.values())
        total_preservation = sum(result.preservation_score for result in compression_results.values())

        if total_original > 0:
            validation['compression_efficiency'] = (total_original - total_compressed) / total_original

        validation['preservation_score'] = total_preservation / len(compression_results)

        # Quality threshold validation
        if validation['preservation_score'] < strategy.quality_threshold:
            validation['overall_quality_met'] = False
            validation['quality_issues'].append(
                f"Preservation score {validation['preservation_score']:.2f} "
                f"below threshold {strategy.quality_threshold}"
            )

        # Individual section validation
        for section_name, result in compression_results.items():
            if result.quality_score < 0.8:
                validation['quality_warnings'].append(
                    f"Section '{section_name}' quality score low: {result.quality_score:.2f}"
                )

            if result.compression_ratio > 0.9:  # Over 90% compression might be too aggressive
                validation['quality_warnings'].append(
                    f"Section '{section_name}' compression ratio very high: {result.compression_ratio:.2f}"
                )

        return validation
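
    # Illustrative: two sections totaling 1000 original and 600 compressed chars
    # give a compression efficiency of 0.4; per-section preservation scores of
    # 0.96 and 0.92 average to 0.94, which fails a 0.95 quality threshold and
    # sets overall_quality_met to False.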

    def _record_compression_learning(self, context: dict, compression_results: Dict[str, CompressionResult],
                                     quality_validation: dict):
        """Record compression learning for future optimization."""
        overall_effectiveness = quality_validation['preservation_score'] * quality_validation['compression_efficiency']

        # Record compression effectiveness
        self.learning_engine.record_learning_event(
            LearningType.PERFORMANCE_OPTIMIZATION,
            AdaptationScope.USER,
            context,
            {
                'compression_level': self.compression_engine.determine_compression_level(context).value,
                'techniques_used': list(set().union(*[result.techniques_used for result in compression_results.values()])),
                'preservation_score': quality_validation['preservation_score'],
                'compression_efficiency': quality_validation['compression_efficiency']
            },
            overall_effectiveness,
            0.9,  # High confidence in compression metrics
            {'hook': 'pre_compact', 'compression_learning': True}
        )

        # Record a user preference if compression was requested
        if context.get('user_requests_compression'):
            self.learning_engine.record_learning_event(
                LearningType.USER_PREFERENCE,
                AdaptationScope.USER,
                context,
                {'compression_preference': 'enabled', 'user_satisfaction': overall_effectiveness},
                overall_effectiveness,
                0.8,
                {'user_initiated_compression': True}
            )

    def _calculate_compression_efficiency(self, context: dict, execution_time_ms: float) -> float:
        """Calculate compression processing efficiency."""
        content_length = context.get('content_length', 1)

        # Efficiency based on processing speed per character
        chars_per_ms = content_length / max(execution_time_ms, 1)

        # Target: 100 chars per ms for good efficiency
        target_chars_per_ms = 100
        efficiency = min(chars_per_ms / target_chars_per_ms, 1.0)

        return efficiency
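
    # Illustrative: 10,000 chars processed in 50 ms is 200 chars/ms, which is
    # capped at an efficiency of 1.0; 2,500 chars in 50 ms (50 chars/ms) scores 0.5.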

    def _generate_compression_config(self, context: dict, strategy: CompressionStrategy,
                                     compression_results: Dict[str, CompressionResult],
                                     quality_validation: dict) -> dict:
        """Generate comprehensive compression configuration."""
        total_original = sum(result.original_length for result in compression_results.values())
        total_compressed = sum(result.compressed_length for result in compression_results.values())

        config = {
            'compression_enabled': True,
            'compression_level': strategy.level.value,
            'selective_compression': True,

            'strategy': {
                'symbol_systems_enabled': strategy.symbol_systems_enabled,
                'abbreviation_systems_enabled': strategy.abbreviation_systems_enabled,
                'structural_optimization': strategy.structural_optimization,
                'quality_threshold': strategy.quality_threshold
            },

            'results': {
                'original_length': total_original,
                'compressed_length': total_compressed,
                'compression_ratio': (total_original - total_compressed) / max(total_original, 1),
                'sections_processed': len(compression_results),
                'techniques_used': list(set().union(*[result.techniques_used for result in compression_results.values()]))
            },

            'quality': {
                'preservation_score': quality_validation['preservation_score'],
                'quality_met': quality_validation['overall_quality_met'],
                'issues': quality_validation['quality_issues'],
                'warnings': quality_validation['quality_warnings']
            },

            'framework_protection': {
                'framework_content_excluded': True,
                'user_content_preserved': True,
                'selective_processing_enabled': True
            },

            'optimization': {
                'estimated_token_savings': int((total_original - total_compressed) * 0.7),  # Rough estimate
                'processing_efficiency': quality_validation['compression_efficiency'],
                'recommendation': self._get_compression_recommendation(context, quality_validation)
            },

            'metadata': {
                'hook_version': 'pre_compact_1.0',
                'compression_timestamp': context['timestamp'],
                'content_classification': 'selective_compression_applied'
            }
        }

        return config

    def _get_compression_recommendation(self, context: dict, quality_validation: dict) -> str:
        """Get compression recommendation based on results."""
        if not quality_validation['overall_quality_met']:
            return "Reduce compression level to maintain quality"
        elif quality_validation['compression_efficiency'] > 0.7:
            return "Excellent compression efficiency achieved"
        elif quality_validation['compression_efficiency'] > 0.4:
            return "Good compression efficiency, consider slight optimization"
        else:
            return "Low compression efficiency, consider alternative strategies"

    def _create_fallback_compression_config(self, compact_request: dict, error: str) -> dict:
        """Create fallback compression configuration on error."""
        return {
            'compression_enabled': False,
            'fallback_mode': True,
            'error': error,

            'strategy': {
                'symbol_systems_enabled': False,
                'abbreviation_systems_enabled': False,
                'structural_optimization': False,
                'quality_threshold': 1.0
            },

            'results': {
                'original_length': len(compact_request.get('content', '')),
                'compressed_length': len(compact_request.get('content', '')),
                'compression_ratio': 0.0,
                'sections_processed': 0,
                'techniques_used': []
            },

            'quality': {
                'preservation_score': 1.0,
                'quality_met': False,
                'issues': [f"Compression hook error: {error}"],
                'warnings': []
            },

            'performance_metrics': {
                'compression_time_ms': 0,
                'target_met': False,
                'error_occurred': True
            }
        }


def main():
    """Main hook execution function."""
    try:
        # Read the compact request from stdin
        compact_request = json.loads(sys.stdin.read())

        # Initialize and run the hook
        hook = PreCompactHook()
        result = hook.process_pre_compact(compact_request)

        # Output the result as JSON
        print(json.dumps(result, indent=2))

    except Exception as e:
        # Output the error as JSON
        error_result = {
            'compression_enabled': False,
            'error': str(e),
            'fallback_mode': True
        }
        print(json.dumps(error_result, indent=2))
        sys.exit(1)


if __name__ == "__main__":
    main()
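
# Example invocation (illustrative; assumes this script is saved as
# pre_compact.py and registered as a Claude Code pre-compact hook that receives
# its request on stdin - the registration mechanism lives outside this file):
#   echo '{"session_id": "s1", "content": "...", "resource_state": {}}' \
#       | python3 pre_compact.py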