mithun50 b00547ad53 style: apply ruff formatting to all files
Formatted 14 files to comply with ruff formatting rules:
- Consistent code style across codebase
- Improved readability
- All formatting checks now pass

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-11-12 18:19:45 +01:00

401 lines
12 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Reflection Engine - 3-Stage Pre-Execution Confidence Check
Implements the "Triple Reflection" pattern:
1. Requirement clarity analysis
2. Past mistake pattern detection
3. Context sufficiency validation
Only proceeds with execution if confidence ≥70%.
"""
import json
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, List, Optional
@dataclass
class ReflectionResult:
    """Single reflection analysis result."""

    # Name of the reflection stage (e.g. "Requirement Clarity").
    stage: str
    # Confidence score in [0.0, 1.0].
    score: float
    # Positive signals supporting the score.
    evidence: List[str]
    # Negative signals / warnings that lowered the score.
    concerns: List[str]

    def __repr__(self) -> str:
        # BUG FIX: the high-score and low-score branches both produced an
        # empty string (the original emoji were stripped as "invisible
        # Unicode"), so the three-way threshold was indistinguishable.
        # Restore distinct markers consistent with the emoji used elsewhere
        # in this module (🟢/🔴/⚠️).
        emoji = "✅" if self.score > 0.7 else "⚠️" if self.score > 0.4 else "❌"
        return f"{emoji} {self.stage}: {self.score:.0%}"
@dataclass
class ConfidenceScore:
    """Overall pre-execution confidence assessment."""

    # Individual reflection scores, one per stage.
    requirement_clarity: ReflectionResult
    mistake_check: ReflectionResult
    context_ready: ReflectionResult
    # Overall confidence (weighted average of the three stage scores).
    confidence: float
    # Final decision: True when confidence cleared the threshold.
    should_proceed: bool
    # Concerns collected from under-threshold stages.
    blockers: List[str]
    # Suggested remediation steps for each under-threshold stage.
    recommendations: List[str]

    def __repr__(self) -> str:
        header = "🟢 PROCEED" if self.should_proceed else "🔴 BLOCKED"
        lines = [
            f"{header} | Confidence: {self.confidence:.0%}",
            f" Clarity: {self.requirement_clarity}",
            f" Mistakes: {self.mistake_check}",
            f" Context: {self.context_ready}",
        ]
        return "\n".join(lines)
class ReflectionEngine:
    """
    3-Stage Pre-Execution Reflection System

    Prevents wrong-direction execution by deep reflection
    before committing resources to implementation.

    Workflow:
    1. Reflect on requirement clarity (what to build)
    2. Reflect on past mistakes (what not to do)
    3. Reflect on context readiness (can I do it)
    4. Calculate overall confidence
    5. BLOCK if <70%, PROCEED if ≥70%
    """

    def __init__(self, repo_path: Path):
        """Create an engine rooted at *repo_path*; ensures docs/memory exists."""
        self.repo_path = repo_path
        self.memory_path = repo_path / "docs" / "memory"
        self.memory_path.mkdir(parents=True, exist_ok=True)

        # Minimum confidence required to proceed; also used as the
        # per-stage threshold when collecting blockers in reflect().
        self.CONFIDENCE_THRESHOLD = 0.7

        # Weights for the confidence-weighted average (sum to 1.0).
        self.WEIGHTS = {
            "clarity": 0.5,  # Most important
            "mistakes": 0.3,  # Learn from past
            "context": 0.2,  # Least critical (can load more)
        }

    def reflect(
        self, task: str, context: Optional[Dict[str, Any]] = None
    ) -> ConfidenceScore:
        """
        3-Stage Reflection Process

        Runs the three reflection stages, combines their scores into a
        weighted average, and decides whether to proceed or block.

        Returns confidence score with decision to proceed or block.
        """
        print("🧠 Reflection Engine: 3-Stage Analysis")
        print("=" * 60)

        # Stage 1: Requirement Clarity
        clarity = self._reflect_clarity(task, context)
        # NOTE(review): the bare "1"/"2"/"3" prefixes below look like numbered
        # emoji whose invisible characters were stripped — confirm intended
        # console output before changing them.
        print(f"1{clarity}")

        # Stage 2: Past Mistakes
        mistakes = self._reflect_mistakes(task, context)
        print(f"2{mistakes}")

        # Stage 3: Context Readiness
        context_ready = self._reflect_context(task, context)
        print(f"3{context_ready}")

        # Overall confidence: weighted average of the three stage scores.
        confidence = (
            clarity.score * self.WEIGHTS["clarity"]
            + mistakes.score * self.WEIGHTS["mistakes"]
            + context_ready.score * self.WEIGHTS["context"]
        )

        # Decision logic
        should_proceed = confidence >= self.CONFIDENCE_THRESHOLD

        # Collect blockers and recommendations from under-threshold stages.
        # CONSISTENCY FIX: the per-stage cutoff previously hardcoded 0.7;
        # it now tracks CONFIDENCE_THRESHOLD so the two cannot drift apart.
        blockers: List[str] = []
        recommendations: List[str] = []
        for stage_result, recommendation in (
            (clarity, "Clarify requirements with user"),
            (mistakes, "Review past mistakes before proceeding"),
            (context_ready, "Load additional context files"),
        ):
            if stage_result.score < self.CONFIDENCE_THRESHOLD:
                blockers.extend(stage_result.concerns)
                recommendations.append(recommendation)

        result = ConfidenceScore(
            requirement_clarity=clarity,
            mistake_check=mistakes,
            context_ready=context_ready,
            confidence=confidence,
            should_proceed=should_proceed,
            blockers=blockers,
            recommendations=recommendations,
        )

        print("=" * 60)
        print(result)
        print("=" * 60)

        return result

    def _reflect_clarity(
        self, task: str, context: Optional[Dict] = None
    ) -> ReflectionResult:
        """
        Reflection 1: Requirement Clarity

        Analyzes if the task description is specific enough to proceed
        with implementation. Starts from a neutral 0.5 and adjusts for
        heuristic positive/negative signals, clamped to [0, 1].
        """
        evidence: List[str] = []
        concerns: List[str] = []
        score = 0.5  # Start neutral

        # Verbs that signal a concrete, actionable request.
        specific_verbs = [
            "create",
            "fix",
            "add",
            "update",
            "delete",
            "refactor",
            "implement",
        ]
        # Verbs that usually indicate an under-specified request.
        vague_verbs = ["improve", "optimize", "enhance", "better", "something"]

        task_lower = task.lower()

        # Positive signals (increase score)
        if any(verb in task_lower for verb in specific_verbs):
            score += 0.2
            evidence.append("Contains specific action verb")

        # Technical terms present
        if any(
            term in task_lower
            for term in ["function", "class", "file", "api", "endpoint"]
        ):
            score += 0.15
            evidence.append("Includes technical specifics")

        # Has concrete targets (paths, dotted names, call syntax).
        # Note: checked against the original-case task, not task_lower.
        if any(char in task for char in ["/", ".", "(", ")"]):
            score += 0.15
            evidence.append("References concrete code elements")

        # Negative signals (decrease score)
        if any(verb in task_lower for verb in vague_verbs):
            score -= 0.2
            concerns.append("Contains vague action verbs")

        # Too short (likely unclear)
        if len(task.split()) < 5:
            score -= 0.15
            concerns.append("Task description too brief")

        # Clamp score to [0, 1]
        score = max(0.0, min(1.0, score))

        return ReflectionResult(
            stage="Requirement Clarity",
            score=score,
            evidence=evidence,
            concerns=concerns,
        )

    def _reflect_mistakes(
        self, task: str, context: Optional[Dict] = None
    ) -> ReflectionResult:
        """
        Reflection 2: Past Mistake Check

        Searches docs/memory/reflexion.json for past mistakes whose task
        description shares at least 2 words with the current task, and
        lowers the score for each similar mistake found (max -0.9).
        """
        evidence: List[str] = []
        concerns: List[str] = []
        score = 1.0  # Start optimistic (no mistakes known)

        # Load reflexion memory; absence simply means a clean record.
        reflexion_file = self.memory_path / "reflexion.json"
        if not reflexion_file.exists():
            evidence.append("No past mistakes recorded")
            return ReflectionResult(
                stage="Past Mistakes", score=score, evidence=evidence, concerns=concerns
            )

        try:
            # ROBUSTNESS: explicit encoding so reads don't depend on the
            # platform default (e.g. cp1252 on Windows).
            with open(reflexion_file, encoding="utf-8") as f:
                reflexion_data = json.load(f)

            past_mistakes = reflexion_data.get("mistakes", [])

            # Crude similarity: word overlap between task descriptions.
            similar_mistakes = []
            task_keywords = set(task.lower().split())
            for mistake in past_mistakes:
                mistake_keywords = set(mistake.get("task", "").lower().split())
                overlap = task_keywords & mistake_keywords
                if len(overlap) >= 2:  # At least 2 common words
                    similar_mistakes.append(mistake)

            if similar_mistakes:
                score -= 0.3 * min(len(similar_mistakes), 3)  # Max -0.9
                concerns.append(f"Found {len(similar_mistakes)} similar past mistakes")
                for mistake in similar_mistakes[:3]:  # Show max 3
                    concerns.append(f" ⚠️ {mistake.get('mistake', 'Unknown')}")
            else:
                evidence.append(
                    f"Checked {len(past_mistakes)} past mistakes - none similar"
                )
        except Exception as e:
            # Best-effort: an unreadable memory file must not block work.
            concerns.append(f"Could not load reflexion memory: {e}")
            score = 0.7  # Neutral when can't check

        # Clamp score
        score = max(0.0, min(1.0, score))

        return ReflectionResult(
            stage="Past Mistakes", score=score, evidence=evidence, concerns=concerns
        )

    def _reflect_context(
        self, task: str, context: Optional[Dict] = None
    ) -> ReflectionResult:
        """
        Reflection 3: Context Readiness

        Validates that sufficient context is loaded to proceed: the
        essential context keys are present and PROJECT_INDEX.md exists
        and is less than a week old.
        """
        evidence: List[str] = []
        concerns: List[str] = []
        score = 0.5  # Start neutral

        # No context at all is an early, low-confidence exit.
        if not context:
            concerns.append("No context provided")
            score = 0.3
            return ReflectionResult(
                stage="Context Readiness",
                score=score,
                evidence=evidence,
                concerns=concerns,
            )

        # Check for essential context elements
        essential_keys = ["project_index", "current_branch", "git_status"]
        loaded_keys = [key for key in essential_keys if key in context]

        if len(loaded_keys) == len(essential_keys):
            score += 0.3
            evidence.append("All essential context loaded")
        else:
            missing = set(essential_keys) - set(loaded_keys)
            score -= 0.2
            concerns.append(f"Missing context: {', '.join(missing)}")

        # Check project index exists and is fresh (mtime < 7 days).
        index_path = self.repo_path / "PROJECT_INDEX.md"
        if index_path.exists():
            # Age in days, from file modification time (86400 s/day).
            age_days = (datetime.now().timestamp() - index_path.stat().st_mtime) / 86400
            if age_days < 7:
                score += 0.2
                evidence.append(f"Project index is fresh ({age_days:.1f} days old)")
            else:
                concerns.append(f"Project index is stale ({age_days:.0f} days old)")
        else:
            score -= 0.2
            concerns.append("Project index missing")

        # Clamp score
        score = max(0.0, min(1.0, score))

        return ReflectionResult(
            stage="Context Readiness", score=score, evidence=evidence, concerns=concerns
        )

    def record_reflection(self, task: str, confidence: ConfidenceScore, decision: str):
        """
        Record reflection results for future learning.

        Appends one entry to docs/memory/reflection_log.json; failures are
        reported but never raised (logging must not block execution).
        """
        reflection_log = self.memory_path / "reflection_log.json"

        entry = {
            "timestamp": datetime.now().isoformat(),
            "task": task,
            "confidence": confidence.confidence,
            "decision": decision,
            "blockers": confidence.blockers,
            "recommendations": confidence.recommendations,
        }

        # Append to log (read-modify-write of the whole JSON document).
        try:
            if reflection_log.exists():
                # ROBUSTNESS: explicit encoding, matching the write side.
                with open(reflection_log, encoding="utf-8") as f:
                    log_data = json.load(f)
            else:
                log_data = {"reflections": []}

            log_data["reflections"].append(entry)

            with open(reflection_log, "w", encoding="utf-8") as f:
                json.dump(log_data, f, indent=2)
        except Exception as e:
            print(f"⚠️ Could not record reflection: {e}")
# Singleton instance
# Module-level cache for get_reflection_engine(); created lazily on first use.
_reflection_engine: Optional[ReflectionEngine] = None
def get_reflection_engine(repo_path: Optional[Path] = None) -> ReflectionEngine:
    """
    Return the process-wide ReflectionEngine, creating it on first call.

    Note: *repo_path* is only honored by the call that creates the
    singleton; later calls return the existing instance unchanged.
    """
    global _reflection_engine
    if _reflection_engine is not None:
        return _reflection_engine
    root = repo_path if repo_path is not None else Path.cwd()
    _reflection_engine = ReflectionEngine(root)
    return _reflection_engine
# Convenience function
def reflect_before_execution(
    task: str, context: Optional[Dict] = None
) -> ConfidenceScore:
    """
    Perform 3-stage reflection before task execution

    Thin wrapper that delegates to the singleton engine.
    Returns ConfidenceScore with decision to proceed or block.
    """
    return get_reflection_engine().reflect(task, context)