SuperClaude/Docs/Reference/validate_commands.py


#!/usr/bin/env python3
"""
SuperClaude Framework Command Validation Script

This script validates all documented SuperClaude commands and flags to ensure
documentation accuracy and system reliability.

Usage:
    python3 validate_commands.py [--quick] [--verbose] [--export-report]

Requirements:
- SuperClaude Framework installed
- Active Claude Code session
- MCP servers configured (for full validation)
"""

import sys
import os
import subprocess
import time
import json
import re
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Set
from dataclasses import dataclass, asdict
from enum import Enum
import argparse


class ValidationResult(Enum):
    PASS = "✅"
    FAIL = "❌"
    WARNING = "⚠️"
    SKIP = "⏭️"
    UNKNOWN = "❓"


@dataclass
class TestResult:
    name: str
    category: str
    command: str
    expected_behavior: str
    result: ValidationResult
    message: str
    execution_time: float = 0.0
    details: Optional[Dict] = None


class SuperClaudeValidator:
    """Comprehensive validation system for SuperClaude commands and flags."""

    def __init__(self, verbose: bool = False, quick_mode: bool = False):
        self.verbose = verbose
        self.quick_mode = quick_mode
        self.results: List[TestResult] = []
        self.start_time = datetime.now()

        # Documented commands from commands.md
        self.essential_commands = [
            "brainstorm", "implement", "analyze", "troubleshoot",
            "test", "improve", "document", "workflow"
        ]
        self.development_commands = ["build", "design"]
        self.analysis_commands = ["explain"]
        self.quality_commands = ["cleanup"]
        self.project_mgmt_commands = ["estimate", "task", "spawn"]
        self.utility_commands = ["git", "index"]
        self.session_commands = ["load", "save", "reflect", "select-tool"]

        # All commands combined
        self.all_commands = (
            self.essential_commands + self.development_commands +
            self.analysis_commands + self.quality_commands +
            self.project_mgmt_commands + self.utility_commands +
            self.session_commands
        )

        # Documented flags from flags.md
        self.analysis_flags = ["--think", "--think-hard", "--ultrathink"]
        self.mode_flags = ["--brainstorm", "--introspect", "--task-manage"]
        self.efficiency_flags = ["--uc", "--token-efficient", "--orchestrate"]
        self.mcp_flags = [
            "--c7", "--context7", "--seq", "--sequential", "--magic",
            "--morph", "--morphllm", "--serena", "--play", "--playwright",
            "--all-mcp", "--no-mcp"
        ]
        self.focus_flags = [
            "--focus security", "--focus performance", "--focus quality",
            "--focus architecture", "--focus accessibility", "--focus testing"
        ]
        self.safety_flags = ["--safe-mode", "--validate", "--dry-run", "--backup"]
        self.execution_flags = [
            "--parallel", "--sequential", "--concurrency 2", "--scope file",
            "--scope module", "--scope project", "--scope system"
        ]

        # All flags combined
        self.all_flags = (
            self.analysis_flags + self.mode_flags + self.efficiency_flags +
            self.mcp_flags + self.focus_flags + self.safety_flags +
            self.execution_flags
        )
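
    # Note: self.all_flags is kept as a reference inventory of the documented flags;
    # the validators below check flag syntax via _is_valid_flag_syntax() rather than
    # consulting this combined list directly.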

    def log(self, message: str, level: str = "INFO"):
        """Log message with timestamp and level."""
        if self.verbose or level in ["ERROR", "WARNING"]:
            timestamp = datetime.now().strftime("%H:%M:%S")
            print(f"[{timestamp}] {level}: {message}")

    def run_command_test(self, command: str, timeout: int = 30) -> Tuple[bool, str, float]:
        """
        Attempt to run a SuperClaude command in a controlled way.

        Note: This simulates command execution since actual SuperClaude commands
        require active Claude Code session context.
        """
        start_time = time.time()
        try:
            # For validation purposes, we'll check command syntax and structure.
            # In a real deployment, this would interface with Claude Code.
            if not command.startswith("/sc:"):
                return False, "Invalid command format - must start with /sc:", time.time() - start_time
            cmd_name = command.split()[0][4:]  # Remove /sc: prefix
            if cmd_name not in self.all_commands:
                return False, f"Unknown command: {cmd_name}", time.time() - start_time

            # Simulate basic validation checks
            time.sleep(0.1)  # Simulate processing time

            # Check for obvious syntax errors
            if "--" in command:
                flags = [part for part in command.split() if part.startswith("--")]
                for flag in flags:
                    if not self._is_valid_flag_syntax(flag):
                        return False, f"Invalid flag syntax: {flag}", time.time() - start_time

                # Check for contradictory flag combinations
                conflict_result = self._check_flag_conflicts(flags)
                if conflict_result:
                    return False, conflict_result, time.time() - start_time

            execution_time = time.time() - start_time
            return True, f"Command syntax valid: {command}", execution_time
        except Exception as e:
            execution_time = time.time() - start_time
            return False, f"Command test failed: {str(e)}", execution_time

    def _is_valid_flag_syntax(self, flag: str) -> bool:
        """Validate flag syntax against documented patterns."""
        # Remove values for validation (e.g., "--concurrency 2" -> "--concurrency")
        base_flag = flag.split()[0] if " " in flag else flag

        valid_flag_patterns = [
            # Analysis flags
            "--think", "--think-hard", "--ultrathink",
            # Mode flags
            "--brainstorm", "--introspect", "--task-manage", "--delegate",
            # Efficiency flags
            "--uc", "--ultracompressed", "--token-efficient", "--orchestrate",
            # MCP flags
            "--c7", "--context7", "--seq", "--sequential", "--magic",
            "--morph", "--morphllm", "--serena", "--play", "--playwright",
            "--all-mcp", "--no-mcp",
            # Focus flags (special case with values)
            "--focus",
            # Safety flags
            "--safe-mode", "--validate", "--dry-run", "--backup",
            # Execution flags
            "--parallel", "--sequential", "--concurrency", "--scope",
            # Build and optimization flags
            "--optimize", "--target", "--fix-errors", "--deps-install",
            # Test flags
            "--coverage", "--fix", "--watch", "--smoke", "--related-tests",
            "--browsers", "--type", "--report",
            # Documentation flags
            "--type", "--format", "--inline", "--audience",
            # Improvement flags
            "--fix", "--preview", "--safe-mode", "--measure-impact",
            # Task management flags
            "--breakdown", "--priority", "--detailed", "--estimates",
            # Additional common flags
            "--verbose", "--quiet", "--help", "--format", "--export",
            "--depth", "--strategy", "--level", "--confirm-before-delete"
        ]
        return base_flag in valid_flag_patterns

    def _check_flag_conflicts(self, flags: List[str]) -> Optional[str]:
        """Check for contradictory flag combinations."""
        base_flags = [flag.split()[0] for flag in flags]

        # Define contradictory flag pairs
        conflicts = [
            ("--all-mcp", "--no-mcp", "Cannot use all MCP servers and no MCP servers simultaneously"),
            ("--parallel", "--sequential", "Cannot use parallel and sequential execution simultaneously"),
            ("--verbose", "--quiet", "Cannot use verbose and quiet modes simultaneously"),
            ("--think", "--no-mcp", "Deep thinking modes require MCP servers (--think conflicts with --no-mcp)"),
            ("--think-hard", "--no-mcp", "Deep thinking modes require MCP servers (--think-hard conflicts with --no-mcp)"),
            ("--ultrathink", "--no-mcp", "Deep thinking modes require MCP servers (--ultrathink conflicts with --no-mcp)"),
        ]
        for flag1, flag2, message in conflicts:
            if flag1 in base_flags and flag2 in base_flags:
                return f"Flag conflict: {message}"

        # Check for invalid focus domain values
        focus_flags = [flag for flag in flags if flag.startswith("--focus")]
        for focus_flag in focus_flags:
            if " " in focus_flag:
                domain = focus_flag.split(" ", 1)[1]
                valid_domains = ["security", "performance", "quality", "architecture", "accessibility", "testing"]
                if domain not in valid_domains:
                    return f"Invalid focus domain: {domain}. Valid domains: {', '.join(valid_domains)}"
        return None
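
    # Example results, derived from the conflict table above:
    #   _check_flag_conflicts(["--parallel", "--sequential"])
    #   -> "Flag conflict: Cannot use parallel and sequential execution simultaneously"
    #   _check_flag_conflicts(["--focus security"]) -> None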

    def validate_command_syntax(self) -> None:
        """Test basic command syntax validation."""
        self.log("Starting command syntax validation...")
        for cmd in self.all_commands:
            test_command = f"/sc:{cmd}"
            success, message, exec_time = self.run_command_test(test_command)
            result = TestResult(
                name=f"Syntax: {cmd}",
                category="Command Syntax",
                command=test_command,
                expected_behavior="Valid command syntax recognized",
                result=ValidationResult.PASS if success else ValidationResult.FAIL,
                message=message,
                execution_time=exec_time
            )
            self.results.append(result)

    def validate_flag_combinations(self) -> None:
        """Test documented flag combinations."""
        self.log("Starting flag combination validation...")
        # Test common flag combinations from documentation
        test_combinations = [
            # Analysis combinations
            ("/sc:analyze src/ --think", "Standard analysis with structured thinking"),
            ("/sc:analyze --focus security --think-hard", "Deep security analysis"),
            ("/sc:troubleshoot 'issue' --ultrathink --seq", "Maximum troubleshooting"),
            # Development combinations
            ("/sc:implement 'feature' --magic --c7", "UI feature with patterns"),
            ("/sc:improve code/ --morph --serena", "Code improvement with context"),
            ("/sc:build --optimize --validate", "Safe production build"),
            # Workflow combinations
            ("/sc:brainstorm 'idea' --think --c7", "Structured brainstorming"),
            ("/sc:task 'complex' --task-manage --delegate", "Complex task coordination"),
            ("/sc:test --coverage --play", "Comprehensive testing"),
            # Safety combinations
            ("/sc:improve production/ --safe-mode --backup", "Safe production changes"),
            ("/sc:cleanup legacy/ --dry-run --validate", "Preview cleanup"),
            # Efficiency combinations
            ("/sc:analyze large/ --uc --scope module", "Efficient scoped analysis"),
            ("/sc:implement 'simple' --no-mcp", "Lightweight implementation"),
        ]
        for command, description in test_combinations:
            success, message, exec_time = self.run_command_test(command)
            result = TestResult(
                name=f"Combo: {description}",
                category="Flag Combinations",
                command=command,
                expected_behavior=description,
                result=ValidationResult.PASS if success else ValidationResult.FAIL,
                message=message,
                execution_time=exec_time
            )
            self.results.append(result)

    def validate_mcp_server_flags(self) -> None:
        """Test MCP server activation flags."""
        self.log("Starting MCP server flag validation...")
        mcp_tests = [
            ("--c7", "Context7 server for documentation"),
            ("--seq", "Sequential server for reasoning"),
            ("--magic", "Magic server for UI components"),
            ("--morph", "Morphllm server for transformations"),
            ("--serena", "Serena server for project memory"),
            ("--play", "Playwright server for browser testing"),
            ("--all-mcp", "All MCP servers activated"),
            ("--no-mcp", "No MCP servers, native only"),
        ]
        for flag, description in mcp_tests:
            command = f"/sc:analyze test/ {flag}"
            success, message, exec_time = self.run_command_test(command)
            result = TestResult(
                name=f"MCP: {flag}",
                category="MCP Server Flags",
                command=command,
                expected_behavior=description,
                result=ValidationResult.PASS if success else ValidationResult.FAIL,
                message=message,
                execution_time=exec_time
            )
            self.results.append(result)

    def validate_focus_flags(self) -> None:
        """Test domain focus flags."""
        self.log("Starting focus flag validation...")
        focus_domains = [
            "security", "performance", "quality",
            "architecture", "accessibility", "testing"
        ]
        for domain in focus_domains:
            command = f"/sc:analyze code/ --focus {domain}"
            success, message, exec_time = self.run_command_test(command)
            result = TestResult(
                name=f"Focus: {domain}",
                category="Focus Flags",
                command=command,
                expected_behavior=f"Analysis focused on {domain} domain",
                result=ValidationResult.PASS if success else ValidationResult.FAIL,
                message=message,
                execution_time=exec_time
            )
            self.results.append(result)

    def validate_workflow_examples(self) -> None:
        """Test documented workflow examples."""
        self.log("Starting workflow example validation...")
        workflows = [
            # New Project Setup workflow
            [
                "/sc:brainstorm 'project concept'",
                "/sc:design 'system architecture'",
                "/sc:workflow 'implementation plan'",
                "/sc:save 'project-plan'"
            ],
            # Feature Development workflow
            [
                "/sc:load 'project-context'",
                "/sc:implement 'feature name'",
                "/sc:test --coverage",
                "/sc:document --type api"
            ],
            # Bug Investigation workflow
            [
                "/sc:troubleshoot 'issue description'",
                "/sc:analyze --focus problem-area",
                "/sc:improve --fix --safe-mode",
                "/sc:test --related-tests"
            ]
        ]
        for i, workflow in enumerate(workflows):
            workflow_name = f"Workflow {i+1}"
            all_valid = True
            messages = []
            total_time = 0
            for step, command in enumerate(workflow):
                success, message, exec_time = self.run_command_test(command)
                total_time += exec_time
                if not success:
                    all_valid = False
                    messages.append(f"Step {step+1} failed: {message}")
                else:
                    messages.append(f"Step {step+1} passed")
            result = TestResult(
                name=workflow_name,
                category="Workflow Examples",
command="".join(workflow),
expected_behavior="Complete workflow execution",
result=ValidationResult.PASS if all_valid else ValidationResult.FAIL,
message="; ".join(messages),
execution_time=total_time
)
self.results.append(result)

    def validate_error_conditions(self) -> None:
        """Test error handling for invalid inputs."""
        self.log("Starting error condition validation...")
        error_tests = [
            # Invalid commands
            ("/sc:invalid-command", "Should reject unknown commands"),
            ("/invalid:format", "Should reject invalid command format"),
            ("sc:missing-slash", "Should reject missing slash prefix"),
            # Invalid flag combinations
            ("/sc:analyze --all-mcp --no-mcp", "Should handle contradictory flags"),
            ("/sc:implement --invalid-flag", "Should reject unknown flags"),
            ("/sc:test --focus invalid-domain", "Should reject invalid focus domains"),
            # Malformed syntax
            ("/sc:analyze --", "Should handle incomplete flags"),
            ("/sc:implement ''", "Should handle empty arguments"),
        ]
        for command, expected_behavior in error_tests:
            success, message, exec_time = self.run_command_test(command)
            # For error tests, we expect failure (proper error handling)
            expected_to_fail = True
            actual_result = ValidationResult.PASS if not success else ValidationResult.FAIL
            result = TestResult(
                name=f"Error: {command.split()[0] if command.split() else 'malformed'}",
                category="Error Handling",
                command=command,
                expected_behavior=expected_behavior,
                result=actual_result,
                message=message,
                execution_time=exec_time
            )
            self.results.append(result)

    def check_system_requirements(self) -> None:
        """Validate system setup and requirements."""
        self.log("Checking system requirements...")

        # Check Python version
        python_version = sys.version_info
        python_ok = python_version >= (3, 8)
        result = TestResult(
            name="Python Version",
            category="System Requirements",
            command="python --version",
            expected_behavior="Python 3.8+",
            result=ValidationResult.PASS if python_ok else ValidationResult.FAIL,
            message=f"Python {python_version.major}.{python_version.minor}.{python_version.micro}",
            execution_time=0.0
        )
        self.results.append(result)

        # Check if we're in SuperClaude project directory
        current_dir = Path.cwd()
        is_superclaude_project = (
            (current_dir / "SuperClaude").exists() or
            (current_dir / "pyproject.toml").exists() and "SuperClaude" in (current_dir / "pyproject.toml").read_text()
        )
        result = TestResult(
            name="Project Directory",
            category="System Requirements",
            command="pwd",
            expected_behavior="In SuperClaude project directory",
            result=ValidationResult.PASS if is_superclaude_project else ValidationResult.WARNING,
            message=f"Current directory: {current_dir}",
            execution_time=0.0
        )
        self.results.append(result)

    def run_integration_tests(self) -> None:
        """Run integration tests simulating real usage."""
        self.log("Starting integration tests...")

        # Test session lifecycle
        session_commands = [
            "/sc:load test-project/",
            "/sc:analyze src/ --think",
            "/sc:implement 'test feature' --magic",
            "/sc:save 'test-session'"
        ]
        session_valid = True
        session_messages = []
        session_time = 0
        for command in session_commands:
            success, message, exec_time = self.run_command_test(command)
            session_time += exec_time
            if success:
                session_messages.append(f"✅ {command}")
            else:
                session_valid = False
                session_messages.append(f"❌ {command}: {message}")
        result = TestResult(
            name="Session Lifecycle",
            category="Integration Tests",
            command=" → ".join(session_commands),
            expected_behavior="Complete session management workflow",
            result=ValidationResult.PASS if session_valid else ValidationResult.FAIL,
            message="; ".join(session_messages),
            execution_time=session_time
        )
        self.results.append(result)

    def generate_report(self) -> Dict:
        """Generate comprehensive validation report."""
        total_tests = len(self.results)
        passed_tests = len([r for r in self.results if r.result == ValidationResult.PASS])
        failed_tests = len([r for r in self.results if r.result == ValidationResult.FAIL])
        warning_tests = len([r for r in self.results if r.result == ValidationResult.WARNING])
        success_rate = (passed_tests / total_tests * 100) if total_tests > 0 else 0
        execution_time = (datetime.now() - self.start_time).total_seconds()

        # Group results by category
        categories = {}
        for result in self.results:
            if result.category not in categories:
                categories[result.category] = []
            categories[result.category].append(result)

        report = {
            "timestamp": self.start_time.isoformat(),
            "execution_time_seconds": execution_time,
            "summary": {
                "total_tests": total_tests,
                "passed": passed_tests,
                "failed": failed_tests,
                "warnings": warning_tests,
                "success_rate_percent": round(success_rate, 2)
            },
            "categories": {}
        }
        for category, tests in categories.items():
            category_passed = len([t for t in tests if t.result == ValidationResult.PASS])
            category_total = len(tests)
            category_rate = (category_passed / category_total * 100) if category_total > 0 else 0
            report["categories"][category] = {
                "success_rate": round(category_rate, 2),
                "total": category_total,
                "passed": category_passed,
                "failed": len([t for t in tests if t.result == ValidationResult.FAIL]),
                "tests": [asdict(test) for test in tests]
            }
        return report
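
    # The generated report has roughly this shape (values illustrative):
    #   {
    #     "timestamp": "...", "execution_time_seconds": 1.23,
    #     "summary": {"total_tests": 40, "passed": 38, "failed": 1,
    #                 "warnings": 1, "success_rate_percent": 95.0},
    #     "categories": {"Command Syntax": {"success_rate": 100.0, "total": 20,
    #                    "passed": 20, "failed": 0, "tests": [...]}}
    #   }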

    def print_summary(self) -> None:
        """Print validation summary to console."""
        report = self.generate_report()
        summary = report["summary"]

        print("\n" + "="*60)
        print("🧪 SUPERCLAUDE COMMAND VALIDATION SUMMARY")
        print("="*60)
        print(f"⏱️ Execution Time: {report['execution_time_seconds']:.2f} seconds")
        print(f"📊 Success Rate: {summary['success_rate_percent']}%")
        print(f"✅ Passed: {summary['passed']}")
        print(f"❌ Failed: {summary['failed']}")
        print(f"⚠️ Warnings: {summary['warnings']}")
        print(f"📈 Total Tests: {summary['total_tests']}")

        # Category breakdown
        print("\n📂 CATEGORY BREAKDOWN:")
        for category, data in report["categories"].items():
            status_icon = "✅" if data["success_rate"] >= 90 else "⚠️" if data["success_rate"] >= 70 else "❌"
            print(f"{status_icon} {category}: {data['success_rate']:.1f}% ({data['passed']}/{data['total']})")

        # Failed tests detail
        failed_results = [r for r in self.results if r.result == ValidationResult.FAIL]
        if failed_results:
            print(f"\n❌ FAILED TESTS ({len(failed_results)}):")
            for result in failed_results:
                print(f"{result.category}: {result.name}")
                print(f" Command: {result.command}")
                print(f" Error: {result.message}")

        # Warnings detail
        warning_results = [r for r in self.results if r.result == ValidationResult.WARNING]
        if warning_results:
            print(f"\n⚠️ WARNINGS ({len(warning_results)}):")
            for result in warning_results:
                print(f"{result.category}: {result.name}")
                print(f" Message: {result.message}")

        print("\n" + "="*60)

    def export_report(self, filename: str = None) -> str:
        """Export detailed report to JSON file."""
        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"superclaude_validation_report_{timestamp}.json"
        report = self.generate_report()
        with open(filename, 'w') as f:
            json.dump(report, f, indent=2, default=str)
        self.log(f"Report exported to: {filename}")
        return filename

    def run_all_validations(self) -> None:
        """Execute complete validation suite."""
        print("🚀 Starting SuperClaude Framework validation...")
        print(f"📅 Time: {self.start_time.strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"🏃 Mode: {'Quick' if self.quick_mode else 'Comprehensive'}")
        print()

        # System requirements check
        self.check_system_requirements()

        # Core validations
        self.validate_command_syntax()
        if not self.quick_mode:
            self.validate_flag_combinations()
            self.validate_mcp_server_flags()
            self.validate_focus_flags()
            self.validate_workflow_examples()
            self.validate_error_conditions()
            self.run_integration_tests()

        self.log("Validation suite completed")


def main():
    """Main execution function."""
    parser = argparse.ArgumentParser(
        description="Validate SuperClaude Framework commands and flags",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python3 validate_commands.py                  # Full validation
  python3 validate_commands.py --quick          # Quick syntax check only
  python3 validate_commands.py --verbose        # Detailed logging
  python3 validate_commands.py --export-report  # Export JSON report
"""
    )
    parser.add_argument(
        "--quick",
        action="store_true",
        help="Run quick validation (syntax only)"
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose logging"
    )
    parser.add_argument(
        "--export-report",
        action="store_true",
        help="Export detailed JSON report"
    )
    parser.add_argument(
        "--report-file",
        type=str,
        help="Custom report filename"
    )
    args = parser.parse_args()

    # Initialize validator
    validator = SuperClaudeValidator(
        verbose=args.verbose,
        quick_mode=args.quick
    )
    try:
        # Run validation suite
        validator.run_all_validations()

        # Print summary
        validator.print_summary()

        # Export report if requested
        if args.export_report:
            report_file = validator.export_report(args.report_file)
            print(f"\n📄 Detailed report saved: {report_file}")

        # Exit code based on results
        failed_count = len([r for r in validator.results if r.result == ValidationResult.FAIL])
        exit_code = 1 if failed_count > 0 else 0
        if exit_code == 0:
            print("🎉 All validations passed!")
        else:
            print(f"⚠️ {failed_count} validation(s) failed. See details above.")
        sys.exit(exit_code)
    except KeyboardInterrupt:
        print("\n🛑 Validation interrupted by user")
        sys.exit(130)
    except Exception as e:
        print(f"\n💥 Validation failed with error: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
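
# A minimal CI usage sketch (the pipeline integration and report filename below are
# assumptions for illustration, not part of the documented interface):
#   python3 validate_commands.py --quick --export-report --report-file ci_validation.json
# The script exits with code 0 when no test fails and 1 when at least one test fails
# (warnings do not affect the exit code), so the command can gate a pipeline stage
# directly; --report-file is optional and defaults to a timestamped
# superclaude_validation_report_<timestamp>.json in the working directory.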