SuperClaude/Docs/Reference/validate_commands.py


#!/usr/bin/env python3
"""
SuperClaude Framework Command Validation Script

This script validates all documented SuperClaude commands and flags to ensure
documentation accuracy and system reliability.

Usage:
    python3 validate_commands.py [--quick] [--verbose] [--export-report]

Requirements:
- SuperClaude Framework installed
- Active Claude Code session
- MCP servers configured (for full validation)
"""

import sys
import os
import subprocess
import time
import json
import re
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Tuple, Optional, Set
from dataclasses import dataclass, asdict
from enum import Enum
import argparse


class ValidationResult(Enum):
    PASS = "✅"
    FAIL = "❌"
    WARNING = "⚠️"
    SKIP = "⏭️"
    UNKNOWN = "❓"


@dataclass
class TestResult:
    name: str
    category: str
    command: str
    expected_behavior: str
    result: ValidationResult
    message: str
    execution_time: float = 0.0
    details: Optional[Dict] = None


class SuperClaudeValidator:
    """Comprehensive validation system for SuperClaude commands and flags."""

    def __init__(self, verbose: bool = False, quick_mode: bool = False):
        self.verbose = verbose
        self.quick_mode = quick_mode
        self.results: List[TestResult] = []
        self.start_time = datetime.now()

        # Documented commands from commands.md
        self.essential_commands = [
            "brainstorm", "implement", "analyze", "troubleshoot",
            "test", "improve", "document", "workflow"
        ]
        self.development_commands = ["build", "design"]
        self.analysis_commands = ["explain"]
        self.quality_commands = ["cleanup"]
        self.project_mgmt_commands = ["estimate", "task", "spawn"]
        self.utility_commands = ["git", "index"]
        self.session_commands = ["load", "save", "reflect", "select-tool"]

        # All commands combined
        self.all_commands = (
            self.essential_commands + self.development_commands +
            self.analysis_commands + self.quality_commands +
            self.project_mgmt_commands + self.utility_commands +
            self.session_commands
        )

        # Documented flags from flags.md
        self.analysis_flags = ["--think", "--think-hard", "--ultrathink"]
        self.mode_flags = ["--brainstorm", "--introspect", "--task-manage"]
        self.efficiency_flags = ["--uc", "--token-efficient", "--orchestrate"]
        self.mcp_flags = [
            "--c7", "--context7", "--seq", "--sequential", "--magic",
            "--morph", "--morphllm", "--serena", "--play", "--playwright",
            "--all-mcp", "--no-mcp"
        ]
        self.focus_flags = [
            "--focus security", "--focus performance", "--focus quality",
            "--focus architecture", "--focus accessibility", "--focus testing"
        ]
        self.safety_flags = ["--safe-mode", "--validate", "--dry-run", "--backup"]
        self.execution_flags = [
            "--parallel", "--sequential", "--concurrency 2", "--scope file",
            "--scope module", "--scope project", "--scope system"
        ]

        # All flags combined
        self.all_flags = (
            self.analysis_flags + self.mode_flags + self.efficiency_flags +
            self.mcp_flags + self.focus_flags + self.safety_flags +
            self.execution_flags
        )
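
    # Note: self.all_flags is kept as a reference inventory of the documented flags;
    # the validators below check flag syntax via _is_valid_flag_syntax() rather than
    # consulting this combined list directly.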

    def log(self, message: str, level: str = "INFO"):
        """Log message with timestamp and level."""
        if self.verbose or level in ["ERROR", "WARNING"]:
            timestamp = datetime.now().strftime("%H:%M:%S")
            print(f"[{timestamp}] {level}: {message}")

    def run_command_test(self, command: str, timeout: int = 30) -> Tuple[bool, str, float]:
        """
        Attempt to run a SuperClaude command in a controlled way.

        Note: This simulates command execution since actual SuperClaude commands
        require active Claude Code session context.
        """
        start_time = time.time()
        try:
            # For validation purposes, we'll check command syntax and structure.
            # In a real deployment, this would interface with Claude Code.
            if not command.startswith("/sc:"):
                return False, "Invalid command format - must start with /sc:", time.time() - start_time
            cmd_name = command.split()[0][4:]  # Remove /sc: prefix
            if cmd_name not in self.all_commands:
                return False, f"Unknown command: {cmd_name}", time.time() - start_time

            # Simulate basic validation checks
            time.sleep(0.1)  # Simulate processing time

            # Check for obvious syntax errors
            if "--" in command:
                flags = [part for part in command.split() if part.startswith("--")]
                for flag in flags:
                    if not self._is_valid_flag_syntax(flag):
                        return False, f"Invalid flag syntax: {flag}", time.time() - start_time

                # Check for contradictory flag combinations
                conflict_result = self._check_flag_conflicts(flags)
                if conflict_result:
                    return False, conflict_result, time.time() - start_time

            execution_time = time.time() - start_time
            return True, f"Command syntax valid: {command}", execution_time
        except Exception as e:
            execution_time = time.time() - start_time
            return False, f"Command test failed: {str(e)}", execution_time

    def _is_valid_flag_syntax(self, flag: str) -> bool:
        """Validate flag syntax against documented patterns."""
        # Remove values for validation (e.g., "--concurrency 2" -> "--concurrency")
        base_flag = flag.split()[0] if " " in flag else flag

        valid_flag_patterns = [
            # Analysis flags
            "--think", "--think-hard", "--ultrathink",
            # Mode flags
            "--brainstorm", "--introspect", "--task-manage", "--delegate",
            # Efficiency flags
            "--uc", "--ultracompressed", "--token-efficient", "--orchestrate",
            # MCP flags
            "--c7", "--context7", "--seq", "--sequential", "--magic",
            "--morph", "--morphllm", "--serena", "--play", "--playwright",
            "--all-mcp", "--no-mcp",
            # Focus flags (special case with values)
            "--focus",
            # Safety flags
            "--safe-mode", "--validate", "--dry-run", "--backup",
            # Execution flags
            "--parallel", "--sequential", "--concurrency", "--scope",
            # Build and optimization flags
            "--optimize", "--target", "--fix-errors", "--deps-install",
            # Test flags
            "--coverage", "--fix", "--watch", "--smoke", "--related-tests",
            "--browsers", "--type", "--report",
            # Documentation flags
            "--type", "--format", "--inline", "--audience",
            # Improvement flags
            "--fix", "--preview", "--safe-mode", "--measure-impact",
            # Task management flags
            "--breakdown", "--priority", "--detailed", "--estimates",
            # Additional common flags
            "--verbose", "--quiet", "--help", "--format", "--export",
            "--depth", "--strategy", "--level", "--confirm-before-delete"
        ]
        return base_flag in valid_flag_patterns

    def _check_flag_conflicts(self, flags: List[str]) -> Optional[str]:
        """Check for contradictory flag combinations."""
        base_flags = [flag.split()[0] for flag in flags]

        # Define contradictory flag pairs
        conflicts = [
            ("--all-mcp", "--no-mcp", "Cannot use all MCP servers and no MCP servers simultaneously"),
            ("--parallel", "--sequential", "Cannot use parallel and sequential execution simultaneously"),
            ("--verbose", "--quiet", "Cannot use verbose and quiet modes simultaneously"),
            ("--think", "--no-mcp", "Deep thinking modes require MCP servers (--think conflicts with --no-mcp)"),
            ("--think-hard", "--no-mcp", "Deep thinking modes require MCP servers (--think-hard conflicts with --no-mcp)"),
            ("--ultrathink", "--no-mcp", "Deep thinking modes require MCP servers (--ultrathink conflicts with --no-mcp)"),
        ]
        for flag1, flag2, message in conflicts:
            if flag1 in base_flags and flag2 in base_flags:
                return f"Flag conflict: {message}"

        # Check for invalid focus domain values
        focus_flags = [flag for flag in flags if flag.startswith("--focus")]
        for focus_flag in focus_flags:
            if " " in focus_flag:
                domain = focus_flag.split(" ", 1)[1]
                valid_domains = ["security", "performance", "quality", "architecture", "accessibility", "testing"]
                if domain not in valid_domains:
                    return f"Invalid focus domain: {domain}. Valid domains: {', '.join(valid_domains)}"
        return None
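
    # Example results, derived from the conflict table above:
    #   _check_flag_conflicts(["--parallel", "--sequential"])
    #   -> "Flag conflict: Cannot use parallel and sequential execution simultaneously"
    #   _check_flag_conflicts(["--focus security"]) -> None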

    def validate_command_syntax(self) -> None:
        """Test basic command syntax validation."""
        self.log("Starting command syntax validation...")
        for cmd in self.all_commands:
            test_command = f"/sc:{cmd}"
            success, message, exec_time = self.run_command_test(test_command)
            result = TestResult(
                name=f"Syntax: {cmd}",
                category="Command Syntax",
                command=test_command,
                expected_behavior="Valid command syntax recognized",
                result=ValidationResult.PASS if success else ValidationResult.FAIL,
                message=message,
                execution_time=exec_time
            )
            self.results.append(result)

    def validate_flag_combinations(self) -> None:
        """Test documented flag combinations."""
        self.log("Starting flag combination validation...")
        # Test common flag combinations from documentation
        test_combinations = [
            # Analysis combinations
            ("/sc:analyze src/ --think", "Standard analysis with structured thinking"),
            ("/sc:analyze --focus security --think-hard", "Deep security analysis"),
            ("/sc:troubleshoot 'issue' --ultrathink --seq", "Maximum troubleshooting"),
            # Development combinations
            ("/sc:implement 'feature' --magic --c7", "UI feature with patterns"),
            ("/sc:improve code/ --morph --serena", "Code improvement with context"),
            ("/sc:build --optimize --validate", "Safe production build"),
            # Workflow combinations
            ("/sc:brainstorm 'idea' --think --c7", "Structured brainstorming"),
            ("/sc:task 'complex' --task-manage --delegate", "Complex task coordination"),
            ("/sc:test --coverage --play", "Comprehensive testing"),
            # Safety combinations
            ("/sc:improve production/ --safe-mode --backup", "Safe production changes"),
            ("/sc:cleanup legacy/ --dry-run --validate", "Preview cleanup"),
            # Efficiency combinations
            ("/sc:analyze large/ --uc --scope module", "Efficient scoped analysis"),
            ("/sc:implement 'simple' --no-mcp", "Lightweight implementation"),
        ]
        for command, description in test_combinations:
            success, message, exec_time = self.run_command_test(command)
            result = TestResult(
                name=f"Combo: {description}",
                category="Flag Combinations",
                command=command,
                expected_behavior=description,
                result=ValidationResult.PASS if success else ValidationResult.FAIL,
                message=message,
                execution_time=exec_time
            )
            self.results.append(result)

    def validate_mcp_server_flags(self) -> None:
        """Test MCP server activation flags."""
        self.log("Starting MCP server flag validation...")
        mcp_tests = [
            ("--c7", "Context7 server for documentation"),
            ("--seq", "Sequential server for reasoning"),
            ("--magic", "Magic server for UI components"),
            ("--morph", "Morphllm server for transformations"),
            ("--serena", "Serena server for project memory"),
            ("--play", "Playwright server for browser testing"),
            ("--all-mcp", "All MCP servers activated"),
            ("--no-mcp", "No MCP servers, native only"),
        ]
        for flag, description in mcp_tests:
            command = f"/sc:analyze test/ {flag}"
            success, message, exec_time = self.run_command_test(command)
            result = TestResult(
                name=f"MCP: {flag}",
                category="MCP Server Flags",
                command=command,
                expected_behavior=description,
                result=ValidationResult.PASS if success else ValidationResult.FAIL,
                message=message,
                execution_time=exec_time
            )
            self.results.append(result)

    def validate_focus_flags(self) -> None:
        """Test domain focus flags."""
        self.log("Starting focus flag validation...")
        focus_domains = [
            "security", "performance", "quality",
            "architecture", "accessibility", "testing"
        ]
        for domain in focus_domains:
            command = f"/sc:analyze code/ --focus {domain}"
            success, message, exec_time = self.run_command_test(command)
            result = TestResult(
                name=f"Focus: {domain}",
                category="Focus Flags",
                command=command,
                expected_behavior=f"Analysis focused on {domain} domain",
                result=ValidationResult.PASS if success else ValidationResult.FAIL,
                message=message,
                execution_time=exec_time
            )
            self.results.append(result)

    def validate_workflow_examples(self) -> None:
        """Test documented workflow examples."""
        self.log("Starting workflow example validation...")
        workflows = [
            # New Project Setup workflow
            [
                "/sc:brainstorm 'project concept'",
                "/sc:design 'system architecture'",
                "/sc:workflow 'implementation plan'",
                "/sc:save 'project-plan'"
            ],
            # Feature Development workflow
            [
                "/sc:load 'project-context'",
                "/sc:implement 'feature name'",
                "/sc:test --coverage",
                "/sc:document --type api"
            ],
            # Bug Investigation workflow
            [
                "/sc:troubleshoot 'issue description'",
                "/sc:analyze --focus problem-area",
                "/sc:improve --fix --safe-mode",
                "/sc:test --related-tests"
            ]
        ]
        for i, workflow in enumerate(workflows):
            workflow_name = f"Workflow {i+1}"
            all_valid = True
            messages = []
            total_time = 0
            for step, command in enumerate(workflow):
                success, message, exec_time = self.run_command_test(command)
                total_time += exec_time
                if not success:
                    all_valid = False
                    messages.append(f"Step {step+1} failed: {message}")
                else:
                    messages.append(f"Step {step+1} passed")
            result = TestResult(
                name=workflow_name,
                category="Workflow Examples",
command="".join(workflow),
expected_behavior="Complete workflow execution",
result=ValidationResult.PASS if all_valid else ValidationResult.FAIL,
message="; ".join(messages),
execution_time=total_time
)
self.results.append(result)

    def validate_error_conditions(self) -> None:
        """Test error handling for invalid inputs."""
        self.log("Starting error condition validation...")
        error_tests = [
            # Invalid commands
            ("/sc:invalid-command", "Should reject unknown commands"),
            ("/invalid:format", "Should reject invalid command format"),
            ("sc:missing-slash", "Should reject missing slash prefix"),
            # Invalid flag combinations
            ("/sc:analyze --all-mcp --no-mcp", "Should handle contradictory flags"),
            ("/sc:implement --invalid-flag", "Should reject unknown flags"),
            ("/sc:test --focus invalid-domain", "Should reject invalid focus domains"),
            # Malformed syntax
            ("/sc:analyze --", "Should handle incomplete flags"),
            ("/sc:implement ''", "Should handle empty arguments"),
        ]
        for command, expected_behavior in error_tests:
            success, message, exec_time = self.run_command_test(command)
            # For error tests, we expect failure (proper error handling)
            expected_to_fail = True
            actual_result = ValidationResult.PASS if not success else ValidationResult.FAIL
            result = TestResult(
                name=f"Error: {command.split()[0] if command.split() else 'malformed'}",
                category="Error Handling",
                command=command,
                expected_behavior=expected_behavior,
                result=actual_result,
                message=message,
                execution_time=exec_time
            )
            self.results.append(result)

    def check_system_requirements(self) -> None:
        """Validate system setup and requirements."""
        self.log("Checking system requirements...")

        # Check Python version
        python_version = sys.version_info
        python_ok = python_version >= (3, 8)
        result = TestResult(
            name="Python Version",
            category="System Requirements",
            command="python --version",
            expected_behavior="Python 3.8+",
            result=ValidationResult.PASS if python_ok else ValidationResult.FAIL,
            message=f"Python {python_version.major}.{python_version.minor}.{python_version.micro}",
            execution_time=0.0
        )
        self.results.append(result)

        # Check if we're in SuperClaude project directory
        current_dir = Path.cwd()
        is_superclaude_project = (
            (current_dir / "SuperClaude").exists() or
            (current_dir / "pyproject.toml").exists() and "SuperClaude" in (current_dir / "pyproject.toml").read_text()
        )
        result = TestResult(
            name="Project Directory",
            category="System Requirements",
            command="pwd",
            expected_behavior="In SuperClaude project directory",
            result=ValidationResult.PASS if is_superclaude_project else ValidationResult.WARNING,
            message=f"Current directory: {current_dir}",
            execution_time=0.0
        )
        self.results.append(result)

    def run_integration_tests(self) -> None:
        """Run integration tests simulating real usage."""
        self.log("Starting integration tests...")

        # Test session lifecycle
        session_commands = [
            "/sc:load test-project/",
            "/sc:analyze src/ --think",
            "/sc:implement 'test feature' --magic",
            "/sc:save 'test-session'"
        ]
        session_valid = True
        session_messages = []
        session_time = 0
        for command in session_commands:
            success, message, exec_time = self.run_command_test(command)
            session_time += exec_time
            if success:
                session_messages.append(f"✅ {command}")
            else:
                session_valid = False
                session_messages.append(f"❌ {command}: {message}")
        result = TestResult(
            name="Session Lifecycle",
            category="Integration Tests",
            command=" → ".join(session_commands),
            expected_behavior="Complete session management workflow",
            result=ValidationResult.PASS if session_valid else ValidationResult.FAIL,
            message="; ".join(session_messages),
            execution_time=session_time
        )
        self.results.append(result)

    def generate_report(self) -> Dict:
        """Generate comprehensive validation report."""
        total_tests = len(self.results)
        passed_tests = len([r for r in self.results if r.result == ValidationResult.PASS])
        failed_tests = len([r for r in self.results if r.result == ValidationResult.FAIL])
        warning_tests = len([r for r in self.results if r.result == ValidationResult.WARNING])
        success_rate = (passed_tests / total_tests * 100) if total_tests > 0 else 0
        execution_time = (datetime.now() - self.start_time).total_seconds()

        # Group results by category
        categories = {}
        for result in self.results:
            if result.category not in categories:
                categories[result.category] = []
            categories[result.category].append(result)

        report = {
            "timestamp": self.start_time.isoformat(),
            "execution_time_seconds": execution_time,
            "summary": {
                "total_tests": total_tests,
                "passed": passed_tests,
                "failed": failed_tests,
                "warnings": warning_tests,
                "success_rate_percent": round(success_rate, 2)
            },
            "categories": {}
        }
        for category, tests in categories.items():
            category_passed = len([t for t in tests if t.result == ValidationResult.PASS])
            category_total = len(tests)
            category_rate = (category_passed / category_total * 100) if category_total > 0 else 0
            report["categories"][category] = {
                "success_rate": round(category_rate, 2),
                "total": category_total,
                "passed": category_passed,
                "failed": len([t for t in tests if t.result == ValidationResult.FAIL]),
                "tests": [asdict(test) for test in tests]
            }
        return report
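
    # The generated report has roughly this shape (values illustrative):
    #   {
    #     "timestamp": "...", "execution_time_seconds": 1.23,
    #     "summary": {"total_tests": 40, "passed": 38, "failed": 1,
    #                 "warnings": 1, "success_rate_percent": 95.0},
    #     "categories": {"Command Syntax": {"success_rate": 100.0, "total": 20,
    #                    "passed": 20, "failed": 0, "tests": [...]}}
    #   }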

    def print_summary(self) -> None:
        """Print validation summary to console."""
        report = self.generate_report()
        summary = report["summary"]

        print("\n" + "="*60)
        print("🧪 SUPERCLAUDE COMMAND VALIDATION SUMMARY")
        print("="*60)
        print(f"⏱️ Execution Time: {report['execution_time_seconds']:.2f} seconds")
        print(f"📊 Success Rate: {summary['success_rate_percent']}%")
        print(f"✅ Passed: {summary['passed']}")
        print(f"❌ Failed: {summary['failed']}")
        print(f"⚠️ Warnings: {summary['warnings']}")
        print(f"📈 Total Tests: {summary['total_tests']}")

        # Category breakdown
        print("\n📂 CATEGORY BREAKDOWN:")
        for category, data in report["categories"].items():
            status_icon = "✅" if data["success_rate"] >= 90 else "⚠️" if data["success_rate"] >= 70 else "❌"
            print(f"{status_icon} {category}: {data['success_rate']:.1f}% ({data['passed']}/{data['total']})")

        # Failed tests detail
        failed_results = [r for r in self.results if r.result == ValidationResult.FAIL]
        if failed_results:
            print(f"\n❌ FAILED TESTS ({len(failed_results)}):")
            for result in failed_results:
                print(f"{result.category}: {result.name}")
                print(f" Command: {result.command}")
                print(f" Error: {result.message}")

        # Warnings detail
        warning_results = [r for r in self.results if r.result == ValidationResult.WARNING]
        if warning_results:
            print(f"\n⚠️ WARNINGS ({len(warning_results)}):")
            for result in warning_results:
                print(f"{result.category}: {result.name}")
                print(f" Message: {result.message}")

        print("\n" + "="*60)

    def export_report(self, filename: str = None) -> str:
        """Export detailed report to JSON file."""
        if filename is None:
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"superclaude_validation_report_{timestamp}.json"
        report = self.generate_report()
        with open(filename, 'w') as f:
            json.dump(report, f, indent=2, default=str)
        self.log(f"Report exported to: {filename}")
        return filename

    def run_all_validations(self) -> None:
        """Execute complete validation suite."""
        print("🚀 Starting SuperClaude Framework validation...")
        print(f"📅 Time: {self.start_time.strftime('%Y-%m-%d %H:%M:%S')}")
        print(f"🏃 Mode: {'Quick' if self.quick_mode else 'Comprehensive'}")
        print()

        # System requirements check
        self.check_system_requirements()

        # Core validations
        self.validate_command_syntax()
        if not self.quick_mode:
            self.validate_flag_combinations()
            self.validate_mcp_server_flags()
            self.validate_focus_flags()
            self.validate_workflow_examples()
            self.validate_error_conditions()
            self.run_integration_tests()

        self.log("Validation suite completed")


def main():
    """Main execution function."""
    parser = argparse.ArgumentParser(
        description="Validate SuperClaude Framework commands and flags",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python3 validate_commands.py                  # Full validation
  python3 validate_commands.py --quick          # Quick syntax check only
  python3 validate_commands.py --verbose        # Detailed logging
  python3 validate_commands.py --export-report  # Export JSON report
"""
    )
    parser.add_argument(
        "--quick",
        action="store_true",
        help="Run quick validation (syntax only)"
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        help="Enable verbose logging"
    )
    parser.add_argument(
        "--export-report",
        action="store_true",
        help="Export detailed JSON report"
    )
    parser.add_argument(
        "--report-file",
        type=str,
        help="Custom report filename"
    )
    args = parser.parse_args()

    # Initialize validator
    validator = SuperClaudeValidator(
        verbose=args.verbose,
        quick_mode=args.quick
    )
    try:
        # Run validation suite
        validator.run_all_validations()

        # Print summary
        validator.print_summary()

        # Export report if requested
        if args.export_report:
            report_file = validator.export_report(args.report_file)
            print(f"\n📄 Detailed report saved: {report_file}")

        # Exit code based on results
        failed_count = len([r for r in validator.results if r.result == ValidationResult.FAIL])
        exit_code = 1 if failed_count > 0 else 0
        if exit_code == 0:
            print("🎉 All validations passed!")
        else:
            print(f"⚠️ {failed_count} validation(s) failed. See details above.")
        sys.exit(exit_code)
    except KeyboardInterrupt:
        print("\n🛑 Validation interrupted by user")
        sys.exit(130)
    except Exception as e:
        print(f"\n💥 Validation failed with error: {e}")
        if args.verbose:
            import traceback
            traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
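
# A minimal CI usage sketch (the pipeline integration and report filename below are
# assumptions for illustration, not part of the documented interface):
#   python3 validate_commands.py --quick --export-report --report-file ci_validation.json
# The script exits with code 0 when no test fails and 1 when at least one test fails
# (warnings do not affect the exit code), so the command can gate a pipeline stage
# directly; --report-file is optional and defaults to a timestamped
# superclaude_validation_report_<timestamp>.json in the working directory.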