mirror of
https://github.com/SuperClaude-Org/SuperClaude_Framework.git
synced 2025-12-29 16:16:08 +00:00
feat: add comprehensive test suite, CI/CD workflows, and install command
Major improvements to SuperClaude Framework infrastructure and testing: ## New Features - Add 'superclaude install' command to install slash commands (/research, /index-repo, /agent, /recommend) - Create comprehensive test suite with 71 tests (70 passing, 1 skipped) - Add GitHub Actions CI/CD workflows for automated testing - Add essential documentation files (PLANNING.md, TASK.md, KNOWLEDGE.md) ## Testing - tests/unit/: 59 tests covering PM Agent components - test_confidence.py: 13 tests for ConfidenceChecker - test_self_check.py: 14 tests for SelfCheckProtocol - test_reflexion.py: 9 tests for ReflexionPattern - test_token_budget.py: 12 tests for TokenBudgetManager - test_cli_install.py: 12 tests for install command (NEW) - tests/integration/: 11 tests for pytest plugin integration - tests/conftest.py: Shared fixtures for all tests ## CI/CD Workflows - .github/workflows/test.yml: Comprehensive test matrix - Tests on Python 3.10, 3.11, 3.12 - Lint and format checks with ruff - Pytest plugin verification - SuperClaude doctor health checks - Coverage reporting with Codecov - .github/workflows/quick-check.yml: Fast PR validation (~2-3 min) - .github/workflows/README.md: Workflow documentation ## Documentation - PLANNING.md: Architecture, design principles, absolute rules - TASK.md: Current tasks, priorities, backlog - KNOWLEDGE.md: Accumulated insights, best practices, troubleshooting ## Bug Fixes - Fix .gitignore contradictions (remove conflicting Claude Code patterns) - Fix TokenBudgetManager to properly validate and default invalid complexity - Update package.json version to 4.1.6 (sync with VERSION file) ## CLI Improvements - src/superclaude/cli/install_commands.py: Command installation logic - src/superclaude/cli/main.py: Add 'install' command with --list and --force options - README.md: Update installation instructions with correct commands ## Breaking Changes None - all changes are backwards compatible ## Migration Guide Users should run 'superclaude install' 
after upgrading to install slash commands Fixes #466 (indirectly by clarifying installation process) Refs #419 (plugin system - documentation updated) 🤖 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
235
tests/unit/test_self_check.py
Normal file
235
tests/unit/test_self_check.py
Normal file
@@ -0,0 +1,235 @@
|
||||
"""
|
||||
Unit tests for SelfCheckProtocol
|
||||
|
||||
Tests post-implementation validation functionality.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from superclaude.pm_agent.self_check import SelfCheckProtocol
|
||||
|
||||
|
||||
class TestSelfCheckProtocol:
    """Unit tests for SelfCheckProtocol.

    Covers the public validate() entry point, each private check helper,
    hallucination detection, and report formatting.
    """

    def test_validate_passing_implementation(self, sample_implementation):
        """A complete implementation clears every validation question.

        All four checks must succeed: tests passing, requirements met,
        assumptions verified, and evidence provided.
        """
        checker = SelfCheckProtocol()
        passed, issues = checker.validate(sample_implementation)

        assert passed is True, f"Expected validation to pass, got issues: {issues}"
        assert len(issues) == 0, f"Expected no issues, got {len(issues)}: {issues}"

    def test_validate_failing_implementation(self, failing_implementation):
        """An incomplete implementation fails and reports concrete issues."""
        checker = SelfCheckProtocol()
        passed, issues = checker.validate(failing_implementation)

        assert passed is False, "Expected validation to fail"
        assert len(issues) > 0, "Expected issues to be detected"

        # At least one reported issue should point at the failing tests.
        combined = " ".join(issues)
        assert "Tests not passing" in combined or "test" in combined.lower()

    def test_check_tests_passing_with_output(self):
        """A tests_passed claim is only trusted when backed by real output."""
        checker = SelfCheckProtocol()

        # Claim backed by output is accepted.
        backed_claim = {
            "tests_passed": True,
            "test_output": "✅ 10 tests passed",
        }
        assert checker._check_tests_passing(backed_claim) is True

        # Same claim with empty output is rejected (hallucination guard).
        unbacked_claim = {
            "tests_passed": True,
            "test_output": "",
        }
        assert checker._check_tests_passing(unbacked_claim) is False

    def test_check_requirements_met(self):
        """_check_requirements_met returns exactly the unmet requirements."""
        checker = SelfCheckProtocol()

        # Every requirement satisfied -> nothing unmet.
        complete = {
            "requirements": ["A", "B", "C"],
            "requirements_met": ["A", "B", "C"],
        }
        assert len(checker._check_requirements_met(complete)) == 0

        # One requirement outstanding -> it is the only item returned.
        partial = {
            "requirements": ["A", "B", "C"],
            "requirements_met": ["A", "B"],
        }
        unmet = checker._check_requirements_met(partial)
        assert len(unmet) == 1
        assert "C" in unmet

    def test_check_assumptions_verified(self):
        """_check_assumptions_verified returns exactly the unverified ones."""
        checker = SelfCheckProtocol()

        # Every assumption verified -> nothing flagged.
        all_verified = {
            "assumptions": ["API is REST", "DB is PostgreSQL"],
            "assumptions_verified": ["API is REST", "DB is PostgreSQL"],
        }
        assert len(checker._check_assumptions_verified(all_verified)) == 0

        # One assumption left unverified -> it is the only item returned.
        partly_verified = {
            "assumptions": ["API is REST", "DB is PostgreSQL"],
            "assumptions_verified": ["API is REST"],
        }
        unverified = checker._check_assumptions_verified(partly_verified)
        assert len(unverified) == 1
        assert "DB is PostgreSQL" in unverified

    def test_check_evidence_exists(self):
        """All three evidence categories must be present to pass."""
        checker = SelfCheckProtocol()

        # Full evidence -> nothing missing.
        full_evidence = {
            "evidence": {
                "test_results": "Tests passed",
                "code_changes": ["file1.py"],
                "validation": "Linting passed",
            }
        }
        assert len(checker._check_evidence_exists(full_evidence)) == 0

        # Empty evidence dict -> every category reported missing.
        missing = checker._check_evidence_exists({"evidence": {}})
        assert len(missing) == 3
        for category in ("test_results", "code_changes", "validation"):
            assert category in missing

    def test_detect_hallucinations_tests_without_output(self):
        """Claiming tests pass with no output is flagged as a hallucination."""
        checker = SelfCheckProtocol()

        findings = checker._detect_hallucinations(
            {"tests_passed": True, "test_output": ""}
        )

        assert len(findings) > 0
        assert any("without showing output" in f for f in findings)

    def test_detect_hallucinations_complete_without_evidence(self):
        """Claiming completion without evidence is flagged."""
        checker = SelfCheckProtocol()

        findings = checker._detect_hallucinations(
            {"status": "complete", "evidence": None}
        )

        assert len(findings) > 0
        assert any("without evidence" in f for f in findings)

    def test_detect_hallucinations_complete_with_failing_tests(self):
        """Claiming completion while tests fail is flagged."""
        checker = SelfCheckProtocol()

        findings = checker._detect_hallucinations(
            {"status": "complete", "tests_passed": False}
        )

        assert len(findings) > 0
        assert any("failing tests" in f for f in findings)

    def test_detect_hallucinations_ignored_errors(self):
        """Claiming completion while errors/warnings remain is flagged."""
        checker = SelfCheckProtocol()

        findings = checker._detect_hallucinations(
            {
                "status": "complete",
                "errors": ["TypeError in module X"],
                "warnings": ["Deprecated function used"],
            }
        )

        assert len(findings) > 0
        assert any("errors/warnings" in f for f in findings)

    def test_detect_hallucinations_uncertainty_language(self):
        """Hedging words in the description are flagged."""
        checker = SelfCheckProtocol()

        findings = checker._detect_hallucinations(
            {"description": "This probably works and might be correct"}
        )

        assert len(findings) > 0
        assert any("Uncertainty language" in f for f in findings)

    def test_format_report_passing(self):
        """A passing report contains PASSED and the check-mark glyph."""
        report = SelfCheckProtocol().format_report(passed=True, issues=[])

        assert "PASSED" in report
        assert "✅" in report

    def test_format_report_failing(self):
        """A failing report contains FAILED and echoes every issue."""
        found_issues = [
            "❌ Tests not passing",
            "❌ Missing evidence: test_results",
        ]

        report = SelfCheckProtocol().format_report(passed=False, issues=found_issues)

        assert "FAILED" in report
        assert "❌" in report
        for issue in found_issues:
            assert issue in report
|
||||
@pytest.mark.self_check
def test_self_check_marker_integration(self_check_protocol, sample_implementation):
    """Exercise the self_check marker together with the plugin fixtures.

    Uses the protocol instance supplied by the pytest plugin rather than
    constructing one directly, validating the fixture wiring end to end.
    """
    result = self_check_protocol.validate(sample_implementation)
    passed, issues = result

    assert passed is True, f"Sample implementation should pass validation: {issues}"
    assert len(issues) == 0, "No issues should be detected in sample implementation"
|
||||
Reference in New Issue
Block a user