# SuperClaude/tests/unit/test_self_check.py
"""
Unit tests for SelfCheckProtocol
Tests post-implementation validation functionality.
"""
import pytest

from superclaude.pm_agent.self_check import SelfCheckProtocol


class TestSelfCheckProtocol:
    """Test suite for SelfCheckProtocol class"""

    def test_validate_passing_implementation(self, sample_implementation):
"""
Test validation of a complete, passing implementation
Should pass all four questions:
1. Tests passing?
2. Requirements met?
3. Assumptions verified?
4. Evidence provided?
"""
protocol = SelfCheckProtocol()
passed, issues = protocol.validate(sample_implementation)
assert passed is True, f"Expected validation to pass, got issues: {issues}"
assert len(issues) == 0, f"Expected no issues, got {len(issues)}: {issues}"

    def test_validate_failing_implementation(self, failing_implementation):
"""
Test validation of a failing implementation
Should fail multiple checks
"""
protocol = SelfCheckProtocol()
passed, issues = protocol.validate(failing_implementation)
assert passed is False, "Expected validation to fail"
assert len(issues) > 0, "Expected issues to be detected"
# Check specific issues
issue_text = " ".join(issues)
assert "Tests not passing" in issue_text or "test" in issue_text.lower()

    def test_check_tests_passing_with_output(self):
"""Test that tests_passed requires actual output"""
protocol = SelfCheckProtocol()
# Tests passed WITH output - should pass
impl_with_output = {
"tests_passed": True,
"test_output": "✅ 10 tests passed",
}
assert protocol._check_tests_passing(impl_with_output) is True
# Tests passed WITHOUT output - should fail (hallucination detection)
impl_without_output = {
"tests_passed": True,
"test_output": "",
}
assert protocol._check_tests_passing(impl_without_output) is False

    def test_check_requirements_met(self):
"""Test requirements validation"""
protocol = SelfCheckProtocol()
# All requirements met
impl_complete = {
"requirements": ["A", "B", "C"],
"requirements_met": ["A", "B", "C"],
}
unmet = protocol._check_requirements_met(impl_complete)
assert len(unmet) == 0
# Some requirements not met
impl_incomplete = {
"requirements": ["A", "B", "C"],
"requirements_met": ["A", "B"],
}
unmet = protocol._check_requirements_met(impl_incomplete)
assert len(unmet) == 1
assert "C" in unmet

    def test_check_assumptions_verified(self):
"""Test assumptions verification"""
protocol = SelfCheckProtocol()
# All assumptions verified
impl_verified = {
"assumptions": ["API is REST", "DB is PostgreSQL"],
"assumptions_verified": ["API is REST", "DB is PostgreSQL"],
}
unverified = protocol._check_assumptions_verified(impl_verified)
assert len(unverified) == 0
# Some assumptions unverified
impl_unverified = {
"assumptions": ["API is REST", "DB is PostgreSQL"],
"assumptions_verified": ["API is REST"],
}
unverified = protocol._check_assumptions_verified(impl_unverified)
assert len(unverified) == 1
assert "DB is PostgreSQL" in unverified

    def test_check_evidence_exists(self):
"""Test evidence requirement validation"""
protocol = SelfCheckProtocol()
# All evidence present
impl_with_evidence = {
"evidence": {
"test_results": "Tests passed",
"code_changes": ["file1.py"],
"validation": "Linting passed",
}
}
missing = protocol._check_evidence_exists(impl_with_evidence)
assert len(missing) == 0
# Missing all evidence
impl_no_evidence = {"evidence": {}}
missing = protocol._check_evidence_exists(impl_no_evidence)
assert len(missing) == 3
assert "test_results" in missing
assert "code_changes" in missing
assert "validation" in missing

    def test_detect_hallucinations_tests_without_output(self):
"""Test hallucination detection: claims tests pass without output"""
protocol = SelfCheckProtocol()
impl = {
"tests_passed": True,
"test_output": "", # No output - hallucination!
}
detected = protocol._detect_hallucinations(impl)
assert len(detected) > 0
assert any("without showing output" in d for d in detected)

    def test_detect_hallucinations_complete_without_evidence(self):
"""Test hallucination detection: claims complete without evidence"""
protocol = SelfCheckProtocol()
impl = {
"status": "complete",
"evidence": None, # No evidence - hallucination!
}
detected = protocol._detect_hallucinations(impl)
assert len(detected) > 0
assert any("without evidence" in d for d in detected)

    def test_detect_hallucinations_complete_with_failing_tests(self):
"""Test hallucination detection: claims complete despite failing tests"""
protocol = SelfCheckProtocol()
impl = {
"status": "complete",
"tests_passed": False, # Tests failed but claims complete!
}
detected = protocol._detect_hallucinations(impl)
assert len(detected) > 0
assert any("failing tests" in d for d in detected)

    def test_detect_hallucinations_ignored_errors(self):
"""Test hallucination detection: ignored errors/warnings"""
protocol = SelfCheckProtocol()
impl = {
"status": "complete",
"errors": ["TypeError in module X"],
"warnings": ["Deprecated function used"],
}
detected = protocol._detect_hallucinations(impl)
assert len(detected) > 0
assert any("errors/warnings" in d for d in detected)

    def test_detect_hallucinations_uncertainty_language(self):
"""Test hallucination detection: uncertainty language"""
protocol = SelfCheckProtocol()
impl = {
"description": "This probably works and might be correct",
}
detected = protocol._detect_hallucinations(impl)
assert len(detected) > 0
assert any("Uncertainty language" in d for d in detected)

    def test_format_report_passing(self):
"""Test report formatting for passing validation"""
protocol = SelfCheckProtocol()
report = protocol.format_report(passed=True, issues=[])
assert "PASSED" in report
assert "" in report

    def test_format_report_failing(self):
"""Test report formatting for failing validation"""
protocol = SelfCheckProtocol()
issues = [
"❌ Tests not passing",
"❌ Missing evidence: test_results",
]
report = protocol.format_report(passed=False, issues=issues)
assert "FAILED" in report
assert "" in report
for issue in issues:
assert issue in report


@pytest.mark.self_check
def test_self_check_marker_integration(self_check_protocol, sample_implementation):
"""
Test that self_check marker works with pytest plugin fixture
This test validates the fixture provided by pytest plugin
"""
passed, issues = self_check_protocol.validate(sample_implementation)
assert passed is True, f"Sample implementation should pass validation: {issues}"
assert len(issues) == 0, "No issues should be detected in sample implementation"
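

# Minimal standalone usage sketch (not a test). It exercises only the public
# API covered above, validate() and format_report(), on a hand-built
# implementation dict; the dict contents are illustrative, not a fixture.
if __name__ == "__main__":
    protocol = SelfCheckProtocol()
    implementation = {
        "status": "complete",
        "tests_passed": True,
        "test_output": "✅ 10 tests passed",
        "requirements": ["A"],
        "requirements_met": ["A"],
        "assumptions": [],
        "assumptions_verified": [],
        "evidence": {
            "test_results": "Tests passed",
            "code_changes": ["file1.py"],
            "validation": "Linting passed",
        },
    }
    passed, issues = protocol.validate(implementation)
    print(protocol.format_report(passed=passed, issues=issues))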