SuperClaude/tests/unit/test_confidence.py

"""
Unit tests for ConfidenceChecker

Tests pre-execution confidence assessment functionality.
"""

import pytest

from superclaude.pm_agent.confidence import ConfidenceChecker


class TestConfidenceChecker:
    """Test suite for the ConfidenceChecker class.

    Exact-score assertions use ``pytest.approx`` instead of ``==``: the
    expected scores (1.0, 0.7, 0.0) correspond to sums of fractional weights
    (25% + 25% + 20% + 15% + 15%), and exact float equality on such values is
    sensitive to rounding. The band checks (``>= 0.9``, ``< 0.7``,
    ``0.7 <= x < 0.9``) stay strict on purpose.
    """

    def test_high_confidence_scenario(self, sample_context):
        """
        Test that a well-prepared context returns high confidence (≥90%)

        All checks pass:
        - No duplicates (25%)
        - Architecture compliant (25%)
        - Official docs verified (20%)
        - OSS reference found (15%)
        - Root cause identified (15%)
        Total: 100%

        Args:
            sample_context: fixture supplying a context in which every check
                is expected to pass.
        """
        checker = ConfidenceChecker()
        confidence = checker.assess(sample_context)

        assert confidence >= 0.9, f"Expected high confidence ≥0.9, got {confidence}"
        # Tolerant comparison: the full score is a sum of fractional weights.
        assert confidence == pytest.approx(1.0), (
            "All checks passed should give 100% confidence"
        )

    def test_low_confidence_scenario(self, low_confidence_context):
        """
        Test that an unprepared context returns low confidence (<70%)

        No checks pass: 0%

        Args:
            low_confidence_context: fixture supplying a context in which no
                check is expected to pass.
        """
        checker = ConfidenceChecker()
        confidence = checker.assess(low_confidence_context)

        assert confidence < 0.7, f"Expected low confidence <0.7, got {confidence}"
        assert confidence == pytest.approx(0.0), (
            "No checks passed should give 0% confidence"
        )

    def test_medium_confidence_scenario(self):
        """
        Test medium confidence scenario (70-89%)

        Some checks pass, some don't: the three flags set to True below are
        expected to contribute 25% + 25% + 20% = 70%.
        """
        checker = ConfidenceChecker()
        context = {
            "test_name": "test_feature",
            "duplicate_check_complete": True,  # 25%
            "architecture_check_complete": True,  # 25%
            "official_docs_verified": True,  # 20%
            "oss_reference_complete": False,  # 0%
            "root_cause_identified": False,  # 0%
        }

        confidence = checker.assess(context)

        # Band membership is checked strictly; only the exact-value check
        # below tolerates float rounding.
        assert 0.7 <= confidence < 0.9, (
            f"Expected medium confidence 0.7-0.9, got {confidence}"
        )
        assert confidence == pytest.approx(0.7), "Should be exactly 70%"

    def test_confidence_checks_recorded(self, sample_context):
        """Test that assess() records its checks in the context.

        After assessing ``sample_context`` the test expects a
        ``"confidence_checks"`` list with five entries, each starting with the
        passing marker.
        """
        checker = ConfidenceChecker()
        checker.assess(sample_context)

        assert "confidence_checks" in sample_context
        assert isinstance(sample_context["confidence_checks"], list)
        assert len(sample_context["confidence_checks"]) == 5

        # All checks should pass
        for check in sample_context["confidence_checks"]:
            assert check.startswith("✅"), f"Expected passing check, got: {check}"

    def test_get_recommendation_high(self):
        """Test that a 0.95 score yields the high-confidence recommendation."""
        checker = ConfidenceChecker()
        recommendation = checker.get_recommendation(0.95)

        assert "High confidence" in recommendation
        assert "Proceed" in recommendation

    def test_get_recommendation_medium(self):
        """Test that a 0.75 score yields the medium-confidence recommendation."""
        checker = ConfidenceChecker()
        recommendation = checker.get_recommendation(0.75)

        assert "Medium confidence" in recommendation
        assert "Continue investigation" in recommendation

    def test_get_recommendation_low(self):
        """Test that a 0.5 score yields the low-confidence recommendation."""
        checker = ConfidenceChecker()
        recommendation = checker.get_recommendation(0.5)

        assert "Low confidence" in recommendation
        assert "STOP" in recommendation

    def test_has_official_docs_with_flag(self):
        """Test the official docs check when the flag is set directly."""
        checker = ConfidenceChecker()
        context = {"official_docs_verified": True}

        result = checker._has_official_docs(context)

        assert result is True

    def test_no_duplicates_check(self):
        """Test duplicate check validation for a True and a False flag."""
        checker = ConfidenceChecker()

        # Flag set to True
        context_pass = {"duplicate_check_complete": True}
        assert checker._no_duplicates(context_pass) is True

        # Flag explicitly set to False
        context_fail = {"duplicate_check_complete": False}
        assert checker._no_duplicates(context_fail) is False

    def test_architecture_compliance_check(self):
        """Test architecture compliance validation with and without the flag."""
        checker = ConfidenceChecker()

        # Flag set to True
        context_pass = {"architecture_check_complete": True}
        assert checker._architecture_compliant(context_pass) is True

        # Flag absent from the context
        context_fail = {}
        assert checker._architecture_compliant(context_fail) is False

    def test_oss_reference_check(self):
        """Test OSS reference validation for a True and a False flag."""
        checker = ConfidenceChecker()

        # Flag set to True
        context_pass = {"oss_reference_complete": True}
        assert checker._has_oss_reference(context_pass) is True

        # Flag explicitly set to False
        context_fail = {"oss_reference_complete": False}
        assert checker._has_oss_reference(context_fail) is False

    def test_root_cause_check(self):
        """Test root cause identification validation with and without the flag."""
        checker = ConfidenceChecker()

        # Flag set to True
        context_pass = {"root_cause_identified": True}
        assert checker._root_cause_identified(context_pass) is True

        # Flag absent from the context
        context_fail = {}
        assert checker._root_cause_identified(context_fail) is False


@pytest.mark.confidence_check
def test_confidence_check_marker_integration(confidence_checker):
    """
    Exercise the confidence_check marker together with the plugin fixture.

    A context with every flag set to True is assessed through the
    ``confidence_checker`` fixture; the resulting score must be at least 70%,
    the level below which this test should be skipped.
    """
    # Flags that are all switched on, in the order they appear in the context.
    enabled_flags = (
        "has_official_docs",
        "duplicate_check_complete",
        "architecture_check_complete",
        "official_docs_verified",
        "oss_reference_complete",
        "root_cause_identified",
    )
    prepared_context = {
        "test_name": "test_confidence_check_marker_integration",
        **dict.fromkeys(enabled_flags, True),
    }

    score = confidence_checker.assess(prepared_context)
    assert score >= 0.7, "Confidence should be high enough to not skip"
feat: add comprehensive test suite, CI/CD workflows, and install command Major improvements to SuperClaude Framework infrastructure and testing: ## New Features - Add 'superclaude install' command to install slash commands (/research, /index-repo, /agent, /recommend) - Create comprehensive test suite with 71 tests (70 passing, 1 skipped) - Add GitHub Actions CI/CD workflows for automated testing - Add essential documentation files (PLANNING.md, TASK.md, KNOWLEDGE.md) ## Testing - tests/unit/: 59 tests covering PM Agent components - test_confidence.py: 13 tests for ConfidenceChecker - test_self_check.py: 14 tests for SelfCheckProtocol - test_reflexion.py: 9 tests for ReflexionPattern - test_token_budget.py: 12 tests for TokenBudgetManager - test_cli_install.py: 12 tests for install command (NEW) - tests/integration/: 11 tests for pytest plugin integration - tests/conftest.py: Shared fixtures for all tests ## CI/CD Workflows - .github/workflows/test.yml: Comprehensive test matrix - Tests on Python 3.10, 3.11, 3.12 - Lint and format checks with ruff - Pytest plugin verification - SuperClaude doctor health checks - Coverage reporting with Codecov - .github/workflows/quick-check.yml: Fast PR validation (~2-3 min) - .github/workflows/README.md: Workflow documentation ## Documentation - PLANNING.md: Architecture, design principles, absolute rules - TASK.md: Current tasks, priorities, backlog - KNOWLEDGE.md: Accumulated insights, best practices, troubleshooting ## Bug Fixes - Fix .gitignore contradictions (remove conflicting Claude Code patterns) - Fix TokenBudgetManager to properly validate and default invalid complexity - Update package.json version to 4.1.6 (sync with VERSION file) ## CLI Improvements - src/superclaude/cli/install_commands.py: Command installation logic - src/superclaude/cli/main.py: Add 'install' command with --list and --force options - README.md: Update installation instructions with correct commands ## Breaking Changes None - all changes are 
backwards compatible ## Migration Guide Users should run 'superclaude install' after upgrading to install slash commands Fixes #466 (indirectly by clarifying installation process) Refs #419 (plugin system - documentation updated) 🤖 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com> 2025-11-11 18:21:22 +01:00			`"""`
			`Unit tests for ConfidenceChecker`

			`Tests pre-execution confidence assessment functionality.`
			`"""`

			`import pytest`
fix: resolve all ruff linting errors Fixed 42 linting errors across codebase: - Auto-fixed 35 import sorting issues (I001) - Added unused imports to __all__ in execution/__init__.py - Removed unused variable assignments (F841) - Updated pyproject.toml to use [tool.ruff.lint] section All ruff checks now pass successfully. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> 2025-11-12 18:17:39 +01:00
feat: add comprehensive test suite, CI/CD workflows, and install command Major improvements to SuperClaude Framework infrastructure and testing: ## New Features - Add 'superclaude install' command to install slash commands (/research, /index-repo, /agent, /recommend) - Create comprehensive test suite with 71 tests (70 passing, 1 skipped) - Add GitHub Actions CI/CD workflows for automated testing - Add essential documentation files (PLANNING.md, TASK.md, KNOWLEDGE.md) ## Testing - tests/unit/: 59 tests covering PM Agent components - test_confidence.py: 13 tests for ConfidenceChecker - test_self_check.py: 14 tests for SelfCheckProtocol - test_reflexion.py: 9 tests for ReflexionPattern - test_token_budget.py: 12 tests for TokenBudgetManager - test_cli_install.py: 12 tests for install command (NEW) - tests/integration/: 11 tests for pytest plugin integration - tests/conftest.py: Shared fixtures for all tests ## CI/CD Workflows - .github/workflows/test.yml: Comprehensive test matrix - Tests on Python 3.10, 3.11, 3.12 - Lint and format checks with ruff - Pytest plugin verification - SuperClaude doctor health checks - Coverage reporting with Codecov - .github/workflows/quick-check.yml: Fast PR validation (~2-3 min) - .github/workflows/README.md: Workflow documentation ## Documentation - PLANNING.md: Architecture, design principles, absolute rules - TASK.md: Current tasks, priorities, backlog - KNOWLEDGE.md: Accumulated insights, best practices, troubleshooting ## Bug Fixes - Fix .gitignore contradictions (remove conflicting Claude Code patterns) - Fix TokenBudgetManager to properly validate and default invalid complexity - Update package.json version to 4.1.6 (sync with VERSION file) ## CLI Improvements - src/superclaude/cli/install_commands.py: Command installation logic - src/superclaude/cli/main.py: Add 'install' command with --list and --force options - README.md: Update installation instructions with correct commands ## Breaking Changes None - all changes are 
backwards compatible ## Migration Guide Users should run 'superclaude install' after upgrading to install slash commands Fixes #466 (indirectly by clarifying installation process) Refs #419 (plugin system - documentation updated) 🤖 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com> 2025-11-11 18:21:22 +01:00			`from superclaude.pm_agent.confidence import ConfidenceChecker`


			`class TestConfidenceChecker:`
			`"""Test suite for ConfidenceChecker class"""`

			`def test_high_confidence_scenario(self, sample_context):`
			`"""`
			`Test that a well-prepared context returns high confidence (≥90%)`

			`All checks pass:`
			`- No duplicates (25%)`
			`- Architecture compliant (25%)`
			`- Official docs verified (20%)`
			`- OSS reference found (15%)`
			`- Root cause identified (15%)`
			`Total: 100%`
			`"""`
			`checker = ConfidenceChecker()`
			`confidence = checker.assess(sample_context)`

			`assert confidence >= 0.9, f"Expected high confidence ≥0.9, got {confidence}"`
			`assert confidence == 1.0, "All checks passed should give 100% confidence"`

			`def test_low_confidence_scenario(self, low_confidence_context):`
			`"""`
			`Test that an unprepared context returns low confidence (<70%)`

			`No checks pass: 0%`
			`"""`
			`checker = ConfidenceChecker()`
			`confidence = checker.assess(low_confidence_context)`

			`assert confidence < 0.7, f"Expected low confidence <0.7, got {confidence}"`
			`assert confidence == 0.0, "No checks passed should give 0% confidence"`

			`def test_medium_confidence_scenario(self):`
			`"""`
			`Test medium confidence scenario (70-89%)`

			`Some checks pass, some don't`
			`"""`
			`checker = ConfidenceChecker()`
			`context = {`
			`"test_name": "test_feature",`
			`"duplicate_check_complete": True, # 25%`
			`"architecture_check_complete": True, # 25%`
			`"official_docs_verified": True, # 20%`
			`"oss_reference_complete": False, # 0%`
			`"root_cause_identified": False, # 0%`
			`}`

			`confidence = checker.assess(context)`

style: apply ruff formatting to all files Formatted 14 files to comply with ruff formatting rules: - Consistent code style across codebase - Improved readability - All formatting checks now pass 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com> 2025-11-12 18:19:45 +01:00			`assert 0.7 <= confidence < 0.9, (`
			`f"Expected medium confidence 0.7-0.9, got {confidence}"`
			`)`
feat: add comprehensive test suite, CI/CD workflows, and install command Major improvements to SuperClaude Framework infrastructure and testing: ## New Features - Add 'superclaude install' command to install slash commands (/research, /index-repo, /agent, /recommend) - Create comprehensive test suite with 71 tests (70 passing, 1 skipped) - Add GitHub Actions CI/CD workflows for automated testing - Add essential documentation files (PLANNING.md, TASK.md, KNOWLEDGE.md) ## Testing - tests/unit/: 59 tests covering PM Agent components - test_confidence.py: 13 tests for ConfidenceChecker - test_self_check.py: 14 tests for SelfCheckProtocol - test_reflexion.py: 9 tests for ReflexionPattern - test_token_budget.py: 12 tests for TokenBudgetManager - test_cli_install.py: 12 tests for install command (NEW) - tests/integration/: 11 tests for pytest plugin integration - tests/conftest.py: Shared fixtures for all tests ## CI/CD Workflows - .github/workflows/test.yml: Comprehensive test matrix - Tests on Python 3.10, 3.11, 3.12 - Lint and format checks with ruff - Pytest plugin verification - SuperClaude doctor health checks - Coverage reporting with Codecov - .github/workflows/quick-check.yml: Fast PR validation (~2-3 min) - .github/workflows/README.md: Workflow documentation ## Documentation - PLANNING.md: Architecture, design principles, absolute rules - TASK.md: Current tasks, priorities, backlog - KNOWLEDGE.md: Accumulated insights, best practices, troubleshooting ## Bug Fixes - Fix .gitignore contradictions (remove conflicting Claude Code patterns) - Fix TokenBudgetManager to properly validate and default invalid complexity - Update package.json version to 4.1.6 (sync with VERSION file) ## CLI Improvements - src/superclaude/cli/install_commands.py: Command installation logic - src/superclaude/cli/main.py: Add 'install' command with --list and --force options - README.md: Update installation instructions with correct commands ## Breaking Changes None - all changes are 
backwards compatible ## Migration Guide Users should run 'superclaude install' after upgrading to install slash commands Fixes #466 (indirectly by clarifying installation process) Refs #419 (plugin system - documentation updated) 🤖 Generated with Claude Code Co-Authored-By: Claude <noreply@anthropic.com> 2025-11-11 18:21:22 +01:00			`assert confidence == 0.7, "Should be exactly 70%"`

			`def test_confidence_checks_recorded(self, sample_context):`
			`"""Test that confidence checks are recorded in context"""`
			`checker = ConfidenceChecker()`
			`checker.assess(sample_context)`

			`assert "confidence_checks" in sample_context`
			`assert isinstance(sample_context["confidence_checks"], list)`
			`assert len(sample_context["confidence_checks"]) == 5`

			`# All checks should pass`
			`for check in sample_context["confidence_checks"]:`
			`assert check.startswith("✅"), f"Expected passing check, got: {check}"`

			`def test_get_recommendation_high(self):`
			`"""Test recommendation for high confidence"""`
			`checker = ConfidenceChecker()`
			`recommendation = checker.get_recommendation(0.95)`

			`assert "High confidence" in recommendation`
			`assert "Proceed" in recommendation`

			`def test_get_recommendation_medium(self):`
			`"""Test recommendation for medium confidence"""`
			`checker = ConfidenceChecker()`
			`recommendation = checker.get_recommendation(0.75)`

			`assert "Medium confidence" in recommendation`
			`assert "Continue investigation" in recommendation`

			`def test_get_recommendation_low(self):`
			`"""Test recommendation for low confidence"""`
			`checker = ConfidenceChecker()`
			`recommendation = checker.get_recommendation(0.5)`

			`assert "Low confidence" in recommendation`
			`assert "STOP" in recommendation`

			`def test_has_official_docs_with_flag(self):`
			`"""Test official docs check with direct flag"""`
			`checker = ConfidenceChecker()`
			`context = {"official_docs_verified": True}`

			`result = checker._has_official_docs(context)`

			`assert result is True`

			`def test_no_duplicates_check(self):`
			`"""Test duplicate check validation"""`
			`checker = ConfidenceChecker()`

			`# With flag`
			`context_pass = {"duplicate_check_complete": True}`
			`assert checker._no_duplicates(context_pass) is True`

			`# Without flag`
			`context_fail = {"duplicate_check_complete": False}`
			`assert checker._no_duplicates(context_fail) is False`

			`def test_architecture_compliance_check(self):`
			`"""Test architecture compliance validation"""`
			`checker = ConfidenceChecker()`

			`# With flag`
			`context_pass = {"architecture_check_complete": True}`
			`assert checker._architecture_compliant(context_pass) is True`

			`# Without flag`
			`context_fail = {}`
			`assert checker._architecture_compliant(context_fail) is False`

			`def test_oss_reference_check(self):`
			`"""Test OSS reference validation"""`
			`checker = ConfidenceChecker()`

			`# With flag`
			`context_pass = {"oss_reference_complete": True}`
			`assert checker._has_oss_reference(context_pass) is True`

			`# Without flag`
			`context_fail = {"oss_reference_complete": False}`
			`assert checker._has_oss_reference(context_fail) is False`

			`def test_root_cause_check(self):`
			`"""Test root cause identification validation"""`
			`checker = ConfidenceChecker()`

			`# With flag`
			`context_pass = {"root_cause_identified": True}`
			`assert checker._root_cause_identified(context_pass) is True`

			`# Without flag`
			`context_fail = {}`
			`assert checker._root_cause_identified(context_fail) is False`


			`@pytest.mark.confidence_check`
			`def test_confidence_check_marker_integration(confidence_checker):`
			`"""`
			`Test that confidence_check marker works with pytest plugin fixture`

			`This test should skip if confidence < 70%`
			`"""`
			`context = {`
			`"test_name": "test_confidence_check_marker_integration",`
			`"has_official_docs": True,`
			`"duplicate_check_complete": True,`
			`"architecture_check_complete": True,`
			`"official_docs_verified": True,`
			`"oss_reference_complete": True,`
			`"root_cause_identified": True,`
			`}`

			`confidence = confidence_checker.assess(context)`
			`assert confidence >= 0.7, "Confidence should be high enough to not skip"`