# SuperClaude/tests/unit/test_self_check.py
"""
Unit tests for SelfCheckProtocol
Tests post-implementation validation functionality.
"""
import pytest

from superclaude.pm_agent.self_check import SelfCheckProtocol


class TestSelfCheckProtocol:
    """Test suite for SelfCheckProtocol class"""

    def test_validate_passing_implementation(self, sample_implementation):
"""
Test validation of a complete, passing implementation
Should pass all four questions:
1. Tests passing?
2. Requirements met?
3. Assumptions verified?
4. Evidence provided?
"""
protocol = SelfCheckProtocol()
passed, issues = protocol.validate(sample_implementation)
assert passed is True, f"Expected validation to pass, got issues: {issues}"
assert len(issues) == 0, f"Expected no issues, got {len(issues)}: {issues}"

    def test_validate_failing_implementation(self, failing_implementation):
"""
Test validation of a failing implementation
Should fail multiple checks
"""
protocol = SelfCheckProtocol()
passed, issues = protocol.validate(failing_implementation)
assert passed is False, "Expected validation to fail"
assert len(issues) > 0, "Expected issues to be detected"
# Check specific issues
issue_text = " ".join(issues)
assert "Tests not passing" in issue_text or "test" in issue_text.lower()

    def test_check_tests_passing_with_output(self):
"""Test that tests_passed requires actual output"""
protocol = SelfCheckProtocol()
# Tests passed WITH output - should pass
impl_with_output = {
"tests_passed": True,
"test_output": "✅ 10 tests passed",
}
assert protocol._check_tests_passing(impl_with_output) is True
# Tests passed WITHOUT output - should fail (hallucination detection)
impl_without_output = {
"tests_passed": True,
"test_output": "",
}
assert protocol._check_tests_passing(impl_without_output) is False

    def test_check_requirements_met(self):
"""Test requirements validation"""
protocol = SelfCheckProtocol()
# All requirements met
impl_complete = {
"requirements": ["A", "B", "C"],
"requirements_met": ["A", "B", "C"],
}
unmet = protocol._check_requirements_met(impl_complete)
assert len(unmet) == 0
# Some requirements not met
impl_incomplete = {
"requirements": ["A", "B", "C"],
"requirements_met": ["A", "B"],
}
unmet = protocol._check_requirements_met(impl_incomplete)
assert len(unmet) == 1
assert "C" in unmet

    def test_check_assumptions_verified(self):
"""Test assumptions verification"""
protocol = SelfCheckProtocol()
# All assumptions verified
impl_verified = {
"assumptions": ["API is REST", "DB is PostgreSQL"],
"assumptions_verified": ["API is REST", "DB is PostgreSQL"],
}
unverified = protocol._check_assumptions_verified(impl_verified)
assert len(unverified) == 0
# Some assumptions unverified
impl_unverified = {
"assumptions": ["API is REST", "DB is PostgreSQL"],
"assumptions_verified": ["API is REST"],
}
unverified = protocol._check_assumptions_verified(impl_unverified)
assert len(unverified) == 1
assert "DB is PostgreSQL" in unverified

    def test_check_evidence_exists(self):
"""Test evidence requirement validation"""
protocol = SelfCheckProtocol()
# All evidence present
impl_with_evidence = {
"evidence": {
"test_results": "Tests passed",
"code_changes": ["file1.py"],
"validation": "Linting passed",
}
}
missing = protocol._check_evidence_exists(impl_with_evidence)
assert len(missing) == 0
# Missing all evidence
impl_no_evidence = {"evidence": {}}
missing = protocol._check_evidence_exists(impl_no_evidence)
assert len(missing) == 3
assert "test_results" in missing
assert "code_changes" in missing
assert "validation" in missing

    def test_detect_hallucinations_tests_without_output(self):
"""Test hallucination detection: claims tests pass without output"""
protocol = SelfCheckProtocol()
impl = {
"tests_passed": True,
"test_output": "", # No output - hallucination!
}
detected = protocol._detect_hallucinations(impl)
assert len(detected) > 0
assert any("without showing output" in d for d in detected)

    def test_detect_hallucinations_complete_without_evidence(self):
"""Test hallucination detection: claims complete without evidence"""
protocol = SelfCheckProtocol()
impl = {
"status": "complete",
"evidence": None, # No evidence - hallucination!
}
detected = protocol._detect_hallucinations(impl)
assert len(detected) > 0
assert any("without evidence" in d for d in detected)

    def test_detect_hallucinations_complete_with_failing_tests(self):
"""Test hallucination detection: claims complete despite failing tests"""
protocol = SelfCheckProtocol()
impl = {
"status": "complete",
"tests_passed": False, # Tests failed but claims complete!
}
detected = protocol._detect_hallucinations(impl)
assert len(detected) > 0
assert any("failing tests" in d for d in detected)

    def test_detect_hallucinations_ignored_errors(self):
"""Test hallucination detection: ignored errors/warnings"""
protocol = SelfCheckProtocol()
impl = {
"status": "complete",
"errors": ["TypeError in module X"],
"warnings": ["Deprecated function used"],
}
detected = protocol._detect_hallucinations(impl)
assert len(detected) > 0
assert any("errors/warnings" in d for d in detected)

    def test_detect_hallucinations_uncertainty_language(self):
"""Test hallucination detection: uncertainty language"""
protocol = SelfCheckProtocol()
impl = {
"description": "This probably works and might be correct",
}
detected = protocol._detect_hallucinations(impl)
assert len(detected) > 0
assert any("Uncertainty language" in d for d in detected)

    def test_format_report_passing(self):
"""Test report formatting for passing validation"""
protocol = SelfCheckProtocol()
report = protocol.format_report(passed=True, issues=[])
assert "PASSED" in report
assert "" in report

    def test_format_report_failing(self):
"""Test report formatting for failing validation"""
protocol = SelfCheckProtocol()
issues = [
"❌ Tests not passing",
"❌ Missing evidence: test_results",
]
report = protocol.format_report(passed=False, issues=issues)
assert "FAILED" in report
assert "" in report
for issue in issues:
assert issue in report


@pytest.mark.self_check
def test_self_check_marker_integration(self_check_protocol, sample_implementation):
"""
Test that self_check marker works with pytest plugin fixture
This test validates the fixture provided by pytest plugin
"""
passed, issues = self_check_protocol.validate(sample_implementation)
assert passed is True, f"Sample implementation should pass validation: {issues}"
assert len(issues) == 0, "No issues should be detected in sample implementation"
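

# Minimal standalone usage sketch (not a test). It exercises only the public
# API covered above, validate() and format_report(), on a hand-built
# implementation dict; the dict contents are illustrative, not a fixture.
if __name__ == "__main__":
    protocol = SelfCheckProtocol()
    implementation = {
        "status": "complete",
        "tests_passed": True,
        "test_output": "✅ 10 tests passed",
        "requirements": ["A"],
        "requirements_met": ["A"],
        "assumptions": [],
        "assumptions_verified": [],
        "evidence": {
            "test_results": "Tests passed",
            "code_changes": ["file1.py"],
            "validation": "Linting passed",
        },
    }
    passed, issues = protocol.validate(implementation)
    print(protocol.format_report(passed=passed, issues=issues))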