"""End-to-end integration tests for Semantic Search Agent."""
import pytest
from unittest.mock import AsyncMock, patch, MagicMock
import asyncio
from pydantic_ai.models.test import TestModel
from pydantic_ai.models.function import FunctionModel
from pydantic_ai.messages import ModelTextResponse
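# NOTE: ModelTextResponse and the dict-based tool-call returns used by the
# FunctionModel callbacks below target an older pydantic_ai messages API;
# newer releases replaced ModelTextResponse with ModelResponse/TextPart.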
from ..agent import search_agent, search, interactive_search, SearchResponse
from ..dependencies import AgentDependencies
from ..settings import load_settings
from ..tools import semantic_search, hybrid_search, auto_search
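# The fixtures referenced below (test_dependencies, sample_search_results,
# sample_hybrid_results, mock_database_responses) are assumed to come from the
# suite's conftest.py: test_dependencies is expected to yield an
# (AgentDependencies, mock connection) pair with a mocked OpenAI client, and
# the sample_* fixtures provide canned search result objects.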
class TestEndToEndSearch:
"""Test complete search workflows from query to response."""
@pytest.mark.asyncio
async def test_complete_semantic_search_workflow(self, test_dependencies, sample_search_results):
"""Test complete semantic search workflow."""
deps, connection = test_dependencies
# Mock database results
db_results = [
{
'chunk_id': r.chunk_id,
'document_id': r.document_id,
'content': r.content,
'similarity': r.similarity,
'metadata': r.metadata,
'document_title': r.document_title,
'document_source': r.document_source
}
for r in sample_search_results
]
connection.fetch.return_value = db_results
# Create function model that simulates complete workflow
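        # The callback returns plain text on the first call, a tool-call dict
        # ({tool_name: arguments}) on the second, and a final summary afterwards,
        # simulating a complete model/tool exchange.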
call_count = 0
async def search_workflow(messages, tools):
nonlocal call_count
call_count += 1
if call_count == 1:
return ModelTextResponse(content="I'll search for Python programming information.")
elif call_count == 2:
return {"auto_search": {"query": "Python programming", "match_count": 10}}
else:
return ModelTextResponse(
content="Based on my search, I found relevant information about Python programming. "
"The results include tutorials and guides that explain Python concepts and syntax. "
"Key sources include Python Tutorial and ML Guide documents."
)
function_model = FunctionModel(search_workflow)
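        # NOTE: in current pydantic_ai releases Agent.override() is a context
        # manager rather than a factory; depending on the installed version this
        # may need to be written as `with search_agent.override(model=function_model):`.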
test_agent = search_agent.override(model=function_model)
# Run complete workflow
result = await test_agent.run("Find information about Python programming", deps=deps)
# Verify workflow completed
assert result.data is not None
assert "Python programming" in result.data
assert "search" in result.data.lower()
# Verify database was queried
connection.fetch.assert_called()
# Verify embedding was generated
deps.openai_client.embeddings.create.assert_called()
@pytest.mark.asyncio
async def test_complete_hybrid_search_workflow(self, test_dependencies, sample_hybrid_results):
"""Test complete hybrid search workflow."""
deps, connection = test_dependencies
connection.fetch.return_value = sample_hybrid_results
# Set preference for hybrid search
deps.set_user_preference('search_type', 'hybrid')
call_count = 0
async def hybrid_workflow(messages, tools):
nonlocal call_count
call_count += 1
if call_count == 1:
return ModelTextResponse(content="I'll perform a hybrid search combining semantic and keyword matching.")
elif call_count == 2:
return {"auto_search": {"query": "exact Python syntax", "match_count": 15}}
else:
return ModelTextResponse(
content="The hybrid search found precise matches for Python syntax. "
"Results combine semantic similarity with exact keyword matching. "
"This approach is ideal for finding specific technical information."
)
function_model = FunctionModel(hybrid_workflow)
test_agent = search_agent.override(model=function_model)
result = await test_agent.run("Find exact Python syntax examples", deps=deps)
assert result.data is not None
assert "hybrid search" in result.data or "Python syntax" in result.data
# Verify user preference was considered
assert deps.user_preferences['search_type'] == 'hybrid'
        # Verify a query was recorded in history (the tool may log its own
        # reformulated query rather than the raw user prompt)
        assert len(deps.query_history) > 0
@pytest.mark.asyncio
async def test_search_function_integration(self, mock_database_responses):
"""Test the search function with realistic agent interaction."""
        # NOTE: unittest.mock.patch() cannot resolve a relative target such as
        # '..agent.search_agent', so patch the attribute on the imported module instead.
        from .. import agent as agent_module
        with patch.object(agent_module, 'search_agent') as mock_agent:
            # Mock agent behavior: agent.run() is awaited, so expose it as an
            # AsyncMock that resolves to an object carrying the expected .data attribute.
            mock_result = MagicMock()
            mock_result.data = "Comprehensive search results found. The analysis shows relevant information about machine learning concepts and Python implementations."
            mock_agent.run = AsyncMock(return_value=mock_result)
# Mock dependency initialization
with patch.object(AgentDependencies, 'initialize') as mock_init:
with patch.object(AgentDependencies, 'cleanup') as mock_cleanup:
response = await search(
query="machine learning with Python",
search_type="auto",
match_count=20,
text_weight=0.4
)
# Verify response structure
assert isinstance(response, SearchResponse)
assert response.summary == mock_result.data
assert response.search_strategy == "auto"
assert response.result_count == 20
# Verify agent was called
mock_agent.run.assert_called_once()
# Verify dependency lifecycle
mock_init.assert_called_once()
mock_cleanup.assert_called_once()
@pytest.mark.asyncio
async def test_interactive_session_workflow(self, test_dependencies):
"""Test interactive session maintains state across queries."""
deps, connection = test_dependencies
connection.fetch.return_value = []
# Initialize interactive session
session_deps = await interactive_search(deps)
# Verify session is properly initialized
assert session_deps is deps
assert session_deps.session_id is not None
# Simulate multiple queries in same session
queries = [
"What is Python?",
"How does machine learning work?",
"Show me examples of neural networks"
]
call_count = 0
async def session_workflow(messages, tools):
nonlocal call_count
call_count += 1
if call_count % 2 == 1: # Odd calls - analysis
return ModelTextResponse(content="I'll search for information about your query.")
else: # Even calls - tool calls
return {"auto_search": {"query": queries[(call_count // 2) - 1], "match_count": 10}}
function_model = FunctionModel(session_workflow)
test_agent = search_agent.override(model=function_model)
# Run multiple searches in session
for query in queries:
result = await test_agent.run(query, deps=session_deps)
assert result.data is not None
# Verify session state is maintained
assert len(session_deps.query_history) == len(queries)
assert all(q in session_deps.query_history for q in queries)
@pytest.mark.asyncio
async def test_error_recovery_workflow(self, test_dependencies):
"""Test system recovers from errors gracefully."""
deps, connection = test_dependencies
# First call fails, second succeeds
connection.fetch.side_effect = [
Exception("Database connection failed"),
[{'chunk_id': 'chunk_1', 'document_id': 'doc_1', 'content': 'Recovery test',
'similarity': 0.9, 'metadata': {}, 'document_title': 'Test Doc',
'document_source': 'test.pdf'}]
]
call_count = 0
async def error_recovery_workflow(messages, tools):
nonlocal call_count
call_count += 1
if call_count == 1:
return ModelTextResponse(content="I'll try to search for information.")
elif call_count == 2:
return {"auto_search": {"query": "test query", "match_count": 10}}
elif call_count == 3:
return ModelTextResponse(content="The first search failed, let me try again.")
elif call_count == 4:
return {"auto_search": {"query": "test query", "match_count": 10}}
else:
return ModelTextResponse(content="Successfully recovered and found information.")
function_model = FunctionModel(error_recovery_workflow)
test_agent = search_agent.override(model=function_model)
# First attempt should handle error gracefully
result1 = await test_agent.run("Test error recovery", deps=deps)
assert result1.data is not None
# Second attempt should succeed
result2 = await test_agent.run("Test successful recovery", deps=deps)
assert result2.data is not None
assert "Successfully recovered" in result2.data
class TestCrossComponentIntegration:
"""Test integration between different agent components."""
@pytest.mark.asyncio
async def test_settings_to_dependencies_integration(self):
"""Test settings are properly integrated into dependencies."""
with patch.dict('os.environ', {
'DATABASE_URL': 'postgresql://test:test@localhost:5432/testdb',
'OPENAI_API_KEY': 'test_openai_key',
'LLM_MODEL': 'gpt-4',
'DEFAULT_MATCH_COUNT': '25',
'MAX_MATCH_COUNT': '100'
}):
settings = load_settings()
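            # load_settings() is expected to coerce the string environment values
            # above into typed fields (e.g. DEFAULT_MATCH_COUNT -> int), which the
            # assertions below rely on.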
with patch('asyncpg.create_pool') as mock_create_pool:
with patch('openai.AsyncOpenAI') as mock_openai:
mock_pool = AsyncMock()
mock_client = AsyncMock()
mock_create_pool.return_value = mock_pool
mock_openai.return_value = mock_client
deps = AgentDependencies()
deps.settings = settings
await deps.initialize()
# Verify settings values are used
assert deps.settings.database_url == 'postgresql://test:test@localhost:5432/testdb'
assert deps.settings.openai_api_key == 'test_openai_key'
assert deps.settings.llm_model == 'gpt-4'
assert deps.settings.default_match_count == 25
assert deps.settings.max_match_count == 100
# Verify pool created with correct settings
mock_create_pool.assert_called_once_with(
'postgresql://test:test@localhost:5432/testdb',
min_size=deps.settings.db_pool_min_size,
max_size=deps.settings.db_pool_max_size
)
# Verify OpenAI client created with correct key
mock_openai.assert_called_once_with(
api_key='test_openai_key'
)
@pytest.mark.asyncio
async def test_tools_to_agent_integration(self, test_dependencies, sample_search_results):
"""Test tools are properly integrated with the agent."""
deps, connection = test_dependencies
# Mock different tool results
semantic_results = [
{
'chunk_id': r.chunk_id,
'document_id': r.document_id,
'content': r.content,
'similarity': r.similarity,
'metadata': r.metadata,
'document_title': r.document_title,
'document_source': r.document_source
}
for r in sample_search_results
]
hybrid_results = [
{
'chunk_id': r.chunk_id,
'document_id': r.document_id,
'content': r.content,
'combined_score': r.similarity,
'vector_similarity': r.similarity,
'text_similarity': r.similarity - 0.1,
'metadata': r.metadata,
'document_title': r.document_title,
'document_source': r.document_source
}
for r in sample_search_results
]
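        # Hybrid rows expose combined_score / vector_similarity / text_similarity,
        # mirroring the columns the hybrid search SQL function is expected to return.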
connection.fetch.side_effect = [semantic_results, hybrid_results, semantic_results]
# Test all tools work with agent
call_count = 0
async def multi_tool_workflow(messages, tools):
nonlocal call_count
call_count += 1
if call_count == 1:
return {"semantic_search": {"query": "test semantic", "match_count": 5}}
elif call_count == 2:
return {"hybrid_search": {"query": "test hybrid", "match_count": 5, "text_weight": 0.4}}
elif call_count == 3:
return {"auto_search": {"query": "test auto", "match_count": 5}}
else:
return ModelTextResponse(content="All search tools tested successfully.")
function_model = FunctionModel(multi_tool_workflow)
test_agent = search_agent.override(model=function_model)
result = await test_agent.run("Test all search tools", deps=deps)
# Verify all tools were called
assert connection.fetch.call_count >= 3
assert result.data is not None
assert "successfully" in result.data.lower()
@pytest.mark.asyncio
async def test_preferences_across_tools(self, test_dependencies, sample_hybrid_results):
"""Test user preferences work consistently across all tools."""
deps, connection = test_dependencies
connection.fetch.return_value = sample_hybrid_results
# Set user preferences
deps.set_user_preference('search_type', 'hybrid')
deps.set_user_preference('text_weight', 0.7)
deps.set_user_preference('result_count', 15)
# Test preferences are used by auto_search
from pydantic_ai import RunContext
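        # NOTE: RunContext is built with deps only; newer pydantic_ai versions may
        # also require model/usage arguments, so this mirrors the version the
        # tools were written against.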
ctx = RunContext(deps=deps)
result = await auto_search(ctx, "test query with preferences")
# Should use user preference for search type
assert result['strategy'] == 'hybrid'
assert result['reason'] == 'User preference'
# Verify database call used preference values
connection.fetch.assert_called()
args = connection.fetch.call_args[0]
assert args[4] == 0.7 # text_weight parameter
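        # Index 4 assumes the positional bind-parameter order used by the
        # hybrid search SQL call (text_weight as the fifth argument).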
@pytest.mark.asyncio
async def test_query_history_integration(self, test_dependencies):
"""Test query history is maintained across all interactions."""
deps, connection = test_dependencies
connection.fetch.return_value = []
from pydantic_ai import RunContext
ctx = RunContext(deps=deps)
# Make multiple searches that should add to history
test_queries = [
"First search query",
"Second search about AI",
"Third query on machine learning",
"Fourth search on Python"
]
for query in test_queries:
await auto_search(ctx, query)
# Verify all queries added to history
assert len(deps.query_history) == len(test_queries)
for query in test_queries:
assert query in deps.query_history
# Verify history order is maintained
assert deps.query_history == test_queries
class TestPerformanceIntegration:
"""Test performance aspects of integrated system."""
@pytest.mark.asyncio
async def test_concurrent_search_requests(self, test_dependencies):
"""Test system handles concurrent search requests."""
deps, connection = test_dependencies
connection.fetch.return_value = [
{
'chunk_id': 'chunk_1',
'document_id': 'doc_1',
'content': 'Concurrent test content',
'similarity': 0.8,
'metadata': {},
'document_title': 'Test Doc',
'document_source': 'test.pdf'
}
]
# Create multiple search tasks
async def single_search(query_id):
from pydantic_ai import RunContext
ctx = RunContext(deps=deps)
return await semantic_search(ctx, f"Query {query_id}")
# Run concurrent searches
tasks = [single_search(i) for i in range(5)]
results = await asyncio.gather(*tasks)
# All should complete successfully
assert len(results) == 5
for result in results:
assert isinstance(result, list)
assert len(result) > 0
# Should have made multiple database calls
assert connection.fetch.call_count == 5
@pytest.mark.asyncio
async def test_large_result_set_processing(self, test_dependencies):
"""Test system handles large result sets efficiently."""
deps, connection = test_dependencies
# Create large result set
large_results = []
for i in range(50): # Maximum allowed results
large_results.append({
'chunk_id': f'chunk_{i}',
'document_id': f'doc_{i}',
'content': f'Content {i} with substantial text for testing performance',
'similarity': 0.9 - (i * 0.01),
'metadata': {'page': i, 'section': f'Section {i}'},
'document_title': f'Document {i}',
'document_source': f'source_{i}.pdf'
})
connection.fetch.return_value = large_results
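        # Similarity decreases monotonically with i, so the mocked rows arrive
        # already ordered best-first, which the ordering assertion below relies on.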
from pydantic_ai import RunContext
ctx = RunContext(deps=deps)
# Process large result set
results = await semantic_search(ctx, "large dataset query", match_count=50)
# Should handle all results efficiently
assert len(results) == 50
assert all(r.similarity >= 0.4 for r in results) # All should have reasonable similarity
assert results[0].similarity > results[-1].similarity # Should be ordered by similarity
@pytest.mark.asyncio
async def test_embedding_generation_performance(self, test_dependencies):
"""Test embedding generation performance."""
deps, connection = test_dependencies
connection.fetch.return_value = []
# Test embedding generation for various text lengths
test_texts = [
"Short query",
"Medium length query with more words and details about the search topic",
"Very long query " * 100 # Very long text
]
from pydantic_ai import RunContext
ctx = RunContext(deps=deps)
for text in test_texts:
result = await semantic_search(ctx, text)
assert isinstance(result, list)
# Should have generated embeddings for all texts
assert deps.openai_client.embeddings.create.call_count == len(test_texts)
class TestRobustnessIntegration:
"""Test system robustness and error handling."""
@pytest.mark.asyncio
async def test_network_failure_recovery(self, test_dependencies):
"""Test system handles network failures gracefully."""
deps, connection = test_dependencies
# Simulate network failure then recovery
deps.openai_client.embeddings.create.side_effect = [
ConnectionError("Network unavailable"),
MagicMock(data=[MagicMock(embedding=[0.1] * 1536)])
]
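        # The second side effect mirrors the shape of OpenAI's embeddings.create
        # response (response.data[0].embedding) that the search tools are expected to read.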
connection.fetch.return_value = []
from pydantic_ai import RunContext
ctx = RunContext(deps=deps)
# First call should fail
with pytest.raises(ConnectionError):
await semantic_search(ctx, "network test query")
# Second call should succeed after "network recovery"
result = await semantic_search(ctx, "recovery test query")
assert isinstance(result, list)
@pytest.mark.asyncio
async def test_database_transaction_handling(self, test_dependencies):
"""Test proper database transaction handling."""
deps, connection = test_dependencies
# Simulate database transaction scenarios
connection.fetch.side_effect = [
Exception("Database locked"),
[{'chunk_id': 'chunk_1', 'document_id': 'doc_1', 'content': 'Recovery success',
'similarity': 0.95, 'metadata': {}, 'document_title': 'Test', 'document_source': 'test.pdf'}]
]
from pydantic_ai import RunContext
ctx = RunContext(deps=deps)
# First attempt fails
with pytest.raises(Exception, match="Database locked"):
await semantic_search(ctx, "transaction test")
# Subsequent attempt succeeds
result = await semantic_search(ctx, "transaction recovery")
assert len(result) == 1
assert result[0].content == "Recovery success"
@pytest.mark.asyncio
async def test_memory_management_with_large_sessions(self, test_dependencies):
"""Test memory management with large interactive sessions."""
deps, connection = test_dependencies
connection.fetch.return_value = []
# Simulate large number of queries in session
for i in range(20): # More than history limit
deps.add_to_history(f"Query number {i} with detailed content about search topics")
# History should be properly limited
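        # (add_to_history is assumed to cap query_history at the 10 most recent entries)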
assert len(deps.query_history) == 10
assert deps.query_history[0] == "Query number 10 with detailed content about search topics"
assert deps.query_history[-1] == "Query number 19 with detailed content about search topics"
# User preferences should still work
deps.set_user_preference('search_type', 'semantic')
assert deps.user_preferences['search_type'] == 'semantic'
@pytest.mark.asyncio
async def test_cleanup_after_errors(self, test_dependencies):
"""Test proper cleanup occurs even after errors."""
deps, connection = test_dependencies
# Simulate error during operation
connection.fetch.side_effect = Exception("Critical database error")
from pydantic_ai import RunContext
ctx = RunContext(deps=deps)
try:
await semantic_search(ctx, "cleanup test")
except Exception:
pass # Expected to fail
# Dependencies should still be in valid state for cleanup
assert deps.db_pool is not None
assert deps.openai_client is not None
# Cleanup should work normally
await deps.cleanup()
assert deps.db_pool is None
class TestScenarioIntegration:
"""Test realistic usage scenarios end-to-end."""
@pytest.mark.asyncio
async def test_research_workflow_scenario(self, test_dependencies):
"""Test complete research workflow scenario."""
deps, connection = test_dependencies
# Mock research-relevant results
research_results = [
{
'chunk_id': 'research_1',
'document_id': 'paper_1',
'content': 'Neural networks are computational models inspired by biological neural networks.',
'similarity': 0.92,
'metadata': {'type': 'research_paper', 'year': 2023},
'document_title': 'Deep Learning Fundamentals',
'document_source': 'nature_ml.pdf'
},
{
'chunk_id': 'research_2',
'document_id': 'paper_2',
'content': 'Machine learning algorithms can be broadly categorized into supervised and unsupervised learning.',
'similarity': 0.88,
'metadata': {'type': 'textbook', 'chapter': 3},
'document_title': 'ML Textbook',
'document_source': 'ml_book.pdf'
}
]
connection.fetch.return_value = research_results
# Simulate research workflow
research_queries = [
"What are neural networks?",
"Types of machine learning algorithms",
"Deep learning applications"
]
call_count = 0
async def research_workflow(messages, tools):
nonlocal call_count
call_count += 1
if call_count % 2 == 1: # Analysis calls
return ModelTextResponse(content="I'll search for research information on this topic.")
else: # Tool calls
query_idx = (call_count // 2) - 1
if query_idx < len(research_queries):
return {"auto_search": {"query": research_queries[query_idx], "match_count": 10}}
else:
return ModelTextResponse(content="Research workflow completed successfully.")
function_model = FunctionModel(research_workflow)
test_agent = search_agent.override(model=function_model)
# Execute research workflow
for query in research_queries:
result = await test_agent.run(query, deps=deps)
assert result.data is not None
assert "search" in result.data.lower() or "research" in result.data.lower()
# Verify research context maintained
assert len(deps.query_history) == len(research_queries)
assert all(q in deps.query_history for q in research_queries)
@pytest.mark.asyncio
async def test_troubleshooting_workflow_scenario(self, test_dependencies):
"""Test troubleshooting workflow with specific technical queries."""
deps, connection = test_dependencies
# Mock technical troubleshooting results
tech_results = [
{
'chunk_id': 'tech_1',
'document_id': 'docs_1',
'content': 'ImportError: No module named sklearn. Solution: pip install scikit-learn',
'combined_score': 0.95,
'vector_similarity': 0.90,
'text_similarity': 1.0,
'metadata': {'type': 'troubleshooting', 'language': 'python'},
'document_title': 'Python Error Solutions',
'document_source': 'python_docs.pdf'
}
]
connection.fetch.return_value = tech_results
# Set preference for exact matching
deps.set_user_preference('search_type', 'hybrid')
from pydantic_ai import RunContext
ctx = RunContext(deps=deps)
# Perform technical search
result = await auto_search(ctx, 'ImportError: No module named sklearn')
# Should use hybrid search for exact technical terms
assert result['strategy'] == 'hybrid'
assert result['reason'] == 'User preference'
assert len(result['results']) > 0
# Verify technical content found
tech_content = result['results'][0]
assert 'ImportError' in tech_content['content']
assert 'sklearn' in tech_content['content']
@pytest.mark.asyncio
async def test_learning_workflow_scenario(self, test_dependencies):
"""Test learning workflow with progressive queries."""
deps, connection = test_dependencies
learning_results = [
{
'chunk_id': 'learn_1',
'document_id': 'tutorial_1',
'content': 'Python basics: Variables store data values. Example: x = 5',
'similarity': 0.85,
'metadata': {'difficulty': 'beginner', 'topic': 'variables'},
'document_title': 'Python Basics Tutorial',
'document_source': 'python_tutorial.pdf'
}
]
connection.fetch.return_value = learning_results
# Simulate progressive learning queries
learning_progression = [
"Python basics for beginners",
"Python variables and data types",
"Python functions and methods",
"Advanced Python concepts"
]
from pydantic_ai import RunContext
ctx = RunContext(deps=deps)
# Perform progressive searches
for i, query in enumerate(learning_progression):
result = await auto_search(ctx, query)
# Should find relevant educational content
assert result['strategy'] in ['semantic', 'hybrid']
assert len(result['results']) > 0
# Verify query added to history
assert query in deps.query_history
# Verify complete learning history maintained
assert len(deps.query_history) == len(learning_progression)
# History should show learning progression
for query in learning_progression:
assert query in deps.query_history