AI Agent Factory with Claude Code Subagents

This commit is contained in:
Cole Medin
2025-08-22 21:01:17 -05:00
parent 4e1240a0b3
commit 8d9f46ecfa
104 changed files with 24521 additions and 0 deletions

View File

@@ -0,0 +1,3 @@
"""Ingestion package for processing documents into vector DB and knowledge graph."""
__version__ = "0.1.0"

View File

@@ -0,0 +1,518 @@
"""
Semantic chunking implementation for intelligent document splitting.
"""
import os
import re
import logging
from typing import List, Dict, Any, Optional, Tuple
from dataclasses import dataclass
import asyncio
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
logger = logging.getLogger(__name__)
# Import flexible providers
try:
from ..utils.providers import get_embedding_client, get_ingestion_model
except ImportError:
# For direct execution or testing
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.providers import get_embedding_client, get_ingestion_model
# Initialize clients with flexible providers
embedding_client = get_embedding_client()
ingestion_model = get_ingestion_model()
@dataclass
class ChunkingConfig:
    """Configuration for chunking.

    Validated in __post_init__ so that an inconsistent configuration fails
    fast instead of causing subtle splitting bugs (e.g. a non-advancing
    overlap window).
    """
    chunk_size: int = 1000      # target size of a chunk, in characters
    chunk_overlap: int = 200    # characters shared between adjacent chunks
    max_chunk_size: int = 2000  # hard upper bound for any single chunk
    min_chunk_size: int = 100   # chunks smaller than this are discarded
    use_semantic_splitting: bool = True  # LLM-assisted splitting on/off
    preserve_structure: bool = True      # keep structural boundaries intact

    def __post_init__(self):
        """Validate configuration; raise ValueError on inconsistent values."""
        if self.chunk_overlap < 0:
            raise ValueError("Chunk overlap must be non-negative")
        if self.chunk_overlap >= self.chunk_size:
            raise ValueError("Chunk overlap must be less than chunk size")
        if self.min_chunk_size <= 0:
            raise ValueError("Minimum chunk size must be positive")
        if self.max_chunk_size < self.chunk_size:
            raise ValueError("Maximum chunk size must be at least chunk size")
@dataclass
class DocumentChunk:
    """A contiguous slice of a source document plus bookkeeping metadata."""
    content: str
    index: int
    start_char: int
    end_char: int
    metadata: Dict[str, Any]
    token_count: Optional[int] = None

    def __post_init__(self):
        """Fill in an estimated token count when none was supplied."""
        if self.token_count is not None:
            return
        # Heuristic: roughly 4 characters per token.
        self.token_count = len(self.content) // 4
class SemanticChunker:
    """Semantic document chunker using LLM for intelligent splitting."""

    def __init__(self, config: "ChunkingConfig"):
        """
        Initialize chunker.

        Args:
            config: Chunking configuration
        """
        self.config = config
        # Module-level clients are resolved once at import time via the
        # flexible provider layer.
        self.client = embedding_client
        self.model = ingestion_model

    async def chunk_document(
        self,
        content: str,
        title: str,
        source: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> List["DocumentChunk"]:
        """
        Chunk a document into semantically coherent pieces.

        Tries LLM-assisted semantic chunking first (when enabled and the
        content spans more than one chunk); falls back to rule-based
        splitting on any failure.

        Args:
            content: Document content
            title: Document title
            source: Document source
            metadata: Additional metadata merged into every chunk

        Returns:
            List of document chunks (empty for blank content)
        """
        if not content.strip():
            return []

        base_metadata = {
            "title": title,
            "source": source,
            **(metadata or {})
        }

        # Semantic chunking only pays off when the content is larger than
        # a single chunk.
        if self.config.use_semantic_splitting and len(content) > self.config.chunk_size:
            try:
                semantic_chunks = await self._semantic_chunk(content)
                if semantic_chunks:
                    return self._create_chunk_objects(
                        semantic_chunks,
                        content,
                        base_metadata
                    )
            except Exception as e:
                logger.warning(f"Semantic chunking failed, falling back to simple chunking: {e}")

        # Fallback to rule-based chunking
        return self._simple_chunk(content, base_metadata)

    async def _semantic_chunk(self, content: str) -> List[str]:
        """
        Perform semantic chunking: greedy grouping of structural sections.

        Sections that alone exceed max_chunk_size are handed to the LLM
        for semantic sub-splitting.

        Args:
            content: Content to chunk

        Returns:
            List of chunk texts, each at least min_chunk_size characters
        """
        # First, split on natural (structural) boundaries.
        sections = self._split_on_structure(content)

        # Group sections greedily into chunks no larger than chunk_size.
        chunks = []
        current_chunk = ""

        for section in sections:
            potential_chunk = current_chunk + "\n\n" + section if current_chunk else section

            if len(potential_chunk) <= self.config.chunk_size:
                current_chunk = potential_chunk
            else:
                # Flush the chunk built so far.
                if current_chunk:
                    chunks.append(current_chunk.strip())
                    current_chunk = ""

                # Oversized sections get semantic sub-splitting; normal ones
                # start the next chunk.
                if len(section) > self.config.max_chunk_size:
                    sub_chunks = await self._split_long_section(section)
                    chunks.extend(sub_chunks)
                else:
                    current_chunk = section

        # Add the last chunk
        if current_chunk:
            chunks.append(current_chunk.strip())

        # Drop fragments below the configured minimum size.
        return [chunk for chunk in chunks if len(chunk.strip()) >= self.config.min_chunk_size]

    def _split_on_structure(self, content: str) -> List[str]:
        """
        Split content on structural boundaries, keeping the separators.

        Args:
            content: Content to split

        Returns:
            List of non-blank sections
        """
        # Markdown headers, paragraph breaks, lists, code blocks, tables.
        patterns = [
            r'\n#{1,6}\s+.+?\n',   # Markdown headers
            r'\n\n+',              # Multiple newlines (paragraph breaks)
            r'\n[-*+]\s+',         # List items
            r'\n\d+\.\s+',         # Numbered lists
            r'\n```.*?```\n',      # Code blocks
            r'\n\|\s*.+?\|\s*\n',  # Tables
        ]

        # Apply each pattern in turn; the capturing group keeps separators
        # as their own sections.
        sections = [content]
        for pattern in patterns:
            new_sections = []
            for section in sections:
                parts = re.split(f'({pattern})', section, flags=re.MULTILINE | re.DOTALL)
                new_sections.extend([part for part in parts if part.strip()])
            sections = new_sections

        return sections

    async def _split_long_section(self, section: str) -> List[str]:
        """
        Split a long section using the LLM to find semantic boundaries.

        Falls back to simple character splitting if the LLM call fails or
        returns no usable chunks.

        Args:
            section: Section to split

        Returns:
            List of sub-chunks
        """
        try:
            prompt = f"""
Split the following text into semantically coherent chunks. Each chunk should:
1. Be roughly {self.config.chunk_size} characters long
2. End at natural semantic boundaries
3. Maintain context and readability
4. Not exceed {self.config.max_chunk_size} characters

Return only the split text with "---CHUNK---" as separator between chunks.

Text to split:
{section}
"""

            # Use Pydantic AI for LLM calls
            from pydantic_ai import Agent
            temp_agent = Agent(self.model)
            response = await temp_agent.run(prompt)
            # NOTE(review): `.data` is the result accessor on older pydantic_ai
            # versions; newer releases expose `.output` — confirm against the
            # pinned dependency.
            result = response.data

            chunks = [chunk.strip() for chunk in result.split("---CHUNK---")]

            # Keep only chunks inside the configured size bounds.
            valid_chunks = []
            for chunk in chunks:
                if self.config.min_chunk_size <= len(chunk) <= self.config.max_chunk_size:
                    valid_chunks.append(chunk)

            return valid_chunks if valid_chunks else self._simple_split(section)

        except Exception as e:
            logger.error(f"LLM chunking failed: {e}")
            return self._simple_split(section)

    def _simple_split(self, text: str) -> List[str]:
        """
        Simple character-window splitting as a fallback.

        Prefers ending each chunk at a sentence boundary found within the
        trailing 200 characters of the window.

        Args:
            text: Text to split

        Returns:
            List of chunks
        """
        chunks = []
        start = 0

        while start < len(text):
            end = start + self.config.chunk_size

            if end >= len(text):
                # Last chunk
                chunks.append(text[start:])
                break

            # Try to end at a sentence boundary near the window's end.
            chunk_end = end
            for i in range(end, max(start + self.config.min_chunk_size, end - 200), -1):
                if text[i] in '.!?\n':
                    chunk_end = i + 1
                    break

            chunks.append(text[start:chunk_end])
            # Back up by the configured overlap, but always advance by at
            # least one character: the previous implementation could loop
            # forever when the overlap reached back past the boundary-adjusted
            # chunk start.
            start = max(chunk_end - self.config.chunk_overlap, start + 1)

        return chunks

    def _simple_chunk(
        self,
        content: str,
        base_metadata: Dict[str, Any]
    ) -> List["DocumentChunk"]:
        """
        Simple rule-based chunking.

        Args:
            content: Content to chunk
            base_metadata: Base metadata for chunks

        Returns:
            List of document chunks
        """
        chunks = self._simple_split(content)
        return self._create_chunk_objects(chunks, content, base_metadata)

    def _create_chunk_objects(
        self,
        chunks: List[str],
        original_content: str,
        base_metadata: Dict[str, Any]
    ) -> List["DocumentChunk"]:
        """
        Create DocumentChunk objects from text chunks.

        Args:
            chunks: List of chunk texts
            original_content: Original document content
            base_metadata: Base metadata

        Returns:
            List of DocumentChunk objects
        """
        chunk_objects = []
        current_pos = 0

        for i, chunk_text in enumerate(chunks):
            # Find the position of this chunk in the original content.
            start_pos = original_content.find(chunk_text, current_pos)
            if start_pos == -1:
                # Fallback: estimate position (chunk text may have been
                # re-written by the LLM and not appear verbatim).
                start_pos = current_pos

            end_pos = start_pos + len(chunk_text)

            chunk_metadata = {
                **base_metadata,
                "chunk_method": "semantic" if self.config.use_semantic_splitting else "simple",
                "total_chunks": len(chunks)
            }

            chunk_objects.append(DocumentChunk(
                content=chunk_text.strip(),
                index=i,
                start_char=start_pos,
                end_char=end_pos,
                metadata=chunk_metadata
            ))

            current_pos = end_pos

        return chunk_objects
class SimpleChunker:
    """Simple non-semantic chunker for faster processing."""

    def __init__(self, config: "ChunkingConfig"):
        """Initialize simple chunker."""
        self.config = config

    def chunk_document(
        self,
        content: str,
        title: str,
        source: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> List["DocumentChunk"]:
        """
        Chunk document using simple paragraph-based rules.

        Paragraphs are greedily packed into chunks of at most
        ``config.chunk_size`` characters. NOTE: a single paragraph longer
        than ``chunk_size`` is emitted as one oversized chunk rather than
        split further.

        Args:
            content: Document content
            title: Document title
            source: Document source
            metadata: Additional metadata merged into every chunk

        Returns:
            List of document chunks (empty for blank content)
        """
        if not content.strip():
            return []

        base_metadata = {
            "title": title,
            "source": source,
            "chunk_method": "simple",
            **(metadata or {})
        }

        # Split on paragraph breaks (blank lines).
        paragraphs = re.split(r'\n\s*\n', content)

        chunks = []
        current_chunk = ""
        current_start = 0  # char offset of the current chunk's first paragraph
        search_pos = 0     # cursor for locating paragraphs in `content`
        chunk_index = 0

        for paragraph in paragraphs:
            paragraph = paragraph.strip()
            if not paragraph:
                continue

            # Locate this paragraph in the original content so start/end
            # offsets refer to real character positions. (The previous
            # implementation accumulated offsets from chunk lengths, which
            # drifted away from the actual positions.)
            para_pos = content.find(paragraph, search_pos)
            if para_pos == -1:
                # Should not happen since paragraphs come from `content`;
                # fall back to the cursor position.
                para_pos = search_pos
            search_pos = para_pos + len(paragraph)

            # Greedily extend the current chunk while it fits the budget.
            potential_chunk = current_chunk + "\n\n" + paragraph if current_chunk else paragraph

            if len(potential_chunk) <= self.config.chunk_size:
                if not current_chunk:
                    current_start = para_pos
                current_chunk = potential_chunk
            else:
                # Flush the full chunk, then start a new one here.
                if current_chunk:
                    chunks.append(self._create_chunk(
                        current_chunk,
                        chunk_index,
                        current_start,
                        current_start + len(current_chunk),
                        base_metadata.copy()
                    ))
                    chunk_index += 1

                current_chunk = paragraph
                current_start = para_pos

        # Add final chunk
        if current_chunk:
            chunks.append(self._create_chunk(
                current_chunk,
                chunk_index,
                current_start,
                current_start + len(current_chunk),
                base_metadata.copy()
            ))

        # Update total chunks in metadata now that the count is known.
        for chunk in chunks:
            chunk.metadata["total_chunks"] = len(chunks)

        return chunks

    def _create_chunk(
        self,
        content: str,
        index: int,
        start_pos: int,
        end_pos: int,
        metadata: Dict[str, Any]
    ) -> "DocumentChunk":
        """Create a DocumentChunk object."""
        return DocumentChunk(
            content=content.strip(),
            index=index,
            start_char=start_pos,
            end_char=end_pos,
            metadata=metadata
        )
# Factory function
def create_chunker(config: ChunkingConfig):
    """
    Create appropriate chunker based on configuration.

    Args:
        config: Chunking configuration

    Returns:
        Chunker instance: a SemanticChunker when semantic splitting is
        enabled, otherwise a SimpleChunker.
    """
    chunker_cls = SemanticChunker if config.use_semantic_splitting else SimpleChunker
    return chunker_cls(config)
# Example usage
async def main():
    """Example usage of the chunker.

    Builds a semantic-splitting configuration, chunks a small markdown
    sample, and prints a short summary of every resulting chunk.
    """
    # NOTE(review): with use_semantic_splitting=True this example needs the
    # provider-backed LLM client configured; on failure chunk_document falls
    # back to the rule-based splitter.
    config = ChunkingConfig(
        chunk_size=500,
        chunk_overlap=50,
        use_semantic_splitting=True
    )

    chunker = create_chunker(config)

    sample_text = """
    # Big Tech AI Initiatives

    ## Google's AI Strategy
    Google has been investing heavily in artificial intelligence research and development.
    Their main focus areas include:

    - Large language models (LaMDA, PaLM, Gemini)
    - Computer vision and image recognition
    - Natural language processing
    - AI-powered search improvements

    The company's DeepMind division continues to push the boundaries of AI research,
    with breakthrough achievements in protein folding prediction and game playing.

    ## Microsoft's Partnership with OpenAI
    Microsoft's strategic partnership with OpenAI has positioned them as a leader
    in the generative AI space. Key developments include:

    1. Integration of GPT models into Office 365
    2. Azure OpenAI Service for enterprise customers
    3. Investment in OpenAI's continued research
    """

    chunks = await chunker.chunk_document(
        content=sample_text,
        title="Big Tech AI Report",
        source="example.md"
    )

    # Print a short preview of each chunk for eyeballing the split quality.
    for i, chunk in enumerate(chunks):
        print(f"Chunk {i}: {len(chunk.content)} chars")
        print(f"Content: {chunk.content[:100]}...")
        print(f"Metadata: {chunk.metadata}")
        print("---")

if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,418 @@
"""
Document embedding generation for vector search.
"""
import os
import asyncio
import logging
from typing import List, Dict, Any, Optional, Tuple
from datetime import datetime
import json
from openai import RateLimitError, APIError
from dotenv import load_dotenv
from .chunker import DocumentChunk
# Import flexible providers
try:
from ..utils.providers import get_embedding_client, get_embedding_model
except ImportError:
# For direct execution or testing
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.providers import get_embedding_client, get_embedding_model
# Load environment variables
load_dotenv()
logger = logging.getLogger(__name__)
# Initialize client with flexible provider
embedding_client = get_embedding_client()
EMBEDDING_MODEL = get_embedding_model()
class EmbeddingGenerator:
    """Generates embeddings for document chunks via the configured provider."""

    def __init__(
        self,
        model: Optional[str] = None,
        batch_size: int = 100,
        max_retries: int = 3,
        retry_delay: float = 1.0
    ):
        """
        Initialize embedding generator.

        Args:
            model: Embedding model to use. Defaults to the provider-configured
                EMBEDDING_MODEL; resolved here at call time rather than being
                baked in as a def-time default, so provider changes after
                import are picked up.
            batch_size: Number of texts to process per API request
            max_retries: Maximum number of retry attempts
            retry_delay: Base delay between retries in seconds
        """
        if model is None:
            model = EMBEDDING_MODEL
        self.model = model
        self.batch_size = batch_size
        self.max_retries = max_retries
        self.retry_delay = retry_delay

        # Model-specific configurations (output dimensions, input token cap).
        self.model_configs = {
            "text-embedding-3-small": {"dimensions": 1536, "max_tokens": 8191},
            "text-embedding-3-large": {"dimensions": 3072, "max_tokens": 8191},
            "text-embedding-ada-002": {"dimensions": 1536, "max_tokens": 8191}
        }

        if model not in self.model_configs:
            logger.warning(f"Unknown model {model}, using default config")
            self.config = {"dimensions": 1536, "max_tokens": 8191}
        else:
            self.config = self.model_configs[model]

    async def generate_embedding(self, text: str) -> List[float]:
        """
        Generate embedding for a single text.

        Retries on rate limits (exponential backoff) and transient API errors.

        Args:
            text: Text to embed

        Returns:
            Embedding vector

        Raises:
            RateLimitError/APIError/Exception: the last provider error when
                all retries are exhausted.
        """
        # Truncate text if too long (rough estimate of ~4 chars per token).
        if len(text) > self.config["max_tokens"] * 4:
            text = text[:self.config["max_tokens"] * 4]

        for attempt in range(self.max_retries):
            try:
                response = await embedding_client.embeddings.create(
                    model=self.model,
                    input=text
                )
                return response.data[0].embedding

            except RateLimitError:
                if attempt == self.max_retries - 1:
                    raise
                # Exponential backoff for rate limits
                delay = self.retry_delay * (2 ** attempt)
                logger.warning(f"Rate limit hit, retrying in {delay}s")
                await asyncio.sleep(delay)

            except APIError as e:
                logger.error(f"OpenAI API error: {e}")
                if attempt == self.max_retries - 1:
                    raise
                await asyncio.sleep(self.retry_delay)

            except Exception as e:
                logger.error(f"Unexpected error generating embedding: {e}")
                if attempt == self.max_retries - 1:
                    raise
                await asyncio.sleep(self.retry_delay)

    async def generate_embeddings_batch(
        self,
        texts: List[str]
    ) -> List[List[float]]:
        """
        Generate embeddings for a batch of texts.

        Empty texts are embedded as "" placeholders; over-long texts are
        truncated. Falls back to per-text processing if batch retries fail.

        Args:
            texts: List of texts to embed

        Returns:
            List of embedding vectors (same order as `texts`)
        """
        # Filter and truncate texts
        processed_texts = []
        for text in texts:
            if not text or not text.strip():
                processed_texts.append("")
                continue

            # Truncate if too long
            if len(text) > self.config["max_tokens"] * 4:
                text = text[:self.config["max_tokens"] * 4]

            processed_texts.append(text)

        for attempt in range(self.max_retries):
            try:
                response = await embedding_client.embeddings.create(
                    model=self.model,
                    input=processed_texts
                )
                return [data.embedding for data in response.data]

            except RateLimitError:
                if attempt == self.max_retries - 1:
                    raise
                delay = self.retry_delay * (2 ** attempt)
                logger.warning(f"Rate limit hit, retrying batch in {delay}s")
                await asyncio.sleep(delay)

            except APIError as e:
                logger.error(f"OpenAI API error in batch: {e}")
                if attempt == self.max_retries - 1:
                    # Fallback to individual processing
                    return await self._process_individually(processed_texts)
                await asyncio.sleep(self.retry_delay)

            except Exception as e:
                logger.error(f"Unexpected error in batch embedding: {e}")
                if attempt == self.max_retries - 1:
                    return await self._process_individually(processed_texts)
                await asyncio.sleep(self.retry_delay)

    async def _process_individually(
        self,
        texts: List[str]
    ) -> List[List[float]]:
        """
        Process texts individually as a fallback after batch failures.

        Texts that still fail (or are empty) get a zero vector so the result
        stays aligned with the input order.

        Args:
            texts: List of texts to embed

        Returns:
            List of embedding vectors
        """
        embeddings = []

        for text in texts:
            try:
                if not text or not text.strip():
                    embeddings.append([0.0] * self.config["dimensions"])
                    continue

                embedding = await self.generate_embedding(text)
                embeddings.append(embedding)

                # Small delay to avoid overwhelming the API
                await asyncio.sleep(0.1)

            except Exception as e:
                logger.error(f"Failed to embed text: {e}")
                # Use zero vector as fallback
                embeddings.append([0.0] * self.config["dimensions"])

        return embeddings

    async def embed_chunks(
        self,
        chunks: List["DocumentChunk"],
        progress_callback: Optional[callable] = None
    ) -> List["DocumentChunk"]:
        """
        Generate embeddings for document chunks.

        Successful chunks are rebuilt as new DocumentChunk objects with
        embedding metadata; a failed batch keeps the original chunk objects,
        tagged with the error and a zero-vector embedding.

        Args:
            chunks: List of document chunks
            progress_callback: Optional callable(current_batch, total_batches)

        Returns:
            Chunks with an `.embedding` attribute added
        """
        if not chunks:
            return chunks

        logger.info(f"Generating embeddings for {len(chunks)} chunks")

        # Process chunks in batches
        embedded_chunks = []
        total_batches = (len(chunks) + self.batch_size - 1) // self.batch_size

        for i in range(0, len(chunks), self.batch_size):
            batch_chunks = chunks[i:i + self.batch_size]
            batch_texts = [chunk.content for chunk in batch_chunks]

            try:
                # Generate embeddings for this batch
                embeddings = await self.generate_embeddings_batch(batch_texts)

                # Add embeddings to chunks
                for chunk, embedding in zip(batch_chunks, embeddings):
                    # Create a new chunk with embedding metadata
                    embedded_chunk = DocumentChunk(
                        content=chunk.content,
                        index=chunk.index,
                        start_char=chunk.start_char,
                        end_char=chunk.end_char,
                        metadata={
                            **chunk.metadata,
                            "embedding_model": self.model,
                            "embedding_generated_at": datetime.now().isoformat()
                        },
                        token_count=chunk.token_count
                    )
                    # Embedding is carried as a plain attribute (DocumentChunk
                    # is a non-slots dataclass).
                    embedded_chunk.embedding = embedding
                    embedded_chunks.append(embedded_chunk)

                # Progress update
                current_batch = (i // self.batch_size) + 1
                if progress_callback:
                    progress_callback(current_batch, total_batches)

                logger.info(f"Processed batch {current_batch}/{total_batches}")

            except Exception as e:
                logger.error(f"Failed to process batch {i//self.batch_size + 1}: {e}")

                # Add chunks without real embeddings as fallback so the
                # pipeline can continue; the error is recorded in metadata.
                for chunk in batch_chunks:
                    chunk.metadata.update({
                        "embedding_error": str(e),
                        "embedding_generated_at": datetime.now().isoformat()
                    })
                    chunk.embedding = [0.0] * self.config["dimensions"]
                    embedded_chunks.append(chunk)

        logger.info(f"Generated embeddings for {len(embedded_chunks)} chunks")
        return embedded_chunks

    async def embed_query(self, query: str) -> List[float]:
        """
        Generate embedding for a search query.

        Args:
            query: Search query

        Returns:
            Query embedding
        """
        return await self.generate_embedding(query)

    def get_embedding_dimension(self) -> int:
        """Get the dimension of embeddings for this model."""
        return self.config["dimensions"]
# Cache for embeddings
class EmbeddingCache:
    """Simple in-memory cache for embeddings, keyed by an MD5 of the text.

    When full, the entry with the oldest access time is evicted.
    """

    def __init__(self, max_size: int = 1000):
        """Initialize an empty cache bounded at `max_size` entries."""
        self.cache: Dict[str, List[float]] = {}
        self.access_times: Dict[str, datetime] = {}
        self.max_size = max_size

    def get(self, text: str) -> Optional[List[float]]:
        """Return the cached embedding for `text`, or None on a miss."""
        key = self._hash_text(text)
        if key not in self.cache:
            return None
        # Refresh the access time so the entry survives eviction longer.
        self.access_times[key] = datetime.now()
        return self.cache[key]

    def put(self, text: str, embedding: List[float]):
        """Store `embedding` under the hash of `text`, evicting if full."""
        key = self._hash_text(text)
        if len(self.cache) >= self.max_size:
            stalest = min(self.access_times, key=self.access_times.get)
            del self.cache[stalest]
            del self.access_times[stalest]
        self.cache[key] = embedding
        self.access_times[key] = datetime.now()

    def _hash_text(self, text: str) -> str:
        """Hash the text to a fixed-size cache key (non-cryptographic use)."""
        import hashlib
        return hashlib.md5(text.encode()).hexdigest()
# Factory function
def create_embedder(
    model: str = EMBEDDING_MODEL,
    use_cache: bool = True,
    **kwargs
) -> EmbeddingGenerator:
    """
    Create embedding generator with optional caching.

    Args:
        model: Embedding model to use
        use_cache: Whether to wrap single-text embedding in an in-memory cache
        **kwargs: Additional arguments for EmbeddingGenerator

    Returns:
        EmbeddingGenerator instance
    """
    embedder = EmbeddingGenerator(model=model, **kwargs)

    if not use_cache:
        return embedder

    # Wrap generate_embedding with a read-through cache.
    cache = EmbeddingCache()
    uncached_generate = embedder.generate_embedding

    async def generate_with_cache(text: str) -> List[float]:
        hit = cache.get(text)
        if hit is not None:
            return hit
        embedding = await uncached_generate(text)
        cache.put(text, embedding)
        return embedding

    embedder.generate_embedding = generate_with_cache
    return embedder
# Example usage
async def main():
    """Example usage of the embedder.

    Chunks a small sample with the non-semantic chunker, generates
    embeddings for each chunk, and embeds a standalone query.
    """
    from .chunker import ChunkingConfig, create_chunker

    # Create chunker and embedder
    config = ChunkingConfig(chunk_size=200, use_semantic_splitting=False)
    chunker = create_chunker(config)
    embedder = create_embedder()

    sample_text = """
    Google's AI initiatives include advanced language models, computer vision,
    and machine learning research. The company has invested heavily in
    transformer architectures and neural network optimization.

    Microsoft's partnership with OpenAI has led to integration of GPT models
    into various products and services, making AI accessible to enterprise
    customers through Azure cloud services.
    """

    # Chunk the document.
    # NOTE(review): SimpleChunker.chunk_document is synchronous, so no await
    # here — this only holds while use_semantic_splitting stays False above.
    chunks = chunker.chunk_document(
        content=sample_text,
        title="AI Initiatives",
        source="example.md"
    )

    print(f"Created {len(chunks)} chunks")

    # Generate embeddings with a simple progress printout per batch.
    def progress_callback(current, total):
        print(f"Processing batch {current}/{total}")

    embedded_chunks = await embedder.embed_chunks(chunks, progress_callback)

    for i, chunk in enumerate(embedded_chunks):
        print(f"Chunk {i}: {len(chunk.content)} chars, embedding dim: {len(chunk.embedding)}")

    # Test query embedding
    query_embedding = await embedder.embed_query("Google AI research")
    print(f"Query embedding dimension: {len(query_embedding)}")

if __name__ == "__main__":
    asyncio.run(main())

View File

@@ -0,0 +1,434 @@
"""
Main ingestion script for processing markdown documents into vector DB and knowledge graph.
"""
import os
import asyncio
import logging
import json
import glob
from pathlib import Path
from typing import List, Dict, Any, Optional
from datetime import datetime
import argparse
import asyncpg
from dotenv import load_dotenv
from .chunker import ChunkingConfig, create_chunker, DocumentChunk
from .embedder import create_embedder
# Import utilities
try:
from ..utils.db_utils import initialize_database, close_database, db_pool
from ..utils.models import IngestionConfig, IngestionResult
except ImportError:
# For direct execution or testing
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.db_utils import initialize_database, close_database, db_pool
from utils.models import IngestionConfig, IngestionResult
# Load environment variables
load_dotenv()
logger = logging.getLogger(__name__)
class DocumentIngestionPipeline:
    """Pipeline for ingesting markdown documents into the vector database."""

    def __init__(
        self,
        config: "IngestionConfig",
        documents_folder: str = "documents",
        clean_before_ingest: bool = False
    ):
        """
        Initialize ingestion pipeline.

        Args:
            config: Ingestion configuration
            documents_folder: Folder containing markdown documents
            clean_before_ingest: Whether to clean existing data before ingestion
        """
        self.config = config
        self.documents_folder = documents_folder
        self.clean_before_ingest = clean_before_ingest

        # Initialize components
        self.chunker_config = ChunkingConfig(
            chunk_size=config.chunk_size,
            chunk_overlap=config.chunk_overlap,
            max_chunk_size=config.max_chunk_size,
            use_semantic_splitting=config.use_semantic_chunking
        )
        self.chunker = create_chunker(self.chunker_config)
        self.embedder = create_embedder()

        self._initialized = False

    async def initialize(self):
        """Initialize database connections (idempotent)."""
        if self._initialized:
            return

        logger.info("Initializing ingestion pipeline...")
        await initialize_database()
        self._initialized = True
        logger.info("Ingestion pipeline initialized")

    async def close(self):
        """Close database connections (no-op when never initialized)."""
        if self._initialized:
            await close_database()
            self._initialized = False

    async def ingest_documents(
        self,
        progress_callback: Optional[callable] = None
    ) -> List["IngestionResult"]:
        """
        Ingest all documents from the documents folder.

        Per-file failures are recorded as error results rather than aborting
        the whole run.

        Args:
            progress_callback: Optional callable(processed_count, total_count)

        Returns:
            List of ingestion results, one per discovered file
        """
        if not self._initialized:
            await self.initialize()

        # Clean existing data if requested
        if self.clean_before_ingest:
            await self._clean_databases()

        # Find all markdown files
        markdown_files = self._find_markdown_files()
        if not markdown_files:
            logger.warning(f"No markdown files found in {self.documents_folder}")
            return []

        logger.info(f"Found {len(markdown_files)} markdown files to process")

        results = []
        for i, file_path in enumerate(markdown_files):
            try:
                logger.info(f"Processing file {i+1}/{len(markdown_files)}: {file_path}")

                result = await self._ingest_single_document(file_path)
                results.append(result)

                if progress_callback:
                    progress_callback(i + 1, len(markdown_files))

            except Exception as e:
                logger.error(f"Failed to process {file_path}: {e}")
                # Record the failure and keep going with the remaining files.
                results.append(IngestionResult(
                    document_id="",
                    title=os.path.basename(file_path),
                    chunks_created=0,
                    entities_extracted=0,
                    relationships_created=0,
                    processing_time_ms=0,
                    errors=[str(e)]
                ))

        # Log summary
        total_chunks = sum(r.chunks_created for r in results)
        total_errors = sum(len(r.errors) for r in results)

        logger.info(f"Ingestion complete: {len(results)} documents, {total_chunks} chunks, {total_errors} errors")

        return results

    async def _ingest_single_document(self, file_path: str) -> "IngestionResult":
        """
        Ingest a single document: read, chunk, embed, and persist.

        Args:
            file_path: Path to the document file

        Returns:
            Ingestion result
        """
        start_time = datetime.now()

        # Read document
        document_content = self._read_document(file_path)
        document_title = self._extract_title(document_content, file_path)
        document_source = os.path.relpath(file_path, self.documents_folder)

        # Extract metadata from content
        document_metadata = self._extract_document_metadata(document_content, file_path)

        logger.info(f"Processing document: {document_title}")

        # Chunk the document. SemanticChunker.chunk_document is async while
        # SimpleChunker's is synchronous; support both so that
        # use_semantic_chunking=False does not raise a TypeError on `await`.
        chunk_result = self.chunker.chunk_document(
            content=document_content,
            title=document_title,
            source=document_source,
            metadata=document_metadata
        )
        chunks = await chunk_result if asyncio.iscoroutine(chunk_result) else chunk_result

        if not chunks:
            logger.warning(f"No chunks created for {document_title}")
            return IngestionResult(
                document_id="",
                title=document_title,
                chunks_created=0,
                entities_extracted=0,
                relationships_created=0,
                processing_time_ms=(datetime.now() - start_time).total_seconds() * 1000,
                errors=["No chunks created"]
            )

        logger.info(f"Created {len(chunks)} chunks")

        # Entity extraction removed (graph-related functionality)
        entities_extracted = 0

        # Generate embeddings
        embedded_chunks = await self.embedder.embed_chunks(chunks)
        logger.info(f"Generated embeddings for {len(embedded_chunks)} chunks")

        # Save to PostgreSQL
        document_id = await self._save_to_postgres(
            document_title,
            document_source,
            document_content,
            embedded_chunks,
            document_metadata
        )

        logger.info(f"Saved document to PostgreSQL with ID: {document_id}")

        # Knowledge graph functionality removed
        relationships_created = 0
        graph_errors = []

        # Calculate processing time
        processing_time = (datetime.now() - start_time).total_seconds() * 1000

        return IngestionResult(
            document_id=document_id,
            title=document_title,
            chunks_created=len(chunks),
            entities_extracted=entities_extracted,
            relationships_created=relationships_created,
            processing_time_ms=processing_time,
            errors=graph_errors
        )

    def _find_markdown_files(self) -> List[str]:
        """Find all markdown/text files in the documents folder (recursive)."""
        if not os.path.exists(self.documents_folder):
            logger.error(f"Documents folder not found: {self.documents_folder}")
            return []

        patterns = ["*.md", "*.markdown", "*.txt"]
        files = []

        for pattern in patterns:
            files.extend(glob.glob(os.path.join(self.documents_folder, "**", pattern), recursive=True))

        return sorted(files)

    def _read_document(self, file_path: str) -> str:
        """Read document content, falling back to latin-1 on decode errors."""
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                return f.read()
        except UnicodeDecodeError:
            # Try with different encoding
            with open(file_path, 'r', encoding='latin-1') as f:
                return f.read()

    def _extract_title(self, content: str, file_path: str) -> str:
        """Extract title from the first markdown H1, else from the filename."""
        lines = content.split('\n')
        for line in lines[:10]:  # Check first 10 lines
            line = line.strip()
            if line.startswith('# '):
                return line[2:].strip()

        # Fallback to filename
        return os.path.splitext(os.path.basename(file_path))[0]

    def _extract_document_metadata(self, content: str, file_path: str) -> Dict[str, Any]:
        """Extract metadata: file info, optional YAML frontmatter, basic stats."""
        metadata = {
            "file_path": file_path,
            "file_size": len(content),
            "ingestion_date": datetime.now().isoformat()
        }

        # Try to extract YAML frontmatter (best-effort; PyYAML is optional).
        if content.startswith('---'):
            try:
                import yaml
                end_marker = content.find('\n---\n', 4)
                if end_marker != -1:
                    frontmatter = content[4:end_marker]
                    yaml_metadata = yaml.safe_load(frontmatter)
                    if isinstance(yaml_metadata, dict):
                        metadata.update(yaml_metadata)
            except ImportError:
                logger.warning("PyYAML not installed, skipping frontmatter extraction")
            except Exception as e:
                logger.warning(f"Failed to parse frontmatter: {e}")

        # Extract some basic metadata from content
        lines = content.split('\n')
        metadata['line_count'] = len(lines)
        metadata['word_count'] = len(content.split())

        return metadata

    async def _save_to_postgres(
        self,
        title: str,
        source: str,
        content: str,
        chunks: List["DocumentChunk"],
        metadata: Dict[str, Any]
    ) -> str:
        """Save the document and its chunks to PostgreSQL in one transaction."""
        async with db_pool.acquire() as conn:
            async with conn.transaction():
                # Insert document
                document_result = await conn.fetchrow(
                    """
                    INSERT INTO documents (title, source, content, metadata)
                    VALUES ($1, $2, $3, $4)
                    RETURNING id::text
                    """,
                    title,
                    source,
                    content,
                    json.dumps(metadata)
                )

                document_id = document_result["id"]

                # Insert chunks
                for chunk in chunks:
                    # Convert embedding to PostgreSQL vector string format
                    embedding_data = None
                    if hasattr(chunk, 'embedding') and chunk.embedding:
                        # PostgreSQL vector format: '[1.0,2.0,3.0]' (no spaces after commas)
                        embedding_data = '[' + ','.join(map(str, chunk.embedding)) + ']'

                    await conn.execute(
                        """
                        INSERT INTO chunks (document_id, content, embedding, chunk_index, metadata, token_count)
                        VALUES ($1::uuid, $2, $3::vector, $4, $5, $6)
                        """,
                        document_id,
                        chunk.content,
                        embedding_data,
                        chunk.index,
                        json.dumps(chunk.metadata),
                        chunk.token_count
                    )

                return document_id

    async def _clean_databases(self):
        """Delete all existing chunks and documents (destructive)."""
        logger.warning("Cleaning existing data from databases...")

        # Clean PostgreSQL: chunks first to satisfy the FK to documents.
        async with db_pool.acquire() as conn:
            async with conn.transaction():
                await conn.execute("DELETE FROM chunks")
                await conn.execute("DELETE FROM documents")

        logger.info("Cleaned PostgreSQL database")
async def main():
    """CLI entry point: parse arguments, run ingestion, print a summary."""
    parser = argparse.ArgumentParser(description="Ingest documents into vector DB")
    parser.add_argument("--documents", "-d", default="documents", help="Documents folder path")
    parser.add_argument("--clean", "-c", action="store_true", help="Clean existing data before ingestion")
    parser.add_argument("--chunk-size", type=int, default=1000, help="Chunk size for splitting documents")
    parser.add_argument("--chunk-overlap", type=int, default=200, help="Chunk overlap size")
    parser.add_argument("--no-semantic", action="store_true", help="Disable semantic chunking")
    # Graph-related arguments removed
    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")

    args = parser.parse_args()

    # Configure logging
    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )

    # Create ingestion configuration
    config = IngestionConfig(
        chunk_size=args.chunk_size,
        chunk_overlap=args.chunk_overlap,
        use_semantic_chunking=not args.no_semantic
    )

    # Create and run pipeline
    pipeline = DocumentIngestionPipeline(
        config=config,
        documents_folder=args.documents,
        clean_before_ingest=args.clean
    )

    def progress_callback(current: int, total: int):
        print(f"Progress: {current}/{total} documents processed")

    try:
        start_time = datetime.now()
        results = await pipeline.ingest_documents(progress_callback)
        end_time = datetime.now()

        total_time = (end_time - start_time).total_seconds()

        # Print summary
        print("\n" + "="*50)
        print("INGESTION SUMMARY")
        print("="*50)
        print(f"Documents processed: {len(results)}")
        print(f"Total chunks created: {sum(r.chunks_created for r in results)}")
        # Graph-related stats removed
        print(f"Total errors: {sum(len(r.errors) for r in results)}")
        print(f"Total processing time: {total_time:.2f} seconds")
        print()

        # Print individual results with a distinct marker per outcome
        # (previously both branches produced an empty string, so success and
        # failure were indistinguishable in the output).
        for result in results:
            status = "[ok]" if not result.errors else "[failed]"
            print(f"{status} {result.title}: {result.chunks_created} chunks")
            if result.errors:
                for error in result.errors:
                    print(f"  Error: {error}")

    except KeyboardInterrupt:
        print("\nIngestion interrupted by user")
    except Exception as e:
        logger.error(f"Ingestion failed: {e}")
        raise
    finally:
        # close() is a no-op if the pipeline never finished initializing.
        await pipeline.close()

if __name__ == "__main__":
    asyncio.run(main())