mirror of
https://github.com/coleam00/context-engineering-intro.git
synced 2025-12-29 16:14:56 +00:00
AI Agent Factory with Claude Code Subagents
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
"""Ingestion package for processing documents into vector DB and knowledge graph."""
|
||||
|
||||
__version__ = "0.1.0"
|
||||
@@ -0,0 +1,518 @@
|
||||
"""
|
||||
Semantic chunking implementation for intelligent document splitting.
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
from dataclasses import dataclass
|
||||
import asyncio
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Import flexible providers
|
||||
try:
|
||||
from ..utils.providers import get_embedding_client, get_ingestion_model
|
||||
except ImportError:
|
||||
# For direct execution or testing
|
||||
import sys
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from utils.providers import get_embedding_client, get_ingestion_model
|
||||
|
||||
# Initialize clients with flexible providers
|
||||
embedding_client = get_embedding_client()
|
||||
ingestion_model = get_ingestion_model()
|
||||
|
||||
|
||||
@dataclass
class ChunkingConfig:
    """Configuration options controlling how documents are chunked.

    Defaults target ~1000-character chunks with a 200-character overlap,
    hard size bounds of 100/2000 characters, and LLM-assisted semantic
    splitting enabled.
    """

    # Target size of each chunk, in characters.
    chunk_size: int = 1000
    # Characters shared between consecutive chunks.
    chunk_overlap: int = 200
    # Hard upper bound on a single chunk's length.
    max_chunk_size: int = 2000
    # Chunks shorter than this are discarded.
    min_chunk_size: int = 100
    # Attempt LLM-based semantic splitting before the rule-based fallback.
    use_semantic_splitting: bool = True
    # Let structural boundaries (headers, lists, tables) guide splitting.
    preserve_structure: bool = True

    def __post_init__(self) -> None:
        """Reject configurations that cannot produce valid chunks."""
        if self.chunk_overlap >= self.chunk_size:
            raise ValueError("Chunk overlap must be less than chunk size")
        if self.min_chunk_size <= 0:
            raise ValueError("Minimum chunk size must be positive")
|
||||
|
||||
|
||||
@dataclass
class DocumentChunk:
    """A contiguous piece of a source document plus positional metadata."""

    content: str  # Chunk text.
    index: int  # Zero-based position of this chunk within the document.
    start_char: int  # Offset of the chunk's first character in the source.
    end_char: int  # Offset one past the chunk's last character.
    metadata: Dict[str, Any]  # Arbitrary chunk-level metadata.
    token_count: Optional[int] = None  # Estimated tokens; derived if omitted.

    def __post_init__(self):
        """Fill in a rough token estimate (~4 characters/token) when missing."""
        if self.token_count is None:
            self.token_count = len(self.content) // 4
|
||||
|
||||
|
||||
class SemanticChunker:
|
||||
"""Semantic document chunker using LLM for intelligent splitting."""
|
||||
|
||||
def __init__(self, config: ChunkingConfig):
|
||||
"""
|
||||
Initialize chunker.
|
||||
|
||||
Args:
|
||||
config: Chunking configuration
|
||||
"""
|
||||
self.config = config
|
||||
self.client = embedding_client
|
||||
self.model = ingestion_model
|
||||
|
||||
async def chunk_document(
|
||||
self,
|
||||
content: str,
|
||||
title: str,
|
||||
source: str,
|
||||
metadata: Optional[Dict[str, Any]] = None
|
||||
) -> List[DocumentChunk]:
|
||||
"""
|
||||
Chunk a document into semantically coherent pieces.
|
||||
|
||||
Args:
|
||||
content: Document content
|
||||
title: Document title
|
||||
source: Document source
|
||||
metadata: Additional metadata
|
||||
|
||||
Returns:
|
||||
List of document chunks
|
||||
"""
|
||||
if not content.strip():
|
||||
return []
|
||||
|
||||
base_metadata = {
|
||||
"title": title,
|
||||
"source": source,
|
||||
**(metadata or {})
|
||||
}
|
||||
|
||||
# First, try semantic chunking if enabled
|
||||
if self.config.use_semantic_splitting and len(content) > self.config.chunk_size:
|
||||
try:
|
||||
semantic_chunks = await self._semantic_chunk(content)
|
||||
if semantic_chunks:
|
||||
return self._create_chunk_objects(
|
||||
semantic_chunks,
|
||||
content,
|
||||
base_metadata
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Semantic chunking failed, falling back to simple chunking: {e}")
|
||||
|
||||
# Fallback to rule-based chunking
|
||||
return self._simple_chunk(content, base_metadata)
|
||||
|
||||
async def _semantic_chunk(self, content: str) -> List[str]:
|
||||
"""
|
||||
Perform semantic chunking using LLM.
|
||||
|
||||
Args:
|
||||
content: Content to chunk
|
||||
|
||||
Returns:
|
||||
List of chunk boundaries
|
||||
"""
|
||||
# First, split on natural boundaries
|
||||
sections = self._split_on_structure(content)
|
||||
|
||||
# Group sections into semantic chunks
|
||||
chunks = []
|
||||
current_chunk = ""
|
||||
|
||||
for section in sections:
|
||||
# Check if adding this section would exceed chunk size
|
||||
potential_chunk = current_chunk + "\n\n" + section if current_chunk else section
|
||||
|
||||
if len(potential_chunk) <= self.config.chunk_size:
|
||||
current_chunk = potential_chunk
|
||||
else:
|
||||
# Current chunk is ready, decide if we should split the section
|
||||
if current_chunk:
|
||||
chunks.append(current_chunk.strip())
|
||||
current_chunk = ""
|
||||
|
||||
# Handle oversized sections
|
||||
if len(section) > self.config.max_chunk_size:
|
||||
# Split the section semantically
|
||||
sub_chunks = await self._split_long_section(section)
|
||||
chunks.extend(sub_chunks)
|
||||
else:
|
||||
current_chunk = section
|
||||
|
||||
# Add the last chunk
|
||||
if current_chunk:
|
||||
chunks.append(current_chunk.strip())
|
||||
|
||||
return [chunk for chunk in chunks if len(chunk.strip()) >= self.config.min_chunk_size]
|
||||
|
||||
def _split_on_structure(self, content: str) -> List[str]:
|
||||
"""
|
||||
Split content on structural boundaries.
|
||||
|
||||
Args:
|
||||
content: Content to split
|
||||
|
||||
Returns:
|
||||
List of sections
|
||||
"""
|
||||
# Split on markdown headers, paragraphs, and other structural elements
|
||||
patterns = [
|
||||
r'\n#{1,6}\s+.+?\n', # Markdown headers
|
||||
r'\n\n+', # Multiple newlines (paragraph breaks)
|
||||
r'\n[-*+]\s+', # List items
|
||||
r'\n\d+\.\s+', # Numbered lists
|
||||
r'\n```.*?```\n', # Code blocks
|
||||
r'\n\|\s*.+?\|\s*\n', # Tables
|
||||
]
|
||||
|
||||
# Split by patterns but keep the separators
|
||||
sections = [content]
|
||||
|
||||
for pattern in patterns:
|
||||
new_sections = []
|
||||
for section in sections:
|
||||
parts = re.split(f'({pattern})', section, flags=re.MULTILINE | re.DOTALL)
|
||||
new_sections.extend([part for part in parts if part.strip()])
|
||||
sections = new_sections
|
||||
|
||||
return sections
|
||||
|
||||
async def _split_long_section(self, section: str) -> List[str]:
|
||||
"""
|
||||
Split a long section using LLM for semantic boundaries.
|
||||
|
||||
Args:
|
||||
section: Section to split
|
||||
|
||||
Returns:
|
||||
List of sub-chunks
|
||||
"""
|
||||
try:
|
||||
prompt = f"""
|
||||
Split the following text into semantically coherent chunks. Each chunk should:
|
||||
1. Be roughly {self.config.chunk_size} characters long
|
||||
2. End at natural semantic boundaries
|
||||
3. Maintain context and readability
|
||||
4. Not exceed {self.config.max_chunk_size} characters
|
||||
|
||||
Return only the split text with "---CHUNK---" as separator between chunks.
|
||||
|
||||
Text to split:
|
||||
{section}
|
||||
"""
|
||||
|
||||
# Use Pydantic AI for LLM calls
|
||||
from pydantic_ai import Agent
|
||||
temp_agent = Agent(self.model)
|
||||
|
||||
response = await temp_agent.run(prompt)
|
||||
result = response.data
|
||||
chunks = [chunk.strip() for chunk in result.split("---CHUNK---")]
|
||||
|
||||
# Validate chunks
|
||||
valid_chunks = []
|
||||
for chunk in chunks:
|
||||
if (self.config.min_chunk_size <= len(chunk) <= self.config.max_chunk_size):
|
||||
valid_chunks.append(chunk)
|
||||
|
||||
return valid_chunks if valid_chunks else self._simple_split(section)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"LLM chunking failed: {e}")
|
||||
return self._simple_split(section)
|
||||
|
||||
def _simple_split(self, text: str) -> List[str]:
|
||||
"""
|
||||
Simple text splitting as fallback.
|
||||
|
||||
Args:
|
||||
text: Text to split
|
||||
|
||||
Returns:
|
||||
List of chunks
|
||||
"""
|
||||
chunks = []
|
||||
start = 0
|
||||
|
||||
while start < len(text):
|
||||
end = start + self.config.chunk_size
|
||||
|
||||
if end >= len(text):
|
||||
# Last chunk
|
||||
chunks.append(text[start:])
|
||||
break
|
||||
|
||||
# Try to end at a sentence boundary
|
||||
chunk_end = end
|
||||
for i in range(end, max(start + self.config.min_chunk_size, end - 200), -1):
|
||||
if text[i] in '.!?\n':
|
||||
chunk_end = i + 1
|
||||
break
|
||||
|
||||
chunks.append(text[start:chunk_end])
|
||||
start = chunk_end - self.config.chunk_overlap
|
||||
|
||||
return chunks
|
||||
|
||||
def _simple_chunk(
|
||||
self,
|
||||
content: str,
|
||||
base_metadata: Dict[str, Any]
|
||||
) -> List[DocumentChunk]:
|
||||
"""
|
||||
Simple rule-based chunking.
|
||||
|
||||
Args:
|
||||
content: Content to chunk
|
||||
base_metadata: Base metadata for chunks
|
||||
|
||||
Returns:
|
||||
List of document chunks
|
||||
"""
|
||||
chunks = self._simple_split(content)
|
||||
return self._create_chunk_objects(chunks, content, base_metadata)
|
||||
|
||||
def _create_chunk_objects(
|
||||
self,
|
||||
chunks: List[str],
|
||||
original_content: str,
|
||||
base_metadata: Dict[str, Any]
|
||||
) -> List[DocumentChunk]:
|
||||
"""
|
||||
Create DocumentChunk objects from text chunks.
|
||||
|
||||
Args:
|
||||
chunks: List of chunk texts
|
||||
original_content: Original document content
|
||||
base_metadata: Base metadata
|
||||
|
||||
Returns:
|
||||
List of DocumentChunk objects
|
||||
"""
|
||||
chunk_objects = []
|
||||
current_pos = 0
|
||||
|
||||
for i, chunk_text in enumerate(chunks):
|
||||
# Find the position of this chunk in the original content
|
||||
start_pos = original_content.find(chunk_text, current_pos)
|
||||
if start_pos == -1:
|
||||
# Fallback: estimate position
|
||||
start_pos = current_pos
|
||||
|
||||
end_pos = start_pos + len(chunk_text)
|
||||
|
||||
# Create chunk metadata
|
||||
chunk_metadata = {
|
||||
**base_metadata,
|
||||
"chunk_method": "semantic" if self.config.use_semantic_splitting else "simple",
|
||||
"total_chunks": len(chunks)
|
||||
}
|
||||
|
||||
chunk_objects.append(DocumentChunk(
|
||||
content=chunk_text.strip(),
|
||||
index=i,
|
||||
start_char=start_pos,
|
||||
end_char=end_pos,
|
||||
metadata=chunk_metadata
|
||||
))
|
||||
|
||||
current_pos = end_pos
|
||||
|
||||
return chunk_objects
|
||||
|
||||
|
||||
class SimpleChunker:
    """Simple non-semantic chunker for faster processing.

    Splits on blank-line paragraph boundaries and greedily packs paragraphs
    into chunks of at most ``config.chunk_size`` characters; no LLM calls.
    """

    def __init__(self, config: ChunkingConfig):
        """Initialize simple chunker."""
        self.config = config

    def chunk_document(
        self,
        content: str,
        title: str,
        source: str,
        metadata: Optional[Dict[str, Any]] = None
    ) -> List[DocumentChunk]:
        """
        Chunk document using simple rules.

        Args:
            content: Document content
            title: Document title
            source: Document source
            metadata: Additional metadata merged into every chunk's metadata

        Returns:
            List of document chunks (empty for blank content)
        """
        if not content.strip():
            return []

        base_metadata = {
            "title": title,
            "source": source,
            "chunk_method": "simple",
            **(metadata or {})
        }

        # Split on paragraphs first
        paragraphs = re.split(r'\n\s*\n', content)
        chunks = []
        current_chunk = ""
        current_pos = 0
        chunk_index = 0

        for paragraph in paragraphs:
            paragraph = paragraph.strip()
            if not paragraph:
                continue

            # Check if adding this paragraph exceeds chunk size
            potential_chunk = current_chunk + "\n\n" + paragraph if current_chunk else paragraph

            if len(potential_chunk) <= self.config.chunk_size:
                current_chunk = potential_chunk
            else:
                # Save current chunk if it exists
                if current_chunk:
                    chunks.append(self._create_chunk(
                        current_chunk,
                        chunk_index,
                        current_pos,
                        current_pos + len(current_chunk),
                        base_metadata.copy()
                    ))

                    # Move position, but ensure overlap is respected
                    # NOTE(review): positions are advanced by an
                    # overlap-based estimate over the stripped/rejoined
                    # paragraphs, so start_char/end_char are approximate
                    # offsets, not exact indices into `content` — confirm
                    # before relying on them for highlighting or slicing.
                    overlap_start = max(0, len(current_chunk) - self.config.chunk_overlap)
                    current_pos += overlap_start
                    chunk_index += 1

                # Start new chunk with current paragraph
                current_chunk = paragraph

        # Add final chunk
        if current_chunk:
            chunks.append(self._create_chunk(
                current_chunk,
                chunk_index,
                current_pos,
                current_pos + len(current_chunk),
                base_metadata.copy()
            ))

        # Update total chunks in metadata (known only after the loop).
        for chunk in chunks:
            chunk.metadata["total_chunks"] = len(chunks)

        return chunks

    def _create_chunk(
        self,
        content: str,
        index: int,
        start_pos: int,
        end_pos: int,
        metadata: Dict[str, Any]
    ) -> DocumentChunk:
        """Create a DocumentChunk object."""
        return DocumentChunk(
            content=content.strip(),
            index=index,
            start_char=start_pos,
            end_char=end_pos,
            metadata=metadata
        )
|
||||
|
||||
|
||||
# Factory function
def create_chunker(config: ChunkingConfig):
    """
    Create appropriate chunker based on configuration.

    Args:
        config: Chunking configuration

    Returns:
        Chunker instance (SemanticChunker when semantic splitting is
        enabled, otherwise SimpleChunker)
    """
    # Semantic splitting requires the LLM-backed implementation; otherwise
    # the cheaper rule-based chunker suffices.
    chunker_cls = SemanticChunker if config.use_semantic_splitting else SimpleChunker
    return chunker_cls(config)
|
||||
|
||||
|
||||
# Example usage
async def main():
    """Example usage of the chunker.

    Builds a small config, chunks a markdown sample, and prints a short
    summary of each resulting chunk.
    """
    config = ChunkingConfig(
        chunk_size=500,
        chunk_overlap=50,
        use_semantic_splitting=True
    )

    chunker = create_chunker(config)

    # Markdown sample exercising headers, bullet lists, and numbered lists.
    sample_text = """
    # Big Tech AI Initiatives

    ## Google's AI Strategy
    Google has been investing heavily in artificial intelligence research and development.
    Their main focus areas include:

    - Large language models (LaMDA, PaLM, Gemini)
    - Computer vision and image recognition
    - Natural language processing
    - AI-powered search improvements

    The company's DeepMind division continues to push the boundaries of AI research,
    with breakthrough achievements in protein folding prediction and game playing.

    ## Microsoft's Partnership with OpenAI
    Microsoft's strategic partnership with OpenAI has positioned them as a leader
    in the generative AI space. Key developments include:

    1. Integration of GPT models into Office 365
    2. Azure OpenAI Service for enterprise customers
    3. Investment in OpenAI's continued research
    """

    # chunk_document is async on the semantic chunker, hence the await.
    chunks = await chunker.chunk_document(
        content=sample_text,
        title="Big Tech AI Report",
        source="example.md"
    )

    for i, chunk in enumerate(chunks):
        print(f"Chunk {i}: {len(chunk.content)} chars")
        print(f"Content: {chunk.content[:100]}...")
        print(f"Metadata: {chunk.metadata}")
        print("---")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1,418 @@
|
||||
"""
|
||||
Document embedding generation for vector search.
|
||||
"""
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
from openai import RateLimitError, APIError
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from .chunker import DocumentChunk
|
||||
|
||||
# Import flexible providers
|
||||
try:
|
||||
from ..utils.providers import get_embedding_client, get_embedding_model
|
||||
except ImportError:
|
||||
# For direct execution or testing
|
||||
import sys
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from utils.providers import get_embedding_client, get_embedding_model
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Initialize client with flexible provider
|
||||
embedding_client = get_embedding_client()
|
||||
EMBEDDING_MODEL = get_embedding_model()
|
||||
|
||||
|
||||
class EmbeddingGenerator:
    """Generates embeddings for document chunks.

    Wraps the provider's embeddings endpoint with retry/backoff handling,
    batched requests, and a per-text fallback path that degrades to zero
    vectors rather than failing the whole batch.
    """

    def __init__(
        self,
        model: str = EMBEDDING_MODEL,
        batch_size: int = 100,
        max_retries: int = 3,
        retry_delay: float = 1.0
    ):
        """
        Initialize embedding generator.

        Args:
            model: OpenAI embedding model to use
            batch_size: Number of texts to process in parallel
            max_retries: Maximum number of retry attempts
            retry_delay: Delay between retries in seconds
        """
        self.model = model
        self.batch_size = batch_size
        self.max_retries = max_retries
        self.retry_delay = retry_delay

        # Model-specific configurations: output vector dimensions and the
        # model's input token limit.
        self.model_configs = {
            "text-embedding-3-small": {"dimensions": 1536, "max_tokens": 8191},
            "text-embedding-3-large": {"dimensions": 3072, "max_tokens": 8191},
            "text-embedding-ada-002": {"dimensions": 1536, "max_tokens": 8191}
        }

        if model not in self.model_configs:
            logger.warning(f"Unknown model {model}, using default config")
            self.config = {"dimensions": 1536, "max_tokens": 8191}
        else:
            self.config = self.model_configs[model]

    async def generate_embedding(self, text: str) -> List[float]:
        """
        Generate embedding for a single text.

        Retries rate limits with exponential backoff; retries API and
        unexpected errors with a fixed delay; re-raises after the final
        attempt.

        Args:
            text: Text to embed

        Returns:
            Embedding vector

        Raises:
            RateLimitError, APIError, Exception: when all retries fail.
        """
        # Truncate text if too long (rough heuristic: ~4 chars per token).
        if len(text) > self.config["max_tokens"] * 4:  # Rough token estimation
            text = text[:self.config["max_tokens"] * 4]

        for attempt in range(self.max_retries):
            try:
                response = await embedding_client.embeddings.create(
                    model=self.model,
                    input=text
                )

                return response.data[0].embedding

            except RateLimitError as e:
                if attempt == self.max_retries - 1:
                    raise

                # Exponential backoff for rate limits
                delay = self.retry_delay * (2 ** attempt)
                logger.warning(f"Rate limit hit, retrying in {delay}s")
                await asyncio.sleep(delay)

            except APIError as e:
                logger.error(f"OpenAI API error: {e}")
                if attempt == self.max_retries - 1:
                    raise
                await asyncio.sleep(self.retry_delay)

            except Exception as e:
                logger.error(f"Unexpected error generating embedding: {e}")
                if attempt == self.max_retries - 1:
                    raise
                await asyncio.sleep(self.retry_delay)

    async def generate_embeddings_batch(
        self,
        texts: List[str]
    ) -> List[List[float]]:
        """
        Generate embeddings for a batch of texts.

        Retries the whole batch on failure; on final API/unexpected failure
        it degrades to per-text processing via ``_process_individually``.

        Args:
            texts: List of texts to embed

        Returns:
            List of embedding vectors, aligned 1:1 with ``texts``
        """
        # Filter and truncate texts
        # NOTE(review): blank inputs are forwarded as "" — some embedding
        # APIs reject empty-string inputs; confirm the configured provider
        # tolerates them, or substitute a placeholder before the API call.
        processed_texts = []
        for text in texts:
            if not text or not text.strip():
                processed_texts.append("")
                continue

            # Truncate if too long (~4 chars per token heuristic)
            if len(text) > self.config["max_tokens"] * 4:
                text = text[:self.config["max_tokens"] * 4]

            processed_texts.append(text)

        for attempt in range(self.max_retries):
            try:
                response = await embedding_client.embeddings.create(
                    model=self.model,
                    input=processed_texts
                )

                return [data.embedding for data in response.data]

            except RateLimitError as e:
                if attempt == self.max_retries - 1:
                    raise

                # Exponential backoff for rate limits
                delay = self.retry_delay * (2 ** attempt)
                logger.warning(f"Rate limit hit, retrying batch in {delay}s")
                await asyncio.sleep(delay)

            except APIError as e:
                logger.error(f"OpenAI API error in batch: {e}")
                if attempt == self.max_retries - 1:
                    # Fallback to individual processing
                    return await self._process_individually(processed_texts)
                await asyncio.sleep(self.retry_delay)

            except Exception as e:
                logger.error(f"Unexpected error in batch embedding: {e}")
                if attempt == self.max_retries - 1:
                    return await self._process_individually(processed_texts)
                await asyncio.sleep(self.retry_delay)

    async def _process_individually(
        self,
        texts: List[str]
    ) -> List[List[float]]:
        """
        Process texts individually as fallback.

        Blank or failed texts yield an all-zero vector so the result always
        aligns 1:1 with the input list.

        Args:
            texts: List of texts to embed

        Returns:
            List of embedding vectors
        """
        embeddings = []

        for text in texts:
            try:
                if not text or not text.strip():
                    embeddings.append([0.0] * self.config["dimensions"])
                    continue

                embedding = await self.generate_embedding(text)
                embeddings.append(embedding)

                # Small delay to avoid overwhelming the API
                await asyncio.sleep(0.1)

            except Exception as e:
                logger.error(f"Failed to embed text: {e}")
                # Use zero vector as fallback
                embeddings.append([0.0] * self.config["dimensions"])

        return embeddings

    async def embed_chunks(
        self,
        chunks: List[DocumentChunk],
        progress_callback: Optional[callable] = None
    ) -> List[DocumentChunk]:
        """
        Generate embeddings for document chunks.

        Args:
            chunks: List of document chunks
            progress_callback: Optional callback for progress updates,
                invoked as ``progress_callback(current_batch, total_batches)``

        Returns:
            Chunks with embeddings attached as an ``.embedding`` attribute
        """
        if not chunks:
            return chunks

        logger.info(f"Generating embeddings for {len(chunks)} chunks")

        # Process chunks in batches
        embedded_chunks = []
        # Ceiling division: number of batches of size batch_size.
        total_batches = (len(chunks) + self.batch_size - 1) // self.batch_size

        for i in range(0, len(chunks), self.batch_size):
            batch_chunks = chunks[i:i + self.batch_size]
            batch_texts = [chunk.content for chunk in batch_chunks]

            try:
                # Generate embeddings for this batch
                embeddings = await self.generate_embeddings_batch(batch_texts)

                # Add embeddings to chunks
                for chunk, embedding in zip(batch_chunks, embeddings):
                    # Create a new chunk with embedding; on this success
                    # path the input chunk objects are left unmodified.
                    embedded_chunk = DocumentChunk(
                        content=chunk.content,
                        index=chunk.index,
                        start_char=chunk.start_char,
                        end_char=chunk.end_char,
                        metadata={
                            **chunk.metadata,
                            "embedding_model": self.model,
                            "embedding_generated_at": datetime.now().isoformat()
                        },
                        token_count=chunk.token_count
                    )

                    # Add embedding as a separate attribute
                    embedded_chunk.embedding = embedding
                    embedded_chunks.append(embedded_chunk)

                # Progress update
                current_batch = (i // self.batch_size) + 1
                if progress_callback:
                    progress_callback(current_batch, total_batches)

                logger.info(f"Processed batch {current_batch}/{total_batches}")

            except Exception as e:
                logger.error(f"Failed to process batch {i//self.batch_size + 1}: {e}")

                # Add chunks without embeddings as fallback
                # (note: unlike the success path, this mutates the input
                # chunk objects in place).
                for chunk in batch_chunks:
                    chunk.metadata.update({
                        "embedding_error": str(e),
                        "embedding_generated_at": datetime.now().isoformat()
                    })
                    chunk.embedding = [0.0] * self.config["dimensions"]
                    embedded_chunks.append(chunk)

        logger.info(f"Generated embeddings for {len(embedded_chunks)} chunks")
        return embedded_chunks

    async def embed_query(self, query: str) -> List[float]:
        """
        Generate embedding for a search query.

        Args:
            query: Search query

        Returns:
            Query embedding
        """
        return await self.generate_embedding(query)

    def get_embedding_dimension(self) -> int:
        """Get the dimension of embeddings for this model."""
        return self.config["dimensions"]
|
||||
|
||||
|
||||
# Cache for embeddings
class EmbeddingCache:
    """Simple in-memory cache for embeddings.

    Entries are keyed by an MD5 digest of the text; once ``max_size`` is
    reached, the least-recently-accessed entry is evicted.
    """

    def __init__(self, max_size: int = 1000):
        """Initialize cache."""
        self.cache: Dict[str, List[float]] = {}
        self.access_times: Dict[str, datetime] = {}
        self.max_size = max_size

    def get(self, text: str) -> Optional[List[float]]:
        """Get embedding from cache, refreshing its access time on a hit."""
        key = self._hash_text(text)
        if key not in self.cache:
            return None
        self.access_times[key] = datetime.now()
        return self.cache[key]

    def put(self, text: str, embedding: List[float]):
        """Store embedding in cache, evicting the stalest entry if full."""
        key = self._hash_text(text)

        # Full cache: drop the entry accessed longest ago.
        if len(self.cache) >= self.max_size:
            stalest = min(self.access_times, key=self.access_times.get)
            del self.cache[stalest]
            del self.access_times[stalest]

        self.cache[key] = embedding
        self.access_times[key] = datetime.now()

    def _hash_text(self, text: str) -> str:
        """Generate hash for text (MD5 used as a cache key, not for security)."""
        import hashlib
        return hashlib.md5(text.encode()).hexdigest()
|
||||
|
||||
|
||||
# Factory function
def create_embedder(
    model: str = EMBEDDING_MODEL,
    use_cache: bool = True,
    **kwargs
) -> EmbeddingGenerator:
    """
    Create embedding generator with optional caching.

    Args:
        model: Embedding model to use
        use_cache: Whether to use caching
        **kwargs: Additional arguments for EmbeddingGenerator

    Returns:
        EmbeddingGenerator instance
    """
    embedder = EmbeddingGenerator(model=model, **kwargs)

    if not use_cache:
        return embedder

    # Wrap single-text embedding generation with a read-through cache by
    # shadowing the bound method on this instance.
    cache = EmbeddingCache()
    uncached_generate = embedder.generate_embedding

    async def cached_generate(text: str) -> List[float]:
        hit = cache.get(text)
        if hit is not None:
            return hit

        vector = await uncached_generate(text)
        cache.put(text, vector)
        return vector

    embedder.generate_embedding = cached_generate
    return embedder
|
||||
|
||||
|
||||
# Example usage
async def main():
    """Example usage of the embedder.

    Chunks a small sample with the non-semantic chunker, embeds the chunks
    in batches, and embeds a standalone query.
    """
    from .chunker import ChunkingConfig, create_chunker

    # Create chunker and embedder
    config = ChunkingConfig(chunk_size=200, use_semantic_splitting=False)
    chunker = create_chunker(config)
    embedder = create_embedder()

    sample_text = """
    Google's AI initiatives include advanced language models, computer vision,
    and machine learning research. The company has invested heavily in
    transformer architectures and neural network optimization.

    Microsoft's partnership with OpenAI has led to integration of GPT models
    into various products and services, making AI accessible to enterprise
    customers through Azure cloud services.
    """

    # Chunk the document
    # NOTE: chunk_document is called without `await` — this works only
    # because use_semantic_splitting=False selects the synchronous
    # SimpleChunker implementation.
    chunks = chunker.chunk_document(
        content=sample_text,
        title="AI Initiatives",
        source="example.md"
    )

    print(f"Created {len(chunks)} chunks")

    # Generate embeddings, reporting per-batch progress.
    def progress_callback(current, total):
        print(f"Processing batch {current}/{total}")

    embedded_chunks = await embedder.embed_chunks(chunks, progress_callback)

    for i, chunk in enumerate(embedded_chunks):
        print(f"Chunk {i}: {len(chunk.content)} chars, embedding dim: {len(chunk.embedding)}")

    # Test query embedding
    query_embedding = await embedder.embed_query("Google AI research")
    print(f"Query embedding dimension: {len(query_embedding)}")


if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1,434 @@
|
||||
"""
|
||||
Main ingestion script for processing markdown documents into vector DB and knowledge graph.
|
||||
"""
|
||||
|
||||
import os
|
||||
import asyncio
|
||||
import logging
|
||||
import json
|
||||
import glob
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
import argparse
|
||||
|
||||
import asyncpg
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from .chunker import ChunkingConfig, create_chunker, DocumentChunk
|
||||
from .embedder import create_embedder
|
||||
|
||||
# Import utilities
|
||||
try:
|
||||
from ..utils.db_utils import initialize_database, close_database, db_pool
|
||||
from ..utils.models import IngestionConfig, IngestionResult
|
||||
except ImportError:
|
||||
# For direct execution or testing
|
||||
import sys
|
||||
import os
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from utils.db_utils import initialize_database, close_database, db_pool
|
||||
from utils.models import IngestionConfig, IngestionResult
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class DocumentIngestionPipeline:
|
||||
"""Pipeline for ingesting documents into vector DB and knowledge graph."""
|
||||
|
||||
    def __init__(
        self,
        config: IngestionConfig,
        documents_folder: str = "documents",
        clean_before_ingest: bool = False
    ):
        """
        Initialize ingestion pipeline.

        Args:
            config: Ingestion configuration
            documents_folder: Folder containing markdown documents
            clean_before_ingest: Whether to clean existing data before ingestion
        """
        self.config = config
        self.documents_folder = documents_folder
        self.clean_before_ingest = clean_before_ingest

        # Initialize components — the chunker config mirrors the chunking
        # fields of the ingestion config.
        self.chunker_config = ChunkingConfig(
            chunk_size=config.chunk_size,
            chunk_overlap=config.chunk_overlap,
            max_chunk_size=config.max_chunk_size,
            use_semantic_splitting=config.use_semantic_chunking
        )

        self.chunker = create_chunker(self.chunker_config)
        self.embedder = create_embedder()

        # Database setup is deferred to initialize().
        self._initialized = False
|
||||
|
||||
    async def initialize(self):
        """Initialize database connections.

        Idempotent: returns immediately if already initialized.
        """
        if self._initialized:
            return

        logger.info("Initializing ingestion pipeline...")

        # Initialize database connections (shared pool from db_utils).
        await initialize_database()

        self._initialized = True
        logger.info("Ingestion pipeline initialized")
|
||||
|
||||
async def close(self):
|
||||
"""Close database connections."""
|
||||
if self._initialized:
|
||||
await close_database()
|
||||
self._initialized = False
|
||||
|
||||
async def ingest_documents(
|
||||
self,
|
||||
progress_callback: Optional[callable] = None
|
||||
) -> List[IngestionResult]:
|
||||
"""
|
||||
Ingest all documents from the documents folder.
|
||||
|
||||
Args:
|
||||
progress_callback: Optional callback for progress updates
|
||||
|
||||
Returns:
|
||||
List of ingestion results
|
||||
"""
|
||||
if not self._initialized:
|
||||
await self.initialize()
|
||||
|
||||
# Clean existing data if requested
|
||||
if self.clean_before_ingest:
|
||||
await self._clean_databases()
|
||||
|
||||
# Find all markdown files
|
||||
markdown_files = self._find_markdown_files()
|
||||
|
||||
if not markdown_files:
|
||||
logger.warning(f"No markdown files found in {self.documents_folder}")
|
||||
return []
|
||||
|
||||
logger.info(f"Found {len(markdown_files)} markdown files to process")
|
||||
|
||||
results = []
|
||||
|
||||
for i, file_path in enumerate(markdown_files):
|
||||
try:
|
||||
logger.info(f"Processing file {i+1}/{len(markdown_files)}: {file_path}")
|
||||
|
||||
result = await self._ingest_single_document(file_path)
|
||||
results.append(result)
|
||||
|
||||
if progress_callback:
|
||||
progress_callback(i + 1, len(markdown_files))
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to process {file_path}: {e}")
|
||||
results.append(IngestionResult(
|
||||
document_id="",
|
||||
title=os.path.basename(file_path),
|
||||
chunks_created=0,
|
||||
entities_extracted=0,
|
||||
relationships_created=0,
|
||||
processing_time_ms=0,
|
||||
errors=[str(e)]
|
||||
))
|
||||
|
||||
# Log summary
|
||||
total_chunks = sum(r.chunks_created for r in results)
|
||||
total_errors = sum(len(r.errors) for r in results)
|
||||
|
||||
logger.info(f"Ingestion complete: {len(results)} documents, {total_chunks} chunks, {total_errors} errors")
|
||||
|
||||
return results
|
||||
|
||||
async def _ingest_single_document(self, file_path: str) -> IngestionResult:
|
||||
"""
|
||||
Ingest a single document.
|
||||
|
||||
Args:
|
||||
file_path: Path to the document file
|
||||
|
||||
Returns:
|
||||
Ingestion result
|
||||
"""
|
||||
start_time = datetime.now()
|
||||
|
||||
# Read document
|
||||
document_content = self._read_document(file_path)
|
||||
document_title = self._extract_title(document_content, file_path)
|
||||
document_source = os.path.relpath(file_path, self.documents_folder)
|
||||
|
||||
# Extract metadata from content
|
||||
document_metadata = self._extract_document_metadata(document_content, file_path)
|
||||
|
||||
logger.info(f"Processing document: {document_title}")
|
||||
|
||||
# Chunk the document
|
||||
chunks = await self.chunker.chunk_document(
|
||||
content=document_content,
|
||||
title=document_title,
|
||||
source=document_source,
|
||||
metadata=document_metadata
|
||||
)
|
||||
|
||||
if not chunks:
|
||||
logger.warning(f"No chunks created for {document_title}")
|
||||
return IngestionResult(
|
||||
document_id="",
|
||||
title=document_title,
|
||||
chunks_created=0,
|
||||
entities_extracted=0,
|
||||
relationships_created=0,
|
||||
processing_time_ms=(datetime.now() - start_time).total_seconds() * 1000,
|
||||
errors=["No chunks created"]
|
||||
)
|
||||
|
||||
logger.info(f"Created {len(chunks)} chunks")
|
||||
|
||||
# Entity extraction removed (graph-related functionality)
|
||||
entities_extracted = 0
|
||||
|
||||
# Generate embeddings
|
||||
embedded_chunks = await self.embedder.embed_chunks(chunks)
|
||||
logger.info(f"Generated embeddings for {len(embedded_chunks)} chunks")
|
||||
|
||||
# Save to PostgreSQL
|
||||
document_id = await self._save_to_postgres(
|
||||
document_title,
|
||||
document_source,
|
||||
document_content,
|
||||
embedded_chunks,
|
||||
document_metadata
|
||||
)
|
||||
|
||||
logger.info(f"Saved document to PostgreSQL with ID: {document_id}")
|
||||
|
||||
# Knowledge graph functionality removed
|
||||
relationships_created = 0
|
||||
graph_errors = []
|
||||
|
||||
# Calculate processing time
|
||||
processing_time = (datetime.now() - start_time).total_seconds() * 1000
|
||||
|
||||
return IngestionResult(
|
||||
document_id=document_id,
|
||||
title=document_title,
|
||||
chunks_created=len(chunks),
|
||||
entities_extracted=entities_extracted,
|
||||
relationships_created=relationships_created,
|
||||
processing_time_ms=processing_time,
|
||||
errors=graph_errors
|
||||
)
|
||||
|
||||
def _find_markdown_files(self) -> List[str]:
|
||||
"""Find all markdown files in the documents folder."""
|
||||
if not os.path.exists(self.documents_folder):
|
||||
logger.error(f"Documents folder not found: {self.documents_folder}")
|
||||
return []
|
||||
|
||||
patterns = ["*.md", "*.markdown", "*.txt"]
|
||||
files = []
|
||||
|
||||
for pattern in patterns:
|
||||
files.extend(glob.glob(os.path.join(self.documents_folder, "**", pattern), recursive=True))
|
||||
|
||||
return sorted(files)
|
||||
|
||||
def _read_document(self, file_path: str) -> str:
|
||||
"""Read document content from file."""
|
||||
try:
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
return f.read()
|
||||
except UnicodeDecodeError:
|
||||
# Try with different encoding
|
||||
with open(file_path, 'r', encoding='latin-1') as f:
|
||||
return f.read()
|
||||
|
||||
def _extract_title(self, content: str, file_path: str) -> str:
|
||||
"""Extract title from document content or filename."""
|
||||
# Try to find markdown title
|
||||
lines = content.split('\n')
|
||||
for line in lines[:10]: # Check first 10 lines
|
||||
line = line.strip()
|
||||
if line.startswith('# '):
|
||||
return line[2:].strip()
|
||||
|
||||
# Fallback to filename
|
||||
return os.path.splitext(os.path.basename(file_path))[0]
|
||||
|
||||
def _extract_document_metadata(self, content: str, file_path: str) -> Dict[str, Any]:
|
||||
"""Extract metadata from document content."""
|
||||
metadata = {
|
||||
"file_path": file_path,
|
||||
"file_size": len(content),
|
||||
"ingestion_date": datetime.now().isoformat()
|
||||
}
|
||||
|
||||
# Try to extract YAML frontmatter
|
||||
if content.startswith('---'):
|
||||
try:
|
||||
import yaml
|
||||
end_marker = content.find('\n---\n', 4)
|
||||
if end_marker != -1:
|
||||
frontmatter = content[4:end_marker]
|
||||
yaml_metadata = yaml.safe_load(frontmatter)
|
||||
if isinstance(yaml_metadata, dict):
|
||||
metadata.update(yaml_metadata)
|
||||
except ImportError:
|
||||
logger.warning("PyYAML not installed, skipping frontmatter extraction")
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to parse frontmatter: {e}")
|
||||
|
||||
# Extract some basic metadata from content
|
||||
lines = content.split('\n')
|
||||
metadata['line_count'] = len(lines)
|
||||
metadata['word_count'] = len(content.split())
|
||||
|
||||
return metadata
|
||||
|
||||
async def _save_to_postgres(
|
||||
self,
|
||||
title: str,
|
||||
source: str,
|
||||
content: str,
|
||||
chunks: List[DocumentChunk],
|
||||
metadata: Dict[str, Any]
|
||||
) -> str:
|
||||
"""Save document and chunks to PostgreSQL."""
|
||||
async with db_pool.acquire() as conn:
|
||||
async with conn.transaction():
|
||||
# Insert document
|
||||
document_result = await conn.fetchrow(
|
||||
"""
|
||||
INSERT INTO documents (title, source, content, metadata)
|
||||
VALUES ($1, $2, $3, $4)
|
||||
RETURNING id::text
|
||||
""",
|
||||
title,
|
||||
source,
|
||||
content,
|
||||
json.dumps(metadata)
|
||||
)
|
||||
|
||||
document_id = document_result["id"]
|
||||
|
||||
# Insert chunks
|
||||
for chunk in chunks:
|
||||
# Convert embedding to PostgreSQL vector string format
|
||||
embedding_data = None
|
||||
if hasattr(chunk, 'embedding') and chunk.embedding:
|
||||
# PostgreSQL vector format: '[1.0,2.0,3.0]' (no spaces after commas)
|
||||
embedding_data = '[' + ','.join(map(str, chunk.embedding)) + ']'
|
||||
|
||||
await conn.execute(
|
||||
"""
|
||||
INSERT INTO chunks (document_id, content, embedding, chunk_index, metadata, token_count)
|
||||
VALUES ($1::uuid, $2, $3::vector, $4, $5, $6)
|
||||
""",
|
||||
document_id,
|
||||
chunk.content,
|
||||
embedding_data,
|
||||
chunk.index,
|
||||
json.dumps(chunk.metadata),
|
||||
chunk.token_count
|
||||
)
|
||||
|
||||
return document_id
|
||||
|
||||
async def _clean_databases(self):
|
||||
"""Clean existing data from databases."""
|
||||
logger.warning("Cleaning existing data from databases...")
|
||||
|
||||
# Clean PostgreSQL
|
||||
async with db_pool.acquire() as conn:
|
||||
async with conn.transaction():
|
||||
await conn.execute("DELETE FROM chunks")
|
||||
await conn.execute("DELETE FROM documents")
|
||||
|
||||
logger.info("Cleaned PostgreSQL database")
|
||||
|
||||
async def main():
    """Command-line entry point: parse args, run the pipeline, print a report."""
    parser = argparse.ArgumentParser(description="Ingest documents into vector DB")
    parser.add_argument("--documents", "-d", default="documents", help="Documents folder path")
    parser.add_argument("--clean", "-c", action="store_true", help="Clean existing data before ingestion")
    parser.add_argument("--chunk-size", type=int, default=1000, help="Chunk size for splitting documents")
    parser.add_argument("--chunk-overlap", type=int, default=200, help="Chunk overlap size")
    parser.add_argument("--no-semantic", action="store_true", help="Disable semantic chunking")
    # Graph-related arguments removed
    parser.add_argument("--verbose", "-v", action="store_true", help="Enable verbose logging")
    cli_args = parser.parse_args()

    # Configure logging: DEBUG when --verbose, INFO otherwise.
    logging.basicConfig(
        level=logging.DEBUG if cli_args.verbose else logging.INFO,
        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )

    # Build the pipeline from the CLI options.
    pipeline = DocumentIngestionPipeline(
        config=IngestionConfig(
            chunk_size=cli_args.chunk_size,
            chunk_overlap=cli_args.chunk_overlap,
            use_semantic_chunking=not cli_args.no_semantic
        ),
        documents_folder=cli_args.documents,
        clean_before_ingest=cli_args.clean
    )

    def progress_callback(current: int, total: int):
        print(f"Progress: {current}/{total} documents processed")

    def print_report(results, total_time: float):
        # Aggregate summary followed by one line per document.
        print("\n" + "="*50)
        print("INGESTION SUMMARY")
        print("="*50)
        print(f"Documents processed: {len(results)}")
        print(f"Total chunks created: {sum(r.chunks_created for r in results)}")
        # Graph-related stats removed
        print(f"Total errors: {sum(len(r.errors) for r in results)}")
        print(f"Total processing time: {total_time:.2f} seconds")
        print()
        for result in results:
            status = "✓" if not result.errors else "✗"
            print(f"{status} {result.title}: {result.chunks_created} chunks")
            if result.errors:
                for error in result.errors:
                    print(f"  Error: {error}")

    try:
        start_time = datetime.now()
        results = await pipeline.ingest_documents(progress_callback)
        elapsed = (datetime.now() - start_time).total_seconds()
        print_report(results, elapsed)
    except KeyboardInterrupt:
        print("\nIngestion interrupted by user")
    except Exception as e:
        logger.error(f"Ingestion failed: {e}")
        raise
    finally:
        # Always release database connections, even on failure/interrupt.
        await pipeline.close()
# Script entry point: drive the async main() on a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())
Reference in New Issue
Block a user