Release 2025-05-19

This commit is contained in:
pluja
2025-05-19 10:19:49 +00:00
parent 046c4559e5
commit 2657f936bc
267 changed files with 0 additions and 49432 deletions

View File

@@ -1,21 +0,0 @@
# Database connection
DATABASE_URL=postgresql://kycnot:kycnot@localhost:3399/kycnot
# API settings
TOS_API_BASE_URL=https://r.jina.ai
# Logging
LOG_LEVEL=INFO
LOG_FORMAT=%(asctime)s - %(name)s - %(levelname)s - %(message)s
# OpenAI
OPENAI_API_KEY="xxxxxxxxx"
OPENAI_BASE_URL="https://xxxxxx/api/v1"
OPENAI_MODEL="xxxxxxxxx"
OPENAI_RETRY=3
CRON_TOSREVIEW_TASK=0 0 1 * * # Every month
CRON_USER_SENTIMENT_TASK=0 0 * * * # Every day
CRON_COMMENT_MODERATION_TASK=0 0 * * * # Every hour
CRON_FORCE_TRIGGERS_TASK=0 2 * * * # Every day
CRON_SERVICE_SCORE_RECALC_TASK=*/5 * * * * # Every 10 minutes

174
pyworker/.gitignore vendored
View File

@@ -1,174 +0,0 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# UV
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
#uv.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
# Ruff stuff:
.ruff_cache/
# PyPI configuration file
.pypirc

View File

@@ -1 +0,0 @@
3.13

View File

@@ -1,10 +0,0 @@
FROM ghcr.io/astral-sh/uv:alpine
WORKDIR /app
COPY . .
RUN uv sync --frozen
EXPOSE 8000
CMD ["uv", "run", "-m", "pyworker", "--worker"]

View File

@@ -1,149 +0,0 @@
# KYC Not Worker
A Python worker for processing and analyzing data for the KYC Not project.
## Features
- TOS (Terms of Service) text retrieval and analysis
- User sentiment analysis from comments
- Comment moderation
- Service score recalculation
- Database trigger maintenance
- Scheduled task execution
- Database operations for services and comments
## Installation
1. Clone the repository
2. Sync dependencies with [uv](https://docs.astral.sh/uv/):
```bash
uv sync
```
## Configuration
Copy `.env.example` to `.env` and fill in the required values:
```bash
cp .env.example .env
```
Required environment variables:
- `DATABASE_URL`: PostgreSQL connection string
- `OPENAI_API_KEY`: OpenAI API key for AI tasks
- `CRON_TOSREVIEW_TASK`: Cron expression for TOS review task
- `CRON_SENTIMENT_TASK`: Cron expression for user sentiment analysis task
- `CRON_MODERATION_TASK`: Cron expression for comment moderation task
- `CRON_FORCE_TRIGGERS_TASK`: Cron expression for force triggers task
- `CRON_SERVICE_SCORE_RECALC_TASK`: Cron expression for service score recalculation task
## Usage
### Command Line Interface
Run tasks directly:
```bash
# Run TOS review task
uv run -m pyworker tos [--service-id ID]
# Run user sentiment analysis task
uv run -m pyworker sentiment [--service-id ID]
# Run comment moderation task
uv run -m pyworker moderation [--service-id ID]
# Run force triggers task
uv run -m pyworker force-triggers
# Run service score recalculation task
uv run -m pyworker service-score-recalc [--service-id ID]
```
### Worker Mode
Run in worker mode to execute tasks on a schedule:
```bash
uv run -m pyworker --worker
```
Tasks will run according to their configured cron schedules.
## Tasks
### TOS Review Task
- Retrieves and analyzes Terms of Service documents
- Updates service records with TOS information
- Scheduled via `CRON_TOSREVIEW_TASK`
### User Sentiment Task
- Analyzes user comments to determine overall sentiment
- Updates service records with sentiment analysis
- Scheduled via `CRON_SENTIMENT_TASK`
### Comment Moderation Task
- Makes a basic first moderation of comments
- Flags comments as needed
- Adds content if needed
- Scheduled via `CRON_MODERATION_TASK`
### Force Triggers Task
- Maintains database triggers by forcing them to run under certain conditions
- Currently handles updating the "isRecentlyListed" flag for services after 15 days
- Scheduled via `CRON_FORCE-TRIGGERS_TASK`
### Service Score Recalculation Task
- Recalculates service scores based on attribute changes
- Processes jobs from the ServiceScoreRecalculationJob table
- Calculates privacy, trust, and overall scores
- Scheduled via `CRON_SERVICE-SCORE-RECALC_TASK`
## Development
### Project Structure
```text
pyworker/
├── pyworker/
│ ├── __init__.py
│ ├── __main__.py
│ ├── cli.py
│ ├── config.py
│ ├── database.py
│ ├── scheduler.py
│ ├── tasks/
│ │ ├── __init__.py
│ │ ├── base.py
│ │ ├── comment_moderation.py
│ │ ├── force_triggers.py
│ │ ├── service_score_recalc.py
│ │ ├── tos_review.py
│ │ └── user_sentiment.py
│ └── utils/
│ ├── __init__.py
│ ├── ai.py
│ └── logging.py
├── tests/
├── setup.py
├── requirements.txt
└── README.md
```
### Adding New Tasks
1. Create a new task class in `pyworker/tasks/`
2. Implement the `run` method
3. Add the task to `pyworker/tasks/__init__.py`
4. Update the CLI and scheduler to handle the new task
## License
MIT

View File

@@ -1,12 +0,0 @@
services:
pyworker:
build:
context: .
dockerfile: Dockerfile
restart: always
env_file:
- .env
environment:
- OPENAI_API_KEY=${OPENAI_API_KEY}
- OPENAI_MODEL=${OPENAI_MODEL}
- DATABASE_URL=${DATABASE_URL}

View File

@@ -1,24 +0,0 @@
[project]
name = "pyworker"
version = "0.1.0"
description = "AI workers for kycnot.me"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"croniter>=6.0.0",
"json-repair>=0.41.1",
"openai>=1.74.0",
"psycopg[binary,pool]>=3.2.6",
"python-dotenv>=1.1.0",
"requests>=2.32.3",
]
[project.scripts]
pyworker = "pyworker.cli:main"
[tool.setuptools]
packages = ["pyworker"]
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

View File

@@ -1,7 +0,0 @@
"""
KYC Not Worker Package
A package for worker tasks related to the KYC Not platform.
"""
__version__ = "0.1.0"

View File

@@ -1,10 +0,0 @@
#!/usr/bin/env python3
"""
Entry point for the pyworker package when executed as a module.
"""
import sys
from pyworker.cli import main
if __name__ == "__main__":
sys.exit(main())

View File

@@ -1,437 +0,0 @@
"""
Command line interface for the pyworker package.
"""
import argparse
import sys
import time
from typing import List, Optional, Dict, Any
from pyworker.config import config
from pyworker.database import (
close_db_pool,
fetch_all_services,
fetch_services_with_pending_comments,
)
from pyworker.scheduler import TaskScheduler
from .tasks import (
CommentModerationTask,
ForceTriggersTask,
ServiceScoreRecalculationTask,
TosReviewTask,
UserSentimentTask,
)
from pyworker.utils.app_logging import setup_logging
logger = setup_logging(__name__)
def parse_args(args: List[str]) -> argparse.Namespace:
"""
Parse command line arguments.
Args:
args: Command line arguments.
Returns:
Parsed arguments.
"""
parser = argparse.ArgumentParser(description="KYC Not Worker")
# Global options
parser.add_argument(
"--worker",
action="store_true",
help="Run in worker mode (schedule tasks to run periodically)",
)
# Add subparsers for different tasks
subparsers = parser.add_subparsers(dest="task", help="Task to run")
# TOS retrieval task
tos_parser = subparsers.add_parser(
"tos", help="Retrieve Terms of Service (TOS) text"
)
tos_parser.add_argument(
"--service-id", type=int, help="Specific service ID to process (optional)"
)
# User sentiment task
sentiment_parser = subparsers.add_parser(
"sentiment", help="Analyze user sentiment from comments"
)
sentiment_parser.add_argument(
"--service-id", type=int, help="Specific service ID to process (optional)"
)
# Comment moderation task
moderation_parser = subparsers.add_parser(
"moderation", help="Moderate pending comments"
)
moderation_parser.add_argument(
"--service-id", type=int, help="Specific service ID to process (optional)"
)
# New Service Penalty task
penalty_parser = subparsers.add_parser(
"force-triggers",
help="Force triggers to run under certain conditions",
)
penalty_parser.add_argument(
"--service-id", type=int, help="Specific service ID to process (optional)"
)
# Service Score Recalculation task
score_recalc_parser = subparsers.add_parser(
"service-score-recalc",
help="Recalculate service scores based on attribute changes",
)
score_recalc_parser.add_argument(
"--service-id", type=int, help="Specific service ID to process (optional)"
)
return parser.parse_args(args)
def run_tos_task(service_id: Optional[int] = None) -> int:
"""
Run the TOS retrieval task.
Args:
service_id: Optional specific service ID to process.
Returns:
Exit code.
"""
logger.info("Starting TOS retrieval task")
try:
# Fetch services
services = fetch_all_services()
if not services:
logger.error("No services found")
return 1
# Filter by service ID if specified
if service_id:
services = [s for s in services if s["id"] == service_id]
if not services:
logger.error(f"Service with ID {service_id} not found")
return 1
# Initialize task and use as context manager
with TosReviewTask() as task: # type: ignore
# Process services using the same database connection
for service in services:
if not service.get("tosUrls"):
logger.info(
f"Skipping service {service['name']} (ID: {service['id']}) - no TOS URLs"
)
continue
result = task.run(service) # type: ignore
if result:
logger.info(
f"Successfully retrieved TOS for service {service['name']}"
)
else:
logger.warning(
f"Failed to retrieve TOS for service {service['name']}"
)
logger.info("TOS retrieval task completed")
return 0
finally:
# Ensure connection pool is closed even if an error occurs
close_db_pool()
def run_sentiment_task(service_id: Optional[int] = None) -> int:
"""
Run the user sentiment analysis task.
Args:
service_id: Optional specific service ID to process.
Returns:
Exit code.
"""
logger.info("Starting user sentiment analysis task")
try:
# Fetch services
services = fetch_all_services()
if not services:
logger.error("No services found")
return 1
# Filter by service ID if specified
if service_id:
services = [s for s in services if s["id"] == service_id]
if not services:
logger.error(f"Service with ID {service_id} not found")
return 1
# Initialize task and use as context manager
with UserSentimentTask() as task: # type: ignore
# Process services using the same database connection
for service in services:
result = task.run(service) # type: ignore
if result is not None:
logger.info(
f"Successfully analyzed sentiment for service {service['name']}"
)
logger.info("User sentiment analysis task completed")
return 0
finally:
# Ensure connection pool is closed even if an error occurs
close_db_pool()
def run_moderation_task(service_id: Optional[int] = None) -> int:
"""
Run the comment moderation task.
Args:
service_id: Optional specific service ID to process.
Returns:
Exit code.
"""
logger.info("Starting comment moderation task")
try:
services_to_process: List[Dict[str, Any]] = []
if service_id:
# Fetch specific service if ID is provided
# Consider creating a fetch_service_by_id for efficiency if this path is common
all_services = fetch_all_services()
services_to_process = [s for s in all_services if s["id"] == service_id]
if not services_to_process:
logger.error(
f"Service with ID {service_id} not found or does not meet general fetch criteria."
)
return 1
logger.info(f"Processing specifically for service ID: {service_id}")
else:
# No specific service ID, fetch only services with pending comments
logger.info(
"No specific service ID provided. Querying for services with pending comments."
)
services_to_process = fetch_services_with_pending_comments()
if not services_to_process:
logger.info(
"No services found with pending comments for moderation at this time."
)
# Task completed its check, nothing to do.
# Fall through to common completion log.
any_service_had_comments_processed = False
if not services_to_process and not service_id:
# This case is when no service_id was given AND no services with pending comments were found.
# Already logged above.
pass
elif not services_to_process and service_id:
# This case should have been caught by the 'return 1' if service_id was specified but not found.
# If it reaches here, it implies an issue or the service had no pending comments (which the task will handle).
logger.info(
f"Service ID {service_id} was specified, but no matching service found or it has no pending items for the task."
)
else:
logger.info(
f"Identified {len(services_to_process)} service(s) to check for comment moderation."
)
# Initialize task and use as context manager
with CommentModerationTask() as task: # type: ignore
for service in services_to_process:
# The CommentModerationTask.run() method now returns a boolean
# and handles its own logging regarding finding/processing comments for the service.
if task.run(service): # type: ignore
logger.info(
f"Comment moderation task ran for service {service['name']} (ID: {service['id']}) and processed comments."
)
any_service_had_comments_processed = True
else:
logger.info(
f"Comment moderation task ran for service {service['name']} (ID: {service['id']}), but no new comments were moderated."
)
if services_to_process and not any_service_had_comments_processed:
logger.info(
"Completed iterating through services; no comments were moderated in this run."
)
logger.info("Comment moderation task completed")
return 0
finally:
# Ensure connection pool is closed even if an error occurs
close_db_pool()
def run_force_triggers_task() -> int:
"""
Runs the force triggers task.
Returns:
Exit code.
"""
logger.info("Starting force triggers task")
try:
# Initialize task and use as context manager
with ForceTriggersTask() as task: # type: ignore
success = task.run() # type: ignore
if success:
logger.info("Force triggers task completed successfully.")
return 0
else:
logger.error("Force triggers task failed.")
return 1
finally:
# Ensure connection pool is closed even if an error occurs
close_db_pool()
def run_service_score_recalc_task(service_id: Optional[int] = None) -> int:
"""
Run the service score recalculation task.
Args:
service_id: Optional specific service ID to process.
Returns:
Exit code.
"""
logger.info("Starting service score recalculation task")
try:
# Initialize task and use as context manager
with ServiceScoreRecalculationTask() as task: # type: ignore
result = task.run(service_id) # type: ignore
if result:
logger.info("Successfully recalculated service scores")
else:
logger.warning("Failed to recalculate service scores")
logger.info("Service score recalculation task completed")
return 0
finally:
# Ensure connection pool is closed even if an error occurs
close_db_pool()
def run_worker_mode() -> int:
"""
Run in worker mode, scheduling tasks to run periodically.
Returns:
Exit code.
"""
logger.info("Starting worker mode")
# Get task schedules from config
task_schedules = config.task_schedules
if not task_schedules:
logger.error(
"No task schedules defined. Set CRON_TASKNAME_TASK environment variables."
)
return 1
logger.info(
f"Found {len(task_schedules)} scheduled tasks: {', '.join(task_schedules.keys())}"
)
# Initialize the scheduler
scheduler = TaskScheduler()
# Register tasks with their schedules
for task_name, cron_expression in task_schedules.items():
if task_name.lower() == "tosreview":
scheduler.register_task(task_name, cron_expression, run_tos_task)
elif task_name.lower() == "user_sentiment":
scheduler.register_task(task_name, cron_expression, run_sentiment_task)
elif task_name.lower() == "comment_moderation":
scheduler.register_task(task_name, cron_expression, run_moderation_task)
elif task_name.lower() == "force_triggers":
scheduler.register_task(task_name, cron_expression, run_force_triggers_task)
elif task_name.lower() == "service_score_recalc":
scheduler.register_task(
task_name, cron_expression, run_service_score_recalc_task
)
else:
logger.warning(f"Unknown task '{task_name}', skipping")
# Register service score recalculation task (every 5 minutes)
scheduler.register_task(
"service-score-recalc",
"*/5 * * * *",
run_service_score_recalc_task,
)
# Start the scheduler if tasks were registered
if scheduler.tasks:
try:
scheduler.start()
logger.info("Worker started, press Ctrl+C to stop")
# Keep the main thread alive
while scheduler.is_running():
time.sleep(1)
return 0
except KeyboardInterrupt:
logger.info("Keyboard interrupt received, shutting down...")
scheduler.stop()
return 0
except Exception as e:
logger.exception(f"Error in worker mode: {e}")
scheduler.stop()
return 1
else:
logger.error("No valid tasks registered")
return 1
def main() -> int:
"""
Main entry point.
Returns:
Exit code.
"""
args = parse_args(sys.argv[1:])
try:
# If worker mode is specified, run the scheduler
if args.worker:
return run_worker_mode()
# Otherwise, run the specified task once
if args.task == "tos":
return run_tos_task(args.service_id)
elif args.task == "sentiment":
return run_sentiment_task(args.service_id)
elif args.task == "moderation":
return run_moderation_task(args.service_id)
elif args.task == "force-triggers":
return run_force_triggers_task()
elif args.task == "service-score-recalc":
return run_service_score_recalc_task(args.service_id)
elif args.task:
logger.error(f"Unknown task: {args.task}")
return 1
else:
logger.error(
"No task specified. Use --worker for scheduled execution or specify a task to run once."
)
return 1
except Exception as e:
logger.exception(f"Error running task: {e}")
return 1
if __name__ == "__main__":
sys.exit(main())

View File

@@ -1,67 +0,0 @@
"""
Configuration module for pyworker.
Handles loading environment variables and configuration settings.
"""
import os
import re
from typing import Dict
from dotenv import load_dotenv
# Load environment variables from .env file
load_dotenv()
class Config:
"""Configuration class for the worker application."""
# Database settings
DATABASE_URL: str = os.getenv(
"DATABASE_URL", "postgresql://kycnot:kycnot@localhost:3399/kycnot"
)
# Clean the URL by removing any query parameters
@property
def db_connection_string(self) -> str:
"""Get the clean database connection string without query parameters."""
if "?" in self.DATABASE_URL:
return self.DATABASE_URL.split("?")[0]
return self.DATABASE_URL
# API settings
TOS_API_BASE_URL: str = os.getenv("TOS_API_BASE_URL", "https://r.jina.ai")
# Logging settings
LOG_LEVEL: str = os.getenv("LOG_LEVEL", "INFO")
LOG_FORMAT: str = os.getenv(
"LOG_FORMAT", "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
# Task scheduling
@property
def task_schedules(self) -> Dict[str, str]:
"""
Get cron schedules for tasks from environment variables.
Looks for environment variables with the pattern CRON_TASKNAME_TASK
and returns a dictionary mapping task names to cron schedules.
Returns:
Dictionary mapping task names to cron schedules.
"""
schedules: Dict[str, str] = {}
cron_pattern = re.compile(r"^CRON_(\w+)_TASK$")
for key, value in os.environ.items():
match = cron_pattern.match(key)
if match:
task_name = match.group(1).lower()
schedules[task_name] = value
return schedules
# Create a singleton instance
config = Config()

View File

@@ -1,733 +0,0 @@
"""
Database operations for the pyworker package.
"""
import json
from contextlib import contextmanager
from datetime import datetime
from typing import Any, Dict, Generator, List, Optional, TypedDict, Union
from typing import Literal as TypeLiteral
import psycopg
from psycopg.rows import dict_row
from psycopg.sql import SQL, Composed, Literal
from psycopg_pool import ConnectionPool # Proper import for the connection pool
from pyworker.config import config
from pyworker.utils.app_logging import setup_logging
logger = setup_logging(__name__)
# --- Type Definitions ---
# Moved from tasks/comment_moderation.py
class CommentType(TypedDict):
id: int
upvotes: int
status: str # Assuming CommentStatus Enum isn't used across modules yet
suspicious: bool
requiresAdminReview: bool
communityNote: Optional[str]
internalNote: Optional[str]
privateContext: Optional[str]
content: str
rating: Optional[float]
createdAt: datetime
updatedAt: datetime
authorId: int
serviceId: int
parentId: Optional[int]
# Add author/service/reply fields if needed by update_comment
# Moved from utils/ai.py
RatingType = TypeLiteral["info", "warning", "alert"]
class UserRightType(TypedDict):
text: str
rating: RatingType
class DataSharingType(TypedDict):
text: str
rating: RatingType
class DataCollectedType(TypedDict):
text: str
rating: RatingType
class KycOrSourceOfFundsType(TypedDict):
text: str
rating: RatingType
class TosReviewType(TypedDict, total=False):
contentHash: str
kycLevel: int
summary: str
complexity: TypeLiteral["low", "medium", "high"]
highlights: List[Dict[str, Any]]
class CommentSentimentSummaryType(TypedDict):
summary: str
sentiment: TypeLiteral["positive", "negative", "neutral"]
whatUsersLike: List[str]
whatUsersDislike: List[str]
class CommentModerationType(TypedDict):
isSpam: bool
requiresAdminReview: bool
contextNote: str
internalNote: str
commentQuality: int
QueryType = Union[str, bytes, SQL, Composed, Literal]
# --- Database Connection Pool ---
_db_pool: Optional[ConnectionPool] = None
def get_db_pool() -> ConnectionPool:
"""
Get or create the database connection pool.
Returns:
A connection pool object.
"""
global _db_pool
if _db_pool is None:
try:
# Create a new connection pool with min connections of 2 and max of 10
_db_pool = ConnectionPool(
conninfo=config.db_connection_string,
min_size=2,
max_size=10,
# Configure how connections are initialized
kwargs={
"autocommit": False,
},
)
logger.info("Database connection pool initialized")
except Exception as e:
logger.error(f"Error creating database connection pool: {e}")
raise
return _db_pool
def close_db_pool():
"""
Close the database connection pool.
This should be called when the application is shutting down.
"""
global _db_pool
if _db_pool is not None:
logger.info("Closing database connection pool")
_db_pool.close()
_db_pool = None
@contextmanager
def get_db_connection() -> Generator[psycopg.Connection, None, None]:
"""
Context manager for database connections.
Yields:
A database connection object from the pool.
"""
pool = get_db_pool()
try:
# Use the connection method which returns a connection as a context manager
with pool.connection() as conn:
# Set the schema explicitly after connection
with conn.cursor() as cursor:
cursor.execute("SET search_path TO public")
yield conn
# The connection will be automatically returned to the pool
# when the with block exits
except Exception as e:
logger.error(f"Error connecting to the database: {e}")
raise
# --- Database Functions ---
def fetch_all_services() -> List[Dict[str, Any]]:
"""
Fetch all public and verified services from the database.
Returns:
A list of service dictionaries.
"""
services = []
try:
with get_db_connection() as conn:
with conn.cursor(row_factory=dict_row) as cursor:
cursor.execute("""
SELECT id, name, slug, description, "kycLevel", "overallScore",
"privacyScore", "trustScore", "verificationStatus",
"serviceVisibility", "tosUrls", "serviceUrls", "onionUrls", "i2pUrls",
"tosReview", "tosReviewAt", "userSentiment", "userSentimentAt"
FROM "Service"
WHERE "serviceVisibility" = 'PUBLIC'
AND ("verificationStatus" = 'VERIFICATION_SUCCESS'
OR "verificationStatus" = 'COMMUNITY_CONTRIBUTED'
OR "verificationStatus" = 'APPROVED')
ORDER BY id
""")
services = cursor.fetchall()
logger.info(f"Fetched {len(services)} services from the database")
except Exception as e:
logger.error(f"Error fetching services: {e}")
return services
def fetch_services_with_pending_comments() -> List[Dict[str, Any]]:
"""
Fetch all public and verified services that have at least one pending comment.
Returns:
A list of service dictionaries.
"""
services = []
try:
with get_db_connection() as conn:
with conn.cursor(row_factory=dict_row) as cursor:
cursor.execute("""
SELECT DISTINCT s.id, s.name, s.slug, s.description, s."kycLevel", s."overallScore",
s."privacyScore", s."trustScore", s."verificationStatus",
s."serviceVisibility", s."tosUrls", s."serviceUrls", s."onionUrls", s."i2pUrls",
s."tosReview", s."tosReviewAt", s."userSentiment", s."userSentimentAt"
FROM "Service" s
JOIN "Comment" c ON s.id = c."serviceId"
WHERE c.status = 'PENDING'
AND s."serviceVisibility" = 'PUBLIC'
AND (s."verificationStatus" = 'VERIFICATION_SUCCESS'
OR s."verificationStatus" = 'COMMUNITY_CONTRIBUTED'
OR s."verificationStatus" = 'APPROVED')
ORDER BY s.id
""")
services = cursor.fetchall()
logger.info(
f"Fetched {len(services)} services with pending comments from the database"
)
except Exception as e:
logger.error(f"Error fetching services with pending comments: {e}")
return services
def fetch_service_attributes(service_id: int) -> List[Dict[str, Any]]:
"""
Fetch attributes for a specific service.
Args:
service_id: The ID of the service.
Returns:
A list of attribute dictionaries.
"""
attributes = []
try:
with get_db_connection() as conn:
with conn.cursor(row_factory=dict_row) as cursor:
cursor.execute(
"""
SELECT a.id, a.slug, a.title, a.description, a.category, a.type
FROM "Attribute" a
JOIN "ServiceAttribute" sa ON a.id = sa."attributeId"
WHERE sa."serviceId" = %s
""",
(service_id,),
)
attributes = cursor.fetchall()
except Exception as e:
logger.error(f"Error fetching attributes for service {service_id}: {e}")
return attributes
def get_attribute_id_by_slug(slug: str) -> Optional[int]:
attribute_id = None
try:
with get_db_connection() as conn:
with conn.cursor(row_factory=dict_row) as cursor:
cursor.execute('SELECT id FROM "Attribute" WHERE slug = %s', (slug,))
row = cursor.fetchone()
if row:
attribute_id = row["id"]
except Exception as e:
logger.error(f"Error fetching attribute id for slug '{slug}': {e}")
return attribute_id
def add_service_attribute(service_id: int, attribute_id: int) -> bool:
try:
with get_db_connection() as conn:
with conn.cursor(row_factory=dict_row) as cursor:
cursor.execute(
'SELECT 1 FROM "ServiceAttribute" WHERE "serviceId" = %s AND "attributeId" = %s',
(service_id, attribute_id),
)
if cursor.fetchone():
return True
cursor.execute(
'INSERT INTO "ServiceAttribute" ("serviceId", "attributeId", "createdAt") VALUES (%s, %s, NOW())',
(service_id, attribute_id),
)
conn.commit()
logger.info(
f"Added attribute id {attribute_id} to service {service_id}"
)
return True
except Exception as e:
logger.error(
f"Error adding attribute id {attribute_id} to service {service_id}: {e}"
)
return False
def remove_service_attribute(service_id: int, attribute_id: int) -> bool:
try:
with get_db_connection() as conn:
with conn.cursor(row_factory=dict_row) as cursor:
cursor.execute(
'DELETE FROM "ServiceAttribute" WHERE "serviceId" = %s AND "attributeId" = %s',
(service_id, attribute_id),
)
conn.commit()
logger.info(
f"Removed attribute id {attribute_id} from service {service_id}"
)
return True
except Exception as e:
logger.error(
f"Error removing attribute id {attribute_id} from service {service_id}: {e}"
)
return False
def add_service_attribute_by_slug(service_id: int, attribute_slug: str) -> bool:
attribute_id = get_attribute_id_by_slug(attribute_slug)
if attribute_id is None:
logger.error(f"Attribute with slug '{attribute_slug}' not found.")
return False
return add_service_attribute(service_id, attribute_id)
def remove_service_attribute_by_slug(service_id: int, attribute_slug: str) -> bool:
attribute_id = get_attribute_id_by_slug(attribute_slug)
if attribute_id is None:
logger.error(f"Attribute with slug '{attribute_slug}' not found.")
return False
return remove_service_attribute(service_id, attribute_id)
def save_tos_review(service_id: int, review: TosReviewType):
"""
Save a TOS review for a specific service.
Args:
service_id: The ID of the service.
review: A TypedDict containing the review data.
"""
try:
# Serialize the dictionary to a JSON string for the database
review_json = json.dumps(review)
with get_db_connection() as conn:
with conn.cursor(row_factory=dict_row) as cursor:
cursor.execute(
"""
UPDATE "Service"
SET "tosReview" = %s, "tosReviewAt" = NOW()
WHERE id = %s
""",
(review_json, service_id),
)
conn.commit()
logger.info(f"Successfully saved TOS review for service {service_id}")
except Exception as e:
logger.error(f"Error saving TOS review for service {service_id}: {e}")
def update_kyc_level(service_id: int, kyc_level: int) -> bool:
"""
Update the KYC level for a specific service.
Args:
service_id: The ID of the service.
kyc_level: The new KYC level (0-4).
Returns:
bool: True if the update was successful, False otherwise.
"""
try:
# Ensure the kyc_level is within the valid range
if not 0 <= kyc_level <= 4:
logger.error(
f"Invalid KYC level ({kyc_level}) for service {service_id}. Must be between 0 and 4."
)
return False
with get_db_connection() as conn:
with conn.cursor(row_factory=dict_row) as cursor:
cursor.execute(
"""
UPDATE "Service"
SET "kycLevel" = %s, "updatedAt" = NOW()
WHERE id = %s
""",
(kyc_level, service_id),
)
conn.commit()
logger.info(
f"Successfully updated KYC level to {kyc_level} for service {service_id}"
)
return True
except Exception as e:
logger.error(f"Error updating KYC level for service {service_id}: {e}")
return False
def get_comments(service_id: int, status: str = "APPROVED") -> List[Dict[str, Any]]:
"""
Get all comments for a specific service with the specified status.
Args:
service_id: The ID of the service.
status: The status of comments to fetch (e.g. 'APPROVED', 'PENDING'). Defaults to 'APPROVED'.
Returns:
A list of comment dictionaries.
NOTE: The structure returned by the SQL query might be different from CommentType.
Adjust CommentType or parsing if needed elsewhere.
"""
comments = []
try:
with get_db_connection() as conn:
with conn.cursor(row_factory=dict_row) as cursor:
cursor.execute(
"""
WITH RECURSIVE comment_tree AS (
-- Base case: get all root comments (no parent)
SELECT
c.id,
c.content,
c.rating,
c.upvotes,
c."createdAt",
c."updatedAt",
c."parentId",
c.status,
u.id as "authorId",
u.name as "authorName",
u."displayName" as "authorDisplayName",
u.picture as "authorPicture",
u.verified as "authorVerified",
0 as depth
FROM "Comment" c
JOIN "User" u ON c."authorId" = u.id
WHERE c."serviceId" = %s
AND c.status = %s
AND c."parentId" IS NULL
UNION ALL
-- Recursive case: get all replies
SELECT
c.id,
c.content,
c.rating,
c.upvotes,
c."createdAt",
c."updatedAt",
c."parentId",
c.status,
u.id as "authorId",
u.name as "authorName",
u."displayName" as "authorDisplayName",
u.picture as "authorPicture",
u.verified as "authorVerified",
ct.depth + 1
FROM "Comment" c
JOIN "User" u ON c."authorId" = u.id
JOIN comment_tree ct ON c."parentId" = ct.id
WHERE c.status = %s
)
SELECT * FROM comment_tree
ORDER BY "createdAt" DESC, depth ASC
""",
(service_id, status, status),
)
comments = cursor.fetchall()
except Exception as e:
logger.error(
f"Error fetching comments for service {service_id} with status {status}: {e}"
)
return comments
def get_max_comment_updated_at(
service_id: int, status: str = "APPROVED"
) -> Optional[datetime]:
"""
Get the maximum 'updatedAt' timestamp for comments of a specific service and status.
Args:
service_id: The ID of the service.
status: The status of comments to consider.
Returns:
The maximum 'updatedAt' timestamp as a datetime object, or None if no matching comments.
"""
max_updated_at = None
try:
with get_db_connection() as conn:
with (
conn.cursor() as cursor
): # dict_row not strictly needed for single value
cursor.execute(
"""
SELECT MAX("updatedAt")
FROM "Comment"
WHERE "serviceId" = %s AND status = %s
""",
(service_id, status),
)
result = cursor.fetchone()
if result and result[0] is not None:
max_updated_at = result[0]
except Exception as e:
logger.error(
f"Error fetching max comment updatedAt for service {service_id} with status {status}: {e}"
)
return max_updated_at
def save_user_sentiment(
service_id: int,
sentiment: Optional[CommentSentimentSummaryType],
last_processed_comment_timestamp: Optional[datetime],
):
"""
Save user sentiment for a specific service and the timestamp of the last comment processed.
Args:
service_id: The ID of the service.
sentiment: A dictionary containing the sentiment data, or None to clear it.
last_processed_comment_timestamp: The 'updatedAt' timestamp of the most recent comment
considered in this sentiment analysis. Can be None.
"""
try:
sentiment_json = json.dumps(sentiment) if sentiment is not None else None
with get_db_connection() as conn:
with conn.cursor() as cursor: # row_factory not needed for UPDATE
cursor.execute(
"""
UPDATE "Service"
SET "userSentiment" = %s, "userSentimentAt" = %s
WHERE id = %s
""",
(sentiment_json, last_processed_comment_timestamp, service_id),
)
conn.commit()
if sentiment:
logger.info(
f"Successfully saved user sentiment for service {service_id} with last comment processed at {last_processed_comment_timestamp}"
)
else:
logger.info(
f"Successfully cleared user sentiment for service {service_id}, last comment processed at set to {last_processed_comment_timestamp}"
)
except Exception as e:
logger.error(f"Error saving user sentiment for service {service_id}: {e}")
def update_comment_moderation(comment_data: CommentType):
"""
Update an existing comment in the database based on moderation results.
Args:
comment_data: A TypedDict representing the comment data to update.
Expected keys are defined in CommentType.
"""
comment_id = comment_data.get("id")
if not comment_id:
logger.error("Cannot update comment: 'id' is missing from comment_data.")
return
try:
with get_db_connection() as conn:
with conn.cursor() as cursor:
cursor.execute(
"""
UPDATE "Comment"
SET
status = %(status)s,
"requiresAdminReview" = %(requiresAdminReview)s,
"communityNote" = %(communityNote)s,
"internalNote" = %(internalNote)s,
"updatedAt" = NOW()
WHERE id = %(id)s
""",
comment_data,
)
conn.commit()
logger.info(f"Successfully updated comment {comment_id}")
except Exception as e:
logger.error(f"Error updating comment {comment_id}: {e}")
def touch_service_updated_at(service_id: int) -> bool:
"""
Update the updatedAt field for a specific service to now.
Args:
service_id: The ID of the service.
Returns:
bool: True if the update was successful, False otherwise.
"""
try:
with get_db_connection() as conn:
with conn.cursor(row_factory=dict_row) as cursor:
cursor.execute(
"""
UPDATE "Service"
SET "updatedAt" = NOW()
WHERE id = %s
""",
(service_id,),
)
conn.commit()
logger.info(f"Successfully touched updatedAt for service {service_id}")
return True
except Exception as e:
logger.error(f"Error touching updatedAt for service {service_id}: {e}")
return False
def run_db_query(query: Any, params: Optional[Any] = None) -> List[Dict[str, Any]]:
results = []
try:
with get_db_connection() as conn:
with conn.cursor(row_factory=dict_row) as cursor:
if params is None:
cursor.execute(query)
else:
cursor.execute(query, params)
results = cursor.fetchall()
except Exception as e:
logger.error(f"Error running query: {e}")
return results
def execute_db_command(command: str, params: Optional[Any] = None) -> int:
"""
Execute a database command (INSERT, UPDATE, DELETE) and return affected rows.
Args:
command: The SQL command string.
params: Optional parameters for the command.
Returns:
The number of rows affected by the command.
"""
affected_rows = 0
try:
with get_db_connection() as conn:
with conn.cursor() as cursor:
# Cast the string to the expected type to satisfy the type checker
# In runtime, this is equivalent to passing the command directly
cursor.execute(command, params) # type: ignore
affected_rows = cursor.rowcount
conn.commit()
logger.info(f"Executed command, {affected_rows} rows affected.")
except Exception as e:
logger.error(f"Error executing command: {e}")
return affected_rows
def create_attribute(
slug: str,
title: str,
description: str,
category: str,
type: str,
privacy_points: float = 0,
trust_points: float = 0,
overall_points: float = 0,
) -> Optional[int]:
"""
Create a new attribute in the database if it doesn't already exist.
Args:
slug: The unique slug for the attribute.
title: The display title of the attribute.
description: The description of the attribute.
category: The category of the attribute (e.g., 'TRUST', 'PRIVACY').
type: The type of the attribute (e.g., 'WARNING', 'FEATURE').
privacy_points: Points affecting privacy score (default: 0).
trust_points: Points affecting trust score (default: 0).
overall_points: Points affecting overall score (default: 0).
Returns:
The ID of the created (or existing) attribute, or None if creation failed.
"""
try:
with get_db_connection() as conn:
with conn.cursor(row_factory=dict_row) as cursor:
# First check if the attribute already exists
cursor.execute('SELECT id FROM "Attribute" WHERE slug = %s', (slug,))
row = cursor.fetchone()
if row:
logger.info(
f"Attribute with slug '{slug}' already exists, id: {row['id']}"
)
return row["id"]
# Create the attribute if it doesn't exist
cursor.execute(
"""
INSERT INTO "Attribute" (
slug, title, description, "privacyPoints", "trustPoints",
category, type, "createdAt", "updatedAt"
) VALUES (
%s, %s, %s, %s, %s, %s, %s, NOW(), NOW()
) RETURNING id
""",
(
slug,
title,
description,
privacy_points,
trust_points,
category,
type,
),
)
conn.commit()
result = cursor.fetchone()
if result is None:
logger.error(
f"Failed to retrieve ID for newly created attribute with slug '{slug}'"
)
return None
attribute_id = result["id"]
logger.info(
f"Created new attribute with slug '{slug}', id: {attribute_id}"
)
return attribute_id
except Exception as e:
logger.error(f"Error creating attribute with slug '{slug}': {e}")
return None

View File

@@ -1,184 +0,0 @@
"""
Scheduler module for managing task execution with cron.
"""
import signal
import threading
from datetime import datetime
from types import FrameType
from typing import Any, Callable, Dict, List, ParamSpec, TypeVar
from croniter import croniter
from pyworker.database import close_db_pool
from .tasks import (
CommentModerationTask,
ForceTriggersTask,
ServiceScoreRecalculationTask,
TosReviewTask,
UserSentimentTask,
)
from pyworker.utils.app_logging import setup_logging
logger = setup_logging(__name__)
P = ParamSpec("P")
R = TypeVar("R")
class TaskScheduler:
"""Task scheduler for running tasks on a cron schedule."""
def __init__(self):
"""Initialize the task scheduler."""
self.tasks: Dict[str, Dict[str, Any]] = {}
self.running = False
self.threads: List[threading.Thread] = []
self.stop_event = threading.Event()
self.logger = logger
# Set up signal handlers
signal.signal(signal.SIGINT, self._handle_signal)
signal.signal(signal.SIGTERM, self._handle_signal)
def _handle_signal(self, signum: int, frame: FrameType | None) -> None:
"""Handle termination signals."""
self.logger.info(f"Received signal {signum}, shutting down...")
self.stop()
def register_task(
self,
task_name: str,
cron_expression: str,
task_func: Callable[P, R],
*args: P.args,
**kwargs: P.kwargs,
) -> None:
"""
Register a task to be scheduled.
Args:
task_name: Name of the task.
cron_expression: Cron expression defining the schedule.
task_func: Function to execute.
*args: Arguments to pass to the task function.
**kwargs: Keyword arguments to pass to the task function.
"""
# Declare task_instance variable with type annotation upfront
task_instance: Any = None
# Initialize the appropriate task class based on the task name
if task_name.lower() == "tosreview":
task_instance = TosReviewTask()
elif task_name.lower() == "user_sentiment":
task_instance = UserSentimentTask()
elif task_name.lower() == "comment_moderation":
task_instance = CommentModerationTask()
elif task_name.lower() == "force_triggers":
task_instance = ForceTriggersTask()
elif task_name.lower() == "service_score_recalc":
task_instance = ServiceScoreRecalculationTask()
else:
self.logger.warning(f"Unknown task '{task_name}', skipping")
return
self.tasks[task_name] = {
"cron": cron_expression,
"func": task_func,
"instance": task_instance,
"args": args,
"kwargs": kwargs,
}
self.logger.info(
f"Registered task '{task_name}' with schedule: {cron_expression}"
)
def _run_task(self, task_name: str, task_info: Dict[str, Any]):
"""
Run a task on its schedule.
Args:
task_name: Name of the task.
task_info: Task information including function and schedule.
"""
self.logger.info(f"Starting scheduler for task '{task_name}'")
# Parse the cron expression
cron = croniter(task_info["cron"], datetime.now())
while not self.stop_event.is_set():
# Get the next run time
next_run = cron.get_next(datetime)
self.logger.info(f"Next run for task '{task_name}': {next_run}")
# Sleep until the next run time
now = datetime.now()
sleep_seconds = (next_run - now).total_seconds()
if sleep_seconds > 0:
# Wait until next run time or until stop event is set
if self.stop_event.wait(sleep_seconds):
break
# Run the task if we haven't been stopped
if not self.stop_event.is_set():
try:
self.logger.info(f"Running task '{task_name}'")
# Use task instance as a context manager to ensure
# a single database connection is used for the entire task
with task_info["instance"] as task_instance:
# Execute the task instance's run method directly
task_instance.run()
self.logger.info(f"Task '{task_name}' completed")
except Exception as e:
self.logger.exception(f"Error running task '{task_name}': {e}")
finally:
# Close the database pool after task execution
close_db_pool()
def start(self):
"""Start the scheduler."""
if self.running:
self.logger.warning("Scheduler is already running")
return
self.logger.info("Starting scheduler")
self.running = True
self.stop_event.clear()
# Start a thread for each task
for task_name, task_info in self.tasks.items():
thread = threading.Thread(
target=self._run_task,
args=(task_name, task_info),
name=f"scheduler-{task_name}",
)
thread.daemon = True
thread.start()
self.threads.append(thread)
self.logger.info(f"Started {len(self.threads)} scheduler threads")
def stop(self):
"""Stop the scheduler."""
if not self.running:
return
self.logger.info("Stopping scheduler")
self.running = False
self.stop_event.set()
# Wait for all threads to terminate
for thread in self.threads:
thread.join(timeout=5.0)
self.threads = []
# Close database pool when the scheduler stops
close_db_pool()
self.logger.info("Scheduler stopped")
def is_running(self) -> bool:
"""Check if the scheduler is running."""
return self.running

View File

@@ -1,17 +0,0 @@
"""Task modules for the pyworker package."""
from .base import Task
from .comment_moderation import CommentModerationTask
from .force_triggers import ForceTriggersTask
from .service_score_recalc import ServiceScoreRecalculationTask
from .tos_review import TosReviewTask
from .user_sentiment import UserSentimentTask
__all__ = [
"Task",
"CommentModerationTask",
"ForceTriggersTask",
"ServiceScoreRecalculationTask",
"TosReviewTask",
"UserSentimentTask",
]

View File

@@ -1,64 +0,0 @@
"""
Base task module for the pyworker package.
"""
from abc import ABC, abstractmethod
from contextlib import AbstractContextManager
from typing import Any, Optional, Type
from pyworker.database import get_db_connection
from pyworker.utils.app_logging import setup_logging
logger = setup_logging(__name__)
class Task(ABC):
"""Base class for all worker tasks."""
def __init__(self, name: str):
"""
Initialize a task.
Args:
name: The name of the task.
"""
self.name = name
self.logger = setup_logging(f"pyworker.task.{name}")
self.conn: Optional[Any] = None
self._context: Optional[AbstractContextManager[Any]] = None
def __enter__(self):
"""Enter context manager, acquiring a database connection."""
self._context = get_db_connection()
self.conn = self._context.__enter__()
return self
def __exit__(
self,
exc_type: Optional[Type[BaseException]],
exc_val: Optional[BaseException],
exc_tb: Optional[Any],
) -> Optional[bool]:
"""Exit context manager, releasing the database connection."""
if self._context:
return self._context.__exit__(exc_type, exc_val, exc_tb)
return None
@abstractmethod
def run(self, *args: Any, **kwargs: Any) -> Any:
"""
Run the task.
This method must be implemented by subclasses.
Args:
*args: Variable length argument list.
**kwargs: Arbitrary keyword arguments.
Returns:
The result of the task.
"""
pass
def __str__(self) -> str:
return f"{self.__class__.__name__}(name={self.name})"

View File

@@ -1,112 +0,0 @@
"""
Task for summarizing comments and getting overal sentiment
"""
import json
from datetime import datetime
from typing import Any, Dict, List
# Import types from database.py
from pyworker.database import ( # type: ignore
CommentType,
get_comments,
update_comment_moderation,
)
from pyworker.tasks.base import Task # type: ignore
from pyworker.utils.ai import prompt_comment_moderation
class DateTimeEncoder(json.JSONEncoder):
def default(self, o: Any) -> Any:
if isinstance(o, datetime):
return o.isoformat()
return super().default(o)
class CommentModerationTask(Task):
"""Task for summarizing comments and getting overal sentiment"""
def __init__(self):
"""Initialize the comment moderation task."""
super().__init__("comment_moderation")
def run(self, service: Dict[str, Any]) -> bool:
"""
Run the comment moderation task.
Returns True if comments were processed, False otherwise.
"""
service_id = service["id"]
service_name = service["name"]
# Query the approved comments for the service
# get_comments is type ignored, so we assume it returns List[Dict[str, Any]]
comments: List[Dict[str, Any]] = get_comments(service_id, status="PENDING")
if not comments:
self.logger.info(
f"No pending comments found for service {service_name} (ID: {service_id}) during task run."
)
return False
self.logger.info(
f"Found {len(comments)} pending comments for service {service_name} (ID: {service_id}). Starting processing."
)
processed_at_least_one = False
for comment_data in comments:
# Assert the type for the individual dictionary for type checking within the loop
comment: CommentType = comment_data # type: ignore
# Query OpenAI to get the sentiment summary
moderation = prompt_comment_moderation(
f"Information about the service: {service}\\nCurrent time: {datetime.now()}\\n\\nComment to moderate: {json.dumps(comment, cls=DateTimeEncoder)}"
)
modstring = f"Comment {comment['id']} "
if moderation["isSpam"] and moderation["commentQuality"] > 5:
comment["status"] = "HUMAN_PENDING"
modstring += " marked as HUMAN_PENDING"
elif moderation["isSpam"] and moderation["commentQuality"] <= 5:
comment["status"] = "REJECTED"
modstring += " marked as REJECTED"
if moderation["requiresAdminReview"]:
comment["requiresAdminReview"] = True
modstring += " requires admin review"
# Ensure status is HUMAN_PENDING if admin review is required, unless already REJECTED
if comment.get("status") != "REJECTED":
comment["status"] = "HUMAN_PENDING"
if (
"marked as HUMAN_PENDING" not in modstring
): # Avoid duplicate message
modstring += " marked as HUMAN_PENDING"
else:
comment["requiresAdminReview"] = False
if (
comment.get("status") != "HUMAN_PENDING"
and comment.get("status") != "REJECTED"
):
comment["status"] = "APPROVED"
modstring += " marked as APPROVED"
if moderation.get("moderationNote"): # Check if key exists
comment["communityNote"] = moderation["contextNote"]
modstring += " with moderation note: " + moderation["contextNote"]
else:
comment["communityNote"] = None
if moderation.get("internalNote"): # Check if key exists
comment["internalNote"] = moderation["internalNote"]
modstring += (
" with internal note: " + moderation["internalNote"]
) # Changed from spam reason for clarity
else:
comment["internalNote"] = None
# Save the sentiment summary to the database
self.logger.info(f"{modstring}")
update_comment_moderation(comment)
processed_at_least_one = True
return processed_at_least_one

View File

@@ -1,43 +0,0 @@
from pyworker.tasks.base import Task
from pyworker.utils.app_logging import setup_logging
logger = setup_logging(__name__)
class ForceTriggersTask(Task):
"""
Force triggers to run under certain conditions.
"""
RECENT_LISTED_INTERVAL_DAYS = 15
def __init__(self):
super().__init__("force_triggers")
def run(self) -> bool:
logger.info(f"Starting {self.name} task.")
# Use the connection provided by the base Task class
if not self.conn:
logger.error("No database connection available")
return False
update_query = f"""
UPDATE "Service"
SET "isRecentlyListed" = FALSE, "updatedAt" = NOW()
WHERE "isRecentlyListed" = TRUE
AND "listedAt" IS NOT NULL
AND "listedAt" < NOW() - INTERVAL '{self.RECENT_LISTED_INTERVAL_DAYS} days'
"""
try:
with self.conn.cursor() as cursor:
cursor.execute(update_query)
self.conn.commit()
added_count = cursor.rowcount
logger.info(f"Updated {added_count} services.")
except Exception as e:
logger.error(f"Error updating services: {e}")
return False
logger.info(f"{self.name} task completed successfully.")
return True

View File

@@ -1,325 +0,0 @@
"""
Task to recalculate service scores based on attribute changes.
"""
from typing import Optional
from pyworker.tasks.base import Task
from pyworker.utils.app_logging import setup_logging
logger = setup_logging(__name__)
class ServiceScoreRecalculationTask(Task):
"""
Process pending service score recalculation jobs.
This task fetches jobs from the ServiceScoreRecalculationJob table
and recalculates service scores using the PostgreSQL functions.
"""
def __init__(self):
super().__init__("service_score_recalc")
def run(self, service_id: Optional[int] = None) -> bool:
"""
Process score recalculation jobs from the ServiceScoreRecalculationJob table.
Args:
service_id: Optional service ID to process only that specific service
Returns:
bool: True if successful, False otherwise
"""
logger.info(f"Starting {self.name} task.")
processed_count = 0
error_count = 0
batch_size = 50
# Use the connection provided by the base Task class
if not self.conn:
logger.error("No database connection available")
return False
try:
# Build query - either for a specific service or all pending jobs
if service_id:
select_query = """
SELECT id, "serviceId"
FROM "ServiceScoreRecalculationJob"
WHERE "serviceId" = %s AND "processedAt" IS NULL
ORDER BY "createdAt" ASC
"""
params = [service_id]
else:
select_query = """
SELECT id, "serviceId"
FROM "ServiceScoreRecalculationJob"
WHERE "processedAt" IS NULL
ORDER BY "createdAt" ASC
LIMIT %s
"""
params = [batch_size]
# Fetch jobs
with self.conn.cursor() as cursor:
cursor.execute(select_query, params)
unprocessed_jobs = cursor.fetchall()
if not unprocessed_jobs:
logger.info("No pending service score recalculation jobs found.")
return True
logger.info(
f"Processing {len(unprocessed_jobs)} service score recalculation jobs."
)
# Process each job
for job in unprocessed_jobs:
job_id = job[0] # First column is id
svc_id = job[1] # Second column is serviceId
try:
self._process_service_score(svc_id, job_id)
processed_count += 1
logger.debug(
f"Successfully processed job {job_id} for service {svc_id}"
)
except Exception as e:
if self.conn:
self.conn.rollback()
error_count += 1
logger.error(
f"Error processing job {job_id} for service {svc_id}: {str(e)}",
exc_info=True,
)
logger.info(
f"{self.name} task completed. Processed: {processed_count}, Errors: {error_count}"
)
return processed_count > 0 or error_count == 0
except Exception as e:
if self.conn:
self.conn.rollback()
logger.error(f"Failed to run {self.name} task: {str(e)}", exc_info=True)
return False
def _process_service_score(self, service_id: int, job_id: int) -> None:
"""
Process a single service score recalculation job.
Args:
service_id: The service ID to recalculate scores for
job_id: The job ID to mark as processed
"""
if not self.conn:
raise ValueError("No database connection available")
with self.conn.cursor() as cursor:
# 1. Calculate privacy score
cursor.execute("SELECT calculate_privacy_score(%s)", [service_id])
privacy_score = cursor.fetchone()[0]
# 2. Calculate trust score
cursor.execute("SELECT calculate_trust_score(%s)", [service_id])
trust_score = cursor.fetchone()[0]
# 3. Calculate overall score
cursor.execute(
"SELECT calculate_overall_score(%s, %s, %s)",
[service_id, privacy_score, trust_score],
)
overall_score = cursor.fetchone()[0]
# 4. Check for verification status and cap score if needed
cursor.execute(
'SELECT "verificationStatus" FROM "Service" WHERE id = %s',
[service_id],
)
result = cursor.fetchone()
if result is None:
logger.warning(
f"Service with ID {service_id} not found. Deleting job {job_id}."
)
# Delete the job if the service is gone
cursor.execute(
"""
DELETE FROM "ServiceScoreRecalculationJob"
WHERE id = %s
""",
[job_id],
)
self.conn.commit()
return # Skip the rest of the processing for this job
status = result[0]
if status == "VERIFICATION_FAILED":
if overall_score > 3:
overall_score = 3
elif overall_score < 0:
overall_score = 0
# 5. Update the service with recalculated scores
cursor.execute(
"""
UPDATE "Service"
SET "privacyScore" = %s, "trustScore" = %s, "overallScore" = %s
WHERE id = %s
""",
[privacy_score, trust_score, overall_score, service_id],
)
# 6. Mark the job as processed
cursor.execute(
"""
UPDATE "ServiceScoreRecalculationJob"
SET "processedAt" = NOW()
WHERE id = %s
""",
[job_id],
)
# Commit the transaction
if self.conn:
self.conn.commit()
def recalculate_all_services(self) -> bool:
"""
Recalculate scores for all active services.
Useful for batch updates after attribute changes.
Returns:
bool: True if successful, False otherwise
"""
logger.info("Starting recalculation for all active services.")
if not self.conn:
logger.error("No database connection available")
return False
try:
# Get all active service IDs
with self.conn.cursor() as cursor:
cursor.execute(
"""
SELECT id
FROM "Service"
WHERE "isActive" = TRUE
"""
)
services = cursor.fetchall()
if not services:
logger.info("No active services found.")
return True
logger.info(f"Found {len(services)} active services to recalculate.")
# Queue recalculation jobs for all services
inserted_count = 0
for service in services:
service_id = service[0]
try:
if self.conn:
with self.conn.cursor() as cursor:
cursor.execute(
"""
INSERT INTO "ServiceScoreRecalculationJob" ("serviceId", "createdAt", "processedAt")
VALUES (%s, NOW(), NULL)
ON CONFLICT ("serviceId") DO UPDATE
SET "processedAt" = NULL, "createdAt" = NOW()
""",
[service_id],
)
self.conn.commit()
inserted_count += 1
except Exception as e:
if self.conn:
self.conn.rollback()
logger.error(
f"Error queueing job for service {service_id}: {str(e)}"
)
logger.info(f"Successfully queued {inserted_count} recalculation jobs.")
return True
except Exception as e:
if self.conn:
self.conn.rollback()
logger.error(f"Failed to queue recalculation jobs: {str(e)}", exc_info=True)
return False
def recalculate_for_attribute(self, attribute_id: int) -> bool:
"""
Recalculate scores for all services associated with a specific attribute.
Args:
attribute_id: The attribute ID to recalculate scores for
Returns:
bool: True if successful, False otherwise
"""
logger.info(
f"Starting recalculation for services with attribute ID {attribute_id}."
)
if not self.conn:
logger.error("No database connection available")
return False
try:
# Get all services associated with this attribute
with self.conn.cursor() as cursor:
cursor.execute(
"""
SELECT DISTINCT sa."serviceId"
FROM "ServiceAttribute" sa
WHERE sa."attributeId" = %s
""",
[attribute_id],
)
services = cursor.fetchall()
if not services:
logger.info(f"No services found with attribute ID {attribute_id}.")
return True
logger.info(
f"Found {len(services)} services with attribute ID {attribute_id}."
)
# Queue recalculation jobs for all services with this attribute
inserted_count = 0
for service in services:
service_id = service[0]
try:
if self.conn:
with self.conn.cursor() as cursor:
cursor.execute(
"""
INSERT INTO "ServiceScoreRecalculationJob" ("serviceId", "createdAt", "processedAt")
VALUES (%s, NOW(), NULL)
ON CONFLICT ("serviceId") DO UPDATE
SET "processedAt" = NULL, "createdAt" = NOW()
""",
[service_id],
)
self.conn.commit()
inserted_count += 1
except Exception as e:
if self.conn:
self.conn.rollback()
logger.error(
f"Error queueing job for service {service_id}: {str(e)}"
)
logger.info(f"Successfully queued {inserted_count} recalculation jobs.")
return True
except Exception as e:
if self.conn:
self.conn.rollback()
logger.error(f"Failed to queue recalculation jobs: {str(e)}", exc_info=True)
return False

View File

@@ -1,116 +0,0 @@
"""
Task for retrieving Terms of Service (TOS) text.
"""
import hashlib
from typing import Any, Dict, Optional
from pyworker.database import TosReviewType, save_tos_review, update_kyc_level
from pyworker.tasks.base import Task
from pyworker.utils.ai import prompt_check_tos_review, prompt_tos_review
from pyworker.utils.crawl import fetch_markdown
class TosReviewTask(Task):
"""Task for retrieving Terms of Service (TOS) text."""
def __init__(self):
"""Initialize the TOS review task."""
super().__init__("tos_review")
def run(self, service: Dict[str, Any]) -> Optional[TosReviewType]:
"""
Review TOS text for a service.
Args:
service: A dictionary containing service information.
Returns:
A dictionary mapping TOS URLs to their retrieved text, or None if no TOS URLs.
"""
service_id = service["id"]
service_name = service["name"]
verification_status = service.get("verificationStatus")
# Only process verified or approved services
if verification_status not in ["VERIFICATION_SUCCESS", "APPROVED"]:
self.logger.info(
f"Skipping TOS review for service: {service_name} (ID: {service_id}) - Status: {verification_status}"
)
return None
tos_urls = service.get("tosUrls", [])
if not tos_urls:
self.logger.info(
f"No TOS URLs found for service: {service_name} (ID: {service_id})"
)
return None
self.logger.info(
f"Reviewing TOS for service: {service_name} (ID: {service_id})"
)
self.logger.info(f"TOS URLs: {tos_urls}")
for tos_url in tos_urls:
api_url = f"{tos_url}"
self.logger.info(f"Fetching TOS from URL: {api_url}")
# Sleep for 1 second to avoid rate limiting
content = fetch_markdown(api_url)
if content:
# Hash the content to avoid repeating the same content
content_hash = hashlib.sha256(content.encode()).hexdigest()
self.logger.info(f"Content hash: {content_hash}")
# service.get("tosReview") can be None if the DB field is NULL.
# Default to an empty dict to prevent AttributeError on .get()
tos_review_data_from_service: Optional[Dict[str, Any]] = service.get(
"tosReview"
)
tos_review: Dict[str, Any] = (
tos_review_data_from_service
if tos_review_data_from_service is not None
else {}
)
stored_hash = tos_review.get("contentHash")
# Skip processing if we've seen this content before
if stored_hash == content_hash:
self.logger.info(
f"Skipping already processed TOS content with hash: {content_hash}"
)
continue
# Skip incomplete TOS content
check = prompt_check_tos_review(content)
if not check:
continue
elif not check["isComplete"]:
continue
# Query OpenAI to summarize the content
review = prompt_tos_review(content)
if review:
review["contentHash"] = content_hash
# Save the review to the database
save_tos_review(service_id, review)
# Update the KYC level based on the review
if "kycLevel" in review:
kyc_level = review["kycLevel"]
self.logger.info(
f"Updating KYC level to {kyc_level} for service {service_name}"
)
update_kyc_level(service_id, kyc_level)
# no need to check other TOS URLs
break
return review
else:
self.logger.warning(
f"Failed to retrieve TOS content for URL: {tos_url}"
)

View File

@@ -1,134 +0,0 @@
"""
Task for summarizing comments and getting overal sentiment
"""
import json
from datetime import datetime
from typing import Any, Dict, Optional
from pyworker.database import (
CommentSentimentSummaryType,
get_comments,
get_max_comment_updated_at,
save_user_sentiment,
)
from pyworker.tasks.base import Task
from pyworker.utils.ai import (
prompt_comment_sentiment_summary,
)
class DateTimeEncoder(json.JSONEncoder):
def default(self, o: Any) -> Any:
if isinstance(o, datetime):
return o.isoformat()
return super().default(o)
class UserSentimentTask(Task):
"""Task for summarizing comments and getting overal sentiment"""
def __init__(self):
"""Initialize the comment sentiment summary task."""
super().__init__("comment_sentiment_summary")
def run(self, service: Dict[str, Any]) -> Optional[CommentSentimentSummaryType]:
"""
Run the comment sentiment summary task.
Skips execution if no new comments are found since the last run.
Clears sentiment if all comments are removed.
"""
service_id = service["id"]
service_name = service["name"]
current_user_sentiment_at: Optional[datetime] = service.get("userSentimentAt")
if isinstance(current_user_sentiment_at, str):
try:
current_user_sentiment_at = datetime.fromisoformat(
str(current_user_sentiment_at).replace("Z", "+00:00")
)
except ValueError:
self.logger.warning(
f"Could not parse userSentimentAt string '{current_user_sentiment_at}' for service {service_id}. Treating as None."
)
current_user_sentiment_at = None
# Get the timestamp of the most recent approved comment
max_comment_updated_at = get_max_comment_updated_at(
service_id, status="APPROVED"
)
self.logger.info(
f"Service {service_name} (ID: {service_id}): Current userSentimentAt: {current_user_sentiment_at}, Max approved comment updatedAt: {max_comment_updated_at}"
)
if max_comment_updated_at is None:
self.logger.info(
f"No approved comments found for service {service_name} (ID: {service_id})."
)
# If there was a sentiment before and now no comments, clear it.
if service.get("userSentiment") is not None:
self.logger.info(
f"Clearing existing sentiment for service {service_name} (ID: {service_id}) as no approved comments are present."
)
save_user_sentiment(service_id, None, None)
return None
if (
current_user_sentiment_at is not None
and max_comment_updated_at <= current_user_sentiment_at
):
self.logger.info(
f"No new approved comments for service {service_name} (ID: {service_id}) since last sentiment analysis ({current_user_sentiment_at}). Skipping."
)
# Optionally, return the existing sentiment if needed:
# existing_sentiment = service.get("userSentiment")
# return existing_sentiment if isinstance(existing_sentiment, dict) else None
return None
# Query the approved comments for the service
# get_comments defaults to status="APPROVED"
comments = get_comments(service_id)
self.logger.info(
f"Found {len(comments)} comments for service {service_name} (ID: {service_id}) to process."
)
if not comments:
# This case could occur if max_comment_updated_at found a comment,
# but get_comments filters it out or it was deleted just before get_comments ran.
self.logger.info(
f"No comments to process for service {service_name} (ID: {service_id}) after fetching (e.g. due to filtering or deletion)."
)
if service.get("userSentiment") is not None:
self.logger.info(
f"Clearing existing sentiment for service {service_name} (ID: {service_id}) as no processable comments found."
)
# Use max_comment_updated_at as the reference point for when this check was made.
save_user_sentiment(service_id, None, max_comment_updated_at)
return None
# Query OpenAI to get the sentiment summary
try:
sentiment_summary = prompt_comment_sentiment_summary(
json.dumps(comments, cls=DateTimeEncoder)
)
except Exception as e:
self.logger.error(
f"Failed to generate sentiment summary for service {service_name} (ID: {service_id}): {e}"
)
return None
if not sentiment_summary: # Defensive check if prompt could return None/empty
self.logger.warning(
f"Sentiment summary generation returned empty for service {service_name} (ID: {service_id})."
)
return None
# Save the sentiment summary to the database, using max_comment_updated_at
save_user_sentiment(service_id, sentiment_summary, max_comment_updated_at)
self.logger.info(
f"Successfully processed and saved user sentiment for service {service_name} (ID: {service_id})."
)
return sentiment_summary

View File

@@ -1 +0,0 @@
"""Utility modules for the pyworker package."""

View File

@@ -1,261 +0,0 @@
import os
import time
from typing import Any, Dict, List, Literal, TypedDict, cast
from json_repair import repair_json
from openai import OpenAI, OpenAIError
from openai.types.chat import ChatCompletionMessageParam
from pyworker.database import (
CommentModerationType,
CommentSentimentSummaryType,
TosReviewType,
)
from pyworker.utils.app_logging import setup_logging
logger = setup_logging(__name__)
client = OpenAI(
base_url=os.environ.get("OPENAI_BASE_URL"),
api_key=os.environ.get("OPENAI_API_KEY"),
)
def query_openai_json(
messages: List[ChatCompletionMessageParam],
model: str = os.environ.get("OPENAI_MODEL", "deepseek-chat-cheaper"),
) -> Dict[str, Any]:
max_retries = int(os.environ.get("OPENAI_RETRY", 3))
retry_delay = 30
last_error = None
for attempt in range(max_retries):
try:
completion = client.chat.completions.create(
model=model,
messages=messages,
)
content = completion.choices[0].message.content
if content is None:
raise ValueError("OpenAI response content is None")
logger.debug(f"Raw AI response content: {content}")
try:
result = repair_json(content)
if isinstance(result, str):
import json
result = json.loads(result)
if not isinstance(result, dict):
logger.error(
f"Repaired JSON is not a dictionary. Type: {type(result)}, Value: {result}"
)
raise TypeError(
f"Expected a dictionary from AI response, but got {type(result)}"
)
return result
except Exception as e:
logger.error(f"Failed to process JSON response: {e}")
logger.error(f"Raw content was: {content}")
raise
except (OpenAIError, ValueError, TypeError) as e:
last_error = e
if attempt == max_retries - 1: # Last attempt
logger.error(f"Failed after {max_retries} attempts. Last error: {e}")
raise last_error
logger.warning(
f"Attempt {attempt + 1} failed: {e}. Retrying in {retry_delay} seconds..."
)
time.sleep(retry_delay)
retry_delay *= 2 # Exponential backoff
# This line should never be reached due to the raise in the last attempt
raise last_error # type: ignore
ReasonType = Literal["js_required", "firewalled", "other"]
class TosReviewCheck(TypedDict):
isComplete: bool
def prompt_check_tos_review(content: str) -> TosReviewCheck:
messages: List[ChatCompletionMessageParam] = [
{"role": "system", "content": PROMPT_CHECK_TOS_REVIEW},
{"role": "user", "content": content},
]
result_dict = query_openai_json(messages, model="openai/gpt-4.1-mini")
return cast(TosReviewCheck, result_dict)
def prompt_tos_review(content: str) -> TosReviewType:
messages: List[ChatCompletionMessageParam] = [
{"role": "system", "content": PROMPT_TOS_REVIEW},
{"role": "user", "content": content},
]
result_dict = query_openai_json(messages)
return cast(TosReviewType, result_dict)
def prompt_comment_sentiment_summary(content: str) -> CommentSentimentSummaryType:
messages: List[ChatCompletionMessageParam] = [
{"role": "system", "content": PROMPT_COMMENT_SENTIMENT_SUMMARY},
{"role": "user", "content": content},
]
result_dict = query_openai_json(messages)
return cast(CommentSentimentSummaryType, result_dict)
def prompt_comment_moderation(content: str) -> CommentModerationType:
messages: List[ChatCompletionMessageParam] = [
{"role": "system", "content": PROMPT_COMMENT_MODERATION},
{"role": "user", "content": content},
]
result_dict = query_openai_json(messages)
return cast(CommentModerationType, result_dict)
PROMPT_CHECK_TOS_REVIEW = """
You will receive the Markdown content of a website page. Determine if the page is a complete. If the page was blocked (e.g. by Cloudflare or similar), incomplete (e.g. requires JavaScript), irrelevant (login/signup/CAPTCHA), set isComplete to false.
If the page contains meaningful, coherent, valid service information or policy content, with no obvious blocking or truncation, set isComplete to true.
Return only this JSON and nothing else:
{"isComplete": true} or {"isComplete": false}
"""
PROMPT_TOS_REVIEW = """
You are a privacy analysis AI tasked with reviewing Terms of Service documents.
Your goal is to identify key information about data collection, privacy implications, and user rights.
You are a privacy advocate and you are looking for the most important information for the user in regards to privacy, kyc, self-sovereignity, anonymity, etc.
Analyze the provided Terms of Service and extract the following information:
1. KYC level is on a scale of 1 to 4:
- **Guaranteed no KYC (Level 0)**: Terms explicitly state KYC will never be requested.
- **No KYC mention (Level 1)**: No mention of current or future KYC requirements. The document does not mention KYC at all.
- **KYC on authorities request (Level 2)**: No routine KYC, but may share data, block funds or reject transactions. Cooperates with authorities.
- **Shotgun KYC (Level 3)**: May request KYC and block funds based on automated transaction flagging system. It is not mandatory by default, but can be requested at any time, for any reason.
- **Mandatory KYC (Level 4)**: Required for key features or for user registration.
2. Overall summary of the terms of service, must be concise and to the point, no more than 250 characters. Use markdown formatting to highlight the most important information. Plain english.
3. Complexity of the terms of service text for a non-technical user, must be a string of 'low', 'medium', 'high'.
4. 'highlights': The important bits of information from the ToS document for the user to know. Always related to privacy, kyc, self-sovereignity, anonymity, custody, censorship resistance, etc. No need to mention these topics, just the important bits of information from the ToS document.
- important things to look for: automated transaction scanning, rejection or block of funds, refund policy (does it require KYC?), data sharing, logging, kyc requirements, etc.
- if No reference to KYC or proof of funds checks is mentioned or required, you don't need to mention it in the highlights, it is already implied from the kycLevel.
- Try to avoid obvious statements that can be infered from other, more important, highlights. Keep it short and concise only with the most important information for the user.
- You must strictly adhere to the document information, do not make up or infer information, do not make assumptions, do not add any information that is not explicitly stated in the document.
Format your response as a valid JSON object with the following structure:
type TosReview = {
kycLevel: 0 | 1 | 2 | 3 | 4
/** Less than 200 characters */
summary: MarkdownString
complexity: 'high' | 'low' | 'medium'
highlights: {
/** Very short title, max 2-3 words */
title: string
/** Less than 200 characters. Highlight the most important information with markdown formatting. */
content: MarkdownString
/** In regards to KYC, Privacy, Anonymity, Self-Sovereignity, etc. */
/** anything that could harm the user's privacy, identity, self-sovereignity or anonymity is negative, anything that otherwise helps is positive. else it is neutral. */
rating: 'negative' | 'neutral' | 'positive'
}[]
}
The rating is a number between 0 and 2, where 0 is informative, 1 is warning, and 2 is critical.
Be concise but thorough, and make sure your output is properly formatted JSON.
"""
PROMPT_COMMENT_SENTIMENT_SUMMARY = """
You will be given a list of user comments to a service.
Your task is to summarize the comments in a way that is easy to understand and to the point.
The summary should be concise and to the point, no more than 150 words.
Use markdown formatting to highlight in bold the most important information. Only bold is allowed.
You must format your response as a valid JSON object with the following structure:
interface CommentSummary {
summary: string;
sentiment: 'positive'|'negative'|'neutral';
whatUsersLike: string[]; // Concise, 2-3 words, max 4
whatUsersDislike: string[]; // Concise, 2-3 words, max 4
}
Always avoid repeating information in the list of what users like or dislike. Also, make sure you keep the summary short and concise, no more than 150 words. Ignore irrelevant comments. Make an item for each like/dislike, avoid something like 'No logs / Audited', it should be 'No logs' and 'Audited' as separate items.
You must return a valid raw JSON object, without any other text or formatting.
"""
PROMPT_COMMENT_MODERATION = """
You are kycnot.mes comment moderation API. Your sole responsibility is to analyze user comments on directory listings (cryptocurrency, anonymity, privacy services) and decide, in strict accordance with the schema and rules below, whether each comment is spam, needs admin review, and its overall quality for our platform. Output ONLY a plain, valid JSON object, with NO markdown, extra text, annotations, or code blocks.
## Output Schema
interface CommentModeration {
isSpam: boolean;
requiresAdminReview: boolean;
contextNote: string;
internalNote: string;
commentQuality: 0|1|2|3|4|5|6|7|8|9|10;
}
## FIELD EXPLANATION
- isSpam: Mark true if the comment is spam, irrelevant, repetitive, misleading, self-promoting, or fails minimum quality standards.
- requiresAdminReview: Mark true ONLY if the comment reports: service non-functionality, listing inaccuracies, clear scams, exit-scams, critical policy changes, malfunctions, service outages, or sensitive platform issues. If true, always add internalNote to explain why you made this decision.
- contextNote: Optional, visible to users. Add ONLY when clarification or warning is necessary―e.g., unsubstantiated claims or potential spam.
- internalNote: Internal note that is not visible to users. Example: explain why you marked a comment as spam or low quality. You should leave this empty if no relevant information would be added.
- commentQuality: 0 (lowest) to 10 (highest). Rate purely on informativeness, relevance, helpfulness, and evidence.
## STRICT MODERATION RULES
- Reject ALL comments that are generic, extremely short, or meaningless on their own, unless replying with added value or genuine context. Examples: "hey", "hello", "hi", "ok", "good", "great", "thanks", "test", "scam"—these are LOW quality and must generally be flagged as spam or rated VERY low, unless context justifies.
- Exception: Replies allowed if they significantly clarify, elaborate, or engage with a previous comment, and ADD new value.
- Comments must provide context, detail, experience, a clear perspective, or evidence. Approve only if the comment adds meaningful insight to the listings discussion.
- Mark as spam:
- Meaningless, contextless, very short comments (“hi”, “hey”).
- Comments entirely self-promotional, containing excessive emojis, special characters, random text, or multiple unrelated links.
- Use the surrounding context (such as parent comments, service description, previous discussions) to evaluate if a short comment is a valid reply, or still too low quality to approve.
- Rate "commentQuality" based on:
- 0-2: Meaningless, off-topic, one-word, no value.
- 3-5: Vague, minimal, only slightly relevant, lacking evidence.
- 6-8: Detailed, relevant, some insight or evidence, well-explained.
- 9-10: Exceptionally thorough, informative, well-documented experience.
- For claims (positive or negative) without evidence, add a warning context note: "This comment makes claims without supporting evidence."
- For extended, unstructured, or incoherent text (e.g. spam, or AI-generated nonsense), mark as spam.
## EXAMPLES
- "hello":
isSpam: true, internalNote: "Comment provides no value or context.", commentQuality: 0
- "works":
isSpam: true, internalNote: "Comment too short and contextless.", commentQuality: 0
- "Service did not work on my device—got error 503.":
isSpam: false, requiresAdminReview: true, commentQuality: 7
- "Scam!":
isSpam: true, internalNote: "Unsubstantiated, one-word negative claim.", commentQuality: 0, contextNote: "This is a one-word claim without details or evidence."
- "Instant transactions, responsive customer support. Used for 6 months.":
isSpam: false, commentQuality: 8
## INSTRUCTIONS
- Always evaluate if a comment stands on its own, adds value, and has relevance to the listing. Reject one-word, contextless, or “drive-by” comments.
- Replies: Only approve short replies if they directly answer or clarify something above and ADD useful new information.
Format your output EXACTLY as a raw JSON object using the schema, with NO extra formatting, markdown, or text.
"""

View File

@@ -1,31 +0,0 @@
"""
HTTP utilities for the pyworker package.
"""
from typing import Optional
import requests
from pyworker.utils.app_logging import setup_logging
logger = setup_logging(__name__)
def fetch_url(url: str, timeout: int = 30) -> Optional[str]:
"""
Fetch content from a URL.
Args:
url: The URL to fetch.
timeout: The timeout in seconds.
Returns:
The text content of the response, or None if the request failed.
"""
try:
response = requests.get(url, timeout=timeout)
response.raise_for_status()
return response.text
except requests.RequestException as e:
logger.error(f"Error fetching URL {url}: {e}")
return None

View File

@@ -1,36 +0,0 @@
"""
Logging utilities for the pyworker package.
"""
import logging
import sys
from pyworker.config import config
def setup_logging(name: str = "pyworker") -> logging.Logger:
"""
Set up logging for the application.
Args:
name: The name of the logger.
Returns:
A configured logger instance.
"""
logger = logging.getLogger(name)
# Set log level from configuration
log_level = getattr(logging, config.LOG_LEVEL.upper(), logging.INFO)
logger.setLevel(log_level)
# Create console handler
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(log_level)
# Create formatter
formatter = logging.Formatter(config.LOG_FORMAT)
handler.setFormatter(formatter)
# Add handler to logger
logger.addHandler(handler)
return logger

View File

@@ -1,100 +0,0 @@
import argparse
import os
import time
import requests
from dotenv import load_dotenv
from pyworker.utils.app_logging import setup_logging
from typing import Any
logger = setup_logging(__name__)
# Load environment variables from .env file
load_dotenv()
# Include API token header if set
CRAWL4AI_API_TOKEN = os.environ.get("CRAWL4AI_API_TOKEN", "")
HEADERS = (
{"Authorization": f"Bearer {CRAWL4AI_API_TOKEN}"} if CRAWL4AI_API_TOKEN else {}
)
CRAWL4AI_BASE_URL = os.environ.get("CRAWL4AI_BASE_URL", "http://crawl4ai:11235")
CRAWL4AI_TIMEOUT = int(os.environ.get("CRAWL4AI_TIMEOUT", 300))
CRAWL4AI_POLL_INTERVAL = int(os.environ.get("CRAWL4AI_POLL_INTERVAL", 2))
def fetch_fallback(url: str) -> str:
if not url:
raise ValueError("URL must not be empty")
logger.info(f"Fetching fallback for {url}")
fallback_url = f"https://r.jina.ai/{url.lstrip('/')}"
response = requests.get(fallback_url, timeout=80)
response.raise_for_status()
return response.text
def fetch_markdown(url: str, wait_for_dynamic_content: bool = True) -> str:
if not CRAWL4AI_API_TOKEN:
return fetch_fallback(url)
try:
payload: dict[str, Any] = {"urls": url}
if wait_for_dynamic_content:
# According to Crawl4AI docs, wait_for_images=True also waits for network idle state,
# which is helpful for JS-generated content.
# Adding scan_full_page and scroll_delay helps trigger lazy-loaded content.
payload["config"] = {
"wait_for_images": True,
"scan_full_page": True,
"scroll_delay": 0.5,
"magic": True,
}
response = requests.post(
f"{CRAWL4AI_BASE_URL}/crawl",
json=payload,
headers=HEADERS,
)
response.raise_for_status()
task_id = response.json().get("task_id")
start_time = time.time()
while True:
if time.time() - start_time > CRAWL4AI_TIMEOUT:
raise TimeoutError(f"Task {task_id} timeout")
status_resp = requests.get(
f"{CRAWL4AI_BASE_URL}/task/{task_id}",
headers=HEADERS,
)
status_resp.raise_for_status()
status = status_resp.json()
if status.get("status") == "completed":
markdown = status["result"].get("markdown", "")
metadata = status["result"].get("metadata", {})
return f"""
URL: {url}
Page Metadata: `{metadata}`
Markdown Content
----------------
{markdown}
"""
time.sleep(CRAWL4AI_POLL_INTERVAL)
except (requests.exceptions.RequestException, TimeoutError):
return fetch_fallback(url)
def main():
parser = argparse.ArgumentParser(
description="Crawl a URL and print its markdown content."
)
parser.add_argument("--url", required=True, help="The URL to crawl")
args = parser.parse_args()
print(f"Crawling {args.url}...")
markdown_content = fetch_markdown(args.url)
print("\n--- Markdown Content ---")
print(markdown_content)
if __name__ == "__main__":
main()

View File

@@ -1 +0,0 @@
"""Test package for the pyworker."""

View File

@@ -1,74 +0,0 @@
"""
Tests for task modules.
"""
import unittest
from unittest.mock import patch, MagicMock
from typing import Dict, Any
from pyworker.tasks import TosReviewTask
class TestTosRetrievalTask(unittest.TestCase):
"""Tests for the TOS retrieval task."""
def setUp(self):
"""Set up test fixtures."""
self.task = TosReviewTask()
self.service = {
'id': 1,
'name': 'Test Service',
'tosUrls': ['test1', 'test2']
}
@patch('pyworker.tasks.tos_review.fetch_url')
def test_run_success(self, mock_fetch_url: MagicMock) -> None:
"""Test successful TOS retrieval."""
# Mock the fetch_url function to return test responses
mock_fetch_url.side_effect = ["Test TOS 1", "Test TOS 2"]
# Run the task
result = self.task.run(self.service)
# Check that the function was called twice with the correct arguments
self.assertEqual(mock_fetch_url.call_count, 2)
mock_fetch_url.assert_any_call('https://r.jina.ai/test1')
mock_fetch_url.assert_any_call('https://r.jina.ai/test2')
# Check that the result contains the expected content
self.assertEqual(result, {
'test1': 'Test TOS 1',
'test2': 'Test TOS 2'
})
@patch('pyworker.tasks.tos_review.fetch_url')
def test_run_failure(self, mock_fetch_url: MagicMock) -> None:
"""Test TOS retrieval failure."""
# Mock the fetch_url function to return None (failure)
mock_fetch_url.return_value = None
# Run the task
result = self.task.run(self.service)
# Check that the function was called twice
self.assertEqual(mock_fetch_url.call_count, 2)
# Check that the result is None since all fetches failed
self.assertIsNone(result)
def test_run_no_urls(self):
"""Test TOS retrieval with no URLs."""
# Create a service with no TOS URLs
service_no_urls: Dict[str, Any] = {
'id': 2,
'name': 'Service With No TOS',
'tosUrls': []
}
# Run the task
result = self.task.run(service_no_urls)
# Check that the result is None
self.assertIsNone(result)
if __name__ == '__main__':
unittest.main()

414
pyworker/uv.lock generated
View File

@@ -1,414 +0,0 @@
version = 1
revision = 1
requires-python = ">=3.13"
[[package]]
name = "annotated-types"
version = "0.7.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643 },
]
[[package]]
name = "anyio"
version = "4.9.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "idna" },
{ name = "sniffio" },
]
sdist = { url = "https://files.pythonhosted.org/packages/95/7d/4c1bd541d4dffa1b52bd83fb8527089e097a106fc90b467a7313b105f840/anyio-4.9.0.tar.gz", hash = "sha256:673c0c244e15788651a4ff38710fea9675823028a6f08a5eda409e0c9840a028", size = 190949 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a1/ee/48ca1a7c89ffec8b6a0c5d02b89c305671d5ffd8d3c94acf8b8c408575bb/anyio-4.9.0-py3-none-any.whl", hash = "sha256:9f76d541cad6e36af7beb62e978876f3b41e3e04f2c1fbf0884604c0a9c4d93c", size = 100916 },
]
[[package]]
name = "certifi"
version = "2025.1.31"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/1c/ab/c9f1e32b7b1bf505bf26f0ef697775960db7932abeb7b516de930ba2705f/certifi-2025.1.31.tar.gz", hash = "sha256:3d5da6925056f6f18f119200434a4780a94263f10d1c21d032a6f6b2baa20651", size = 167577 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/38/fc/bce832fd4fd99766c04d1ee0eead6b0ec6486fb100ae5e74c1d91292b982/certifi-2025.1.31-py3-none-any.whl", hash = "sha256:ca78db4565a652026a4db2bcdf68f2fb589ea80d0be70e03929ed730746b84fe", size = 166393 },
]
[[package]]
name = "charset-normalizer"
version = "3.4.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/38/94/ce8e6f63d18049672c76d07d119304e1e2d7c6098f0841b51c666e9f44a0/charset_normalizer-3.4.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda", size = 195698 },
{ url = "https://files.pythonhosted.org/packages/24/2e/dfdd9770664aae179a96561cc6952ff08f9a8cd09a908f259a9dfa063568/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313", size = 140162 },
{ url = "https://files.pythonhosted.org/packages/24/4e/f646b9093cff8fc86f2d60af2de4dc17c759de9d554f130b140ea4738ca6/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9", size = 150263 },
{ url = "https://files.pythonhosted.org/packages/5e/67/2937f8d548c3ef6e2f9aab0f6e21001056f692d43282b165e7c56023e6dd/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b", size = 142966 },
{ url = "https://files.pythonhosted.org/packages/52/ed/b7f4f07de100bdb95c1756d3a4d17b90c1a3c53715c1a476f8738058e0fa/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11", size = 144992 },
{ url = "https://files.pythonhosted.org/packages/96/2c/d49710a6dbcd3776265f4c923bb73ebe83933dfbaa841c5da850fe0fd20b/charset_normalizer-3.4.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f", size = 147162 },
{ url = "https://files.pythonhosted.org/packages/b4/41/35ff1f9a6bd380303dea55e44c4933b4cc3c4850988927d4082ada230273/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd", size = 140972 },
{ url = "https://files.pythonhosted.org/packages/fb/43/c6a0b685fe6910d08ba971f62cd9c3e862a85770395ba5d9cad4fede33ab/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2", size = 149095 },
{ url = "https://files.pythonhosted.org/packages/4c/ff/a9a504662452e2d2878512115638966e75633519ec11f25fca3d2049a94a/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886", size = 152668 },
{ url = "https://files.pythonhosted.org/packages/6c/71/189996b6d9a4b932564701628af5cee6716733e9165af1d5e1b285c530ed/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601", size = 150073 },
{ url = "https://files.pythonhosted.org/packages/e4/93/946a86ce20790e11312c87c75ba68d5f6ad2208cfb52b2d6a2c32840d922/charset_normalizer-3.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd", size = 145732 },
{ url = "https://files.pythonhosted.org/packages/cd/e5/131d2fb1b0dddafc37be4f3a2fa79aa4c037368be9423061dccadfd90091/charset_normalizer-3.4.1-cp313-cp313-win32.whl", hash = "sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407", size = 95391 },
{ url = "https://files.pythonhosted.org/packages/27/f2/4f9a69cc7712b9b5ad8fdb87039fd89abba997ad5cbe690d1835d40405b0/charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971", size = 102702 },
{ url = "https://files.pythonhosted.org/packages/0e/f6/65ecc6878a89bb1c23a086ea335ad4bf21a588990c3f535a227b9eea9108/charset_normalizer-3.4.1-py3-none-any.whl", hash = "sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85", size = 49767 },
]
[[package]]
name = "colorama"
version = "0.4.6"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 },
]
[[package]]
name = "croniter"
version = "6.0.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "python-dateutil" },
{ name = "pytz" },
]
sdist = { url = "https://files.pythonhosted.org/packages/ad/2f/44d1ae153a0e27be56be43465e5cb39b9650c781e001e7864389deb25090/croniter-6.0.0.tar.gz", hash = "sha256:37c504b313956114a983ece2c2b07790b1f1094fe9d81cc94739214748255577", size = 64481 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/07/4b/290b4c3efd6417a8b0c284896de19b1d5855e6dbdb97d2a35e68fa42de85/croniter-6.0.0-py2.py3-none-any.whl", hash = "sha256:2f878c3856f17896979b2a4379ba1f09c83e374931ea15cc835c5dd2eee9b368", size = 25468 },
]
[[package]]
name = "distro"
version = "1.9.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/fc/f8/98eea607f65de6527f8a2e8885fc8015d3e6f5775df186e443e0964a11c3/distro-1.9.0.tar.gz", hash = "sha256:2fa77c6fd8940f116ee1d6b94a2f90b13b5ea8d019b98bc8bafdcabcdd9bdbed", size = 60722 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 },
]
[[package]]
name = "h11"
version = "0.14.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f5/38/3af3d3633a34a3316095b39c8e8fb4853a28a536e55d347bd8d8e9a14b03/h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", size = 100418 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/95/04/ff642e65ad6b90db43e668d70ffb6736436c7ce41fcc549f4e9472234127/h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761", size = 58259 },
]
[[package]]
name = "httpcore"
version = "1.0.8"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "h11" },
]
sdist = { url = "https://files.pythonhosted.org/packages/9f/45/ad3e1b4d448f22c0cff4f5692f5ed0666658578e358b8d58a19846048059/httpcore-1.0.8.tar.gz", hash = "sha256:86e94505ed24ea06514883fd44d2bc02d90e77e7979c8eb71b90f41d364a1bad", size = 85385 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/18/8d/f052b1e336bb2c1fc7ed1aaed898aa570c0b61a09707b108979d9fc6e308/httpcore-1.0.8-py3-none-any.whl", hash = "sha256:5254cf149bcb5f75e9d1b2b9f729ea4a4b883d1ad7379fc632b727cec23674be", size = 78732 },
]
[[package]]
name = "httpx"
version = "0.28.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
{ name = "certifi" },
{ name = "httpcore" },
{ name = "idna" },
]
sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 },
]
[[package]]
name = "idna"
version = "3.10"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f1/70/7703c29685631f5a7590aa73f1f1d3fa9a380e654b86af429e0934a32f7d/idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9", size = 190490 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/76/c6/c88e154df9c4e1a2a66ccf0005a88dfb2650c1dffb6f5ce603dfbd452ce3/idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3", size = 70442 },
]
[[package]]
name = "jiter"
version = "0.9.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/1e/c2/e4562507f52f0af7036da125bb699602ead37a2332af0788f8e0a3417f36/jiter-0.9.0.tar.gz", hash = "sha256:aadba0964deb424daa24492abc3d229c60c4a31bfee205aedbf1acc7639d7893", size = 162604 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e7/1b/4cd165c362e8f2f520fdb43245e2b414f42a255921248b4f8b9c8d871ff1/jiter-0.9.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:2764891d3f3e8b18dce2cff24949153ee30c9239da7c00f032511091ba688ff7", size = 308197 },
{ url = "https://files.pythonhosted.org/packages/13/aa/7a890dfe29c84c9a82064a9fe36079c7c0309c91b70c380dc138f9bea44a/jiter-0.9.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:387b22fbfd7a62418d5212b4638026d01723761c75c1c8232a8b8c37c2f1003b", size = 318160 },
{ url = "https://files.pythonhosted.org/packages/6a/38/5888b43fc01102f733f085673c4f0be5a298f69808ec63de55051754e390/jiter-0.9.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40d8da8629ccae3606c61d9184970423655fb4e33d03330bcdfe52d234d32f69", size = 341259 },
{ url = "https://files.pythonhosted.org/packages/3d/5e/bbdbb63305bcc01006de683b6228cd061458b9b7bb9b8d9bc348a58e5dc2/jiter-0.9.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a1be73d8982bdc278b7b9377426a4b44ceb5c7952073dd7488e4ae96b88e1103", size = 363730 },
{ url = "https://files.pythonhosted.org/packages/75/85/53a3edc616992fe4af6814c25f91ee3b1e22f7678e979b6ea82d3bc0667e/jiter-0.9.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2228eaaaa111ec54b9e89f7481bffb3972e9059301a878d085b2b449fbbde635", size = 405126 },
{ url = "https://files.pythonhosted.org/packages/ae/b3/1ee26b12b2693bd3f0b71d3188e4e5d817b12e3c630a09e099e0a89e28fa/jiter-0.9.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:11509bfecbc319459647d4ac3fd391d26fdf530dad00c13c4dadabf5b81f01a4", size = 393668 },
{ url = "https://files.pythonhosted.org/packages/11/87/e084ce261950c1861773ab534d49127d1517b629478304d328493f980791/jiter-0.9.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f22238da568be8bbd8e0650e12feeb2cfea15eda4f9fc271d3b362a4fa0604d", size = 352350 },
{ url = "https://files.pythonhosted.org/packages/f0/06/7dca84b04987e9df563610aa0bc154ea176e50358af532ab40ffb87434df/jiter-0.9.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:17f5d55eb856597607562257c8e36c42bc87f16bef52ef7129b7da11afc779f3", size = 384204 },
{ url = "https://files.pythonhosted.org/packages/16/2f/82e1c6020db72f397dd070eec0c85ebc4df7c88967bc86d3ce9864148f28/jiter-0.9.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:6a99bed9fbb02f5bed416d137944419a69aa4c423e44189bc49718859ea83bc5", size = 520322 },
{ url = "https://files.pythonhosted.org/packages/36/fd/4f0cd3abe83ce208991ca61e7e5df915aa35b67f1c0633eb7cf2f2e88ec7/jiter-0.9.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:e057adb0cd1bd39606100be0eafe742de2de88c79df632955b9ab53a086b3c8d", size = 512184 },
{ url = "https://files.pythonhosted.org/packages/a0/3c/8a56f6d547731a0b4410a2d9d16bf39c861046f91f57c98f7cab3d2aa9ce/jiter-0.9.0-cp313-cp313-win32.whl", hash = "sha256:f7e6850991f3940f62d387ccfa54d1a92bd4bb9f89690b53aea36b4364bcab53", size = 206504 },
{ url = "https://files.pythonhosted.org/packages/f4/1c/0c996fd90639acda75ed7fa698ee5fd7d80243057185dc2f63d4c1c9f6b9/jiter-0.9.0-cp313-cp313-win_amd64.whl", hash = "sha256:c8ae3bf27cd1ac5e6e8b7a27487bf3ab5f82318211ec2e1346a5b058756361f7", size = 204943 },
{ url = "https://files.pythonhosted.org/packages/78/0f/77a63ca7aa5fed9a1b9135af57e190d905bcd3702b36aca46a01090d39ad/jiter-0.9.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f0b2827fb88dda2cbecbbc3e596ef08d69bda06c6f57930aec8e79505dc17001", size = 317281 },
{ url = "https://files.pythonhosted.org/packages/f9/39/a3a1571712c2bf6ec4c657f0d66da114a63a2e32b7e4eb8e0b83295ee034/jiter-0.9.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:062b756ceb1d40b0b28f326cba26cfd575a4918415b036464a52f08632731e5a", size = 350273 },
{ url = "https://files.pythonhosted.org/packages/ee/47/3729f00f35a696e68da15d64eb9283c330e776f3b5789bac7f2c0c4df209/jiter-0.9.0-cp313-cp313t-win_amd64.whl", hash = "sha256:6f7838bc467ab7e8ef9f387bd6de195c43bad82a569c1699cb822f6609dd4cdf", size = 206867 },
]
[[package]]
name = "json-repair"
version = "0.41.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/6d/6a/6c7a75a10da6dc807b582f2449034da1ed74415e8899746bdfff97109012/json_repair-0.41.1.tar.gz", hash = "sha256:bba404b0888c84a6b86ecc02ec43b71b673cfee463baf6da94e079c55b136565", size = 31208 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/10/5c/abd7495c934d9af5c263c2245ae30cfaa716c3c0cf027b2b8fa686ee7bd4/json_repair-0.41.1-py3-none-any.whl", hash = "sha256:0e181fd43a696887881fe19fed23422a54b3e4c558b6ff27a86a8c3ddde9ae79", size = 21578 },
]
[[package]]
name = "openai"
version = "1.74.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
{ name = "distro" },
{ name = "httpx" },
{ name = "jiter" },
{ name = "pydantic" },
{ name = "sniffio" },
{ name = "tqdm" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/75/86/c605a6e84da0248f2cebfcd864b5a6076ecf78849245af5e11d2a5ec7977/openai-1.74.0.tar.gz", hash = "sha256:592c25b8747a7cad33a841958f5eb859a785caea9ee22b9e4f4a2ec062236526", size = 427571 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a9/91/8c150f16a96367e14bd7d20e86e0bbbec3080e3eb593e63f21a7f013f8e4/openai-1.74.0-py3-none-any.whl", hash = "sha256:aff3e0f9fb209836382ec112778667027f4fd6ae38bdb2334bc9e173598b092a", size = 644790 },
]
[[package]]
name = "psycopg"
version = "3.2.6"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "tzdata", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/67/97/eea08f74f1c6dd2a02ee81b4ebfe5b558beb468ebbd11031adbf58d31be0/psycopg-3.2.6.tar.gz", hash = "sha256:16fa094efa2698f260f2af74f3710f781e4a6f226efe9d1fd0c37f384639ed8a", size = 156322 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d7/7d/0ba52deff71f65df8ec8038adad86ba09368c945424a9bd8145d679a2c6a/psycopg-3.2.6-py3-none-any.whl", hash = "sha256:f3ff5488525890abb0566c429146add66b329e20d6d4835662b920cbbf90ac58", size = 199077 },
]
[package.optional-dependencies]
binary = [
{ name = "psycopg-binary", marker = "implementation_name != 'pypy'" },
]
pool = [
{ name = "psycopg-pool" },
]
[[package]]
name = "psycopg-binary"
version = "3.2.6"
source = { registry = "https://pypi.org/simple" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/bf/32/3d06c478fd3070ac25a49c2e8ca46b6d76b0048fa9fa255b99ee32f32312/psycopg_binary-3.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:54af3fbf871baa2eb19df96fd7dc0cbd88e628a692063c3d1ab5cdd00aa04322", size = 3852672 },
{ url = "https://files.pythonhosted.org/packages/34/97/e581030e279500ede3096adb510f0e6071874b97cfc047a9a87b7d71fc77/psycopg_binary-3.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ad5da1e4636776c21eaeacdec42f25fa4612631a12f25cd9ab34ddf2c346ffb9", size = 3936562 },
{ url = "https://files.pythonhosted.org/packages/74/b6/6a8df4cb23c3d327403a83406c06c9140f311cb56c4e4d720ee7abf6fddc/psycopg_binary-3.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f7956b9ea56f79cd86eddcfbfc65ae2af1e4fe7932fa400755005d903c709370", size = 4499167 },
{ url = "https://files.pythonhosted.org/packages/e4/5b/950eafef61e5e0b8ddb5afc5b6b279756411aa4bf70a346a6f091ad679bb/psycopg_binary-3.2.6-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1e2efb763188008cf2914820dcb9fb23c10fe2be0d2c97ef0fac7cec28e281d8", size = 4311651 },
{ url = "https://files.pythonhosted.org/packages/72/b9/b366c49afc854c26b3053d4d35376046eea9aebdc48ded18ea249ea1f80c/psycopg_binary-3.2.6-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4b3aab3451679f1e7932270e950259ed48c3b79390022d3f660491c0e65e4838", size = 4547852 },
{ url = "https://files.pythonhosted.org/packages/ab/d4/0e047360e2ea387dc7171ca017ffcee5214a0762f74b9dd982035f2e52fb/psycopg_binary-3.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:849a370ac4e125f55f2ad37f928e588291a67ccf91fa33d0b1e042bb3ee1f986", size = 4261725 },
{ url = "https://files.pythonhosted.org/packages/e3/ea/a1b969804250183900959ebe845d86be7fed2cbd9be58f64cd0fc24b2892/psycopg_binary-3.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:566d4ace928419d91f1eb3227fc9ef7b41cf0ad22e93dd2c3368d693cf144408", size = 3850073 },
{ url = "https://files.pythonhosted.org/packages/e5/71/ec2907342f0675092b76aea74365b56f38d960c4c635984dcfe25d8178c8/psycopg_binary-3.2.6-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:f1981f13b10de2f11cfa2f99a8738b35b3f0a0f3075861446894a8d3042430c0", size = 3320323 },
{ url = "https://files.pythonhosted.org/packages/d7/d7/0d2cb4b42f231e2efe8ea1799ce917973d47486212a2c4d33cd331e7ac28/psycopg_binary-3.2.6-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:36f598300b55b3c983ae8df06473ad27333d2fd9f3e2cfdb913b3a5aaa3a8bcf", size = 3402335 },
{ url = "https://files.pythonhosted.org/packages/66/92/7050c372f78e53eba14695cec6c3a91b2d9ca56feaf0bfe95fe90facf730/psycopg_binary-3.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0f4699fa5fe1fffb0d6b2d14b31fd8c29b7ea7375f89d5989f002aaf21728b21", size = 3440442 },
{ url = "https://files.pythonhosted.org/packages/5f/4c/bebcaf754189283b2f3d457822a3d9b233d08ff50973d8f1e8d51f4d35ed/psycopg_binary-3.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:afe697b8b0071f497c5d4c0f41df9e038391534f5614f7fb3a8c1ca32d66e860", size = 2783465 },
]
[[package]]
name = "psycopg-pool"
version = "3.2.6"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/cf/13/1e7850bb2c69a63267c3dbf37387d3f71a00fd0e2fa55c5db14d64ba1af4/psycopg_pool-3.2.6.tar.gz", hash = "sha256:0f92a7817719517212fbfe2fd58b8c35c1850cdd2a80d36b581ba2085d9148e5", size = 29770 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/47/fd/4feb52a55c1a4bd748f2acaed1903ab54a723c47f6d0242780f4d97104d4/psycopg_pool-3.2.6-py3-none-any.whl", hash = "sha256:5887318a9f6af906d041a0b1dc1c60f8f0dda8340c2572b74e10907b51ed5da7", size = 38252 },
]
[[package]]
name = "pydantic"
version = "2.11.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "annotated-types" },
{ name = "pydantic-core" },
{ name = "typing-extensions" },
{ name = "typing-inspection" },
]
sdist = { url = "https://files.pythonhosted.org/packages/10/2e/ca897f093ee6c5f3b0bee123ee4465c50e75431c3d5b6a3b44a47134e891/pydantic-2.11.3.tar.gz", hash = "sha256:7471657138c16adad9322fe3070c0116dd6c3ad8d649300e3cbdfe91f4db4ec3", size = 785513 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b0/1d/407b29780a289868ed696d1616f4aad49d6388e5a77f567dcd2629dcd7b8/pydantic-2.11.3-py3-none-any.whl", hash = "sha256:a082753436a07f9ba1289c6ffa01cd93db3548776088aa917cc43b63f68fa60f", size = 443591 },
]
[[package]]
name = "pydantic-core"
version = "2.33.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/17/19/ed6a078a5287aea7922de6841ef4c06157931622c89c2a47940837b5eecd/pydantic_core-2.33.1.tar.gz", hash = "sha256:bcc9c6fdb0ced789245b02b7d6603e17d1563064ddcfc36f046b61c0c05dd9df", size = 434395 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/7a/24/eed3466a4308d79155f1cdd5c7432c80ddcc4530ba8623b79d5ced021641/pydantic_core-2.33.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:70af6a21237b53d1fe7b9325b20e65cbf2f0a848cf77bed492b029139701e66a", size = 2033551 },
{ url = "https://files.pythonhosted.org/packages/ab/14/df54b1a0bc9b6ded9b758b73139d2c11b4e8eb43e8ab9c5847c0a2913ada/pydantic_core-2.33.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:282b3fe1bbbe5ae35224a0dbd05aed9ccabccd241e8e6b60370484234b456266", size = 1852785 },
{ url = "https://files.pythonhosted.org/packages/fa/96/e275f15ff3d34bb04b0125d9bc8848bf69f25d784d92a63676112451bfb9/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4b315e596282bbb5822d0c7ee9d255595bd7506d1cb20c2911a4da0b970187d3", size = 1897758 },
{ url = "https://files.pythonhosted.org/packages/b7/d8/96bc536e975b69e3a924b507d2a19aedbf50b24e08c80fb00e35f9baaed8/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:1dfae24cf9921875ca0ca6a8ecb4bb2f13c855794ed0d468d6abbec6e6dcd44a", size = 1986109 },
{ url = "https://files.pythonhosted.org/packages/90/72/ab58e43ce7e900b88cb571ed057b2fcd0e95b708a2e0bed475b10130393e/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6dd8ecfde08d8bfadaea669e83c63939af76f4cf5538a72597016edfa3fad516", size = 2129159 },
{ url = "https://files.pythonhosted.org/packages/dc/3f/52d85781406886c6870ac995ec0ba7ccc028b530b0798c9080531b409fdb/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2f593494876eae852dc98c43c6f260f45abdbfeec9e4324e31a481d948214764", size = 2680222 },
{ url = "https://files.pythonhosted.org/packages/f4/56/6e2ef42f363a0eec0fd92f74a91e0ac48cd2e49b695aac1509ad81eee86a/pydantic_core-2.33.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:948b73114f47fd7016088e5186d13faf5e1b2fe83f5e320e371f035557fd264d", size = 2006980 },
{ url = "https://files.pythonhosted.org/packages/4c/c0/604536c4379cc78359f9ee0aa319f4aedf6b652ec2854953f5a14fc38c5a/pydantic_core-2.33.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e11f3864eb516af21b01e25fac915a82e9ddad3bb0fb9e95a246067398b435a4", size = 2120840 },
{ url = "https://files.pythonhosted.org/packages/1f/46/9eb764814f508f0edfb291a0f75d10854d78113fa13900ce13729aaec3ae/pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:549150be302428b56fdad0c23c2741dcdb5572413776826c965619a25d9c6bde", size = 2072518 },
{ url = "https://files.pythonhosted.org/packages/42/e3/fb6b2a732b82d1666fa6bf53e3627867ea3131c5f39f98ce92141e3e3dc1/pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:495bc156026efafd9ef2d82372bd38afce78ddd82bf28ef5276c469e57c0c83e", size = 2248025 },
{ url = "https://files.pythonhosted.org/packages/5c/9d/fbe8fe9d1aa4dac88723f10a921bc7418bd3378a567cb5e21193a3c48b43/pydantic_core-2.33.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ec79de2a8680b1a67a07490bddf9636d5c2fab609ba8c57597e855fa5fa4dacd", size = 2254991 },
{ url = "https://files.pythonhosted.org/packages/aa/99/07e2237b8a66438d9b26482332cda99a9acccb58d284af7bc7c946a42fd3/pydantic_core-2.33.1-cp313-cp313-win32.whl", hash = "sha256:ee12a7be1742f81b8a65b36c6921022301d466b82d80315d215c4c691724986f", size = 1915262 },
{ url = "https://files.pythonhosted.org/packages/8a/f4/e457a7849beeed1e5defbcf5051c6f7b3c91a0624dd31543a64fc9adcf52/pydantic_core-2.33.1-cp313-cp313-win_amd64.whl", hash = "sha256:ede9b407e39949d2afc46385ce6bd6e11588660c26f80576c11c958e6647bc40", size = 1956626 },
{ url = "https://files.pythonhosted.org/packages/20/d0/e8d567a7cff7b04e017ae164d98011f1e1894269fe8e90ea187a3cbfb562/pydantic_core-2.33.1-cp313-cp313-win_arm64.whl", hash = "sha256:aa687a23d4b7871a00e03ca96a09cad0f28f443690d300500603bd0adba4b523", size = 1909590 },
{ url = "https://files.pythonhosted.org/packages/ef/fd/24ea4302d7a527d672c5be06e17df16aabfb4e9fdc6e0b345c21580f3d2a/pydantic_core-2.33.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:401d7b76e1000d0dd5538e6381d28febdcacb097c8d340dde7d7fc6e13e9f95d", size = 1812963 },
{ url = "https://files.pythonhosted.org/packages/5f/95/4fbc2ecdeb5c1c53f1175a32d870250194eb2fdf6291b795ab08c8646d5d/pydantic_core-2.33.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7aeb055a42d734c0255c9e489ac67e75397d59c6fbe60d155851e9782f276a9c", size = 1986896 },
{ url = "https://files.pythonhosted.org/packages/71/ae/fe31e7f4a62431222d8f65a3bd02e3fa7e6026d154a00818e6d30520ea77/pydantic_core-2.33.1-cp313-cp313t-win_amd64.whl", hash = "sha256:338ea9b73e6e109f15ab439e62cb3b78aa752c7fd9536794112e14bee02c8d18", size = 1931810 },
]
[[package]]
name = "python-dateutil"
version = "2.9.0.post0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "six" },
]
sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892 },
]
[[package]]
name = "python-dotenv"
version = "1.1.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/88/2c/7bb1416c5620485aa793f2de31d3df393d3686aa8a8506d11e10e13c5baf/python_dotenv-1.1.0.tar.gz", hash = "sha256:41f90bc6f5f177fb41f53e87666db362025010eb28f60a01c9143bfa33a2b2d5", size = 39920 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/1e/18/98a99ad95133c6a6e2005fe89faedf294a748bd5dc803008059409ac9b1e/python_dotenv-1.1.0-py3-none-any.whl", hash = "sha256:d7c01d9e2293916c18baf562d95698754b0dbbb5e74d457c45d4f6561fb9d55d", size = 20256 },
]
[[package]]
name = "pytz"
version = "2025.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f8/bf/abbd3cdfb8fbc7fb3d4d38d320f2441b1e7cbe29be4f23797b4a2b5d8aac/pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3", size = 320884 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/81/c4/34e93fe5f5429d7570ec1fa436f1986fb1f00c3e0f43a589fe2bbcd22c3f/pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00", size = 509225 },
]
[[package]]
name = "pyworker"
version = "0.1.0"
source = { editable = "." }
dependencies = [
{ name = "croniter" },
{ name = "json-repair" },
{ name = "openai" },
{ name = "psycopg", extra = ["binary", "pool"] },
{ name = "python-dotenv" },
{ name = "requests" },
]
[package.metadata]
requires-dist = [
{ name = "croniter", specifier = ">=6.0.0" },
{ name = "json-repair", specifier = ">=0.41.1" },
{ name = "openai", specifier = ">=1.74.0" },
{ name = "psycopg", extras = ["binary", "pool"], specifier = ">=3.2.6" },
{ name = "python-dotenv", specifier = ">=1.1.0" },
{ name = "requests", specifier = ">=2.32.3" },
]
[[package]]
name = "requests"
version = "2.32.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "certifi" },
{ name = "charset-normalizer" },
{ name = "idna" },
{ name = "urllib3" },
]
sdist = { url = "https://files.pythonhosted.org/packages/63/70/2bf7780ad2d390a8d301ad0b550f1581eadbd9a20f896afe06353c2a2913/requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760", size = 131218 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f9/9b/335f9764261e915ed497fcdeb11df5dfd6f7bf257d4a6a2a686d80da4d54/requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6", size = 64928 },
]
[[package]]
name = "six"
version = "1.17.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/94/e7/b2c673351809dca68a0e064b6af791aa332cf192da575fd474ed7d6f16a2/six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81", size = 34031 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b7/ce/149a00dd41f10bc29e5921b496af8b574d8413afcd5e30dfa0ed46c2cc5e/six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274", size = 11050 },
]
[[package]]
name = "sniffio"
version = "1.3.1"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/a2/87/a6771e1546d97e7e041b6ae58d80074f81b7d5121207425c964ddf5cfdbd/sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc", size = 20372 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e9/44/75a9c9421471a6c4805dbf2356f7c181a29c1879239abab1ea2cc8f38b40/sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2", size = 10235 },
]
[[package]]
name = "tqdm"
version = "4.67.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "colorama", marker = "sys_platform == 'win32'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/a8/4b/29b4ef32e036bb34e4ab51796dd745cdba7ed47ad142a9f4a1eb8e0c744d/tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2", size = 169737 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d0/30/dc54f88dd4a2b5dc8a0279bdd7270e735851848b762aeb1c1184ed1f6b14/tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2", size = 78540 },
]
[[package]]
name = "typing-extensions"
version = "4.13.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/f6/37/23083fcd6e35492953e8d2aaaa68b860eb422b34627b13f2ce3eb6106061/typing_extensions-4.13.2.tar.gz", hash = "sha256:e6c81219bd689f51865d9e372991c540bda33a0379d5573cddb9a3a23f7caaef", size = 106967 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/8b/54/b1ae86c0973cc6f0210b53d508ca3641fb6d0c56823f288d108bc7ab3cc8/typing_extensions-4.13.2-py3-none-any.whl", hash = "sha256:a439e7c04b49fec3e5d3e2beaa21755cadbbdc391694e28ccdd36ca4a1408f8c", size = 45806 },
]
[[package]]
name = "typing-inspection"
version = "0.4.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/82/5c/e6082df02e215b846b4b8c0b887a64d7d08ffaba30605502639d44c06b82/typing_inspection-0.4.0.tar.gz", hash = "sha256:9765c87de36671694a67904bf2c96e395be9c6439bb6c87b5142569dcdd65122", size = 76222 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/31/08/aa4fdfb71f7de5176385bd9e90852eaf6b5d622735020ad600f2bab54385/typing_inspection-0.4.0-py3-none-any.whl", hash = "sha256:50e72559fcd2a6367a19f7a7e610e6afcb9fac940c650290eed893d61386832f", size = 14125 },
]
[[package]]
name = "tzdata"
version = "2025.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/95/32/1a225d6164441be760d75c2c42e2780dc0873fe382da3e98a2e1e48361e5/tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9", size = 196380 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/5c/23/c7abc0ca0a1526a0774eca151daeb8de62ec457e77262b66b359c3c7679e/tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8", size = 347839 },
]
[[package]]
name = "urllib3"
version = "2.4.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/8a/78/16493d9c386d8e60e442a35feac5e00f0913c0f4b7c217c11e8ec2ff53e0/urllib3-2.4.0.tar.gz", hash = "sha256:414bc6535b787febd7567804cc015fee39daab8ad86268f1310a9250697de466", size = 390672 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/6b/11/cc635220681e93a0183390e26485430ca2c7b5f9d33b15c74c2861cb8091/urllib3-2.4.0-py3-none-any.whl", hash = "sha256:4e16665048960a0900c702d4a66415956a584919c03361cac9f1df5c5dd7e813", size = 128680 },
]