diff --git a/.env.example b/.env.example index 0b89aed..a5e71cb 100644 --- a/.env.example +++ b/.env.example @@ -77,3 +77,11 @@ VIRUSTOTAL_API_KEY= # AbuseIPDB: https://www.abuseipdb.com/ (1000 lookups/day free) ABUSEIPDB_API_KEY= + +# ------------------------------------------------------------ +# Scheduled Scans +# ------------------------------------------------------------ +# No extra configuration needed. The scheduler starts automatically +# with the app and checks every hour for scans that are due. +# Users create scheduled scans via POST /scheduled-scans (requires JWT auth). +# Webhooks fire automatically if a scheduled scan detects a score drop. diff --git a/app/main.py b/app/main.py index 673658d..0e8ad6b 100644 --- a/app/main.py +++ b/app/main.py @@ -11,6 +11,8 @@ from app.config import settings from app.database import close_db, init_db from app.middleware.rate_limiter import limiter from app.routers import auth, health, history, scan, apikey, report, code_scan +from app.routers import scheduled_scans +from app.services.scheduler import start_scheduler, stop_scheduler logging.basicConfig( level=logging.DEBUG if settings.debug else logging.INFO, @@ -24,7 +26,9 @@ async def lifespan(app: FastAPI): import app.models # noqa: F401 — register models with Base.metadata await init_db() logger.info("Database initialized") + start_scheduler() yield + stop_scheduler() await close_db() logger.info("Database connection closed") @@ -57,6 +61,7 @@ def create_app() -> FastAPI: application.include_router(apikey.router) application.include_router(report.router) application.include_router(code_scan.router) + application.include_router(scheduled_scans.router) logger.info(f"{settings.app_name} v{settings.app_version} initialized") diff --git a/app/models/__init__.py b/app/models/__init__.py index acbf549..b14a310 100644 --- a/app/models/__init__.py +++ b/app/models/__init__.py @@ -1,5 +1,6 @@ from app.models.user import User from app.models.scan import ScanResult from 
app.models.code_scan import CodeScanResult +from app.models.scheduled_scan import ScheduledScan -__all__ = ["User", "ScanResult", "CodeScanResult"] +__all__ = ["User", "ScanResult", "CodeScanResult", "ScheduledScan"] diff --git a/app/models/user.py b/app/models/user.py index 348becf..71de83f 100644 --- a/app/models/user.py +++ b/app/models/user.py @@ -22,5 +22,7 @@ class User(Base): scans = relationship("ScanResult", back_populates="user", lazy="selectin") code_scans = relationship("CodeScanResult", back_populates="user", lazy="selectin", cascade="all, delete") + scheduled_scans = relationship("ScheduledScan", back_populates="user", lazy="selectin", cascade="all, delete") api_keys = relationship("ApiKey", back_populates="user", lazy="selectin", cascade="all, delete") webhooks = relationship("Webhook", back_populates="user", lazy="selectin", cascade="all, delete") + diff --git a/app/routers/history.py b/app/routers/history.py index 4043ba6..7339a2f 100644 --- a/app/routers/history.py +++ b/app/routers/history.py @@ -17,9 +17,10 @@ from app.schemas.scan import ( ChatResponse, ThreatNarrativeResponse, ScanDiffResponse, + ScheduledScanResponse, ) -from app.services.ai import chat_with_scan_context, generate_threat_narrative +from app.services.ai import chat_with_scan_context, generate_threat_narrative, generate_diff_narrative router = APIRouter(prefix="/scans", tags=["history"]) @@ -198,7 +199,7 @@ async def diff_scans( ): result = await db.execute( select(ScanResult).where( - ScanResult.id.in_([old_id, new_id]), + ScanResult.id.in_([old_id, new_id]), ScanResult.user_id == current_user.id ) ) @@ -210,17 +211,28 @@ async def diff_scans( s_old = scans[0] if scans[0].id == old_id else scans[1] s_new = scans[1] if scans[1].id == new_id else scans[0] - # Convert to set-like structures using issue names + # Map issues by name for set-like comparison old_map = {i.get("issue"): i for i in s_old.issues} new_map = {i.get("issue"): i for i in s_new.issues} resolved = [v for k, v 
in old_map.items() if k not in new_map] new_issues = [v for k, v in new_map.items() if k not in old_map] persisting = [v for k, v in new_map.items() if k in old_map] + score_change = s_new.security_score - s_old.security_score + + # Ask the AI to narrate the changes in plain English + diff_context = { + "score_change": score_change, + "resolved_issues": resolved, + "new_issues": new_issues, + "persisting_issues": persisting, + } + narrative = await generate_diff_narrative(diff_context) return ScanDiffResponse( resolved_issues=resolved, new_issues=new_issues, persisting_issues=persisting, - score_change=s_new.security_score - s_old.security_score + score_change=score_change, + narrative=narrative, ) diff --git a/app/routers/scan.py b/app/routers/scan.py index 36764af..3443455 100644 --- a/app/routers/scan.py +++ b/app/routers/scan.py @@ -23,6 +23,7 @@ from app.services.scanner.ports import PortScanner from app.services.scoring import calculate_layer_statuses, calculate_score from app.services.ai import enhance_security_issues from app.services.threat_intel import get_threat_intel_summary +from app.services.webhook_dispatcher import dispatch_webhooks from app.utils.validators import validate_url logger = logging.getLogger(__name__) @@ -38,30 +39,6 @@ dns_scanner = DNSScanner() port_scanner = PortScanner() -async def dispatch_webhooks(user_id: str, scan_data: dict, db_session): - import hmac, hashlib, json - from sqlalchemy import select - - result = await db_session.execute( - select(Webhook).where(Webhook.user_id == user_id, Webhook.is_active == True) - ) - hooks = result.scalars().all() - if not hooks: - return - - async with httpx.AsyncClient() as client: - payload = json.dumps(scan_data).encode("utf-8") - for hook in hooks: - headers = {"Content-Type": "application/json"} - if hook.secret_key: - sig = hmac.new(hook.secret_key.encode(), payload, hashlib.sha256).hexdigest() - headers["X-SecureLens-Signature"] = sig - - try: - await client.post(hook.target_url, 
content=payload, headers=headers, timeout=5.0) - except Exception as e: - logger.warning(f"Webhook {hook.target_url} failed: {e}") - @router.post("/scan", response_model=ScanResponse) @limiter.limit(settings.rate_limit) diff --git a/app/schemas/scan.py b/app/schemas/scan.py index ef84c96..ab51eb3 100644 --- a/app/schemas/scan.py +++ b/app/schemas/scan.py @@ -77,3 +77,26 @@ class ScanDiffResponse(BaseModel): new_issues: list[Issue] persisting_issues: list[Issue] score_change: int + # AI-generated plain-English summary of what changed between the two scans. + # Defaults to None, but generate_diff_narrative returns a notice string (not None) when no AI key is configured. + narrative: str | None = None + + +class ScheduledScanCreate(BaseModel): + url: str = Field(..., description="The URL to scan on a schedule") + schedule: str = Field( + "daily", + description="How often to run the scan. Options: 'daily', 'weekly'", + ) + + +class ScheduledScanResponse(BaseModel): + id: str + url: str + schedule: str + is_active: bool + last_run_at: datetime | None = None + last_score: int | None = None + created_at: datetime + + model_config = {"from_attributes": True} diff --git a/app/services/ai.py b/app/services/ai.py index 848e8d5..491f146 100644 --- a/app/services/ai.py +++ b/app/services/ai.py @@ -170,3 +170,40 @@ async def generate_threat_narrative(context_data: dict) -> str: result = await call_ai(prompt, temperature=0.7) return result or "Could not generate threat narrative." + + +async def generate_diff_narrative(diff_data: dict) -> str: + """ + Generates a plain-English summary of the changes between two scans. + + Given the resolved, new, and persisting issues plus the score change, + the model writes a short paragraph explaining what improved, what + regressed, and what still needs attention — written for a developer + who wants to understand progress at a glance. + """ + if not settings.effective_ai_key: + return "AI narration is disabled because no AI API key is configured." 
+ + score_change = diff_data.get("score_change", 0) + resolved = diff_data.get("resolved_issues", []) + new_issues = diff_data.get("new_issues", []) + persisting = diff_data.get("persisting_issues", []) + + prompt = ( + "You are SecureLens AI, a cybersecurity assistant. " + "A developer has run two security scans on the same URL at different points in time. " + "Here is the comparison between the two scans:\n\n" + f"Score change: {score_change:+d} points\n" + f"Issues resolved since last scan ({len(resolved)}): " + f"{json.dumps([i.get('issue') for i in resolved])}\n" + f"New issues found ({len(new_issues)}): " + f"{json.dumps([i.get('issue') for i in new_issues])}\n" + f"Issues still present ({len(persisting)}): " + f"{json.dumps([i.get('issue') for i in persisting])}\n\n" + "Write a short, plain-English summary (2-4 sentences) for the developer. " + "Mention what improved, flag any new regressions if they exist, and note what still needs work. " + "Be direct and practical — no fluff." + ) + + result = await call_ai(prompt, temperature=0.4) + return result or "Could not generate diff narrative." diff --git a/app/services/webhook_dispatcher.py b/app/services/webhook_dispatcher.py new file mode 100644 index 0000000..d211747 --- /dev/null +++ b/app/services/webhook_dispatcher.py @@ -0,0 +1,64 @@ +""" +Webhook Dispatcher +================== + +Shared utility for firing HMAC-signed webhook POST requests. + +Previously the dispatch logic lived inline inside scan.py. Moving it here +means both the scan router and the background scheduler can call the same +function without creating a circular import. 
+""" + +import hashlib +import hmac +import json +import logging + +import httpx +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from app.models.webhook import Webhook + +logger = logging.getLogger(__name__) + + +async def dispatch_webhooks(user_id: str, scan_data: dict, db: AsyncSession) -> None: + """ + Fetch all active webhooks for a user and POST the scan_data payload to each. + + The payload is JSON-encoded and signed with HMAC-SHA256 if the webhook has + a secret key set. The signature is sent in the X-SecureLens-Signature header + so the receiving server can verify the request is genuine. + + Failures are logged but never re-raised — a broken webhook should never + crash or block the scan response. + """ + result = await db.execute( + select(Webhook).where( + Webhook.user_id == user_id, + Webhook.is_active == True, # noqa: E712 + ) + ) + hooks = result.scalars().all() + if not hooks: + return + + payload = json.dumps(scan_data).encode("utf-8") + + async with httpx.AsyncClient() as client: + for hook in hooks: + headers = {"Content-Type": "application/json"} + if hook.secret_key: + sig = hmac.new( + hook.secret_key.encode(), payload, hashlib.sha256 + ).hexdigest() + headers["X-SecureLens-Signature"] = sig + + try: + await client.post( + hook.target_url, content=payload, headers=headers, timeout=5.0 + ) + logger.debug(f"Webhook fired: {hook.target_url}") + except Exception as e: + logger.warning(f"Webhook {hook.target_url} failed: {e}") diff --git a/requirements.txt b/requirements.txt index dca3593..1683e89 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,7 +17,8 @@ alembic google-genai aiodns fpdf2 -# ---- Step 2: LiteLLM for provider-agnostic AI calls ---- +# ---- LiteLLM: provider-agnostic AI calls ---- litellm -# ---- Step 3: Threat Intelligence ---- -# (VirusTotal and AbuseIPDB are queried via httpx, no extra SDK needed) +# ---- APScheduler: scheduled/recurring scans ---- +apscheduler +# ---- Threat 
Intelligence (VirusTotal + AbuseIPDB via httpx, no extra SDK needed) ----