diff --git a/__pycache__/main.cpython-314.pyc b/__pycache__/main.cpython-314.pyc deleted file mode 100644 index 1f58f9a..0000000 Binary files a/__pycache__/main.cpython-314.pyc and /dev/null differ diff --git a/app/config.py b/app/config.py index 7b2b898..14c0383 100644 --- a/app/config.py +++ b/app/config.py @@ -61,10 +61,35 @@ class Settings(BaseSettings): # ------------------------------------------------------------------------- gemini_api_key: str | None = None - # Threat Intelligence API keys (Step 3) + # Threat Intelligence API keys virustotal_api_key: str | None = None abuseipdb_api_key: str | None = None + # ------------------------------------------------------------------------- + # Active Scanning — Nuclei (optional) + # ------------------------------------------------------------------------- + # Path to the nuclei binary. Leave blank to search PATH automatically. + # Install: go install github.com/projectdiscovery/nuclei/v3/cmd/nuclei@latest + nuclei_binary_path: str | None = None + + # ------------------------------------------------------------------------- + # Slack Alerts (optional) + # ------------------------------------------------------------------------- + # Slack Incoming Webhook URL. Create one at: + # https://api.slack.com/messaging/webhooks + slack_webhook_url: str | None = None + + # ------------------------------------------------------------------------- + # Email Alerts via SMTP (optional) + # ------------------------------------------------------------------------- + smtp_host: str | None = None + smtp_port: int = 587 + smtp_username: str | None = None + smtp_password: str | None = None + smtp_from_email: str | None = None + # Set to true for port 465 (SMTP_SSL). Defaults to false (STARTTLS). + smtp_use_ssl: bool = False + model_config = SettingsConfigDict(env_file=".env", env_file_encoding="utf-8") @property diff --git a/app/models/__init__.py b/app/models/__init__.py index b14a310..1c8a228 100644 --- a/app/models/__init__.py +++ b/app/models/__init__.py @@ -2,5 +2,6 @@ from app.models.user import User from app.models.scan import ScanResult from app.models.code_scan import CodeScanResult from app.models.scheduled_scan import ScheduledScan +from app.models.nuclei_result import NucleiScanResult -__all__ = ["User", "ScanResult", "CodeScanResult", "ScheduledScan"] +__all__ = ["User", "ScanResult", "CodeScanResult", "ScheduledScan", "NucleiScanResult"] diff --git a/app/models/scan.py b/app/models/scan.py index 4631088..4264831 100644 --- a/app/models/scan.py +++ b/app/models/scan.py @@ -25,3 +25,4 @@ class ScanResult(Base): ) user = relationship("User", back_populates="scans") + nuclei_result = relationship("NucleiScanResult", back_populates="scan_result", uselist=False, cascade="all, delete") diff --git a/app/routers/history.py b/app/routers/history.py index 7339a2f..eb793c8 100644 --- a/app/routers/history.py +++ b/app/routers/history.py @@ -18,9 +18,17 @@ from app.schemas.scan import ( ThreatNarrativeResponse, ScanDiffResponse, ScheduledScanResponse, + NucleiResultResponse, + RemediationPlan, ) -from app.services.ai import chat_with_scan_context, generate_threat_narrative, generate_diff_narrative +from app.services.ai import ( + chat_with_scan_context, + generate_threat_narrative, + generate_diff_narrative, + generate_remediation_plan, +) +from app.models.nuclei_result import NucleiScanResult router = APIRouter(prefix="/scans", tags=["history"]) @@ -236,3 +244,81 @@ async def diff_scans( score_change=score_change, narrative=narrative, ) + + +@router.get("/{scan_id}/nuclei", response_model=NucleiResultResponse) +async def get_nuclei_result( + scan_id: str, + current_user: User = Depends(get_current_user), + db: AsyncSession = Depends(get_db), +): + """ + Retrieve the Nuclei active scan result for a given website scan. + + Nuclei runs as a background task after the main scan, so this result may + not be available immediately. Poll this endpoint until status is not + 'pending'. If the Nuclei binary is not installed, status will be 'skipped'. + """ + # Verify the parent scan belongs to the requesting user + scan_check = await db.execute( + select(ScanResult).where( + ScanResult.id == scan_id, + ScanResult.user_id == current_user.id, + ) + ) + if not scan_check.scalar_one_or_none(): + raise HTTPException(status_code=404, detail="Scan not found") + + result = await db.execute( + select(NucleiScanResult).where(NucleiScanResult.scan_result_id == scan_id) + ) + nuclei_row = result.scalar_one_or_none() + + if not nuclei_row: + raise HTTPException( + status_code=404, + detail="Nuclei result not available yet. The background scan may still be running.", + ) + + return nuclei_row + + +@router.post("/{scan_id}/remediation-plan", response_model=RemediationPlan) +async def get_remediation_plan( + scan_id: str, + current_user: User = Depends(get_current_user), + db: AsyncSession = Depends(get_db), +): + """ + Generate an AI-powered prioritized remediation roadmap for a scan. + + The AI receives the full list of issues found in the scan and returns a + sequenced fix plan: what to do first, how hard each fix is, and a + realistic total effort estimate. Each call to this endpoint triggers a + fresh AI generation — results are not cached. + """ + result = await db.execute( + select(ScanResult).where( + ScanResult.id == scan_id, + ScanResult.user_id == current_user.id, + ) + ) + scan = result.scalar_one_or_none() + + if not scan: + raise HTTPException(status_code=404, detail="Scan not found") + + plan_data = await generate_remediation_plan(scan.issues, scan.url) + + # Validate the AI response matches our schema before returning + try: + return RemediationPlan( + summary=plan_data.get("summary", ""), + steps=plan_data.get("steps", []), + estimated_total_effort=plan_data.get("estimated_total_effort", "N/A"), + ) + except Exception as e: + raise HTTPException( + status_code=500, + detail=f"Failed to parse remediation plan from AI response: {e}", + ) diff --git a/app/routers/scan.py b/app/routers/scan.py index 3443455..cf0d296 100644 --- a/app/routers/scan.py +++ b/app/routers/scan.py @@ -1,3 +1,4 @@ +import asyncio import logging import httpx @@ -24,6 +25,12 @@ from app.services.scoring import calculate_layer_statuses, calculate_score from app.services.ai import enhance_security_issues from app.services.threat_intel import get_threat_intel_summary from app.services.webhook_dispatcher import dispatch_webhooks +from app.services.nuclei_scanner import run_nuclei_scan +from app.services.alerting import ( + send_slack_alert, + send_email_alert, + build_scan_email_body, +) from app.utils.validators import validate_url logger = logging.getLogger(__name__) @@ -39,6 +46,41 @@ dns_scanner = DNSScanner() port_scanner = PortScanner() +async def _post_scan_tasks( + user_id: str, + user_email: str, + scan_id: str, + url: str, + score: int, + issue_count: int, + db: AsyncSession, +) -> None: + """ + Groups all post-scan side-effects that run as a background task: + - Dispatch webhooks + - Send Slack alert + - Send email alert + - Trigger Nuclei active scan + + These all run after the response has been sent to the client, so they + never add latency to the scan endpoint. + """ + scan_summary = {"scan_id": scan_id, "url": url, "score": score} + await dispatch_webhooks(user_id, scan_summary, db) + + slack_msg = f"URL: {url}\nScore: {score}/100 | Issues found: {issue_count}" + await send_slack_alert(title="SecureLens Scan Complete", message=slack_msg) + + email_body = build_scan_email_body(url, score, issue_count) + await send_email_alert( + to_email=user_email, + subject=f"SecureLens: Scan complete for {url}", + html_body=email_body, + ) + + # Nuclei runs last — it creates its own DB session and takes the longest + await run_nuclei_scan(scan_id, url) + @router.post("/scan", response_model=ScanResponse) @limiter.limit(settings.rate_limit) @@ -52,11 +94,8 @@ async def scan_website( url = validate_url(data.url) try: - import asyncio - dns_task = asyncio.create_task(dns_scanner.scan(url)) port_task = asyncio.create_task(port_scanner.scan(url)) - # Step 3: Run threat intel lookup concurrently — zero extra latency threat_intel_task = asyncio.create_task(get_threat_intel_summary(url)) async with httpx.AsyncClient( @@ -71,8 +110,6 @@ async def scan_website( all_issues.extend(await header_scanner.scan(url, response)) all_issues.extend(await cookie_scanner.scan(url, response)) all_issues.extend(await exposure_scanner.scan(url, response)) - - # Await infrastructure scans all_issues.extend(await dns_task) all_issues.extend(await port_task) threat_intel = await threat_intel_task @@ -111,12 +148,16 @@ async def scan_website( scan_id = scan_record.id created_at = scan_record.created_at - scan_summary = { - "scan_id": scan_id, - "url": url, - "score": score - } - background_tasks.add_task(dispatch_webhooks, current_user.id, scan_summary, db) + background_tasks.add_task( + _post_scan_tasks, + current_user.id, + current_user.email, + scan_id, + url, + score, + len(all_issues), + db, + ) return ScanResponse( id=scan_id, @@ -125,7 +166,7 @@ async def scan_website( layers=layers, issues=all_issues, created_at=created_at, - threat_intel=threat_intel, # Step 3: attach threat intelligence + threat_intel=threat_intel, ) except httpx.HTTPError as e: @@ -134,3 +175,4 @@ async def scan_website( status_code=502, content={"error": f"Could not reach {url}: {str(e)}"}, ) + diff --git a/app/schemas/scan.py b/app/schemas/scan.py index ab51eb3..6d2f12e 100644 --- a/app/schemas/scan.py +++ b/app/schemas/scan.py @@ -100,3 +100,48 @@ class ScheduledScanResponse(BaseModel): created_at: datetime model_config = {"from_attributes": True} + + +# --------------------------------------------------------------------------- +# Nuclei Active Scan Schemas +# --------------------------------------------------------------------------- + +class NucleiFinding(BaseModel): + template_id: str + name: str + severity: str + matched_at: str + description: str | None = None + + +class NucleiResultResponse(BaseModel): + id: str + scan_result_id: str + url: str + findings: list[NucleiFinding] + # status: "completed" | "skipped" | "timeout" | "error" + status: str + completed_at: datetime | None = None + created_at: datetime + + model_config = {"from_attributes": True} + + +# --------------------------------------------------------------------------- +# AI Remediation Plan Schemas +# --------------------------------------------------------------------------- + +class RemediationStep(BaseModel): + priority: int + issue: str + severity: str + effort: str # Easy | Medium | Hard + fix_summary: str + code_snippet: str | None = None + + +class RemediationPlan(BaseModel): + summary: str + steps: list[RemediationStep] + estimated_total_effort: str + diff --git a/app/services/ai.py b/app/services/ai.py index 491f146..4609a7d 100644 --- a/app/services/ai.py +++ b/app/services/ai.py @@ -207,3 +207,82 @@ async def generate_diff_narrative(diff_data: dict) -> str: result = await call_ai(prompt, temperature=0.4) return result or "Could not generate diff narrative." + + +async def generate_remediation_plan(issues: list[dict], url: str) -> dict: + """ + Generates a prioritized, actionable remediation roadmap from a list of issues. + + Instead of per-issue snippets (which the scanner already provides), this + function looks at the full picture and produces a sequenced plan that a + developer can actually follow: what to fix first, how hard each fix is, + and a realistic estimate of total effort. + + Returns a dict matching the RemediationPlan schema: + { + "summary": str, + "steps": [ + { + "priority": int, + "issue": str, + "severity": str, + "effort": "Easy" | "Medium" | "Hard", + "fix_summary": str, + "code_snippet": str | null + } + ], + "estimated_total_effort": str + } + """ + if not settings.effective_ai_key: + return { + "summary": "AI remediation plans require an AI API key to be configured.", + "steps": [], + "estimated_total_effort": "N/A", + } + + if not issues: + return { + "summary": "No issues were found in the scan. No remediation required.", + "steps": [], + "estimated_total_effort": "0 hours", + } + + prompt = ( + "You are a senior application security consultant reviewing scan results for a website.\n" + f"Target URL: {url}\n" + f"Issues found:\n{json.dumps(issues, indent=2)}\n\n" + "Generate a prioritized remediation roadmap. Return a JSON object with exactly these keys:\n" + " 'summary' : A 2-3 sentence overall assessment of the security posture.\n" + " 'steps' : A list of objects, one per issue, ordered by priority " + "(most critical first). Each step object must have:\n" + " 'priority' : Integer starting at 1\n" + " 'issue' : The exact issue name from the input\n" + " 'severity' : Critical | High | Medium | Low\n" + " 'effort' : Easy | Medium | Hard\n" + " 'fix_summary' : A concrete, actionable description of how to fix it (2-3 sentences)\n" + " 'code_snippet' : A relevant code or config example, or null if not applicable\n" + " 'estimated_total_effort' : A realistic total time estimate for all fixes combined " + "(e.g. '2-4 hours', '1-2 days').\n\n" + "Order steps strictly by: Critical first, then High, Medium, Low. " + "Within the same severity, put Easy fixes before Hard ones." + ) + + raw = await call_ai(prompt, temperature=0.2, json_mode=True) + if not raw: + return { + "summary": "Could not generate remediation plan — AI returned an empty response.", + "steps": [], + "estimated_total_effort": "N/A", + } + + try: + return json.loads(raw) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse remediation plan JSON: {e}\nRaw: {raw[:500]}") + return { + "summary": "Could not parse the AI-generated remediation plan.", + "steps": [], + "estimated_total_effort": "N/A", + } + diff --git a/app/services/scheduler.py b/app/services/scheduler.py index e90dec6..2bc7b33 100644 --- a/app/services/scheduler.py +++ b/app/services/scheduler.py @@ -47,6 +47,11 @@ from app.models.webhook import Webhook from app.services.scoring import calculate_layer_statuses, calculate_score from app.services.ai import enhance_security_issues from app.services.webhook_dispatcher import dispatch_webhooks +from app.services.alerting import ( + send_slack_alert, + send_email_alert, + build_regression_email_body, +) from app.config import settings logger = logging.getLogger(__name__) @@ -150,13 +155,13 @@ async def _run_single_scan(scheduled: ScheduledScan) -> None: await db.commit() - # Fire webhooks if the score dropped + # Fire webhooks, Slack alert, and email if the score dropped score_dropped = previous_score is not None and score < previous_score if score_dropped: delta = previous_score - score logger.warning( f"Score dropped {delta} pts for {url} " - f"({previous_score} → {score}). Firing webhooks." + f"({previous_score} -> {score}). Sending regression alerts." ) webhook_payload = { "event": "scheduled_scan_regression", @@ -168,6 +173,32 @@ async def _run_single_scan(scheduled: ScheduledScan) -> None: } await dispatch_webhooks(user_id, webhook_payload, db) + slack_title = f"Score regression detected for {validated_url}" + slack_msg = ( + f"Previous score: {previous_score}/100\n" + f"New score: {score}/100 ({-delta:+d} points)\n" + f"Action: Review the latest scan in SecureLens." + ) + await send_slack_alert(title=slack_title, message=slack_msg) + + # Fetch user email to send the regression alert + from sqlalchemy import select as _select + from app.models.user import User + async with AsyncSessionLocal() as email_db: + user_result = await email_db.execute( + _select(User).where(User.id == user_id) + ) + user = user_result.scalar_one_or_none() + if user: + email_body = build_regression_email_body( + validated_url, previous_score, score + ) + await send_email_alert( + to_email=user.email, + subject=f"SecureLens: Score regression detected for {validated_url}", + html_body=email_body, + ) + logger.info(f"Scheduled scan complete: {url} → score={score}") except httpx.HTTPError as e: