mirror of
https://github.com/Rarebuffalo/securelens-backend.git
synced 2026-06-19 07:00:30 +00:00
202 lines
7.2 KiB
Python
202 lines
7.2 KiB
Python
"""
|
|
Code Scan Router
|
|
================
|
|
|
|
Two endpoints:
|
|
POST /code-scan/analyze — Clone repo tree, triage + analyze files with AI,
|
|
persist result to PostgreSQL, return scan_id.
|
|
POST /code-scan/chat — Load the persisted scan from DB and answer questions.
|
|
GET /code-scan/models — List available AI models (debug / informational).
|
|
|
|
Why we moved away from in-memory scan_store:
|
|
The original implementation stored scan results in a plain Python dict.
|
|
This caused two critical problems:
|
|
1. Any server restart or crash wiped all stored context, breaking chat.
|
|
2. Multiple Uvicorn workers have isolated memory; a scan on worker A
|
|
cannot be chatted with on worker B.
|
|
By persisting to PostgreSQL we get durability, scalability, and historical
|
|
records of all code scans (same pattern as the web scanner).
|
|
"""
|
|
|
|
import logging
|
|
import json
|
|
from fastapi import APIRouter, Depends, HTTPException
|
|
from sqlalchemy import select
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from typing import Dict
|
|
|
|
from app.database import get_db
|
|
from app.middleware.auth import get_optional_user
|
|
from app.models.code_scan import CodeScanResult
|
|
from app.models.user import User
|
|
from app.schemas.code_scan import (
|
|
CodeScanRequest,
|
|
CodeScanResponse,
|
|
CodeChatRequest,
|
|
CodeChatResponse,
|
|
VulnerabilityIssue,
|
|
)
|
|
from app.services.code_scanner.orchestrator import CodeScanOrchestrator
|
|
from app.config import settings
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
router = APIRouter(tags=["code-scan"])
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# POST /code-scan/analyze
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@router.post("/code-scan/analyze", response_model=CodeScanResponse)
|
|
async def analyze_codebase(
|
|
request: CodeScanRequest,
|
|
db: AsyncSession = Depends(get_db),
|
|
current_user: User | None = Depends(get_optional_user),
|
|
):
|
|
"""
|
|
Full agentic scan of a GitHub repository.
|
|
|
|
Flow:
|
|
1. Fetch the full file tree from GitHub.
|
|
2. Ask the AI to triage (select) the most security-critical files.
|
|
3. Analyze each triaged file for OWASP Top-10 vulnerabilities.
|
|
4. Generate an executive summary of all findings.
|
|
5. Persist the result to `code_scan_results` table.
|
|
6. Return the scan_id so the frontend can open a chat session.
|
|
"""
|
|
logger.info(f"Starting code scan for {request.repo_url}")
|
|
|
|
try:
|
|
orchestrator = CodeScanOrchestrator(
|
|
repo_url=request.repo_url,
|
|
github_token=request.github_token,
|
|
branch=request.branch or "main",
|
|
)
|
|
|
|
# Step 1 — Fetch file tree
|
|
all_files = await orchestrator.github.get_repo_tree(
|
|
request.repo_url, request.branch or "main"
|
|
)
|
|
|
|
# Step 2 — AI triage: pick the most security-sensitive files
|
|
triaged_files = await orchestrator.triage_files(all_files)
|
|
logger.info(f"Triaged {len(triaged_files)} files out of {len(all_files)}.")
|
|
|
|
# Step 3 — Analyze each file
|
|
vulnerabilities = await orchestrator.analyze_files(triaged_files)
|
|
|
|
# Step 4 — Generate summary
|
|
summary = await orchestrator.generate_summary(vulnerabilities)
|
|
|
|
# Step 5 — Persist to database
|
|
# Serialise VulnerabilityIssue objects to plain dicts for JSON storage.
|
|
issues_as_dicts = [v.model_dump() for v in vulnerabilities]
|
|
|
|
scan_record = CodeScanResult(
|
|
user_id=current_user.id if current_user else None,
|
|
repo_url=request.repo_url,
|
|
summary=summary,
|
|
issues=issues_as_dicts,
|
|
)
|
|
db.add(scan_record)
|
|
await db.flush() # flush to get the auto-generated id without committing yet
|
|
scan_id = scan_record.id
|
|
|
|
logger.info(f"Code scan {scan_id} persisted to database.")
|
|
|
|
return CodeScanResponse(
|
|
scan_id=scan_id,
|
|
repo_url=request.repo_url,
|
|
summary=summary,
|
|
issues=vulnerabilities,
|
|
)
|
|
|
|
except Exception as e:
|
|
logger.error(f"Code scan failed: {str(e)}")
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# POST /code-scan/chat
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@router.post("/code-scan/chat", response_model=CodeChatResponse)
|
|
async def chat_with_scan(
|
|
request: CodeChatRequest,
|
|
db: AsyncSession = Depends(get_db),
|
|
):
|
|
"""
|
|
Conversational Q&A grounded in a previously completed code scan.
|
|
|
|
We load the scan from PostgreSQL using the scan_id, so this works
|
|
correctly across server restarts and multiple workers.
|
|
"""
|
|
if not settings.ai_api_key:
|
|
raise HTTPException(
|
|
status_code=400,
|
|
detail="AI Chat is disabled because no AI API key is configured.",
|
|
)
|
|
|
|
# Load the scan record from DB
|
|
result = await db.execute(
|
|
select(CodeScanResult).where(CodeScanResult.id == request.scan_id)
|
|
)
|
|
scan_data = result.scalar_one_or_none()
|
|
|
|
if not scan_data:
|
|
raise HTTPException(
|
|
status_code=404,
|
|
detail="Scan ID not found. The scan may have expired or never existed.",
|
|
)
|
|
|
|
# Build a rich context-aware prompt
|
|
prompt = (
|
|
"You are SecureLens AI, an expert application security assistant. "
|
|
"You are helping a developer understand a security scan report for their codebase. "
|
|
f"Here is the context of the scan for the repository '{scan_data.repo_url}':\n\n"
|
|
f"Executive Summary:\n{scan_data.summary}\n\n"
|
|
f"Vulnerabilities Found:\n{json.dumps(scan_data.issues, indent=2)}\n\n"
|
|
f"Developer's Question: {request.message}\n\n"
|
|
"Answer clearly, concisely, and professionally. "
|
|
"Provide specific code fixes when requested. "
|
|
"Reference the exact file paths and line numbers from the scan data when relevant."
|
|
)
|
|
|
|
try:
|
|
# Use the unified AI service so it respects whatever provider is configured
|
|
from app.services.ai import call_ai
|
|
reply = await call_ai(prompt, temperature=0.5)
|
|
return CodeChatResponse(reply=reply)
|
|
except Exception as e:
|
|
logger.error(f"AI Chat Error: {str(e)}")
|
|
raise HTTPException(
|
|
status_code=500,
|
|
detail="I encountered an error trying to process your request.",
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# GET /code-scan/models (informational / debug)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@router.get("/code-scan/models")
|
|
async def list_available_models():
|
|
"""
|
|
Lists AI models available to the configured provider.
|
|
Only meaningful when using the Gemini provider.
|
|
"""
|
|
if not settings.ai_api_key:
|
|
raise HTTPException(status_code=500, detail="No AI API key is set.")
|
|
try:
|
|
from google import genai
|
|
|
|
client = genai.Client(api_key=settings.ai_api_key)
|
|
models = []
|
|
for model in client.models.list():
|
|
if "generateContent" in model.supported_actions:
|
|
models.append(model.name)
|
|
return {"models": models}
|
|
except Exception as e:
|
|
raise HTTPException(status_code=500, detail=f"Error fetching models: {e}")
|