mirror of
https://github.com/Rarebuffalo/securelens-backend.git
synced 2026-06-19 07:00:30 +00:00
Integrated the AI agent
This commit is contained in:
@@ -10,7 +10,7 @@ from slowapi.middleware import SlowAPIMiddleware
|
||||
from app.config import settings
|
||||
from app.database import close_db, init_db
|
||||
from app.middleware.rate_limiter import limiter
|
||||
from app.routers import auth, health, history, scan, apikey, report
|
||||
from app.routers import auth, health, history, scan, apikey, report, code_scan
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG if settings.debug else logging.INFO,
|
||||
@@ -56,6 +56,7 @@ def create_app() -> FastAPI:
|
||||
application.include_router(history.router)
|
||||
application.include_router(apikey.router)
|
||||
application.include_router(report.router)
|
||||
application.include_router(code_scan.router)
|
||||
|
||||
logger.info(f"{settings.app_name} v{settings.app_version} initialized")
|
||||
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
# Re-export each router *submodule* rather than its APIRouter object.
# Callers access the router through the module, e.g.
# ``application.include_router(code_scan.router)``. Binding these names
# directly to the ``router`` objects (``from .code_scan import router as
# code_scan``) shadows the submodule, so ``code_scan.router`` would then be
# looked up on the router object itself instead of on the module.
# Import order matches the original so module import side effects run in the
# same sequence.
from . import auth, health, history, scan, apikey, report, code_scan

__all__ = [
    "auth",
    "health",
    "history",
    "scan",
    "apikey",
    "report",
    "code_scan",
]
|
||||
|
||||
91
app/routers/code_scan.py
Normal file
91
app/routers/code_scan.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import logging
|
||||
import uuid
|
||||
import json
|
||||
from fastapi import APIRouter, HTTPException
|
||||
from typing import Dict, Any
|
||||
|
||||
from app.schemas.code_scan import CodeScanRequest, CodeScanResponse, CodeChatRequest, CodeChatResponse
|
||||
from app.services.code_scanner.orchestrator import CodeScanOrchestrator, client
|
||||
from app.config import settings
|
||||
|
||||
# Logger named after this module.
logger = logging.getLogger(__name__)

# Every endpoint registered on this router is tagged "code-scan".
router = APIRouter(tags=["code-scan"])

# In-memory store for scan results to support chat context, keyed by scan_id.
# It is process-local and nothing in this module evicts entries.
# In a real production app, this would be stored in the database.
scan_store: Dict[str, CodeScanResponse] = dict()
|
||||
|
||||
@router.post("/code-scan/analyze", response_model=CodeScanResponse)
async def analyze_codebase(request: CodeScanRequest):
    """Scan a GitHub repository and return the vulnerability report.

    Pipeline: list the repository's files, let the orchestrator triage them,
    analyze the triaged files, then summarize the findings. The finished
    report is stored in ``scan_store`` under a fresh UUID so the chat
    endpoint can look it up later by ``scan_id``.

    Raises:
        HTTPException: 400 when fetching the repository tree raises
            ``ValueError`` (e.g. a repository URL that cannot be parsed);
            500 with a generic message for any other failure. Internal
            error text is logged, not returned to the client.
    """
    logger.info("Starting code scan for %s", request.repo_url)

    # Resolve the branch once so the orchestrator and the tree lookup agree.
    branch = request.branch or "main"

    try:
        orchestrator = CodeScanOrchestrator(
            repo_url=request.repo_url,
            github_token=request.github_token,
            branch=branch,
        )

        # 1. Fetch repo structure
        try:
            all_files = await orchestrator.github.get_repo_tree(request.repo_url, branch)
        except ValueError as exc:
            # Bad input from the caller, not a server fault.
            logger.warning("Code scan rejected for %s: %s", request.repo_url, exc)
            raise HTTPException(status_code=400, detail=str(exc)) from exc

        # 2. Triage files
        triaged_files = await orchestrator.triage_files(all_files)
        logger.info("Triaged %d files out of %d.", len(triaged_files), len(all_files))

        # 3. Analyze triaged files
        vulnerabilities = await orchestrator.analyze_files(triaged_files)

        # 4. Generate Summary
        summary = await orchestrator.generate_summary(vulnerabilities)

        scan_id = str(uuid.uuid4())

        response = CodeScanResponse(
            scan_id=scan_id,
            repo_url=request.repo_url,
            summary=summary,
            issues=vulnerabilities,
        )

        # Save to in-memory store for the chat feature
        scan_store[scan_id] = response

        return response
    except HTTPException:
        # Already a well-formed HTTP error; do not wrap it in a 500.
        raise
    except Exception as exc:
        # logger.exception keeps the traceback; the client only sees a
        # generic message so internal details are not exposed.
        logger.exception("Code scan failed for %s", request.repo_url)
        raise HTTPException(
            status_code=500,
            detail="Code scan failed. Please try again later.",
        ) from exc
|
||||
|
||||
@router.post("/code-scan/chat", response_model=CodeChatResponse)
async def chat_with_scan(request: CodeChatRequest):
    """Answer a follow-up question about a previously completed scan.

    The stored scan (repository URL, summary and issues) is embedded in the
    system prompt and the user's message is forwarded to the chat model.

    Raises:
        HTTPException: 400 when no OpenAI API key is configured, 404 when
            ``scan_id`` is not present in ``scan_store``, 500 when the model
            call or reply extraction fails.
    """
    if not settings.openai_api_key:
        raise HTTPException(status_code=400, detail="AI Chat is disabled because OPENAI_API_KEY is not configured.")

    scan_data = scan_store.get(request.scan_id)
    if scan_data is None:
        raise HTTPException(status_code=404, detail="Scan ID not found or expired.")

    # NOTE(review): summary and issues are model output derived from
    # repository content, so this prompt carries untrusted text.
    system_prompt = (
        "You are SecureLens AI, an expert application security assistant. "
        "You are helping a developer understand a security scan report for their codebase. "
        f"Here is the context of the scan for the repository {scan_data.repo_url}:\n"
        f"Summary: {scan_data.summary}\n"
        f"Vulnerabilities: {json.dumps([v.model_dump() for v in scan_data.issues])}\n\n"
        "Answer the user's questions clearly, concisely, and professionally. Provide code fixes if requested."
    )

    try:
        response = await client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": request.message},
            ],
            temperature=0.5,
        )
        reply = response.choices[0].message.content or "No response from AI."
        return CodeChatResponse(reply=reply)
    except Exception as exc:
        # Keep the traceback in the logs; the client gets a generic message.
        logger.exception("AI Chat Error: %s", exc)
        raise HTTPException(
            status_code=500,
            detail="I encountered an error trying to process your request.",
        ) from exc
|
||||
29
app/schemas/code_scan.py
Normal file
29
app/schemas/code_scan.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from pydantic import BaseModel, HttpUrl
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
class CodeScanRequest(BaseModel):
    """Request body for starting a code scan of a repository."""

    # URL of the repository to scan.
    repo_url: str
    # Token forwarded to the GitHub client for API authentication.
    github_token: str
    # Branch name or commit hash; optional, defaults to "main".
    branch: Optional[str] = "main"
|
||||
|
||||
class VulnerabilityIssue(BaseModel):
    """A single finding reported for one file of the scanned repository."""

    # Path of the file the finding belongs to.
    file_path: str
    # Free-form severity label: High, Medium, Low, Critical.
    severity: str
    # Short title of the finding.
    issue: str
    # Why the finding is a problem.
    explanation: str
    # Optional remediation text or code snippet.
    suggested_fix: Optional[str] = None
    # Optional line the finding refers to; None when not tied to a line.
    line_number: Optional[int] = None
|
||||
|
||||
class CodeScanResponse(BaseModel):
    """Complete result of a code scan, as returned to the client."""

    # Identifier under which the scan can be referenced later (e.g. by chat).
    scan_id: str
    # Repository that was scanned.
    repo_url: str
    # Textual summary of the findings.
    summary: str
    # All findings collected during the scan.
    issues: List[VulnerabilityIssue]
|
||||
|
||||
class CodeChatRequest(BaseModel):
    """Request body for asking a question about an earlier scan."""

    # Identifier of the scan the question refers to.
    scan_id: str
    # The user's question.
    message: str
|
||||
|
||||
class CodeChatResponse(BaseModel):
    """Response body carrying the assistant's answer."""

    # Text of the assistant's reply.
    reply: str
|
||||
1
app/services/code_scanner/__init__.py
Normal file
1
app/services/code_scanner/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# Code Scanner Package
|
||||
88
app/services/code_scanner/github_client.py
Normal file
88
app/services/code_scanner/github_client.py
Normal file
@@ -0,0 +1,88 @@
|
||||
import httpx
|
||||
import logging
|
||||
import base64
|
||||
from urllib.parse import urlparse
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
class GitHubClient:
    """Small async client for the GitHub REST endpoints used by the scanner.

    Every request is sent to ``https://api.github.com`` with the supplied
    token as a Bearer credential.
    """

    def __init__(self, token: str):
        self.token = token
        self.headers = {
            "Authorization": f"Bearer {self.token}",
            "Accept": "application/vnd.github.v3+json",
        }
        self.base_url = "https://api.github.com"

    def _parse_repo_url(self, repo_url: str) -> Optional[str]:
        """Extract ``owner/repo`` from a URL like https://github.com/owner/repo.

        Empty path segments (doubled or trailing slashes) are ignored and a
        trailing ``.git`` on the repository name, as found in clone URLs, is
        removed. Segments after the repository name are ignored.

        Returns:
            ``"owner/repo"``, or ``None`` when the URL cannot be parsed or
            does not contain both an owner and a repository name.
        """
        try:
            parsed = urlparse(repo_url)
            path_parts = [part for part in parsed.path.split("/") if part]
            if len(path_parts) < 2:
                return None

            owner, repo = path_parts[0], path_parts[1]
            if repo.endswith(".git"):
                repo = repo[: -len(".git")]
            if not repo:
                return None
            return f"{owner}/{repo}"
        except Exception as e:
            logger.error(f"Failed to parse repo URL {repo_url}: {e}")
            return None

    async def get_repo_tree(self, repo_url: str, branch: str = "main") -> List[str]:
        """Return the paths of all files (blobs) in the repository at ``branch``.

        Resolves ``branch`` to its commit, then fetches that commit's tree
        recursively.

        Raises:
            ValueError: if ``repo_url`` cannot be parsed into ``owner/repo``.
            RuntimeError: if a GitHub API request fails; the original
                ``httpx.HTTPError`` is chained as the cause.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import quote

        repo_path = self._parse_repo_url(repo_url)
        if not repo_path:
            raise ValueError(f"Invalid GitHub repository URL: {repo_url}")

        # First, get the commit SHA for the branch to get the tree SHA.
        # "/" stays literal so refs such as "feature/x" keep working; other
        # reserved characters are escaped so they cannot alter the URL.
        ref = quote(branch, safe="/")
        commits_url = f"{self.base_url}/repos/{repo_path}/commits/{ref}"

        async with httpx.AsyncClient() as client:
            try:
                commit_resp = await client.get(commits_url, headers=self.headers, timeout=10.0)
                commit_resp.raise_for_status()
                tree_sha = commit_resp.json()["commit"]["tree"]["sha"]

                # Now get the recursive tree
                tree_url = f"{self.base_url}/repos/{repo_path}/git/trees/{tree_sha}?recursive=1"
                tree_resp = await client.get(tree_url, headers=self.headers, timeout=15.0)
                tree_resp.raise_for_status()
                tree_data = tree_resp.json()
            except httpx.HTTPError as e:
                logger.error(f"GitHub API error fetching tree for {repo_path}: {e}")
                raise RuntimeError(f"Failed to fetch repository structure: {e}") from e

        # GitHub flags responses that hit its size limit; the listing is then
        # incomplete, so make that visible instead of failing silently.
        if tree_data.get("truncated"):
            logger.warning(
                "GitHub returned a truncated tree for %s; file list is incomplete.",
                repo_path,
            )

        # Only files, not directories
        return [
            item["path"]
            for item in tree_data.get("tree", [])
            if item["type"] == "blob"
        ]

    async def get_file_content(self, repo_url: str, file_path: str, branch: str = "main") -> str:
        """Fetch one file's text via the contents API.

        Returns:
            The decoded file content (invalid UTF-8 bytes are replaced), or
            ``""`` when the request fails or the response carries no
            base64-encoded content, so the caller can skip the file.

        Raises:
            ValueError: if ``repo_url`` cannot be parsed into ``owner/repo``.
        """
        # Local import keeps this block self-contained.
        from urllib.parse import quote

        repo_path = self._parse_repo_url(repo_url)
        if not repo_path:
            raise ValueError(f"Invalid GitHub repository URL: {repo_url}")

        # Escape the path (spaces, "#", "?" ...) but keep directory slashes;
        # the ref goes through ``params`` so httpx encodes it.
        encoded_path = quote(file_path, safe="/")
        content_url = f"{self.base_url}/repos/{repo_path}/contents/{encoded_path}"

        async with httpx.AsyncClient() as client:
            try:
                resp = await client.get(
                    content_url,
                    headers=self.headers,
                    params={"ref": branch},
                    timeout=10.0,
                )
                resp.raise_for_status()
                data = resp.json()
            except httpx.HTTPError as e:
                logger.error(f"GitHub API error fetching file {file_path}: {e}")
                return ""  # Return empty if file cannot be read, agent will just skip it

        # A non-dict payload (e.g. a list) has no single "content" field.
        if isinstance(data, dict) and "content" in data and data.get("encoding") == "base64":
            return base64.b64decode(data["content"]).decode("utf-8", errors="replace")
        return ""
|
||||
151
app/services/code_scanner/orchestrator.py
Normal file
151
app/services/code_scanner/orchestrator.py
Normal file
@@ -0,0 +1,151 @@
|
||||
import json
|
||||
import logging
|
||||
from typing import List, Dict, Any
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
from app.config import settings
|
||||
from app.services.code_scanner.github_client import GitHubClient
|
||||
from app.schemas.code_scan import VulnerabilityIssue
|
||||
|
||||
# Logger named after this module.
logger = logging.getLogger(__name__)

# Fall back to a placeholder key when none is configured — presumably so the
# client can still be constructed in tests. The code in this module checks
# settings.openai_api_key before sending any request through the client.
api_key = settings.openai_api_key if settings.openai_api_key else "mock-key-for-testing"

# Shared async OpenAI client for this module.
client = AsyncOpenAI(api_key=api_key)
|
||||
|
||||
class CodeScanOrchestrator:
    """Drives an LLM-assisted security scan of one GitHub repository.

    Steps are exposed separately so the caller can sequence them:
    ``triage_files`` picks files worth reviewing, ``analyze_files`` reviews
    them one by one, ``generate_summary`` condenses the findings.
    """

    def __init__(self, repo_url: str, github_token: str, branch: str = "main"):
        self.repo_url = repo_url
        self.branch = branch
        self.github = GitHubClient(token=github_token)

    async def triage_files(self, all_files: List[str]) -> List[str]:
        """
        Uses the LLM to select which files are most likely to contain security vulnerabilities
        (e.g., config files, routers, auth logic).

        The model's answer is checked against ``all_files``: paths that are
        not in the repository listing, duplicates and non-string entries are
        dropped, and at most 15 paths are returned. Without an API key, on
        any error, or when the answer contains nothing usable, the first 10
        files are returned instead.
        """
        if not settings.openai_api_key:
            logger.warning("OPENAI_API_KEY is not set. Triaging all files up to a limit.")
            return all_files[:10]  # Hard limit for testing

        # To avoid context limit issues, we might want to chunk this, but for now we pass the list
        # We can enforce a soft limit on the string length
        files_str = "\n".join(all_files)
        if len(files_str) > 15000:
            files_str = files_str[:15000] + "\n... (truncated)"

        prompt = (
            "You are a Senior Application Security Engineer. I have a repository with the following files:\n"
            f"{files_str}\n\n"
            "Select the most critical files to review for security vulnerabilities (e.g., SAST, hardcoded secrets, SQLi, Auth bypass). "
            "Return a JSON object with a single key 'critical_files' containing a list of the exact file paths. "
            "Do not select more than 15 files."
        )

        try:
            response = await client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You always respond with valid JSON."},
                    {"role": "user", "content": prompt},
                ],
                response_format={"type": "json_object"},
                temperature=0.1,
            )
            content = response.choices[0].message.content
            if content:
                selected = json.loads(content).get("critical_files", [])
                if isinstance(selected, list):
                    known = set(all_files)
                    seen = set()
                    valid = []
                    for path in selected:
                        # Keep only real, not-yet-seen repository paths.
                        if isinstance(path, str) and path in known and path not in seen:
                            seen.add(path)
                            valid.append(path)
                    # An explicitly empty selection is honored; a selection
                    # made up entirely of unknown paths is not trusted.
                    if valid or not selected:
                        return valid[:15]
                logger.warning("Triage response contained no usable file paths; using fallback.")
        except Exception as e:
            logger.error("Error triaging files: %s", e)

        return all_files[:10]  # Fallback

    @staticmethod
    def _to_issue(file_path: str, raw: Any) -> VulnerabilityIssue:
        """Build a VulnerabilityIssue from one raw finding returned by the LLM.

        Missing or null fields fall back to the defaults. Raises if ``raw``
        is not a mapping or the values fail model validation.
        """
        return VulnerabilityIssue(
            file_path=file_path,
            severity=raw.get("severity") or "Medium",
            issue=raw.get("issue") or "Unknown Issue",
            explanation=raw.get("explanation") or "",
            suggested_fix=raw.get("suggested_fix"),
            line_number=raw.get("line_number"),
        )

    async def analyze_files(self, triaged_files: List[str]) -> List[VulnerabilityIssue]:
        """
        Fetches the contents of the triaged files and uses the LLM to find vulnerabilities.

        Files with no readable content are skipped. A failure while analyzing
        one file is logged and does not stop the scan, and a single malformed
        finding is dropped without discarding the file's other findings.
        Returns an empty list when no API key is configured.
        """
        vulnerabilities = []

        if not settings.openai_api_key:
            return []

        # Analyze files sequentially or in batches (sequential to avoid rate limits for now)
        for file_path in triaged_files:
            content = await self.github.get_file_content(self.repo_url, file_path, self.branch)
            if not content:
                continue

            # Truncate very large files
            if len(content) > 20000:
                content = content[:20000]

            # NOTE(review): ``content`` is untrusted repository text placed
            # directly into the prompt; findings should be treated as advisory.
            prompt = (
                f"Review the following code from the file '{file_path}' for security vulnerabilities.\n"
                "Focus on OWASP Top 10: SQLi, XSS, Hardcoded Secrets, IDOR, Misconfigurations, etc.\n\n"
                f"CODE:\n{content}\n\n"
                "Return a JSON object with a key 'vulnerabilities' containing a list of objects. "
                "Each object MUST have the following keys: "
                "'severity' (Critical, High, Medium, Low), "
                "'issue' (A short title), "
                "'explanation' (1-2 sentences explaining the vulnerability), "
                "'suggested_fix' (Code snippet or clear instructions to fix), "
                "'line_number' (integer or null if general)."
            )

            try:
                response = await client.chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=[
                        {"role": "system", "content": "You are a SAST security agent. Always respond with valid JSON."},
                        {"role": "user", "content": prompt},
                    ],
                    response_format={"type": "json_object"},
                    temperature=0.2,
                )

                resp_content = response.choices[0].message.content
                if not resp_content:
                    continue
                vulns = json.loads(resp_content).get("vulnerabilities", [])
            except Exception as e:
                logger.error("Error analyzing file %s: %s", file_path, e)
                continue

            if not isinstance(vulns, list):
                logger.error(
                    "Error analyzing file %s: 'vulnerabilities' is not a list", file_path
                )
                continue

            for raw in vulns:
                # Validate each finding on its own so one bad entry does not
                # throw away the rest of this file's results.
                try:
                    vulnerabilities.append(self._to_issue(file_path, raw))
                except Exception as e:
                    logger.warning("Skipping malformed finding for %s: %s", file_path, e)

        return vulnerabilities

    async def generate_summary(self, vulnerabilities: List[VulnerabilityIssue]) -> str:
        """Produce an executive summary of the findings.

        Returns a fixed sentence when there are no findings, and a plain
        count when no API key is configured or the LLM call fails.
        """
        if not vulnerabilities:
            return "No obvious security vulnerabilities found in the scanned files."

        if not settings.openai_api_key:
            return f"Found {len(vulnerabilities)} potential issues."

        issues_data = [v.model_dump() for v in vulnerabilities]
        prompt = (
            "You are a Senior AppSec Manager. Summarize the following list of vulnerabilities found in a recent scan. "
            "Provide a 2-3 paragraph executive summary of the repository's security posture. "
            "Keep it professional and highlight the most critical risks.\n\n"
            f"{json.dumps(issues_data)}"
        )

        try:
            response = await client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are a cybersecurity expert."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.4,
            )
            return response.choices[0].message.content or "Could not generate summary."
        except Exception as e:
            logger.error("Error generating summary: %s", e)
            return f"Found {len(vulnerabilities)} potential issues."
|
||||
Reference in New Issue
Block a user