Integrated the AI agent

This commit is contained in:
rarebuffalo
2026-04-25 19:27:06 +05:30
parent 5f214c507d
commit 324ebe8955
7 changed files with 379 additions and 1 deletions

View File

@@ -10,7 +10,7 @@ from slowapi.middleware import SlowAPIMiddleware
from app.config import settings
from app.database import close_db, init_db
from app.middleware.rate_limiter import limiter
from app.routers import auth, health, history, scan, apikey, report
from app.routers import auth, health, history, scan, apikey, report, code_scan
logging.basicConfig(
level=logging.DEBUG if settings.debug else logging.INFO,
@@ -56,6 +56,7 @@ def create_app() -> FastAPI:
application.include_router(history.router)
application.include_router(apikey.router)
application.include_router(report.router)
application.include_router(code_scan.router)
logger.info(f"{settings.app_name} v{settings.app_version} initialized")

View File

@@ -0,0 +1,17 @@
# Re-export each router *module* under its own name.
#
# The application factory imports these names and registers `<name>.router`
# (e.g. `application.include_router(code_scan.router)`), so the names must be
# bound to the modules. Binding them to the APIRouter objects themselves
# (`from .code_scan import router as code_scan`) would shadow the submodule
# and make `code_scan.router` fail with AttributeError.
from . import auth
from . import health
from . import history
from . import scan
from . import apikey
from . import report
from . import code_scan

__all__ = [
    "auth",
    "health",
    "history",
    "scan",
    "apikey",
    "report",
    "code_scan",
]

91
app/routers/code_scan.py Normal file
View File

@@ -0,0 +1,91 @@
import logging
import uuid
import json
from fastapi import APIRouter, HTTPException
from typing import Dict, Any
from app.schemas.code_scan import CodeScanRequest, CodeScanResponse, CodeChatRequest, CodeChatResponse
from app.services.code_scanner.orchestrator import CodeScanOrchestrator, client
from app.config import settings
logger = logging.getLogger(__name__)
# Both code-scan endpoints below are registered on this router under the "code-scan" tag.
router = APIRouter(tags=["code-scan"])
# In-memory store mapping scan_id -> scan response, written by the analyze endpoint
# and read by the chat endpoint to rebuild the conversation context.
# It is a plain module-level dict, so its contents are lost when the process exits.
# In a real production app, this would be stored in the database.
scan_store: Dict[str, CodeScanResponse] = {}
@router.post("/code-scan/analyze", response_model=CodeScanResponse)
async def analyze_codebase(request: CodeScanRequest):
    """Run a full security scan of a GitHub repository.

    Steps: list the repository files, let the orchestrator triage them, analyze
    the selected files, and summarize the findings. The response is also kept in
    ``scan_store`` so the chat endpoint can look it up by ``scan_id``.

    Args:
        request: Repository URL, GitHub token and optional branch.

    Returns:
        The scan response containing the generated id, summary and issues.

    Raises:
        HTTPException: 500 with the error text if any step of the scan fails.
    """
    # Upper bound on retained scans. Without it the in-memory store grows by one
    # entry per request for the lifetime of the process.
    max_stored_scans = 100
    branch = request.branch or "main"

    logger.info(f"Starting code scan for {request.repo_url}")
    try:
        orchestrator = CodeScanOrchestrator(
            repo_url=request.repo_url,
            github_token=request.github_token,
            branch=branch,
        )

        # 1. Fetch repo structure
        all_files = await orchestrator.github.get_repo_tree(request.repo_url, branch)

        # 2. Triage files
        triaged_files = await orchestrator.triage_files(all_files)
        logger.info(f"Triaged {len(triaged_files)} files out of {len(all_files)}.")

        # 3. Analyze triaged files
        vulnerabilities = await orchestrator.analyze_files(triaged_files)

        # 4. Generate Summary
        summary = await orchestrator.generate_summary(vulnerabilities)

        scan_id = str(uuid.uuid4())
        response = CodeScanResponse(
            scan_id=scan_id,
            repo_url=request.repo_url,
            summary=summary,
            issues=vulnerabilities,
        )

        # Save to in-memory store for the chat feature, evicting the oldest
        # entries first (dicts iterate in insertion order).
        scan_store[scan_id] = response
        while len(scan_store) > max_stored_scans:
            scan_store.pop(next(iter(scan_store)))

        return response
    except Exception as e:
        # logger.exception records the traceback, which logger.error dropped.
        logger.exception(f"Code scan failed: {str(e)}")
        raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/code-scan/chat", response_model=CodeChatResponse)
async def chat_with_scan(request: CodeChatRequest):
    """Answer a question about a previously stored scan.

    The stored scan (repository URL, summary and issues) is embedded in the
    system prompt, and the user's message is sent as the user turn.

    Raises:
        HTTPException: 400 when no OpenAI key is configured, 404 when the scan id
            is not in ``scan_store``, 500 when the completion call fails.
    """
    if not settings.openai_api_key:
        raise HTTPException(
            status_code=400,
            detail="AI Chat is disabled because OPENAI_API_KEY is not configured.",
        )

    scan_data = scan_store.get(request.scan_id)
    if not scan_data:
        raise HTTPException(status_code=404, detail="Scan ID not found or expired.")

    # Serialize the findings once, then assemble the prompt from its pieces.
    issues_json = json.dumps([issue.model_dump() for issue in scan_data.issues])
    prompt_parts = [
        "You are SecureLens AI, an expert application security assistant. ",
        "You are helping a developer understand a security scan report for their codebase. ",
        f"Here is the context of the scan for the repository {scan_data.repo_url}:\n",
        f"Summary: {scan_data.summary}\n",
        f"Vulnerabilities: {issues_json}\n\n",
        "Answer the user's questions clearly, concisely, and professionally. Provide code fixes if requested.",
    ]
    system_prompt = "".join(prompt_parts)

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": request.message},
    ]

    try:
        completion = await client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=conversation,
            temperature=0.5,
        )
        answer = completion.choices[0].message.content
        if not answer:
            answer = "No response from AI."
        return CodeChatResponse(reply=answer)
    except Exception as e:
        logger.error(f"AI Chat Error: {str(e)}")
        raise HTTPException(
            status_code=500,
            detail="I encountered an error trying to process your request.",
        )

29
app/schemas/code_scan.py Normal file
View File

@@ -0,0 +1,29 @@
from pydantic import BaseModel, HttpUrl
from typing import List, Optional, Dict, Any
class CodeScanRequest(BaseModel):
    # Request body of the analyze endpoint.

    # URL of the GitHub repository to scan.
    repo_url: str

    # GitHub token used to authenticate the repository API calls.
    github_token: str

    # Branch name or commit hash to scan; defaults to "main".
    branch: Optional[str] = "main"
class VulnerabilityIssue(BaseModel):
    # A single finding reported for one file.

    # Path of the file the finding belongs to.
    file_path: str

    # One of: High, Medium, Low, Critical.
    severity: str

    # Short title of the finding.
    issue: str

    # Why the finding is a problem.
    explanation: str

    # Code snippet or instructions for fixing it, when available.
    suggested_fix: Optional[str] = None

    # Line the finding refers to; None when no line is given.
    line_number: Optional[int] = None
class CodeScanResponse(BaseModel):
    # Result of one scan, returned by the analyze endpoint.

    # Identifier the chat endpoint uses to look this scan up.
    scan_id: str

    # Repository the scan was run against.
    repo_url: str

    # Prose summary of the findings.
    summary: str

    # Every finding produced by the scan.
    issues: List[VulnerabilityIssue]
class CodeChatRequest(BaseModel):
    # Request body of the chat endpoint.

    # Identifier of a scan returned earlier by the analyze endpoint.
    scan_id: str

    # The user's question about that scan.
    message: str
class CodeChatResponse(BaseModel):
    # Response body of the chat endpoint.

    # The assistant's answer text.
    reply: str

View File

@@ -0,0 +1 @@
# Code Scanner Package

View File

@@ -0,0 +1,88 @@
import httpx
import logging
import base64
from urllib.parse import urlparse
from typing import List, Dict, Any, Optional
logger = logging.getLogger(__name__)
class GitHubClient:
    """Small async wrapper around the GitHub REST endpoints the scanner uses.

    Every request carries the bearer token supplied at construction time.
    """

    def __init__(self, token: str):
        """Store the token and prepare the headers sent with each request.

        Args:
            token: GitHub access token, sent as a Bearer credential.
        """
        self.token = token
        self.headers = {
            "Authorization": f"Bearer {self.token}",
            "Accept": "application/vnd.github.v3+json",
        }
        self.base_url = "https://api.github.com"

    def _parse_repo_url(self, repo_url: str) -> Optional[str]:
        """Extract ``owner/repo`` from a URL such as https://github.com/owner/repo.

        Trailing path components (``/tree/main``), a ``.git`` clone suffix, empty
        path segments and a scheme-less ``github.com/owner/repo`` form are tolerated.

        Returns:
            ``"owner/repo"``, or ``None`` when the URL does not contain both parts.
        """
        try:
            parsed = urlparse(repo_url)
            # Drop empty segments so leading/trailing/double slashes cannot
            # produce an empty owner or repository name.
            segments = [segment for segment in parsed.path.split("/") if segment]

            # Without a scheme urlparse puts the host into the path.
            if (
                not parsed.netloc
                and segments
                and segments[0].lower() in ("github.com", "www.github.com")
            ):
                segments = segments[1:]

            if len(segments) < 2:
                return None

            owner, repo = segments[0], segments[1]
            # Clone URLs end in ".git"; the REST API expects the bare name.
            if repo.endswith(".git"):
                repo = repo[: -len(".git")]
            if not repo:
                return None
            return f"{owner}/{repo}"
        except Exception as e:
            # Parsing is best-effort: callers treat None as "invalid URL".
            logger.error(f"Failed to parse repo URL {repo_url}: {e}")
            return None

    async def get_repo_tree(self, repo_url: str, branch: str = "main") -> List[str]:
        """Fetch the recursive tree of the repository and return all file paths.

        Args:
            repo_url: GitHub repository URL.
            branch: Branch name or commit reference to read the tree from.

        Returns:
            Paths of all blobs (files) in the tree; directories are excluded.

        Raises:
            ValueError: If ``repo_url`` cannot be parsed into ``owner/repo``.
            Exception: If a GitHub request fails (chained to the httpx error).
        """
        # Local import: the module header only brings in urlparse.
        from urllib.parse import quote

        repo_path = self._parse_repo_url(repo_url)
        if not repo_path:
            raise ValueError(f"Invalid GitHub repository URL: {repo_url}")

        # First, get the commit SHA for the branch to get the tree SHA.
        # The ref is percent-encoded so characters like "#" or "%" cannot be
        # mistaken for URL syntax; "/" is left intact for names like "feature/x".
        commits_url = f"{self.base_url}/repos/{repo_path}/commits/{quote(branch)}"

        async with httpx.AsyncClient() as client:
            try:
                commit_resp = await client.get(commits_url, headers=self.headers, timeout=10.0)
                commit_resp.raise_for_status()
                tree_sha = commit_resp.json()["commit"]["tree"]["sha"]

                # Now get the recursive tree
                tree_url = f"{self.base_url}/repos/{repo_path}/git/trees/{tree_sha}?recursive=1"
                tree_resp = await client.get(tree_url, headers=self.headers, timeout=15.0)
                tree_resp.raise_for_status()
                tree_data = tree_resp.json()
            except httpx.HTTPError as e:
                logger.error(f"GitHub API error fetching tree for {repo_path}: {e}")
                raise Exception(f"Failed to fetch repository structure: {e}") from e

        if tree_data.get("truncated"):
            # GitHub flags responses that exceed its size limit; the list below
            # is then incomplete.
            logger.warning(f"GitHub returned a truncated tree for {repo_path}; file list is partial.")

        # Only files ("blob"), not directories.
        return [item["path"] for item in tree_data.get("tree", []) if item["type"] == "blob"]

    async def get_file_content(self, repo_url: str, file_path: str, branch: str = "main") -> str:
        """Fetch the decoded text content of a specific file.

        This is best-effort: any request or decoding failure is logged and an
        empty string is returned so the caller can simply skip the file.

        Args:
            repo_url: GitHub repository URL.
            file_path: Path of the file inside the repository.
            branch: Branch name or commit reference to read from.

        Returns:
            The file content decoded as UTF-8 (undecodable bytes replaced), or
            ``""`` when the file cannot be read.

        Raises:
            ValueError: If ``repo_url`` cannot be parsed into ``owner/repo``.
        """
        # Local import: the module header only brings in urlparse.
        from urllib.parse import quote

        repo_path = self._parse_repo_url(repo_url)
        if not repo_path:
            raise ValueError(f"Invalid GitHub repository URL: {repo_url}")

        # Contents API. The path is percent-encoded (keeping "/") and the ref is
        # passed as a query parameter so spaces, "#", "?" or "&" stay intact.
        content_url = f"{self.base_url}/repos/{repo_path}/contents/{quote(file_path)}"

        async with httpx.AsyncClient() as client:
            try:
                resp = await client.get(
                    content_url,
                    params={"ref": branch},
                    headers=self.headers,
                    timeout=10.0,
                )
                resp.raise_for_status()
                data = resp.json()

                # A directory path yields a JSON list rather than an object.
                if isinstance(data, dict) and "content" in data and data.get("encoding") == "base64":
                    return base64.b64decode(data["content"]).decode("utf-8", errors="replace")
                return ""
            except (httpx.HTTPError, ValueError) as e:
                # ValueError covers malformed JSON and invalid base64 payloads.
                logger.error(f"GitHub API error fetching file {file_path}: {e}")
                return ""  # Return empty if file cannot be read, agent will just skip it

View File

@@ -0,0 +1,151 @@
import json
import logging
from typing import List, Dict, Any
from openai import AsyncOpenAI
from app.config import settings
from app.services.code_scanner.github_client import GitHubClient
from app.schemas.code_scan import VulnerabilityIssue
logger = logging.getLogger(__name__)
# Fall back to a placeholder key when none is configured so the client object can
# still be created at import time (presumably the constructor needs a non-empty
# key - TODO confirm). The orchestrator methods check settings.openai_api_key
# before making any API call.
api_key = settings.openai_api_key or "mock-key-for-testing"
# Module-level async OpenAI client used by the orchestrator; the code-scan router
# imports it as well for its chat endpoint.
client = AsyncOpenAI(api_key=api_key)
class CodeScanOrchestrator:
    """Drives an LLM-assisted security review of one GitHub repository.

    The workflow is: choose the files worth reviewing (``triage_files``), review
    each of them (``analyze_files``) and condense the findings into prose
    (``generate_summary``). Model output is treated as untrusted and validated
    before it is used.
    """

    def __init__(self, repo_url: str, github_token: str, branch: str = "main"):
        """Remember the scan target and create the GitHub client.

        Args:
            repo_url: URL of the repository to scan.
            github_token: Token handed to the GitHub client.
            branch: Branch name or commit reference to scan.
        """
        self.repo_url = repo_url
        self.branch = branch
        self.github = GitHubClient(token=github_token)

    @staticmethod
    def _build_file_listing(all_files: List[str], limit: int = 15000) -> str:
        """Join paths one per line, truncating at a line boundary past ``limit`` chars."""
        listing = "\n".join(all_files)
        if len(listing) <= limit:
            return listing
        # Cut at the last newline inside the limit so the model never sees a
        # half path it could echo back; fall back to a hard cut for one huge path.
        cut = listing.rfind("\n", 0, limit + 1)
        if cut <= 0:
            cut = limit
        return listing[:cut] + "\n... (truncated)"

    @staticmethod
    def _coerce_line_number(value: Any):
        """Return ``value`` as an int when it plainly is one, otherwise ``None``."""
        if isinstance(value, bool):
            return None
        if isinstance(value, int):
            return value
        if isinstance(value, float) and value.is_integer():
            return int(value)
        if isinstance(value, str) and value.strip().isdigit():
            return int(value.strip())
        return None

    async def triage_files(self, all_files: List[str]) -> List[str]:
        """Use the LLM to select the files most likely to contain vulnerabilities.

        Only paths that actually exist in ``all_files`` are returned, at most 15.
        Without an API key, on any error, or when the model returns nothing
        usable, the first 10 files are returned instead.

        Args:
            all_files: Every file path in the repository.

        Returns:
            The file paths to analyze.
        """
        if not settings.openai_api_key:
            logger.warning("OPENAI_API_KEY is not set. Triaging all files up to a limit.")
            return all_files[:10]  # Hard limit for testing

        if not all_files:
            return []  # Nothing to choose from; skip the API call.

        # To avoid context limit issues the listing is capped in length.
        files_str = self._build_file_listing(all_files)

        prompt = (
            "You are a Senior Application Security Engineer. I have a repository with the following files:\n"
            f"{files_str}\n\n"
            "Select the most critical files to review for security vulnerabilities (e.g., SAST, hardcoded secrets, SQLi, Auth bypass). "
            "Return a JSON object with a single key 'critical_files' containing a list of the exact file paths. "
            "Do not select more than 15 files."
        )

        try:
            response = await client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You always respond with valid JSON."},
                    {"role": "user", "content": prompt},
                ],
                response_format={"type": "json_object"},
                temperature=0.1,
            )
            content = response.choices[0].message.content
            if content:
                selected = json.loads(content).get("critical_files", [])
                if isinstance(selected, list):
                    known = set(all_files)
                    # Keep only real repository paths, de-duplicated in order.
                    valid = list(
                        dict.fromkeys(
                            path for path in selected if isinstance(path, str) and path in known
                        )
                    )
                    if valid or not selected:
                        return valid[:15]
                    logger.warning("Triage returned no paths that exist in the repository; using fallback.")
                else:
                    logger.warning("Triage response 'critical_files' is not a list; using fallback.")
        except Exception as e:
            logger.error(f"Error triaging files: {e}")

        return all_files[:10]  # Fallback

    async def analyze_files(self, triaged_files: List[str]) -> List[VulnerabilityIssue]:
        """Fetch each triaged file and ask the LLM for its vulnerabilities.

        Files are processed sequentially. A failure on one file, or one malformed
        finding, is logged and skipped without discarding the other results.

        Args:
            triaged_files: Paths of the files to review.

        Returns:
            All findings that passed validation; empty without an API key.
        """
        vulnerabilities = []

        if not settings.openai_api_key:
            return []

        # Analyze files sequentially (to avoid rate limits for now)
        for file_path in triaged_files:
            try:
                content = await self.github.get_file_content(self.repo_url, file_path, self.branch)
            except Exception as e:
                # One unreadable file must not abort the whole scan.
                logger.error(f"Error fetching file {file_path}: {e}")
                continue
            if not content:
                continue

            # Truncate very large files
            if len(content) > 20000:
                content = content[:20000]

            prompt = (
                f"Review the following code from the file '{file_path}' for security vulnerabilities.\n"
                "Focus on OWASP Top 10: SQLi, XSS, Hardcoded Secrets, IDOR, Misconfigurations, etc.\n\n"
                f"CODE:\n{content}\n\n"
                "Return a JSON object with a key 'vulnerabilities' containing a list of objects. "
                "Each object MUST have the following keys: "
                "'severity' (Critical, High, Medium, Low), "
                "'issue' (A short title), "
                "'explanation' (1-2 sentences explaining the vulnerability), "
                "'suggested_fix' (Code snippet or clear instructions to fix), "
                "'line_number' (integer or null if general)."
            )

            try:
                response = await client.chat.completions.create(
                    model="gpt-3.5-turbo",
                    messages=[
                        {"role": "system", "content": "You are a SAST security agent. Always respond with valid JSON."},
                        {"role": "user", "content": prompt},
                    ],
                    response_format={"type": "json_object"},
                    temperature=0.2,
                )
                resp_content = response.choices[0].message.content
                if not resp_content:
                    continue
                vulns = json.loads(resp_content).get("vulnerabilities", [])
            except Exception as e:
                logger.error(f"Error analyzing file {file_path}: {e}")
                continue

            if not isinstance(vulns, list):
                logger.warning(f"Unexpected 'vulnerabilities' payload for {file_path}; skipping.")
                continue

            for raw in vulns:
                if not isinstance(raw, dict):
                    logger.warning(f"Skipping malformed finding for {file_path}: {raw!r}")
                    continue
                try:
                    # `or` also covers explicit nulls, which .get() defaults miss.
                    vulnerabilities.append(VulnerabilityIssue(
                        file_path=file_path,
                        severity=raw.get("severity") or "Medium",
                        issue=raw.get("issue") or "Unknown Issue",
                        explanation=raw.get("explanation") or "",
                        suggested_fix=raw.get("suggested_fix"),
                        line_number=self._coerce_line_number(raw.get("line_number")),
                    ))
                except ValueError as e:
                    # Pydantic validation errors derive from ValueError; drop only
                    # the offending finding, not the whole file.
                    logger.warning(f"Skipping invalid finding for {file_path}: {e}")

        return vulnerabilities

    async def generate_summary(self, vulnerabilities: List[VulnerabilityIssue]) -> str:
        """Produce a short executive summary of the findings.

        Args:
            vulnerabilities: Findings returned by ``analyze_files``.

        Returns:
            LLM-written summary text, or a fixed message when there are no
            findings, no API key is configured, or the LLM call fails.
        """
        if not vulnerabilities:
            return "No obvious security vulnerabilities found in the scanned files."

        if not settings.openai_api_key:
            return f"Found {len(vulnerabilities)} potential issues."

        issues_data = [v.model_dump() for v in vulnerabilities]

        prompt = (
            "You are a Senior AppSec Manager. Summarize the following list of vulnerabilities found in a recent scan. "
            "Provide a 2-3 paragraph executive summary of the repository's security posture. "
            "Keep it professional and highlight the most critical risks.\n\n"
            f"{json.dumps(issues_data)}"
        )

        try:
            response = await client.chat.completions.create(
                model="gpt-3.5-turbo",
                messages=[
                    {"role": "system", "content": "You are a cybersecurity expert."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.4,
            )
            return response.choices[0].message.content or "Could not generate summary."
        except Exception as e:
            logger.error(f"Error generating summary: {e}")
            return f"Found {len(vulnerabilities)} potential issues."