mirror of
https://github.com/Rarebuffalo/securelens-backend.git
synced 2026-06-19 07:00:30 +00:00
updated the model
This commit is contained in:
@@ -1,7 +1,8 @@
|
||||
import json
|
||||
import logging
|
||||
from typing import List, Dict, Any
|
||||
from openai import AsyncOpenAI
|
||||
from google import genai
|
||||
from google.genai import types
|
||||
|
||||
from app.config import settings
|
||||
from app.services.code_scanner.github_client import GitHubClient
|
||||
@@ -9,26 +10,28 @@ from app.schemas.code_scan import VulnerabilityIssue
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
api_key = settings.openai_api_key or "mock-key-for-testing"
|
||||
client = AsyncOpenAI(api_key=api_key)
|
||||
# Shared google-genai client for this module. It is created only when a
# Gemini API key is configured; without a key it stays None.
ai_client = (
    genai.Client(api_key=settings.gemini_api_key)
    if settings.gemini_api_key
    else None
)
|
||||
|
||||
class CodeScanOrchestrator:
|
||||
def __init__(
    self,
    repo_url: str,
    github_token: str,
    branch: str = "main",
    *,
    model_name: str = "gemini-2.5-flash",
):
    """Store the scan target and create the GitHub client.

    Args:
        repo_url: Repository URL handed to the GitHub client when
            fetching file contents.
        github_token: Token used to construct ``GitHubClient``.
        branch: Branch to read files from. Defaults to ``"main"``.
        model_name: Gemini model identifier kept on the instance as
            ``self.model_name``. Defaults to ``gemini-2.5-flash``,
            chosen for fast and cost-effective analysis. Keyword-only,
            so existing positional callers are unaffected.
    """
    self.repo_url = repo_url
    self.branch = branch
    self.github = GitHubClient(token=github_token)
    # Previously hard-coded; now overridable per instance.
    self.model_name = model_name
|
||||
|
||||
async def triage_files(self, all_files: List[str]) -> List[str]:
|
||||
"""
|
||||
Uses the LLM to select which files are most likely to contain security vulnerabilities
|
||||
(e.g., config files, routers, auth logic).
|
||||
"""
|
||||
if not settings.openai_api_key:
|
||||
logger.warning("OPENAI_API_KEY is not set. Triaging all files up to a limit.")
|
||||
return all_files[:10] # Hard limit for testing
|
||||
if not settings.gemini_api_key:
|
||||
logger.warning("GEMINI_API_KEY is not set. Triaging all files up to a limit.")
|
||||
return all_files[:10]
|
||||
|
||||
# To avoid context limit issues, we might want to chunk this, but for now we pass the list
|
||||
# We can enforce a soft limit on the string length
|
||||
files_str = "\n".join(all_files)
|
||||
if len(files_str) > 15000:
|
||||
files_str = files_str[:15000] + "\n... (truncated)"
|
||||
@@ -42,42 +45,35 @@ class CodeScanOrchestrator:
|
||||
)
|
||||
|
||||
try:
|
||||
response = await client.chat.completions.create(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[
|
||||
{"role": "system", "content": "You always respond with valid JSON."},
|
||||
{"role": "user", "content": prompt}
|
||||
],
|
||||
response_format={"type": "json_object"},
|
||||
temperature=0.1,
|
||||
response = await ai_client.aio.models.generate_content(
|
||||
model=self.model_name,
|
||||
contents=prompt,
|
||||
config=types.GenerateContentConfig(
|
||||
response_mime_type="application/json",
|
||||
temperature=0.1,
|
||||
)
|
||||
)
|
||||
content = response.choices[0].message.content
|
||||
if content:
|
||||
data = json.loads(content)
|
||||
if response.text:
|
||||
data = json.loads(response.text)
|
||||
return data.get("critical_files", [])
|
||||
except Exception as e:
|
||||
logger.error(f"Error triaging files: {e}")
|
||||
|
||||
return all_files[:10] # Fallback
|
||||
return all_files[:10]
|
||||
|
||||
async def analyze_files(self, triaged_files: List[str]) -> List[VulnerabilityIssue]:
|
||||
"""
|
||||
Fetches the contents of the triaged files and uses the LLM to find vulnerabilities.
|
||||
"""
|
||||
vulnerabilities = []
|
||||
|
||||
if not settings.openai_api_key:
|
||||
if not settings.gemini_api_key:
|
||||
return []
|
||||
|
||||
# Analyze files sequentially or in batches (sequential to avoid rate limits for now)
|
||||
for file_path in triaged_files:
|
||||
content = await self.github.get_file_content(self.repo_url, file_path, self.branch)
|
||||
if not content:
|
||||
continue
|
||||
|
||||
# Truncate very large files
|
||||
if len(content) > 20000:
|
||||
content = content[:20000]
|
||||
if len(content) > 30000:
|
||||
content = content[:30000]
|
||||
|
||||
prompt = (
|
||||
f"Review the following code from the file '{file_path}' for security vulnerabilities.\n"
|
||||
@@ -93,19 +89,16 @@ class CodeScanOrchestrator:
|
||||
)
|
||||
|
||||
try:
|
||||
response = await client.chat.completions.create(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a SAST security agent. Always respond with valid JSON."},
|
||||
{"role": "user", "content": prompt}
|
||||
],
|
||||
response_format={"type": "json_object"},
|
||||
temperature=0.2,
|
||||
response = await ai_client.aio.models.generate_content(
|
||||
model=self.model_name,
|
||||
contents=prompt,
|
||||
config=types.GenerateContentConfig(
|
||||
response_mime_type="application/json",
|
||||
temperature=0.2,
|
||||
)
|
||||
)
|
||||
|
||||
resp_content = response.choices[0].message.content
|
||||
if resp_content:
|
||||
data = json.loads(resp_content)
|
||||
if response.text:
|
||||
data = json.loads(response.text)
|
||||
vulns = data.get("vulnerabilities", [])
|
||||
for v in vulns:
|
||||
vulnerabilities.append(VulnerabilityIssue(
|
||||
@@ -125,7 +118,7 @@ class CodeScanOrchestrator:
|
||||
if not vulnerabilities:
|
||||
return "No obvious security vulnerabilities found in the scanned files."
|
||||
|
||||
if not settings.openai_api_key:
|
||||
if not settings.gemini_api_key:
|
||||
return f"Found {len(vulnerabilities)} potential issues."
|
||||
|
||||
issues_data = [v.model_dump() for v in vulnerabilities]
|
||||
@@ -137,15 +130,14 @@ class CodeScanOrchestrator:
|
||||
)
|
||||
|
||||
try:
|
||||
response = await client.chat.completions.create(
|
||||
model="gpt-3.5-turbo",
|
||||
messages=[
|
||||
{"role": "system", "content": "You are a cybersecurity expert."},
|
||||
{"role": "user", "content": prompt}
|
||||
],
|
||||
temperature=0.4,
|
||||
response = await ai_client.aio.models.generate_content(
|
||||
model=self.model_name,
|
||||
contents=prompt,
|
||||
config=types.GenerateContentConfig(
|
||||
temperature=0.4,
|
||||
)
|
||||
)
|
||||
return response.choices[0].message.content or "Could not generate summary."
|
||||
return response.text or "Could not generate summary."
|
||||
except Exception as e:
|
||||
logger.error(f"Error generating summary: {e}")
|
||||
return f"Found {len(vulnerabilities)} potential issues."
|
||||
|
||||
Reference in New Issue
Block a user