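Updated model for suitable testing: switch code scanner to gemini-2.0-flash, cap triage at 5 files, and analyze files concurrently (max 5 at a time)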

This commit is contained in:
rarebuffalo
2026-04-25 21:29:19 +05:30
parent 139c8d982b
commit ae4ed3062a
5 changed files with 72 additions and 34 deletions

View File

@@ -3,6 +3,7 @@ import logging
from typing import List, Dict, Any
from google import genai
from google.genai import types
import asyncio
from app.config import settings
from app.services.code_scanner.github_client import GitHubClient
@@ -21,8 +22,8 @@ class CodeScanOrchestrator:
self.repo_url = repo_url
self.branch = branch
self.github = GitHubClient(token=github_token)
# We use gemini-2.5-flash for fast and cost-effective analysis
self.model_name = 'gemini-2.5-flash'
# We use gemini-2.0-flash for high rate limits and stability
self.model_name = 'gemini-2.0-flash'
async def triage_files(self, all_files: List[str]) -> List[str]:
"""
@@ -30,7 +31,7 @@ class CodeScanOrchestrator:
"""
if not settings.gemini_api_key:
logger.warning("GEMINI_API_KEY is not set. Triaging all files up to a limit.")
return all_files[:10]
return all_files[:5]
files_str = "\n".join(all_files)
if len(files_str) > 15000:
@@ -41,7 +42,7 @@ class CodeScanOrchestrator:
f"{files_str}\n\n"
"Select the most critical files to review for security vulnerabilities (e.g., SAST, hardcoded secrets, SQLi, Auth bypass). "
"Return a JSON object with a single key 'critical_files' containing a list of the exact file paths. "
"Do not select more than 15 files."
"Do not select more than 5 files."
)
try:
@@ -59,18 +60,23 @@ class CodeScanOrchestrator:
except Exception as e:
logger.error(f"Error triaging files: {e}")
return all_files[:10]
return all_files[:5]
async def analyze_files(self, triaged_files: List[str]) -> List[VulnerabilityIssue]:
vulnerabilities = []
if not settings.gemini_api_key:
return []
for file_path in triaged_files:
vulnerabilities = []
semaphore = asyncio.Semaphore(5) # Max 5 concurrent requests to avoid rate limits
async def process_file(file_path: str):
# Skip massive dependency lock files as they are too slow and unhelpful for SAST
if file_path.endswith('package-lock.json') or file_path.endswith('yarn.lock'):
return []
content = await self.github.get_file_content(self.repo_url, file_path, self.branch)
if not content:
continue
return []
if len(content) > 30000:
content = content[:30000]
@@ -88,30 +94,37 @@ class CodeScanOrchestrator:
"'line_number' (integer or null if general)."
)
try:
response = await ai_client.aio.models.generate_content(
model=self.model_name,
contents=prompt,
config=types.GenerateContentConfig(
response_mime_type="application/json",
temperature=0.2,
file_vulns = []
async with semaphore:
try:
response = await ai_client.aio.models.generate_content(
model=self.model_name,
contents=prompt,
config=types.GenerateContentConfig(
response_mime_type="application/json",
temperature=0.2,
)
)
)
if response.text:
data = json.loads(response.text)
vulns = data.get("vulnerabilities", [])
for v in vulns:
vulnerabilities.append(VulnerabilityIssue(
file_path=file_path,
severity=v.get("severity", "Medium"),
issue=v.get("issue", "Unknown Issue"),
explanation=v.get("explanation", ""),
suggested_fix=v.get("suggested_fix"),
line_number=v.get("line_number")
))
except Exception as e:
logger.error(f"Error analyzing file {file_path}: {e}")
if response.text:
data = json.loads(response.text)
vulns = data.get("vulnerabilities", [])
for v in vulns:
file_vulns.append(VulnerabilityIssue(
file_path=file_path,
severity=v.get("severity", "Medium"),
issue=v.get("issue", "Unknown Issue"),
explanation=v.get("explanation", ""),
suggested_fix=v.get("suggested_fix"),
line_number=v.get("line_number")
))
except Exception as e:
logger.error(f"Error analyzing file {file_path}: {e}")
return file_vulns
results = await asyncio.gather(*(process_file(f) for f in triaged_files))
for res in results:
vulnerabilities.extend(res)
return vulnerabilities
async def generate_summary(self, vulnerabilities: List[VulnerabilityIssue]) -> str: