Switch code scanner to gemini-2.0-flash for testing; cap triage at 5 files and analyze files concurrently

2026-06-19 07:00:30 +00:00 · 2026-04-25 21:29:19 +05:30
parent 139c8d982b
commit ae4ed3062a
5 changed files with 72 additions and 34 deletions
--- a/app/services/code_scanner/orchestrator.py
+++ b/app/services/code_scanner/orchestrator.py
@@ -3,6 +3,7 @@ import logging
 from typing import List, Dict, Any
 from google import genai
 from google.genai import types
+import asyncio

 from app.config import settings
 from app.services.code_scanner.github_client import GitHubClient
@@ -21,8 +22,8 @@ class CodeScanOrchestrator:
        self.repo_url = repo_url
        self.branch = branch
        self.github = GitHubClient(token=github_token)
-        # We use gemini-2.5-flash for fast and cost-effective analysis
-        self.model_name = 'gemini-2.5-flash'
+        # We use gemini-2.0-flash for high rate limits and stability
+        self.model_name = 'gemini-2.0-flash'

    async def triage_files(self, all_files: List[str]) -> List[str]:
        """
@@ -30,7 +31,7 @@ class CodeScanOrchestrator:
        """
        if not settings.gemini_api_key:
            logger.warning("GEMINI_API_KEY is not set. Triaging all files up to a limit.")
-            return all_files[:10]
+            return all_files[:5]

        files_str = "\n".join(all_files)
        if len(files_str) > 15000:
@@ -41,7 +42,7 @@ class CodeScanOrchestrator:
            f"{files_str}\n\n"
            "Select the most critical files to review for security vulnerabilities (e.g., SAST, hardcoded secrets, SQLi, Auth bypass). "
            "Return a JSON object with a single key 'critical_files' containing a list of the exact file paths. "
-            "Do not select more than 15 files."
+            "Do not select more than 5 files."
        )

        try:
@@ -59,18 +60,23 @@ class CodeScanOrchestrator:
        except Exception as e:
            logger.error(f"Error triaging files: {e}")
            
-        return all_files[:10]
+        return all_files[:5]

    async def analyze_files(self, triaged_files: List[str]) -> List[VulnerabilityIssue]:
-        vulnerabilities = []
-        
        if not settings.gemini_api_key:
            return []

-        for file_path in triaged_files:
+        vulnerabilities = []
+        semaphore = asyncio.Semaphore(5)  # Max 5 concurrent requests to avoid rate limits
+        
+        async def process_file(file_path: str):
+            # Skip massive dependency lock files as they are too slow and unhelpful for SAST
+            if file_path.endswith('package-lock.json') or file_path.endswith('yarn.lock'):
+                return []
+                
            content = await self.github.get_file_content(self.repo_url, file_path, self.branch)
            if not content:
-                continue
+                return []
                
            if len(content) > 30000:
                content = content[:30000]
@@ -88,30 +94,37 @@ class CodeScanOrchestrator:
                "'line_number' (integer or null if general)."
            )

-            try:
-                response = await ai_client.aio.models.generate_content(
-                    model=self.model_name,
-                    contents=prompt,
-                    config=types.GenerateContentConfig(
-                        response_mime_type="application/json",
-                        temperature=0.2,
+            file_vulns = []
+            async with semaphore:
+                try:
+                    response = await ai_client.aio.models.generate_content(
+                        model=self.model_name,
+                        contents=prompt,
+                        config=types.GenerateContentConfig(
+                            response_mime_type="application/json",
+                            temperature=0.2,
+                        )
                    )
-                )
-                if response.text:
-                    data = json.loads(response.text)
-                    vulns = data.get("vulnerabilities", [])
-                    for v in vulns:
-                        vulnerabilities.append(VulnerabilityIssue(
-                            file_path=file_path,
-                            severity=v.get("severity", "Medium"),
-                            issue=v.get("issue", "Unknown Issue"),
-                            explanation=v.get("explanation", ""),
-                            suggested_fix=v.get("suggested_fix"),
-                            line_number=v.get("line_number")
-                        ))
-            except Exception as e:
-                logger.error(f"Error analyzing file {file_path}: {e}")
+                    if response.text:
+                        data = json.loads(response.text)
+                        vulns = data.get("vulnerabilities", [])
+                        for v in vulns:
+                            file_vulns.append(VulnerabilityIssue(
+                                file_path=file_path,
+                                severity=v.get("severity", "Medium"),
+                                issue=v.get("issue", "Unknown Issue"),
+                                explanation=v.get("explanation", ""),
+                                suggested_fix=v.get("suggested_fix"),
+                                line_number=v.get("line_number")
+                            ))
+                except Exception as e:
+                    logger.error(f"Error analyzing file {file_path}: {e}")
+            return file_vulns

+        results = await asyncio.gather(*(process_file(f) for f in triaged_files))
+        for res in results:
+            vulnerabilities.extend(res)
+            
        return vulnerabilities

    async def generate_summary(self, vulnerabilities: List[VulnerabilityIssue]) -> str: