mirror of
https://github.com/Rarebuffalo/securelens-backend.git
synced 2026-06-19 07:00:30 +00:00
scans the vulnerabilities
This commit is contained in:
179
app/services/nuclei_scanner.py
Normal file
179
app/services/nuclei_scanner.py
Normal file
@@ -0,0 +1,179 @@
|
||||
"""
|
||||
Nuclei Scanner
|
||||
==============
|
||||
|
||||
Wraps the Nuclei CLI tool (https://github.com/projectdiscovery/nuclei) as an
|
||||
async subprocess call so it can run as a background task after the main scan
|
||||
response has been returned to the client.
|
||||
|
||||
Why a background task?
|
||||
Nuclei is an active scanner — it actually sends probe requests to the target.
|
||||
A typical run takes 30–120 seconds, which is far too slow to block an API
|
||||
response. Running it in the background lets the client get the passive scan
|
||||
results immediately, and then poll GET /scans/{id}/nuclei for the Nuclei
|
||||
findings when they are ready.
|
||||
|
||||
Nuclei is completely optional. If the binary is not found in PATH (or the
|
||||
configured path), the scan is silently skipped and the NucleiScanResult row
|
||||
is saved with status "skipped". No error is raised.
|
||||
|
||||
Output format:
|
||||
Nuclei outputs one JSON object per line (-json flag). Each line looks like:
|
||||
{
|
||||
"template-id": "...",
|
"info": {"name": "...", "severity": "...", "description": "..."},
"matched-at": "https://..."
|
||||
}
|
||||
|
||||
Installation:
|
||||
go install -v github.com/projectdiscovery/nuclei/v3/cmd/nuclei@latest
|
||||
Or download the binary from https://github.com/projectdiscovery/nuclei/releases
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import shutil
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from app.config import settings
|
||||
from app.database import AsyncSessionLocal
|
||||
from app.models.nuclei_result import NucleiScanResult
|
||||
|
||||
logger = logging.getLogger(__name__)

# Hard ceiling, in seconds, on a single Nuclei run. The subprocess is awaited
# with this timeout so a slow scan cannot keep a background task alive
# indefinitely.
NUCLEI_TIMEOUT_SECONDS: int = 90
|
||||
|
||||
|
||||
def _find_nuclei_binary() -> str | None:
    """
    Resolve the Nuclei binary path.

    The configured path (``settings.nuclei_binary_path``) is tried first, but
    only accepted if it actually points at an executable. Otherwise the
    function falls back to looking up ``nuclei`` on PATH.

    Returns:
        The path of a usable Nuclei executable, or None if neither the
        configured path nor PATH yields one.
    """
    configured = settings.nuclei_binary_path
    if configured:
        # shutil.which() validates explicit paths (exists + executable) and
        # also resolves bare command names, so one call covers both cases.
        resolved = shutil.which(configured)
        if resolved:
            return resolved
        logger.warning(
            f"Configured Nuclei binary is not an executable file: {configured} "
            "— falling back to PATH lookup."
        )

    return shutil.which("nuclei")
|
||||
|
||||
|
||||
def _parse_nuclei_output(stdout: bytes) -> list[dict]:
|
||||
"""
|
||||
Parse Nuclei's JSONL output into a list of finding dicts.
|
||||
|
||||
Each line of stdout is expected to be a valid JSON object. Lines that
|
||||
fail to parse are skipped with a warning.
|
||||
"""
|
||||
findings = []
|
||||
for line in stdout.decode("utf-8", errors="replace").splitlines():
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
try:
|
||||
raw = json.loads(line)
|
||||
# Normalise into a flat schema we control
|
||||
findings.append({
|
||||
"template_id": raw.get("template-id", "unknown"),
|
||||
"name": raw.get("info", {}).get("name", "Unknown"),
|
||||
"severity": raw.get("info", {}).get("severity", "info"),
|
||||
"matched_at": raw.get("matched-at", ""),
|
||||
"description": raw.get("info", {}).get("description", None),
|
||||
})
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(f"Nuclei: could not parse output line: {line[:200]}")
|
||||
|
||||
return findings
|
||||
|
||||
|
||||
async def run_nuclei_scan(scan_result_id: str, url: str) -> None:
    """
    Entry point for the background Nuclei scan task.

    This runs after the main scan response has been sent. It:
      1. Checks if the Nuclei binary is available
      2. Runs Nuclei against the URL with a timeout
      3. Parses the JSONL output
      4. Saves a NucleiScanResult row to the database

    The NucleiScanResult.status field reflects the outcome:
      - "completed" : Nuclei ran and (possibly) found issues
      - "skipped"   : Nuclei binary not found
      - "timeout"   : Nuclei ran but exceeded NUCLEI_TIMEOUT_SECONDS
      - "error"     : Nuclei subprocess failed

    Args:
        scan_result_id: Identifier of the parent scan, stored on the saved row.
        url: Target passed to Nuclei via ``-u``.

    Returns:
        None. The outcome is persisted through ``_save_nuclei_result``;
        subprocess failures are logged rather than raised.
    """
    nuclei_path = _find_nuclei_binary()

    if not nuclei_path:
        logger.info(
            "Nuclei binary not found — active scan skipped. "
            "Install nuclei and set NUCLEI_BINARY_PATH if you want active scanning."
        )
        await _save_nuclei_result(scan_result_id, url, [], "skipped")
        return

    logger.info(f"Nuclei active scan starting: {url}")

    try:
        # Arguments are passed as a list (no shell), so the URL is never
        # interpreted by a shell.
        proc = await asyncio.create_subprocess_exec(
            nuclei_path,
            "-u", url,
            # JSON Lines output. Nuclei v3 (the version the module docs say
            # to install) exposes this as -j/-jsonl; the older -json spelling
            # is no longer a defined flag there.
            "-jsonl",
            "-silent",            # suppress banner/progress
            "-timeout", "10",     # per-request timeout in seconds (inside Nuclei)
            "-rate-limit", "10",  # be polite — 10 req/s max
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

        try:
            stdout, stderr = await asyncio.wait_for(
                proc.communicate(),
                timeout=NUCLEI_TIMEOUT_SECONDS,
            )
        except asyncio.TimeoutError:
            try:
                proc.kill()
            except ProcessLookupError:
                # The process exited on its own between the timeout firing
                # and the kill; it is still a timeout from our point of view.
                pass
            # Reap the process and drain the pipes so nothing is left behind.
            await proc.communicate()
            logger.warning(f"Nuclei timed out after {NUCLEI_TIMEOUT_SECONDS}s for {url}")
            await _save_nuclei_result(scan_result_id, url, [], "timeout")
            return

        if proc.returncode not in (0, 1):
            # Exit code 1 is deliberately tolerated: per the original author,
            # Nuclei exits 1 when it finds nothing — that's not an error.
            err = stderr.decode("utf-8", errors="replace")[:500]
            logger.error(f"Nuclei exited with code {proc.returncode}: {err}")
            await _save_nuclei_result(scan_result_id, url, [], "error")
            return

        findings = _parse_nuclei_output(stdout)
        logger.info(f"Nuclei scan complete: {url} — {len(findings)} finding(s)")
        await _save_nuclei_result(scan_result_id, url, findings, "completed")

    except FileNotFoundError:
        # The resolved path disappeared or was never a real executable.
        logger.error(f"Nuclei binary not executable at path: {nuclei_path}")
        await _save_nuclei_result(scan_result_id, url, [], "skipped")
    except Exception as e:
        # Top-level boundary of a background task: record the failure instead
        # of letting the exception escape unobserved.
        logger.error(f"Nuclei scan failed for {url}: {e}", exc_info=True)
        await _save_nuclei_result(scan_result_id, url, [], "error")
|
||||
|
||||
|
||||
async def _save_nuclei_result(
    scan_result_id: str,
    url: str,
    findings: list[dict],
    status: str,
) -> None:
    """
    Write one NucleiScanResult row for a finished (or skipped) Nuclei run.

    A dedicated session is opened here because this is called from a
    background task rather than from inside a request.

    Args:
        scan_result_id: Identifier of the parent scan this result belongs to.
        url: The URL that was (or would have been) scanned.
        findings: Normalised finding dicts; empty when nothing was produced.
        status: Outcome label stored on the row (e.g. "completed", "skipped",
            "timeout", "error").
    """
    async with AsyncSessionLocal() as session:
        session.add(
            NucleiScanResult(
                scan_result_id=scan_result_id,
                url=url,
                findings=findings,
                status=status,
                # Timezone-aware UTC timestamp taken at save time.
                completed_at=datetime.now(timezone.utc),
            )
        )
        await session.commit()
|
||||
Reference in New Issue
Block a user