mirror of
https://github.com/Rarebuffalo/securelens-backend.git
synced 2026-06-19 07:00:30 +00:00
180 lines
6.3 KiB
Python
180 lines
6.3 KiB
Python
"""
|
||
Nuclei Scanner
|
||
==============
|
||
|
||
Wraps the Nuclei CLI tool (https://github.com/projectdiscovery/nuclei) as an
|
||
async subprocess call so it can run as a background task after the main scan
|
||
response has been returned to the client.
|
||
|
||
Why a background task?
|
||
Nuclei is an active scanner — it actually sends probe requests to the target.
|
||
A typical run takes 30–120 seconds, which is far too slow to block an API
|
||
response. Running it in the background lets the client get the passive scan
|
||
results immediately, and then poll GET /scans/{id}/nuclei for the Nuclei
|
||
findings when they are ready.
|
||
|
||
Nuclei is completely optional. If the binary is not found in PATH (or the
|
||
configured path), the scan is silently skipped and the NucleiScanResult row
|
||
is saved with status "skipped". No error is raised.
|
||
|
||
Output format:
|
||
Nuclei outputs one JSON object per line (-json flag). Each line looks like:
|
||
    {
      "template-id": "...",
      "info": {"name": "...", "severity": "...", "description": "..."},
      "matched-at": "https://..."
    }
|
||
|
||
Installation:
|
||
go install -v github.com/projectdiscovery/nuclei/v3/cmd/nuclei@latest
|
||
Or download the binary from https://github.com/projectdiscovery/nuclei/releases
|
||
"""
|
||
|
||
import asyncio
|
||
import json
|
||
import logging
|
||
import shutil
|
||
from datetime import datetime, timezone
|
||
|
||
from app.config import settings
|
||
from app.database import AsyncSessionLocal
|
||
from app.models.nuclei_result import NucleiScanResult
|
||
|
||
# Module-level logger shared by every function in this file.
logger = logging.getLogger(__name__)

# Upper bound, in seconds, on how long a single Nuclei run may take.
# Nuclei can be slow, so the cap is deliberately conservative: it exists
# only to stop a background task from running indefinitely.
NUCLEI_TIMEOUT_SECONDS: int = 90
|
||
|
||
|
||
def _find_nuclei_binary() -> str | None:
    """
    Resolve the Nuclei binary path.

    Checks the configured path first (NUCLEI_BINARY_PATH env var), then
    falls back to searching PATH. Returns None if not found anywhere.

    The configured value is validated with ``shutil.which`` rather than
    returned blindly: a value containing a directory component is checked
    as-is (it must exist and be executable), a bare command name is looked
    up on PATH. A stale or mistyped setting therefore no longer hides a
    working ``nuclei`` that is installed on PATH.
    """
    configured = settings.nuclei_binary_path
    if configured:
        resolved = shutil.which(configured)
        if resolved:
            return resolved
        logger.warning(
            f"Configured Nuclei binary is missing or not executable: {configured}"
        )

    return shutil.which("nuclei")
|
||
|
||
|
||
def _parse_nuclei_output(stdout: bytes) -> list[dict]:
|
||
"""
|
||
Parse Nuclei's JSONL output into a list of finding dicts.
|
||
|
||
Each line of stdout is expected to be a valid JSON object. Lines that
|
||
fail to parse are skipped with a warning.
|
||
"""
|
||
findings = []
|
||
for line in stdout.decode("utf-8", errors="replace").splitlines():
|
||
line = line.strip()
|
||
if not line:
|
||
continue
|
||
try:
|
||
raw = json.loads(line)
|
||
# Normalise into a flat schema we control
|
||
findings.append({
|
||
"template_id": raw.get("template-id", "unknown"),
|
||
"name": raw.get("info", {}).get("name", "Unknown"),
|
||
"severity": raw.get("info", {}).get("severity", "info"),
|
||
"matched_at": raw.get("matched-at", ""),
|
||
"description": raw.get("info", {}).get("description", None),
|
||
})
|
||
except json.JSONDecodeError:
|
||
logger.warning(f"Nuclei: could not parse output line: {line[:200]}")
|
||
|
||
return findings
|
||
|
||
|
||
async def run_nuclei_scan(scan_result_id: str, url: str) -> None:
    """
    Entry point for the background Nuclei scan task.

    This runs after the main scan response has been sent. It:
      1. Checks if the Nuclei binary is available
      2. Runs Nuclei against the URL with a timeout
      3. Parses the JSONL output
      4. Saves a NucleiScanResult row to the database

    The NucleiScanResult.status field reflects the outcome:
      - "completed" : Nuclei ran and (possibly) found issues
      - "skipped"   : Nuclei binary not found
      - "timeout"   : Nuclei ran but exceeded the timeout
      - "error"     : Nuclei subprocess failed

    If the task itself is cancelled while Nuclei is running, the subprocess
    is killed before the cancellation propagates; no row is saved then.
    """
    nuclei_path = _find_nuclei_binary()

    if not nuclei_path:
        logger.info(
            "Nuclei binary not found — active scan skipped. "
            "Install nuclei and set NUCLEI_BINARY_PATH if you want active scanning."
        )
        await _save_nuclei_result(scan_result_id, url, [], "skipped")
        return

    logger.info(f"Nuclei active scan starting: {url}")

    try:
        proc = await asyncio.create_subprocess_exec(
            nuclei_path,
            "-u", url,
            # Nuclei v3 names the JSON Lines flag -jsonl; the older -json
            # spelling is rejected as an unknown flag.
            "-jsonl",             # output as JSON lines
            "-silent",            # suppress banner/progress
            "-timeout", "10",     # per-request timeout in seconds (inside Nuclei)
            "-rate-limit", "10",  # be polite — 10 req/s max
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )

        try:
            stdout, stderr = await asyncio.wait_for(
                proc.communicate(),
                timeout=NUCLEI_TIMEOUT_SECONDS,
            )
        except asyncio.TimeoutError:
            _kill_if_running(proc)
            # Drain the pipes and reap the process after the kill.
            await proc.communicate()
            logger.warning(f"Nuclei timed out after {NUCLEI_TIMEOUT_SECONDS}s for {url}")
            await _save_nuclei_result(scan_result_id, url, [], "timeout")
            return
        except asyncio.CancelledError:
            # Do not leave an active scanner running once nobody is
            # waiting for its result.
            _kill_if_running(proc)
            raise

        if proc.returncode not in (0, 1):
            # Nuclei exits 1 when it finds nothing — that's not an error
            err = stderr.decode("utf-8", errors="replace")[:500]
            logger.error(f"Nuclei exited with code {proc.returncode}: {err}")
            await _save_nuclei_result(scan_result_id, url, [], "error")
            return

        findings = _parse_nuclei_output(stdout)
        logger.info(f"Nuclei scan complete: {url} — {len(findings)} finding(s)")
        await _save_nuclei_result(scan_result_id, url, findings, "completed")

    except FileNotFoundError:
        logger.error(f"Nuclei binary not executable at path: {nuclei_path}")
        await _save_nuclei_result(scan_result_id, url, [], "skipped")
    except Exception as e:
        # Last-resort boundary for the background task: log with traceback
        # and record the failure so pollers do not wait forever.
        logger.error(f"Nuclei scan failed for {url}: {e}", exc_info=True)
        await _save_nuclei_result(scan_result_id, url, [], "error")


def _kill_if_running(proc: asyncio.subprocess.Process) -> None:
    """Kill *proc* unless it has already exited; never raises if it is gone."""
    if proc.returncode is not None:
        return
    try:
        proc.kill()
    except ProcessLookupError:
        # The process exited between the returncode check and the kill.
        pass
|
||
|
||
|
||
async def _save_nuclei_result(
    scan_result_id: str,
    url: str,
    findings: list[dict],
    status: str,
) -> None:
    """
    Store the outcome of one Nuclei run as a NucleiScanResult row.

    A fresh session is opened for the write because this runs in a
    background context. The row's completed_at is stamped with the current
    UTC time, and the session is committed before returning.
    """
    async with AsyncSessionLocal() as session:
        session.add(
            NucleiScanResult(
                scan_result_id=scan_result_id,
                url=url,
                findings=findings,
                status=status,
                completed_at=datetime.now(timezone.utc),
            )
        )
        await session.commit()
|