Fix PDF export by sanitizing unicode characters before writing to FPDF

This commit is contained in:
rarebuffalo
2026-06-15 03:08:08 +05:30
parent 852c2f9776
commit 766fe22e1d
2 changed files with 52 additions and 19 deletions

View File

@@ -2,6 +2,39 @@ from fpdf import FPDF
import datetime import datetime
from typing import Optional from typing import Optional
# Single-character substitutions applied before the Latin-1 fallback, so that
# common typographic symbols keep a readable ASCII stand-in instead of "?".
_PDF_CHAR_REPLACEMENTS = {
    "\u2018": "'",  # left single quotation mark
    "\u2019": "'",  # right single quotation mark
    "\u201c": '"',  # left double quotation mark
    "\u201d": '"',  # right double quotation mark
    "\u2013": "-",  # en dash
    "\u2014": "-",  # em dash
    "\u2022": "*",  # bullet
    "\u2026": "...",  # horizontal ellipsis
    "\u2713": "OK",  # check mark
    "\u2714": "OK",  # heavy check mark
    "\u2715": "X",  # multiplication x
    "\u2717": "X",  # ballot x
    "\u2718": "X",  # heavy ballot x
    "\u26a0": "!",  # warning sign
    "\u25b6": ">",  # right-pointing triangle
    "\u25c0": "<",  # left-pointing triangle
    "\u25b2": "^",  # up-pointing triangle
    "\u25bc": "v",  # down-pointing triangle
    "\u25ae": "|",  # vertical rectangle
    "\u2588": "#",  # full block
    "\u2591": ".",  # light shade
    "\u2592": ":",  # medium shade
    "\u2593": "#",  # dark shade
    "`": "'",  # backtick
}

# Built once at import time. Every key is a single character and no replacement
# contains a key, so one str.translate pass gives the same result as applying
# the substitutions one after another.
_PDF_TRANSLATION_TABLE = str.maketrans(_PDF_CHAR_REPLACEMENTS)


def sanitize_text(text: Optional[str]) -> str:
    """Return ``text`` reduced to characters that can be encoded as Latin-1.

    Known typographic symbols (smart quotes, dashes, bullets, check marks,
    arrows, block characters, backticks) are first mapped to ASCII stand-ins.
    Any character that still cannot be encoded as Latin-1 is replaced with
    ``"?"``.

    Args:
        text: The string to clean. ``None`` and other falsy values are
            treated as empty input.

    Returns:
        The cleaned string, or ``""`` when ``text`` is falsy.
    """
    if not text:
        return ""
    substituted = text.translate(_PDF_TRANSLATION_TABLE)
    # Round-trip through Latin-1 so every remaining unsupported character
    # collapses to "?" rather than raising at encode time later on.
    return substituted.encode("latin-1", errors="replace").decode("latin-1")
class SecureLensPDF(FPDF): class SecureLensPDF(FPDF):
def footer(self): def footer(self):
self.set_y(-15) self.set_y(-15)
@@ -26,9 +59,9 @@ def export_code_pdf(result, output_path: str) -> str:
pdf.set_font("helvetica", "", 10) pdf.set_font("helvetica", "", 10)
pdf.set_text_color(100, 100, 100) pdf.set_text_color(100, 100, 100)
now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
pdf.cell(0, 8, f"Target Path: {result.target}", new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 8, sanitize_text(f"Target Path: {result.target}"), new_x="LMARGIN", new_y="NEXT")
pdf.cell(0, 8, f"Scan Time: {now_str}", new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 8, f"Scan Time: {now_str}", new_x="LMARGIN", new_y="NEXT")
pdf.cell(0, 8, f"Security Score: {result.score}/100 (Grade: {result.grade})", new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 8, sanitize_text(f"Security Score: {result.score}/100 (Grade: {result.grade})"), new_x="LMARGIN", new_y="NEXT")
pdf.ln(5) pdf.ln(5)
# Executive Summary Section # Executive Summary Section
@@ -40,7 +73,7 @@ def export_code_pdf(result, output_path: str) -> str:
pdf.set_text_color(0, 0, 0) pdf.set_text_color(0, 0, 0)
pdf.ln(2) pdf.ln(2)
summary_text = result.ai_summary or f"A static patterns analysis was performed on the codebase. Out of the files discovered, {len(result.vulnerabilities)} potential security vulnerabilities were reported." summary_text = result.ai_summary or f"A static patterns analysis was performed on the codebase. Out of the files discovered, {len(result.vulnerabilities)} potential security vulnerabilities were reported."
pdf.multi_cell(0, 5, summary_text) pdf.multi_cell(0, 5, sanitize_text(summary_text))
pdf.ln(8) pdf.ln(8)
# Files Scanned Section # Files Scanned Section
@@ -53,7 +86,7 @@ def export_code_pdf(result, output_path: str) -> str:
files_list = ", ".join(result.files_triaged[:15]) files_list = ", ".join(result.files_triaged[:15])
if len(result.files_triaged) > 15: if len(result.files_triaged) > 15:
files_list += f", and {len(result.files_triaged) - 15} more" files_list += f", and {len(result.files_triaged) - 15} more"
pdf.multi_cell(0, 5, files_list or "No files selected.") pdf.multi_cell(0, 5, sanitize_text(files_list or "No files selected."))
pdf.ln(8) pdf.ln(8)
# Issues Findings Section # Issues Findings Section
@@ -78,26 +111,26 @@ def export_code_pdf(result, output_path: str) -> str:
else: pdf.set_text_color(0, 100, 0) else: pdf.set_text_color(0, 100, 0)
line_str = f" [Line {v.line_number}]" if v.line_number else "" line_str = f" [Line {v.line_number}]" if v.line_number else ""
pdf.cell(0, 8, f"{idx}. {severity}: {v.issue}{line_str}", new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 8, sanitize_text(f"{idx}. {severity}: {v.issue}{line_str}"), new_x="LMARGIN", new_y="NEXT")
# Details # Details
pdf.set_text_color(0, 0, 0) pdf.set_text_color(0, 0, 0)
pdf.set_font("helvetica", "B", 9) pdf.set_font("helvetica", "B", 9)
pdf.cell(20, 6, "File:", border=0) pdf.cell(20, 6, "File:", border=0)
pdf.set_font("helvetica", "", 9) pdf.set_font("helvetica", "", 9)
pdf.cell(0, 6, v.file_path, new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 6, sanitize_text(v.file_path), new_x="LMARGIN", new_y="NEXT")
pdf.set_font("helvetica", "B", 9) pdf.set_font("helvetica", "B", 9)
pdf.cell(0, 6, "Explanation:", new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 6, "Explanation:", new_x="LMARGIN", new_y="NEXT")
pdf.set_font("helvetica", "", 9) pdf.set_font("helvetica", "", 9)
pdf.multi_cell(0, 4.5, v.explanation) pdf.multi_cell(0, 4.5, sanitize_text(v.explanation))
if v.suggested_fix: if v.suggested_fix:
pdf.set_font("helvetica", "B", 9) pdf.set_font("helvetica", "B", 9)
pdf.cell(0, 6, "Suggested Fix:", new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 6, "Suggested Fix:", new_x="LMARGIN", new_y="NEXT")
pdf.set_font("courier", "", 8.5) pdf.set_font("courier", "", 8.5)
pdf.set_fill_color(245, 245, 245) pdf.set_fill_color(245, 245, 245)
pdf.multi_cell(0, 4.5, v.suggested_fix, fill=True) pdf.multi_cell(0, 4.5, sanitize_text(v.suggested_fix), fill=True)
pdf.ln(4) pdf.ln(4)
pdf.line(pdf.get_x(), pdf.get_y(), 200, pdf.get_y()) pdf.line(pdf.get_x(), pdf.get_y(), 200, pdf.get_y())
@@ -123,9 +156,9 @@ def export_web_pdf(result, output_path: str) -> str:
pdf.set_font("helvetica", "", 10) pdf.set_font("helvetica", "", 10)
pdf.set_text_color(100, 100, 100) pdf.set_text_color(100, 100, 100)
now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
pdf.cell(0, 8, f"Target URL: {result.url}", new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 8, sanitize_text(f"Target URL: {result.url}"), new_x="LMARGIN", new_y="NEXT")
pdf.cell(0, 8, f"Scan Time: {now_str}", new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 8, f"Scan Time: {now_str}", new_x="LMARGIN", new_y="NEXT")
pdf.cell(0, 8, f"Security Score: {result.score}/100 (Grade: {result.grade})", new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 8, sanitize_text(f"Security Score: {result.score}/100 (Grade: {result.grade})"), new_x="LMARGIN", new_y="NEXT")
if result.ssl_expiry_days is not None: if result.ssl_expiry_days is not None:
pdf.cell(0, 8, f"SSL Expiry: {result.ssl_expiry_days} days left", new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 8, f"SSL Expiry: {result.ssl_expiry_days} days left", new_x="LMARGIN", new_y="NEXT")
pdf.ln(5) pdf.ln(5)
@@ -139,7 +172,7 @@ def export_web_pdf(result, output_path: str) -> str:
pdf.set_text_color(0, 0, 0) pdf.set_text_color(0, 0, 0)
pdf.ln(2) pdf.ln(2)
summary_text = result.ai_summary or f"An automated live security audit was performed on {result.url}. Out of the layers checked, {len(result.issues)} potential issues were flagged." summary_text = result.ai_summary or f"An automated live security audit was performed on {result.url}. Out of the layers checked, {len(result.issues)} potential issues were flagged."
pdf.multi_cell(0, 5, summary_text) pdf.multi_cell(0, 5, sanitize_text(summary_text))
pdf.ln(8) pdf.ln(8)
# Issues Section # Issues Section
@@ -162,20 +195,20 @@ def export_web_pdf(result, output_path: str) -> str:
elif severity == "Warning": pdf.set_text_color(218, 165, 32) elif severity == "Warning": pdf.set_text_color(218, 165, 32)
else: pdf.set_text_color(0, 100, 0) else: pdf.set_text_color(0, 100, 0)
pdf.cell(0, 8, f"{idx}. {severity}: {i.issue}", new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 8, sanitize_text(f"{idx}. {severity}: {i.issue}"), new_x="LMARGIN", new_y="NEXT")
# Details # Details
pdf.set_text_color(0, 0, 0) pdf.set_text_color(0, 0, 0)
pdf.set_font("helvetica", "B", 9) pdf.set_font("helvetica", "B", 9)
pdf.cell(20, 6, "Layer:", border=0) pdf.cell(20, 6, "Layer:", border=0)
pdf.set_font("helvetica", "", 9) pdf.set_font("helvetica", "", 9)
pdf.cell(0, 6, i.layer, new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 6, sanitize_text(i.layer), new_x="LMARGIN", new_y="NEXT")
pdf.set_font("helvetica", "B", 9) pdf.set_font("helvetica", "B", 9)
pdf.cell(0, 6, "Remediation / Fix:", new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 6, "Remediation / Fix:", new_x="LMARGIN", new_y="NEXT")
pdf.set_font("courier", "", 8.5) pdf.set_font("courier", "", 8.5)
pdf.set_fill_color(245, 245, 245) pdf.set_fill_color(245, 245, 245)
pdf.multi_cell(0, 4.5, i.fix, fill=True) pdf.multi_cell(0, 4.5, sanitize_text(i.fix), fill=True)
pdf.ln(4) pdf.ln(4)
pdf.line(pdf.get_x(), pdf.get_y(), 200, pdf.get_y()) pdf.line(pdf.get_x(), pdf.get_y(), 200, pdf.get_y())

View File

@@ -14,15 +14,15 @@ def test_export_code_pdf_compiles(tmp_path):
VulnerabilityFinding( VulnerabilityFinding(
file_path="app.py", file_path="app.py",
severity="Critical", severity="Critical",
issue="Hardcoded Secret Key", issue="Hardcoded Secret Key with unicode smart quotes",
explanation="Exposing secret key inside app.py.", explanation="Exposing secret key inside app.py • vulnerable to attacks.",
suggested_fix="Load key from environment", suggested_fix="Load key from environment: jwt_secret = Field(default=\"\") \u25b6 check it.",
line_number=5 line_number=5
), ),
VulnerabilityFinding( VulnerabilityFinding(
file_path="db.py", file_path="db.py",
severity="High", severity="High",
issue="Raw SQL Statement", issue="Raw SQL Statement \u2717 check fail",
explanation="SQL injection inside db.py.", explanation="SQL injection inside db.py.",
suggested_fix="Use parameterized queries", suggested_fix="Use parameterized queries",
line_number=20 line_number=20
@@ -34,7 +34,7 @@ def test_export_code_pdf_compiles(tmp_path):
total_files_found=10, total_files_found=10,
files_triaged=["app.py", "db.py"], files_triaged=["app.py", "db.py"],
vulnerabilities=findings, vulnerabilities=findings,
ai_summary="This is a dummy AI report summary describing security posture." ai_summary="This is a dummy AI report summary describing security posture with check \u2713 and block \u2588."
) )
result.compute_score() result.compute_score()