From 766fe22e1d40b98aaff2214e40eeb73a8b5f6bbe Mon Sep 17 00:00:00 2001 From: rarebuffalo Date: Mon, 15 Jun 2026 03:08:08 +0530 Subject: [PATCH] Fix PDF export by sanitizing unicode characters before writing to FPDF --- cli/securelens/output/pdf.py | 61 +++++++++++++++++++++++++++--------- tests/test_cli_pdf.py | 10 +++--- 2 files changed, 52 insertions(+), 19 deletions(-) diff --git a/cli/securelens/output/pdf.py b/cli/securelens/output/pdf.py index 3677e40..df8d6eb 100644 --- a/cli/securelens/output/pdf.py +++ b/cli/securelens/output/pdf.py @@ -2,6 +2,39 @@ from fpdf import FPDF import datetime from typing import Optional +def sanitize_text(text: Optional[str]) -> str: + if not text: + return "" + replacements = { + "\u2018": "'", + "\u2019": "'", + "\u201c": '"', + "\u201d": '"', + "\u2013": "-", + "\u2014": "-", + "\u2022": "*", + "\u2026": "...", + "\u2713": "OK", + "\u2714": "OK", + "\u2715": "X", + "\u2717": "X", + "\u2718": "X", + "\u26a0": "!", + "\u25b6": ">", + "\u25c0": "<", + "\u25b2": "^", + "\u25bc": "v", + "\u25ae": "|", + "\u2588": "#", + "\u2591": ".", + "\u2592": ":", + "\u2593": "#", + "`": "'", + } + for orig, rep in replacements.items(): + text = text.replace(orig, rep) + return text.encode("latin-1", errors="replace").decode("latin-1") + class SecureLensPDF(FPDF): def footer(self): self.set_y(-15) @@ -26,9 +59,9 @@ def export_code_pdf(result, output_path: str) -> str: pdf.set_font("helvetica", "", 10) pdf.set_text_color(100, 100, 100) now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - pdf.cell(0, 8, f"Target Path: {result.target}", new_x="LMARGIN", new_y="NEXT") + pdf.cell(0, 8, sanitize_text(f"Target Path: {result.target}"), new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 8, f"Scan Time: {now_str}", new_x="LMARGIN", new_y="NEXT") - pdf.cell(0, 8, f"Security Score: {result.score}/100 (Grade: {result.grade})", new_x="LMARGIN", new_y="NEXT") + pdf.cell(0, 8, sanitize_text(f"Security Score: {result.score}/100 (Grade: {result.grade})"), new_x="LMARGIN", new_y="NEXT") pdf.ln(5) # Executive Summary Section @@ -40,7 +73,7 @@ def export_code_pdf(result, output_path: str) -> str: pdf.set_text_color(0, 0, 0) pdf.ln(2) summary_text = result.ai_summary or f"A static patterns analysis was performed on the codebase. Out of the files discovered, {len(result.vulnerabilities)} potential security vulnerabilities were reported." - pdf.multi_cell(0, 5, summary_text) + pdf.multi_cell(0, 5, sanitize_text(summary_text)) pdf.ln(8) # Files Scanned Section @@ -53,7 +86,7 @@ def export_code_pdf(result, output_path: str) -> str: files_list = ", ".join(result.files_triaged[:15]) if len(result.files_triaged) > 15: files_list += f", and {len(result.files_triaged) - 15} more" - pdf.multi_cell(0, 5, files_list or "No files selected.") + pdf.multi_cell(0, 5, sanitize_text(files_list or "No files selected.")) pdf.ln(8) # Issues Findings Section @@ -78,26 +111,26 @@ def export_code_pdf(result, output_path: str) -> str: else: pdf.set_text_color(0, 100, 0) line_str = f" [Line {v.line_number}]" if v.line_number else "" - pdf.cell(0, 8, f"{idx}. {severity}: {v.issue}{line_str}", new_x="LMARGIN", new_y="NEXT") + pdf.cell(0, 8, sanitize_text(f"{idx}. {severity}: {v.issue}{line_str}"), new_x="LMARGIN", new_y="NEXT") # Details pdf.set_text_color(0, 0, 0) pdf.set_font("helvetica", "B", 9) pdf.cell(20, 6, "File:", border=0) pdf.set_font("helvetica", "", 9) - pdf.cell(0, 6, v.file_path, new_x="LMARGIN", new_y="NEXT") + pdf.cell(0, 6, sanitize_text(v.file_path), new_x="LMARGIN", new_y="NEXT") pdf.set_font("helvetica", "B", 9) pdf.cell(0, 6, "Explanation:", new_x="LMARGIN", new_y="NEXT") pdf.set_font("helvetica", "", 9) - pdf.multi_cell(0, 4.5, v.explanation) + pdf.multi_cell(0, 4.5, sanitize_text(v.explanation)) if v.suggested_fix: pdf.set_font("helvetica", "B", 9) pdf.cell(0, 6, "Suggested Fix:", new_x="LMARGIN", new_y="NEXT") pdf.set_font("courier", "", 8.5) pdf.set_fill_color(245, 245, 245) - pdf.multi_cell(0, 4.5, v.suggested_fix, fill=True) + pdf.multi_cell(0, 4.5, sanitize_text(v.suggested_fix), fill=True) pdf.ln(4) pdf.line(pdf.get_x(), pdf.get_y(), 200, pdf.get_y()) @@ -123,9 +156,9 @@ def export_web_pdf(result, output_path: str) -> str: pdf.set_font("helvetica", "", 10) pdf.set_text_color(100, 100, 100) now_str = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") - pdf.cell(0, 8, f"Target URL: {result.url}", new_x="LMARGIN", new_y="NEXT") + pdf.cell(0, 8, sanitize_text(f"Target URL: {result.url}"), new_x="LMARGIN", new_y="NEXT") pdf.cell(0, 8, f"Scan Time: {now_str}", new_x="LMARGIN", new_y="NEXT") - pdf.cell(0, 8, f"Security Score: {result.score}/100 (Grade: {result.grade})", new_x="LMARGIN", new_y="NEXT") + pdf.cell(0, 8, sanitize_text(f"Security Score: {result.score}/100 (Grade: {result.grade})"), new_x="LMARGIN", new_y="NEXT") if result.ssl_expiry_days is not None: pdf.cell(0, 8, f"SSL Expiry: {result.ssl_expiry_days} days left", new_x="LMARGIN", new_y="NEXT") pdf.ln(5) @@ -139,7 +172,7 @@ def export_web_pdf(result, output_path: str) -> str: pdf.set_text_color(0, 0, 0) pdf.ln(2) summary_text = result.ai_summary or f"An automated live security audit was performed on {result.url}. Out of the layers checked, {len(result.issues)} potential issues were flagged." - pdf.multi_cell(0, 5, summary_text) + pdf.multi_cell(0, 5, sanitize_text(summary_text)) pdf.ln(8) # Issues Section @@ -162,20 +195,20 @@ def export_web_pdf(result, output_path: str) -> str: elif severity == "Warning": pdf.set_text_color(218, 165, 32) else: pdf.set_text_color(0, 100, 0) - pdf.cell(0, 8, f"{idx}. {severity}: {i.issue}", new_x="LMARGIN", new_y="NEXT") + pdf.cell(0, 8, sanitize_text(f"{idx}. {severity}: {i.issue}"), new_x="LMARGIN", new_y="NEXT") # Details pdf.set_text_color(0, 0, 0) pdf.set_font("helvetica", "B", 9) pdf.cell(20, 6, "Layer:", border=0) pdf.set_font("helvetica", "", 9) - pdf.cell(0, 6, i.layer, new_x="LMARGIN", new_y="NEXT") + pdf.cell(0, 6, sanitize_text(i.layer), new_x="LMARGIN", new_y="NEXT") pdf.set_font("helvetica", "B", 9) pdf.cell(0, 6, "Remediation / Fix:", new_x="LMARGIN", new_y="NEXT") pdf.set_font("courier", "", 8.5) pdf.set_fill_color(245, 245, 245) - pdf.multi_cell(0, 4.5, i.fix, fill=True) + pdf.multi_cell(0, 4.5, sanitize_text(i.fix), fill=True) pdf.ln(4) pdf.line(pdf.get_x(), pdf.get_y(), 200, pdf.get_y()) diff --git a/tests/test_cli_pdf.py b/tests/test_cli_pdf.py index bac0ede..756102f 100644 --- a/tests/test_cli_pdf.py +++ b/tests/test_cli_pdf.py @@ -14,15 +14,15 @@ def test_export_code_pdf_compiles(tmp_path): VulnerabilityFinding( file_path="app.py", severity="Critical", - issue="Hardcoded Secret Key", - explanation="Exposing secret key inside app.py.", - suggested_fix="Load key from environment", + issue="Hardcoded Secret Key with unicode ’smart’ quotes", + explanation="Exposing secret key inside app.py • vulnerable to attacks.", + suggested_fix="Load key from environment: jwt_secret = Field(default=\"\") \u25b6 check it.", line_number=5 ), VulnerabilityFinding( file_path="db.py", severity="High", - issue="Raw SQL Statement", + issue="Raw SQL Statement \u2717 check fail", explanation="SQL injection inside db.py.", suggested_fix="Use parameterized queries", line_number=20 @@ -34,7 +34,7 @@ def test_export_code_pdf_compiles(tmp_path): total_files_found=10, files_triaged=["app.py", "db.py"], vulnerabilities=findings, - ai_summary="This is a dummy AI report summary describing security posture." + ai_summary="This is a dummy AI report summary describing security posture with check \u2713 and block \u2588." ) result.compute_score()