import { PDFDocument, rgb, StandardFonts } from "pdf-lib";
import { replicateTranslateBatch } from "../replicate-translate";

/**
 * Shape assumed for the value resolved by pdf-parse.
 * Only `text` is read by this module.
 */
type PdfParseResult = {
  numpages: number;
  text: string;
  info: Record<string, unknown>;
};

// Page geometry and typography for the generated document (values in PDF points).
const PAGE_WIDTH = 595;
const PAGE_HEIGHT = 842;
const MARGIN = 50;
const FONT_SIZE = 11;
const TITLE_SIZE = 13;
const LINE_HEIGHT = 16;
const MAX_LINE_CHARS = 80;
/** Baseline y-coordinate at which drawing starts on a fresh page. */
const TOP_Y = PAGE_HEIGHT - MARGIN;
/** Vertical room between the top start position and the bottom margin. */
const USABLE_HEIGHT = PAGE_HEIGHT - 2 * MARGIN;

/**
 * Extract text from a PDF buffer using pdf-parse.
 *
 * @param buffer - Raw bytes of the PDF.
 * @returns Whatever pdf-parse resolves with, typed as {@link PdfParseResult}.
 */
async function parsePdf(buffer: Buffer): Promise<PdfParseResult> {
  // Avoid Next.js issues with pdf-parse test file imports
  // eslint-disable-next-line @typescript-eslint/no-var-requires
  const pdfParse: (data: Buffer) => Promise<PdfParseResult> = require("pdf-parse/lib/pdf-parse.js");
  return pdfParse(buffer);
}

/**
 * Break a single whitespace-free token into pieces of at most `maxChars`
 * UTF-16 units, cutting only between code points so surrogate pairs stay intact.
 */
function splitLongWord(word: string, maxChars: number): string[] {
  const pieces: string[] = [];
  let piece = "";
  // for..of iterates by code point, so a surrogate pair is never cut in half.
  for (const ch of word) {
    if (piece && piece.length + ch.length > maxChars) {
      pieces.push(piece);
      piece = "";
    }
    piece += ch;
  }
  if (piece) pieces.push(piece);
  return pieces;
}

/**
 * Greedily wrap `text` into lines of at most `maxCharsPerLine` characters.
 *
 * Whitespace runs are collapsed to single spaces, and leading/trailing
 * whitespace never produces empty words or trailing spaces. A word longer
 * than the limit is hard-split so that no returned line exceeds the limit
 * (when the limit is at least 1).
 *
 * @param text - Text to wrap; may be empty.
 * @param maxCharsPerLine - Maximum line length, counted in characters.
 * @returns The wrapped lines; an empty array for empty or whitespace-only input.
 */
function wrapText(text: string, maxCharsPerLine: number): string[] {
  const tokens = text
    .split(/\s+/)
    // Leading/trailing whitespace yields empty tokens; drop them.
    .filter((word) => word.length > 0)
    .flatMap((word) =>
      maxCharsPerLine >= 1 && word.length > maxCharsPerLine
        ? splitLongWord(word, maxCharsPerLine)
        : [word]
    );

  const lines: string[] = [];
  let current = "";
  for (const token of tokens) {
    const candidate = current ? `${current} ${token}` : token;
    if (current && candidate.length > maxCharsPerLine) {
      lines.push(current);
      current = token;
    } else {
      current = candidate;
    }
  }
  if (current) lines.push(current);
  return lines;
}

/**
 * Translate a PDF buffer. Since PDFs don't support in-place text editing,
 * this extracts text, translates it, and creates a new formatted PDF.
 *
 * @param buffer - Raw bytes of the source PDF.
 * @param targetLanguage - Language passed to the translator and shown in the title.
 * @param sourceLanguage - Optional source language; only shown in the title.
 * @returns The bytes of the newly generated PDF.
 * @throws Error if no text could be extracted from the input PDF.
 */
export async function translatePdf(
  buffer: Buffer,
  targetLanguage: string,
  sourceLanguage?: string
): Promise<Buffer> {
  const parsed = await parsePdf(buffer);
  const rawText = parsed.text;

  // Split into paragraphs (separated by double newlines or page breaks)
  const paragraphs = rawText
    .split(/\n{2,}|\f/)
    .map((p) => p.replace(/\n/g, " ").trim())
    .filter((p) => p.length > 0);

  if (paragraphs.length === 0) {
    throw new Error("No extractable text found in PDF");
  }

  // Translate all paragraphs.
  // NOTE(review): sourceLanguage is not forwarded to the translator — confirm
  // whether replicateTranslateBatch accepts it.
  const translations = await replicateTranslateBatch(paragraphs, targetLanguage);

  // Build output PDF
  const pdfDoc = await PDFDocument.create();
  // NOTE(review): pdf-lib's standard fonts are presumably limited to WinAnsi
  // characters, so drawText may throw for non-Latin target languages; a custom
  // embedded font would be needed to support those — confirm.
  const font = await pdfDoc.embedFont(StandardFonts.Helvetica);
  const boldFont = await pdfDoc.embedFont(StandardFonts.HelveticaBold);

  let page = pdfDoc.addPage([PAGE_WIDTH, PAGE_HEIGHT]);
  let y = TOP_Y;

  // Start a new page when fewer than `needed` points remain above the bottom margin.
  const ensureSpace = (needed: number): void => {
    if (y - needed < MARGIN) {
      page = pdfDoc.addPage([PAGE_WIDTH, PAGE_HEIGHT]);
      y = TOP_Y;
    }
  };

  // Title
  const title = `Translation to: ${targetLanguage}${sourceLanguage ? ` (from: ${sourceLanguage})` : ""}`;
  ensureSpace(TITLE_SIZE + LINE_HEIGHT);
  page.drawText(title, {
    x: MARGIN,
    y,
    size: TITLE_SIZE,
    font: boldFont,
    color: rgb(0.2, 0.2, 0.7),
  });
  y -= TITLE_SIZE + LINE_HEIGHT;

  // Draw translated paragraphs
  for (const para of translations) {
    const lines = wrapText(para, MAX_LINE_CHARS);
    // Nothing to draw; skip so empty translations do not add stray gaps.
    if (lines.length === 0) continue;

    // Keep a paragraph together only when it can fit on one page. A taller
    // paragraph has to flow across pages anyway, so breaking before it would
    // just leave the current page mostly blank.
    const paragraphHeight = lines.length * LINE_HEIGHT + LINE_HEIGHT;
    if (paragraphHeight <= USABLE_HEIGHT) {
      ensureSpace(paragraphHeight);
    }

    for (const line of lines) {
      ensureSpace(LINE_HEIGHT);
      page.drawText(line, {
        x: MARGIN,
        y,
        size: FONT_SIZE,
        font,
        color: rgb(0, 0, 0),
      });
      y -= LINE_HEIGHT;
    }
    y -= LINE_HEIGHT * 0.5; // paragraph gap
  }

  const pdfBytes = await pdfDoc.save();
  return Buffer.from(pdfBytes);
}