- Admin panel (/admin) with JWT auth: configure Replicate API token, JigsawStack API key, model version, enable/disable AI translation, change admin password. Settings persisted in data/settings.json. - Replicate AI translation: POST /api/translate/replicate uses JigsawStack text-translate model via Replicate API. Main page switches to client-side AI translation when enabled. - Document translation tab: supports PDF, DOCX, XLSX, XLS, CSV. Excel/Word formatting fully preserved (SheetJS + JSZip XML manipulation). PDF uses pdf-parse extraction + pdf-lib reconstruction. Column selector UI for tabular data (per-sheet, All/None toggles). - Updated README with full implementation documentation. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
116 lines
3.4 KiB
TypeScript
116 lines
3.4 KiB
TypeScript
import { PDFDocument, rgb, StandardFonts } from "pdf-lib";
|
|
import { replicateTranslateBatch } from "../replicate-translate";
|
|
|
|
/**
 * Shape this module expects back from the `pdf-parse` call in `parsePdf`.
 * Of these members, `translatePdf` reads only `text`.
 */
interface PdfParseResult {
  /** Page count (presumably of the parsed document; confirm against pdf-parse). */
  numpages: number;
  /** Extracted text content. */
  text: string;
  /** Loosely typed metadata bag; values must be narrowed before use. */
  info: Record<string, unknown>;
}
|
|
|
|
/**
 * Run `pdf-parse` over a PDF buffer and return its result.
 *
 * The library's inner module is loaded lazily, at call time, through its deep
 * path rather than the package root; this avoids Next.js issues with
 * pdf-parse's test file imports.
 *
 * @param buffer - Raw bytes of the PDF to parse.
 * @returns Whatever `pdf-parse` resolves with, typed as `PdfParseResult`.
 */
async function parsePdf(buffer: Buffer): Promise<PdfParseResult> {
  // eslint-disable-next-line @typescript-eslint/no-var-requires
  const extract: (data: Buffer) => Promise<PdfParseResult> = require("pdf-parse/lib/pdf-parse.js");
  const result = await extract(buffer);
  return result;
}
|
|
|
|
/**
 * Greedily wrap text into lines of at most `maxCharsPerLine` UTF-16 code units.
 *
 * Words are the whitespace-separated tokens of `text`; runs of whitespace
 * collapse to a single space and leading/trailing whitespace is dropped.
 * A word longer than the limit is hard-broken into limit-sized chunks so that
 * no returned line can exceed the limit.
 *
 * @param text - Text to wrap; may be empty or whitespace-only.
 * @param maxCharsPerLine - Maximum line length; values below 1 are treated as 1.
 * @returns The wrapped lines, or an empty array when `text` contains no words.
 */
function wrapText(text: string, maxCharsPerLine: number): string[] {
  // Clamp so the hard-break loop below always makes progress.
  const limit = Math.max(1, Math.floor(maxCharsPerLine));
  const lines: string[] = [];
  let current = "";

  for (let word of text.split(/\s+/)) {
    // split() yields "" for leading/trailing whitespace; appending it would
    // leave a trailing space on the line.
    if (word.length === 0) continue;

    // A word that cannot fit on any line is broken into chunks.
    while (word.length > limit) {
      if (current) {
        lines.push(current);
        current = "";
      }
      let cut = limit;
      const lastUnit = word.charCodeAt(cut - 1);
      // Do not cut between the two halves of a surrogate pair.
      if (cut > 1 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
      lines.push(word.slice(0, cut));
      word = word.slice(cut);
    }

    const candidate = current ? `${current} ${word}` : word;
    if (candidate.length > limit) {
      // `current` is non-empty here: `word` alone is known to fit.
      lines.push(current);
      current = word;
    } else {
      current = candidate;
    }
  }

  if (current) lines.push(current);
  return lines;
}
|
|
|
|
/**
 * Translate the text of a PDF and return it as a newly generated PDF.
 *
 * Since PDFs don't support in-place text editing, this extracts the text,
 * translates it paragraph by paragraph, and lays the result out on fresh
 * 595x842 pages under a title line. The original layout is not preserved.
 *
 * @param buffer - Raw bytes of the source PDF.
 * @param targetLanguage - Passed to `replicateTranslateBatch` and shown in the title.
 * @param sourceLanguage - Optional; only shown in the title line. It is not
 *   forwarded to the translation call.
 * @returns Bytes of the generated PDF.
 * @throws Error when no text can be extracted from the input PDF.
 *
 * NOTE(review): the embedded Helvetica standard fonts are presumably limited
 * to WinAnsi-encodable characters, so translations into other scripts may make
 * `drawText` throw. Confirm against pdf-lib and consider a Unicode font.
 */
export async function translatePdf(
  buffer: Buffer,
  targetLanguage: string,
  sourceLanguage?: string
): Promise<Buffer> {
  const parsed = await parsePdf(buffer);
  const rawText = parsed.text;

  // Paragraphs are separated by blank lines or form feeds; single newlines
  // inside a paragraph are folded into spaces.
  const paragraphs = rawText
    .split(/\n{2,}|\f/)
    .map(p => p.replace(/\n/g, " ").trim())
    .filter(p => p.length > 0);

  if (paragraphs.length === 0) {
    throw new Error("No extractable text found in PDF");
  }

  // Translate all paragraphs in one batch call.
  const translations = await replicateTranslateBatch(paragraphs, targetLanguage);

  // Build output PDF
  const pdfDoc = await PDFDocument.create();
  const font = await pdfDoc.embedFont(StandardFonts.Helvetica);
  const boldFont = await pdfDoc.embedFont(StandardFonts.HelveticaBold);

  const PAGE_WIDTH = 595;
  const PAGE_HEIGHT = 842;
  const MARGIN = 50;
  const FONT_SIZE = 11;
  const TITLE_SIZE = 13;
  const LINE_HEIGHT = 16;
  const MAX_LINE_CHARS = 80;
  // Vertical room between the top and bottom margins of one page.
  const USABLE_HEIGHT = PAGE_HEIGHT - 2 * MARGIN;

  let page = pdfDoc.addPage([PAGE_WIDTH, PAGE_HEIGHT]);
  let y = PAGE_HEIGHT - MARGIN;

  // Start a new page when fewer than `needed` points remain above the bottom margin.
  function ensureSpace(needed: number) {
    if (y - needed < MARGIN) {
      page = pdfDoc.addPage([PAGE_WIDTH, PAGE_HEIGHT]);
      y = PAGE_HEIGHT - MARGIN;
    }
  }

  // Title
  const title = `Translation to: ${targetLanguage}${sourceLanguage ? ` (from: ${sourceLanguage})` : ""}`;
  ensureSpace(TITLE_SIZE + LINE_HEIGHT);
  page.drawText(title, {
    x: MARGIN,
    y,
    size: TITLE_SIZE,
    font: boldFont,
    color: rgb(0.2, 0.2, 0.7)
  });
  y -= TITLE_SIZE + LINE_HEIGHT;

  // Draw translated paragraphs
  for (const para of translations) {
    const lines = wrapText(para, MAX_LINE_CHARS);
    const blockHeight = lines.length * LINE_HEIGHT + LINE_HEIGHT;

    // Keep a paragraph together only when it can fit on a single page.
    // A taller paragraph has to spill over anyway, so forcing a break before
    // it would just leave the rest of the current page blank.
    if (blockHeight <= USABLE_HEIGHT) {
      ensureSpace(blockHeight);
    }

    for (const line of lines) {
      ensureSpace(LINE_HEIGHT);
      page.drawText(line, {
        x: MARGIN,
        y,
        size: FONT_SIZE,
        font,
        color: rgb(0, 0, 0)
      });
      y -= LINE_HEIGHT;
    }
    y -= LINE_HEIGHT * 0.5; // paragraph gap
  }

  const pdfBytes = await pdfDoc.save();
  return Buffer.from(pdfBytes);
}
|