feat: add admin panel, Replicate AI translation, and document translation
- Admin panel (/admin) with JWT auth: configure Replicate API token, JigsawStack API key, model version, enable/disable AI translation, change admin password. Settings persisted in data/settings.json.
- Replicate AI translation: POST /api/translate/replicate uses JigsawStack text-translate model via Replicate API. Main page switches to client-side AI translation when enabled.
- Document translation tab: supports PDF, DOCX, XLSX, XLS, CSV. Excel/Word formatting fully preserved (SheetJS + JSZip XML manipulation). PDF uses pdf-parse extraction + pdf-lib reconstruction. Column selector UI for tabular data (per-sheet, All/None toggles).
- Updated README with full implementation documentation.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
115
utils/document-processors/pdf.ts
Normal file
115
utils/document-processors/pdf.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
import { PDFDocument, rgb, StandardFonts } from "pdf-lib";
|
||||
import { replicateTranslateBatch } from "../replicate-translate";
|
||||
|
||||
/** The fields of the pdf-parse result object that this module relies on. */
interface PdfParseResult {
  /** Page count as reported by the parser. */
  numpages: number;
  /** Extracted text content of the document. */
  text: string;
  /** Additional metadata returned by the parser; shape is not constrained here. */
  info: Record<string, unknown>;
}
|
||||
|
||||
/**
 * Run pdf-parse over a PDF buffer and return its result.
 *
 * The parser is required lazily from its inner module path
 * (`pdf-parse/lib/pdf-parse.js`) to avoid the Next.js problems caused by the
 * test-file imports of the package entry point.
 *
 * @param buffer - Raw bytes of the PDF to parse.
 * @returns The parse result (page count, extracted text, metadata).
 */
async function parsePdf(buffer: Buffer): Promise<PdfParseResult> {
  // eslint-disable-next-line @typescript-eslint/no-var-requires
  const extractPdf = require("pdf-parse/lib/pdf-parse.js");
  const result = await extractPdf(buffer);
  return result;
}
|
||||
|
||||
/**
 * Greedily wrap `text` into lines of at most `maxCharsPerLine` characters.
 *
 * Words are separated on any run of whitespace and re-joined with single
 * spaces. A word that is longer than the limit is hard-broken into
 * limit-sized chunks so that no returned line exceeds the limit (as long as
 * the limit is at least 1). Empty or whitespace-only input yields `[]`.
 *
 * Lengths are measured in UTF-16 code units (`String.prototype.length`).
 *
 * @param text - The text to wrap.
 * @param maxCharsPerLine - Maximum number of characters per output line.
 * @returns The wrapped lines, without leading or trailing whitespace.
 */
function wrapText(text: string, maxCharsPerLine: number): string[] {
  const lines: string[] = [];
  let current = "";

  // Hard-breaking needs a positive, finite chunk size; for any other limit
  // overlong words are kept whole rather than looping forever.
  const chunkSize = Math.floor(maxCharsPerLine);
  const canHardBreak = Number.isFinite(chunkSize) && chunkSize >= 1;

  for (const token of text.split(/\s+/)) {
    // Leading/trailing whitespace produces empty tokens; skipping them keeps
    // stray spaces from being appended to a line.
    if (token === "") continue;

    let word = token;

    if (canHardBreak && word.length > chunkSize) {
      // The word can never fit on one line: flush what we have and emit the
      // word in full-width chunks, carrying the remainder forward.
      if (current) {
        lines.push(current);
        current = "";
      }
      while (word.length > chunkSize) {
        lines.push(word.slice(0, chunkSize));
        word = word.slice(chunkSize);
      }
    }

    const candidate = current ? current + " " + word : word;
    if (candidate.length > maxCharsPerLine) {
      if (current) lines.push(current);
      current = word;
    } else {
      current = candidate;
    }
  }

  if (current) lines.push(current);
  return lines;
}
|
||||
|
||||
/**
 * Translate a PDF buffer. Since PDFs don't support in-place text editing,
 * this extracts text, translates it, and creates a new formatted PDF.
 *
 * The generated document contains a coloured title line followed by the
 * translated paragraphs, laid out on 595 x 842 point pages with a 50 point
 * margin. The original layout, images and fonts are not carried over.
 *
 * NOTE(review): `sourceLanguage` is only shown in the title; it is not passed
 * to `replicateTranslateBatch` — confirm whether the helper accepts it.
 * NOTE(review): text is drawn with the standard Helvetica fonts, which
 * presumably cannot encode characters outside WinAnsi (e.g. CJK, Cyrillic) —
 * confirm; such translations may need an embedded custom font.
 *
 * @param buffer - Raw bytes of the source PDF.
 * @param targetLanguage - Language handed to the translation helper and shown in the title.
 * @param sourceLanguage - Optional source language, shown in the title when provided.
 * @returns The bytes of the newly generated PDF.
 * @throws Error when no text can be extracted from the input PDF.
 */
export async function translatePdf(
  buffer: Buffer,
  targetLanguage: string,
  sourceLanguage?: string
): Promise<Buffer> {
  const parsed = await parsePdf(buffer);
  const rawText = parsed.text;

  // Split into paragraphs (separated by double newlines or page breaks)
  const paragraphs = rawText
    .split(/\n{2,}|\f/)
    .map(p => p.replace(/\n/g, " ").trim())
    .filter(p => p.length > 0);

  if (paragraphs.length === 0) {
    throw new Error("No extractable text found in PDF");
  }

  // Translate all paragraphs
  const translations = await replicateTranslateBatch(paragraphs, targetLanguage);

  // Build output PDF
  const pdfDoc = await PDFDocument.create();
  const font = await pdfDoc.embedFont(StandardFonts.Helvetica);
  const boldFont = await pdfDoc.embedFont(StandardFonts.HelveticaBold);

  const PAGE_WIDTH = 595;
  const PAGE_HEIGHT = 842;
  const MARGIN = 50;
  const FONT_SIZE = 11;
  const TITLE_SIZE = 13;
  const LINE_HEIGHT = 16;
  const MAX_LINE_CHARS = 80;
  // Vertical room between the top and bottom margins of an empty page.
  const USABLE_HEIGHT = PAGE_HEIGHT - 2 * MARGIN;

  let page = pdfDoc.addPage([PAGE_WIDTH, PAGE_HEIGHT]);
  let y = PAGE_HEIGHT - MARGIN;

  // Start a new page when fewer than `needed` points remain above the bottom margin.
  function ensureSpace(needed: number) {
    if (y - needed < MARGIN) {
      page = pdfDoc.addPage([PAGE_WIDTH, PAGE_HEIGHT]);
      y = PAGE_HEIGHT - MARGIN;
    }
  }

  // Title
  const title = `Translation to: ${targetLanguage}${sourceLanguage ? ` (from: ${sourceLanguage})` : ""}`;
  ensureSpace(TITLE_SIZE + LINE_HEIGHT);
  page.drawText(title, {
    x: MARGIN,
    y,
    size: TITLE_SIZE,
    font: boldFont,
    color: rgb(0.2, 0.2, 0.7)
  });
  y -= TITLE_SIZE + LINE_HEIGHT;

  // Draw translated paragraphs
  for (const para of translations) {
    const lines = wrapText(para, MAX_LINE_CHARS);
    const paragraphHeight = lines.length * LINE_HEIGHT + LINE_HEIGHT;

    // Keep a paragraph together only when it can fit on a single page.
    // A taller paragraph has to span pages anyway, so forcing a page break
    // before it would just strand the rest of the current page (e.g. leave a
    // title-only first page); let it flow line by line instead.
    if (paragraphHeight <= USABLE_HEIGHT) {
      ensureSpace(paragraphHeight);
    }

    for (const line of lines) {
      ensureSpace(LINE_HEIGHT);
      page.drawText(line, {
        x: MARGIN,
        y,
        size: FONT_SIZE,
        font,
        color: rgb(0, 0, 0)
      });
      y -= LINE_HEIGHT;
    }
    y -= LINE_HEIGHT * 0.5; // paragraph gap
  }

  const pdfBytes = await pdfDoc.save();
  return Buffer.from(pdfBytes);
}
|
||||
Reference in New Issue
Block a user