feat: add admin panel, Replicate AI translation, and document translation

- Admin panel (/admin) with JWT auth: configure Replicate API token,
  JigsawStack API key, model version, enable/disable AI translation,
  change admin password. Settings persisted in data/settings.json.

- Replicate AI translation: POST /api/translate/replicate uses
  JigsawStack text-translate model via Replicate API. Main page
  switches to client-side AI translation when enabled.

- Document translation tab: supports PDF, DOCX, XLSX, XLS, CSV.
  Excel/Word formatting fully preserved (SheetJS + JSZip XML manipulation).
  PDF uses pdf-parse extraction + pdf-lib reconstruction.
  Column selector UI for tabular data (per-sheet, All/None toggles).

- Updated README with full implementation documentation.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-10 07:43:54 +01:00
parent 0190ea5da9
commit 0799101da3
23 changed files with 18595 additions and 261 deletions

View File

@@ -0,0 +1,106 @@
import JSZip from "jszip";
import { replicateTranslateBatch } from "../replicate-translate";
/**
 * Escape the five XML special characters (`&`, `<`, `>`, `"`, `'`) so that
 * `text` can be embedded as element content or inside an attribute value.
 */
function escapeXml(text: string): string {
  const entityFor: Record<string, string> = {
    "&": "&amp;",
    "<": "&lt;",
    ">": "&gt;",
    '"': "&quot;",
    "'": "&apos;",
  };
  // One pass over the input: each special character is mapped exactly once,
  // so the ampersands of freshly produced entities are never re-escaped.
  return text.replace(/[&<>"']/g, ch => entityFor[ch] ?? ch);
}
/**
 * Decode XML entity and numeric character references found in raw `<w:t>`
 * content back into plain text. Unknown or out-of-range references are left
 * untouched.
 */
function decodeXmlEntities(raw: string): string {
  const named: Record<string, string> = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
  return raw.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g, (whole: string, ref: string) => {
    if (ref.charAt(0) !== "#") return named[ref] ?? whole;
    const isHex = ref.charAt(1) === "x";
    const codePoint = parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
  });
}
/**
 * Extract the text-bearing paragraphs from a `document.xml` string.
 *
 * @param xml Raw WordprocessingML markup.
 * @returns One entry per paragraph that contains non-blank text:
 *   - `index`: ordinal of the paragraph among all scanned `<w:p>` elements
 *     (paragraphs without text still consume an index);
 *   - `text`: concatenated, entity-decoded and trimmed content of its `<w:t>` runs;
 *   - `start` / `end`: character offsets of the paragraph in `xml`
 *     (`end` is exclusive and points just past `</w:p>`).
 *   Entries are in document order and their ranges never overlap.
 */
function extractParagraphs(xml: string): { index: number; text: string; start: number; end: number }[] {
  const paragraphs: { index: number; text: string; start: number; end: number }[] = [];
  const paragraphOpen = /<w:p[ >]/g;
  // Match only real <w:t> elements. A bare `<w:t[^>]*>` would also match
  // <w:tab/>, <w:tabs>, <w:tbl> ... and drag raw markup into the text.
  // Self-closing <w:t .../> is excluded as well.
  const textRun = /<w:t(?:\s(?:[^>\/]|\/(?!>))*)?>([\s\S]*?)<\/w:t>/g;
  const closeTag = "</w:p>";
  let idx = 0;
  let open: RegExpExecArray | null;
  while ((open = paragraphOpen.exec(xml)) !== null) {
    const start = open.index;
    const closeAt = xml.indexOf(closeTag, start);
    if (closeAt === -1) break; // unterminated paragraph: nothing more to extract
    const end = closeAt + closeTag.length;
    // Resume scanning after this paragraph so that a paragraph nested inside it
    // (e.g. text-box content) cannot produce a second, overlapping range.
    paragraphOpen.lastIndex = end;
    const paraXml = xml.slice(start, end);
    const parts: string[] = [];
    textRun.lastIndex = 0;
    let run: RegExpExecArray | null;
    while ((run = textRun.exec(paraXml)) !== null) {
      parts.push(run[1] ?? "");
    }
    // Decode entities so callers receive plain text; the write-back path
    // re-escapes, which would otherwise turn `&amp;` into `&amp;amp;`.
    const text = decodeXmlEntities(parts.join("")).trim();
    if (text) {
      paragraphs.push({ index: idx, text, start, end });
    }
    idx++;
  }
  return paragraphs;
}
/**
 * Replace the visible text of one paragraph with `translatedText`.
 *
 * The first `<w:t>` element that holds non-blank text receives the whole
 * (XML-escaped) translation, so the translated paragraph takes on the
 * formatting of that run. Every later `<w:t>` is emptied; blank `<w:t>`
 * elements that precede the first text are left as they are.
 *
 * @param paraXml Markup of a single `<w:p>...</w:p>` element.
 * @param translatedText Plain (unescaped) replacement text.
 * @returns The paragraph markup with its text swapped out.
 */
function replaceParagraphText(paraXml: string, translatedText: string): string {
  let firstDone = false;
  // Match only real <w:t> elements. The looser `<w:t[^>]*>` also matched
  // <w:tab/> and <w:tabs>, so the "text" it replaced could span structural
  // markup (e.g. the end of <w:pPr>) and leave the document malformed.
  const textRun = /<w:t(?:\s(?:[^>\/]|\/(?!>))*)?>([\s\S]*?)<\/w:t>/g;
  return paraXml.replace(textRun, (original: string, content: string) => {
    if (firstDone) {
      return `<w:t></w:t>`;
    }
    if (!content.trim()) {
      return original; // preserve empty runs before the first text
    }
    firstDone = true;
    // xml:space="preserve" keeps leading/trailing spaces of the translation.
    return `<w:t xml:space="preserve">${escapeXml(translatedText)}</w:t>`;
  });
}
/**
 * Translate the body text of a DOCX file while keeping the rest of the
 * package untouched.
 *
 * Only `word/document.xml` is rewritten; every other entry of the archive is
 * carried over as loaded. A paragraph whose translation is missing from the
 * translator's response keeps its original text.
 *
 * @param buffer Contents of the `.docx` file.
 * @param targetLanguage Language identifier forwarded to `replicateTranslateBatch`.
 * @returns A new DOCX buffer.
 * @throws Error if the archive has no `word/document.xml`.
 */
export async function translateDocx(
  buffer: Buffer,
  targetLanguage: string
): Promise<Buffer> {
  const zip = await JSZip.loadAsync(buffer);
  const docFile = zip.file("word/document.xml");
  if (!docFile) throw new Error("Invalid DOCX: missing word/document.xml");
  let xml = await docFile.async("string");
  const paragraphs = extractParagraphs(xml);
  if (paragraphs.length === 0) {
    // Nothing to translate: hand back the package re-serialised as-is.
    return Buffer.from(await zip.generateAsync({ type: "nodebuffer" }));
  }
  // Translate all paragraphs in one call; translations[i] belongs to paragraphs[i].
  const translations = await replicateTranslateBatch(
    paragraphs.map(p => p.text),
    targetLanguage
  );
  // `paragraphs` is in ascending offset order, so walking it backwards rewrites
  // the document end-to-start: offsets of paragraphs still to be processed stay
  // valid, and no sort or per-item index lookup is needed.
  let lowestRewrittenStart = Number.POSITIVE_INFINITY;
  for (let i = paragraphs.length - 1; i >= 0; i--) {
    const para = paragraphs[i];
    if (!para) continue;
    // A range reaching into an already rewritten region has stale offsets
    // (nested paragraphs); slicing with them would corrupt the XML, so skip it.
    if (para.end > lowestRewrittenStart) continue;
    const translated = translations[i] ?? para.text;
    const originalPara = xml.slice(para.start, para.end);
    const translatedPara = replaceParagraphText(originalPara, translated);
    xml = xml.slice(0, para.start) + translatedPara + xml.slice(para.end);
    lowestRewrittenStart = para.start;
  }
  zip.file("word/document.xml", xml);
  const outBuffer = await zip.generateAsync({
    type: "nodebuffer",
    compression: "DEFLATE"
  });
  return Buffer.from(outBuffer);
}

View File

@@ -0,0 +1,95 @@
import * as XLSX from "xlsx";
import { replicateTranslateBatch } from "../replicate-translate";
/** Per-sheet column labels, as produced by `getExcelColumns` for the column selection UI. */
export type SheetColumnInfo = {
// Name of the worksheet inside the workbook.
sheetName: string;
// One label per column: the header cell's value, or the column letter (A, B, C...) when there is none.
columns: string[];
};
/** Columns of one worksheet that `translateExcel` should translate. */
export type ColumnSelection = {
// Worksheet this selection applies to; matched against the workbook's sheet names.
sheetName: string;
// 0-based column indices to translate; used directly as sheet column numbers (0 = column A).
columnIndices: number[];
};
/**
 * Parse an Excel/CSV buffer and return sheet/column metadata for the column
 * selection UI.
 *
 * For every worksheet, one label is produced per column of the sheet's used
 * range: the value of the cell in the first row of that range (the header
 * row), or the column letter (A, B, C...) when that cell is empty.
 *
 * NOTE(review): `columns[i]` describes sheet column `range.s.c + i`, whereas
 * `translateExcel` interprets `columnIndices` as absolute column numbers. The
 * two agree only when the used range starts at column A — confirm how the UI
 * maps a chosen label back to an index.
 *
 * @param buffer Workbook contents.
 * @param filename Original file name; not consulted by this function.
 * @returns One `SheetColumnInfo` per worksheet, in workbook order.
 */
export function getExcelColumns(buffer: Buffer, filename: string): SheetColumnInfo[] {
  const wb = XLSX.read(buffer, { type: "buffer" });
  return wb.SheetNames.map(sheetName => {
    const ws = wb.Sheets[sheetName];
    const range = XLSX.utils.decode_range(ws["!ref"] ?? "A1");
    // Use the first row of the used range as the header row. translateExcel
    // skips exactly that row, so reading absolute row 0 here would show
    // column letters instead of headers for sheets that do not start at row 1.
    const headerRow = range.s.r;
    const columns: string[] = [];
    for (let c = range.s.c; c <= range.e.c; c++) {
      const cell = ws[XLSX.utils.encode_cell({ r: headerRow, c })];
      const hasHeader = cell && cell.v != null;
      columns.push(hasHeader ? String(cell.v) : XLSX.utils.encode_col(c));
    }
    return { sheetName, columns };
  });
}
/**
 * Translate text cells of an Excel/CSV workbook and return it as an XLSX buffer.
 *
 * Column choice is made per sheet: a sheet that has an entry in
 * `columnSelections` gets only the listed columns translated (an empty
 * `columnIndices` therefore translates nothing on that sheet); a sheet
 * without an entry — which includes every sheet when `columnSelections` is
 * empty — gets all columns of its used range translated.
 *
 * The first row of each sheet's used range is treated as the header row and
 * is never translated. Only non-blank string cells are sent to the
 * translator. A cell whose translation is missing from the response keeps its
 * original content.
 *
 * @param buffer Workbook contents.
 * @param targetLanguage Language identifier forwarded to `replicateTranslateBatch`.
 * @param columnSelections Per-sheet column choices (see above).
 * @returns The translated workbook serialised as XLSX.
 */
export async function translateExcel(
  buffer: Buffer,
  targetLanguage: string,
  columnSelections: ColumnSelection[]
): Promise<Buffer> {
  // Number of cells sent to the translator per request.
  const BATCH_SIZE = 50;
  type CellRef = { addr: string; text: string };
  // cellStyles / cellNF ask the parser to retain style and number-format info.
  const wb = XLSX.read(buffer, { type: "buffer", cellStyles: true, cellNF: true });
  for (const sheet of wb.SheetNames) {
    const ws = wb.Sheets[sheet];
    if (!ws["!ref"]) continue; // sheet without a used range
    const range = XLSX.utils.decode_range(ws["!ref"]);
    const selection = columnSelections.find(s => s.sheetName === sheet);
    const columnsToTranslate = selection
      ? selection.columnIndices
      : Array.from({ length: range.e.c - range.s.c + 1 }, (_, i) => i + range.s.c);
    // Collect all translatable cells of this sheet, column by column.
    const cellRefs: CellRef[] = [];
    for (const colIdx of columnsToTranslate) {
      // Start one row below the top of the used range to skip the header row.
      for (let r = range.s.r + 1; r <= range.e.r; r++) {
        const addr = XLSX.utils.encode_cell({ r, c: colIdx });
        const cell = ws[addr];
        if (cell && cell.t === "s" && typeof cell.v === "string" && cell.v.trim()) {
          cellRefs.push({ addr, text: cell.v });
        }
      }
    }
    if (cellRefs.length === 0) continue;
    // Batches are sent one after another, not in parallel.
    for (let i = 0; i < cellRefs.length; i += BATCH_SIZE) {
      const batch = cellRefs.slice(i, i + BATCH_SIZE);
      const translations = await replicateTranslateBatch(
        batch.map(c => c.text),
        targetLanguage
      );
      batch.forEach((cellRef, idx) => {
        const translated = translations[idx];
        const cell = ws[cellRef.addr];
        // A short or sparse response must not wipe the cell: without a
        // string translation the original value is kept.
        if (!cell || typeof translated !== "string") return;
        cell.v = translated;
        // Keep the cached renderings in step with the new value: refresh the
        // HTML (`h`) and formatted-text (`w`) forms, drop the rich-text (`r`) one.
        if (cell.h) cell.h = translated;
        if (cell.r) cell.r = undefined;
        if (cell.w) cell.w = translated;
      });
    }
  }
  const out = XLSX.write(wb, { type: "buffer", bookType: "xlsx" });
  return Buffer.from(out);
}

View File

@@ -0,0 +1,12 @@
/**
 * Ambient typings for the `pdf-parse/lib/pdf-parse.js` module path, so that
 * path can be imported with type information.
 */
declare module "pdf-parse/lib/pdf-parse.js" {
/** Shape of the value the `pdfParse` promise resolves to. */
interface PdfData {
// presumably the number of pages in the document — confirm against the pdf-parse docs
numpages: number;
// presumably the number of pages actually rendered — confirm against the pdf-parse docs
numrender: number;
// Free-form key/value data; values must be narrowed before use.
info: Record<string, unknown>;
// Free-form key/value data; values must be narrowed before use.
metadata: Record<string, unknown>;
// Extracted text as a single string.
text: string;
// NOTE(review): looks like a library/engine version string — confirm what it identifies.
version: string;
}
/** Parse a PDF buffer; `options` is an untyped bag passed through to the library. */
function pdfParse(dataBuffer: Buffer, options?: Record<string, unknown>): Promise<PdfData>;
// CommonJS-style export: the module's export is the function itself.
export = pdfParse;
}

View File

@@ -0,0 +1,115 @@
import { PDFDocument, rgb, StandardFonts } from "pdf-lib";
import { replicateTranslateBatch } from "../replicate-translate";
/** The subset of the pdf-parse result that this module declares for its own use. */
type PdfParseResult = {
// presumably the page count of the source PDF — confirm against the pdf-parse docs
numpages: number;
// Extracted text; `translatePdf` splits it into paragraphs on blank lines and form feeds.
text: string;
// Free-form key/value data from the parser; not read in this file's visible code.
info: Record<string, unknown>;
};
/**
 * Run pdf-parse over `buffer` and resolve with its result.
 *
 * The parser is pulled in with a call-time `require` of the library's inner
 * module rather than a top-level import of the package root.
 */
async function parsePdf(buffer: Buffer): Promise<PdfParseResult> {
  // Avoid Next.js issues with pdf-parse test file imports
  // eslint-disable-next-line @typescript-eslint/no-var-requires
  const parse = require("pdf-parse/lib/pdf-parse.js");
  const result = await parse(buffer);
  return result;
}
/**
 * Greedily wrap `text` into lines of at most `maxCharsPerLine` characters.
 *
 * Words are separated on any run of whitespace and re-joined with single
 * spaces. A word that is longer than the limit is hard-broken into
 * limit-sized pieces instead of being emitted as one overlong line, so the
 * result never exceeds the limit (a long URL would otherwise run off the page).
 *
 * @param text Text to wrap; may be empty.
 * @param maxCharsPerLine Maximum line length. Values below 1 are treated as 1;
 *   a non-finite value disables wrapping.
 * @returns The wrapped lines; empty when `text` contains no words.
 */
function wrapText(text: string, maxCharsPerLine: number): string[] {
  const limit = Number.isFinite(maxCharsPerLine)
    ? Math.max(1, Math.floor(maxCharsPerLine))
    : Number.POSITIVE_INFINITY;
  const lines: string[] = [];
  let current = "";
  for (const word of text.split(/\s+/)) {
    if (!word) continue; // leading/trailing whitespace yields empty tokens
    const candidate = current ? `${current} ${word}` : word;
    if (candidate.length <= limit) {
      current = candidate;
      continue;
    }
    if (current) lines.push(current);
    // Split by code point so a surrogate pair is never cut in half.
    const glyphs = Array.from(word);
    if (glyphs.length <= limit) {
      current = word;
      continue;
    }
    // Overlong word: emit full-width pieces; the remainder starts the next
    // line and may still be joined by the words that follow.
    let offset = 0;
    while (glyphs.length - offset > limit) {
      lines.push(glyphs.slice(offset, offset + limit).join(""));
      offset += limit;
    }
    current = glyphs.slice(offset).join("");
  }
  if (current) lines.push(current);
  return lines;
}
/**
 * Translate a PDF buffer. Since PDFs don't support in-place text editing,
 * this extracts the text, translates it paragraph by paragraph, and lays the
 * result out in a brand-new, plain A4-sized PDF (title line followed by the
 * translated paragraphs). The layout, fonts and images of the source
 * document are not carried over.
 *
 * A paragraph whose translation is missing from the translator's response is
 * rendered with its original text.
 *
 * NOTE(review): output is drawn with pdf-lib's standard Helvetica fonts; these
 * presumably cannot encode non-Latin scripts, in which case `drawText` would
 * throw for such target languages — confirm and embed a Unicode font if needed.
 *
 * @param buffer Contents of the source PDF.
 * @param targetLanguage Language identifier forwarded to `replicateTranslateBatch`; also shown in the title.
 * @param sourceLanguage Optional; only shown in the title line.
 * @returns The newly generated PDF.
 * @throws Error if no text could be extracted from the source PDF.
 */
export async function translatePdf(
  buffer: Buffer,
  targetLanguage: string,
  sourceLanguage?: string
): Promise<Buffer> {
  const parsed = await parsePdf(buffer);
  // Paragraphs are separated by blank lines or form feeds; single newlines
  // inside a paragraph are soft line breaks and become spaces.
  const paragraphs = parsed.text
    .split(/\n{2,}|\f/)
    .map(p => p.replace(/\n/g, " ").trim())
    .filter(p => p.length > 0);
  if (paragraphs.length === 0) {
    throw new Error("No extractable text found in PDF");
  }
  // Translate all paragraphs; translations[i] belongs to paragraphs[i].
  const translations = await replicateTranslateBatch(paragraphs, targetLanguage);
  // Build output PDF
  const pdfDoc = await PDFDocument.create();
  const font = await pdfDoc.embedFont(StandardFonts.Helvetica);
  const boldFont = await pdfDoc.embedFont(StandardFonts.HelveticaBold);
  const PAGE_WIDTH = 595;
  const PAGE_HEIGHT = 842;
  const MARGIN = 50;
  const FONT_SIZE = 11;
  const TITLE_SIZE = 13;
  const LINE_HEIGHT = 16;
  const MAX_LINE_CHARS = 80;
  const USABLE_HEIGHT = PAGE_HEIGHT - 2 * MARGIN;
  let page = pdfDoc.addPage([PAGE_WIDTH, PAGE_HEIGHT]);
  // `y` is the vertical position of the next line; it moves down the page.
  let y = PAGE_HEIGHT - MARGIN;
  // Start a new page when fewer than `needed` units remain above the bottom margin.
  function ensureSpace(needed: number) {
    if (y - needed < MARGIN) {
      page = pdfDoc.addPage([PAGE_WIDTH, PAGE_HEIGHT]);
      y = PAGE_HEIGHT - MARGIN;
    }
  }
  // Title
  const title = `Translation to: ${targetLanguage}${sourceLanguage ? ` (from: ${sourceLanguage})` : ""}`;
  ensureSpace(TITLE_SIZE + LINE_HEIGHT);
  page.drawText(title, {
    x: MARGIN,
    y,
    size: TITLE_SIZE,
    font: boldFont,
    color: rgb(0.2, 0.2, 0.7)
  });
  y -= TITLE_SIZE + LINE_HEIGHT;
  // Draw translated paragraphs
  for (let i = 0; i < paragraphs.length; i++) {
    // Fall back to the source text rather than crashing on a missing entry.
    const para = translations[i] ?? paragraphs[i] ?? "";
    const lines = wrapText(para, MAX_LINE_CHARS);
    const blockHeight = lines.length * LINE_HEIGHT + LINE_HEIGHT;
    // Keep a paragraph together only if it can fit on one page at all.
    // Otherwise the page break would be pointless and leave a blank or
    // nearly blank page; such paragraphs simply flow across pages.
    if (blockHeight <= USABLE_HEIGHT) {
      ensureSpace(blockHeight);
    }
    for (const line of lines) {
      ensureSpace(LINE_HEIGHT);
      page.drawText(line, {
        x: MARGIN,
        y,
        size: FONT_SIZE,
        font,
        color: rgb(0, 0, 0)
      });
      y -= LINE_HEIGHT;
    }
    y -= LINE_HEIGHT * 0.5; // paragraph gap
  }
  const pdfBytes = await pdfDoc.save();
  return Buffer.from(pdfBytes);
}