feat: add admin panel, Replicate AI translation, and document translation

- Admin panel (/admin) with JWT auth: configure Replicate API token, JigsawStack API key, model version, enable/disable AI translation, and change the admin password. Settings are persisted in data/settings.json.
- Replicate AI translation: POST /api/translate/replicate uses the JigsawStack text-translate model via the Replicate API. The main page switches to client-side AI translation when enabled.
- Document translation tab: supports PDF, DOCX, XLSX, XLS, and CSV. Excel/Word formatting is fully preserved (SheetJS + JSZip XML manipulation). PDF uses pdf-parse extraction + pdf-lib reconstruction. Column selector UI for tabular data (per-sheet, All/None toggles).
- Updated README with full implementation documentation.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-10 07:43:54 +01:00
parent 0190ea5da9
commit 0799101da3
23 changed files with 18595 additions and 261 deletions
--- a/utils/admin-auth.ts
+++ b/utils/admin-auth.ts
@@ -0,0 +1,55 @@
+import { createHash, timingSafeEqual } from "crypto";
+import { SignJWT, jwtVerify } from "jose";
+import { NextApiRequest, NextApiResponse } from "next";
+import { readSettings } from "./settings-store";
+
+// Name of the request cookie that getTokenFromRequest reads the admin JWT from.
+const JWT_COOKIE = "lingva_admin";
+// Lifetime handed to SignJWT.setExpirationTime when a token is issued.
+const JWT_EXPIRY = "8h";
+
+// Well-known fallback used only when ADMIN_JWT_SECRET is not configured.
+const DEFAULT_JWT_SECRET = "lingva-admin-secret-change-me";
+// Ensures the "insecure default secret" warning is printed once per process.
+let warnedAboutDefaultSecret = false;
+
+/**
+ * Returns the HMAC key used to sign and verify admin tokens.
+ *
+ * The key is the UTF-8 encoding of the ADMIN_JWT_SECRET environment variable.
+ * When the variable is unset or empty, the built-in default is used instead
+ * (an empty value must not become a zero-length key) and a warning is logged,
+ * because tokens signed with the public default can be forged by anyone.
+ *
+ * @returns The secret as bytes, suitable for jose's sign/verify functions.
+ */
+function getSecret(): Uint8Array {
+    const configured = process.env["ADMIN_JWT_SECRET"];
+    if (configured) {
+        return new TextEncoder().encode(configured);
+    }
+    if (!warnedAboutDefaultSecret) {
+        warnedAboutDefaultSecret = true;
+        console.warn(
+            "ADMIN_JWT_SECRET is not set; falling back to the built-in default secret. " +
+                "Set ADMIN_JWT_SECRET to a long random value before exposing the admin panel."
+        );
+    }
+    return new TextEncoder().encode(DEFAULT_JWT_SECRET);
+}
+
+/**
+ * Issues a fresh admin session token.
+ *
+ * The token carries the single claim `role: "admin"`, is stamped with the
+ * current issue time, expires after JWT_EXPIRY and is signed with HS256
+ * using the key from getSecret().
+ *
+ * @returns The compact serialized JWT.
+ */
+export async function signAdminToken(): Promise<string> {
+    const claims = { role: "admin" };
+    const unsigned = new SignJWT(claims)
+        .setProtectedHeader({ alg: "HS256" })
+        .setIssuedAt()
+        .setExpirationTime(JWT_EXPIRY);
+    const token = await unsigned.sign(getSecret());
+    return token;
+}
+
+/**
+ * Checks whether `token` is a valid admin session token.
+ *
+ * A token is accepted only when its signature verifies against getSecret(),
+ * it uses HS256 (the algorithm signAdminToken signs with), it has not
+ * expired, and it carries the `role: "admin"` claim that signAdminToken sets.
+ * Any verification failure is reported as `false` rather than thrown.
+ *
+ * @param token - Compact serialized JWT taken from the request.
+ * @returns `true` when the token is valid, otherwise `false`.
+ */
+export async function verifyAdminToken(token: string): Promise<boolean> {
+    try {
+        const { payload } = await jwtVerify(token, getSecret(), {
+            // Pin the algorithm instead of accepting whatever the token header claims.
+            algorithms: ["HS256"]
+        });
+        return payload["role"] === "admin";
+    } catch {
+        // Malformed, expired or wrongly signed tokens are simply "not valid".
+        return false;
+    }
+}
+
+/**
+ * Pulls the admin token out of an API request.
+ *
+ * The session cookie is consulted first; if it is absent or empty, an
+ * `Authorization: Bearer <token>` header is used instead.
+ *
+ * @param req - Incoming Next.js API request.
+ * @returns The token string, or `null` when neither source provides one.
+ */
+export function getTokenFromRequest(req: NextApiRequest): string | null {
+    const fromCookie = req.cookies[JWT_COOKIE];
+    if (fromCookie) {
+        return fromCookie;
+    }
+
+    const bearerPrefix = "Bearer ";
+    const header = req.headers.authorization;
+    return header?.startsWith(bearerPrefix) ? header.slice(bearerPrefix.length) : null;
+}
+
+/**
+ * Guard for admin-only API routes.
+ *
+ * When the request carries no valid admin token, a `401` JSON response
+ * (`{ error: "Unauthorized" }`) is sent and the caller is told to stop.
+ *
+ * @param req - Incoming request to authenticate.
+ * @param res - Response used to send the 401 on failure.
+ * @returns `true` if the caller may proceed, `false` if a 401 was already sent.
+ */
+export async function requireAdmin(
+    req: NextApiRequest,
+    res: NextApiResponse
+): Promise<boolean> {
+    const token = getTokenFromRequest(req);
+    const authorized = token ? await verifyAdminToken(token) : false;
+    if (authorized) {
+        return true;
+    }
+
+    res.status(401).json({ error: "Unauthorized" });
+    return false;
+}
+
+/**
+ * Checks a submitted admin password against the stored one.
+ *
+ * Both values are hashed to fixed-length SHA-256 digests and compared with
+ * timingSafeEqual so the comparison time does not reveal how much of the
+ * password matched. Non-string input (for example a JSON body carrying a
+ * number or object) is rejected instead of being compared.
+ *
+ * @param password - Password supplied by the client.
+ * @returns `true` when it equals `settings.adminPasswordHash`.
+ */
+export function checkPassword(password: string): boolean {
+    const expected: unknown = readSettings().adminPasswordHash;
+    if (typeof password !== "string" || typeof expected !== "string") {
+        return false;
+    }
+    // Hashing first gives equal-length buffers, which timingSafeEqual requires.
+    const digest = (value: string): Buffer =>
+        createHash("sha256").update(value, "utf8").digest();
+    return timingSafeEqual(digest(password), digest(expected));
+}
+
+// Exported alias of the session cookie name (same value as JWT_COOKIE).
+export const COOKIE_NAME = JWT_COOKIE;
--- a/utils/document-processors/docx.ts
+++ b/utils/document-processors/docx.ts
@@ -0,0 +1,106 @@
+import JSZip from "jszip";
+import { replicateTranslateBatch } from "../replicate-translate";
+
+// Replacement for each character that must not appear raw in XML text.
+const XML_ESCAPES: Record<string, string> = {
+    "&": "&amp;",
+    "<": "&lt;",
+    ">": "&gt;",
+    "\"": "&quot;",
+    "'": "&apos;"
+};
+
+/**
+ * Escapes the five XML special characters (& < > " ') in `text` so it can
+ * be placed inside an element or attribute value.
+ */
+function escapeXml(text: string): string {
+    return text.replace(/[&<>"']/g, ch => XML_ESCAPES[ch] ?? ch);
+}
+
+/** Decodes the predefined XML entities and numeric character references in `text`. */
+function unescapeXml(text: string): string {
+    // Single pass so that "&amp;lt;" decodes to "&lt;" and not to "<".
+    return text.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (whole: string, ref: string) => {
+        switch (ref) {
+            case "amp": return "&";
+            case "lt": return "<";
+            case "gt": return ">";
+            case "quot": return "\"";
+            case "apos": return "'";
+        }
+        const codePoint = ref[1] === "x" ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10);
+        // Leave references to impossible code points untouched.
+        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
+    });
+}
+
+/**
+ * Extract paragraph texts from a document.xml string.
+ *
+ * Each `<w:p>` element is scanned for `<w:t>` runs; their contents are
+ * concatenated, XML-unescaped and trimmed. Paragraphs without text are
+ * skipped but still counted.
+ *
+ * @param xml - Contents of word/document.xml.
+ * @returns One entry per non-empty paragraph, in document order:
+ *   `index` is the paragraph number, `text` the plain (unescaped) text, and
+ *   `start`/`end` the offsets of the paragraph element in `xml`
+ *   (`xml.slice(start, end)` is the whole `<w:p>…</w:p>` span).
+ */
+function extractParagraphs(xml: string): { index: number; text: string; start: number; end: number }[] {
+    const paragraphs: { index: number; text: string; start: number; end: number }[] = [];
+    // "<w:p" followed by whitespace or ">" so that <w:pPr>, <w:pStyle> etc. are not matched.
+    const pRegex = /<w:p[\s>]/g;
+    const pCloseTag = "</w:p>";
+    let idx = 0;
+    let match: RegExpExecArray | null;
+
+    while ((match = pRegex.exec(xml)) !== null) {
+        const start = match.index;
+        const closeAt = xml.indexOf(pCloseTag, start);
+        if (closeAt === -1) break; // unterminated paragraph: stop scanning
+        const end = closeAt + pCloseTag.length;
+        // Resume after this paragraph so a paragraph nested inside it (e.g. in a
+        // text box) cannot produce a second, overlapping range.
+        pRegex.lastIndex = end;
+
+        const paraXml = xml.slice(start, end);
+
+        // Only real text elements: "<w:t>" or "<w:t attr=…>", never <w:tab/>, <w:tabs>
+        // or a self-closing <w:t/>.
+        const textParts: string[] = [];
+        const tRegex = /<w:t(?:\s[^>]*)?(?<!\/)>([\s\S]*?)<\/w:t>/g;
+        let tMatch: RegExpExecArray | null;
+        while ((tMatch = tRegex.exec(paraXml)) !== null) {
+            textParts.push(tMatch[1] ?? "");
+        }
+
+        // Decode entities so the translator sees "&" rather than "&amp;" and the text
+        // is not escaped a second time when it is written back.
+        const text = unescapeXml(textParts.join("")).trim();
+        if (text) {
+            paragraphs.push({ index: idx, text, start, end });
+        }
+        idx++;
+    }
+    return paragraphs;
+}
+
+/**
+ * Replace the text of a paragraph while keeping its run structure.
+ *
+ * The first `<w:t>` element that contains non-whitespace text receives the
+ * whole translation (XML-escaped, with `xml:space="preserve"`), so the
+ * translation inherits the formatting of that run. Every later `<w:t>` is
+ * emptied; empty `<w:t>` elements before the first text are left as they are.
+ *
+ * @param paraXml - XML of a single `<w:p>…</w:p>` element.
+ * @param translatedText - Plain (unescaped) replacement text.
+ * @returns The paragraph XML with its text replaced.
+ */
+function replaceParagraphText(paraXml: string, translatedText: string): string {
+    let firstDone = false;
+    // Match only real text elements ("<w:t>" / "<w:t attr=…>"). A looser "<w:t[^>]*>"
+    // would also match <w:tabs> or <w:tab …/> and overwrite paragraph properties.
+    const textElement = /<w:t(?:\s[^>]*)?(?<!\/)>([\s\S]*?)<\/w:t>/g;
+    return paraXml.replace(textElement, (original: string, content: string) => {
+        if (firstDone) {
+            return `<w:t></w:t>`;
+        }
+        if (!content.trim()) {
+            return original; // preserve empty runs before the first text
+        }
+        firstDone = true;
+        return `<w:t xml:space="preserve">${escapeXml(translatedText)}</w:t>`;
+    });
+}
+
+/**
+ * Translate a DOCX file buffer, preserving formatting.
+ *
+ * Only `word/document.xml` is processed: the text of every non-empty
+ * paragraph is sent to replicateTranslateBatch and written back into the
+ * paragraph's first text run. All other parts of the archive are copied
+ * unchanged.
+ *
+ * @param buffer - Contents of the .docx file.
+ * @param targetLanguage - Language passed through to the translator.
+ * @returns A new .docx file as a Buffer.
+ * @throws Error if the archive has no `word/document.xml`.
+ */
+export async function translateDocx(
+    buffer: Buffer,
+    targetLanguage: string
+): Promise<Buffer> {
+    const zip = await JSZip.loadAsync(buffer);
+    const docFile = zip.file("word/document.xml");
+    if (!docFile) throw new Error("Invalid DOCX: missing word/document.xml");
+
+    let xml = await docFile.async("string");
+    const paragraphs = extractParagraphs(xml);
+
+    if (paragraphs.length === 0) {
+        return Buffer.from(await zip.generateAsync({ type: "nodebuffer" }));
+    }
+
+    // Translate all paragraphs
+    const translations = await replicateTranslateBatch(
+        paragraphs.map(p => p.text),
+        targetLanguage
+    );
+
+    // Pair each paragraph with its translation up front (falling back to the
+    // original text), so no per-paragraph lookup is needed afterwards.
+    const replacements = paragraphs.map((para, i) => ({
+        para,
+        translated: translations[i] ?? para.text
+    }));
+
+    // Replace paragraphs from end to start to preserve offsets
+    replacements.sort((a, b) => b.para.start - a.para.start);
+    for (const { para, translated } of replacements) {
+        const originalPara = xml.slice(para.start, para.end);
+        const translatedPara = replaceParagraphText(originalPara, translated);
+        xml = xml.slice(0, para.start) + translatedPara + xml.slice(para.end);
+    }
+
+    zip.file("word/document.xml", xml);
+    const outBuffer = await zip.generateAsync({
+        type: "nodebuffer",
+        compression: "DEFLATE"
+    });
+    return Buffer.from(outBuffer);
+}
--- a/utils/document-processors/excel.ts
+++ b/utils/document-processors/excel.ts
@@ -0,0 +1,95 @@
+import * as XLSX from "xlsx";
+import { replicateTranslateBatch } from "../replicate-translate";
+
+/** Column listing for one worksheet, as returned by getExcelColumns. */
+export type SheetColumnInfo = {
+    sheetName: string;
+    columns: string[]; // header names or A, B, C...
+};
+
+/**
+ * Columns of one worksheet that translateExcel should translate.
+ * translateExcel passes each index straight to XLSX.utils.encode_cell as the column number.
+ */
+export type ColumnSelection = {
+    sheetName: string;
+    columnIndices: number[]; // 0-based column indices to translate
+};
+
+/**
+ * Parse an Excel/CSV buffer and return sheet/column metadata for the column selection UI.
+ *
+ * A column's label is the value of its cell in the first row of the sheet's
+ * used range, which is the same row translateExcel skips as the header row.
+ * Columns whose header cell is missing or null are labelled with their
+ * letter (A, B, C...).
+ *
+ * @param buffer - Raw workbook bytes handed to XLSX.read.
+ * @param filename - Not used by this function; kept so existing callers keep working.
+ * @returns One entry per sheet, in workbook order.
+ */
+export function getExcelColumns(buffer: Buffer, filename: string): SheetColumnInfo[] {
+    const wb = XLSX.read(buffer, { type: "buffer" });
+    return wb.SheetNames.map(sheetName => {
+        const ws = wb.Sheets[sheetName];
+        const range = XLSX.utils.decode_range(ws["!ref"] ?? "A1");
+        // The header is the first row of the used range, which is not necessarily
+        // absolute row 0 when the sheet starts further down.
+        const headerRow = range.s.r;
+        const columns: string[] = [];
+        // NOTE(review): columns[i] describes sheet column range.s.c + i, whereas
+        // translateExcel uses ColumnSelection.columnIndices as absolute column
+        // numbers. The two agree only when the used range starts in column A;
+        // confirm how the UI maps positions to indices.
+        for (let c = range.s.c; c <= range.e.c; c++) {
+            const cellAddr = XLSX.utils.encode_cell({ r: headerRow, c });
+            const cell = ws[cellAddr];
+            const header = cell && cell.v != null ? String(cell.v) : XLSX.utils.encode_col(c);
+            columns.push(header);
+        }
+        return { sheetName, columns };
+    });
+}
+
+/**
+ * Translate the text cells of selected columns in a workbook and return it as .xlsx.
+ *
+ * For every sheet, the entry in `columnSelections` with a matching
+ * `sheetName` decides which columns are processed; a sheet without an entry
+ * has every column of its used range processed. The first row of the used
+ * range is treated as a header and left untouched. Only non-blank string
+ * cells are translated, 50 cells per translator call.
+ *
+ * @param buffer - Raw workbook bytes handed to XLSX.read.
+ * @param targetLanguage - Language passed through to the translator.
+ * @param columnSelections - Per-sheet column choices: array of {sheetName, columnIndices}.
+ * @returns The translated workbook written with bookType "xlsx".
+ */
+export async function translateExcel(
+    buffer: Buffer,
+    targetLanguage: string,
+    columnSelections: ColumnSelection[]
+): Promise<Buffer> {
+    const BATCH_SIZE = 50;
+    const workbook = XLSX.read(buffer, { type: "buffer", cellStyles: true, cellNF: true });
+
+    for (const sheetName of workbook.SheetNames) {
+        const worksheet = workbook.Sheets[sheetName];
+        const usedRef = worksheet["!ref"];
+        if (!usedRef) continue;
+
+        const bounds = XLSX.utils.decode_range(usedRef);
+
+        // Explicit selection for this sheet, otherwise every column in the used range.
+        const chosen = columnSelections.find(entry => entry.sheetName === sheetName);
+        let targetColumns: number[];
+        if (chosen) {
+            targetColumns = chosen.columnIndices;
+        } else {
+            targetColumns = [];
+            for (let c = bounds.s.c; c <= bounds.e.c; c++) {
+                targetColumns.push(c);
+            }
+        }
+
+        // Gather the translatable cells column by column, skipping the header row.
+        const pending: { addr: string; text: string }[] = [];
+        for (const column of targetColumns) {
+            for (let row = bounds.s.r + 1; row <= bounds.e.r; row++) {
+                const addr = XLSX.utils.encode_cell({ r: row, c: column });
+                const candidate = worksheet[addr];
+                const isText =
+                    candidate &&
+                    candidate.t === "s" &&
+                    typeof candidate.v === "string" &&
+                    candidate.v.trim();
+                if (isText) {
+                    pending.push({ addr, text: candidate.v });
+                }
+            }
+        }
+
+        // Translate chunk by chunk and write each result back into its cell.
+        for (let offset = 0; offset < pending.length; offset += BATCH_SIZE) {
+            const chunk = pending.slice(offset, offset + BATCH_SIZE);
+            const translated = await replicateTranslateBatch(
+                chunk.map(entry => entry.text),
+                targetLanguage
+            );
+            for (let k = 0; k < chunk.length; k++) {
+                const target = worksheet[chunk[k].addr];
+                if (!target) continue;
+                target.v = translated[k];
+                // Keep the cached HTML / rich-text / formatted-text fields in step with the new value.
+                if (target.h) target.h = translated[k];
+                if (target.r) target.r = undefined;
+                if (target.w) target.w = translated[k];
+            }
+        }
+    }
+
+    const written = XLSX.write(workbook, { type: "buffer", bookType: "xlsx" });
+    return Buffer.from(written);
+}
--- a/utils/document-processors/pdf-types.d.ts
+++ b/utils/document-processors/pdf-types.d.ts
@@ -0,0 +1,12 @@
+// Ambient typing for the "pdf-parse/lib/pdf-parse.js" entry point, so that path can be
+// referenced without the package shipping its own declaration for it.
+declare module "pdf-parse/lib/pdf-parse.js" {
+    // Shape of the value the parse promise resolves to.
+    interface PdfData {
+        numpages: number;
+        numrender: number;
+        info: Record<string, unknown>;
+        metadata: Record<string, unknown>;
+        text: string;
+        version: string;
+    }
+    // Parses the given PDF bytes; `options` is passed through untyped.
+    function pdfParse(dataBuffer: Buffer, options?: Record<string, unknown>): Promise<PdfData>;
+    // CommonJS-style export: the module value is the parse function itself.
+    export = pdfParse;
+}
--- a/utils/document-processors/pdf.ts
+++ b/utils/document-processors/pdf.ts
@@ -0,0 +1,115 @@
+import { PDFDocument, rgb, StandardFonts } from "pdf-lib";
+import { replicateTranslateBatch } from "../replicate-translate";
+
+// The fields of the pdf-parse result that this module declares; translatePdf reads only `text`.
+type PdfParseResult = {
+    numpages: number;
+    text: string;
+    info: Record<string, unknown>;
+};
+
+/**
+ * Runs pdf-parse on the given PDF bytes and resolves with its result.
+ * The parser is loaded lazily, at call time, from the library's inner module.
+ */
+async function parsePdf(buffer: Buffer): Promise<PdfParseResult> {
+    // Avoid Next.js issues with pdf-parse test file imports
+    // eslint-disable-next-line @typescript-eslint/no-var-requires
+    const extractPdf = require("pdf-parse/lib/pdf-parse.js");
+    const result = await extractPdf(buffer);
+    return result;
+}
+
+/**
+ * Greedy word wrap by character count.
+ *
+ * Words (separated by any whitespace) are packed onto a line as long as the
+ * line stays within `maxCharsPerLine`. A single word longer than the limit
+ * is hard-split into limit-sized pieces so no line overflows. Leading,
+ * trailing and repeated whitespace never produces empty words or trailing spaces.
+ *
+ * @param text - Text to wrap; existing line breaks are treated as spaces.
+ * @param maxCharsPerLine - Maximum line length in UTF-16 code units (values below 1 are treated as 1).
+ * @returns The wrapped lines; an empty array for blank input.
+ */
+function wrapText(text: string, maxCharsPerLine: number): string[] {
+    const limit = Math.max(1, Math.floor(maxCharsPerLine) || 1);
+    const lines: string[] = [];
+    let current = "";
+
+    for (const word of text.split(/\s+/)) {
+        if (!word) continue; // produced by leading/trailing whitespace
+
+        const candidate = current ? `${current} ${word}` : word;
+        if (candidate.length <= limit) {
+            current = candidate;
+            continue;
+        }
+
+        if (current) lines.push(current);
+
+        // The word does not fit on the current line; if it does not even fit
+        // on a line of its own, emit it in limit-sized pieces.
+        let rest = word;
+        while (rest.length > limit) {
+            lines.push(rest.slice(0, limit));
+            rest = rest.slice(limit);
+        }
+        current = rest;
+    }
+    if (current) lines.push(current);
+    return lines;
+}
+
+/**
+ * Translate a PDF buffer. Since PDFs don't support in-place text editing,
+ * this extracts text, translates it, and creates a new formatted PDF.
+ *
+ * The output is a plain A4-sized document: a title line naming the target
+ * (and, if given, source) language, followed by the translated paragraphs
+ * in 11pt Helvetica, wrapped at 80 characters. The original layout, images
+ * and fonts are not carried over.
+ *
+ * NOTE(review): the text is drawn with pdf-lib's standard Helvetica fonts.
+ * Standard fonts are expected to cover only WinAnsi characters, so
+ * translations into other scripts (CJK, Cyrillic, Arabic, ...) will
+ * presumably fail in drawText; confirm, and embed a Unicode font if needed.
+ *
+ * @param buffer - Contents of the source PDF.
+ * @param targetLanguage - Language passed through to the translator; shown in the title.
+ * @param sourceLanguage - Optional source language, used only in the title.
+ * @returns The generated PDF as a Buffer.
+ * @throws Error if no text can be extracted from the PDF.
+ */
+export async function translatePdf(
+    buffer: Buffer,
+    targetLanguage: string,
+    sourceLanguage?: string
+): Promise<Buffer> {
+    const parsed = await parsePdf(buffer);
+    const rawText = parsed.text;
+
+    // Split into paragraphs (separated by double newlines or page breaks)
+    const paragraphs = rawText
+        .split(/\n{2,}|\f/)
+        .map(p => p.replace(/\n/g, " ").trim())
+        .filter(p => p.length > 0);
+
+    if (paragraphs.length === 0) {
+        throw new Error("No extractable text found in PDF");
+    }
+
+    // Translate all paragraphs
+    const translations = await replicateTranslateBatch(paragraphs, targetLanguage);
+
+    // Build output PDF
+    const pdfDoc = await PDFDocument.create();
+    const font = await pdfDoc.embedFont(StandardFonts.Helvetica);
+    const boldFont = await pdfDoc.embedFont(StandardFonts.HelveticaBold);
+
+    const PAGE_WIDTH = 595;
+    const PAGE_HEIGHT = 842;
+    const MARGIN = 50;
+    const FONT_SIZE = 11;
+    const TITLE_SIZE = 13;
+    const LINE_HEIGHT = 16;
+    const MAX_LINE_CHARS = 80;
+    // Vertical room between the top and bottom margins of an empty page.
+    const USABLE_HEIGHT = PAGE_HEIGHT - 2 * MARGIN;
+
+    let page = pdfDoc.addPage([PAGE_WIDTH, PAGE_HEIGHT]);
+    let y = PAGE_HEIGHT - MARGIN;
+
+    // Starts a new page when fewer than `needed` points remain above the bottom margin.
+    function ensureSpace(needed: number) {
+        if (y - needed < MARGIN) {
+            page = pdfDoc.addPage([PAGE_WIDTH, PAGE_HEIGHT]);
+            y = PAGE_HEIGHT - MARGIN;
+        }
+    }
+
+    // Title
+    const title = `Translation to: ${targetLanguage}${sourceLanguage ? ` (from: ${sourceLanguage})` : ""}`;
+    ensureSpace(TITLE_SIZE + LINE_HEIGHT);
+    page.drawText(title, {
+        x: MARGIN,
+        y,
+        size: TITLE_SIZE,
+        font: boldFont,
+        color: rgb(0.2, 0.2, 0.7)
+    });
+    y -= TITLE_SIZE + LINE_HEIGHT;
+
+    // Draw translated paragraphs
+    for (const para of translations) {
+        const lines = wrapText(para, MAX_LINE_CHARS);
+        const blockHeight = lines.length * LINE_HEIGHT + LINE_HEIGHT;
+        // Keep a paragraph together on one page when that is possible at all.
+        // A paragraph taller than a whole page can never be kept together, so
+        // breaking before it would only waste the rest of the current page.
+        if (blockHeight <= USABLE_HEIGHT) {
+            ensureSpace(blockHeight);
+        }
+
+        for (const line of lines) {
+            ensureSpace(LINE_HEIGHT);
+            page.drawText(line, {
+                x: MARGIN,
+                y,
+                size: FONT_SIZE,
+                font,
+                color: rgb(0, 0, 0)
+            });
+            y -= LINE_HEIGHT;
+        }
+        y -= LINE_HEIGHT * 0.5; // paragraph gap
+    }
+
+    const pdfBytes = await pdfDoc.save();
+    return Buffer.from(pdfBytes);
+}
--- a/utils/replicate-translate.ts
+++ b/utils/replicate-translate.ts
@@ -0,0 +1,83 @@
+import { readSettings } from "./settings-store";
+
+// Shapes of the prediction `output` field that replicateTranslate knows how to unwrap:
+// a plain string, an array of string chunks, or an object with one of the listed text fields.
+type ReplicateOutput = string | string[] | { translation?: string; translated_text?: string; output?: string };
+
+// Subset of the Replicate prediction object that replicateTranslate reads.
+type ReplicatePrediction = {
+    status?: string;
+    error?: unknown;
+    output?: ReplicateOutput | null;
+    urls?: { get?: string };
+};
+
+/**
+ * Translate `text` with the configured model on Replicate.
+ *
+ * A prediction is created with `Prefer: wait`, so the response normally
+ * already contains the result. If Replicate answers while the prediction is
+ * still "starting" or "processing", it is polled via its `urls.get` link
+ * until it finishes (for at most two minutes).
+ *
+ * @param text - Text to translate.
+ * @param targetLanguage - Value sent as the model's `target_language` input.
+ * @returns The translated text.
+ * @throws Error if the Replicate token or JigsawStack key is not configured,
+ *   the HTTP request fails, the prediction fails, is canceled or times out,
+ *   or the output has no recognizable text.
+ */
+export async function replicateTranslate(
+    text: string,
+    targetLanguage: string
+): Promise<string> {
+    const POLL_INTERVAL_MS = 1000;
+    const POLL_TIMEOUT_MS = 120000;
+
+    const settings = readSettings();
+
+    if (!settings.replicateApiToken) {
+        throw new Error("Replicate API token not configured");
+    }
+    if (!settings.jigsawApiKey) {
+        throw new Error("JigsawStack API key not configured");
+    }
+
+    const authorization = `Bearer ${settings.replicateApiToken}`;
+    const body = {
+        version: settings.modelVersion,
+        input: {
+            text,
+            api_key: settings.jigsawApiKey,
+            target_language: targetLanguage
+        }
+    };
+
+    const response = await fetch("https://api.replicate.com/v1/predictions", {
+        method: "POST",
+        headers: {
+            "Authorization": authorization,
+            "Content-Type": "application/json",
+            "Prefer": "wait"
+        },
+        body: JSON.stringify(body)
+    });
+
+    if (!response.ok) {
+        const err = await response.text();
+        throw new Error(`Replicate API error: ${response.status} ${err}`);
+    }
+
+    let data: ReplicatePrediction = await response.json();
+
+    // "Prefer: wait" only holds the connection for a limited time; a slow or
+    // cold-starting model comes back unfinished and has to be polled.
+    const deadline = Date.now() + POLL_TIMEOUT_MS;
+    while (data.status === "starting" || data.status === "processing") {
+        const pollUrl = data.urls?.get;
+        if (typeof pollUrl !== "string") break;
+        if (Date.now() > deadline) {
+            throw new Error("Replicate prediction timed out");
+        }
+        await new Promise<void>(resolve => setTimeout(resolve, POLL_INTERVAL_MS));
+        const poll = await fetch(pollUrl, { headers: { "Authorization": authorization } });
+        if (!poll.ok) {
+            const err = await poll.text();
+            throw new Error(`Replicate API error: ${poll.status} ${err}`);
+        }
+        data = await poll.json();
+    }
+
+    if (data.error) {
+        const detail = typeof data.error === "string" ? data.error : JSON.stringify(data.error);
+        throw new Error(`Replicate model error: ${detail}`);
+    }
+    if (data.status === "failed" || data.status === "canceled") {
+        throw new Error(`Replicate model error: prediction ${data.status}`);
+    }
+
+    // Extract translated text from various output formats
+    const output = data.output;
+
+    if (typeof output === "string") return output;
+    if (Array.isArray(output)) return output.join("");
+    if (output && typeof output === "object") {
+        const candidate = output.translation ?? output.translated_text ?? output.output;
+        // Never stringify an unknown object: that would return "[object Object]"
+        // as if it were a translation.
+        if (typeof candidate === "string") return candidate;
+    }
+
+    throw new Error("Unexpected output format from Replicate");
+}
+
+// Batch translate using separator trick to minimize API calls
+const SEPARATOR = "\n{{SEP}}\n";
+// The bare marker, used to detect inputs that would collide with the separator.
+const SEPARATOR_TOKEN = "{{SEP}}";
+// Splits on the marker regardless of how the model changed the whitespace around it.
+const SEPARATOR_PATTERN = /\s*\{\{SEP\}\}\s*/;
+// Upper bound on simultaneous requests when texts are translated one by one.
+const FALLBACK_CONCURRENCY = 5;
+
+/**
+ * Translate several texts, returning the translations in the same order.
+ *
+ * The texts are joined with a separator marker and translated in a single
+ * call; the result is split on the marker again. If that is not possible
+ * (an input already contains the marker) or the split does not yield
+ * exactly one non-empty part per non-empty input, each text is translated
+ * individually, with at most FALLBACK_CONCURRENCY requests in flight.
+ *
+ * @param texts - Texts to translate.
+ * @param targetLanguage - Language passed through to replicateTranslate.
+ * @returns One translation per input text; an empty array for empty input.
+ * @throws Whatever replicateTranslate throws for a failed request.
+ */
+export async function replicateTranslateBatch(
+    texts: string[],
+    targetLanguage: string
+): Promise<string[]> {
+    if (texts.length === 0) return [];
+    if (texts.length === 1) {
+        return [await replicateTranslate(texts[0], targetLanguage)];
+    }
+
+    // Joining is only safe when no input contains the marker itself.
+    const canJoin = !texts.some(t => t.includes(SEPARATOR_TOKEN));
+    if (canJoin) {
+        const translated = await replicateTranslate(texts.join(SEPARATOR), targetLanguage);
+        const parts = translated.split(SEPARATOR_PATTERN);
+        const plausible =
+            parts.length === texts.length &&
+            parts.every((part, i) => part.trim() !== "" || (texts[i] ?? "").trim() === "");
+        if (plausible) {
+            return parts;
+        }
+    }
+
+    // Fallback: translate individually, a few at a time so that a large batch
+    // does not turn into hundreds of simultaneous API requests.
+    const results = new Array<string>(texts.length);
+    let next = 0;
+    const worker = async (): Promise<void> => {
+        while (next < texts.length) {
+            const i = next++;
+            results[i] = await replicateTranslate(texts[i], targetLanguage);
+        }
+    };
+    const workerCount = Math.min(FALLBACK_CONCURRENCY, texts.length);
+    await Promise.all(Array.from({ length: workerCount }, () => worker()));
+    return results;
+}
--- a/utils/settings-store.ts
+++ b/utils/settings-store.ts
@@ -0,0 +1,48 @@
+import fs from "fs";
+import path from "path";
+
+/** Persisted application settings; DEFAULT_SETTINGS supplies a value for every field. */
+export type Settings = {
+    replicateApiToken: string;
+    jigsawApiKey: string;
+    modelVersion: string;
+    replicateEnabled: boolean;
+    // NOTE(review): this module never hashes the value; the default is the raw
+    // ADMIN_PASSWORD (or "admin"). Confirm whether callers store a hash here.
+    adminPasswordHash: string;
+};
+
+// Values used for any setting missing from the settings file.
+// The env-based defaults are read once, when this module is first loaded.
+const DEFAULT_SETTINGS: Settings = {
+    replicateApiToken: process.env["REPLICATE_API_TOKEN"] ?? "",
+    jigsawApiKey: process.env["JIGSAWSTACK_API_KEY"] ?? "",
+    modelVersion: "jigsawstack/text-translate:454df4c49941c05dea05175bd37686d0872c73c1f9366d1c2505db32ade52a89",
+    replicateEnabled: false,
+    // Falls back to the literal "admin" when ADMIN_PASSWORD is not set.
+    adminPasswordHash: process.env["ADMIN_PASSWORD"] ?? "admin"
+};
+
+// Settings file location: <current working directory>/data/settings.json.
+const SETTINGS_PATH = path.join(process.cwd(), "data", "settings.json");
+
+/** Creates the directory that holds the settings file (and any missing parents) if it does not exist yet. */
+function ensureDataDir() {
+    const dataDir = path.dirname(SETTINGS_PATH);
+    if (fs.existsSync(dataDir)) {
+        return;
+    }
+    fs.mkdirSync(dataDir, { recursive: true });
+}
+
+/**
+ * Load the current settings.
+ *
+ * Stored values are layered over DEFAULT_SETTINGS. A stored value whose type
+ * differs from the default's type (for example a number where a string is
+ * expected) is ignored in favour of the default. If the file is missing or
+ * cannot be read, the defaults are returned; if it exists but is not a valid
+ * JSON object, the defaults are returned and the problem is logged, because
+ * falling back also resets the admin password to its default.
+ *
+ * @returns A fresh settings object; mutating it does not affect stored settings.
+ */
+export function readSettings(): Settings {
+    let raw: string;
+    try {
+        ensureDataDir();
+        if (!fs.existsSync(SETTINGS_PATH)) {
+            return { ...DEFAULT_SETTINGS };
+        }
+        raw = fs.readFileSync(SETTINGS_PATH, "utf-8");
+    } catch {
+        // Best effort: an unreadable data directory behaves like "no settings saved".
+        return { ...DEFAULT_SETTINGS };
+    }
+
+    try {
+        const parsed: unknown = JSON.parse(raw);
+        if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
+            throw new Error("expected a JSON object");
+        }
+        const merged: Record<string, unknown> = { ...DEFAULT_SETTINGS, ...parsed };
+        // Known settings must keep the type of their default.
+        for (const key of Object.keys(DEFAULT_SETTINGS) as (keyof Settings)[]) {
+            if (typeof merged[key] !== typeof DEFAULT_SETTINGS[key]) {
+                merged[key] = DEFAULT_SETTINGS[key];
+            }
+        }
+        return merged as Settings;
+    } catch (err: unknown) {
+        const reason = err instanceof Error ? err.message : String(err);
+        console.error(`Ignoring invalid settings file ${SETTINGS_PATH}: ${reason}`);
+        return { ...DEFAULT_SETTINGS };
+    }
+}
+
+/**
+ * Merge `updates` into the stored settings and persist the result.
+ *
+ * Properties whose value is `undefined` are ignored, so they cannot erase a
+ * stored value. The file is written to a temporary sibling first and then
+ * renamed into place, so a crash mid-write cannot leave a truncated settings
+ * file behind; it is created readable by the owner only, since it holds API
+ * credentials.
+ *
+ * @param updates - Settings to change; omitted keys keep their current value.
+ * @returns The complete settings as written.
+ * @throws If the data directory or the file cannot be written.
+ */
+export function writeSettings(updates: Partial<Settings>): Settings {
+    ensureDataDir();
+    const current = readSettings();
+    const defined = Object.fromEntries(
+        Object.entries(updates).filter(([, value]) => value !== undefined)
+    );
+    const next: Settings = { ...current, ...defined };
+
+    const tmpPath = `${SETTINGS_PATH}.${process.pid}.tmp`;
+    try {
+        fs.writeFileSync(tmpPath, JSON.stringify(next, null, 2), { encoding: "utf-8", mode: 0o600 });
+        fs.renameSync(tmpPath, SETTINGS_PATH);
+    } catch (err: unknown) {
+        // Do not leave a half-written temporary file next to the real one.
+        fs.rmSync(tmpPath, { force: true });
+        throw err;
+    }
+    return next;
+}