feat: add admin panel, Replicate AI translation, and document translation
- Admin panel (/admin) with JWT auth: configure Replicate API token, JigsawStack API key, model version, enable/disable AI translation, change admin password. Settings persisted in data/settings.json. - Replicate AI translation: POST /api/translate/replicate uses JigsawStack text-translate model via Replicate API. Main page switches to client-side AI translation when enabled. - Document translation tab: supports PDF, DOCX, XLSX, XLS, CSV. Excel/Word formatting fully preserved (SheetJS + JSZip XML manipulation). PDF uses pdf-parse extraction + pdf-lib reconstruction. Column selector UI for tabular data (per-sheet, All/None toggles). - Updated README with full implementation documentation. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
55
utils/admin-auth.ts
Normal file
55
utils/admin-auth.ts
Normal file
@@ -0,0 +1,55 @@
|
||||
import { createHash, timingSafeEqual } from "crypto";
import { SignJWT, jwtVerify } from "jose";
import { NextApiRequest, NextApiResponse } from "next";
import { readSettings } from "./settings-store";
|
||||
|
||||
// Name of the cookie that getTokenFromRequest reads the admin JWT from.
const JWT_COOKIE = "lingva_admin";
|
||||
// Lifetime handed to SignJWT.setExpirationTime when an admin token is issued.
const JWT_EXPIRY = "8h";
|
||||
|
||||
// Set once the fallback-secret warning has been printed, to avoid logging on every request.
let warnedAboutFallbackSecret = false;

/**
 * Returns the HMAC key used to sign and verify admin JWTs.
 *
 * The key is the UTF-8 encoding of the ADMIN_JWT_SECRET environment variable.
 * When the variable is unset OR empty, a built-in fallback value is used so the
 * admin panel keeps working, and a warning is logged once: the fallback is
 * publicly known, so tokens signed with it can be forged by anyone.
 *
 * @returns the secret as raw bytes
 */
function getSecret(): Uint8Array {
  const configured = process.env["ADMIN_JWT_SECRET"];
  if (configured) {
    return new TextEncoder().encode(configured);
  }

  if (!warnedAboutFallbackSecret) {
    warnedAboutFallbackSecret = true;
    console.warn(
      "ADMIN_JWT_SECRET is not set; admin tokens are signed with an insecure built-in default. " +
        "Set ADMIN_JWT_SECRET to a long random value."
    );
  }
  return new TextEncoder().encode("lingva-admin-secret-change-me");
}
|
||||
|
||||
/**
 * Issues a signed admin JWT.
 *
 * The token carries a single `role: "admin"` claim, is signed with HS256 using
 * the key from getSecret(), is stamped with the current issue time and expires
 * after JWT_EXPIRY.
 *
 * @returns the compact serialized token
 */
export async function signAdminToken(): Promise<string> {
  const claims = { role: "admin" };
  const builder = new SignJWT(claims);
  builder.setProtectedHeader({ alg: "HS256" });
  builder.setIssuedAt();
  builder.setExpirationTime(JWT_EXPIRY);
  return builder.sign(getSecret());
}
|
||||
|
||||
/**
 * Checks whether a token is a valid, unexpired admin JWT.
 *
 * Verification is pinned to HS256 (the algorithm signAdminToken uses) and the
 * payload must carry `role: "admin"`, so a token that merely happens to be
 * signed with the same key but was issued for another purpose is rejected.
 *
 * @param token compact serialized JWT taken from the request
 * @returns true only when signature, expiry and role claim all check out;
 *          any verification error yields false rather than throwing
 */
export async function verifyAdminToken(token: string): Promise<boolean> {
  try {
    const { payload } = await jwtVerify(token, getSecret(), { algorithms: ["HS256"] });
    return payload["role"] === "admin";
  } catch {
    // jwtVerify throws for malformed, tampered or expired tokens; all mean "not authorized".
    return false;
  }
}
|
||||
|
||||
/**
 * Pulls the admin token out of an incoming API request.
 *
 * The JWT_COOKIE cookie wins when present and non-empty; otherwise an
 * `Authorization: Bearer <token>` header is accepted.
 *
 * @param req incoming Next.js API request
 * @returns the raw token string, or null when neither source provides one
 */
export function getTokenFromRequest(req: NextApiRequest): string | null {
  const fromCookie = req.cookies[JWT_COOKIE];
  if (fromCookie) {
    return fromCookie;
  }

  const bearerPrefix = "Bearer ";
  const header = req.headers.authorization;
  if (!header?.startsWith(bearerPrefix)) {
    return null;
  }
  return header.slice(bearerPrefix.length);
}
|
||||
|
||||
/**
 * Guard for admin-only API routes.
 *
 * Extracts the token from the request and verifies it. On failure it sends a
 * 401 JSON response (`{ error: "Unauthorized" }`) itself, so the caller only
 * needs to stop processing when false is returned.
 *
 * @param req incoming Next.js API request
 * @param res response object used to send the 401 on failure
 * @returns true when the request carries a valid admin token, false otherwise
 */
export async function requireAdmin(
  req: NextApiRequest,
  res: NextApiResponse
): Promise<boolean> {
  const token = getTokenFromRequest(req);
  const authorized = token ? await verifyAdminToken(token) : false;
  if (authorized) {
    return true;
  }

  res.status(401).json({ error: "Unauthorized" });
  return false;
}
|
||||
|
||||
/**
 * Checks a submitted admin password against the stored one.
 *
 * Despite its name, `settings.adminPasswordHash` is compared as the password
 * value itself. Both sides are hashed with SHA-256 first so the buffers have
 * equal length, then compared with timingSafeEqual so the comparison time does
 * not reveal how many leading characters matched.
 *
 * @param password password supplied by the client
 * @returns true when the password matches; false for a mismatch or for
 *          non-string input (e.g. a missing request field)
 */
export function checkPassword(password: string): boolean {
  const { adminPasswordHash } = readSettings();
  // Runtime guard: callers typically pass values straight from a request body.
  if (typeof password !== "string" || typeof adminPasswordHash !== "string") {
    return false;
  }

  const digest = (value: string): Buffer =>
    createHash("sha256").update(value, "utf8").digest();
  return timingSafeEqual(digest(password), digest(adminPasswordHash));
}
|
||||
|
||||
// Exported alias of JWT_COOKIE; presumably used by the login/logout handlers
// to set and clear the cookie — verify against callers.
export const COOKIE_NAME = JWT_COOKIE;
|
||||
106
utils/document-processors/docx.ts
Normal file
106
utils/document-processors/docx.ts
Normal file
@@ -0,0 +1,106 @@
|
||||
import JSZip from "jszip";
|
||||
import { replicateTranslateBatch } from "../replicate-translate";
|
||||
|
||||
/**
 * Escapes the five XML-reserved characters so arbitrary text can be placed
 * inside an element body or attribute value.
 *
 * `&` is replaced first so the ampersands introduced by the other
 * replacements are not escaped a second time.
 *
 * @param text raw text
 * @returns text with & < > " ' replaced by their predefined XML entities
 */
function escapeXml(text: string): string {
  return text
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;")
    .replace(/"/g, "&quot;")
    .replace(/'/g, "&apos;");
}
|
||||
|
||||
/**
 * Decodes the predefined XML entities and numeric character references in a
 * text node. Done in a single pass so `&amp;lt;` becomes `&lt;`, not `<`.
 */
function decodeXmlEntities(text: string): string {
  return text.replace(/&(#x[0-9a-fA-F]+|#[0-9]+|amp|lt|gt|quot|apos);/g, (whole: string, name: string) => {
    switch (name) {
      case "amp":
        return "&";
      case "lt":
        return "<";
      case "gt":
        return ">";
      case "quot":
        return '"';
      case "apos":
        return "'";
      default: {
        const codePoint = name.startsWith("#x")
          ? parseInt(name.slice(2), 16)
          : parseInt(name.slice(1), 10);
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
      }
    }
  });
}

/**
 * Extracts the visible text of every paragraph in a document.xml string.
 *
 * Only paragraphs with non-blank text are returned. `index` is the ordinal of
 * the paragraph among ALL `<w:p>` elements (blank ones included); `start` and
 * `end` are the character offsets of the paragraph element in `xml` (end is
 * exclusive) so the caller can splice a replacement in place.
 *
 * The returned text is entity-decoded (`&amp;` becomes `&`) so the translator
 * sees real characters and re-escaping on write-back does not double-escape.
 *
 * NOTE(review): a paragraph is assumed to end at the first `</w:p>` after its
 * opening tag; paragraphs nested inside another paragraph (e.g. text boxes)
 * are not handled specially.
 *
 * @param xml content of word/document.xml
 * @returns paragraphs in document order
 */
function extractParagraphs(xml: string): { index: number; text: string; start: number; end: number }[] {
  const paragraphs: { index: number; text: string; start: number; end: number }[] = [];
  // `[ >]` after the tag name keeps <w:pPr>, <w:pStyle>, ... from matching.
  const pRegex = /<w:p[ >]/g;
  const pCloseTag = "</w:p>";
  // The tag name must be followed by whitespace or `>`; a bare `[^>]*` would
  // also match <w:tab/>, <w:tabs>, <w:tbl>, ... and swallow markup as text.
  const tRegex = /<w:t(?:\s[^>]*)?>([\s\S]*?)<\/w:t>/g;
  let idx = 0;
  let match: RegExpExecArray | null;

  while ((match = pRegex.exec(xml)) !== null) {
    const start = match.index;
    const closeAt = xml.indexOf(pCloseTag, start);
    if (closeAt === -1) break; // unterminated paragraph: stop scanning
    const end = closeAt + pCloseTag.length;

    const paraXml = xml.slice(start, end);

    // Concatenate every text run of the paragraph.
    const textParts: string[] = [];
    tRegex.lastIndex = 0;
    let tMatch: RegExpExecArray | null;
    while ((tMatch = tRegex.exec(paraXml)) !== null) {
      textParts.push(decodeXmlEntities(tMatch[1] ?? ""));
    }

    const text = textParts.join("").trim();
    if (text) {
      paragraphs.push({ index: idx, text, start, end });
    }
    idx++;
  }
  return paragraphs;
}
|
||||
|
||||
/**
 * Replaces the text of one paragraph with its translation.
 *
 * The whole translation is written into the first text run that has
 * non-blank content, so that run's formatting is kept. Every later text run
 * is emptied; blank runs before the first text are left untouched.
 *
 * @param paraXml XML of a single `<w:p>` element
 * @param translatedText plain (unescaped) translated text
 * @returns the paragraph XML with its text runs rewritten
 */
function replaceParagraphText(paraXml: string, translatedText: string): string {
  let firstDone = false;
  // The tag name must be followed by whitespace or `>`; a bare `[^>]*` would
  // also match <w:tab/> and <w:tabs> and corrupt the paragraph properties.
  return paraXml.replace(/<w:t(?:\s[^>]*)?>([\s\S]*?)<\/w:t>/g, (match: string, content: string) => {
    if (firstDone) {
      return `<w:t></w:t>`;
    }
    if (!content.trim()) {
      return match; // preserve empty runs before the first text
    }
    firstDone = true;
    // xml:space="preserve" keeps leading/trailing spaces of the translation.
    return `<w:t xml:space="preserve">${escapeXml(translatedText)}</w:t>`;
  });
}
|
||||
|
||||
/**
 * Translates the body text of a DOCX file while keeping its formatting.
 *
 * Only `word/document.xml` is rewritten; all other parts of the archive are
 * carried over unchanged. A document without any text is re-serialized as is.
 *
 * @param buffer raw .docx file contents
 * @param targetLanguage language code passed to the translation backend
 * @returns the translated .docx as a buffer
 * @throws Error when the archive has no word/document.xml, or when the
 *         translation backend fails
 */
export async function translateDocx(
  buffer: Buffer,
  targetLanguage: string
): Promise<Buffer> {
  const zip = await JSZip.loadAsync(buffer);
  const docFile = zip.file("word/document.xml");
  if (!docFile) throw new Error("Invalid DOCX: missing word/document.xml");

  let xml = await docFile.async("string");
  const paragraphs = extractParagraphs(xml);

  if (paragraphs.length === 0) {
    return Buffer.from(await zip.generateAsync({ type: "nodebuffer" }));
  }

  // Translate all paragraphs in one batch; translations[i] belongs to paragraphs[i].
  const translations = await replicateTranslateBatch(
    paragraphs.map(p => p.text),
    targetLanguage
  );

  // extractParagraphs returns paragraphs in ascending offset order, so walking
  // the array backwards splices from the end of the document to the start and
  // keeps the offsets of the not-yet-processed paragraphs valid.
  for (let i = paragraphs.length - 1; i >= 0; i--) {
    const para = paragraphs[i];
    if (!para) continue;
    const translated = translations[i] ?? para.text;
    const translatedPara = replaceParagraphText(xml.slice(para.start, para.end), translated);
    xml = xml.slice(0, para.start) + translatedPara + xml.slice(para.end);
  }

  zip.file("word/document.xml", xml);
  const outBuffer = await zip.generateAsync({
    type: "nodebuffer",
    compression: "DEFLATE"
  });
  return Buffer.from(outBuffer);
}
|
||||
95
utils/document-processors/excel.ts
Normal file
95
utils/document-processors/excel.ts
Normal file
@@ -0,0 +1,95 @@
|
||||
import * as XLSX from "xlsx";
|
||||
import { replicateTranslateBatch } from "../replicate-translate";
|
||||
|
||||
// Per-sheet column listing returned by getExcelColumns for the column selector.
export type SheetColumnInfo = {
|
||||
sheetName: string;
|
||||
columns: string[]; // header names or A, B, C...
|
||||
};
|
||||
|
||||
// Columns of one sheet that translateExcel should translate. translateExcel
// passes each index directly to XLSX.utils.encode_cell as the column number.
export type ColumnSelection = {
|
||||
sheetName: string;
|
||||
columnIndices: number[]; // 0-based column indices to translate
|
||||
};
|
||||
|
||||
/**
 * Parses an Excel/CSV buffer and lists the columns of every sheet, for the
 * column selection UI.
 *
 * A column is labelled with the value found in the first row of the sheet's
 * used range (the same row translateExcel skips as the header); when that
 * cell is empty the column letter (A, B, C, ...) is used instead.
 *
 * NOTE(review): `columns[i]` describes worksheet column `range.s.c + i`. If a
 * sheet's used range does not start at column A, array positions differ from
 * the absolute indices translateExcel expects — confirm how the UI maps them.
 *
 * @param buffer raw workbook contents
 * @param filename name of the uploaded file (currently unused)
 * @returns one entry per sheet, in workbook order
 */
export function getExcelColumns(buffer: Buffer, filename: string): SheetColumnInfo[] {
  const wb = XLSX.read(buffer, { type: "buffer" });
  return wb.SheetNames.map(sheetName => {
    const ws = wb.Sheets[sheetName];
    // An empty sheet has no "!ref"; treat it as the single cell A1.
    const range = XLSX.utils.decode_range(ws["!ref"] ?? "A1");
    const headerRow = range.s.r;
    const columns: string[] = [];
    for (let c = range.s.c; c <= range.e.c; c++) {
      const cell = ws[XLSX.utils.encode_cell({ r: headerRow, c })];
      columns.push(cell && cell.v != null ? String(cell.v) : XLSX.utils.encode_col(c));
    }
    return { sheetName, columns };
  });
}
|
||||
|
||||
/**
 * Translates the string cells of selected columns in a workbook.
 *
 * For each sheet, the columns named in its ColumnSelection are translated;
 * a sheet with no matching selection has ALL of its columns translated. The
 * first row of each sheet's used range is treated as a header and left
 * untouched. Only non-blank string cells are sent for translation.
 *
 * The result is always written in xlsx format, whatever the input format was.
 *
 * @param buffer raw workbook contents
 * @param targetLanguage language code passed to the translation backend
 * @param columnSelections per-sheet column indices to translate
 * @returns the translated workbook as an xlsx buffer
 * @throws Error when the translation backend fails
 */
export async function translateExcel(
  buffer: Buffer,
  targetLanguage: string,
  columnSelections: ColumnSelection[]
): Promise<Buffer> {
  const wb = XLSX.read(buffer, { type: "buffer", cellStyles: true, cellNF: true });
  const BATCH_SIZE = 50;

  for (const sheet of wb.SheetNames) {
    const ws = wb.Sheets[sheet];
    if (!ws["!ref"]) continue;

    const range = XLSX.utils.decode_range(ws["!ref"]);
    const selection = columnSelections.find(s => s.sheetName === sheet);
    const requested = selection
      ? selection.columnIndices
      : Array.from({ length: range.e.c - range.s.c + 1 }, (_, i) => i + range.s.c);

    // Selections come from the client: drop duplicates (which would translate
    // the same cells twice) and anything that is not a column of this sheet.
    const columnsToTranslate = Array.from(new Set(requested)).filter(
      c => Number.isInteger(c) && c >= range.s.c && c <= range.e.c
    );

    // Collect all text cells for batch translation
    type CellRef = { addr: string; text: string };
    const cellRefs: CellRef[] = [];

    for (const colIdx of columnsToTranslate) {
      // Skip the first row of the used range: it holds the headers.
      for (let r = range.s.r + 1; r <= range.e.r; r++) {
        const addr = XLSX.utils.encode_cell({ r, c: colIdx });
        const cell = ws[addr];
        if (cell && cell.t === "s" && typeof cell.v === "string" && cell.v.trim()) {
          cellRefs.push({ addr, text: cell.v });
        }
      }
    }

    if (cellRefs.length === 0) continue;

    // Batches are sent one after another to keep the request rate low.
    for (let i = 0; i < cellRefs.length; i += BATCH_SIZE) {
      const batch = cellRefs.slice(i, i + BATCH_SIZE);
      const translations = await replicateTranslateBatch(
        batch.map(c => c.text),
        targetLanguage
      );
      batch.forEach((cellRef, idx) => {
        const translated = translations[idx];
        // Keep the original value rather than writing `undefined` into the cell.
        if (typeof translated !== "string") return;
        const cell = ws[cellRef.addr];
        if (!cell) return;
        cell.v = translated;
        // Refresh the cached renderings so they do not show the old text.
        if (cell.h) cell.h = translated;
        if (cell.r) cell.r = undefined;
        if (cell.w) cell.w = translated;
      });
    }
  }

  const out = XLSX.write(wb, { type: "buffer", bookType: "xlsx" });
  return Buffer.from(out);
}
|
||||
12
utils/document-processors/pdf-types.d.ts
vendored
Normal file
12
utils/document-processors/pdf-types.d.ts
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
// Ambient typing for the "pdf-parse/lib/pdf-parse.js" entry point, which
// pdf.ts loads with require() instead of the package root.
declare module "pdf-parse/lib/pdf-parse.js" {
|
||||
// Shape of the value the parser's promise resolves to. Field meanings are
// taken from their names only — confirm against the pdf-parse documentation.
interface PdfData {
|
||||
numpages: number;
|
||||
numrender: number;
|
||||
info: Record<string, unknown>;
|
||||
metadata: Record<string, unknown>;
|
||||
text: string;
|
||||
version: string;
|
||||
}
|
||||
// Parses a PDF held in memory; `options` is passed through untyped.
function pdfParse(dataBuffer: Buffer, options?: Record<string, unknown>): Promise<PdfData>;
|
||||
// CommonJS-style export: the module itself is the function.
export = pdfParse;
|
||||
}
|
||||
115
utils/document-processors/pdf.ts
Normal file
115
utils/document-processors/pdf.ts
Normal file
@@ -0,0 +1,115 @@
|
||||
import { PDFDocument, rgb, StandardFonts } from "pdf-lib";
|
||||
import { replicateTranslateBatch } from "../replicate-translate";
|
||||
|
||||
// Local view of the pdf-parse result as returned by parsePdf.
// translatePdf reads only `text`.
type PdfParseResult = {
|
||||
numpages: number;
|
||||
text: string;
|
||||
info: Record<string, unknown>;
|
||||
};
|
||||
|
||||
/**
 * Runs pdf-parse on an in-memory PDF.
 *
 * The parser is loaded lazily from its internal entry point rather than the
 * package root, to sidestep the Next.js problems caused by pdf-parse's test
 * file imports.
 *
 * @param buffer raw PDF contents
 * @returns the parse result produced by pdf-parse
 */
async function parsePdf(buffer: Buffer): Promise<PdfParseResult> {
  // eslint-disable-next-line @typescript-eslint/no-var-requires
  const extractPdf = require("pdf-parse/lib/pdf-parse.js");
  const parsed = extractPdf(buffer);
  return parsed;
}
|
||||
|
||||
/**
 * Greedy word wrap by character count.
 *
 * Words are split on runs of whitespace and packed onto lines of at most
 * `maxCharsPerLine` characters, joined by single spaces. A word that is longer
 * than a whole line (long URLs, or text in scripts written without spaces) is
 * hard-broken into line-sized chunks instead of overflowing the line.
 *
 * @param text text to wrap; surrounding and repeated whitespace is ignored
 * @param maxCharsPerLine maximum characters per line (values below 1 are treated as 1)
 * @returns the wrapped lines; empty for blank input
 */
function wrapText(text: string, maxCharsPerLine: number): string[] {
  const limit = maxCharsPerLine >= 1 ? Math.floor(maxCharsPerLine) : 1;
  const lines: string[] = [];
  let current = "";

  for (const word of text.split(/\s+/)) {
    if (!word) continue; // leading/trailing whitespace yields empty tokens

    const candidate = current ? `${current} ${word}` : word;
    if (candidate.length <= limit) {
      current = candidate;
      continue;
    }

    if (current) lines.push(current);

    // Chunk by code point so a surrogate pair is never cut in half.
    const chars = Array.from(word);
    while (chars.length > limit) {
      lines.push(chars.splice(0, limit).join(""));
    }
    current = chars.join("");
  }

  if (current) lines.push(current);
  return lines;
}
|
||||
|
||||
/**
 * Translates a PDF by rebuilding it.
 *
 * PDFs cannot be edited in place, so the text is extracted, split into
 * paragraphs (on blank lines or form feeds), translated, and laid out on new
 * A4-sized pages under a title line. The original layout, images and fonts
 * are not carried over.
 *
 * NOTE(review): text is drawn with the standard Helvetica fonts, which
 * presumably cannot encode non-Latin scripts in pdf-lib — confirm, and embed a
 * Unicode font if translations into such languages must be supported.
 *
 * @param buffer raw PDF contents
 * @param targetLanguage language code passed to the translation backend
 * @param sourceLanguage optional source language, shown in the title only
 * @returns the newly generated PDF
 * @throws Error when no text can be extracted or the translation backend fails
 */
export async function translatePdf(
  buffer: Buffer,
  targetLanguage: string,
  sourceLanguage?: string
): Promise<Buffer> {
  const parsed = await parsePdf(buffer);
  const rawText = parsed.text;

  // Split into paragraphs (separated by double newlines or page breaks)
  const paragraphs = rawText
    .split(/\n{2,}|\f/)
    .map(p => p.replace(/\n/g, " ").trim())
    .filter(p => p.length > 0);

  if (paragraphs.length === 0) {
    throw new Error("No extractable text found in PDF");
  }

  // Translate all paragraphs
  const translations = await replicateTranslateBatch(paragraphs, targetLanguage);

  // Build output PDF
  const pdfDoc = await PDFDocument.create();
  const font = await pdfDoc.embedFont(StandardFonts.Helvetica);
  const boldFont = await pdfDoc.embedFont(StandardFonts.HelveticaBold);

  const PAGE_WIDTH = 595;
  const PAGE_HEIGHT = 842;
  const MARGIN = 50;
  const FONT_SIZE = 11;
  const TITLE_SIZE = 13;
  const LINE_HEIGHT = 16;
  const MAX_LINE_CHARS = 80;
  // Vertical space available on an empty page.
  const USABLE_HEIGHT = PAGE_HEIGHT - 2 * MARGIN;

  let page = pdfDoc.addPage([PAGE_WIDTH, PAGE_HEIGHT]);
  let y = PAGE_HEIGHT - MARGIN;

  // Starts a new page when fewer than `needed` points remain above the bottom margin.
  function ensureSpace(needed: number) {
    if (y - needed < MARGIN) {
      page = pdfDoc.addPage([PAGE_WIDTH, PAGE_HEIGHT]);
      y = PAGE_HEIGHT - MARGIN;
    }
  }

  // Title
  const title = `Translation to: ${targetLanguage}${sourceLanguage ? ` (from: ${sourceLanguage})` : ""}`;
  ensureSpace(TITLE_SIZE + LINE_HEIGHT);
  page.drawText(title, {
    x: MARGIN,
    y,
    size: TITLE_SIZE,
    font: boldFont,
    color: rgb(0.2, 0.2, 0.7)
  });
  y -= TITLE_SIZE + LINE_HEIGHT;

  // Draw translated paragraphs
  for (const para of translations) {
    const lines = wrapText(para, MAX_LINE_CHARS);

    // Keep a paragraph together on one page when it can fit on one. A
    // paragraph taller than a whole page can never fit, so forcing a break
    // would only leave the current page mostly empty; let it flow instead.
    const blockHeight = lines.length * LINE_HEIGHT + LINE_HEIGHT;
    if (blockHeight <= USABLE_HEIGHT) {
      ensureSpace(blockHeight);
    }

    for (const line of lines) {
      ensureSpace(LINE_HEIGHT);
      page.drawText(line, {
        x: MARGIN,
        y,
        size: FONT_SIZE,
        font,
        color: rgb(0, 0, 0)
      });
      y -= LINE_HEIGHT;
    }
    y -= LINE_HEIGHT * 0.5; // paragraph gap
  }

  const pdfBytes = await pdfDoc.save();
  return Buffer.from(pdfBytes);
}
|
||||
83
utils/replicate-translate.ts
Normal file
83
utils/replicate-translate.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
import { readSettings } from "./settings-store";
|
||||
|
||||
// Shapes of the prediction `output` field that replicateTranslate knows how to
// unwrap: a plain string, an array of string chunks (joined without separator),
// or an object carrying the text under one of three candidate keys.
type ReplicateOutput = string | string[] | { translation?: string; translated_text?: string; output?: string };
|
||||
|
||||
/**
 * Translates one piece of text through the Replicate predictions API.
 *
 * The prediction is created with `Prefer: wait`, which asks the API to hold
 * the request open until the model finishes. If the response still reports a
 * non-terminal status ("starting" / "processing"), the prediction is polled
 * via its `urls.get` link until it completes or the poll budget runs out.
 *
 * @param text text to translate
 * @param targetLanguage language code passed to the model as `target_language`
 * @returns the translated text
 * @throws Error when the API token or JigsawStack key is not configured, when
 *         the HTTP request fails, when the prediction fails, is canceled or
 *         times out, or when the output has an unrecognized shape
 */
export async function replicateTranslate(
  text: string,
  targetLanguage: string
): Promise<string> {
  const settings = readSettings();

  if (!settings.replicateApiToken) {
    throw new Error("Replicate API token not configured");
  }
  if (!settings.jigsawApiKey) {
    throw new Error("JigsawStack API key not configured");
  }

  const authorization = `Bearer ${settings.replicateApiToken}`;
  const body = {
    version: settings.modelVersion,
    input: {
      text,
      api_key: settings.jigsawApiKey,
      target_language: targetLanguage
    }
  };

  const response = await fetch("https://api.replicate.com/v1/predictions", {
    method: "POST",
    headers: {
      "Authorization": authorization,
      "Content-Type": "application/json",
      "Prefer": "wait"
    },
    body: JSON.stringify(body)
  });

  if (!response.ok) {
    const err = await response.text();
    throw new Error(`Replicate API error: ${response.status} ${err}`);
  }

  let data = await response.json();

  // The synchronous wait is bounded on the server side; a slow or cold model
  // comes back unfinished and has to be polled.
  const POLL_INTERVAL_MS = 1000;
  const MAX_POLLS = 120;
  let polls = 0;
  while (
    (data.status === "starting" || data.status === "processing") &&
    typeof data.urls?.get === "string"
  ) {
    if (polls >= MAX_POLLS) {
      throw new Error("Replicate prediction timed out");
    }
    polls++;
    await new Promise<void>(resolve => setTimeout(resolve, POLL_INTERVAL_MS));
    const pollResponse = await fetch(data.urls.get, {
      headers: { "Authorization": authorization }
    });
    if (!pollResponse.ok) {
      const err = await pollResponse.text();
      throw new Error(`Replicate API error: ${pollResponse.status} ${err}`);
    }
    data = await pollResponse.json();
  }

  if (data.error) {
    const detail = typeof data.error === "string" ? data.error : JSON.stringify(data.error);
    throw new Error(`Replicate model error: ${detail}`);
  }
  if (data.status === "failed" || data.status === "canceled") {
    throw new Error(`Replicate prediction ${data.status}`);
  }

  // Extract translated text from various output formats
  const output: ReplicateOutput = data.output;

  if (typeof output === "string") return output;
  if (Array.isArray(output)) return output.join("");
  if (output && typeof output === "object") {
    const candidate = output.translation ?? output.translated_text ?? output.output;
    // Never stringify an unknown object: that would hand "[object Object]"
    // back to the caller as if it were a translation.
    if (typeof candidate === "string") return candidate;
  }

  throw new Error("Unexpected output format from Replicate");
}
|
||||
|
||||
// Marker that replicateTranslateBatch inserts between texts so several of them
// can be translated in a single API call and split apart again afterwards.
const SEPARATOR = "\n{{SEP}}\n";
|
||||
|
||||
/**
 * Translates several texts, using a single API call when possible.
 *
 * The texts are joined with SEPARATOR, translated as one string and split
 * again. The split is accepted only when it yields exactly one part per input.
 * An exact separator match is tried first, then a whitespace- and
 * case-tolerant match (the model may reflow the text around the marker). If
 * neither split lines up, each text is translated on its own, a few at a time.
 *
 * @param texts texts to translate
 * @param targetLanguage language code passed to the translation backend
 * @returns translations in the same order and of the same length as `texts`
 * @throws Error when any underlying translation call fails
 */
export async function replicateTranslateBatch(
  texts: string[],
  targetLanguage: string
): Promise<string[]> {
  if (texts.length === 0) return [];
  if (texts.length === 1) {
    return [await replicateTranslate(texts[0], targetLanguage)];
  }

  const translated = await replicateTranslate(texts.join(SEPARATOR), targetLanguage);

  const exactParts = translated.split(SEPARATOR);
  if (exactParts.length === texts.length) {
    return exactParts;
  }

  const lenientParts = translated.split(/\s*\{\{\s*SEP\s*\}\}\s*/i);
  if (lenientParts.length === texts.length) {
    return lenientParts;
  }

  // Fallback: translate individually. Concurrency is capped because a document
  // can contain thousands of texts and one request per text fired at once
  // would trip API rate limits.
  const MAX_CONCURRENCY = 5;
  const results = new Array<string>(texts.length);
  let nextIndex = 0;
  const worker = async (): Promise<void> => {
    while (nextIndex < texts.length) {
      const i = nextIndex++;
      results[i] = await replicateTranslate(texts[i], targetLanguage);
    }
  };
  await Promise.all(
    Array.from({ length: Math.min(MAX_CONCURRENCY, texts.length) }, () => worker())
  );
  return results;
}
|
||||
48
utils/settings-store.ts
Normal file
48
utils/settings-store.ts
Normal file
@@ -0,0 +1,48 @@
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
|
||||
// Admin-configurable settings persisted in data/settings.json.
export type Settings = {
|
||||
// Sent as the Bearer token on Replicate API requests.
replicateApiToken: string;
|
||||
// Passed to the model as its `api_key` input.
jigsawApiKey: string;
|
||||
// Sent as the `version` field when creating a prediction.
modelVersion: string;
|
||||
// Presumably toggles AI translation in the UI; not read in this module — verify against callers.
replicateEnabled: boolean;
|
||||
// NOTE(review): despite the name, checkPassword in admin-auth.ts compares the
// submitted password against this value directly, so it holds the password itself.
adminPasswordHash: string;
|
||||
};
|
||||
|
||||
// Values used when data/settings.json is missing or unreadable, and as the
// base that stored values are merged over. Secrets fall back to environment
// variables, read once when this module is loaded.
const DEFAULT_SETTINGS: Settings = {
|
||||
replicateApiToken: process.env["REPLICATE_API_TOKEN"] ?? "",
|
||||
jigsawApiKey: process.env["JIGSAWSTACK_API_KEY"] ?? "",
|
||||
modelVersion: "jigsawstack/text-translate:454df4c49941c05dea05175bd37686d0872c73c1f9366d1c2505db32ade52a89",
|
||||
replicateEnabled: false,
|
||||
// NOTE(review): without ADMIN_PASSWORD the admin password defaults to "admin".
adminPasswordHash: process.env["ADMIN_PASSWORD"] ?? "admin"
|
||||
};
|
||||
|
||||
// Settings file location, resolved against the process working directory.
const SETTINGS_PATH = path.join(process.cwd(), "data", "settings.json");
|
||||
|
||||
/**
 * Creates the directory that holds the settings file, including any missing
 * parent directories, unless it already exists.
 */
function ensureDataDir() {
  const dataDir = path.dirname(SETTINGS_PATH);
  if (fs.existsSync(dataDir)) {
    return;
  }
  fs.mkdirSync(dataDir, { recursive: true });
}
|
||||
|
||||
/**
 * Loads the current settings.
 *
 * Stored values are merged over DEFAULT_SETTINGS, so keys missing from the
 * file fall back to their defaults. A missing file yields the defaults. If the
 * file cannot be read, is not valid JSON, or does not contain a JSON object,
 * the defaults are returned as well, and the problem is logged, because a
 * silent fallback would also silently revert the admin password and API keys.
 *
 * @returns a fresh settings object (safe for the caller to mutate)
 */
export function readSettings(): Settings {
  try {
    ensureDataDir();
    if (!fs.existsSync(SETTINGS_PATH)) {
      return { ...DEFAULT_SETTINGS };
    }
    const parsed: unknown = JSON.parse(fs.readFileSync(SETTINGS_PATH, "utf-8"));
    // Spreading a string or array would inject numeric keys into the settings.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      throw new Error("settings file does not contain a JSON object");
    }
    return { ...DEFAULT_SETTINGS, ...(parsed as Partial<Settings>) };
  } catch (err) {
    console.error(`Failed to read settings from ${SETTINGS_PATH}; falling back to defaults:`, err);
    return { ...DEFAULT_SETTINGS };
  }
}
|
||||
|
||||
/**
 * Merges `updates` into the current settings and persists the result.
 *
 * The file is written to a temporary sibling and renamed into place, so a
 * crash or concurrent read never observes a half-written file (which
 * readSettings would treat as corrupt and replace with defaults). The
 * temporary file is created owner-read/write only because it holds API tokens
 * and the admin password. If the rename is not possible, the settings are
 * written directly to the target path instead.
 *
 * @param updates keys to change; omitted keys keep their current value
 * @returns the complete settings after the update
 * @throws the underlying fs error when the file cannot be written at all
 */
export function writeSettings(updates: Partial<Settings>): Settings {
  ensureDataDir();
  const next = { ...readSettings(), ...updates };
  const serialized = JSON.stringify(next, null, 2);
  const tmpPath = `${SETTINGS_PATH}.${process.pid}.tmp`;

  try {
    fs.writeFileSync(tmpPath, serialized, { encoding: "utf-8", mode: 0o600 });
    fs.renameSync(tmpPath, SETTINGS_PATH);
  } catch {
    try {
      fs.unlinkSync(tmpPath);
    } catch {
      // The temporary file may never have been created; nothing to clean up.
    }
    // Fall back to writing in place.
    fs.writeFileSync(SETTINGS_PATH, serialized, "utf-8");
  }
  return next;
}
|
||||
Reference in New Issue
Block a user