Files
LingvAI/utils/document-processors/excel.ts

96 lines
3.5 KiB
TypeScript
Raw Normal View History

import * as XLSX from "xlsx";
import { replicateTranslateBatch } from "../replicate-translate";
export type SheetColumnInfo = {
sheetName: string;
columns: string[]; // header names or A, B, C...
};
export type ColumnSelection = {
sheetName: string;
columnIndices: number[]; // 0-based column indices to translate
};
/**
* Parse an Excel/CSV buffer and return sheet/column metadata for column selection UI.
*/
export function getExcelColumns(buffer: Buffer, filename: string): SheetColumnInfo[] {
const wb = XLSX.read(buffer, { type: "buffer" });
return wb.SheetNames.map(sheetName => {
const ws = wb.Sheets[sheetName];
const range = XLSX.utils.decode_range(ws["!ref"] ?? "A1");
const columns: string[] = [];
for (let c = range.s.c; c <= range.e.c; c++) {
// Try to get header from first row
const cellAddr = XLSX.utils.encode_cell({ r: 0, c });
const cell = ws[cellAddr];
const header = cell && cell.v != null ? String(cell.v) : XLSX.utils.encode_col(c);
columns.push(header);
}
return { sheetName, columns };
});
}
/**
* Translate selected columns in an Excel buffer. Returns translated buffer.
* columnSelections: array of {sheetName, columnIndices}
* If columnSelections is empty, all text columns are translated.
*/
export async function translateExcel(
buffer: Buffer,
targetLanguage: string,
columnSelections: ColumnSelection[]
): Promise<Buffer> {
const wb = XLSX.read(buffer, { type: "buffer", cellStyles: true, cellNF: true });
for (const sheet of wb.SheetNames) {
const ws = wb.Sheets[sheet];
if (!ws["!ref"]) continue;
const range = XLSX.utils.decode_range(ws["!ref"]);
const selection = columnSelections.find(s => s.sheetName === sheet);
const columnsToTranslate = selection
? selection.columnIndices
: Array.from({ length: range.e.c - range.s.c + 1 }, (_, i) => i + range.s.c);
// Collect all text cells for batch translation
type CellRef = { addr: string; text: string };
const cellRefs: CellRef[] = [];
for (const colIdx of columnsToTranslate) {
// Start from row 1 to skip headers (row 0)
for (let r = range.s.r + 1; r <= range.e.r; r++) {
const addr = XLSX.utils.encode_cell({ r, c: colIdx });
const cell = ws[addr];
if (cell && cell.t === "s" && typeof cell.v === "string" && cell.v.trim()) {
cellRefs.push({ addr, text: cell.v });
}
}
}
if (cellRefs.length === 0) continue;
// Translate in batches of 50
const BATCH_SIZE = 50;
for (let i = 0; i < cellRefs.length; i += BATCH_SIZE) {
const batch = cellRefs.slice(i, i + BATCH_SIZE);
const translations = await replicateTranslateBatch(
batch.map(c => c.text),
targetLanguage
);
batch.forEach((cellRef, idx) => {
const cell = ws[cellRef.addr];
if (cell) {
cell.v = translations[idx];
if (cell.h) cell.h = translations[idx];
if (cell.r) cell.r = undefined;
if (cell.w) cell.w = translations[idx];
}
});
}
}
const out = XLSX.write(wb, { type: "buffer", bookType: "xlsx" });
return Buffer.from(out);
}