add index pre-fetch

This commit is contained in:
Nystik
2026-05-09 21:43:49 +02:00
parent 6dfe2b5c81
commit cb30e53c5e
3 changed files with 174 additions and 0 deletions

View File

@@ -479,6 +479,58 @@ router.post("/utimes", async (req, res) => {
}
});
// POST /api/fs/batch-read { paths, vault } - bulk read text file contents
// Used by the indexer pre-fetcher to avoid N round trips during startup.
//
// Responds with { files: { [relPath]: string } }. Entries that are not
// strings, that resolveVaultPath rejects, or that cannot be read as UTF-8 text
// are simply omitted from the response; the client falls back to a normal
// readFile for any path it does not find there.
router.post("/batch-read", async (req, res) => {
  const vaultRoot = getVaultRoot(req, res);
  if (!vaultRoot) {
    // NOTE(review): presumably getVaultRoot has already written the error
    // response when it returns a falsy value - confirm against its definition.
    return;
  }

  // The body is client-controlled: keep only string entries and drop
  // duplicates so each file is resolved and read at most once.
  const requested = Array.isArray(req.body?.paths) ? req.body.paths : [];
  const paths = [
    ...new Set(requested.filter((entry) => typeof entry === "string")),
  ];
  if (paths.length === 0) {
    return res.json({ files: {} });
  }

  // Prototype-less map: keys are arbitrary client-supplied strings, so avoid
  // any interaction with Object.prototype (e.g. a "__proto__" key).
  const files = Object.create(null);

  await Promise.all(
    paths.map(async (relPath) => {
      try {
        // Resolution happens inside the try so a throwing resolver skips just
        // this entry instead of rejecting the whole async handler.
        const resolved = resolveVaultPath(vaultRoot, relPath);
        if (!resolved) {
          return;
        }

        // Prefer data returned by getPending over the on-disk copy.
        // NOTE(review): assumed to be a not-yet-flushed write buffer - confirm.
        const buffered = getPending(resolved);
        if (buffered) {
          if (typeof buffered.data === "string") {
            files[relPath] = buffered.data;
          } else if (
            buffered.encoding === "utf8" ||
            buffered.encoding === "utf-8"
          ) {
            files[relPath] = buffered.data.toString("utf-8");
          }
          // Pending data that is neither a string nor UTF-8 encoded is left
          // out of the response; the disk is not consulted in that case.
          return;
        }

        files[relPath] = await fs.promises.readFile(resolved, "utf-8");
      } catch {
        // Skip unreadable files silently. The client falls back to a
        // normal readFile when a path isn't in the response.
      }
    }),
  );

  res.json({ files });
});
// GET /api/fs/tree?path=...&vault=... returns full recursive file tree with metadata
router.get("/tree", async (req, res) => {
const vaultRoot = getVaultRoot(req, res);

View File

@@ -0,0 +1,113 @@
// Eager batch pre-fetch of vault content into ContentCache.
//
// Fired once after the metadata cache is populated. Iterates the tree in
// directory-traversal order and pulls text file contents in batches via
// /api/fs/batch-read. Caps at MAX_BYTES so it doesn't thrash the LRU.
// Drops content directly into ContentCache; the indexer hits the cache
// instead of fetching each file individually.
// Lower-case extensions (with leading dot) that isTextPath accepts as
// text files eligible for pre-fetching.
const TEXT_EXTENSIONS = new Set(
  (
    ".md .markdown .txt .json .csv " +
    ".css .js .ts .tsx .mjs .cjs " +
    ".html .xml .yaml .yml .toml " +
    ".svg"
  ).split(" "),
);

// Upper bound on the summed size of all files selected for one pre-fetch run.
const MAX_BYTES = 30 * 1024 ** 2; // 30 MB

// Individual files larger than this are never pre-fetched.
const MAX_FILE_BYTES = 512 * 1024; // 512 KB

// Number of paths sent per /api/fs/batch-read request.
const BATCH_SIZE = 50;
/**
 * Tell whether a path ends in one of the known text extensions.
 *
 * The extension is everything from the last "." in the whole path string,
 * compared case-insensitively against TEXT_EXTENSIONS. A path without any
 * "." is never considered text.
 *
 * @param {string} path - Path to classify.
 * @returns {boolean} true when the trailing extension is in TEXT_EXTENSIONS.
 */
function isTextPath(path) {
  const lastDot = path.lastIndexOf(".");
  const extension = lastDot === -1 ? null : path.slice(lastDot).toLowerCase();
  return extension !== null && TEXT_EXTENSIONS.has(extension);
}
/**
 * Choose which files from the tree should be pre-fetched.
 *
 * Walks the tree in its own key order and keeps entries that are files, pass
 * isTextPath, and have a non-zero size no larger than MAX_FILE_BYTES.
 * Selection stops entirely at the first file that would push the running
 * total above MAX_BYTES; later, smaller files are not considered.
 *
 * @param {object} tree - Map of path to entry; entries expose `type` and `size`.
 * @returns {{ paths: string[], bytes: number }} Selected paths and the sum of
 *   their sizes as reported by the tree.
 */
function selectPrefetchTargets(tree) {
  const selected = [];
  let total = 0;

  // Key order is relied on as directory-traversal order (the server's walk
  // is said to emit parents before their children).
  for (const [candidate, node] of Object.entries(tree)) {
    const isTextFile = node.type === "file" && isTextPath(candidate);
    if (!isTextFile) {
      continue;
    }

    const size = node.size || 0;
    const withinFileLimit = size !== 0 && !(size > MAX_FILE_BYTES);
    if (!withinFileLimit) {
      continue;
    }

    // Budget exhausted: stop scanning rather than hunting for smaller files.
    if (total + size > MAX_BYTES) {
      break;
    }

    selected.push(candidate);
    total += size;
  }

  return { paths: selected, bytes: total };
}
/**
 * Request the contents of several vault files in one round trip.
 *
 * Sends `{ vault, paths }` as JSON to POST /api/fs/batch-read.
 *
 * @param {string} vaultId - Value sent as the `vault` field.
 * @param {string[]} paths - Paths sent as the `paths` field.
 * @returns {Promise<object>} The parsed JSON response body.
 * @throws {Error} When the response status is not OK; the message carries the
 *   HTTP status code.
 */
async function fetchBatch(vaultId, paths) {
  const payload = JSON.stringify({ vault: vaultId, paths });
  const response = await fetch("/api/fs/batch-read", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: payload,
  });

  if (response.ok) {
    return response.json();
  }
  throw new Error(`batch-read failed: ${response.status}`);
}
/**
 * Best-effort warm-up of a content cache with a vault's text files.
 *
 * Picks targets with selectPrefetchTargets, fetches them sequentially in
 * batches of BATCH_SIZE via fetchBatch, and stores every string result with
 * `contentCache.set(path, content)`. The first failing batch aborts the run
 * with a console warning; entries cached before that point are kept.
 *
 * @param {string} vaultId - Vault identifier forwarded to fetchBatch.
 * @param {object} tree - Path-to-entry map forwarded to selectPrefetchTargets.
 * @param {{ set: Function }} contentCache - Cache receiving the fetched
 *   contents; must expose a `set(path, content)` method.
 * @returns {Promise<void>} Resolves when the run finished, aborted, or was
 *   skipped because of missing arguments.
 */
export async function prefetchVaultContent(vaultId, tree, contentCache) {
  if (!vaultId || !tree) {
    return;
  }
  // Without a usable cache every batch would be downloaded only to fail on
  // the first `set` call, so bail out before touching the network.
  if (!contentCache || typeof contentCache.set !== "function") {
    console.warn("[ignis] Prefetch skipped: content cache unavailable");
    return;
  }

  const { paths, bytes } = selectPrefetchTargets(tree);
  if (paths.length === 0) {
    return;
  }

  const t0 = Date.now();
  let cached = 0;
  for (let i = 0; i < paths.length; i += BATCH_SIZE) {
    const batch = paths.slice(i, i + BATCH_SIZE);
    try {
      const result = await fetchBatch(vaultId, batch);
      // Paths the server could not read are absent from `files`; only string
      // payloads are stored.
      for (const [path, content] of Object.entries(result.files || {})) {
        if (typeof content === "string") {
          contentCache.set(path, content);
          cached++;
        }
      }
    } catch (e) {
      // Stop at the first failure instead of retrying the remaining batches.
      console.warn("[ignis] Prefetch batch failed:", e.message);
      return;
    }
  }

  // `bytes` is the size planned from the tree metadata, not the number of
  // bytes actually received.
  const ms = Date.now() - t0;
  console.log(
    `[ignis] Prefetched ${cached}/${paths.length} files (${(bytes / 1024).toFixed(0)} KB) in ${ms}ms`,
  );
}

View File

@@ -4,6 +4,7 @@ import { vaultService } from "../services/vault-service.js";
import { showPluginInstallDialog } from "../ui/bootstrap.js";
import { registerReadTransform } from "./fs/read-transforms.js";
import { resolveWorkspaceName, initWorkspacePatch } from "./workspace.js";
import { prefetchVaultContent } from "./fs/indexer-prefetch.js";
function resolveVaultId() {
const urlParams = new URLSearchParams(window.location.search);
@@ -218,6 +219,14 @@ export function initialize() {
window.__vaultList = bootstrap.vaultList;
applyTree(bootstrap.tree);
applyCoreSyncGuard(bootstrap.plugins);
// Race the indexer: batch-fetch text content into ContentCache so
// Obsidian's startup indexing reads hit the cache instead of the network.
prefetchVaultContent(
window.__currentVaultId,
bootstrap.tree,
fsShim._contentCache,
);
} else {
initVaultConfigFallback();
initVaultListFallback();