From cb30e53c5e25f8ddcf954e8bcb88b4f54e18afdb Mon Sep 17 00:00:00 2001
From: Nystik <236107-Nystik@users.noreply.gitlab.com>
Date: Sat, 9 May 2026 21:43:49 +0200
Subject: [PATCH] add index pre-fetch

---
Review notes (text between the three-dash line and the first file diff is
ignored by git am):

* This copy was rebuilt from a version whose line breaks had been collapsed.
  Indentation and blank context lines are reconstructed; if a hunk does not
  match, apply with `git apply --ignore-whitespace`.
* The context comment above router.get("/tree") is reproduced as received and
  may have lost a separator character between "vault=..." and "returns".
* Changes relative to the original submission (the commit id and blob ids in
  this mail still refer to that submission):
  - fs.js: non-string entries in `paths` are skipped and resolveVaultPath()
    now runs inside the try, so one bad entry can no longer reject
    Promise.all before res.json() is reached.
  - indexer-prefetch.js: prefetchVaultContent() returns early without a
    cache exposing set(), and catches every failure so it never rejects.
  - init.js: the call is marked `void` as intentionally un-awaited.

 server/routes/fs.js              |  62 ++++++++++++++
 src/shims/fs/indexer-prefetch.js | 133 +++++++++++++++++++++++++++++++
 src/shims/init.js                |  10 +++
 3 files changed, 205 insertions(+)
 create mode 100644 src/shims/fs/indexer-prefetch.js

diff --git a/server/routes/fs.js b/server/routes/fs.js
index 100a9d0..f390c09 100644
--- a/server/routes/fs.js
+++ b/server/routes/fs.js
@@ -479,6 +479,68 @@ router.post("/utimes", async (req, res) => {
   }
 });
 
+// POST /api/fs/batch-read { paths, vault } - bulk read text file contents
+// Used by the indexer pre-fetcher to avoid N round trips during startup.
+// Paths that cannot be resolved or read are omitted from the response; the
+// client falls back to a normal readFile for those.
+router.post("/batch-read", async (req, res) => {
+  const vaultRoot = getVaultRoot(req, res);
+
+  if (!vaultRoot) {
+    return;
+  }
+
+  const paths = Array.isArray(req.body?.paths) ? req.body.paths : [];
+
+  if (paths.length === 0) {
+    return res.json({ files: {} });
+  }
+
+  const files = {};
+
+  await Promise.all(
+    paths.map(async (relPath) => {
+      // Entries come straight from the request body. Skip anything that is
+      // not a string so it never reaches the path helpers or becomes a key.
+      if (typeof relPath !== "string") {
+        return;
+      }
+
+      try {
+        // Resolved inside the try: a throw here must not reject Promise.all
+        // and abort the whole batch before res.json() runs.
+        const resolved = resolveVaultPath(vaultRoot, relPath);
+
+        if (!resolved) {
+          return;
+        }
+
+        const buffered = getPending(resolved);
+
+        if (buffered) {
+          if (typeof buffered.data === "string") {
+            files[relPath] = buffered.data;
+          } else if (
+            buffered.encoding === "utf8" ||
+            buffered.encoding === "utf-8"
+          ) {
+            files[relPath] = buffered.data.toString("utf-8");
+          }
+          return;
+        }
+
+        const data = await fs.promises.readFile(resolved, "utf-8");
+        files[relPath] = data;
+      } catch {
+        // Skip unresolvable or unreadable files silently. The client falls
+        // back to a normal readFile when a path isn't in the response.
+      }
+    }),
+  );
+
+  res.json({ files });
+});
+
 // GET /api/fs/tree?path=...&vault=... returns full recursive file tree with metadata
 router.get("/tree", async (req, res) => {
   const vaultRoot = getVaultRoot(req, res);
diff --git a/src/shims/fs/indexer-prefetch.js b/src/shims/fs/indexer-prefetch.js
new file mode 100644
index 0000000..1ec1ccd
--- /dev/null
+++ b/src/shims/fs/indexer-prefetch.js
@@ -0,0 +1,133 @@
+// Eager batch pre-fetch of vault content into ContentCache.
+//
+// Fired once after the metadata cache is populated. Iterates the tree in
+// directory-traversal order and pulls text file contents in batches via
+// /api/fs/batch-read. Caps at MAX_BYTES so it doesn't thrash the LRU.
+// Drops content directly into ContentCache; the indexer hits the cache
+// instead of fetching each file individually.
+
+const TEXT_EXTENSIONS = new Set([
+  ".md", ".markdown", ".txt", ".json", ".csv",
+  ".css", ".js", ".ts", ".tsx", ".mjs", ".cjs",
+  ".html", ".xml", ".yaml", ".yml", ".toml",
+  ".svg",
+]);
+
+const MAX_BYTES = 30 * 1024 * 1024; // 30 MB
+const MAX_FILE_BYTES = 512 * 1024; // skip files larger than 512 KB
+const BATCH_SIZE = 50;
+
+// True when the text after the last "." (lower-cased) is in TEXT_EXTENSIONS.
+function isTextPath(path) {
+  const dot = path.lastIndexOf(".");
+
+  if (dot < 0) {
+    return false;
+  }
+
+  return TEXT_EXTENSIONS.has(path.slice(dot).toLowerCase());
+}
+
+// Picks the paths to prefetch: non-empty text files no larger than
+// MAX_FILE_BYTES, in tree order, stopping at the first file that would push
+// the total past MAX_BYTES. Returns the chosen paths and their summed size.
+function selectPrefetchTargets(tree) {
+  const paths = [];
+  let bytes = 0;
+
+  // Iterate in tree key order, which already matches directory traversal
+  // because the server's walk emits parent-before-children.
+  for (const [path, entry] of Object.entries(tree)) {
+    if (entry.type !== "file") {
+      continue;
+    }
+
+    if (!isTextPath(path)) {
+      continue;
+    }
+
+    const size = entry.size || 0;
+
+    if (size === 0 || size > MAX_FILE_BYTES) {
+      continue;
+    }
+
+    if (bytes + size > MAX_BYTES) {
+      break;
+    }
+
+    paths.push(path);
+    bytes += size;
+  }
+
+  return { paths, bytes };
+}
+
+// POSTs one batch of paths to /api/fs/batch-read and returns the parsed JSON
+// body. Throws on a non-2xx status.
+async function fetchBatch(vaultId, paths) {
+  const res = await fetch("/api/fs/batch-read", {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: JSON.stringify({ vault: vaultId, paths }),
+  });
+
+  if (!res.ok) {
+    throw new Error("batch-read failed: " + res.status);
+  }
+
+  return res.json();
+}
+
+/**
+ * Best-effort warm-up of the content cache with the vault's text files.
+ *
+ * Selects targets from `tree`, requests them in batches of BATCH_SIZE and
+ * stores every string result with `contentCache.set(path, content)`. Stops
+ * at the first failure and logs a warning. Never rejects, so the caller may
+ * fire and forget.
+ *
+ * @param {string} vaultId - Sent as `vault` in each batch-read request.
+ * @param {Object} tree - Path -> entry map; entries are read for `type` and `size`.
+ * @param {{ set: Function }} contentCache - Receives the fetched contents.
+ * @returns {Promise<void>}
+ */
+export async function prefetchVaultContent(vaultId, tree, contentCache) {
+  // Without a usable cache every batch would be fetched and then discarded.
+  if (!vaultId || !tree || typeof contentCache?.set !== "function") {
+    return;
+  }
+
+  try {
+    const { paths, bytes } = selectPrefetchTargets(tree);
+
+    if (paths.length === 0) {
+      return;
+    }
+
+    const t0 = Date.now();
+    let cached = 0;
+
+    for (let i = 0; i < paths.length; i += BATCH_SIZE) {
+      const batch = paths.slice(i, i + BATCH_SIZE);
+      const result = await fetchBatch(vaultId, batch);
+
+      for (const [path, content] of Object.entries(result?.files || {})) {
+        if (typeof content === "string") {
+          contentCache.set(path, content);
+          cached++;
+        }
+      }
+    }
+
+    const ms = Date.now() - t0;
+
+    console.log(
+      `[ignis] Prefetched ${cached}/${paths.length} files (${(bytes / 1024).toFixed(0)} KB) in ${ms}ms`,
+    );
+  } catch (e) {
+    // Also covers target selection, so the un-awaited call in init.js can
+    // never surface as an unhandled rejection.
+    console.warn("[ignis] Prefetch batch failed:", e?.message ?? e);
+  }
+}
diff --git a/src/shims/init.js b/src/shims/init.js
index 5d496ce..5139a93 100644
--- a/src/shims/init.js
+++ b/src/shims/init.js
@@ -4,6 +4,7 @@ import { vaultService } from "../services/vault-service.js";
 import { showPluginInstallDialog } from "../ui/bootstrap.js";
 import { registerReadTransform } from "./fs/read-transforms.js";
 import { resolveWorkspaceName, initWorkspacePatch } from "./workspace.js";
+import { prefetchVaultContent } from "./fs/indexer-prefetch.js";
 
 function resolveVaultId() {
   const urlParams = new URLSearchParams(window.location.search);
@@ -218,6 +219,15 @@ export function initialize() {
     window.__vaultList = bootstrap.vaultList;
     applyTree(bootstrap.tree);
     applyCoreSyncGuard(bootstrap.plugins);
+
+    // Race the indexer: batch-fetch text content into ContentCache so
+    // Obsidian's startup indexing reads hit the cache instead of the network.
+    // Deliberately not awaited; prefetchVaultContent handles its own errors.
+    void prefetchVaultContent(
+      window.__currentVaultId,
+      bootstrap.tree,
+      fsShim._contentCache,
+    );
   } else {
     initVaultConfigFallback();
     initVaultListFallback();