From cb30e53c5e25f8ddcf954e8bcb88b4f54e18afdb Mon Sep 17 00:00:00 2001
From: Nystik <236107-Nystik@users.noreply.gitlab.com>
Date: Sat, 9 May 2026 21:43:49 +0200
Subject: [PATCH] add index pre-fetch

---
 server/routes/fs.js              |  52 ++++++++++++++
 src/shims/fs/indexer-prefetch.js | 113 +++++++++++++++++++++++++++++++
 src/shims/init.js                |   9 +++
 3 files changed, 174 insertions(+)
 create mode 100644 src/shims/fs/indexer-prefetch.js

diff --git a/server/routes/fs.js b/server/routes/fs.js
index 100a9d0..f390c09 100644
--- a/server/routes/fs.js
+++ b/server/routes/fs.js
@@ -479,6 +479,58 @@ router.post("/utimes", async (req, res) => {
   }
 });
 
+// POST /api/fs/batch-read { paths, vault } - bulk read text file contents
+// Used by the indexer pre-fetcher to avoid N round trips during startup.
+// Paths that cannot be resolved or read are omitted from the { files } reply.
+router.post("/batch-read", async (req, res) => {
+  const vaultRoot = getVaultRoot(req, res);
+
+  if (!vaultRoot) {
+    return;
+  }
+
+  // Only string entries are usable as vault-relative paths.
+  const requested = Array.isArray(req.body?.paths) ? req.body.paths : [];
+  const paths = requested.filter((p) => typeof p === "string");
+  // Null-prototype map: keys come from the request body, so "__proto__"
+  // must be stored as ordinary data instead of hitting the prototype setter.
+  const files = Object.create(null);
+
+  await Promise.all(
+    paths.map(async (relPath) => {
+      try {
+        // Inside the try so a resolver throw cannot reject Promise.all.
+        const resolved = resolveVaultPath(vaultRoot, relPath);
+
+        if (!resolved) {
+          return;
+        }
+
+        const buffered = getPending(resolved);
+
+        if (buffered) {
+          if (typeof buffered.data === "string") {
+            files[relPath] = buffered.data;
+          } else if (
+            buffered.encoding === "utf8" ||
+            buffered.encoding === "utf-8"
+          ) {
+            files[relPath] = buffered.data.toString("utf-8");
+          }
+          return;
+        }
+
+        files[relPath] = await fs.promises.readFile(resolved, "utf-8");
+      } catch {
+        // Skip unreadable files silently. The client falls back to a
+        // normal readFile when a path isn't in the response.
+      }
+    }),
+  );
+
+  res.json({ files });
+});
+
 // GET /api/fs/tree?path=...&vault=... returns full recursive file tree with metadata
 router.get("/tree", async (req, res) => {
   const vaultRoot = getVaultRoot(req, res);
diff --git a/src/shims/fs/indexer-prefetch.js b/src/shims/fs/indexer-prefetch.js
new file mode 100644
index 0000000..1ec1ccd
--- /dev/null
+++ b/src/shims/fs/indexer-prefetch.js
@@ -0,0 +1,113 @@
+// Eager batch pre-fetch of vault content into ContentCache.
+//
+// Fired once after the metadata cache is populated. Iterates the tree in
+// directory-traversal order and pulls text file contents in batches via
+// /api/fs/batch-read. Caps at MAX_BYTES so it doesn't thrash the LRU.
+// Drops content directly into ContentCache; the indexer hits the cache
+// instead of fetching each file individually.
+
+const TEXT_EXTENSIONS = new Set([
+  ".md", ".markdown", ".txt", ".json", ".csv",
+  ".css", ".js", ".ts", ".tsx", ".mjs", ".cjs",
+  ".html", ".xml", ".yaml", ".yml", ".toml",
+  ".svg",
+]); // dot-prefixed, lowercase; isTextPath lowercases before lookup
+
+const MAX_BYTES = 30 * 1024 * 1024; // 30 MB budget for the summed sizes of selected files
+const MAX_FILE_BYTES = 512 * 1024; // skip files larger than 512 KB
+const BATCH_SIZE = 50; // max paths sent per batch-read request
+
+// True when the text after the last "." in `path` (dot included,
+// lowercased) is in TEXT_EXTENSIONS; false when there is no dot.
+function isTextPath(path) {
+  const extStart = path.lastIndexOf(".");
+  const hasDot = extStart >= 0;
+  const ext = hasDot ? path.slice(extStart).toLowerCase() : "";
+
+  return hasDot && TEXT_EXTENSIONS.has(ext);
+}
+
+// Picks which tree entries to prefetch. Returns { paths, bytes }: the
+// chosen paths in tree key order and the sum of their reported sizes.
+function selectPrefetchTargets(tree) {
+  const selected = [];
+  let total = 0;
+
+  // Tree key order is relied on as directory-traversal order (the
+  // server's walk is said to emit parents before children).
+  for (const [path, entry] of Object.entries(tree)) {
+    const size = entry.size || 0;
+    const skip =
+      entry.type !== "file" ||
+      !isTextPath(path) ||
+      size === 0 ||
+      size > MAX_FILE_BYTES;
+
+    if (skip) {
+      continue;
+    }
+
+    // Stop at the first file that would exceed the MAX_BYTES budget.
+    if (total + size > MAX_BYTES) {
+      break;
+    }
+
+    selected.push(path);
+    total += size;
+  }
+
+  return { paths: selected, bytes: total };
+}
+
+// POST one batch of paths to /api/fs/batch-read; throws on a non-OK status.
+async function fetchBatch(vaultId, paths) {
+  const payload = JSON.stringify({ vault: vaultId, paths });
+  const response = await fetch("/api/fs/batch-read", {
+    method: "POST",
+    headers: { "Content-Type": "application/json" },
+    body: payload,
+  });
+  if (!response.ok) {
+    throw new Error(`batch-read failed: ${response.status}`);
+  }
+  return response.json();
+}
+
+// Best-effort warm-up: fetches the selected text files in batches and
+// stores each returned string via contentCache.set(path, content). Never
+// rejects; the first failure is logged with console.warn and ends the run.
+export async function prefetchVaultContent(vaultId, tree, contentCache) {
+  if (!vaultId || !tree || typeof contentCache?.set !== "function") {
+    return;
+  }
+
+  const t0 = Date.now();
+  let cached = 0;
+
+  try {
+    // Selection is inside the try so a malformed tree cannot reject.
+    const { paths, bytes } = selectPrefetchTargets(tree);
+
+    for (let i = 0; i < paths.length; i += BATCH_SIZE) {
+      const result = await fetchBatch(vaultId, paths.slice(i, i + BATCH_SIZE));
+
+      for (const [path, content] of Object.entries(result?.files ?? {})) {
+        if (typeof content === "string") {
+          contentCache.set(path, content);
+          cached++;
+        }
+      }
+    }
+
+    if (paths.length > 0) {
+      const kb = (bytes / 1024).toFixed(0);
+      const ms = Date.now() - t0;
+
+      console.log(
+        `[ignis] Prefetched ${cached}/${paths.length} files (${kb} KB) in ${ms}ms`,
+      );
+    }
+  } catch (e) {
+    console.warn("[ignis] Prefetch batch failed:", e?.message ?? e);
+  }
+}
diff --git a/src/shims/init.js b/src/shims/init.js
index 5d496ce..5139a93 100644
--- a/src/shims/init.js
+++ b/src/shims/init.js
@@ -4,6 +4,7 @@ import { vaultService } from "../services/vault-service.js";
 import { showPluginInstallDialog } from "../ui/bootstrap.js";
 import { registerReadTransform } from "./fs/read-transforms.js";
 import { resolveWorkspaceName, initWorkspacePatch } from "./workspace.js";
+import { prefetchVaultContent } from "./fs/indexer-prefetch.js";
 
 function resolveVaultId() {
   const urlParams = new URLSearchParams(window.location.search);
@@ -218,6 +219,14 @@ export function initialize() {
     window.__vaultList = bootstrap.vaultList;
     applyTree(bootstrap.tree);
     applyCoreSyncGuard(bootstrap.plugins);
+
+    // Race the indexer: batch-fetch text content into ContentCache so
+    // Obsidian's startup indexing reads hit the cache instead of the network.
+    prefetchVaultContent(
+      window.__currentVaultId,
+      bootstrap.tree,
+      fsShim._contentCache,
+    );
   } else {
     initVaultConfigFallback();
     initVaultListFallback();