Fix local embeddings: resolve all Electron/WASM bundling issues
- esbuild: stub onnxruntime-node/sharp/canvas (required unconditionally)
- esbuild: patch onnx.js to always use ONNX_WEB backend (Electron has process.release.name === "node", which wrongly selects the Node backend)
- esbuild: patch ort-web.min.js to use browser mode (avoids worker_threads and threaded WASM; uses single-threaded ort-wasm-simd.wasm instead)
- EmbedSearch: set numThreads=1, proxy=false for inline WASM inference
- EmbedSearch: yield (setTimeout 0) before each embed to keep UI responsive
- EmbedSearch: create .memex-chat/ dirs unconditionally at buildIndex start
- EmbedSearch: model cache redirected to .memex-chat/models/ via env.cacheDir
- main.ts: trigger buildIndex() in background after initEmbedSearch()

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+58
-4
@@ -1,12 +1,66 @@
|
||||
import esbuild from "esbuild";
|
||||
import process from "process";
|
||||
import builtins from "builtin-modules";
|
||||
import { readFile } from "fs/promises";
|
||||
|
||||
const prod = process.argv[2] === "production";
|
||||
|
||||
/**
 * esbuild plugin that replaces native-only dependencies with an empty module.
 *
 * @xenova/transformers require()s `onnxruntime-node`, `sharp` and `canvas`
 * unconditionally, although the Electron renderer only uses the WASM
 * (onnxruntime-web) path. Resolving them to an empty CommonJS module keeps
 * those require() calls from throwing at runtime.
 */
const stubNativeModules = {
  name: "stub-native-modules",
  setup(build) {
    // Exact package names only — sub-paths and similarly named packages are left alone.
    const nativeModulePattern = /^(onnxruntime-node|sharp|canvas)$/;

    // Move a matching import into the private "stub" namespace.
    function redirectToStub(args) {
      return { path: args.path, namespace: "stub" };
    }

    // Everything in the "stub" namespace loads as an empty CommonJS module.
    function loadEmptyModule() {
      return { contents: "module.exports = {};", loader: "js" };
    }

    build.onResolve({ filter: nativeModulePattern }, redirectToStub);
    build.onLoad({ filter: /.*/, namespace: "stub" }, loadEmptyModule);
  },
};
|
||||
|
||||
/**
 * esbuild plugin that forces @xenova/transformers onto the WASM/web ONNX backend.
 *
 * In Electron's renderer process, process.release.name === "node" is true, which
 * causes @xenova/transformers to select the onnxruntime-node backend (stubbed out
 * in this build). The plugin rewrites that check in `backends/onnx.js` to `false`
 * at build time so the web backend is always chosen.
 *
 * If the expected expression is not present in the loaded file, the source is
 * passed through unchanged and a build warning is emitted instead of failing silently.
 */
const forceOnnxWeb = {
  name: "force-onnx-web",
  setup(build) {
    // The runtime check exactly as it is written in the library source.
    const nodeCheck = "process?.release?.name === 'node'";

    // Accept both "/" and "\" as separator so the filter also matches Windows paths.
    build.onLoad({ filter: /backends[\\/]onnx\.js$/ }, async (args) => {
      const source = await readFile(args.path, "utf8");
      const found = source.includes(nodeCheck);

      return {
        // A string pattern makes replace() rewrite the first occurrence only.
        contents: source.replace(nodeCheck, "false"),
        loader: "js",
        warnings: found
          ? []
          : [
              {
                text:
                  `force-onnx-web: expected expression "${nodeCheck}" not found in ` +
                  `${args.path}; the ONNX backend selection was NOT patched.`,
              },
            ],
      };
    });
  },
};
|
||||
|
||||
/**
 * esbuild plugin that forces onnxruntime-web into browser mode.
 *
 * ort-web detects Node.js via process.versions.node and then uses threaded WASM +
 * worker_threads, which fails in Electron's renderer. Rewriting every such check
 * in `ort-web.min.js` to `false` makes it use non-threaded WASM instead.
 *
 * If the minified check is not present in the loaded file, the source is passed
 * through unchanged and a build warning is emitted instead of failing silently.
 */
const forceOrtWebBrowserMode = {
  name: "force-ort-web-browser-mode",
  setup(build) {
    // The Node.js version check exactly as it appears in the minified bundle.
    const nodeCheck = '"string"==typeof process.versions.node';

    build.onLoad({ filter: /ort-web\.min\.js$/ }, async (args) => {
      const source = await readFile(args.path, "utf8");

      // Splitting on the needle both counts the occurrences and, once re-joined
      // with "false", replaces every one of them.
      const pieces = source.split(nodeCheck);
      const occurrences = pieces.length - 1;

      return {
        contents: pieces.join("false"),
        loader: "js",
        warnings:
          occurrences > 0
            ? []
            : [
                {
                  text:
                    `force-ort-web-browser-mode: expected expression ${nodeCheck} not found in ` +
                    `${args.path}; browser mode was NOT forced.`,
                },
              ],
      };
    });
  },
};
|
||||
|
||||
const context = await esbuild.context({
|
||||
entryPoints: ["src/main.ts"],
|
||||
bundle: true,
|
||||
plugins: [stubNativeModules, forceOnnxWeb, forceOrtWebBrowserMode],
|
||||
external: [
|
||||
"obsidian",
|
||||
"electron",
|
||||
@@ -21,14 +75,14 @@ const context = await esbuild.context({
|
||||
"@lezer/common",
|
||||
"@lezer/highlight",
|
||||
"@lezer/lr",
|
||||
// Native modules used by onnxruntime — not bundleable, loaded by Node.js at runtime
|
||||
"onnxruntime-node",
|
||||
"sharp",
|
||||
"canvas",
|
||||
...builtins,
|
||||
],
|
||||
format: "cjs",
|
||||
target: "es2020", // raised from es2018 to support BigInt used by @xenova/transformers
|
||||
// @xenova/transformers uses import.meta.url to locate its own files.
|
||||
// In CJS output, esbuild stubs import.meta as {}, making fileURLToPath(undefined) throw.
|
||||
// Provide a valid dummy URL so init_env() can complete and env.backends is populated.
|
||||
define: { "import.meta.url": '"file:///obsidian-bundle.js"' },
|
||||
logLevel: "info",
|
||||
sourcemap: prod ? false : "inline",
|
||||
treeShaking: true,
|
||||
|
||||
+43
-12
@@ -16,11 +16,11 @@ interface Manifest { model: string; version: number }
|
||||
/**
|
||||
* Semantic search engine using Transformers.js for local embeddings.
|
||||
*
|
||||
* Cache layout (one .ajson file per note, incremental writes):
|
||||
* <vault>/.memex-chat/embeddings/.manifest.json — model name + version
|
||||
* <vault>/.memex-chat/embeddings/some/note.ajson — { mtime, vec }
|
||||
* All data lives under <vault>/.memex-chat/:
|
||||
* models/ — downloaded ONNX model files (via env.cacheDir)
|
||||
* embeddings/.manifest.json — model name + version
|
||||
* embeddings/some/note.ajson — { mtime, vec }
|
||||
*
|
||||
* Models are downloaded from HuggingFace on first use and cached on disk under models/.
|
||||
* WASM runtime is loaded from CDN (cdn.jsdelivr.net) on first use.
|
||||
*/
|
||||
export class EmbedSearch {
|
||||
@@ -51,8 +51,16 @@ export class EmbedSearch {
|
||||
return (this.app.vault.adapter as any).basePath as string;
|
||||
}
|
||||
|
||||
/** Folder inside the vault root that holds all data written by this class. */
private get baseDir(): string {
  const dataFolderName = ".memex-chat";
  return join(this.vaultRoot, dataFolderName);
}
|
||||
|
||||
/** Sub-folder of `baseDir` used as the on-disk cache for downloaded model files. */
private get modelsDir(): string {
  const parent = this.baseDir;
  return join(parent, "models");
}
|
||||
|
||||
/**
 * Sub-folder of `baseDir` that holds the per-note embedding cache files.
 *
 * Derived from `baseDir` so the ".memex-chat" folder name is defined in one place.
 */
private get embedDir(): string {
  return join(this.baseDir, "embeddings");
}
|
||||
|
||||
private get manifestPath(): string {
|
||||
@@ -69,16 +77,19 @@ export class EmbedSearch {
|
||||
private async loadPipeline(): Promise<void> {
|
||||
if (this.pipe) return;
|
||||
|
||||
// Dynamic import — bundled by esbuild, WASM loaded from CDN at runtime
|
||||
// Use require() — reliable in CJS bundle; still lazy since we're inside an async function.
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const mod = await import("@xenova/transformers") as any;
|
||||
const { pipeline, env } = mod;
|
||||
const { pipeline, env } = require("@xenova/transformers") as any;
|
||||
|
||||
env.backends.onnx.wasm.wasmPaths =
|
||||
"https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/";
|
||||
env.backends.onnx.wasm.proxy = false;
|
||||
env.backends.onnx.wasm.proxy = false; // proxy Worker hangs in Obsidian; run inline instead
|
||||
env.backends.onnx.wasm.numThreads = 1;
|
||||
env.allowLocalModels = false;
|
||||
env.useBrowserCache = true;
|
||||
env.allowRemoteModels = true;
|
||||
env.useBrowserCache = false;
|
||||
env.useFSCache = true;
|
||||
env.cacheDir = this.modelsDir; // store downloaded models in vault's .memex-chat/models/
|
||||
|
||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||
const progress_callback = (p: any) => {
|
||||
@@ -121,6 +132,15 @@ export class EmbedSearch {
|
||||
this.vecs.clear();
|
||||
|
||||
const changed: string[] = []; // vault paths newly embedded this run
|
||||
let pipelineError: unknown = null;
|
||||
|
||||
// Create directories unconditionally — independent of pipeline success
|
||||
try {
|
||||
await fsp.mkdir(this.modelsDir, { recursive: true });
|
||||
await fsp.mkdir(this.embedDir, { recursive: true });
|
||||
} catch (e) {
|
||||
console.error("[Memex] Verzeichnisse konnten nicht angelegt werden:", e);
|
||||
}
|
||||
|
||||
try {
|
||||
await this.loadCache();
|
||||
@@ -140,6 +160,9 @@ export class EmbedSearch {
|
||||
this.vecs.set(file.path, { vec: cached.vec, file });
|
||||
} else {
|
||||
try {
|
||||
// Yield before each inference so Obsidian's event loop can process events
|
||||
// (WASM inference is synchronous and blocks the main thread briefly per note)
|
||||
await new Promise((r) => setTimeout(r, 0));
|
||||
const raw = await this.app.vault.cachedRead(file);
|
||||
const text = this.preprocess(raw).slice(0, 800) + " " + file.basename;
|
||||
const vec = await this.embed(text);
|
||||
@@ -147,8 +170,14 @@ export class EmbedSearch {
|
||||
this.vecs.set(file.path, { vec, file });
|
||||
changed.push(file.path);
|
||||
windowEmbedded++;
|
||||
} catch {
|
||||
// skip unembeddable files
|
||||
} catch (e) {
|
||||
if (!this.pipe && !pipelineError) {
|
||||
// Pipeline failed to load — log once and abort embedding loop
|
||||
pipelineError = e;
|
||||
console.error("[Memex] Pipeline-Ladefehler:", e);
|
||||
break;
|
||||
}
|
||||
// skip individual file
|
||||
}
|
||||
}
|
||||
|
||||
@@ -163,6 +192,8 @@ export class EmbedSearch {
|
||||
}
|
||||
}
|
||||
|
||||
if (pipelineError) throw pipelineError;
|
||||
|
||||
const allPaths = new Set(files.map((f) => f.path));
|
||||
await this.saveCache(changed, allPaths);
|
||||
this.indexed = true;
|
||||
|
||||
Reference in New Issue
Block a user