Fix local embeddings: resolve all Electron/WASM bundling issues
- esbuild: stub onnxruntime-node/sharp/canvas (required unconditionally)
- esbuild: patch onnx.js to always use ONNX_WEB backend (Electron has process.release.name === "node", which wrongly selects the Node backend)
- esbuild: patch ort-web.min.js to use browser mode (avoids worker_threads and threaded WASM; uses single-threaded ort-wasm-simd.wasm instead)
- EmbedSearch: set numThreads=1, proxy=false for inline WASM inference
- EmbedSearch: yield (setTimeout 0) before each embed to keep UI responsive
- EmbedSearch: create .memex-chat/ dirs unconditionally at buildIndex start
- EmbedSearch: model cache redirected to .memex-chat/models/ via env.cacheDir
- main.ts: trigger buildIndex() in background after initEmbedSearch()

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+58
-4
@@ -1,12 +1,66 @@
|
|||||||
import esbuild from "esbuild";
|
import esbuild from "esbuild";
|
||||||
import process from "process";
|
import process from "process";
|
||||||
import builtins from "builtin-modules";
|
import builtins from "builtin-modules";
|
||||||
|
import { readFile } from "fs/promises";
|
||||||
|
|
||||||
const prod = process.argv[2] === "production";
|
const prod = process.argv[2] === "production";
|
||||||
|
|
||||||
|
// @xenova/transformers pulls in a few native packages through unconditional
// require() calls, although the Electron renderer only ever runs the WASM
// (onnxruntime-web) path. Resolve those packages to an empty CommonJS module
// so the require() calls don't throw at runtime.
const stubNativeModules = {
  name: "stub-native-modules",
  setup(build) {
    // Redirect the exact bare package names into a private "stub" namespace.
    const resolveToStub = ({ path }) => ({ path, namespace: "stub" });
    build.onResolve(
      { filter: /^(onnxruntime-node|sharp|canvas)$/ },
      resolveToStub,
    );

    // Everything living in the "stub" namespace loads as an empty export object.
    const emptyModule = { contents: "module.exports = {};", loader: "js" };
    build.onLoad({ filter: /.*/, namespace: "stub" }, () => ({ ...emptyModule }));
  },
};
|
||||||
|
|
||||||
|
// In Electron's renderer process, process.release.name === "node" is true, which
// causes @xenova/transformers to select the onnxruntime-node backend (our stub).
// Patch onnx.js at build time to always use the WASM/web backend instead.
const forceOnnxWeb = {
  name: "force-onnx-web",
  setup(build) {
    // Accept both "/" and "\" as separator: esbuild hands plugins native paths,
    // so a forward-slash-only filter would silently skip the patch on Windows.
    build.onLoad({ filter: /backends[\\/]onnx\.js$/ }, async (args) => {
      const nodeCheck = "process?.release?.name === 'node'";
      const original = await readFile(args.path, "utf8");
      // Only the first occurrence is rewritten (same as a plain string replace).
      const patched = original.replace(nodeCheck, "false");
      if (patched === original) {
        // The expression was not found (e.g. the library changed its source).
        // Surface it instead of shipping a bundle that picks the stubbed backend.
        console.warn(
          `[force-onnx-web] Node.js backend check not found in ${args.path}; file left unpatched.`,
        );
      }
      return { contents: patched, loader: "js" };
    });
  },
};
|
||||||
|
|
||||||
|
// ort-web detects Node.js via process.versions.node and uses threaded WASM + worker_threads,
// which fails in Electron's renderer. Force browser mode so it uses non-threaded WASM instead.
const forceOrtWebBrowserMode = {
  name: "force-ort-web-browser-mode",
  setup(build) {
    build.onLoad({ filter: /ort-web\.min\.js$/ }, async (args) => {
      // Exact minified form of the Node.js version check inside ort-web.min.js.
      const nodeCheck = '"string"==typeof process.versions.node';
      const original = await readFile(args.path, "utf8");
      // Rewrite every occurrence of the check so no code path takes the Node.js branch.
      const patched = original.replaceAll(nodeCheck, "false");
      if (patched === original) {
        // Nothing matched (e.g. a different onnxruntime-web build or minifier output).
        // Warn rather than silently emitting a bundle that still runs in Node mode.
        console.warn(
          `[force-ort-web-browser-mode] Node.js version check not found in ${args.path}; file left unpatched.`,
        );
      }
      return { contents: patched, loader: "js" };
    });
  },
};
|
||||||
|
|
||||||
const context = await esbuild.context({
|
const context = await esbuild.context({
|
||||||
entryPoints: ["src/main.ts"],
|
entryPoints: ["src/main.ts"],
|
||||||
bundle: true,
|
bundle: true,
|
||||||
|
plugins: [stubNativeModules, forceOnnxWeb, forceOrtWebBrowserMode],
|
||||||
external: [
|
external: [
|
||||||
"obsidian",
|
"obsidian",
|
||||||
"electron",
|
"electron",
|
||||||
@@ -21,14 +75,14 @@ const context = await esbuild.context({
|
|||||||
"@lezer/common",
|
"@lezer/common",
|
||||||
"@lezer/highlight",
|
"@lezer/highlight",
|
||||||
"@lezer/lr",
|
"@lezer/lr",
|
||||||
// Native modules used by onnxruntime — not bundleable, loaded by Node.js at runtime
|
|
||||||
"onnxruntime-node",
|
|
||||||
"sharp",
|
|
||||||
"canvas",
|
|
||||||
...builtins,
|
...builtins,
|
||||||
],
|
],
|
||||||
format: "cjs",
|
format: "cjs",
|
||||||
target: "es2020", // raised from es2018 to support BigInt used by @xenova/transformers
|
target: "es2020", // raised from es2018 to support BigInt used by @xenova/transformers
|
||||||
|
// @xenova/transformers uses import.meta.url to locate its own files.
|
||||||
|
// In CJS output, esbuild stubs import.meta as {}, making fileURLToPath(undefined) throw.
|
||||||
|
// Provide a valid dummy URL so init_env() can complete and env.backends is populated.
|
||||||
|
define: { "import.meta.url": '"file:///obsidian-bundle.js"' },
|
||||||
logLevel: "info",
|
logLevel: "info",
|
||||||
sourcemap: prod ? false : "inline",
|
sourcemap: prod ? false : "inline",
|
||||||
treeShaking: true,
|
treeShaking: true,
|
||||||
|
|||||||
+43
-12
@@ -16,11 +16,11 @@ interface Manifest { model: string; version: number }
|
|||||||
/**
|
/**
|
||||||
* Semantic search engine using Transformers.js for local embeddings.
|
* Semantic search engine using Transformers.js for local embeddings.
|
||||||
*
|
*
|
||||||
* Cache layout (one .ajson file per note, incremental writes):
|
* All data lives under <vault>/.memex-chat/:
|
||||||
* <vault>/.memex-chat/embeddings/.manifest.json — model name + version
|
* models/ — downloaded ONNX model files (via env.cacheDir)
|
||||||
* <vault>/.memex-chat/embeddings/some/note.ajson — { mtime, vec }
|
* embeddings/.manifest.json — model name + version
|
||||||
|
* embeddings/some/note.ajson — { mtime, vec }
|
||||||
*
|
*
|
||||||
* Models are downloaded from HuggingFace on first use and cached by the browser.
|
|
||||||
* WASM runtime is loaded from CDN (cdn.jsdelivr.net) on first use.
|
* WASM runtime is loaded from CDN (cdn.jsdelivr.net) on first use.
|
||||||
*/
|
*/
|
||||||
export class EmbedSearch {
|
export class EmbedSearch {
|
||||||
@@ -51,8 +51,16 @@ export class EmbedSearch {
|
|||||||
return (this.app.vault.adapter as any).basePath as string;
|
return (this.app.vault.adapter as any).basePath as string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private get baseDir(): string {
|
||||||
|
return join(this.vaultRoot, ".memex-chat");
|
||||||
|
}
|
||||||
|
|
||||||
|
private get modelsDir(): string {
|
||||||
|
return join(this.baseDir, "models");
|
||||||
|
}
|
||||||
|
|
||||||
private get embedDir(): string {
|
private get embedDir(): string {
|
||||||
return join(this.vaultRoot, ".memex-chat", "embeddings");
|
return join(this.baseDir, "embeddings");
|
||||||
}
|
}
|
||||||
|
|
||||||
private get manifestPath(): string {
|
private get manifestPath(): string {
|
||||||
@@ -69,16 +77,19 @@ export class EmbedSearch {
|
|||||||
private async loadPipeline(): Promise<void> {
|
private async loadPipeline(): Promise<void> {
|
||||||
if (this.pipe) return;
|
if (this.pipe) return;
|
||||||
|
|
||||||
// Dynamic import — bundled by esbuild, WASM loaded from CDN at runtime
|
// Use require() — reliable in CJS bundle; still lazy since we're inside an async function.
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
const mod = await import("@xenova/transformers") as any;
|
const { pipeline, env } = require("@xenova/transformers") as any;
|
||||||
const { pipeline, env } = mod;
|
|
||||||
|
|
||||||
env.backends.onnx.wasm.wasmPaths =
|
env.backends.onnx.wasm.wasmPaths =
|
||||||
"https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/";
|
"https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/";
|
||||||
env.backends.onnx.wasm.proxy = false;
|
env.backends.onnx.wasm.proxy = false; // proxy Worker hangs in Obsidian; run inline instead
|
||||||
|
env.backends.onnx.wasm.numThreads = 1;
|
||||||
env.allowLocalModels = false;
|
env.allowLocalModels = false;
|
||||||
env.useBrowserCache = true;
|
env.allowRemoteModels = true;
|
||||||
|
env.useBrowserCache = false;
|
||||||
|
env.useFSCache = true;
|
||||||
|
env.cacheDir = this.modelsDir; // store downloaded models in vault's .memex-chat/models/
|
||||||
|
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
const progress_callback = (p: any) => {
|
const progress_callback = (p: any) => {
|
||||||
@@ -121,6 +132,15 @@ export class EmbedSearch {
|
|||||||
this.vecs.clear();
|
this.vecs.clear();
|
||||||
|
|
||||||
const changed: string[] = []; // vault paths newly embedded this run
|
const changed: string[] = []; // vault paths newly embedded this run
|
||||||
|
let pipelineError: unknown = null;
|
||||||
|
|
||||||
|
// Create directories unconditionally — independent of pipeline success
|
||||||
|
try {
|
||||||
|
await fsp.mkdir(this.modelsDir, { recursive: true });
|
||||||
|
await fsp.mkdir(this.embedDir, { recursive: true });
|
||||||
|
} catch (e) {
|
||||||
|
console.error("[Memex] Verzeichnisse konnten nicht angelegt werden:", e);
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await this.loadCache();
|
await this.loadCache();
|
||||||
@@ -140,6 +160,9 @@ export class EmbedSearch {
|
|||||||
this.vecs.set(file.path, { vec: cached.vec, file });
|
this.vecs.set(file.path, { vec: cached.vec, file });
|
||||||
} else {
|
} else {
|
||||||
try {
|
try {
|
||||||
|
// Yield before each inference so Obsidian's event loop can process events
|
||||||
|
// (WASM inference is synchronous and blocks the main thread briefly per note)
|
||||||
|
await new Promise((r) => setTimeout(r, 0));
|
||||||
const raw = await this.app.vault.cachedRead(file);
|
const raw = await this.app.vault.cachedRead(file);
|
||||||
const text = this.preprocess(raw).slice(0, 800) + " " + file.basename;
|
const text = this.preprocess(raw).slice(0, 800) + " " + file.basename;
|
||||||
const vec = await this.embed(text);
|
const vec = await this.embed(text);
|
||||||
@@ -147,8 +170,14 @@ export class EmbedSearch {
|
|||||||
this.vecs.set(file.path, { vec, file });
|
this.vecs.set(file.path, { vec, file });
|
||||||
changed.push(file.path);
|
changed.push(file.path);
|
||||||
windowEmbedded++;
|
windowEmbedded++;
|
||||||
} catch {
|
} catch (e) {
|
||||||
// skip unembeddable files
|
if (!this.pipe && !pipelineError) {
|
||||||
|
// Pipeline failed to load — log once and abort embedding loop
|
||||||
|
pipelineError = e;
|
||||||
|
console.error("[Memex] Pipeline-Ladefehler:", e);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// skip individual file
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -163,6 +192,8 @@ export class EmbedSearch {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (pipelineError) throw pipelineError;
|
||||||
|
|
||||||
const allPaths = new Set(files.map((f) => f.path));
|
const allPaths = new Set(files.map((f) => f.path));
|
||||||
await this.saveCache(changed, allPaths);
|
await this.saveCache(changed, allPaths);
|
||||||
this.indexed = true;
|
this.indexed = true;
|
||||||
|
|||||||
Reference in New Issue
Block a user