v0.3.0: Local embeddings via Transformers.js
- Add EmbedSearch: semantic vector search using @xenova/transformers - Four model options (BGE Micro v2 default, MiniLM, multilingual variants) - Per-note .ajson cache in .memex-chat/embeddings/ with incremental writes - Model download progress + embedding speed (N/s) and ETA display - Settings: toggle embeddings on/off, model selector - activeSearch getter: uses EmbedSearch when enabled, falls back to VaultSearch - esbuild: target es2020 (BigInt), externalize onnxruntime-node Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+5
-1
@@ -21,10 +21,14 @@ const context = await esbuild.context({
|
|||||||
"@lezer/common",
|
"@lezer/common",
|
||||||
"@lezer/highlight",
|
"@lezer/highlight",
|
||||||
"@lezer/lr",
|
"@lezer/lr",
|
||||||
|
// Native modules used by onnxruntime — not bundleable, loaded by Node.js at runtime
|
||||||
|
"onnxruntime-node",
|
||||||
|
"sharp",
|
||||||
|
"canvas",
|
||||||
...builtins,
|
...builtins,
|
||||||
],
|
],
|
||||||
format: "cjs",
|
format: "cjs",
|
||||||
target: "es2018",
|
target: "es2020", // raised from es2018 to support BigInt used by @xenova/transformers
|
||||||
logLevel: "info",
|
logLevel: "info",
|
||||||
sourcemap: prod ? false : "inline",
|
sourcemap: prod ? false : "inline",
|
||||||
treeShaking: true,
|
treeShaking: true,
|
||||||
|
|||||||
+1
-1
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"id": "memex-chat",
|
"id": "memex-chat",
|
||||||
"name": "Memex Chat",
|
"name": "Memex Chat",
|
||||||
"version": "0.2.3",
|
"version": "0.3.0",
|
||||||
"minAppVersion": "1.4.0",
|
"minAppVersion": "1.4.0",
|
||||||
"description": "Chat with your Obsidian vault using Claude AI — semantic context retrieval, @ mentions, thread history.",
|
"description": "Chat with your Obsidian vault using Claude AI — semantic context retrieval, @ mentions, thread history.",
|
||||||
"author": "Sven",
|
"author": "Sven",
|
||||||
|
|||||||
Generated
+948
-2
File diff suppressed because it is too large
Load Diff
+2
-1
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "memex-chat",
|
"name": "memex-chat",
|
||||||
"version": "0.2.3",
|
"version": "0.3.0",
|
||||||
"description": "Obsidian plugin: Chat with your vault using Claude AI",
|
"description": "Obsidian plugin: Chat with your vault using Claude AI",
|
||||||
"main": "main.js",
|
"main": "main.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
@@ -9,6 +9,7 @@
|
|||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"@types/node": "^20.0.0",
|
"@types/node": "^20.0.0",
|
||||||
|
"@xenova/transformers": "^2.17.2",
|
||||||
"builtin-modules": "^3.3.0",
|
"builtin-modules": "^3.3.0",
|
||||||
"esbuild": "^0.20.0",
|
"esbuild": "^0.20.0",
|
||||||
"obsidian": "latest",
|
"obsidian": "latest",
|
||||||
|
|||||||
+41
-9
@@ -120,7 +120,8 @@ export class ChatView extends ItemView {
|
|||||||
rebuildBtn.disabled = true;
|
rebuildBtn.disabled = true;
|
||||||
this.setStatus("Indiziere Vault…");
|
this.setStatus("Indiziere Vault…");
|
||||||
await this.plugin.rebuildIndex();
|
await this.plugin.rebuildIndex();
|
||||||
this.setStatus(`✓ ${this.plugin.search.isIndexed() ? "Index bereit" : ""}`);
|
const ready = this.plugin.activeSearch.isIndexed();
|
||||||
|
this.setStatus(ready ? "✓ Index bereit" : "");
|
||||||
setTimeout(() => this.setStatus(""), 2000);
|
setTimeout(() => this.setStatus(""), 2000);
|
||||||
rebuildBtn.disabled = false;
|
rebuildBtn.disabled = false;
|
||||||
};
|
};
|
||||||
@@ -301,20 +302,45 @@ export class ChatView extends ItemView {
|
|||||||
this.setStatus("Suche relevante Notizen…");
|
this.setStatus("Suche relevante Notizen…");
|
||||||
this.isLoading = true;
|
this.isLoading = true;
|
||||||
try {
|
try {
|
||||||
if (!this.plugin.search.isIndexed()) {
|
const engine = this.plugin.activeSearch;
|
||||||
this.setStatus("Indiziere Vault…");
|
if (!engine.isIndexed()) {
|
||||||
await this.plugin.search.buildIndex();
|
this.setStatus(this.plugin.settings.useEmbeddings ? "Lade Embedding-Modell…" : "Indiziere Vault…");
|
||||||
|
this.attachEmbedProgress();
|
||||||
|
await engine.buildIndex();
|
||||||
|
this.detachEmbedProgress();
|
||||||
}
|
}
|
||||||
this.pendingContext = await this.plugin.search.search(query, this.plugin.settings.maxContextNotes);
|
this.pendingContext = await engine.search(query, this.plugin.settings.maxContextNotes);
|
||||||
this.explicitContext = mentions;
|
this.explicitContext = mentions;
|
||||||
this.renderContextPreview();
|
this.renderContextPreview();
|
||||||
this.setStatus("Kontext bereit — Senden bestätigen oder anpassen");
|
this.setStatus("Kontext bereit — Senden bestätigen oder anpassen");
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
this.setStatus("Fehler bei Kontextsuche: " + e.message);
|
this.detachEmbedProgress();
|
||||||
|
this.setStatus("Fehler bei Kontextsuche: " + (e as Error).message);
|
||||||
}
|
}
|
||||||
this.isLoading = false;
|
this.isLoading = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Route the EmbedSearch model-status and indexing-progress callbacks to this
 * view's status line. Does nothing when the plugin has no EmbedSearch instance.
 */
private attachEmbedProgress(): void {
  const embedder = this.plugin.embedSearch;
  if (embedder) {
    embedder.onModelStatus = (status) => this.setStatus(status);
    embedder.onProgress = (done, total, speed) => {
      // Speed and ETA are only shown once a non-zero speed has been measured;
      // the ETA is additionally omitted when nothing is left to do.
      let suffix = "";
      if (speed > 0) {
        suffix += ` • ${speed.toFixed(1)} N/s`;
        if (done < total) {
          suffix += ` • ~${Math.ceil((total - done) / speed)}s`;
        }
      }
      this.setStatus(`Embedding ${done}/${total}${suffix}`);
    };
  }
}
|
||||||
|
|
||||||
|
/**
 * Remove the status-line callbacks from the plugin's EmbedSearch instance,
 * if there is one.
 */
private detachEmbedProgress(): void {
  const embedder = this.plugin.embedSearch;
  if (embedder) {
    embedder.onProgress = undefined;
    embedder.onModelStatus = undefined;
  }
}
|
||||||
|
|
||||||
private async sendMessage(query: string, additionalFiles: TFile[] = []): Promise<void> {
|
private async sendMessage(query: string, additionalFiles: TFile[] = []): Promise<void> {
|
||||||
this.isLoading = true;
|
this.isLoading = true;
|
||||||
this.sendBtn.disabled = true;
|
this.sendBtn.disabled = true;
|
||||||
@@ -536,13 +562,19 @@ export class ChatView extends ItemView {
|
|||||||
const query = this.inputEl.value.trim() || lastUserMsg;
|
const query = this.inputEl.value.trim() || lastUserMsg;
|
||||||
this.setStatus("Suche Notizen…");
|
this.setStatus("Suche Notizen…");
|
||||||
try {
|
try {
|
||||||
if (!this.plugin.search.isIndexed()) await this.plugin.search.buildIndex();
|
const engine = this.plugin.activeSearch;
|
||||||
const results = await this.plugin.search.search(query, this.plugin.settings.maxContextNotes);
|
if (!engine.isIndexed()) {
|
||||||
|
this.attachEmbedProgress();
|
||||||
|
await engine.buildIndex();
|
||||||
|
this.detachEmbedProgress();
|
||||||
|
}
|
||||||
|
const results = await engine.search(query, this.plugin.settings.maxContextNotes);
|
||||||
this.pendingContext = results;
|
this.pendingContext = results;
|
||||||
this.renderContextPreview();
|
this.renderContextPreview();
|
||||||
this.setStatus("");
|
this.setStatus("");
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
this.setStatus("Fehler: " + e.message);
|
this.detachEmbedProgress();
|
||||||
|
this.setStatus("Fehler: " + (e as Error).message);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,299 @@
|
|||||||
|
import { App, TFile } from "obsidian";
|
||||||
|
import { promises as fsp } from "fs";
|
||||||
|
import { join, relative, dirname } from "path";
|
||||||
|
import type { SearchResult } from "./VaultSearch";
|
||||||
|
|
||||||
|
/**
 * Embedding models offered to the user.
 * `id` is the model identifier passed to the Transformers.js pipeline;
 * `name` is the label shown for it.
 */
export const EMBEDDING_MODELS = [
  {
    id: "TaylorAI/bge-micro-v2",
    name: "BGE Micro v2 (schnell, 384-dim, empfohlen)",
  },
  {
    id: "Xenova/all-MiniLM-L6-v2",
    name: "MiniLM L6 v2 (384-dim)",
  },
  {
    id: "Xenova/multilingual-e5-small",
    name: "Multilingual E5 Small (mehrsprachig, DE/EN)",
  },
  {
    id: "Xenova/paraphrase-multilingual-MiniLM-L12-v2",
    name: "Multilingual MiniLM L12 (mehrsprachig)",
  },
];
|
||||||
|
|
||||||
|
/** One cached embedding as stored in a note's .ajson file. */
interface EmbedCacheEntry {
  /** Modification time of the note when the vector was computed. */
  mtime: number;
  /** Embedding vector of the note. */
  vec: number[];
}

/** Contents of the cache manifest file. */
interface Manifest {
  /** Model id the cached vectors were produced with. */
  model: string;
  /** Version number written alongside the model id. */
  version: number;
}
|
||||||
|
|
||||||
|
/**
 * Semantic search engine using Transformers.js for local embeddings.
 *
 * Cache layout (one .ajson file per note; only notes embedded during the
 * current build are rewritten):
 *   <vault>/.memex-chat/embeddings/.manifest.json   — model name + version
 *   <vault>/.memex-chat/embeddings/some/note.ajson  — { mtime, vec }
 *
 * The pipeline is configured for remote models only (`allowLocalModels = false`)
 * with the browser cache enabled, and the ONNX WASM runtime is loaded from
 * cdn.jsdelivr.net.
 */
export class EmbedSearch {
  /** Maximum number of characters of a text that is handed to the model. */
  private static readonly MAX_EMBED_CHARS = 512;

  private app: App;
  private modelId: string;
  private pipe: ((text: string, opts: object) => Promise<{ data: Float32Array }>) | null = null;
  /** vault path → cache entry as loaded from / written to disk. */
  private cache: Map<string, EmbedCacheEntry> = new Map();
  /** vault path → vector and file of every note in the current index. */
  private vecs: Map<string, { vec: number[]; file: TFile }> = new Map();
  private indexed = false;
  /** The build currently in flight, shared by all concurrent callers. */
  private building: Promise<void> | null = null;

  /**
   * Called after every 5th processed note and once when a build completes.
   * speed = newly embedded notes/sec (cached notes excluded).
   */
  onProgress?: (done: number, total: number, speed: number) => void;
  /** Called during model loading with a human-readable status string. */
  onModelStatus?: (status: string) => void;

  constructor(app: App, modelId: string) {
    this.app = app;
    this.modelId = modelId;
  }

  /** True once a build has completed with the embedding model available. */
  isIndexed(): boolean {
    return this.indexed;
  }

  // ─── Paths ───────────────────────────────────────────────────────────────

  private get vaultRoot(): string {
    // basePath is not part of the typed adapter interface, hence the cast.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    return (this.app.vault.adapter as any).basePath as string;
  }

  private get embedDir(): string {
    return join(this.vaultRoot, ".memex-chat", "embeddings");
  }

  private get manifestPath(): string {
    return join(this.embedDir, ".manifest.json");
  }

  /** Disk path for the embedding of a vault-relative note path (e.g. "folder/note.md"). */
  private noteEmbedPath(vaultPath: string): string {
    return join(this.embedDir, vaultPath.replace(/\.md$/, ".ajson"));
  }

  /** Inverse of noteEmbedPath: vault-relative note path for a cache file on disk. */
  private vaultPathFor(cacheFilePath: string): string {
    const rel = relative(this.embedDir, cacheFilePath).replace(/\.ajson$/, ".md");
    // Normalise to forward slashes, the separator vault paths are keyed by here.
    return rel.split("\\").join("/");
  }

  // ─── Pipeline ────────────────────────────────────────────────────────────

  /** Create the feature-extraction pipeline once; later calls are no-ops. */
  private async loadPipeline(): Promise<void> {
    if (this.pipe) return;

    // Dynamic import so the library is only loaded when embeddings are used.
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const mod = (await import("@xenova/transformers")) as any;
    const { pipeline, env } = mod;

    env.backends.onnx.wasm.wasmPaths =
      "https://cdn.jsdelivr.net/npm/onnxruntime-web@1.14.0/dist/";
    env.backends.onnx.wasm.proxy = false;
    env.allowLocalModels = false;
    env.useBrowserCache = true;

    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const progress_callback = (p: any) => {
      if (!this.onModelStatus) return;
      if (p.status === "initiate") {
        this.onModelStatus(`Lade Modell: ${p.name ?? p.file ?? ""}…`);
      } else if (p.status === "download") {
        const pct = p.progress != null ? ` ${Math.round(p.progress)}%` : "";
        const mb = p.total ? ` (${(p.total / 1e6).toFixed(1)} MB)` : "";
        this.onModelStatus(`Download${pct}${mb}: ${p.file ?? ""}`);
      } else if (p.status === "ready") {
        this.onModelStatus("Modell bereit");
      }
    };

    this.pipe = await pipeline("feature-extraction", this.modelId, {
      quantized: true,
      progress_callback,
    });
  }

  /**
   * Load the pipeline without throwing.
   * @returns false (after logging the error) when the model could not be loaded.
   */
  private async tryLoadPipeline(): Promise<boolean> {
    try {
      await this.loadPipeline();
      return true;
    } catch (e) {
      console.error("[Memex] Embedding-Modell konnte nicht geladen werden:", e);
      return false;
    }
  }

  /**
   * Embed a text (truncated to MAX_EMBED_CHARS) with mean pooling and
   * normalisation requested from the pipeline.
   * @throws whatever pipeline loading or inference throws.
   */
  private async embed(text: string): Promise<number[]> {
    await this.loadPipeline();
    const run = this.pipe;
    if (!run) throw new Error("Embedding pipeline not available");
    const result = await run(text.slice(0, EmbedSearch.MAX_EMBED_CHARS), {
      pooling: "mean",
      normalize: true,
    });
    return Array.from(result.data);
  }

  /**
   * Build the text that represents a note: its body followed by its title.
   * The body is shortened so that the title always fits inside the
   * MAX_EMBED_CHARS window that embed() keeps; previously the title was cut
   * off for every note with a long body.
   */
  private composeNoteText(body: string, title: string): string {
    const limit = EmbedSearch.MAX_EMBED_CHARS;
    const suffix = " " + title;
    if (suffix.length >= limit) return title.slice(0, limit);
    return body.slice(0, limit - suffix.length) + suffix;
  }

  /**
   * Dot product over the length of `a`. Used as the similarity score; embed()
   * asks the pipeline for normalised vectors.
   */
  private cosine(a: number[], b: number[]): number {
    let dot = 0;
    for (let i = 0; i < a.length; i++) dot += a[i] * b[i];
    return dot;
  }

  // ─── Index ────────────────────────────────────────────────────────────────

  /**
   * (Re)build the index from the on-disk cache and the current vault files.
   * If a build is already running, the returned promise is that build, so
   * callers always wait for a finished index instead of racing it.
   */
  buildIndex(): Promise<void> {
    if (!this.building) {
      this.building = this.runBuild().finally(() => {
        this.building = null;
      });
    }
    return this.building;
  }

  /** One full indexing pass; see buildIndex(). */
  private async runBuild(): Promise<void> {
    this.indexed = false;
    this.vecs.clear();

    await this.loadCache();

    const files = this.app.vault.getMarkdownFiles();
    const total = files.length;
    const changed: string[] = []; // vault paths newly embedded this run
    let modelAvailable = true;
    let done = 0;
    let windowStart = Date.now();
    let windowEmbedded = 0;
    let speed = 0;

    for (const file of files) {
      const mtime = file.stat.mtime;
      const cached = this.cache.get(file.path);

      if (cached && cached.mtime === mtime) {
        this.vecs.set(file.path, { vec: cached.vec, file });
      } else if (modelAvailable) {
        // Load the model separately from the per-note work: a load failure is
        // reported once and stops further embedding attempts in this run,
        // instead of being retried and swallowed for every single note.
        modelAvailable = await this.tryLoadPipeline();
        if (modelAvailable && (await this.embedNote(file, mtime))) {
          changed.push(file.path);
          windowEmbedded++;
        }
      }

      done++;
      if (this.onProgress && done % 5 === 0) {
        const elapsed = (Date.now() - windowStart) / 1000;
        if (elapsed > 0 && windowEmbedded > 0) {
          speed = windowEmbedded / elapsed;
          // Restart the measuring window after 25 embedded notes.
          if (windowEmbedded >= 25) {
            windowStart = Date.now();
            windowEmbedded = 0;
          }
        }
        this.onProgress(done, total, speed);
      }
    }

    await this.saveCache(changed, new Set(files.map((f) => f.path)));

    // Without a model the index is incomplete; leaving `indexed` false makes
    // the next search() attempt another build.
    this.indexed = modelAvailable;
    if (modelAvailable && this.onProgress) this.onProgress(total, total, speed);
  }

  /**
   * Read, preprocess and embed one note and record the result in the cache
   * and the index.
   * @returns false (after logging a warning) when the note could not be embedded.
   */
  private async embedNote(file: TFile, mtime: number): Promise<boolean> {
    try {
      const raw = await this.app.vault.cachedRead(file);
      const vec = await this.embed(this.composeNoteText(this.preprocess(raw), file.basename));
      this.cache.set(file.path, { mtime, vec });
      this.vecs.set(file.path, { vec, file });
      return true;
    } catch (e) {
      console.warn(`[Memex] Notiz konnte nicht eingebettet werden: ${file.path}`, e);
      return false;
    }
  }

  /**
   * Return up to `topK` notes whose similarity to `query` exceeds 0.2, best
   * first. Builds the index first when it is not available yet.
   * @throws when the query cannot be embedded (e.g. the model failed to load).
   */
  async search(query: string, topK = 8): Promise<SearchResult[]> {
    if (!this.indexed) await this.buildIndex();

    const qvec = await this.embed(query);
    const hits: Array<{ file: TFile; score: number }> = [];

    for (const { vec, file } of this.vecs.values()) {
      const score = this.cosine(qvec, vec);
      if (score > 0.2) hits.push({ file, score });
    }

    hits.sort((a, b) => b.score - a.score);
    return hits
      .slice(0, topK)
      .map(({ file, score }) => ({ file, score, excerpt: "", title: file.basename }));
  }

  // ─── Cache I/O ───────────────────────────────────────────────────────────

  /** Runtime shape check for a parsed .ajson file. */
  private static isCacheEntry(value: unknown): value is EmbedCacheEntry {
    if (typeof value !== "object" || value === null) return false;
    const entry = value as { mtime?: unknown; vec?: unknown };
    return typeof entry.mtime === "number" && Array.isArray(entry.vec);
  }

  /**
   * Load all existing .ajson files from embedDir into this.cache.
   * If the manifest is missing, unreadable, or names a different model,
   * nothing is loaded (full rebuild).
   */
  private async loadCache(): Promise<void> {
    this.cache.clear();
    try {
      const manifestRaw = await fsp.readFile(this.manifestPath, "utf8");
      const manifest: Manifest = JSON.parse(manifestRaw);
      if (manifest.model !== this.modelId) return; // model changed — rebuild all
    } catch {
      return; // no usable manifest — start fresh
    }
    await this.loadCacheDir(this.embedDir);
  }

  /** Recursively load the .ajson files below `dir`; unreadable directories are skipped. */
  private async loadCacheDir(dir: string): Promise<void> {
    const entries = await fsp.readdir(dir, { withFileTypes: true }).catch(() => null);
    if (!entries) return;

    for (const entry of entries) {
      if (entry.name.startsWith(".")) continue; // skip .manifest.json
      const fullPath = join(dir, entry.name);
      if (entry.isDirectory()) {
        await this.loadCacheDir(fullPath);
      } else if (entry.name.endsWith(".ajson")) {
        try {
          const parsed: unknown = JSON.parse(await fsp.readFile(fullPath, "utf8"));
          // Entries of the wrong shape are ignored so the note gets re-embedded.
          if (!EmbedSearch.isCacheEntry(parsed)) continue;
          this.cache.set(this.vaultPathFor(fullPath), { mtime: parsed.mtime, vec: parsed.vec });
        } catch {
          // skip unreadable or corrupt file
        }
      }
    }
  }

  /**
   * Write the manifest, write .ajson for each newly embedded note, and delete
   * .ajson files of notes that are no longer in the vault. Failures are
   * logged, not thrown.
   */
  private async saveCache(changed: string[], allVaultPaths: Set<string>): Promise<void> {
    try {
      await fsp.mkdir(this.embedDir, { recursive: true });

      const manifest: Manifest = { model: this.modelId, version: 1 };
      await fsp.writeFile(this.manifestPath, JSON.stringify(manifest), "utf8");

      for (const vaultPath of changed) {
        const entry = this.cache.get(vaultPath);
        if (!entry) continue;
        const filePath = this.noteEmbedPath(vaultPath);
        await fsp.mkdir(dirname(filePath), { recursive: true });
        await fsp.writeFile(
          filePath,
          JSON.stringify({ mtime: entry.mtime, vec: entry.vec }),
          "utf8",
        );
      }

      await this.pruneStale(this.embedDir, allVaultPaths);
    } catch (e) {
      console.error("[Memex] Embedding-Cache konnte nicht gespeichert werden:", e);
    }
  }

  /** Recursively delete .ajson files below `dir` whose note is not in `allVaultPaths`. */
  private async pruneStale(dir: string, allVaultPaths: Set<string>): Promise<void> {
    const entries = await fsp.readdir(dir, { withFileTypes: true }).catch(() => null);
    if (!entries) return;

    for (const entry of entries) {
      if (entry.name.startsWith(".")) continue;
      const fullPath = join(dir, entry.name);
      if (entry.isDirectory()) {
        await this.pruneStale(fullPath, allVaultPaths);
      } else if (entry.name.endsWith(".ajson") && !allVaultPaths.has(this.vaultPathFor(fullPath))) {
        await fsp.unlink(fullPath).catch(() => {});
      }
    }
  }

  // ─── Text preprocessing ──────────────────────────────────────────────────

  /**
   * Strip a leading frontmatter block, reduce wiki links and markdown links
   * to their visible text, drop images, and remove heading markers.
   */
  private preprocess(raw: string): string {
    let c = raw;
    if (c.startsWith("---")) {
      const end = c.indexOf("\n---", 3);
      if (end > 0) c = c.slice(end + 4);
    }
    c = c.replace(/\[\[([^\]|]+)(?:\|([^\]]+))?\]\]/g, (_, t, a) => a || t);
    c = c.replace(/!\[.*?\]\(.*?\)/g, "");
    c = c.replace(/\[([^\]]+)\]\(.*?\)/g, "$1");
    c = c.replace(/^#{1,6}\s+/gm, "");
    return c;
  }
}
|
||||||
+36
-1
@@ -1,5 +1,6 @@
|
|||||||
import { App, PluginSettingTab, Setting } from "obsidian";
|
import { App, PluginSettingTab, Setting } from "obsidian";
|
||||||
import type MemexChatPlugin from "./main";
|
import type MemexChatPlugin from "./main";
|
||||||
|
import { EMBEDDING_MODELS } from "./EmbedSearch";
|
||||||
|
|
||||||
export interface PromptButton {
|
export interface PromptButton {
|
||||||
label: string;
|
label: string;
|
||||||
@@ -24,6 +25,8 @@ export interface MemexChatSettings {
|
|||||||
contextProperties: string[];
|
contextProperties: string[];
|
||||||
promptButtons: PromptButton[];
|
promptButtons: PromptButton[];
|
||||||
systemContextFile: string; // optional vault path for extended system context
|
systemContextFile: string; // optional vault path for extended system context
|
||||||
|
useEmbeddings: boolean; // use local embedding model instead of TF-IDF
|
||||||
|
embeddingModel: string; // HuggingFace model ID
|
||||||
}
|
}
|
||||||
|
|
||||||
export const DEFAULT_SETTINGS: MemexChatSettings = {
|
export const DEFAULT_SETTINGS: MemexChatSettings = {
|
||||||
@@ -47,6 +50,8 @@ Wenn du Fragen beantwortest:
|
|||||||
sendOnEnter: false,
|
sendOnEnter: false,
|
||||||
contextProperties: ["collection", "related", "up", "tags"],
|
contextProperties: ["collection", "related", "up", "tags"],
|
||||||
systemContextFile: "",
|
systemContextFile: "",
|
||||||
|
useEmbeddings: false,
|
||||||
|
embeddingModel: "TaylorAI/bge-micro-v2",
|
||||||
promptButtons: [
|
promptButtons: [
|
||||||
{
|
{
|
||||||
label: "Draft Check",
|
label: "Draft Check",
|
||||||
@@ -133,6 +138,36 @@ export class MemexChatSettingsTab extends PluginSettingTab {
|
|||||||
})
|
})
|
||||||
);
|
);
|
||||||
|
|
||||||
|
// --- Semantic Search ---
|
||||||
|
containerEl.createEl("h3", { text: "Semantische Suche (Embeddings)" });
|
||||||
|
containerEl.createEl("p", {
|
||||||
|
text: "Ersetzt TF-IDF durch ein lokales KI-Modell (Transformers.js). Das Modell wird beim ersten Einsatz von HuggingFace heruntergeladen und dann lokal gecacht. WASM-Laufzeit wird einmalig vom CDN geladen.",
|
||||||
|
cls: "setting-item-description",
|
||||||
|
});
|
||||||
|
|
||||||
|
new Setting(containerEl)
|
||||||
|
.setName("Semantische Suche aktivieren")
|
||||||
|
.setDesc("Nutzt lokale Embeddings für kontextbasierte Ähnlichkeitssuche statt TF-IDF")
|
||||||
|
.addToggle((toggle) =>
|
||||||
|
toggle.setValue(this.plugin.settings.useEmbeddings).onChange(async (value) => {
|
||||||
|
this.plugin.settings.useEmbeddings = value;
|
||||||
|
await this.plugin.saveSettings();
|
||||||
|
await this.plugin.initEmbedSearch();
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
new Setting(containerEl)
|
||||||
|
.setName("Embedding-Modell")
|
||||||
|
.setDesc("Welches Modell für die semantische Suche verwenden? Kleiner = schneller, größer = besser.")
|
||||||
|
.addDropdown((drop) => {
|
||||||
|
for (const m of EMBEDDING_MODELS) drop.addOption(m.id, m.name);
|
||||||
|
drop.setValue(this.plugin.settings.embeddingModel).onChange(async (value) => {
|
||||||
|
this.plugin.settings.embeddingModel = value;
|
||||||
|
await this.plugin.saveSettings();
|
||||||
|
await this.plugin.initEmbedSearch();
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
// --- Context ---
|
// --- Context ---
|
||||||
containerEl.createEl("h3", { text: "Kontext-Einstellungen" });
|
containerEl.createEl("h3", { text: "Kontext-Einstellungen" });
|
||||||
|
|
||||||
@@ -428,7 +463,7 @@ export class MemexChatSettingsTab extends PluginSettingTab {
|
|||||||
|
|
||||||
new Setting(containerEl)
|
new Setting(containerEl)
|
||||||
.setName("Index neu aufbauen")
|
.setName("Index neu aufbauen")
|
||||||
.setDesc("Vault-Index für die Suche neu aufbauen (dauert je nach Vault-Größe einige Sekunden)")
|
.setDesc("Vault-Index neu aufbauen (TF-IDF oder Embedding-Index, je nach Einstellung)")
|
||||||
.addButton((btn) =>
|
.addButton((btn) =>
|
||||||
btn
|
btn
|
||||||
.setButtonText("Index neu aufbauen")
|
.setButtonText("Index neu aufbauen")
|
||||||
|
|||||||
+41
-1
@@ -1,6 +1,7 @@
|
|||||||
import { Plugin, WorkspaceLeaf } from "obsidian";
|
import { Plugin, WorkspaceLeaf } from "obsidian";
|
||||||
import { ChatView, VIEW_TYPE_MEMEX_CHAT } from "./ChatView";
|
import { ChatView, VIEW_TYPE_MEMEX_CHAT } from "./ChatView";
|
||||||
import { VaultSearch } from "./VaultSearch";
|
import { VaultSearch } from "./VaultSearch";
|
||||||
|
import { EmbedSearch } from "./EmbedSearch";
|
||||||
import { ClaudeClient } from "./ClaudeClient";
|
import { ClaudeClient } from "./ClaudeClient";
|
||||||
import { MemexChatSettingsTab, MemexChatSettings, DEFAULT_SETTINGS } from "./SettingsTab";
|
import { MemexChatSettingsTab, MemexChatSettings, DEFAULT_SETTINGS } from "./SettingsTab";
|
||||||
|
|
||||||
@@ -12,9 +13,15 @@ interface PluginData {
|
|||||||
export default class MemexChatPlugin extends Plugin {
|
export default class MemexChatPlugin extends Plugin {
|
||||||
settings!: MemexChatSettings;
|
settings!: MemexChatSettings;
|
||||||
search!: VaultSearch;
|
search!: VaultSearch;
|
||||||
|
embedSearch: EmbedSearch | null = null;
|
||||||
claude!: ClaudeClient;
|
claude!: ClaudeClient;
|
||||||
data!: PluginData;
|
data!: PluginData;
|
||||||
|
|
||||||
|
/**
 * The search engine currently in use: the EmbedSearch instance when one
 * exists, otherwise the VaultSearch index.
 */
get activeSearch(): VaultSearch | EmbedSearch {
  const embedder = this.embedSearch;
  return embedder != null ? embedder : this.search;
}
|
||||||
|
|
||||||
async onload(): Promise<void> {
|
async onload(): Promise<void> {
|
||||||
// Load data
|
// Load data
|
||||||
const loaded = (await this.loadData()) as PluginData | null;
|
const loaded = (await this.loadData()) as PluginData | null;
|
||||||
@@ -85,6 +92,9 @@ export default class MemexChatPlugin extends Plugin {
|
|||||||
this.search.priorityProperties = this.settings.contextProperties;
|
this.search.priorityProperties = this.settings.contextProperties;
|
||||||
this.search.buildIndex().catch(console.error);
|
this.search.buildIndex().catch(console.error);
|
||||||
}
|
}
|
||||||
|
if (this.settings.useEmbeddings) {
|
||||||
|
this.initEmbedSearch().catch(console.error);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
console.log("[Memex Chat] Plugin geladen");
|
console.log("[Memex Chat] Plugin geladen");
|
||||||
@@ -107,19 +117,49 @@ export default class MemexChatPlugin extends Plugin {
|
|||||||
this.app.workspace.revealLeaf(leaf);
|
this.app.workspace.revealLeaf(leaf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Replace the EmbedSearch instance according to the current settings:
 * a fresh instance for the configured model when embeddings are enabled,
 * `null` otherwise. No index is built here — that happens on the first
 * search or on an explicit rebuild.
 */
async initEmbedSearch(): Promise<void> {
  const { useEmbeddings, embeddingModel } = this.settings;
  this.embedSearch = useEmbeddings ? new EmbedSearch(this.app, embeddingModel) : null;
}
|
||||||
|
|
||||||
async rebuildIndex(): Promise<void> {
|
async rebuildIndex(): Promise<void> {
|
||||||
const leaves = this.app.workspace.getLeavesOfType(VIEW_TYPE_MEMEX_CHAT);
|
const leaves = this.app.workspace.getLeavesOfType(VIEW_TYPE_MEMEX_CHAT);
|
||||||
const view = leaves[0]?.view as ChatView | undefined;
|
const view = leaves[0]?.view as ChatView | undefined;
|
||||||
|
|
||||||
|
if (this.settings.useEmbeddings && this.embedSearch) {
|
||||||
|
// Rebuild semantic (embedding) index
|
||||||
|
this.embedSearch.onModelStatus = (status) => {
|
||||||
|
if (view) view.setStatus(status);
|
||||||
|
};
|
||||||
|
this.embedSearch.onProgress = (done, total, speed) => {
|
||||||
|
if (view) {
|
||||||
|
const speedStr = speed > 0 ? ` • ${speed.toFixed(1)} N/s` : "";
|
||||||
|
const eta = speed > 0 && done < total
|
||||||
|
? ` • noch ~${Math.ceil((total - done) / speed)}s`
|
||||||
|
: "";
|
||||||
|
view.setStatus(`Embedding ${done}/${total}${speedStr}${eta}`);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
await this.embedSearch.buildIndex();
|
||||||
|
this.embedSearch.onProgress = undefined;
|
||||||
|
this.embedSearch.onModelStatus = undefined;
|
||||||
|
} else {
|
||||||
|
// Rebuild TF-IDF index
|
||||||
this.search.priorityProperties = this.settings.contextProperties;
|
this.search.priorityProperties = this.settings.contextProperties;
|
||||||
this.search.onProgress = (done, total) => {
|
this.search.onProgress = (done, total) => {
|
||||||
if (view && done % 200 === 0) {
|
if (view && done % 200 === 0) {
|
||||||
view.setStatus(`Indiziere… ${done}/${total}`);
|
view.setStatus(`Indiziere… ${done}/${total}`);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
await this.search.buildIndex();
|
await this.search.buildIndex();
|
||||||
this.search.onProgress = undefined;
|
this.search.onProgress = undefined;
|
||||||
|
}
|
||||||
|
|
||||||
if (view) {
|
if (view) {
|
||||||
view.setStatus(`✓ ${this.app.vault.getMarkdownFiles().length} Notizen indiziert`);
|
view.setStatus(`✓ ${this.app.vault.getMarkdownFiles().length} Notizen indiziert`);
|
||||||
setTimeout(() => view.setStatus(""), 3000);
|
setTimeout(() => view.setStatus(""), 3000);
|
||||||
|
|||||||
Reference in New Issue
Block a user