From 3b5c1a2e8e0edbff486615db16a51949de628b4c Mon Sep 17 00:00:00 2001
From: svemagie <869694+svemagie@users.noreply.github.com>
Date: Tue, 31 Mar 2026 00:29:05 +0200
Subject: [PATCH] feat: hybrid search via RRF when embeddings are enabled

Combines TF-IDF and semantic search results using Reciprocal Rank Fusion
instead of switching between engines. Notes scoring well in both rise to
the top; notes found by only one engine are still included if their rank
is strong enough. Improves recall for paraphrased queries and precision
over embeddings-only mode.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 CLAUDE.md           | 23 +++++++++++++++-
 README.md           | 34 +++++++++++++++---------
 main.js             | 58 ++++++++++++++++++++++++++++++++++++++--
 src/HybridSearch.ts | 64 +++++++++++++++++++++++++++++++++++++++++++++
 src/main.ts         | 12 ++++++---
 5 files changed, 172 insertions(+), 19 deletions(-)
 create mode 100644 src/HybridSearch.ts

diff --git a/CLAUDE.md b/CLAUDE.md
index 9813e7b..bad4f32 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -118,4 +118,25 @@ Copy `main.js`, `manifest.json`, `styles.css` into `.obsidian/plugins/memex-chat
 
 ## Models (SettingsTab.ts)
 
-Default: `claude-opus-4-6`. Update `MODELS` array and `DEFAULT_SETTINGS.model` when adding new model IDs.
+Static `MODELS` array (fallback / initial dropdown population):
+
+| ID | Label |
+|---|---|
+| `claude-opus-4-6` | Claude Opus 4.6 (Stärkste) |
+| `claude-sonnet-4-6` | Claude Sonnet 4.6 (Empfohlen) |
+| `claude-haiku-4-5-20251001` | Claude Haiku 4.5 (Schnell) |
+
+Default: `claude-opus-4-6`.
+
+**"Aktualisieren" button**: calls `ClaudeClient.fetchModels(apiKey)` to fetch the live model list from the Anthropic API and repopulate the dropdown dynamically. This supersedes the static array at runtime. Update `MODELS` and `DEFAULT_SETTINGS.model` only when changing the compile-time fallback.
+
+## Embedding Models (EmbedSearch.ts)
+
+`EMBEDDING_MODELS` array exported from `EmbedSearch.ts` and used to populate the embedding model dropdown in settings:
+
+| ID | Description |
+|---|---|
+| `TaylorAI/bge-micro-v2` | BGE Micro v2 — default, 384-dim, fastest |
+| `Xenova/all-MiniLM-L6-v2` | MiniLM L6 v2 — 384-dim |
+| `Xenova/multilingual-e5-small` | Multilingual E5 Small — DE/EN |
+| `Xenova/paraphrase-multilingual-MiniLM-L12-v2` | Multilingual MiniLM L12 |
diff --git a/README.md b/README.md
index be11222..eb6850c 100644
--- a/README.md
+++ b/README.md
@@ -4,12 +4,11 @@ Chat with your Obsidian vault using Claude AI. Ask questions about your notes, g
 
 ## Features
 
-- **Semantic vault search** — TF-IDF index over all your notes, no external API needed for retrieval
-- **Local embeddings** — optional on-device semantic search using `@xenova/transformers` (BGE Micro v2), fully offline after first model download
+- **Vault search** — TF-IDF index by default; enable local embeddings for hybrid mode (TF-IDF + semantic merged via RRF), fully offline after first model download
 - **Related notes sidebar** — panel showing the most similar notes to whatever you have open, ranked by semantic similarity + frontmatter links + shared tags
 - **Auto context** — relevant notes are automatically found and sent to Claude as context
-- **Context preview** — see and edit which notes are included before sending
-- **`@mention` autocomplete** — reference specific notes directly in your message
+- **Context preview** — see which notes are included before sending, or dismiss to send without context
+- **`@mention` autocomplete** — pin specific notes into context directly from the input field
 - **Thread history** — chats saved as Markdown in your vault (default: `Calendar/Chat/`)
 - **Source links** — every answer shows which notes were used as context
 - **Prompt buttons** — header mode buttons that extend Claude's system prompt (e.g. draft check, monthly review)
@@ -67,8 +66,8 @@ Header buttons that activate a mode by extending Claude's system prompt with the
 When a button is active:
 - The file at its configured vault path is appended to the system prompt
 - An optional hint is shown above the input
-- If `searchMode: "date"` is set, context retrieval switches from TF-IDF/embeddings to date-based file lookup (useful for monthly review modes)
-- Auto context retrieval is skipped while any button is active
+- If `searchMode: "date"` is set, context retrieval switches to date-based file lookup (useful for monthly review modes)
+- Auto context retrieval is skipped
 
 Configure prompt buttons in **Settings → Prompt Buttons**.
 
@@ -82,18 +81,18 @@ In settings you can specify a vault note to always append to the system prompt (
 |---|---|
 | `Memex Chat öffnen` | Open the chat panel |
 | `Verwandte Notizen` | Open the related notes sidebar |
-| `Memex Chat: Index neu aufbauen` | Rebuild the TF-IDF search index |
+| `Memex Chat: Index neu aufbauen` | Rebuild the search index |
 | `Memex Chat: Aktive Notiz als Kontext` | Ask Claude about the currently open note |
 
 ## Related Notes Sidebar
 
-Opens in the right sidebar and automatically shows the top 10 most similar notes to the currently active file. Similarity is computed from:
+Requires embeddings to be enabled. Opens in the right sidebar and automatically shows the top 10 most similar notes to the currently active file. Similarity is computed from:
 
-1. **Semantic embedding similarity** (cosine distance on 384-dim vectors)
+1. **Semantic embedding similarity** (cosine similarity on 384-dim vectors)
 2. **+0.15 boost** for notes linked via `contextProperties` frontmatter fields (e.g. `related: [[Note]]`)
 3. **+0.05 per shared tag** (up to +0.15)
 
-Notes explicitly linked via frontmatter are marked with a **verknüpft** badge.
+Notes boosted by a frontmatter link are marked with a **verknüpft** badge.
 
 ## Settings
 
@@ -102,7 +101,7 @@ Notes explicitly linked via frontmatter are marked with a **verknüpft** badge.
 | Setting | Default | Description |
 |---|---|---|
 | API Key | — | Your Anthropic API key |
-| Model | Claude Opus 4.5 | Which Claude model to use |
+| Model | `claude-opus-4-6` | Which Claude model to use. Click **Aktualisieren** to fetch the live model list from the Anthropic API. |
 | Max tokens | 8192 | Maximum output tokens per response |
 | Max context notes | 6 | How many notes to retrieve per query |
 | Max chars per note | 2500 | How much of each note to include |
@@ -119,11 +118,20 @@ Notes explicitly linked via frontmatter are marked with a **verknüpft** badge.
 
 | Setting | Default | Description |
 |---|---|---|
-| Use embeddings | off | Enable local semantic search instead of TF-IDF |
+| Use embeddings | off | Enable hybrid search (TF-IDF + semantic, merged via RRF) |
 | Embedding model | BGE Micro v2 | ONNX model for local inference |
 | Exclude folders | — | Vault folders skipped during embedding |
 
-When enabled, embeddings are computed locally (no API call) and cached in `<vault>/.memex-chat/embeddings/`. The model (~22 MB) is downloaded once to `<vault>/.memex-chat/models/`. Indexing progress is shown as an Obsidian notice. Obsidian Sync activity is detected automatically — indexing waits until sync is idle before starting.
+| Model | Notes |
+|---|---|
+| `TaylorAI/bge-micro-v2` | Default — fastest, 384-dim |
+| `Xenova/all-MiniLM-L6-v2` | 384-dim |
+| `Xenova/multilingual-e5-small` | German + English |
+| `Xenova/paraphrase-multilingual-MiniLM-L12-v2` | German + English, larger |
+
+Embeddings are computed locally (no API call) and cached in `<vault>/.memex-chat/embeddings/`. The model (~22 MB) is downloaded once to `<vault>/.memex-chat/models/`. Indexing progress is shown as an Obsidian notice. Obsidian Sync activity is detected automatically — indexing waits until sync is idle before starting.
+
+Once indexing completes, context retrieval switches to **hybrid mode**: TF-IDF and semantic results are fetched independently and then rank-merged via Reciprocal Rank Fusion (RRF). Notes that rank well in both engines rise to the top; notes found by only one engine are still included if their rank is strong enough. This catches paraphrased queries that TF-IDF misses and avoids the overly broad matches of embeddings alone.
 
 ## License
 
diff --git a/main.js b/main.js
index d1354ab..27c9b4d 100644
--- a/main.js
+++ b/main.js
@@ -32667,6 +32667,54 @@ var EmbedSearch = class {
   }
 };
 
+// src/HybridSearch.ts
+var RRF_K = 60; // constant added to each rank in the RRF term 1 / (RRF_K + rank + 1)
+var HybridSearch = class {
+  constructor(tfidf, embed) {
+    this.tfidf = tfidf;
+    this.embed = embed;
+  }
+  isIndexed() {
+    return this.embed.isIndexed(); // only the embedding engine's state is consulted
+  }
+  async search(query, topK = 8) {
+    const fetchK = topK * 3; // ask each engine for 3x topK so fusion sees candidates beyond the final cut
+    const [tfidfResults, embedResults] = await Promise.all([
+      this.tfidf.search(query, fetchK),
+      this.embed.search(query, fetchK)
+    ]);
+    const tfidfRank = /* @__PURE__ */ new Map(); // path -> 0-based position in the TF-IDF list as returned
+    tfidfResults.forEach((r, i) => tfidfRank.set(r.file.path, i));
+    const embedRank = /* @__PURE__ */ new Map(); // path -> 0-based position in the embedding list as returned
+    embedResults.forEach((r, i) => embedRank.set(r.file.path, i));
+    const tfidfMap = new Map(tfidfResults.map((r) => [r.file.path, r])); // path -> full TF-IDF result
+    const embedMap = new Map(embedResults.map((r) => [r.file.path, r])); // path -> full embedding result
+    const allPaths = /* @__PURE__ */ new Set([
+      ...tfidfResults.map((r) => r.file.path),
+      ...embedResults.map((r) => r.file.path)
+    ]);
+    const scored = []; // [path, fused score] pairs
+    for (const path3 of allPaths) {
+      const tr = tfidfRank.has(path3) ? 1 / (RRF_K + tfidfRank.get(path3) + 1) : 0; // 0 when TF-IDF did not return the note
+      const er = embedRank.has(path3) ? 1 / (RRF_K + embedRank.get(path3) + 1) : 0; // 0 when embeddings did not return the note
+      scored.push([path3, tr + er]);
+    }
+    scored.sort((a, b) => b[1] - a[1]); // highest fused score first
+    return scored.slice(0, topK).map(([path3, score]) => {
+      const t = tfidfMap.get(path3);
+      const e = embedMap.get(path3);
+      const base = t ?? e; // prefer the TF-IDF row; every path in allPaths came from at least one list
+      return {
+        file: base.file,
+        score, // fused RRF score, not either engine's own score
+        excerpt: t?.excerpt ?? "", // excerpt is taken from the TF-IDF hit only
+        title: base.title,
+        linked: t?.linked ?? e?.linked // TF-IDF's value wins unless it is null/undefined
+      };
+    });
+  }
+};
+
 // src/ClaudeClient.ts
 var import_obsidian2 = require("obsidian");
 var ClaudeClient = class {
@@ -33299,10 +33347,11 @@ var MemexChatPlugin = class extends import_obsidian5.Plugin {
   constructor() {
     super(...arguments);
     this.embedSearch = null;
+    this.hybridSearch = null;
   }
-  /** Returns the active search engine: EmbedSearch when enabled, else VaultSearch */
+  /** Returns the active search engine: HybridSearch when embeddings are ready, else VaultSearch */
   get activeSearch() {
-    return this.embedSearch ?? this.search;
+    return this.hybridSearch ?? this.search;
   }
   async onload() {
     const loaded = await this.loadData();
@@ -33415,6 +33464,7 @@ var MemexChatPlugin = class extends import_obsidian5.Plugin {
   async initEmbedSearch() {
     if (!this.settings.useEmbeddings) {
       this.embedSearch = null;
+      this.hybridSearch = null;
       return;
     }
     this.embedSearch = new EmbedSearch(this.app, this.settings.embeddingModel);
@@ -33432,6 +33482,8 @@ var MemexChatPlugin = class extends import_obsidian5.Plugin {
       notice.setMessage(`Memex [${modelShort}]: ${done}/${total}${speedStr}${eta}`);
     };
     this.waitForSyncIdle(notice).then(() => this.embedSearch?.buildIndex()).then(() => {
+      if (this.embedSearch)
+        this.hybridSearch = new HybridSearch(this.search, this.embedSearch);
       notice.setMessage(`\u2713 Memex [${modelShort}]: ${this.app.vault.getMarkdownFiles().length} Notizen eingebettet`);
       setTimeout(() => notice.hide(), 4e3);
       this.notifyRelatedView();
@@ -33450,6 +33502,7 @@ var MemexChatPlugin = class extends import_obsidian5.Plugin {
     const leaves = this.app.workspace.getLeavesOfType(VIEW_TYPE_MEMEX_CHAT);
     const view = leaves[0]?.view;
     if (this.settings.useEmbeddings && this.embedSearch) {
+      this.hybridSearch = null;
       this.embedSearch.onModelStatus = (status) => {
         if (view)
           view.setStatus(status);
@@ -33462,6 +33515,7 @@ var MemexChatPlugin = class extends import_obsidian5.Plugin {
         }
       };
       await this.embedSearch.buildIndex();
+      this.hybridSearch = new HybridSearch(this.search, this.embedSearch);
       this.embedSearch.onProgress = void 0;
       this.embedSearch.onModelStatus = void 0;
     } else {
diff --git a/src/HybridSearch.ts b/src/HybridSearch.ts
new file mode 100644
index 0000000..a268138
--- /dev/null
+++ b/src/HybridSearch.ts
@@ -0,0 +1,64 @@
+import type { VaultSearch, SearchResult } from "./VaultSearch";
+import type { EmbedSearch } from "./EmbedSearch";
+
+const RRF_K = 60; // constant added to each rank in the RRF term 1 / (RRF_K + rank + 1)
+
+/**
+ * Merges TF-IDF and embedding results by Reciprocal Rank Fusion: each engine contributes
+ * 1 / (RRF_K + rank + 1) per note, so raw scores are never compared across engines.
+ * Excerpts come from the TF-IDF hit only; notes found solely by embeddings get "".
+ */
+export class HybridSearch {
+  constructor(
+    private tfidf: VaultSearch, // first engine; its rows are preferred when building merged results
+    private embed: EmbedSearch // second engine; also the sole source for isIndexed()
+  ) {}
+
+  isIndexed(): boolean { // delegates to the embedding engine; TF-IDF state is not consulted
+    return this.embed.isIndexed();
+  }
+
+  async search(query: string, topK = 8): Promise<SearchResult[]> { // resolves to at most topK fused results, best first
+    const fetchK = topK * 3; // ask each engine for 3x topK so fusion sees candidates beyond the final cut
+    const [tfidfResults, embedResults] = await Promise.all([ // both lookups are awaited together
+      this.tfidf.search(query, fetchK),
+      this.embed.search(query, fetchK),
+    ]);
+
+    const tfidfRank = new Map<string, number>(); // path -> 0-based position in the TF-IDF list as returned
+    tfidfResults.forEach((r, i) => tfidfRank.set(r.file.path, i));
+
+    const embedRank = new Map<string, number>(); // path -> 0-based position in the embedding list as returned
+    embedResults.forEach((r, i) => embedRank.set(r.file.path, i));
+
+    const tfidfMap = new Map(tfidfResults.map((r) => [r.file.path, r])); // path -> full TF-IDF result
+    const embedMap = new Map(embedResults.map((r) => [r.file.path, r])); // path -> full embedding result
+
+    const allPaths = new Set<string>([ // union of paths returned by either engine
+      ...tfidfResults.map((r) => r.file.path),
+      ...embedResults.map((r) => r.file.path),
+    ]);
+
+    const scored: Array<[string, number]> = []; // [path, fused score] pairs
+    for (const path of allPaths) {
+      const tr = tfidfRank.has(path) ? 1 / (RRF_K + tfidfRank.get(path)! + 1) : 0; // 0 when TF-IDF did not return the note
+      const er = embedRank.has(path) ? 1 / (RRF_K + embedRank.get(path)! + 1) : 0; // 0 when embeddings did not return the note
+      scored.push([path, tr + er]);
+    }
+
+    scored.sort((a, b) => b[1] - a[1]); // highest fused score first
+
+    return scored.slice(0, topK).map(([path, score]) => {
+      const t = tfidfMap.get(path);
+      const e = embedMap.get(path);
+      const base = t ?? e!; // prefer the TF-IDF row; every path in allPaths came from at least one list
+      return {
+        file: base.file,
+        score, // fused RRF score, not either engine's own score
+        excerpt: t?.excerpt ?? "", // NOTE(review): embedding-only hits get an empty excerpt — confirm this is intended
+        title: base.title,
+        linked: t?.linked ?? e?.linked, // TF-IDF's value wins unless it is null/undefined
+      };
+    });
+  }
+}
diff --git a/src/main.ts b/src/main.ts
index 3b534de..37fc662 100644
--- a/src/main.ts
+++ b/src/main.ts
@@ -2,6 +2,7 @@ import { Notice, Plugin, TFile } from "obsidian";
 import { ChatView, VIEW_TYPE_MEMEX_CHAT } from "./ChatView";
 import { VaultSearch } from "./VaultSearch";
 import { EmbedSearch } from "./EmbedSearch";
+import { HybridSearch } from "./HybridSearch";
 import { ClaudeClient } from "./ClaudeClient";
 import { MemexChatSettingsTab, MemexChatSettings, DEFAULT_SETTINGS } from "./SettingsTab";
 import { RelatedNotesView, VIEW_TYPE_RELATED } from "./RelatedNotesView";
@@ -15,12 +16,13 @@ export default class MemexChatPlugin extends Plugin {
   settings!: MemexChatSettings;
   search!: VaultSearch;
   embedSearch: EmbedSearch | null = null;
+  hybridSearch: HybridSearch | null = null;
   claude!: ClaudeClient;
   data!: PluginData;
 
-  /** Returns the active search engine: EmbedSearch when enabled, else VaultSearch */
-  get activeSearch(): VaultSearch | EmbedSearch {
-    return this.embedSearch ?? this.search;
+  /** Returns the active search engine: HybridSearch when embeddings are ready, else VaultSearch */
+  get activeSearch(): VaultSearch | HybridSearch {
+    return this.hybridSearch ?? this.search;
   }
 
   async onload(): Promise<void> {
@@ -149,6 +151,7 @@ export default class MemexChatPlugin extends Plugin {
   async initEmbedSearch(): Promise<void> {
     if (!this.settings.useEmbeddings) {
       this.embedSearch = null;
+      this.hybridSearch = null;
       return;
     }
     this.embedSearch = new EmbedSearch(this.app, this.settings.embeddingModel);
@@ -176,6 +179,7 @@ export default class MemexChatPlugin extends Plugin {
     // Wait for Obsidian Sync to finish before starting (avoids embedding stale/partial files)
     this.waitForSyncIdle(notice).then(() => this.embedSearch?.buildIndex())
       .then(() => {
+        if (this.embedSearch) this.hybridSearch = new HybridSearch(this.search, this.embedSearch);
         notice.setMessage(`✓ Memex [${modelShort}]: ${this.app.vault.getMarkdownFiles().length} Notizen eingebettet`);
         setTimeout(() => notice.hide(), 4000);
         this.notifyRelatedView();
@@ -199,6 +203,7 @@ export default class MemexChatPlugin extends Plugin {
 
     if (this.settings.useEmbeddings && this.embedSearch) {
       // Rebuild semantic (embedding) index
+      this.hybridSearch = null;
       this.embedSearch.onModelStatus = (status) => {
         if (view) view.setStatus(status);
       };
@@ -212,6 +217,7 @@ export default class MemexChatPlugin extends Plugin {
         }
       };
       await this.embedSearch.buildIndex();
+      this.hybridSearch = new HybridSearch(this.search, this.embedSearch);
       this.embedSearch.onProgress = undefined;
       this.embedSearch.onModelStatus = undefined;
     } else {