From dc05f9d304dc8e118369c8ac3c674330c94c3841 Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 31 Mar 2026 20:00:15 +0200 Subject: [PATCH] feat: apply keyword filters to timeline responses --- lib/mastodon/helpers/apply-filters.js | 158 ++++++++++++++++++++++++++ lib/mastodon/routes/timelines.js | 28 ++++- 2 files changed, 183 insertions(+), 3 deletions(-) create mode 100644 lib/mastodon/helpers/apply-filters.js diff --git a/lib/mastodon/helpers/apply-filters.js b/lib/mastodon/helpers/apply-filters.js new file mode 100644 index 0000000..87dc135 --- /dev/null +++ b/lib/mastodon/helpers/apply-filters.js @@ -0,0 +1,158 @@ +/** + * Keyword filter helpers for Mastodon Client API v2. + * + * Loads active filters from MongoDB and applies them to serialized + * Mastodon Status objects, following the v2 filter spec: + * - filterAction "hide" → status removed from results + * - filterAction "warn" → status kept with `filtered` array attached + */ + +/** + * Strip HTML tags from a string for plain-text keyword matching. + * + * @param {string} html - HTML string + * @returns {string} Plain text + */ +function stripHtml(html) { + if (!html) return ""; + return html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim(); +} + +/** + * Compile a regex from a list of keyword documents. + * + * Keywords with `wholeWord: true` are wrapped in `\b` word boundaries. + * Keywords with `wholeWord: false` are matched as plain substrings. + * Returns null if there are no keywords. + * + * @param {Array<{keyword: string, wholeWord: boolean}>} keywords + * @returns {RegExp|null} + */ +function compileKeywordRegex(keywords) { + if (!keywords || keywords.length === 0) return null; + + const parts = keywords.map((kw) => { + const escaped = kw.keyword.replace(/[$()*+.?[\\\]^{|}]/g, "\\$&"); + return kw.wholeWord ? `\\b${escaped}\\b` : escaped; + }); + + return new RegExp(parts.join("|"), "i"); +} + +/** + * Load active filters for a given context from MongoDB. + * + * Skips expired filters. For each filter, loads its keywords and compiles + * a single regex from all of them. + * + * @param {object} collections - MongoDB collections (must have ap_filters, ap_filter_keywords) + * @param {string} context - Filter context to match ("home", "public", "notifications", "thread") + * @returns {Promise>} + */ +export async function loadUserFilters(collections, context) { + if (!collections.ap_filters) return []; + + const now = new Date().toISOString(); + + // Load filters that include this context, skipping expired ones + const filterDocs = await collections.ap_filters + .find({ context }) + .toArray(); + + const activeFilters = filterDocs.filter((f) => { + if (!f.expiresAt) return true; + return f.expiresAt > now; + }); + + if (activeFilters.length === 0) return []; + + const result = []; + + for (const filter of activeFilters) { + const keywords = collections.ap_filter_keywords + ? await collections.ap_filter_keywords + .find({ filterId: filter._id }) + .toArray() + : []; + + const regex = compileKeywordRegex(keywords); + + result.push({ + id: filter._id.toString(), + title: filter.title || "", + context: filter.context || [], + filterAction: filter.filterAction || "warn", + expiresAt: filter.expiresAt || null, + regex, + keywords, + }); + } + + return result; +} + +/** + * Apply compiled filters to an array of serialized Mastodon statuses. + * + * - "hide" filters: matching statuses are removed entirely + * - "warn" filters: matching statuses get a `filtered` array attached + * + * @param {Array} statuses - Serialized Mastodon Status objects + * @param {Array} filters - Compiled filter objects from loadUserFilters() + * @returns {Array} Processed statuses (hide-matched ones removed) + */ +export function applyFilters(statuses, filters) { + if (!filters || filters.length === 0) return statuses; + + const result = []; + + for (const status of statuses) { + const text = stripHtml(status.content || ""); + let hidden = false; + + for (const filter of filters) { + if (!filter.regex) continue; + + const match = text.match(filter.regex); + if (!match) continue; + + if (filter.filterAction === "hide") { + hidden = true; + break; + } + + // filterAction === "warn" — attach filtered metadata + const matchedKeywords = filter.keywords + .filter((kw) => { + const escaped = kw.keyword.replace(/[$()*+.?[\\\]^{|}]/g, "\\$&"); + const kwRegex = new RegExp( + kw.wholeWord ? `\\b${escaped}\\b` : escaped, + "i", + ); + return kwRegex.test(text); + }) + .map((kw) => kw.keyword); + + if (!status.filtered) { + status.filtered = []; + } + + status.filtered.push({ + filter: { + id: filter.id, + title: filter.title, + context: filter.context, + filter_action: filter.filterAction, + expires_at: filter.expiresAt, + }, + keyword_matches: matchedKeywords, + }); + } + + if (!hidden) { + result.push(status); + } + } + + return result; +} diff --git a/lib/mastodon/routes/timelines.js b/lib/mastodon/routes/timelines.js index 01328ca..c3696f7 100644 --- a/lib/mastodon/routes/timelines.js +++ b/lib/mastodon/routes/timelines.js @@ -11,6 +11,7 @@ import { buildPaginationQuery, parseLimit, setPaginationHeaders } from "../helpe import { resolveReplyIds } from "../helpers/resolve-reply-ids.js"; import { loadModerationData, applyModerationFilters } from "../../item-processing.js"; import { enrichAccountStats } from "../helpers/enrich-accounts.js"; +import { loadUserFilters, applyFilters } from "../helpers/apply-filters.js"; import { tokenRequired } from "../middleware/token-required.js"; import { scopeRequired } from "../middleware/scope-required.js"; @@ -85,10 +86,17 @@ router.get("/api/v1/timelines/home", tokenRequired, scopeRequired("read", "read: const pluginOptions = req.app.locals.mastodonPluginOptions || {}; await enrichAccountStats(statuses, pluginOptions, baseUrl); + // Apply keyword filters + let filteredStatuses = statuses; + if (collections.ap_filters) { + const filters = await loadUserFilters(collections, "home"); + filteredStatuses = applyFilters(statuses, filters); + } + // Set pagination Link headers setPaginationHeaders(res, req, items, limit); - res.json(statuses); + res.json(filteredStatuses); } catch (error) { next(error); } @@ -187,8 +195,15 @@ router.get("/api/v1/timelines/public", async (req, res, next) => { const pluginOpts = req.app.locals.mastodonPluginOptions || {}; await enrichAccountStats(statuses, pluginOpts, baseUrl); + // Apply keyword filters + let filteredStatuses = statuses; + if (collections.ap_filters) { + const filters = await loadUserFilters(collections, "public"); + filteredStatuses = applyFilters(statuses, filters); + } + setPaginationHeaders(res, req, items, limit); - res.json(statuses); + res.json(filteredStatuses); } catch (error) { next(error); } @@ -255,8 +270,15 @@ router.get("/api/v1/timelines/tag/:hashtag", async (req, res, next) => { const pluginOpts = req.app.locals.mastodonPluginOptions || {}; await enrichAccountStats(statuses, pluginOpts, baseUrl); + // Apply keyword filters + let filteredStatuses = statuses; + if (collections.ap_filters) { + const filters = await loadUserFilters(collections, "public"); + filteredStatuses = applyFilters(statuses, filters); + } + setPaginationHeaders(res, req, items, limit); - res.json(statuses); + res.json(filteredStatuses); } catch (error) { next(error); }