feat: apply keyword filters to timeline responses

This commit is contained in:
Ricardo
2026-03-31 20:00:15 +02:00
parent 8d833bec89
commit dc05f9d304
2 changed files with 183 additions and 3 deletions
+158
View File
@@ -0,0 +1,158 @@
/**
* Keyword filter helpers for Mastodon Client API v2.
*
* Loads active filters from MongoDB and applies them to serialized
* Mastodon Status objects, following the v2 filter spec:
* - filterAction "hide" → status removed from results
* - filterAction "warn" → status kept with `filtered` array attached
*/
/**
* Strip HTML tags from a string for plain-text keyword matching.
*
* @param {string} html - HTML string
* @returns {string} Plain text
*/
function stripHtml(html) {
if (!html) return "";
return html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
}
/**
* Compile a regex from a list of keyword documents.
*
* Keywords with `wholeWord: true` are wrapped in `\b` word boundaries.
* Keywords with `wholeWord: false` are matched as plain substrings.
* Returns null if there are no keywords.
*
* @param {Array<{keyword: string, wholeWord: boolean}>} keywords
* @returns {RegExp|null}
*/
function compileKeywordRegex(keywords) {
if (!keywords || keywords.length === 0) return null;
const parts = keywords.map((kw) => {
const escaped = kw.keyword.replace(/[$()*+.?[\\\]^{|}]/g, "\\$&");
return kw.wholeWord ? `\\b${escaped}\\b` : escaped;
});
return new RegExp(parts.join("|"), "i");
}
/**
* Load active filters for a given context from MongoDB.
*
* Skips expired filters. For each filter, loads its keywords and compiles
* a single regex from all of them.
*
* @param {object} collections - MongoDB collections (must have ap_filters, ap_filter_keywords)
* @param {string} context - Filter context to match ("home", "public", "notifications", "thread")
* @returns {Promise<Array<{id: string, title: string, context: string[], filterAction: string, expiresAt: string|null, regex: RegExp|null, keywords: Array}>>}
*/
export async function loadUserFilters(collections, context) {
if (!collections.ap_filters) return [];
const now = new Date().toISOString();
// Load filters that include this context, skipping expired ones
const filterDocs = await collections.ap_filters
.find({ context })
.toArray();
const activeFilters = filterDocs.filter((f) => {
if (!f.expiresAt) return true;
return f.expiresAt > now;
});
if (activeFilters.length === 0) return [];
const result = [];
for (const filter of activeFilters) {
const keywords = collections.ap_filter_keywords
? await collections.ap_filter_keywords
.find({ filterId: filter._id })
.toArray()
: [];
const regex = compileKeywordRegex(keywords);
result.push({
id: filter._id.toString(),
title: filter.title || "",
context: filter.context || [],
filterAction: filter.filterAction || "warn",
expiresAt: filter.expiresAt || null,
regex,
keywords,
});
}
return result;
}
/**
* Apply compiled filters to an array of serialized Mastodon statuses.
*
* - "hide" filters: matching statuses are removed entirely
* - "warn" filters: matching statuses get a `filtered` array attached
*
* @param {Array<object>} statuses - Serialized Mastodon Status objects
* @param {Array<object>} filters - Compiled filter objects from loadUserFilters()
* @returns {Array<object>} Processed statuses (hide-matched ones removed)
*/
export function applyFilters(statuses, filters) {
if (!filters || filters.length === 0) return statuses;
const result = [];
for (const status of statuses) {
const text = stripHtml(status.content || "");
let hidden = false;
for (const filter of filters) {
if (!filter.regex) continue;
const match = text.match(filter.regex);
if (!match) continue;
if (filter.filterAction === "hide") {
hidden = true;
break;
}
// filterAction === "warn" — attach filtered metadata
const matchedKeywords = filter.keywords
.filter((kw) => {
const escaped = kw.keyword.replace(/[$()*+.?[\\\]^{|}]/g, "\\$&");
const kwRegex = new RegExp(
kw.wholeWord ? `\\b${escaped}\\b` : escaped,
"i",
);
return kwRegex.test(text);
})
.map((kw) => kw.keyword);
if (!status.filtered) {
status.filtered = [];
}
status.filtered.push({
filter: {
id: filter.id,
title: filter.title,
context: filter.context,
filter_action: filter.filterAction,
expires_at: filter.expiresAt,
},
keyword_matches: matchedKeywords,
});
}
if (!hidden) {
result.push(status);
}
}
return result;
}
+25 -3
View File
@@ -11,6 +11,7 @@ import { buildPaginationQuery, parseLimit, setPaginationHeaders } from "../helpe
import { resolveReplyIds } from "../helpers/resolve-reply-ids.js"; import { resolveReplyIds } from "../helpers/resolve-reply-ids.js";
import { loadModerationData, applyModerationFilters } from "../../item-processing.js"; import { loadModerationData, applyModerationFilters } from "../../item-processing.js";
import { enrichAccountStats } from "../helpers/enrich-accounts.js"; import { enrichAccountStats } from "../helpers/enrich-accounts.js";
import { loadUserFilters, applyFilters } from "../helpers/apply-filters.js";
import { tokenRequired } from "../middleware/token-required.js"; import { tokenRequired } from "../middleware/token-required.js";
import { scopeRequired } from "../middleware/scope-required.js"; import { scopeRequired } from "../middleware/scope-required.js";
@@ -85,10 +86,17 @@ router.get("/api/v1/timelines/home", tokenRequired, scopeRequired("read", "read:
const pluginOptions = req.app.locals.mastodonPluginOptions || {}; const pluginOptions = req.app.locals.mastodonPluginOptions || {};
await enrichAccountStats(statuses, pluginOptions, baseUrl); await enrichAccountStats(statuses, pluginOptions, baseUrl);
// Apply keyword filters
let filteredStatuses = statuses;
if (collections.ap_filters) {
const filters = await loadUserFilters(collections, "home");
filteredStatuses = applyFilters(statuses, filters);
}
// Set pagination Link headers // Set pagination Link headers
setPaginationHeaders(res, req, items, limit); setPaginationHeaders(res, req, items, limit);
res.json(statuses); res.json(filteredStatuses);
} catch (error) { } catch (error) {
next(error); next(error);
} }
@@ -187,8 +195,15 @@ router.get("/api/v1/timelines/public", async (req, res, next) => {
const pluginOpts = req.app.locals.mastodonPluginOptions || {}; const pluginOpts = req.app.locals.mastodonPluginOptions || {};
await enrichAccountStats(statuses, pluginOpts, baseUrl); await enrichAccountStats(statuses, pluginOpts, baseUrl);
// Apply keyword filters
let filteredStatuses = statuses;
if (collections.ap_filters) {
const filters = await loadUserFilters(collections, "public");
filteredStatuses = applyFilters(statuses, filters);
}
setPaginationHeaders(res, req, items, limit); setPaginationHeaders(res, req, items, limit);
res.json(statuses); res.json(filteredStatuses);
} catch (error) { } catch (error) {
next(error); next(error);
} }
@@ -255,8 +270,15 @@ router.get("/api/v1/timelines/tag/:hashtag", async (req, res, next) => {
const pluginOpts = req.app.locals.mastodonPluginOptions || {}; const pluginOpts = req.app.locals.mastodonPluginOptions || {};
await enrichAccountStats(statuses, pluginOpts, baseUrl); await enrichAccountStats(statuses, pluginOpts, baseUrl);
// Apply keyword filters
let filteredStatuses = statuses;
if (collections.ap_filters) {
const filters = await loadUserFilters(collections, "public");
filteredStatuses = applyFilters(statuses, filters);
}
setPaginationHeaders(res, req, items, limit); setPaginationHeaders(res, req, items, limit);
res.json(statuses); res.json(filteredStatuses);
} catch (error) { } catch (error) {
next(error); next(error);
} }