feat: apply keyword filters to timeline responses
This commit is contained in:
@@ -0,0 +1,158 @@
|
||||
/**
|
||||
* Keyword filter helpers for Mastodon Client API v2.
|
||||
*
|
||||
* Loads active filters from MongoDB and applies them to serialized
|
||||
* Mastodon Status objects, following the v2 filter spec:
|
||||
* - filterAction "hide" → status removed from results
|
||||
* - filterAction "warn" → status kept with `filtered` array attached
|
||||
*/
|
||||
|
||||
/**
|
||||
* Strip HTML tags from a string for plain-text keyword matching.
|
||||
*
|
||||
* @param {string} html - HTML string
|
||||
* @returns {string} Plain text
|
||||
*/
|
||||
function stripHtml(html) {
|
||||
if (!html) return "";
|
||||
return html.replace(/<[^>]+>/g, " ").replace(/\s+/g, " ").trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Compile a regex from a list of keyword documents.
|
||||
*
|
||||
* Keywords with `wholeWord: true` are wrapped in `\b` word boundaries.
|
||||
* Keywords with `wholeWord: false` are matched as plain substrings.
|
||||
* Returns null if there are no keywords.
|
||||
*
|
||||
* @param {Array<{keyword: string, wholeWord: boolean}>} keywords
|
||||
* @returns {RegExp|null}
|
||||
*/
|
||||
function compileKeywordRegex(keywords) {
|
||||
if (!keywords || keywords.length === 0) return null;
|
||||
|
||||
const parts = keywords.map((kw) => {
|
||||
const escaped = kw.keyword.replace(/[$()*+.?[\\\]^{|}]/g, "\\$&");
|
||||
return kw.wholeWord ? `\\b${escaped}\\b` : escaped;
|
||||
});
|
||||
|
||||
return new RegExp(parts.join("|"), "i");
|
||||
}
|
||||
|
||||
/**
|
||||
* Load active filters for a given context from MongoDB.
|
||||
*
|
||||
* Skips expired filters. For each filter, loads its keywords and compiles
|
||||
* a single regex from all of them.
|
||||
*
|
||||
* @param {object} collections - MongoDB collections (must have ap_filters, ap_filter_keywords)
|
||||
* @param {string} context - Filter context to match ("home", "public", "notifications", "thread")
|
||||
* @returns {Promise<Array<{id: string, title: string, context: string[], filterAction: string, expiresAt: string|null, regex: RegExp|null, keywords: Array}>>}
|
||||
*/
|
||||
export async function loadUserFilters(collections, context) {
|
||||
if (!collections.ap_filters) return [];
|
||||
|
||||
const now = new Date().toISOString();
|
||||
|
||||
// Load filters that include this context, skipping expired ones
|
||||
const filterDocs = await collections.ap_filters
|
||||
.find({ context })
|
||||
.toArray();
|
||||
|
||||
const activeFilters = filterDocs.filter((f) => {
|
||||
if (!f.expiresAt) return true;
|
||||
return f.expiresAt > now;
|
||||
});
|
||||
|
||||
if (activeFilters.length === 0) return [];
|
||||
|
||||
const result = [];
|
||||
|
||||
for (const filter of activeFilters) {
|
||||
const keywords = collections.ap_filter_keywords
|
||||
? await collections.ap_filter_keywords
|
||||
.find({ filterId: filter._id })
|
||||
.toArray()
|
||||
: [];
|
||||
|
||||
const regex = compileKeywordRegex(keywords);
|
||||
|
||||
result.push({
|
||||
id: filter._id.toString(),
|
||||
title: filter.title || "",
|
||||
context: filter.context || [],
|
||||
filterAction: filter.filterAction || "warn",
|
||||
expiresAt: filter.expiresAt || null,
|
||||
regex,
|
||||
keywords,
|
||||
});
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply compiled filters to an array of serialized Mastodon statuses.
|
||||
*
|
||||
* - "hide" filters: matching statuses are removed entirely
|
||||
* - "warn" filters: matching statuses get a `filtered` array attached
|
||||
*
|
||||
* @param {Array<object>} statuses - Serialized Mastodon Status objects
|
||||
* @param {Array<object>} filters - Compiled filter objects from loadUserFilters()
|
||||
* @returns {Array<object>} Processed statuses (hide-matched ones removed)
|
||||
*/
|
||||
export function applyFilters(statuses, filters) {
|
||||
if (!filters || filters.length === 0) return statuses;
|
||||
|
||||
const result = [];
|
||||
|
||||
for (const status of statuses) {
|
||||
const text = stripHtml(status.content || "");
|
||||
let hidden = false;
|
||||
|
||||
for (const filter of filters) {
|
||||
if (!filter.regex) continue;
|
||||
|
||||
const match = text.match(filter.regex);
|
||||
if (!match) continue;
|
||||
|
||||
if (filter.filterAction === "hide") {
|
||||
hidden = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// filterAction === "warn" — attach filtered metadata
|
||||
const matchedKeywords = filter.keywords
|
||||
.filter((kw) => {
|
||||
const escaped = kw.keyword.replace(/[$()*+.?[\\\]^{|}]/g, "\\$&");
|
||||
const kwRegex = new RegExp(
|
||||
kw.wholeWord ? `\\b${escaped}\\b` : escaped,
|
||||
"i",
|
||||
);
|
||||
return kwRegex.test(text);
|
||||
})
|
||||
.map((kw) => kw.keyword);
|
||||
|
||||
if (!status.filtered) {
|
||||
status.filtered = [];
|
||||
}
|
||||
|
||||
status.filtered.push({
|
||||
filter: {
|
||||
id: filter.id,
|
||||
title: filter.title,
|
||||
context: filter.context,
|
||||
filter_action: filter.filterAction,
|
||||
expires_at: filter.expiresAt,
|
||||
},
|
||||
keyword_matches: matchedKeywords,
|
||||
});
|
||||
}
|
||||
|
||||
if (!hidden) {
|
||||
result.push(status);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -11,6 +11,7 @@ import { buildPaginationQuery, parseLimit, setPaginationHeaders } from "../helpe
|
||||
import { resolveReplyIds } from "../helpers/resolve-reply-ids.js";
|
||||
import { loadModerationData, applyModerationFilters } from "../../item-processing.js";
|
||||
import { enrichAccountStats } from "../helpers/enrich-accounts.js";
|
||||
import { loadUserFilters, applyFilters } from "../helpers/apply-filters.js";
|
||||
import { tokenRequired } from "../middleware/token-required.js";
|
||||
import { scopeRequired } from "../middleware/scope-required.js";
|
||||
|
||||
@@ -85,10 +86,17 @@ router.get("/api/v1/timelines/home", tokenRequired, scopeRequired("read", "read:
|
||||
const pluginOptions = req.app.locals.mastodonPluginOptions || {};
|
||||
await enrichAccountStats(statuses, pluginOptions, baseUrl);
|
||||
|
||||
// Apply keyword filters
|
||||
let filteredStatuses = statuses;
|
||||
if (collections.ap_filters) {
|
||||
const filters = await loadUserFilters(collections, "home");
|
||||
filteredStatuses = applyFilters(statuses, filters);
|
||||
}
|
||||
|
||||
// Set pagination Link headers
|
||||
setPaginationHeaders(res, req, items, limit);
|
||||
|
||||
res.json(statuses);
|
||||
res.json(filteredStatuses);
|
||||
} catch (error) {
|
||||
next(error);
|
||||
}
|
||||
@@ -187,8 +195,15 @@ router.get("/api/v1/timelines/public", async (req, res, next) => {
|
||||
const pluginOpts = req.app.locals.mastodonPluginOptions || {};
|
||||
await enrichAccountStats(statuses, pluginOpts, baseUrl);
|
||||
|
||||
// Apply keyword filters
|
||||
let filteredStatuses = statuses;
|
||||
if (collections.ap_filters) {
|
||||
const filters = await loadUserFilters(collections, "public");
|
||||
filteredStatuses = applyFilters(statuses, filters);
|
||||
}
|
||||
|
||||
setPaginationHeaders(res, req, items, limit);
|
||||
res.json(statuses);
|
||||
res.json(filteredStatuses);
|
||||
} catch (error) {
|
||||
next(error);
|
||||
}
|
||||
@@ -255,8 +270,15 @@ router.get("/api/v1/timelines/tag/:hashtag", async (req, res, next) => {
|
||||
const pluginOpts = req.app.locals.mastodonPluginOptions || {};
|
||||
await enrichAccountStats(statuses, pluginOpts, baseUrl);
|
||||
|
||||
// Apply keyword filters
|
||||
let filteredStatuses = statuses;
|
||||
if (collections.ap_filters) {
|
||||
const filters = await loadUserFilters(collections, "public");
|
||||
filteredStatuses = applyFilters(statuses, filters);
|
||||
}
|
||||
|
||||
setPaginationHeaders(res, req, items, limit);
|
||||
res.json(statuses);
|
||||
res.json(filteredStatuses);
|
||||
} catch (error) {
|
||||
next(error);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user