/** * Timeline item extraction helpers * @module timeline-store */ import { Article, Application, Emoji, Hashtag, Mention, Question, Service } from "@fedify/fedify/vocab"; import sanitizeHtml from "sanitize-html"; /** * Sanitize HTML content for safe display * @param {string} html - Raw HTML content * @returns {string} Sanitized HTML */ export function sanitizeContent(html) { if (!html) return ""; return sanitizeHtml(html, { allowedTags: [ "p", "br", "a", "strong", "em", "ul", "ol", "li", "blockquote", "code", "pre", "h1", "h2", "h3", "h4", "h5", "h6", "span", "div", "img" ], allowedAttributes: { a: ["href", "rel", "class"], img: ["src", "alt", "class"], span: ["class"], div: ["class"] }, allowedSchemes: ["http", "https", "mailto"], allowedSchemesByTag: { img: ["http", "https"] } }); } /** * Replace custom emoji :shortcode: placeholders with inline tags. * Applied AFTER sanitization — the tags are controlled output from * trusted emoji data, not user-supplied HTML. * * @param {string} html - Content HTML (already sanitized) * @param {Array<{shortcode: string, url: string}>} emojis - Custom emoji data * @returns {string} HTML with shortcodes replaced by tags */ export function replaceCustomEmoji(html, emojis) { if (!emojis?.length || !html) return html; let result = html; for (const { shortcode, url } of emojis) { // Validate URL is HTTP(S) only — reject data:, javascript:, etc. if (!url || (!url.startsWith("https://") && !url.startsWith("http://"))) continue; // Escape HTML special characters in URL and shortcode to prevent attribute injection const safeUrl = url.replace(/&/g, "&").replace(/"/g, """).replace(//g, ">"); const safeShortcode = shortcode.replace(/&/g, "&").replace(/"/g, """).replace(//g, ">"); const escaped = shortcode.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); const pattern = new RegExp(`:${escaped}:`, "g"); result = result.replace( pattern, `:${safeShortcode}:`, ); } return result; } /** * Extract actor information from Fedify Person/Application/Service object * @param {object} actor - Fedify actor object * @param {object} [options] - Options * @param {object} [options.documentLoader] - Authenticated DocumentLoader for Secure Mode servers * @returns {object} { name, url, photo, handle } */ export async function extractActorInfo(actor, options = {}) { if (!actor) { return { name: "Unknown", url: "", photo: "", handle: "" }; } const rawName = actor.name?.toString() || actor.preferredUsername?.toString() || "Unknown"; // Strip all HTML from actor names to prevent stored XSS const name = sanitizeHtml(rawName, { allowedTags: [], allowedAttributes: {} }); const url = actor.id?.href || ""; // Extract photo URL from icon (Fedify uses async getters) const loaderOpts = options.documentLoader ? { documentLoader: options.documentLoader } : {}; let photo = ""; try { if (typeof actor.getIcon === "function") { const iconObj = await actor.getIcon(loaderOpts); photo = iconObj?.url?.href || ""; } else { const iconObj = await actor.icon; photo = iconObj?.url?.href || ""; } } catch { // No icon available } // Extract handle from actor URL let handle = ""; try { const actorUrl = new URL(url); const username = actor.preferredUsername?.toString() || ""; if (username) { handle = `@${username}@${actorUrl.hostname}`; } } catch { // Invalid URL, keep handle empty } // Extract custom emoji from actor tags const emojis = []; try { if (typeof actor.getTags === "function") { const tags = await actor.getTags(loaderOpts); for await (const tag of tags) { if (tag instanceof Emoji) { const shortcode = (tag.name?.toString() || "").replace(/^:|:$/g, ""); const iconUrl = tag.iconId?.href || ""; if (shortcode && iconUrl) { emojis.push({ shortcode, url: iconUrl }); } } } } } catch { // Emoji extraction failed — non-critical } // Bot detection — Service and Application actors are automated accounts const bot = actor instanceof Service || actor instanceof Application; // Replace custom emoji shortcodes in display name with tags const nameHtml = replaceCustomEmoji(name, emojis); return { name, nameHtml, url, photo, handle, emojis, bot }; } /** * Extract timeline item data from Fedify Note/Article object * @param {object} object - Fedify Note or Article object * @param {object} options - Extraction options * @param {object} [options.boostedBy] - Actor info for boosts * @param {Date} [options.boostedAt] - Boost timestamp * @param {object} [options.actorFallback] - Fedify actor to use when object.getAttributedTo() fails * @param {object} [options.documentLoader] - Authenticated DocumentLoader for Secure Mode servers * @returns {Promise} Timeline item data with: * - category: string[] — hashtag names (stripped of # prefix) * - mentions: Array<{name: string, url: string}> — @mention entries with actor URLs */ export async function extractObjectData(object, options = {}) { if (!object) { throw new Error("Object is required"); } const uid = object.id?.href || ""; const url = object.url?.href || uid; // Determine type — use instanceof for Fedify vocab objects let type = "note"; if (object instanceof Article) { type = "article"; } if (object instanceof Question) { type = "question"; } if (options.boostedBy) { type = "boost"; } // Extract content const contentHtml = object.content?.toString() || ""; const contentText = object.source?.content?.toString() || contentHtml.replace(/<[^>]*>/g, ""); const content = { text: contentText, html: sanitizeContent(contentHtml) }; // Extract name (articles only) const name = type === "article" ? (object.name?.toString() || "") : ""; // Content warning / summary const summary = object.summary?.toString() || ""; const sensitive = object.sensitive || false; // Poll options (Question type) let pollOptions = []; let votersCount = 0; let pollClosed = false; let pollEndTime = ""; if (object instanceof Question) { try { const exclusive = []; for await (const opt of object.getExclusiveOptions?.() || []) { exclusive.push({ name: opt.name?.toString() || "", votes: typeof opt.replies?.totalItems === "number" ? opt.replies.totalItems : 0, }); } const inclusive = []; for await (const opt of object.getInclusiveOptions?.() || []) { inclusive.push({ name: opt.name?.toString() || "", votes: typeof opt.replies?.totalItems === "number" ? opt.replies.totalItems : 0, }); } pollOptions = exclusive.length > 0 ? exclusive : inclusive; } catch { // Poll options couldn't be extracted — show as regular post } votersCount = typeof object.voters === "number" ? object.voters : 0; pollEndTime = object.endTime ? String(object.endTime) : ""; const closedValue = object.closed; pollClosed = closedValue === true || (closedValue != null && closedValue !== false); } // Published date — store as UTC ISO string (Z-suffix) for consistency. // String(Temporal.Instant) preserves the original timezone offset (e.g. +01:00); // normalizing via new Date() converts to Z-suffix UTC. const published = object.published ? new Date(String(object.published)).toISOString() : new Date().toISOString(); // Edited date — non-null when the post has been updated after publishing const updated = object.updated ? String(object.updated) : ""; // Extract author — try multiple strategies in order of reliability const loaderOpts = options.documentLoader ? { documentLoader: options.documentLoader } : {}; let authorObj = null; try { if (typeof object.getAttributedTo === "function") { const attr = await object.getAttributedTo(loaderOpts); authorObj = Array.isArray(attr) ? attr[0] : attr; } } catch { // getAttributedTo() failed (unreachable, deleted, etc.) } // If getAttributedTo() returned nothing, use the actor from the wrapping activity if (!authorObj && options.actorFallback) { authorObj = options.actorFallback; } // Try direct property access for plain objects if (!authorObj) { authorObj = object.attribution || object.attributedTo || null; } let author; if (authorObj) { author = await extractActorInfo(authorObj, loaderOpts); } else { // Last resort: use attributionIds (non-fetching) to get at least a URL const attrIds = object.attributionIds; if (attrIds && attrIds.length > 0) { const authorUrl = attrIds[0].href; const parsedUrl = new URL(authorUrl); const authorHostname = parsedUrl.hostname; // Extract username from common URL patterns: // /@username, /users/username, /ap/users/12345/ const pathname = parsedUrl.pathname; let username = ""; const atPattern = pathname.match(/\/@([^/]+)/); const usersPattern = pathname.match(/\/users\/([^/]+)/); if (atPattern) { username = atPattern[1]; } else if (usersPattern) { username = usersPattern[1]; } author = { name: username || authorHostname, url: authorUrl, photo: "", handle: username ? `@${username}@${authorHostname}` : "", }; } else { author = { name: "Unknown", url: "", photo: "", handle: "" }; } } // Extract tags — Fedify uses async getTags() which returns typed vocab objects. // Hashtag → category[] (plain strings, # prefix stripped) // Mention → mentions[] ({ name, url } objects for profile linking) // Emoji → emojis[] ({ shortcode, url } for custom emoji rendering) const category = []; const mentions = []; const emojis = []; try { if (typeof object.getTags === "function") { const tags = await object.getTags(loaderOpts); for await (const tag of tags) { if (tag instanceof Hashtag) { const tagName = tag.name?.toString().replace(/^#/, "") || ""; if (tagName) category.push(tagName); } else if (tag instanceof Mention) { // Strip leading @ from name (Fedify Mention names start with @) const rawName = tag.name?.toString() || ""; const mentionName = rawName.startsWith("@") ? rawName.slice(1) : rawName; // tag.href is a URL object — use .href to get the string const mentionUrl = tag.href?.href || ""; if (mentionName) mentions.push({ name: mentionName, url: mentionUrl }); } else if (tag instanceof Emoji) { // Custom emoji: name is ":shortcode:", icon is an Image with url const shortcode = (tag.name?.toString() || "").replace(/^:|:$/g, ""); const iconUrl = tag.iconId?.href || ""; if (shortcode && iconUrl) { emojis.push({ shortcode, url: iconUrl }); } } } } } catch { // Tags extraction failed — non-critical } // Extract media attachments — Fedify uses async getAttachments() const photo = []; const video = []; const audio = []; try { if (typeof object.getAttachments === "function") { const attachments = await object.getAttachments(loaderOpts); for await (const att of attachments) { const mediaUrl = att.url?.href || ""; if (!mediaUrl) continue; const mediaType = att.mediaType?.toLowerCase() || ""; if (mediaType.startsWith("image/")) { photo.push({ url: mediaUrl, alt: att.name?.toString() || "", width: att.width || null, height: att.height || null, }); } else if (mediaType.startsWith("video/")) { video.push(mediaUrl); } else if (mediaType.startsWith("audio/")) { audio.push(mediaUrl); } } } } catch { // Attachment extraction failed — non-critical } // In-reply-to — Fedify uses replyTargetId (non-fetching). // Only set when non-empty — $exists:false filters depend on field absence. const inReplyTo = object.replyTargetId?.href || undefined; // Quote URL — Fedify reads quoteUrl / _misskey_quote / quoteUri const quoteUrl = object.quoteUrl?.href || undefined; // Interaction counts — not fetched at ingest time. The three collection // fetches (getReplies, getLikes, getShares) each trigger an HTTP round-trip // for counts that are ephemeral and stale moments after fetching. Removed // per audit M11 to save 3 network calls per inbound activity. const counts = { replies: null, boosts: null, likes: null }; // Replace custom emoji :shortcode: in content with inline tags. // Applied after sanitization — these are trusted emoji from the post's tags. content.html = replaceCustomEmoji(content.html, emojis); // Build base timeline item const item = { uid, type, url, name, content, summary, sensitive, published, updated, author, category, mentions, emojis, photo, video, audio, ...(inReplyTo ? { inReplyTo } : {}), ...(quoteUrl ? { quoteUrl } : {}), counts, pollOptions, votersCount, pollClosed, pollEndTime, createdAt: new Date().toISOString() }; // Add boost metadata if this is a boost if (options.boostedBy) { item.boostedBy = options.boostedBy; item.boostedAt = options.boostedAt || new Date().toISOString(); item.originalUrl = url; } return item; }