From 6e63422c213f407c92d3988a91d01a7a73dc8bac Mon Sep 17 00:00:00 2001 From: Ricardo Date: Tue, 31 Mar 2026 09:57:37 +0200 Subject: [PATCH] feat!: replace cursor-based status IDs with MongoDB ObjectId BREAKING: Status IDs are now _id.toString() instead of encodeCursor(published). This fixes the critical threading bug where multiple posts sharing the same published second produced identical IDs, causing findTimelineItemById to return the wrong document. Changes: - status.js: id = _id.toString() (unique, chronologically sortable) - notification.js: same - findTimelineItemById: ObjectId-only lookup (no cursor fallback) - pagination.js: _id-based cursor pagination ($lt/$gt on ObjectId) - resolve-reply-ids.js: returns _id.toString() for parent IDs - Removed all encodeCursor/decodeCursor usage from API layer ObjectIds have a 4-byte timestamp prefix so chronological sort via _id: -1 works correctly. Pagination cursors are now ObjectId hex strings in Link headers. --- lib/mastodon/entities/notification.js | 3 +- lib/mastodon/entities/status.js | 7 +- lib/mastodon/helpers/pagination.js | 117 +++++++++------------- lib/mastodon/helpers/resolve-reply-ids.js | 21 ++-- lib/mastodon/routes/statuses.js | 24 +---- package.json | 2 +- 6 files changed, 61 insertions(+), 113 deletions(-) diff --git a/lib/mastodon/entities/notification.js b/lib/mastodon/entities/notification.js index 9ee0093..7086bee 100644 --- a/lib/mastodon/entities/notification.js +++ b/lib/mastodon/entities/notification.js @@ -13,7 +13,6 @@ */ import { serializeAccount } from "./account.js"; import { serializeStatus } from "./status.js"; -import { encodeCursor } from "../helpers/pagination.js"; /** * Map internal notification types to Mastodon API types. @@ -121,7 +120,7 @@ export function serializeNotification(notif, { baseUrl, statusMap, interactionSt : notif.published || notif.createdAt || new Date().toISOString(); return { - id: encodeCursor(createdAt) || notif._id.toString(), + id: notif._id.toString(), type: mastodonType, created_at: createdAt, account, diff --git a/lib/mastodon/entities/status.js b/lib/mastodon/entities/status.js index c0c53e1..bcd5652 100644 --- a/lib/mastodon/entities/status.js +++ b/lib/mastodon/entities/status.js @@ -15,7 +15,6 @@ */ import { serializeAccount } from "./account.js"; import { sanitizeHtml } from "./sanitize.js"; -import { encodeCursor } from "../helpers/pagination.js"; // Module-level defaults set once at startup via setLocalIdentity() let _localPublicationUrl = ""; @@ -47,10 +46,8 @@ export function setLocalIdentity(publicationUrl, handle) { export function serializeStatus(item, { baseUrl, favouritedIds, rebloggedIds, bookmarkedIds, pinnedIds, replyIdMap, replyAccountIdMap } = {}) { if (!item) return null; - // Use published-based cursor as the status ID so pagination cursors - // (max_id/min_id) sort chronologically, not by insertion order. - const cursorDate = item.published || item.createdAt || item.boostedAt; - const id = encodeCursor(cursorDate) || item._id.toString(); + // Use MongoDB ObjectId as the status ID — unique and chronologically sortable. + const id = item._id.toString(); const uid = item.uid || ""; const url = item.url || uid; diff --git a/lib/mastodon/helpers/pagination.js b/lib/mastodon/helpers/pagination.js index 3f4da71..792b3ce 100644 --- a/lib/mastodon/helpers/pagination.js +++ b/lib/mastodon/helpers/pagination.js @@ -1,50 +1,17 @@ /** - * Mastodon-compatible cursor pagination helpers. + * Mastodon-compatible pagination helpers using MongoDB ObjectId. * - * Uses `published` date as cursor (chronologically correct) instead of - * MongoDB ObjectId. ObjectId reflects insertion order, not publication - * order — backfilled or syndicated posts get new ObjectIds at import - * time, breaking chronological sort. The `published` field matches the - * native reader's sort and produces a correct timeline. + * ObjectIds are 12-byte values with a 4-byte timestamp prefix, making + * them chronologically sortable. Status IDs are _id.toString() — unique, + * sortable, and directly usable as pagination cursors. * - * Cursor values are `published` ISO strings, but Mastodon clients pass - * them as opaque `max_id`/`min_id`/`since_id` strings. We encode the - * published date as a Mastodon-style snowflake-ish ID (milliseconds - * since epoch) so clients treat them as comparable integers. - * - * Emits RFC 8288 Link headers that masto.js / Phanpy parse. + * Emits RFC 8288 Link headers that Phanpy/Elk/Moshidon parse. */ +import { ObjectId } from "mongodb"; const DEFAULT_LIMIT = 20; const MAX_LIMIT = 40; -/** - * Encode a published date string as a numeric cursor ID. - * Mastodon clients expect IDs to be numeric strings that sort chronologically. - * We use milliseconds since epoch — monotonic and comparable. - * - * @param {string|Date} published - ISO date string or Date object - * @returns {string} Numeric string (ms since epoch) - */ -export function encodeCursor(published) { - if (!published) return "0"; - const ms = new Date(published).getTime(); - return Number.isFinite(ms) ? String(ms) : "0"; -} - -/** - * Decode a numeric cursor ID back to an ISO date string. - * - * @param {string} cursor - Numeric cursor from client - * @returns {string|null} ISO date string, or null if invalid - */ -export function decodeCursor(cursor) { - if (!cursor) return null; - const ms = Number.parseInt(cursor, 10); - if (!Number.isFinite(ms) || ms <= 0) return null; - return new Date(ms).toISOString(); -} - /** * Parse and clamp the limit parameter. * @@ -58,46 +25,60 @@ export function parseLimit(raw) { } /** - * Build a MongoDB filter object for cursor-based pagination. + * Try to parse a cursor string as an ObjectId. + * Returns null if invalid. * - * Mastodon cursor params (all optional, applied to `published`): - * max_id — return items older than this cursor (exclusive) - * min_id — return items newer than this cursor (exclusive), closest first - * since_id — return items newer than this cursor (exclusive), most recent first + * @param {string} cursor - ObjectId hex string from client + * @returns {ObjectId|null} + */ +function parseCursor(cursor) { + if (!cursor || typeof cursor !== "string") return null; + try { + return new ObjectId(cursor); + } catch { + return null; + } +} + +/** + * Build a MongoDB filter object for ObjectId-based pagination. + * + * Mastodon cursor params (all optional, applied to `_id`): + * max_id — return items older than this ID (exclusive) + * min_id — return items newer than this ID (exclusive), closest first + * since_id — return items newer than this ID (exclusive), most recent first * * @param {object} baseFilter - Existing MongoDB filter to extend * @param {object} cursors - * @param {string} [cursors.max_id] - Numeric cursor (ms since epoch) - * @param {string} [cursors.min_id] - Numeric cursor (ms since epoch) - * @param {string} [cursors.since_id] - Numeric cursor (ms since epoch) + * @param {string} [cursors.max_id] - ObjectId hex string + * @param {string} [cursors.min_id] - ObjectId hex string + * @param {string} [cursors.since_id] - ObjectId hex string * @returns {{ filter: object, sort: object, reverse: boolean }} */ export function buildPaginationQuery(baseFilter, { max_id, min_id, since_id } = {}) { const filter = { ...baseFilter }; - let sort = { published: -1 }; // newest first (default) + let sort = { _id: -1 }; // newest first (default) let reverse = false; if (max_id) { - const date = decodeCursor(max_id); - if (date) { - filter.published = { ...filter.published, $lt: date }; + const oid = parseCursor(max_id); + if (oid) { + filter._id = { ...filter._id, $lt: oid }; } } if (since_id) { - const date = decodeCursor(since_id); - if (date) { - filter.published = { ...filter.published, $gt: date }; + const oid = parseCursor(since_id); + if (oid) { + filter._id = { ...filter._id, $gt: oid }; } } if (min_id) { - const date = decodeCursor(min_id); - if (date) { - filter.published = { ...filter.published, $gt: date }; - // min_id returns results closest to the cursor, so sort ascending - // then reverse the results before returning - sort = { published: 1 }; + const oid = parseCursor(min_id); + if (oid) { + filter._id = { ...filter._id, $gt: oid }; + sort = { _id: 1 }; reverse = true; } } @@ -110,7 +91,7 @@ export function buildPaginationQuery(baseFilter, { max_id, min_id, since_id } = * * @param {object} res - Express response object * @param {object} req - Express request object (for building URLs) - * @param {Array} items - Result items (must have `published`) + * @param {Array} items - Result items (must have `_id`) * @param {number} limit - The limit used for the query */ export function setPaginationHeaders(res, req, items, limit) { @@ -119,10 +100,8 @@ export function setPaginationHeaders(res, req, items, limit) { // Only emit Link if we got a full page (may have more) if (items.length < limit) return; - const firstCursor = encodeCursor(items[0].published); - const lastCursor = encodeCursor(items[items.length - 1].published); - - if (firstCursor === "0" || lastCursor === "0") return; + const firstId = items[0]._id.toString(); + const lastId = items[items.length - 1]._id.toString(); const baseUrl = `${req.protocol}://${req.get("host")}${req.path}`; @@ -139,14 +118,14 @@ export function setPaginationHeaders(res, req, items, limit) { const links = []; - // rel="next" — older items (max_id = last item's cursor) + // rel="next" — older items (max_id = last item's ID) const nextParams = new URLSearchParams(existingParams); - nextParams.set("max_id", lastCursor); + nextParams.set("max_id", lastId); links.push(`<${baseUrl}?${nextParams.toString()}>; rel="next"`); - // rel="prev" — newer items (min_id = first item's cursor) + // rel="prev" — newer items (min_id = first item's ID) const prevParams = new URLSearchParams(existingParams); - prevParams.set("min_id", firstCursor); + prevParams.set("min_id", firstId); links.push(`<${baseUrl}?${prevParams.toString()}>; rel="prev"`); res.set("Link", links.join(", ")); diff --git a/lib/mastodon/helpers/resolve-reply-ids.js b/lib/mastodon/helpers/resolve-reply-ids.js index d8d8c6c..cf57e1b 100644 --- a/lib/mastodon/helpers/resolve-reply-ids.js +++ b/lib/mastodon/helpers/resolve-reply-ids.js @@ -1,17 +1,14 @@ /** - * Batch-resolve inReplyTo URLs to Mastodon cursor IDs and account IDs. + * Batch-resolve inReplyTo URLs to ObjectId strings and account IDs. * * Looks up parent posts in ap_timeline by uid/url and returns two Maps: - * - replyIdMap: inReplyTo URL → cursor ID (status ID) - * - replyAccountIdMap: inReplyTo URL → author account ID - * - * Used by route handlers before calling serializeStatus(). + * - replyIdMap: inReplyTo URL → parent _id.toString() + * - replyAccountIdMap: inReplyTo URL → parent author account ID * * @param {object} collection - ap_timeline MongoDB collection * @param {Array} items - Timeline items with optional inReplyTo * @returns {Promise<{replyIdMap: Map, replyAccountIdMap: Map}>} */ -import { encodeCursor } from "./pagination.js"; import { remoteActorId } from "./id-mapping.js"; export async function resolveReplyIds(collection, items) { @@ -19,29 +16,25 @@ export async function resolveReplyIds(collection, items) { const replyAccountIdMap = new Map(); if (!collection || !items?.length) return { replyIdMap, replyAccountIdMap }; - // Collect unique inReplyTo URLs const urls = [ ...new Set( - items - .map((item) => item.inReplyTo) - .filter(Boolean), + items.map((item) => item.inReplyTo).filter(Boolean), ), ]; if (urls.length === 0) return { replyIdMap, replyAccountIdMap }; - // Batch lookup parents by uid or url const parents = await collection .find({ $or: [{ uid: { $in: urls } }, { url: { $in: urls } }] }) - .project({ uid: 1, url: 1, published: 1, "author.url": 1 }) + .project({ uid: 1, url: 1, "author.url": 1 }) .toArray(); for (const parent of parents) { - const cursorId = encodeCursor(parent.published); + const parentId = parent._id.toString(); const authorUrl = parent.author?.url; const authorAccountId = authorUrl ? remoteActorId(authorUrl) : null; const setMaps = (key) => { - if (cursorId && cursorId !== "0") replyIdMap.set(key, cursorId); + replyIdMap.set(key, parentId); if (authorAccountId) replyAccountIdMap.set(key, authorAccountId); }; diff --git a/lib/mastodon/routes/statuses.js b/lib/mastodon/routes/statuses.js index d275b8e..fe7fd71 100644 --- a/lib/mastodon/routes/statuses.js +++ b/lib/mastodon/routes/statuses.js @@ -17,7 +17,6 @@ import express from "express"; import { ObjectId } from "mongodb"; import { serializeStatus } from "../entities/status.js"; -import { decodeCursor } from "../helpers/pagination.js"; import { resolveReplyIds } from "../helpers/resolve-reply-ids.js"; import { likePost, unlikePost, @@ -844,32 +843,13 @@ router.get("/api/v1/statuses/:id/card", async (req, res, next) => { // ─── Helpers ───────────────────────────────────────────────────────────────── /** - * Find a timeline item by cursor ID (published-based) or ObjectId (legacy). - * Status IDs are now encodeCursor(published) — milliseconds since epoch. - * Falls back to ObjectId lookup for backwards compatibility. + * Find a timeline item by ObjectId. * * @param {object} collection - ap_timeline collection - * @param {string} id - Status ID from client + * @param {string} id - MongoDB ObjectId string * @returns {Promise} Timeline document or null */ async function findTimelineItemById(collection, id) { - // Try cursor-based lookup first (published date from ms-since-epoch) - const publishedDate = decodeCursor(id); - if (publishedDate) { - // Try exact match first (with .000Z suffix from toISOString) - let item = await collection.findOne({ published: publishedDate }); - if (item) return item; - - // Try without milliseconds — stored dates often lack .000Z - // e.g., "2026-03-21T15:33:50Z" vs "2026-03-21T15:33:50.000Z" - const withoutMs = publishedDate.replace(/\.000Z$/, "Z"); - if (withoutMs !== publishedDate) { - item = await collection.findOne({ published: withoutMs }); - if (item) return item; - } - } - - // Fall back to ObjectId lookup (legacy IDs) try { return await collection.findOne({ _id: new ObjectId(id) }); } catch { diff --git a/package.json b/package.json index 47fd0ef..057750d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@rmdes/indiekit-endpoint-activitypub", - "version": "3.11.8", + "version": "3.12.0", "description": "ActivityPub federation endpoint for Indiekit via Fedify. Adds full fediverse support: actor, inbox, outbox, followers, following, syndication, and Mastodon migration.", "keywords": [ "indiekit",