feat!: replace cursor-based status IDs with MongoDB ObjectId

BREAKING: Status IDs are now _id.toString() instead of
encodeCursor(published). This fixes the critical threading bug where
multiple posts sharing the same published timestamp (stored dates
often carry only second precision) produced identical IDs, causing
findTimelineItemById to return the wrong document.

Changes:
- status.js: id = _id.toString() (unique, chronologically sortable)
- notification.js: same
- findTimelineItemById: ObjectId-only lookup (the published-cursor lookup that previously ran first is removed)
- pagination.js: _id-based cursor pagination ($lt/$gt on ObjectId)
- resolve-reply-ids.js: returns _id.toString() for parent IDs
- Removed all encodeCursor/decodeCursor usage from API layer

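ObjectIds have a 4-byte timestamp prefix, so sorting by _id: -1 is
chronological by insertion time. Note that this is not the same as
`published` order: backfilled or syndicated posts receive their
ObjectId at import time and will sort by when they were imported.
Pagination cursors are now ObjectId hex strings in Link headers.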
This commit is contained in:
Ricardo
2026-03-31 09:57:37 +02:00
parent f8cb42b10e
commit 6e63422c21
6 changed files with 61 additions and 113 deletions
+1 -2
View File
@@ -13,7 +13,6 @@
*/ */
import { serializeAccount } from "./account.js"; import { serializeAccount } from "./account.js";
import { serializeStatus } from "./status.js"; import { serializeStatus } from "./status.js";
import { encodeCursor } from "../helpers/pagination.js";
/** /**
* Map internal notification types to Mastodon API types. * Map internal notification types to Mastodon API types.
@@ -121,7 +120,7 @@ export function serializeNotification(notif, { baseUrl, statusMap, interactionSt
: notif.published || notif.createdAt || new Date().toISOString(); : notif.published || notif.createdAt || new Date().toISOString();
return { return {
id: encodeCursor(createdAt) || notif._id.toString(), id: notif._id.toString(),
type: mastodonType, type: mastodonType,
created_at: createdAt, created_at: createdAt,
account, account,
+2 -5
View File
@@ -15,7 +15,6 @@
*/ */
import { serializeAccount } from "./account.js"; import { serializeAccount } from "./account.js";
import { sanitizeHtml } from "./sanitize.js"; import { sanitizeHtml } from "./sanitize.js";
import { encodeCursor } from "../helpers/pagination.js";
// Module-level defaults set once at startup via setLocalIdentity() // Module-level defaults set once at startup via setLocalIdentity()
let _localPublicationUrl = ""; let _localPublicationUrl = "";
@@ -47,10 +46,8 @@ export function setLocalIdentity(publicationUrl, handle) {
export function serializeStatus(item, { baseUrl, favouritedIds, rebloggedIds, bookmarkedIds, pinnedIds, replyIdMap, replyAccountIdMap } = {}) { export function serializeStatus(item, { baseUrl, favouritedIds, rebloggedIds, bookmarkedIds, pinnedIds, replyIdMap, replyAccountIdMap } = {}) {
if (!item) return null; if (!item) return null;
// Use published-based cursor as the status ID so pagination cursors // Use MongoDB ObjectId as the status ID — unique and chronologically sortable.
// (max_id/min_id) sort chronologically, not by insertion order. const id = item._id.toString();
const cursorDate = item.published || item.createdAt || item.boostedAt;
const id = encodeCursor(cursorDate) || item._id.toString();
const uid = item.uid || ""; const uid = item.uid || "";
const url = item.url || uid; const url = item.url || uid;
+48 -69
View File
@@ -1,50 +1,17 @@
/** /**
* Mastodon-compatible cursor pagination helpers. * Mastodon-compatible pagination helpers using MongoDB ObjectId.
* *
* Uses `published` date as cursor (chronologically correct) instead of * ObjectIds are 12-byte values with a 4-byte timestamp prefix, making
* MongoDB ObjectId. ObjectId reflects insertion order, not publication * them chronologically sortable. Status IDs are _id.toString() — unique,
* order — backfilled or syndicated posts get new ObjectIds at import * sortable, and directly usable as pagination cursors.
* time, breaking chronological sort. The `published` field matches the
* native reader's sort and produces a correct timeline.
* *
* Cursor values are `published` ISO strings, but Mastodon clients pass * Emits RFC 8288 Link headers that Phanpy/Elk/Moshidon parse.
* them as opaque `max_id`/`min_id`/`since_id` strings. We encode the
* published date as a Mastodon-style snowflake-ish ID (milliseconds
* since epoch) so clients treat them as comparable integers.
*
* Emits RFC 8288 Link headers that masto.js / Phanpy parse.
*/ */
import { ObjectId } from "mongodb";
const DEFAULT_LIMIT = 20; const DEFAULT_LIMIT = 20;
const MAX_LIMIT = 40; const MAX_LIMIT = 40;
/**
* Encode a published date string as a numeric cursor ID.
* Mastodon clients expect IDs to be numeric strings that sort chronologically.
* We use milliseconds since epoch — monotonic and comparable.
*
* @param {string|Date} published - ISO date string or Date object
* @returns {string} Numeric string (ms since epoch)
*/
export function encodeCursor(published) {
if (!published) return "0";
const ms = new Date(published).getTime();
return Number.isFinite(ms) ? String(ms) : "0";
}
/**
* Decode a numeric cursor ID back to an ISO date string.
*
* @param {string} cursor - Numeric cursor from client
* @returns {string|null} ISO date string, or null if invalid
*/
export function decodeCursor(cursor) {
if (!cursor) return null;
const ms = Number.parseInt(cursor, 10);
if (!Number.isFinite(ms) || ms <= 0) return null;
return new Date(ms).toISOString();
}
/** /**
* Parse and clamp the limit parameter. * Parse and clamp the limit parameter.
* *
@@ -58,46 +25,60 @@ export function parseLimit(raw) {
} }
/** /**
* Build a MongoDB filter object for cursor-based pagination. * Try to parse a cursor string as an ObjectId.
* Returns null if invalid.
* *
* Mastodon cursor params (all optional, applied to `published`): * @param {string} cursor - ObjectId hex string from client
* max_id — return items older than this cursor (exclusive) * @returns {ObjectId|null}
* min_id — return items newer than this cursor (exclusive), closest first */
* since_id — return items newer than this cursor (exclusive), most recent first function parseCursor(cursor) {
if (!cursor || typeof cursor !== "string") return null;
try {
return new ObjectId(cursor);
} catch {
return null;
}
}
/**
* Build a MongoDB filter object for ObjectId-based pagination.
*
* Mastodon cursor params (all optional, applied to `_id`):
* max_id — return items older than this ID (exclusive)
* min_id — return items newer than this ID (exclusive), closest first
* since_id — return items newer than this ID (exclusive), most recent first
* *
* @param {object} baseFilter - Existing MongoDB filter to extend * @param {object} baseFilter - Existing MongoDB filter to extend
* @param {object} cursors * @param {object} cursors
* @param {string} [cursors.max_id] - Numeric cursor (ms since epoch) * @param {string} [cursors.max_id] - ObjectId hex string
* @param {string} [cursors.min_id] - Numeric cursor (ms since epoch) * @param {string} [cursors.min_id] - ObjectId hex string
* @param {string} [cursors.since_id] - Numeric cursor (ms since epoch) * @param {string} [cursors.since_id] - ObjectId hex string
* @returns {{ filter: object, sort: object, reverse: boolean }} * @returns {{ filter: object, sort: object, reverse: boolean }}
*/ */
export function buildPaginationQuery(baseFilter, { max_id, min_id, since_id } = {}) { export function buildPaginationQuery(baseFilter, { max_id, min_id, since_id } = {}) {
const filter = { ...baseFilter }; const filter = { ...baseFilter };
let sort = { published: -1 }; // newest first (default) let sort = { _id: -1 }; // newest first (default)
let reverse = false; let reverse = false;
if (max_id) { if (max_id) {
const date = decodeCursor(max_id); const oid = parseCursor(max_id);
if (date) { if (oid) {
filter.published = { ...filter.published, $lt: date }; filter._id = { ...filter._id, $lt: oid };
} }
} }
if (since_id) { if (since_id) {
const date = decodeCursor(since_id); const oid = parseCursor(since_id);
if (date) { if (oid) {
filter.published = { ...filter.published, $gt: date }; filter._id = { ...filter._id, $gt: oid };
} }
} }
if (min_id) { if (min_id) {
const date = decodeCursor(min_id); const oid = parseCursor(min_id);
if (date) { if (oid) {
filter.published = { ...filter.published, $gt: date }; filter._id = { ...filter._id, $gt: oid };
// min_id returns results closest to the cursor, so sort ascending sort = { _id: 1 };
// then reverse the results before returning
sort = { published: 1 };
reverse = true; reverse = true;
} }
} }
@@ -110,7 +91,7 @@ export function buildPaginationQuery(baseFilter, { max_id, min_id, since_id } =
* *
* @param {object} res - Express response object * @param {object} res - Express response object
* @param {object} req - Express request object (for building URLs) * @param {object} req - Express request object (for building URLs)
* @param {Array} items - Result items (must have `published`) * @param {Array} items - Result items (must have `_id`)
* @param {number} limit - The limit used for the query * @param {number} limit - The limit used for the query
*/ */
export function setPaginationHeaders(res, req, items, limit) { export function setPaginationHeaders(res, req, items, limit) {
@@ -119,10 +100,8 @@ export function setPaginationHeaders(res, req, items, limit) {
// Only emit Link if we got a full page (may have more) // Only emit Link if we got a full page (may have more)
if (items.length < limit) return; if (items.length < limit) return;
const firstCursor = encodeCursor(items[0].published); const firstId = items[0]._id.toString();
const lastCursor = encodeCursor(items[items.length - 1].published); const lastId = items[items.length - 1]._id.toString();
if (firstCursor === "0" || lastCursor === "0") return;
const baseUrl = `${req.protocol}://${req.get("host")}${req.path}`; const baseUrl = `${req.protocol}://${req.get("host")}${req.path}`;
@@ -139,14 +118,14 @@ export function setPaginationHeaders(res, req, items, limit) {
const links = []; const links = [];
// rel="next" — older items (max_id = last item's cursor) // rel="next" — older items (max_id = last item's ID)
const nextParams = new URLSearchParams(existingParams); const nextParams = new URLSearchParams(existingParams);
nextParams.set("max_id", lastCursor); nextParams.set("max_id", lastId);
links.push(`<${baseUrl}?${nextParams.toString()}>; rel="next"`); links.push(`<${baseUrl}?${nextParams.toString()}>; rel="next"`);
// rel="prev" — newer items (min_id = first item's cursor) // rel="prev" — newer items (min_id = first item's ID)
const prevParams = new URLSearchParams(existingParams); const prevParams = new URLSearchParams(existingParams);
prevParams.set("min_id", firstCursor); prevParams.set("min_id", firstId);
links.push(`<${baseUrl}?${prevParams.toString()}>; rel="prev"`); links.push(`<${baseUrl}?${prevParams.toString()}>; rel="prev"`);
res.set("Link", links.join(", ")); res.set("Link", links.join(", "));
+7 -14
View File
@@ -1,17 +1,14 @@
/** /**
* Batch-resolve inReplyTo URLs to Mastodon cursor IDs and account IDs. * Batch-resolve inReplyTo URLs to ObjectId strings and account IDs.
* *
* Looks up parent posts in ap_timeline by uid/url and returns two Maps: * Looks up parent posts in ap_timeline by uid/url and returns two Maps:
* - replyIdMap: inReplyTo URL → cursor ID (status ID) * - replyIdMap: inReplyTo URL → parent _id.toString()
* - replyAccountIdMap: inReplyTo URL → author account ID * - replyAccountIdMap: inReplyTo URL → parent author account ID
*
* Used by route handlers before calling serializeStatus().
* *
* @param {object} collection - ap_timeline MongoDB collection * @param {object} collection - ap_timeline MongoDB collection
* @param {Array<object>} items - Timeline items with optional inReplyTo * @param {Array<object>} items - Timeline items with optional inReplyTo
* @returns {Promise<{replyIdMap: Map<string, string>, replyAccountIdMap: Map<string, string>}>} * @returns {Promise<{replyIdMap: Map<string, string>, replyAccountIdMap: Map<string, string>}>}
*/ */
import { encodeCursor } from "./pagination.js";
import { remoteActorId } from "./id-mapping.js"; import { remoteActorId } from "./id-mapping.js";
export async function resolveReplyIds(collection, items) { export async function resolveReplyIds(collection, items) {
@@ -19,29 +16,25 @@ export async function resolveReplyIds(collection, items) {
const replyAccountIdMap = new Map(); const replyAccountIdMap = new Map();
if (!collection || !items?.length) return { replyIdMap, replyAccountIdMap }; if (!collection || !items?.length) return { replyIdMap, replyAccountIdMap };
// Collect unique inReplyTo URLs
const urls = [ const urls = [
...new Set( ...new Set(
items items.map((item) => item.inReplyTo).filter(Boolean),
.map((item) => item.inReplyTo)
.filter(Boolean),
), ),
]; ];
if (urls.length === 0) return { replyIdMap, replyAccountIdMap }; if (urls.length === 0) return { replyIdMap, replyAccountIdMap };
// Batch lookup parents by uid or url
const parents = await collection const parents = await collection
.find({ $or: [{ uid: { $in: urls } }, { url: { $in: urls } }] }) .find({ $or: [{ uid: { $in: urls } }, { url: { $in: urls } }] })
.project({ uid: 1, url: 1, published: 1, "author.url": 1 }) .project({ uid: 1, url: 1, "author.url": 1 })
.toArray(); .toArray();
for (const parent of parents) { for (const parent of parents) {
const cursorId = encodeCursor(parent.published); const parentId = parent._id.toString();
const authorUrl = parent.author?.url; const authorUrl = parent.author?.url;
const authorAccountId = authorUrl ? remoteActorId(authorUrl) : null; const authorAccountId = authorUrl ? remoteActorId(authorUrl) : null;
const setMaps = (key) => { const setMaps = (key) => {
if (cursorId && cursorId !== "0") replyIdMap.set(key, cursorId); replyIdMap.set(key, parentId);
if (authorAccountId) replyAccountIdMap.set(key, authorAccountId); if (authorAccountId) replyAccountIdMap.set(key, authorAccountId);
}; };
+2 -22
View File
@@ -17,7 +17,6 @@
import express from "express"; import express from "express";
import { ObjectId } from "mongodb"; import { ObjectId } from "mongodb";
import { serializeStatus } from "../entities/status.js"; import { serializeStatus } from "../entities/status.js";
import { decodeCursor } from "../helpers/pagination.js";
import { resolveReplyIds } from "../helpers/resolve-reply-ids.js"; import { resolveReplyIds } from "../helpers/resolve-reply-ids.js";
import { import {
likePost, unlikePost, likePost, unlikePost,
@@ -844,32 +843,13 @@ router.get("/api/v1/statuses/:id/card", async (req, res, next) => {
// ─── Helpers ───────────────────────────────────────────────────────────────── // ─── Helpers ─────────────────────────────────────────────────────────────────
/** /**
* Find a timeline item by cursor ID (published-based) or ObjectId (legacy). * Find a timeline item by ObjectId.
* Status IDs are now encodeCursor(published) — milliseconds since epoch.
* Falls back to ObjectId lookup for backwards compatibility.
* *
* @param {object} collection - ap_timeline collection * @param {object} collection - ap_timeline collection
* @param {string} id - Status ID from client * @param {string} id - MongoDB ObjectId string
* @returns {Promise<object|null>} Timeline document or null * @returns {Promise<object|null>} Timeline document or null
*/ */
async function findTimelineItemById(collection, id) { async function findTimelineItemById(collection, id) {
// Try cursor-based lookup first (published date from ms-since-epoch)
const publishedDate = decodeCursor(id);
if (publishedDate) {
// Try exact match first (with .000Z suffix from toISOString)
let item = await collection.findOne({ published: publishedDate });
if (item) return item;
// Try without milliseconds — stored dates often lack .000Z
// e.g., "2026-03-21T15:33:50Z" vs "2026-03-21T15:33:50.000Z"
const withoutMs = publishedDate.replace(/\.000Z$/, "Z");
if (withoutMs !== publishedDate) {
item = await collection.findOne({ published: withoutMs });
if (item) return item;
}
}
// Fall back to ObjectId lookup (legacy IDs)
try { try {
return await collection.findOne({ _id: new ObjectId(id) }); return await collection.findOne({ _id: new ObjectId(id) });
} catch { } catch {
+1 -1
View File
@@ -1,6 +1,6 @@
{ {
"name": "@rmdes/indiekit-endpoint-activitypub", "name": "@rmdes/indiekit-endpoint-activitypub",
"version": "3.11.8", "version": "3.12.0",
"description": "ActivityPub federation endpoint for Indiekit via Fedify. Adds full fediverse support: actor, inbox, outbox, followers, following, syndication, and Mastodon migration.", "description": "ActivityPub federation endpoint for Indiekit via Fedify. Adds full fediverse support: actor, inbox, outbox, followers, following, syndication, and Mastodon migration.",
"keywords": [ "keywords": [
"indiekit", "indiekit",