feat!: replace cursor-based status IDs with MongoDB ObjectId

BREAKING: Status IDs are now _id.toString() instead of
encodeCursor(published). This fixes the critical threading bug where
multiple posts sharing the same published second produced identical
IDs, causing findTimelineItemById to return the wrong document.

Changes:
- status.js: id = _id.toString() (unique, chronologically sortable)
- notification.js: same
- findTimelineItemById: ObjectId-only lookup (no cursor fallback)
- pagination.js: _id-based cursor pagination ($lt/$gt on ObjectId)
- resolve-reply-ids.js: returns _id.toString() for parent IDs
- Removed all encodeCursor/decodeCursor usage from API layer

ObjectIds have a 4-byte timestamp prefix (creation time, one-second
resolution), so sorting by _id: -1 returns the most recently inserted
items first. Pagination cursors are now ObjectId hex strings in Link
headers.
This commit is contained in:
Ricardo
2026-03-31 09:57:37 +02:00
parent f8cb42b10e
commit 6e63422c21
6 changed files with 61 additions and 113 deletions
+1 -2
View File
@@ -13,7 +13,6 @@
*/
import { serializeAccount } from "./account.js";
import { serializeStatus } from "./status.js";
import { encodeCursor } from "../helpers/pagination.js";
/**
* Map internal notification types to Mastodon API types.
@@ -121,7 +120,7 @@ export function serializeNotification(notif, { baseUrl, statusMap, interactionSt
: notif.published || notif.createdAt || new Date().toISOString();
return {
id: encodeCursor(createdAt) || notif._id.toString(),
id: notif._id.toString(),
type: mastodonType,
created_at: createdAt,
account,
+2 -5
View File
@@ -15,7 +15,6 @@
*/
import { serializeAccount } from "./account.js";
import { sanitizeHtml } from "./sanitize.js";
import { encodeCursor } from "../helpers/pagination.js";
// Module-level defaults set once at startup via setLocalIdentity()
let _localPublicationUrl = "";
@@ -47,10 +46,8 @@ export function setLocalIdentity(publicationUrl, handle) {
export function serializeStatus(item, { baseUrl, favouritedIds, rebloggedIds, bookmarkedIds, pinnedIds, replyIdMap, replyAccountIdMap } = {}) {
if (!item) return null;
// Use published-based cursor as the status ID so pagination cursors
// (max_id/min_id) sort chronologically, not by insertion order.
const cursorDate = item.published || item.createdAt || item.boostedAt;
const id = encodeCursor(cursorDate) || item._id.toString();
// Use MongoDB ObjectId as the status ID — unique and chronologically sortable.
const id = item._id.toString();
const uid = item.uid || "";
const url = item.url || uid;
+48 -69
View File
@@ -1,50 +1,17 @@
/**
* Mastodon-compatible cursor pagination helpers.
* Mastodon-compatible pagination helpers using MongoDB ObjectId.
*
* Uses `published` date as cursor (chronologically correct) instead of
* MongoDB ObjectId. ObjectId reflects insertion order, not publication
* order — backfilled or syndicated posts get new ObjectIds at import
* time, breaking chronological sort. The `published` field matches the
* native reader's sort and produces a correct timeline.
* ObjectIds are 12-byte values with a 4-byte timestamp prefix, making
* them chronologically sortable. Status IDs are _id.toString() — unique,
* sortable, and directly usable as pagination cursors.
*
* Cursor values are `published` ISO strings, but Mastodon clients pass
* them as opaque `max_id`/`min_id`/`since_id` strings. We encode the
* published date as a Mastodon-style snowflake-ish ID (milliseconds
* since epoch) so clients treat them as comparable integers.
*
* Emits RFC 8288 Link headers that masto.js / Phanpy parse.
* Emits RFC 8288 Link headers that Phanpy/Elk/Moshidon parse.
*/
import { ObjectId } from "mongodb";
const DEFAULT_LIMIT = 20;
const MAX_LIMIT = 40;
/**
 * Turn a publication timestamp into the numeric cursor string handed to
 * Mastodon clients.
 *
 * The cursor is the number of milliseconds since the Unix epoch, rendered
 * as a decimal string. A missing or unparseable date yields the sentinel
 * cursor "0".
 *
 * @param {string|Date} published - ISO date string or Date object
 * @returns {string} Numeric string (ms since epoch), or "0" when unusable
 */
export function encodeCursor(published) {
  const sentinel = "0";
  if (published) {
    const epochMs = new Date(published).getTime();
    // getTime() is NaN for an invalid date; only finite values are real cursors.
    if (Number.isFinite(epochMs)) {
      return String(epochMs);
    }
  }
  return sentinel;
}
/**
 * Decode a numeric cursor ID back to an ISO date string.
 *
 * The cursor must be a plain decimal string of milliseconds since the Unix
 * epoch. Anything else — empty, non-numeric, zero or negative, or a value
 * outside the range a Date can represent — is reported as invalid by
 * returning null rather than throwing.
 *
 * @param {string} cursor - Numeric cursor from client (ms since epoch)
 * @returns {string|null} ISO date string, or null if invalid
 */
export function decodeCursor(cursor) {
  if (!cursor) return null;

  // parseInt stops at the first non-digit, so a hex ID such as "65f0…" would
  // otherwise decode as 65 ms after the epoch. Require digits only.
  if (!/^\d+$/.test(String(cursor))) return null;

  const ms = Number.parseInt(cursor, 10);
  if (!Number.isFinite(ms) || ms <= 0) return null;

  // A finite number can still exceed the Date range; toISOString() throws a
  // RangeError on such an invalid Date, so check before formatting.
  const date = new Date(ms);
  if (Number.isNaN(date.getTime())) return null;

  return date.toISOString();
}
/**
* Parse and clamp the limit parameter.
*
@@ -58,46 +25,60 @@ export function parseLimit(raw) {
}
/**
* Build a MongoDB filter object for cursor-based pagination.
* Try to parse a cursor string as an ObjectId.
* Returns null if invalid.
*
* Mastodon cursor params (all optional, applied to `published`):
* max_id — return items older than this cursor (exclusive)
* min_id — return items newer than this cursor (exclusive), closest first
* since_id — return items newer than this cursor (exclusive), most recent first
* @param {string} cursor - ObjectId hex string from client
* @returns {ObjectId|null}
*/
function parseCursor(cursor) {
  // Cursors come from client-supplied query parameters; anything that is not
  // a non-empty string cannot be an ObjectId hex string.
  if (!cursor || typeof cursor !== "string") return null;

  // Accept only the canonical 24-character hex form. Some BSON releases also
  // interpret any 12-character string as raw ObjectId bytes, which would let
  // an arbitrary value through as a cursor.
  if (!/^[0-9a-f]{24}$/i.test(cursor)) return null;

  try {
    return new ObjectId(cursor);
  } catch {
    // Best-effort: an unparseable cursor is reported as null, not as an error.
    return null;
  }
}
/**
* Build a MongoDB filter object for ObjectId-based pagination.
*
* Mastodon cursor params (all optional, applied to `_id`):
* max_id — return items older than this ID (exclusive)
* min_id — return items newer than this ID (exclusive), closest first
* since_id — return items newer than this ID (exclusive), most recent first
*
* @param {object} baseFilter - Existing MongoDB filter to extend
* @param {object} cursors
* @param {string} [cursors.max_id] - Numeric cursor (ms since epoch)
* @param {string} [cursors.min_id] - Numeric cursor (ms since epoch)
* @param {string} [cursors.since_id] - Numeric cursor (ms since epoch)
* @param {string} [cursors.max_id] - ObjectId hex string
* @param {string} [cursors.min_id] - ObjectId hex string
* @param {string} [cursors.since_id] - ObjectId hex string
* @returns {{ filter: object, sort: object, reverse: boolean }}
*/
export function buildPaginationQuery(baseFilter, { max_id, min_id, since_id } = {}) {
const filter = { ...baseFilter };
let sort = { published: -1 }; // newest first (default)
let sort = { _id: -1 }; // newest first (default)
let reverse = false;
if (max_id) {
const date = decodeCursor(max_id);
if (date) {
filter.published = { ...filter.published, $lt: date };
const oid = parseCursor(max_id);
if (oid) {
filter._id = { ...filter._id, $lt: oid };
}
}
if (since_id) {
const date = decodeCursor(since_id);
if (date) {
filter.published = { ...filter.published, $gt: date };
const oid = parseCursor(since_id);
if (oid) {
filter._id = { ...filter._id, $gt: oid };
}
}
if (min_id) {
const date = decodeCursor(min_id);
if (date) {
filter.published = { ...filter.published, $gt: date };
// min_id returns results closest to the cursor, so sort ascending
// then reverse the results before returning
sort = { published: 1 };
const oid = parseCursor(min_id);
if (oid) {
filter._id = { ...filter._id, $gt: oid };
sort = { _id: 1 };
reverse = true;
}
}
@@ -110,7 +91,7 @@ export function buildPaginationQuery(baseFilter, { max_id, min_id, since_id } =
*
* @param {object} res - Express response object
* @param {object} req - Express request object (for building URLs)
* @param {Array} items - Result items (must have `published`)
* @param {Array} items - Result items (must have `_id`)
* @param {number} limit - The limit used for the query
*/
export function setPaginationHeaders(res, req, items, limit) {
@@ -119,10 +100,8 @@ export function setPaginationHeaders(res, req, items, limit) {
// Only emit Link if we got a full page (may have more)
if (items.length < limit) return;
const firstCursor = encodeCursor(items[0].published);
const lastCursor = encodeCursor(items[items.length - 1].published);
if (firstCursor === "0" || lastCursor === "0") return;
const firstId = items[0]._id.toString();
const lastId = items[items.length - 1]._id.toString();
const baseUrl = `${req.protocol}://${req.get("host")}${req.path}`;
@@ -139,14 +118,14 @@ export function setPaginationHeaders(res, req, items, limit) {
const links = [];
// rel="next" — older items (max_id = last item's cursor)
// rel="next" — older items (max_id = last item's ID)
const nextParams = new URLSearchParams(existingParams);
nextParams.set("max_id", lastCursor);
nextParams.set("max_id", lastId);
links.push(`<${baseUrl}?${nextParams.toString()}>; rel="next"`);
// rel="prev" — newer items (min_id = first item's cursor)
// rel="prev" — newer items (min_id = first item's ID)
const prevParams = new URLSearchParams(existingParams);
prevParams.set("min_id", firstCursor);
prevParams.set("min_id", firstId);
links.push(`<${baseUrl}?${prevParams.toString()}>; rel="prev"`);
res.set("Link", links.join(", "));
+7 -14
View File
@@ -1,17 +1,14 @@
/**
* Batch-resolve inReplyTo URLs to Mastodon cursor IDs and account IDs.
* Batch-resolve inReplyTo URLs to ObjectId strings and account IDs.
*
* Looks up parent posts in ap_timeline by uid/url and returns two Maps:
* - replyIdMap: inReplyTo URL → cursor ID (status ID)
* - replyAccountIdMap: inReplyTo URL → author account ID
*
* Used by route handlers before calling serializeStatus().
* - replyIdMap: inReplyTo URL → parent _id.toString()
* - replyAccountIdMap: inReplyTo URL → parent author account ID
*
* @param {object} collection - ap_timeline MongoDB collection
* @param {Array<object>} items - Timeline items with optional inReplyTo
* @returns {Promise<{replyIdMap: Map<string, string>, replyAccountIdMap: Map<string, string>}>}
*/
import { encodeCursor } from "./pagination.js";
import { remoteActorId } from "./id-mapping.js";
export async function resolveReplyIds(collection, items) {
@@ -19,29 +16,25 @@ export async function resolveReplyIds(collection, items) {
const replyAccountIdMap = new Map();
if (!collection || !items?.length) return { replyIdMap, replyAccountIdMap };
// Collect unique inReplyTo URLs
const urls = [
...new Set(
items
.map((item) => item.inReplyTo)
.filter(Boolean),
items.map((item) => item.inReplyTo).filter(Boolean),
),
];
if (urls.length === 0) return { replyIdMap, replyAccountIdMap };
// Batch lookup parents by uid or url
const parents = await collection
.find({ $or: [{ uid: { $in: urls } }, { url: { $in: urls } }] })
.project({ uid: 1, url: 1, published: 1, "author.url": 1 })
.project({ uid: 1, url: 1, "author.url": 1 })
.toArray();
for (const parent of parents) {
const cursorId = encodeCursor(parent.published);
const parentId = parent._id.toString();
const authorUrl = parent.author?.url;
const authorAccountId = authorUrl ? remoteActorId(authorUrl) : null;
const setMaps = (key) => {
if (cursorId && cursorId !== "0") replyIdMap.set(key, cursorId);
replyIdMap.set(key, parentId);
if (authorAccountId) replyAccountIdMap.set(key, authorAccountId);
};
+2 -22
View File
@@ -17,7 +17,6 @@
import express from "express";
import { ObjectId } from "mongodb";
import { serializeStatus } from "../entities/status.js";
import { decodeCursor } from "../helpers/pagination.js";
import { resolveReplyIds } from "../helpers/resolve-reply-ids.js";
import {
likePost, unlikePost,
@@ -844,32 +843,13 @@ router.get("/api/v1/statuses/:id/card", async (req, res, next) => {
// ─── Helpers ─────────────────────────────────────────────────────────────────
/**
* Find a timeline item by cursor ID (published-based) or ObjectId (legacy).
* Status IDs are now encodeCursor(published) — milliseconds since epoch.
* Falls back to ObjectId lookup for backwards compatibility.
* Find a timeline item by ObjectId.
*
* @param {object} collection - ap_timeline collection
* @param {string} id - Status ID from client
* @param {string} id - MongoDB ObjectId string
* @returns {Promise<object|null>} Timeline document or null
*/
async function findTimelineItemById(collection, id) {
// Try cursor-based lookup first (published date from ms-since-epoch)
const publishedDate = decodeCursor(id);
if (publishedDate) {
// Try exact match first (with .000Z suffix from toISOString)
let item = await collection.findOne({ published: publishedDate });
if (item) return item;
// Try without milliseconds — stored dates often lack .000Z
// e.g., "2026-03-21T15:33:50Z" vs "2026-03-21T15:33:50.000Z"
const withoutMs = publishedDate.replace(/\.000Z$/, "Z");
if (withoutMs !== publishedDate) {
item = await collection.findOne({ published: withoutMs });
if (item) return item;
}
}
// Fall back to ObjectId lookup (legacy IDs)
try {
return await collection.findOne({ _id: new ObjectId(id) });
} catch {
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "@rmdes/indiekit-endpoint-activitypub",
"version": "3.11.8",
"version": "3.12.0",
"description": "ActivityPub federation endpoint for Indiekit via Fedify. Adds full fediverse support: actor, inbox, outbox, followers, following, syndication, and Mastodon migration.",
"keywords": [
"indiekit",