feat!: replace cursor-based status IDs with MongoDB ObjectId

BREAKING: Status IDs are now _id.toString() instead of encodeCursor(published). This fixes the critical threading bug where multiple posts sharing the same published second produced identical IDs, causing findTimelineItemById to return the wrong document.

Changes:
- status.js: id = _id.toString() (unique, chronologically sortable)
- notification.js: same change (id = _id.toString())
- findTimelineItemById: ObjectId-only lookup (no cursor fallback)
- pagination.js: _id-based cursor pagination ($lt/$gt on ObjectId)
- resolve-reply-ids.js: returns _id.toString() for parent IDs
- Removed all encodeCursor/decodeCursor usage from the API layer

ObjectIds begin with a 4-byte timestamp, so sorting by _id: -1 yields a chronological, newest-first order (by insertion time, not by `published`). Pagination cursors are now ObjectId hex strings in Link headers.
2026-03-31 09:57:37 +02:00
parent f8cb42b10e
commit 6e63422c21
6 changed files with 61 additions and 113 deletions
@@ -13,7 +13,6 @@
 */
 import { serializeAccount } from "./account.js";
 import { serializeStatus } from "./status.js";
-import { encodeCursor } from "../helpers/pagination.js";

 /**
 * Map internal notification types to Mastodon API types.
@@ -121,7 +120,7 @@ export function serializeNotification(notif, { baseUrl, statusMap, interactionSt
    : notif.published || notif.createdAt || new Date().toISOString();

  return {
-    id: encodeCursor(createdAt) || notif._id.toString(),
+    id: notif._id.toString(),
    type: mastodonType,
    created_at: createdAt,
    account,
@@ -15,7 +15,6 @@
 */
 import { serializeAccount } from "./account.js";
 import { sanitizeHtml } from "./sanitize.js";
-import { encodeCursor } from "../helpers/pagination.js";

 // Module-level defaults set once at startup via setLocalIdentity()
 let _localPublicationUrl = "";
@@ -47,10 +46,8 @@ export function setLocalIdentity(publicationUrl, handle) {
 export function serializeStatus(item, { baseUrl, favouritedIds, rebloggedIds, bookmarkedIds, pinnedIds, replyIdMap, replyAccountIdMap } = {}) {
  if (!item) return null;

-  // Use published-based cursor as the status ID so pagination cursors
-  // (max_id/min_id) sort chronologically, not by insertion order.
-  const cursorDate = item.published || item.createdAt || item.boostedAt;
-  const id = encodeCursor(cursorDate) || item._id.toString();
+  // Use MongoDB ObjectId as the status ID — unique and chronologically sortable.
+  const id = item._id.toString();
  const uid = item.uid || "";
  const url = item.url || uid;

@@ -1,50 +1,17 @@
 /**
- * Mastodon-compatible cursor pagination helpers.
+ * Mastodon-compatible pagination helpers using MongoDB ObjectId.
 *
- * Uses `published` date as cursor (chronologically correct) instead of
- * MongoDB ObjectId. ObjectId reflects insertion order, not publication
- * order — backfilled or syndicated posts get new ObjectIds at import
- * time, breaking chronological sort. The `published` field matches the
- * native reader's sort and produces a correct timeline.
+ * ObjectIds are 12-byte values with a 4-byte timestamp prefix, so sorting
+ * by _id orders items by insertion time (not by `published`). Status IDs are
+ * _id.toString() — unique, sortable, and directly usable as pagination cursors.
 *
- * Cursor values are `published` ISO strings, but Mastodon clients pass
- * them as opaque `max_id`/`min_id`/`since_id` strings. We encode the
- * published date as a Mastodon-style snowflake-ish ID (milliseconds
- * since epoch) so clients treat them as comparable integers.
- *
- * Emits RFC 8288 Link headers that masto.js / Phanpy parse.
+ * Emits RFC 8288 Link headers that Phanpy/Elk/Moshidon parse.
 */
+import { ObjectId } from "mongodb";

 const DEFAULT_LIMIT = 20;
 const MAX_LIMIT = 40;

-/**
- * Encode a published date string as a numeric cursor ID.
- * Mastodon clients expect IDs to be numeric strings that sort chronologically.
- * We use milliseconds since epoch — monotonic and comparable.
- *
- * @param {string|Date} published - ISO date string or Date object
- * @returns {string} Numeric string (ms since epoch)
- */
-export function encodeCursor(published) {
-  if (!published) return "0";
-  const ms = new Date(published).getTime();
-  return Number.isFinite(ms) ? String(ms) : "0";
-}
-
-/**
- * Decode a numeric cursor ID back to an ISO date string.
- *
- * @param {string} cursor - Numeric cursor from client
- * @returns {string|null} ISO date string, or null if invalid
- */
-export function decodeCursor(cursor) {
-  if (!cursor) return null;
-  const ms = Number.parseInt(cursor, 10);
-  if (!Number.isFinite(ms) || ms <= 0) return null;
-  return new Date(ms).toISOString();
-}
-
 /**
 * Parse and clamp the limit parameter.
 *
@@ -58,46 +25,60 @@ export function parseLimit(raw) {
 }

 /**
- * Build a MongoDB filter object for cursor-based pagination.
+ * Try to parse a cursor string as an ObjectId.
+ * Returns null if invalid.
 *
- * Mastodon cursor params (all optional, applied to `published`):
- *   max_id   — return items older than this cursor (exclusive)
- *   min_id   — return items newer than this cursor (exclusive), closest first
- *   since_id — return items newer than this cursor (exclusive), most recent first
+ * @param {string} cursor - ObjectId hex string from client
+ * @returns {ObjectId|null}
+ */
+function parseCursor(cursor) {
+  if (!cursor || typeof cursor !== "string") return null; // empty or non-string input is never a cursor
+  try {
+    return new ObjectId(cursor);
+  } catch { // any error from the ObjectId constructor means "not a valid cursor"
+    return null; // callers test the result for truthiness and skip the filter when null
+  }
+}
+
+/**
+ * Build a MongoDB filter object for ObjectId-based pagination.
+ *
+ * Mastodon cursor params (all optional, applied to `_id`):
+ *   max_id   — return items older than this ID (exclusive)
+ *   min_id   — return items newer than this ID (exclusive), closest first
+ *   since_id — return items newer than this ID (exclusive), most recent first
 *
 * @param {object} baseFilter - Existing MongoDB filter to extend
 * @param {object} cursors
- * @param {string} [cursors.max_id] - Numeric cursor (ms since epoch)
- * @param {string} [cursors.min_id] - Numeric cursor (ms since epoch)
- * @param {string} [cursors.since_id] - Numeric cursor (ms since epoch)
+ * @param {string} [cursors.max_id] - ObjectId hex string
+ * @param {string} [cursors.min_id] - ObjectId hex string
+ * @param {string} [cursors.since_id] - ObjectId hex string
 * @returns {{ filter: object, sort: object, reverse: boolean }}
 */
 export function buildPaginationQuery(baseFilter, { max_id, min_id, since_id } = {}) {
  const filter = { ...baseFilter };
-  let sort = { published: -1 }; // newest first (default)
+  let sort = { _id: -1 }; // newest first (default)
  let reverse = false;

  if (max_id) {
-    const date = decodeCursor(max_id);
-    if (date) {
-      filter.published = { ...filter.published, $lt: date };
+    const oid = parseCursor(max_id);
+    if (oid) {
+      filter._id = { ...filter._id, $lt: oid };
    }
  }

  if (since_id) {
-    const date = decodeCursor(since_id);
-    if (date) {
-      filter.published = { ...filter.published, $gt: date };
+    const oid = parseCursor(since_id);
+    if (oid) {
+      filter._id = { ...filter._id, $gt: oid };
    }
  }

  if (min_id) {
-    const date = decodeCursor(min_id);
-    if (date) {
-      filter.published = { ...filter.published, $gt: date };
-      // min_id returns results closest to the cursor, so sort ascending
-      // then reverse the results before returning
-      sort = { published: 1 };
+    const oid = parseCursor(min_id);
+    if (oid) {
+      filter._id = { ...filter._id, $gt: oid };
+      sort = { _id: 1 };
      reverse = true;
    }
  }
@@ -110,7 +91,7 @@ export function buildPaginationQuery(baseFilter, { max_id, min_id, since_id } =
 *
 * @param {object} res - Express response object
 * @param {object} req - Express request object (for building URLs)
- * @param {Array} items - Result items (must have `published`)
+ * @param {Array} items - Result items (must have `_id`)
 * @param {number} limit - The limit used for the query
 */
 export function setPaginationHeaders(res, req, items, limit) {
@@ -119,10 +100,8 @@ export function setPaginationHeaders(res, req, items, limit) {
  // Only emit Link if we got a full page (may have more)
  if (items.length < limit) return;

-  const firstCursor = encodeCursor(items[0].published);
-  const lastCursor = encodeCursor(items[items.length - 1].published);
-
-  if (firstCursor === "0" || lastCursor === "0") return;
+  const firstId = items[0]._id.toString();
+  const lastId = items[items.length - 1]._id.toString();

  const baseUrl = `${req.protocol}://${req.get("host")}${req.path}`;

@@ -139,14 +118,14 @@ export function setPaginationHeaders(res, req, items, limit) {

  const links = [];

-  // rel="next" — older items (max_id = last item's cursor)
+  // rel="next" — older items (max_id = last item's ID)
  const nextParams = new URLSearchParams(existingParams);
-  nextParams.set("max_id", lastCursor);
+  nextParams.set("max_id", lastId);
  links.push(`<${baseUrl}?${nextParams.toString()}>; rel="next"`);

-  // rel="prev" — newer items (min_id = first item's cursor)
+  // rel="prev" — newer items (min_id = first item's ID)
  const prevParams = new URLSearchParams(existingParams);
-  prevParams.set("min_id", firstCursor);
+  prevParams.set("min_id", firstId);
  links.push(`<${baseUrl}?${prevParams.toString()}>; rel="prev"`);

  res.set("Link", links.join(", "));
@@ -1,17 +1,14 @@
 /**
- * Batch-resolve inReplyTo URLs to Mastodon cursor IDs and account IDs.
+ * Batch-resolve inReplyTo URLs to ObjectId strings and account IDs.
 *
 * Looks up parent posts in ap_timeline by uid/url and returns two Maps:
- * - replyIdMap: inReplyTo URL → cursor ID (status ID)
- * - replyAccountIdMap: inReplyTo URL → author account ID
- *
- * Used by route handlers before calling serializeStatus().
+ * - replyIdMap: inReplyTo URL → parent _id.toString()
+ * - replyAccountIdMap: inReplyTo URL → parent author account ID
 *
 * @param {object} collection - ap_timeline MongoDB collection
 * @param {Array<object>} items - Timeline items with optional inReplyTo
 * @returns {Promise<{replyIdMap: Map<string, string>, replyAccountIdMap: Map<string, string>}>}
 */
-import { encodeCursor } from "./pagination.js";
 import { remoteActorId } from "./id-mapping.js";

 export async function resolveReplyIds(collection, items) {
@@ -19,29 +16,25 @@ export async function resolveReplyIds(collection, items) {
  const replyAccountIdMap = new Map();
  if (!collection || !items?.length) return { replyIdMap, replyAccountIdMap };

-  // Collect unique inReplyTo URLs
  const urls = [
    ...new Set(
-      items
-        .map((item) => item.inReplyTo)
-        .filter(Boolean),
+      items.map((item) => item.inReplyTo).filter(Boolean),
    ),
  ];
  if (urls.length === 0) return { replyIdMap, replyAccountIdMap };

-  // Batch lookup parents by uid or url
  const parents = await collection
    .find({ $or: [{ uid: { $in: urls } }, { url: { $in: urls } }] })
-    .project({ uid: 1, url: 1, published: 1, "author.url": 1 })
+    .project({ uid: 1, url: 1, "author.url": 1 })
    .toArray();

  for (const parent of parents) {
-    const cursorId = encodeCursor(parent.published);
+    const parentId = parent._id.toString();
    const authorUrl = parent.author?.url;
    const authorAccountId = authorUrl ? remoteActorId(authorUrl) : null;

    const setMaps = (key) => {
-      if (cursorId && cursorId !== "0") replyIdMap.set(key, cursorId);
+      replyIdMap.set(key, parentId);
      if (authorAccountId) replyAccountIdMap.set(key, authorAccountId);
    };

@@ -17,7 +17,6 @@
 import express from "express";
 import { ObjectId } from "mongodb";
 import { serializeStatus } from "../entities/status.js";
-import { decodeCursor } from "../helpers/pagination.js";
 import { resolveReplyIds } from "../helpers/resolve-reply-ids.js";
 import {
  likePost, unlikePost,
@@ -844,32 +843,13 @@ router.get("/api/v1/statuses/:id/card", async (req, res, next) => {
 // ─── Helpers ─────────────────────────────────────────────────────────────────

 /**
- * Find a timeline item by cursor ID (published-based) or ObjectId (legacy).
- * Status IDs are now encodeCursor(published) — milliseconds since epoch.
- * Falls back to ObjectId lookup for backwards compatibility.
+ * Find a timeline item by ObjectId.
 *
 * @param {object} collection - ap_timeline collection
- * @param {string} id - Status ID from client
+ * @param {string} id - MongoDB ObjectId string
 * @returns {Promise<object|null>} Timeline document or null
 */
 async function findTimelineItemById(collection, id) {
-  // Try cursor-based lookup first (published date from ms-since-epoch)
-  const publishedDate = decodeCursor(id);
-  if (publishedDate) {
-    // Try exact match first (with .000Z suffix from toISOString)
-    let item = await collection.findOne({ published: publishedDate });
-    if (item) return item;
-
-    // Try without milliseconds — stored dates often lack .000Z
-    // e.g., "2026-03-21T15:33:50Z" vs "2026-03-21T15:33:50.000Z"
-    const withoutMs = publishedDate.replace(/\.000Z$/, "Z");
-    if (withoutMs !== publishedDate) {
-      item = await collection.findOne({ published: withoutMs });
-      if (item) return item;
-    }
-  }
-
-  // Fall back to ObjectId lookup (legacy IDs)
  try {
    return await collection.findOne({ _id: new ObjectId(id) });
  } catch {
@@ -1,6 +1,6 @@
 {
  "name": "@rmdes/indiekit-endpoint-activitypub",
-  "version": "3.11.8",
+  "version": "3.12.0",
  "description": "ActivityPub federation endpoint for Indiekit via Fedify. Adds full fediverse support: actor, inbox, outbox, followers, following, syndication, and Mastodon migration.",
  "keywords": [
    "indiekit",