fix(webmention): build synthetic h-entry from stored properties, drop live fetch

Root cause: blog.giersig.eu DNS resolves internally to 10.100.0.10 (the
indiekit admin nginx), which returns the login page for post URLs of
certain types (notes, photos, replies). Live page fetching is inherently
unreliable in this split-DNS / jailed setup.

The fix: indiekit already stores all microformat target URLs in MongoDB
(in-reply-to, like-of, bookmark-of, repost-of) and content.html has
inline links. We can build a synthetic h-entry HTML snippet directly
from post.properties — no network fetch required for the source post.

Bumps livefetch patch to v5:
- Replace live page fetch with synthetic HTML built from post.properties
- Handles string values, mf2 objects ({properties.url[0]}), and plain
  value strings for each microformat property
- Simplifies patch script: single full-block replacement handles all
  prior versions (v1–v4) via marker detection

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Sven
2026-03-20 22:23:30 +01:00
parent 7f9f02bc36
commit 17b93b3a2a
+48 -149
View File
@@ -1,26 +1,20 @@
/** /**
* Patch @rmdes/indiekit-endpoint-webmention-sender controller to: * Patch @rmdes/indiekit-endpoint-webmention-sender controller to:
* *
* 1. Always fetch the live page instead of using stored post content. * 1. Build synthetic h-entry HTML from stored post properties instead of
* The stored content (post.properties.content.html) is just the post body — * fetching the live page. The stored properties already contain all
* it never contains template-rendered links like u-in-reply-to, u-like-of, * microformat target URLs (in-reply-to, like-of, bookmark-of, repost-of)
* u-bookmark-of, u-repost-of. Only the live HTML has those. * and content.html has inline links — no live page fetch needed.
* *
* 2. Don't permanently mark a post as webmention-sent when the live page * This fixes unreliable live fetches caused by internal DNS routing
* is unreachable (e.g. deploy still in progress). Skip it silently so * blog.giersig.eu to the indiekit admin nginx (10.100.0.10) which
* the next poll retries it. * returns a login page for post URLs.
* *
* 3. Fetch blog pages from the public URL directly. INTERNAL_FETCH_URL is for * 2. Don't permanently mark a post as webmention-sent when processing
* indiekit API calls only — blog pages are served by an external host * fails. Skip it silently so the next poll retries.
* (e.g. GitHub Pages) that the jail can reach over the public URL.
* Override with WEBMENTION_LIVEFETCH_URL if a local static server is
* available (e.g. http://10.x.x.x; will send Host: <public-hostname>).
* *
* 4. Log the actual fetchUrl and response preview when h-entry check fails, * Handles the original upstream code, the older retry patch, and all
* so the cause is visible in the logs. * prior livefetch patch versions (v1–v4) via full block replacement.
*
* Handles the original upstream code, the older retry patch, v1/v2/v3
* livefetch patches, and upgrades any prior version to v4.
*/ */
import { access, readFile, writeFile } from "node:fs/promises"; import { access, readFile, writeFile } from "node:fs/promises";
@@ -28,10 +22,7 @@ import { access, readFile, writeFile } from "node:fs/promises";
const filePath = const filePath =
"node_modules/@rmdes/indiekit-endpoint-webmention-sender/lib/controllers/webmention-sender.js"; "node_modules/@rmdes/indiekit-endpoint-webmention-sender/lib/controllers/webmention-sender.js";
const patchMarker = "// [patched:livefetch:v4]"; const patchMarker = "// [patched:livefetch:v5]";
const v3PatchMarker = "// [patched:livefetch:v3]";
const v2PatchMarker = "// [patched:livefetch:v2]";
const oldPatchMarker = "// [patched:livefetch]";
// Original upstream code // Original upstream code
const originalBlock = ` // If no content, try fetching the published page const originalBlock = ` // If no content, try fetching the published page
@@ -53,8 +44,7 @@ const originalBlock = ` // If no content, try fetching the published page
continue; continue;
}`; }`;
// State left by older patch-webmention-sender-retry.mjs (which only fixed the // State left by older patch-webmention-sender-retry.mjs
// fetch-failure path but not the live-fetch-always path)
const retryPatchedBlock = ` // If no content, try fetching the published page const retryPatchedBlock = ` // If no content, try fetching the published page
let contentToProcess = postContent; let contentToProcess = postContent;
let fetchFailed = false; let fetchFailed = false;
@@ -74,8 +64,6 @@ const retryPatchedBlock = ` // If no content, try fetching the published
if (!contentToProcess) { if (!contentToProcess) {
if (fetchFailed) { if (fetchFailed) {
// Page not yet available — skip and retry on next poll rather than
// permanently marking this post as sent with zero webmentions.
console.log(\`[webmention] Page not yet available for \${postUrl}, will retry next poll\`); console.log(\`[webmention] Page not yet available for \${postUrl}, will retry next poll\`);
continue; continue;
} }
@@ -84,56 +72,30 @@ const retryPatchedBlock = ` // If no content, try fetching the published
continue; continue;
}`; }`;
const newBlock = ` // [patched:livefetch:v4] Always fetch the live page so template-rendered links const newBlock = ` // [patched:livefetch:v5] Build synthetic h-entry HTML from stored post properties.
// (u-in-reply-to, u-like-of, u-bookmark-of, u-repost-of, etc.) are included. // The stored properties already contain all microformat target URLs
// Stored content only has the post body, not these microformat links. // (in-reply-to, like-of, bookmark-of, repost-of) and content.html has inline
// // links — no live page fetch needed, and no exposure to internal DNS issues.
// Fetch from the public URL directly. INTERNAL_FETCH_URL is for indiekit API const _propLinks = {
// calls only — blog pages are served by an external host (e.g. GitHub Pages) "in-reply-to": "u-in-reply-to",
// that the jail can reach fine over the public URL. "like-of": "u-like-of",
// Override with WEBMENTION_LIVEFETCH_URL if a local static server is available. "bookmark-of": "u-bookmark-of",
let contentToProcess = ""; "repost-of": "u-repost-of",
try { "syndication": "u-syndication",
const _wmLivefetchBase = (process.env.WEBMENTION_LIVEFETCH_URL || "").replace(/\\/+$/, ""); };
const _wmPublicBase = (process.env.PUBLICATION_URL || process.env.SITE_URL || "").replace(/\\/+$/, ""); const _anchors = [];
const fetchUrl = (_wmLivefetchBase && _wmPublicBase && postUrl.startsWith(_wmPublicBase)) for (const [_prop, _cls] of Object.entries(_propLinks)) {
? _wmLivefetchBase + postUrl.slice(_wmPublicBase.length) const _vals = post.properties[_prop];
: postUrl; if (!_vals) continue;
if (fetchUrl !== postUrl) { for (const _v of (Array.isArray(_vals) ? _vals : [_vals])) {
console.log(\`[webmention] Fetching \${postUrl} via WEBMENTION_LIVEFETCH_URL: \${fetchUrl}\`); const _href = (typeof _v === "string") ? _v : (_v?.properties?.url?.[0] ?? _v?.value ?? null);
if (_href && /^https?:\\/\\//.test(_href)) {
_anchors.push(\`<a class="\${_cls}" href="\${_href}"></a>\`);
} }
const _ac = new AbortController();
const _timeout = setTimeout(() => _ac.abort(), 15000);
const _fetchOpts = { signal: _ac.signal };
if (fetchUrl !== postUrl && _wmPublicBase) {
_fetchOpts.headers = { host: new URL(_wmPublicBase).hostname };
} }
const pageResponse = await fetch(fetchUrl, _fetchOpts);
clearTimeout(_timeout);
if (pageResponse.ok) {
const _html = await pageResponse.text();
// Validate the response is a real post page, not an error/502 page.
// extractLinks scopes to .h-entry, so if there's no .h-entry the page
// is not a valid post (e.g. nginx 502, login redirect, error template).
if (_html.includes("h-entry") /* [patched:hentry-syntax] */ || _html.includes("h-entry ")) {
contentToProcess = _html;
} else {
console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (fetched: \${fetchUrl}, preview: \${_html.slice(0, 200).replace(/[\\n\\r]+/g, " ")})\`);
} }
} else { const _bodyHtml = post.properties.content?.html || post.properties.content?.value || "";
console.log(\`[webmention] Live page returned \${pageResponse.status} for \${fetchUrl}\`); const contentToProcess = \`<div class="h-entry">\${_anchors.join("")}\${_bodyHtml ? \`<div class="e-content">\${_bodyHtml}</div>\` : ""}</div>\`;`;
}
} catch (error) {
console.log(\`[webmention] Could not fetch live page for \${postUrl}: \${error.message}\`);
}
if (!contentToProcess) {
// Live page missing or invalid — skip without marking sent so the next
// poll retries. Don't fall back to stored content because it lacks the
// template-rendered microformat links we need.
console.log(\`[webmention] No valid page for \${postUrl}, will retry next poll\`);
continue;
}`;
async function exists(p) { async function exists(p) {
try { try {
@@ -152,91 +114,28 @@ if (!(await exists(filePath))) {
const source = await readFile(filePath, "utf8"); const source = await readFile(filePath, "utf8");
if (source.includes(patchMarker)) { if (source.includes(patchMarker)) {
console.log("[patch-webmention-sender-livefetch] Already patched (v4)"); console.log("[patch-webmention-sender-livefetch] Already patched (v5)");
process.exit(0); process.exit(0);
} }
// Upgrade v3 → v4: replace the whole fetch+log block within the existing v3 marker. // For v1–v4: extract the old patched block by finding the marker and the
// Match the unique INTERNAL_FETCH_URL reference to isolate the block to replace. // closing "continue;\n }" that ends the if (!contentToProcess) block.
if (source.includes(v3PatchMarker)) { const priorMarkers = [
const v3InternalBase = ` const _wmInternalBase = (() => { "// [patched:livefetch:v4]",
if (process.env.INTERNAL_FETCH_URL) return process.env.INTERNAL_FETCH_URL.replace(/\\/+$/, ""); "// [patched:livefetch:v3]",
const port = process.env.PORT || "3000"; "// [patched:livefetch:v2]",
return \`http://localhost:\${port}\`; "// [patched:livefetch]",
})(); ];
const _wmPublicBase = (process.env.PUBLICATION_URL || process.env.SITE_URL || "").replace(/\\/+$/, "");
const fetchUrl = (_wmPublicBase && postUrl.startsWith(_wmPublicBase))
? _wmInternalBase + postUrl.slice(_wmPublicBase.length)
: postUrl;
if (fetchUrl !== postUrl) {
console.log(\`[webmention] Fetching \${postUrl} via internal URL: \${fetchUrl}\`);
}
const _ac = new AbortController();
const _timeout = setTimeout(() => _ac.abort(), 15000);
// When fetching via internal URL (nginx), send the public Host header so
// nginx can route to the correct virtual host.
// Without this, nginx sees the internal IP as Host and serves the wrong vhost.
const _fetchOpts = { signal: _ac.signal };
if (fetchUrl !== postUrl && _wmPublicBase) {
_fetchOpts.headers = { host: new URL(_wmPublicBase).hostname };
}
const pageResponse = await fetch(fetchUrl, _fetchOpts);`;
const v4FetchBlock = ` const _wmLivefetchBase = (process.env.WEBMENTION_LIVEFETCH_URL || "").replace(/\\/+$/, "");
const _wmPublicBase = (process.env.PUBLICATION_URL || process.env.SITE_URL || "").replace(/\\/+$/, "");
const fetchUrl = (_wmLivefetchBase && _wmPublicBase && postUrl.startsWith(_wmPublicBase))
? _wmLivefetchBase + postUrl.slice(_wmPublicBase.length)
: postUrl;
if (fetchUrl !== postUrl) {
console.log(\`[webmention] Fetching \${postUrl} via WEBMENTION_LIVEFETCH_URL: \${fetchUrl}\`);
}
const _ac = new AbortController();
const _timeout = setTimeout(() => _ac.abort(), 15000);
const _fetchOpts = { signal: _ac.signal };
if (fetchUrl !== postUrl && _wmPublicBase) {
_fetchOpts.headers = { host: new URL(_wmPublicBase).hostname };
}
const pageResponse = await fetch(fetchUrl, _fetchOpts);`;
const v3DiagLine = ` console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (fetched: \${fetchUrl}, host-sent: \${_fetchOpts.headers?.host ?? "(none)"}, preview: \${_html.slice(0, 200).replace(/[\\n\\r]+/g, " ")})\`);`;
const v4DiagLine = ` console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (fetched: \${fetchUrl}, preview: \${_html.slice(0, 200).replace(/[\\n\\r]+/g, " ")})\`);`;
let upgraded = source
.replace(v3PatchMarker, patchMarker)
.replace(v3InternalBase, v4FetchBlock)
.replace(v3DiagLine, v4DiagLine);
// Also update the comment line that mentions INTERNAL_FETCH_URL
upgraded = upgraded.replace(
" // Rewrite public URL to internal URL for jailed setups where the server\n // can't reach its own public HTTPS URL.\n // Send public Host header on internal fetches so nginx routes to the right vhost.",
" //\n // Fetch from the public URL directly. INTERNAL_FETCH_URL is for indiekit API\n // calls only — blog pages are served by an external host (e.g. GitHub Pages)\n // that the jail can reach fine over the public URL.\n // Override with WEBMENTION_LIVEFETCH_URL if a local static server is available."
);
if (!upgraded.includes(patchMarker)) {
console.warn("[patch-webmention-sender-livefetch] v3→v4 upgrade validation failed, skipping");
process.exit(0);
}
await writeFile(filePath, upgraded, "utf8");
console.log("[patch-webmention-sender-livefetch] Upgraded v3 → v4 (public URL fetch, no INTERNAL_FETCH_URL)");
process.exit(0);
}
// Earlier versions (v1/v2 or unpatched): extract block and do full replacement.
let oldPatchBlock = null; let oldPatchBlock = null;
if (source.includes(v2PatchMarker)) { for (const marker of priorMarkers) {
const startIdx = source.lastIndexOf(" // [patched:livefetch:v2]"); if (!source.includes(marker)) continue;
const endMarker = " continue;\n }"; const startIdx = source.lastIndexOf(` ${marker}`);
const endSearch = source.indexOf(endMarker, startIdx);
if (startIdx !== -1 && endSearch !== -1) {
oldPatchBlock = source.slice(startIdx, endSearch + endMarker.length);
}
} else if (source.includes(oldPatchMarker)) {
const startIdx = source.lastIndexOf(" // [patched:livefetch]");
const endMarker = " continue;\n }"; const endMarker = " continue;\n }";
const endSearch = source.indexOf(endMarker, startIdx); const endSearch = source.indexOf(endMarker, startIdx);
if (startIdx !== -1 && endSearch !== -1) { if (startIdx !== -1 && endSearch !== -1) {
oldPatchBlock = source.slice(startIdx, endSearch + endMarker.length); oldPatchBlock = source.slice(startIdx, endSearch + endMarker.length);
break;
} }
} }
@@ -263,4 +162,4 @@ if (!patched.includes(patchMarker)) {
} }
await writeFile(filePath, patched, "utf8"); await writeFile(filePath, patched, "utf8");
console.log("[patch-webmention-sender-livefetch] Patched successfully (v4)"); console.log("[patch-webmention-sender-livefetch] Patched successfully (v5)");