fix(webmention): fetch live pages from public URL, not INTERNAL_FETCH_URL

Root cause: INTERNAL_FETCH_URL (10.100.0.10) points to the nginx reverse
proxy in front of indiekit's admin interface. Post URLs like /bookmarks/…
require authentication there, so the fetch returned the login page
("Anmelden - Indiekit") which has no .h-entry.

The blog HTML is served by an external host (GitHub Pages), reachable
from the jail over the public URL. INTERNAL_FETCH_URL should only be
used for indiekit API calls, not for fetching blog post pages.

Bumps livefetch patch to v4:
- Remove INTERNAL_FETCH_URL rewrite for live page fetches
- Fetch from postUrl (public URL) directly by default
- Add WEBMENTION_LIVEFETCH_URL env var as opt-in override for setups
  where a local static server can serve blog pages faster
- Add v3→v4 in-place upgrade logic to the patch script

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Sven
2026-03-20 22:04:10 +01:00
parent 11d600058d
commit 7f9f02bc36
+116 -92
View File
@@ -10,16 +10,17 @@
* is unreachable (e.g. deploy still in progress). Skip it silently so
* the next poll retries it.
*
* 3. When fetching via an internal URL (nginx reverse proxy), send the public
* Host header so nginx can route to the correct virtual host.
* Without this, nginx sees the internal IP as Host and may serve the wrong
* vhost, returning a page with no .h-entry.
* 3. Fetch blog pages from the public URL directly. INTERNAL_FETCH_URL is for
* indiekit API calls only — blog pages are served by an external host
* (e.g. GitHub Pages) that the jail can reach over the public URL.
* Override with WEBMENTION_LIVEFETCH_URL if a local static server is
* available (e.g. http://10.x.x.x; will send Host: <public-hostname>).
*
* 4. Log the actual fetchUrl and response preview when h-entry check fails,
* so the cause (wrong vhost, indiekit page, etc.) is visible in the logs.
* so the cause is visible in the logs.
*
* Handles the original upstream code, the older retry patch, the v1 livefetch
* patch, and upgrades v2 → v3 (adds Host header + better diagnostics).
* Handles the original upstream code, the older retry patch, v1/v2/v3
* livefetch patches, and upgrades any prior version to v4.
*/
import { access, readFile, writeFile } from "node:fs/promises";
@@ -27,7 +28,8 @@ import { access, readFile, writeFile } from "node:fs/promises";
const filePath =
"node_modules/@rmdes/indiekit-endpoint-webmention-sender/lib/controllers/webmention-sender.js";
const patchMarker = "// [patched:livefetch:v3]";
const patchMarker = "// [patched:livefetch:v4]";
const v3PatchMarker = "// [patched:livefetch:v3]";
const v2PatchMarker = "// [patched:livefetch:v2]";
const oldPatchMarker = "// [patched:livefetch]";
@@ -82,15 +84,82 @@ const retryPatchedBlock = ` // If no content, try fetching the published
continue;
}`;
const newBlock = ` // [patched:livefetch:v3] Always fetch the live page so template-rendered links
const newBlock = ` // [patched:livefetch:v4] Always fetch the live page so template-rendered links
// (u-in-reply-to, u-like-of, u-bookmark-of, u-repost-of, etc.) are included.
// Stored content only has the post body, not these microformat links.
// Rewrite public URL to internal URL for jailed setups where the server
// can't reach its own public HTTPS URL.
// Send public Host header on internal fetches so nginx routes to the right vhost.
//
// Fetch from the public URL directly. INTERNAL_FETCH_URL is for indiekit API
// calls only — blog pages are served by an external host (e.g. GitHub Pages)
// that the jail can reach fine over the public URL.
// Override with WEBMENTION_LIVEFETCH_URL if a local static server is available.
let contentToProcess = "";
try {
const _wmInternalBase = (() => {
const _wmLivefetchBase = (process.env.WEBMENTION_LIVEFETCH_URL || "").replace(/\\/+$/, "");
const _wmPublicBase = (process.env.PUBLICATION_URL || process.env.SITE_URL || "").replace(/\\/+$/, "");
const fetchUrl = (_wmLivefetchBase && _wmPublicBase && postUrl.startsWith(_wmPublicBase))
? _wmLivefetchBase + postUrl.slice(_wmPublicBase.length)
: postUrl;
if (fetchUrl !== postUrl) {
console.log(\`[webmention] Fetching \${postUrl} via WEBMENTION_LIVEFETCH_URL: \${fetchUrl}\`);
}
const _ac = new AbortController();
const _timeout = setTimeout(() => _ac.abort(), 15000);
const _fetchOpts = { signal: _ac.signal };
if (fetchUrl !== postUrl && _wmPublicBase) {
_fetchOpts.headers = { host: new URL(_wmPublicBase).hostname };
}
const pageResponse = await fetch(fetchUrl, _fetchOpts);
clearTimeout(_timeout);
if (pageResponse.ok) {
const _html = await pageResponse.text();
// Validate the response is a real post page, not an error/502 page.
// extractLinks scopes to .h-entry, so if there's no .h-entry the page
// is not a valid post (e.g. nginx 502, login redirect, error template).
if (_html.includes("h-entry") /* [patched:hentry-syntax] */ || _html.includes("h-entry ")) {
contentToProcess = _html;
} else {
console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (fetched: \${fetchUrl}, preview: \${_html.slice(0, 200).replace(/[\\n\\r]+/g, " ")})\`);
}
} else {
console.log(\`[webmention] Live page returned \${pageResponse.status} for \${fetchUrl}\`);
}
} catch (error) {
console.log(\`[webmention] Could not fetch live page for \${postUrl}: \${error.message}\`);
}
if (!contentToProcess) {
// Live page missing or invalid — skip without marking sent so the next
// poll retries. Don't fall back to stored content because it lacks the
// template-rendered microformat links we need.
console.log(\`[webmention] No valid page for \${postUrl}, will retry next poll\`);
continue;
}`;
async function exists(p) {
try {
await access(p);
return true;
} catch {
return false;
}
}
if (!(await exists(filePath))) {
console.log("[patch-webmention-sender-livefetch] File not found, skipping");
process.exit(0);
}
const source = await readFile(filePath, "utf8");
if (source.includes(patchMarker)) {
console.log("[patch-webmention-sender-livefetch] Already patched (v4)");
process.exit(0);
}
// Upgrade v3 → v4: replace the whole fetch+log block within the existing v3 marker.
// Match the unique INTERNAL_FETCH_URL reference to isolate the block to replace.
if (source.includes(v3PatchMarker)) {
const v3InternalBase = ` const _wmInternalBase = (() => {
if (process.env.INTERNAL_FETCH_URL) return process.env.INTERNAL_FETCH_URL.replace(/\\/+$/, "");
const port = process.env.PORT || "3000";
return \`http://localhost:\${port}\`;
@@ -111,103 +180,58 @@ const newBlock = ` // [patched:livefetch:v3] Always fetch the live page s
if (fetchUrl !== postUrl && _wmPublicBase) {
_fetchOpts.headers = { host: new URL(_wmPublicBase).hostname };
}
const pageResponse = await fetch(fetchUrl, _fetchOpts);
clearTimeout(_timeout);
if (pageResponse.ok) {
const _html = await pageResponse.text();
// Validate the response is a real post page, not an error/502 page.
// extractLinks scopes to .h-entry, so if there's no .h-entry the page
// is not a valid post (e.g. nginx 502, login redirect, error template).
if (_html.includes("h-entry") /* [patched:hentry-syntax] */ || _html.includes("h-entry ")) {
contentToProcess = _html;
} else {
console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (fetched: \${fetchUrl}, host-sent: \${_fetchOpts.headers?.host ?? "(none)"}, preview: \${_html.slice(0, 200).replace(/[\\n\\r]+/g, " ")})\`);
}
} else {
console.log(\`[webmention] Live page returned \${pageResponse.status} for \${fetchUrl}\`);
const pageResponse = await fetch(fetchUrl, _fetchOpts);`;
const v4FetchBlock = ` const _wmLivefetchBase = (process.env.WEBMENTION_LIVEFETCH_URL || "").replace(/\\/+$/, "");
const _wmPublicBase = (process.env.PUBLICATION_URL || process.env.SITE_URL || "").replace(/\\/+$/, "");
const fetchUrl = (_wmLivefetchBase && _wmPublicBase && postUrl.startsWith(_wmPublicBase))
? _wmLivefetchBase + postUrl.slice(_wmPublicBase.length)
: postUrl;
if (fetchUrl !== postUrl) {
console.log(\`[webmention] Fetching \${postUrl} via WEBMENTION_LIVEFETCH_URL: \${fetchUrl}\`);
}
} catch (error) {
console.log(\`[webmention] Could not fetch live page for \${postUrl}: \${error.message}\`);
}
if (!contentToProcess) {
// Live page missing or invalid — skip without marking sent so the next
// poll retries. Don't fall back to stored content because it lacks the
// template-rendered microformat links we need.
console.log(\`[webmention] No valid page for \${postUrl}, will retry next poll\`);
continue;
}`;
// Lines changed in v2 → v3: fetch call + log message.
// Match just the fetch call so we can upgrade without re-matching the whole block.
const v2FetchLine = ` const pageResponse = await fetch(fetchUrl, { signal: _ac.signal });`;
const v3FetchLines = ` // When fetching via internal URL (nginx), send the public Host header so
// nginx can route to the correct virtual host.
// Without this, nginx sees the internal IP as Host and serves the wrong vhost.
const _ac = new AbortController();
const _timeout = setTimeout(() => _ac.abort(), 15000);
const _fetchOpts = { signal: _ac.signal };
if (fetchUrl !== postUrl && _wmPublicBase) {
_fetchOpts.headers = { host: new URL(_wmPublicBase).hostname };
}
const pageResponse = await fetch(fetchUrl, _fetchOpts);`;
const v2DiagLine = ` console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (error page?)\`);`;
const v3DiagLine = ` console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (fetched: \${fetchUrl}, host-sent: \${_fetchOpts.headers?.host ?? "(none)"}, preview: \${_html.slice(0, 200).replace(/[\\n\\r]+/g, " ")})\`);`;
const v3DiagLine = ` console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (fetched: \${fetchUrl}, host-sent: \${_fetchOpts.headers?.host ?? "(none)"}, preview: \${_html.slice(0, 200).replace(/[\\n\\r]+/g, " ")})\`);`;
const v4DiagLine = ` console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (fetched: \${fetchUrl}, preview: \${_html.slice(0, 200).replace(/[\\n\\r]+/g, " ")})\`);`;
const v2FetchUrlLog = ` const fetchUrl = (_wmPublicBase && postUrl.startsWith(_wmPublicBase))
? _wmInternalBase + postUrl.slice(_wmPublicBase.length)
: postUrl;
const _ac = new AbortController();`;
const v3FetchUrlLog = ` const fetchUrl = (_wmPublicBase && postUrl.startsWith(_wmPublicBase))
? _wmInternalBase + postUrl.slice(_wmPublicBase.length)
: postUrl;
if (fetchUrl !== postUrl) {
console.log(\`[webmention] Fetching \${postUrl} via internal URL: \${fetchUrl}\`);
}
const _ac = new AbortController();`;
async function exists(p) {
try {
await access(p);
return true;
} catch {
return false;
}
}
if (!(await exists(filePath))) {
console.log("[patch-webmention-sender-livefetch] File not found, skipping");
process.exit(0);
}
const source = await readFile(filePath, "utf8");
if (source.includes(patchMarker)) {
console.log("[patch-webmention-sender-livefetch] Already patched (v3)");
process.exit(0);
}
// Upgrade v2 → v3: apply targeted line replacements within the existing v2 block.
if (source.includes(v2PatchMarker)) {
let upgraded = source
.replace(v2PatchMarker, patchMarker)
.replace(v2FetchUrlLog, v3FetchUrlLog)
.replace(v2FetchLine, v3FetchLines)
.replace(v2DiagLine, v3DiagLine);
.replace(v3PatchMarker, patchMarker)
.replace(v3InternalBase, v4FetchBlock)
.replace(v3DiagLine, v4DiagLine);
// Also update the comment line that mentions INTERNAL_FETCH_URL
upgraded = upgraded.replace(
" // Rewrite public URL to internal URL for jailed setups where the server\n // can't reach its own public HTTPS URL.\n // Send public Host header on internal fetches so nginx routes to the right vhost.",
" //\n // Fetch from the public URL directly. INTERNAL_FETCH_URL is for indiekit API\n // calls only — blog pages are served by an external host (e.g. GitHub Pages)\n // that the jail can reach fine over the public URL.\n // Override with WEBMENTION_LIVEFETCH_URL if a local static server is available."
);
if (!upgraded.includes(patchMarker)) {
console.warn("[patch-webmention-sender-livefetch] v2→v3 upgrade validation failed, skipping");
console.warn("[patch-webmention-sender-livefetch] v3→v4 upgrade validation failed, skipping");
process.exit(0);
}
await writeFile(filePath, upgraded, "utf8");
console.log("[patch-webmention-sender-livefetch] Upgraded v2 → v3 (Host header + diagnostics)");
console.log("[patch-webmention-sender-livefetch] Upgraded v3 → v4 (public URL fetch, no INTERNAL_FETCH_URL)");
process.exit(0);
}
// If old v1 patch is applied, we need to replace it with v3.
// Extract the old patched block by matching from its marker to the "continue;" that ends it.
// Earlier versions (v1/v2 or unpatched): extract block and do full replacement.
let oldPatchBlock = null;
if (source.includes(oldPatchMarker) && !source.includes(v2PatchMarker)) {
if (source.includes(v2PatchMarker)) {
const startIdx = source.lastIndexOf(" // [patched:livefetch:v2]");
const endMarker = " continue;\n }";
const endSearch = source.indexOf(endMarker, startIdx);
if (startIdx !== -1 && endSearch !== -1) {
oldPatchBlock = source.slice(startIdx, endSearch + endMarker.length);
}
} else if (source.includes(oldPatchMarker)) {
const startIdx = source.lastIndexOf(" // [patched:livefetch]");
const endMarker = " continue;\n }";
const endSearch = source.indexOf(endMarker, startIdx);
@@ -239,4 +263,4 @@ if (!patched.includes(patchMarker)) {
}
await writeFile(filePath, patched, "utf8");
console.log("[patch-webmention-sender-livefetch] Patched successfully (v3)");
console.log("[patch-webmention-sender-livefetch] Patched successfully (v4)");