From 7f9f02bc363e97c74b605253c884586114986e9c Mon Sep 17 00:00:00 2001 From: Sven Date: Fri, 20 Mar 2026 22:04:10 +0100 Subject: [PATCH] fix(webmention): fetch live pages from public URL, not INTERNAL_FETCH_URL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: INTERNAL_FETCH_URL (10.100.0.10) points to the nginx reverse proxy in front of indiekit's admin interface. Post URLs like /bookmarks/… require authentication there, so the fetch returned the login page ("Anmelden - Indiekit") which has no .h-entry. The blog HTML is served by an external host (GitHub Pages), reachable from the jail over the public URL. INTERNAL_FETCH_URL should only be used for indiekit API calls, not for fetching blog post pages. Bumps livefetch patch to v4: - Remove INTERNAL_FETCH_URL rewrite for live page fetches - Fetch from postUrl (public URL) directly by default - Add WEBMENTION_LIVEFETCH_URL env var as opt-in override for setups where a local static server can serve blog pages faster - Add v3→v4 in-place upgrade logic to the patch script Co-Authored-By: Claude Sonnet 4.6 --- scripts/patch-webmention-sender-livefetch.mjs | 208 ++++++++++-------- 1 file changed, 116 insertions(+), 92 deletions(-) diff --git a/scripts/patch-webmention-sender-livefetch.mjs b/scripts/patch-webmention-sender-livefetch.mjs index 5133a62b..ea04e204 100644 --- a/scripts/patch-webmention-sender-livefetch.mjs +++ b/scripts/patch-webmention-sender-livefetch.mjs @@ -10,16 +10,17 @@ * is unreachable (e.g. deploy still in progress). Skip it silently so * the next poll retries it. * - * 3. When fetching via an internal URL (nginx reverse proxy), send the public - * Host header so nginx can route to the correct virtual host. - * Without this, nginx sees the internal IP as Host and may serve the wrong - * vhost, returning a page with no .h-entry. + * 3. Fetch blog pages from the public URL directly. 
INTERNAL_FETCH_URL is for + * indiekit API calls only — blog pages are served by an external host + * (e.g. GitHub Pages) that the jail can reach over the public URL. + * Override with WEBMENTION_LIVEFETCH_URL if a local static server is + * available (e.g. http://10.x.x.x; will send Host: <public hostname>). * * 4. Log the actual fetchUrl and response preview when h-entry check fails, - * so the cause (wrong vhost, indiekit page, etc.) is visible in the logs. + * so the cause is visible in the logs. * - * Handles the original upstream code, the older retry patch, the v1 livefetch - * patch, and upgrades v2 → v3 (adds Host header + better diagnostics). + * Handles the original upstream code, the older retry patch, v1/v2/v3 + * livefetch patches, and upgrades any prior version to v4. */ import { access, readFile, writeFile } from "node:fs/promises"; @@ -27,7 +28,8 @@ import { access, readFile, writeFile } from "node:fs/promises"; const filePath = "node_modules/@rmdes/indiekit-endpoint-webmention-sender/lib/controllers/webmention-sender.js"; -const patchMarker = "// [patched:livefetch:v3]"; +const patchMarker = "// [patched:livefetch:v4]"; +const v3PatchMarker = "// [patched:livefetch:v3]"; const v2PatchMarker = "// [patched:livefetch:v2]"; const oldPatchMarker = "// [patched:livefetch]"; @@ -82,15 +84,82 @@ const retryPatchedBlock = ` // If no content, try fetching the published continue; }`; -const newBlock = ` // [patched:livefetch:v3] Always fetch the live page so template-rendered links +const newBlock = ` // [patched:livefetch:v4] Always fetch the live page so template-rendered links // (u-in-reply-to, u-like-of, u-bookmark-of, u-repost-of, etc.) are included. // Stored content only has the post body, not these microformat links. - // Rewrite public URL to internal URL for jailed setups where the server - // can't reach its own public HTTPS URL. - // Send public Host header on internal fetches so nginx routes to the right vhost. + // + // Fetch from the public URL directly. 
INTERNAL_FETCH_URL is for indiekit API + // calls only — blog pages are served by an external host (e.g. GitHub Pages) + // that the jail can reach fine over the public URL. + // Override with WEBMENTION_LIVEFETCH_URL if a local static server is available. let contentToProcess = ""; try { - const _wmInternalBase = (() => { + const _wmLivefetchBase = (process.env.WEBMENTION_LIVEFETCH_URL || "").replace(/\\/+$/, ""); + const _wmPublicBase = (process.env.PUBLICATION_URL || process.env.SITE_URL || "").replace(/\\/+$/, ""); + const fetchUrl = (_wmLivefetchBase && _wmPublicBase && postUrl.startsWith(_wmPublicBase)) + ? _wmLivefetchBase + postUrl.slice(_wmPublicBase.length) + : postUrl; + if (fetchUrl !== postUrl) { + console.log(\`[webmention] Fetching \${postUrl} via WEBMENTION_LIVEFETCH_URL: \${fetchUrl}\`); + } + const _ac = new AbortController(); + const _timeout = setTimeout(() => _ac.abort(), 15000); + const _fetchOpts = { signal: _ac.signal }; + if (fetchUrl !== postUrl && _wmPublicBase) { + _fetchOpts.headers = { host: new URL(_wmPublicBase).hostname }; + } + const pageResponse = await fetch(fetchUrl, _fetchOpts); + clearTimeout(_timeout); + if (pageResponse.ok) { + const _html = await pageResponse.text(); + // Validate the response is a real post page, not an error/502 page. + // extractLinks scopes to .h-entry, so if there's no .h-entry the page + // is not a valid post (e.g. nginx 502, login redirect, error template). 
+ if (_html.includes("h-entry") /* [patched:hentry-syntax] */ || _html.includes("h-entry ")) { + contentToProcess = _html; + } else { + console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (fetched: \${fetchUrl}, preview: \${_html.slice(0, 200).replace(/[\\n\\r]+/g, " ")})\`); + } + } else { + console.log(\`[webmention] Live page returned \${pageResponse.status} for \${fetchUrl}\`); + } + } catch (error) { + console.log(\`[webmention] Could not fetch live page for \${postUrl}: \${error.message}\`); + } + + if (!contentToProcess) { + // Live page missing or invalid — skip without marking sent so the next + // poll retries. Don't fall back to stored content because it lacks the + // template-rendered microformat links we need. + console.log(\`[webmention] No valid page for \${postUrl}, will retry next poll\`); + continue; + }`; + +async function exists(p) { + try { + await access(p); + return true; + } catch { + return false; + } +} + +if (!(await exists(filePath))) { + console.log("[patch-webmention-sender-livefetch] File not found, skipping"); + process.exit(0); +} + +const source = await readFile(filePath, "utf8"); + +if (source.includes(patchMarker)) { + console.log("[patch-webmention-sender-livefetch] Already patched (v4)"); + process.exit(0); +} + +// Upgrade v3 → v4: replace the whole fetch+log block within the existing v3 marker. +// Match the unique INTERNAL_FETCH_URL reference to isolate the block to replace. 
+if (source.includes(v3PatchMarker)) { + const v3InternalBase = ` const _wmInternalBase = (() => { if (process.env.INTERNAL_FETCH_URL) return process.env.INTERNAL_FETCH_URL.replace(/\\/+$/, ""); const port = process.env.PORT || "3000"; return \`http://localhost:\${port}\`; @@ -111,103 +180,58 @@ const newBlock = ` // [patched:livefetch:v3] Always fetch the live page s if (fetchUrl !== postUrl && _wmPublicBase) { _fetchOpts.headers = { host: new URL(_wmPublicBase).hostname }; } - const pageResponse = await fetch(fetchUrl, _fetchOpts); - clearTimeout(_timeout); - if (pageResponse.ok) { - const _html = await pageResponse.text(); - // Validate the response is a real post page, not an error/502 page. - // extractLinks scopes to .h-entry, so if there's no .h-entry the page - // is not a valid post (e.g. nginx 502, login redirect, error template). - if (_html.includes("h-entry") /* [patched:hentry-syntax] */ || _html.includes("h-entry ")) { - contentToProcess = _html; - } else { - console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (fetched: \${fetchUrl}, host-sent: \${_fetchOpts.headers?.host ?? "(none)"}, preview: \${_html.slice(0, 200).replace(/[\\n\\r]+/g, " ")})\`); - } - } else { - console.log(\`[webmention] Live page returned \${pageResponse.status} for \${fetchUrl}\`); + const pageResponse = await fetch(fetchUrl, _fetchOpts);`; + + const v4FetchBlock = ` const _wmLivefetchBase = (process.env.WEBMENTION_LIVEFETCH_URL || "").replace(/\\/+$/, ""); + const _wmPublicBase = (process.env.PUBLICATION_URL || process.env.SITE_URL || "").replace(/\\/+$/, ""); + const fetchUrl = (_wmLivefetchBase && _wmPublicBase && postUrl.startsWith(_wmPublicBase)) + ? 
_wmLivefetchBase + postUrl.slice(_wmPublicBase.length) + : postUrl; + if (fetchUrl !== postUrl) { + console.log(\`[webmention] Fetching \${postUrl} via WEBMENTION_LIVEFETCH_URL: \${fetchUrl}\`); } - } catch (error) { - console.log(\`[webmention] Could not fetch live page for \${postUrl}: \${error.message}\`); - } - - if (!contentToProcess) { - // Live page missing or invalid — skip without marking sent so the next - // poll retries. Don't fall back to stored content because it lacks the - // template-rendered microformat links we need. - console.log(\`[webmention] No valid page for \${postUrl}, will retry next poll\`); - continue; - }`; - -// Lines changed in v2 → v3: fetch call + log message. -// Match just the fetch call so we can upgrade without re-matching the whole block. -const v2FetchLine = ` const pageResponse = await fetch(fetchUrl, { signal: _ac.signal });`; -const v3FetchLines = ` // When fetching via internal URL (nginx), send the public Host header so - // nginx can route to the correct virtual host. - // Without this, nginx sees the internal IP as Host and serves the wrong vhost. + const _ac = new AbortController(); + const _timeout = setTimeout(() => _ac.abort(), 15000); const _fetchOpts = { signal: _ac.signal }; if (fetchUrl !== postUrl && _wmPublicBase) { _fetchOpts.headers = { host: new URL(_wmPublicBase).hostname }; } const pageResponse = await fetch(fetchUrl, _fetchOpts);`; -const v2DiagLine = ` console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (error page?)\`);`; -const v3DiagLine = ` console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (fetched: \${fetchUrl}, host-sent: \${_fetchOpts.headers?.host ?? "(none)"}, preview: \${_html.slice(0, 200).replace(/[\\n\\r]+/g, " ")})\`);`; + const v3DiagLine = ` console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (fetched: \${fetchUrl}, host-sent: \${_fetchOpts.headers?.host ?? 
"(none)"}, preview: \${_html.slice(0, 200).replace(/[\\n\\r]+/g, " ")})\`);`; + const v4DiagLine = ` console.log(\`[webmention] Live page for \${postUrl} has no .h-entry — skipping (fetched: \${fetchUrl}, preview: \${_html.slice(0, 200).replace(/[\\n\\r]+/g, " ")})\`);`; -const v2FetchUrlLog = ` const fetchUrl = (_wmPublicBase && postUrl.startsWith(_wmPublicBase)) - ? _wmInternalBase + postUrl.slice(_wmPublicBase.length) - : postUrl; - const _ac = new AbortController();`; -const v3FetchUrlLog = ` const fetchUrl = (_wmPublicBase && postUrl.startsWith(_wmPublicBase)) - ? _wmInternalBase + postUrl.slice(_wmPublicBase.length) - : postUrl; - if (fetchUrl !== postUrl) { - console.log(\`[webmention] Fetching \${postUrl} via internal URL: \${fetchUrl}\`); - } - const _ac = new AbortController();`; - -async function exists(p) { - try { - await access(p); - return true; - } catch { - return false; - } -} - -if (!(await exists(filePath))) { - console.log("[patch-webmention-sender-livefetch] File not found, skipping"); - process.exit(0); -} - -const source = await readFile(filePath, "utf8"); - -if (source.includes(patchMarker)) { - console.log("[patch-webmention-sender-livefetch] Already patched (v3)"); - process.exit(0); -} - -// Upgrade v2 → v3: apply targeted line replacements within the existing v2 block. 
-if (source.includes(v2PatchMarker)) { let upgraded = source - .replace(v2PatchMarker, patchMarker) - .replace(v2FetchUrlLog, v3FetchUrlLog) - .replace(v2FetchLine, v3FetchLines) - .replace(v2DiagLine, v3DiagLine); + .replace(v3PatchMarker, patchMarker) + .replace(v3InternalBase, v4FetchBlock) + .replace(v3DiagLine, v4DiagLine); + + // Also update the comment line that mentions INTERNAL_FETCH_URL + upgraded = upgraded.replace( + " // Rewrite public URL to internal URL for jailed setups where the server\n // can't reach its own public HTTPS URL.\n // Send public Host header on internal fetches so nginx routes to the right vhost.", + " //\n // Fetch from the public URL directly. INTERNAL_FETCH_URL is for indiekit API\n // calls only — blog pages are served by an external host (e.g. GitHub Pages)\n // that the jail can reach fine over the public URL.\n // Override with WEBMENTION_LIVEFETCH_URL if a local static server is available." + ); if (!upgraded.includes(patchMarker)) { - console.warn("[patch-webmention-sender-livefetch] v2→v3 upgrade validation failed, skipping"); + console.warn("[patch-webmention-sender-livefetch] v3→v4 upgrade validation failed, skipping"); process.exit(0); } await writeFile(filePath, upgraded, "utf8"); - console.log("[patch-webmention-sender-livefetch] Upgraded v2 → v3 (Host header + diagnostics)"); + console.log("[patch-webmention-sender-livefetch] Upgraded v3 → v4 (public URL fetch, no INTERNAL_FETCH_URL)"); process.exit(0); } -// If old v1 patch is applied, we need to replace it with v3. -// Extract the old patched block by matching from its marker to the "continue;" that ends it. +// Earlier versions (v1/v2 or unpatched): extract block and do full replacement. 
let oldPatchBlock = null; -if (source.includes(oldPatchMarker) && !source.includes(v2PatchMarker)) { +if (source.includes(v2PatchMarker)) { + const startIdx = source.lastIndexOf(" // [patched:livefetch:v2]"); + const endMarker = " continue;\n }"; + const endSearch = source.indexOf(endMarker, startIdx); + if (startIdx !== -1 && endSearch !== -1) { + oldPatchBlock = source.slice(startIdx, endSearch + endMarker.length); + } +} else if (source.includes(oldPatchMarker)) { const startIdx = source.lastIndexOf(" // [patched:livefetch]"); const endMarker = " continue;\n }"; const endSearch = source.indexOf(endMarker, startIdx); @@ -239,4 +263,4 @@ if (!patched.includes(patchMarker)) { } await writeFile(filePath, patched, "utf8"); -console.log("[patch-webmention-sender-livefetch] Patched successfully (v3)"); +console.log("[patch-webmention-sender-livefetch] Patched successfully (v4)");