Files
Ricardo 4a53f74bbc feat: add feed auto-discovery to blog add form
- Add feed-discovery.js utility that discovers RSS/Atom/JSON feeds from website URLs
- Add /api/discover endpoint for frontend feed discovery
- Update blog edit form with discovery UI (enter website URL, discover feeds)
- Auto-populate feedUrl, title, and siteUrl from discovery results
- Handle multiple feed options (let user choose)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-07 15:37:23 +01:00

165 lines
4.5 KiB
JavaScript

/**
* RSS/Atom/JSON feed discovery from website URLs
* @module utils/feed-discovery
*/
/** User-Agent header sent with every discovery request. */
const DISCOVERY_USER_AGENT = "Indiekit-Blogroll/1.0 (Feed Discovery)";

/**
 * Media types accepted on `<link rel="alternate">` as feed declarations.
 * Matching is exact (after stripping parameters) so that look-alikes such as
 * `application/activity+json`, `application/json+oembed` or
 * `application/xhtml+xml` are not mistaken for feeds.
 */
const FEED_MEDIA_TYPES = new Set([
  "application/rss+xml",
  "application/atom+xml",
  "application/feed+json",
  "application/json",
  "text/xml",
  // Accepted by the previous substring-based check; kept for compatibility.
  "application/xml",
  "application/rdf+xml",
]);

/** Substrings of a Content-Type header that indicate a feed document. */
const FEED_CONTENT_TYPE_HINTS = ["xml", "rss", "atom", "json"];

/** Well-known feed locations probed when the page declares no feed. */
const COMMON_FEED_PATHS = [
  "/feed",
  "/feed.xml",
  "/rss",
  "/rss.xml",
  "/atom.xml",
  "/feed/atom",
  "/feed/rss",
  "/index.xml",
  "/blog/feed",
  "/blog/rss",
  "/.rss",
  "/feed.json",
];

/** Named character references decoded in attribute values and titles. */
const NAMED_ENTITIES = new Map([
  ["amp", "&"],
  ["lt", "<"],
  ["gt", ">"],
  ["quot", '"'],
  ["apos", "'"],
  ["nbsp", "\u00a0"],
  ["ndash", "\u2013"],
  ["mdash", "\u2014"],
]);

/** Build a stateless regex matching one HTML attribute (quoted or unquoted). */
function attributePattern(name) {
  return new RegExp(
    `(?:^|\\s)${name}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s"'>]+))`,
    "i",
  );
}

const REL_ATTRIBUTE = attributePattern("rel");
const TYPE_ATTRIBUTE = attributePattern("type");
const HREF_ATTRIBUTE = attributePattern("href");
const TITLE_ATTRIBUTE = attributePattern("title");

/** Decode numeric and a small set of named HTML character references. */
function decodeHtmlEntities(text) {
  return text.replace(/&(#x[0-9a-f]+|#[0-9]+|[a-z][a-z0-9]*);/gi, (whole, body) => {
    if (body[0] !== "#") {
      return NAMED_ENTITIES.has(body) ? NAMED_ENTITIES.get(body) : whole;
    }
    const isHex = body[1] === "x" || body[1] === "X";
    const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    // Out-of-range references are left untouched instead of throwing.
    return codePoint > 0 && codePoint <= 0x10ffff
      ? String.fromCodePoint(codePoint)
      : whole;
  });
}

/** Return the decoded value of an attribute in a tag, or null if absent/empty. */
function readAttribute(tag, pattern) {
  const match = pattern.exec(tag);
  if (!match) {
    return null;
  }
  const value = match[1] || match[2] || match[3] || "";
  return value === "" ? null : decodeHtmlEntities(value);
}

/** Map a media type / Content-Type string to "atom", "json" or "rss". */
function classifyFeedType(descriptor) {
  if (descriptor.includes("atom")) {
    return "atom";
  }
  if (descriptor.includes("json")) {
    return "json";
  }
  return "rss";
}

/** Collect feeds declared through `<link rel="alternate">` tags in the HTML. */
function findLinkedFeeds(html, baseUrl) {
  const feeds = [];
  const seenUrls = new Set();
  for (const [tag] of html.matchAll(/<link\b[^>]*>/gi)) {
    const rel = readAttribute(tag, REL_ATTRIBUTE);
    const href = readAttribute(tag, HREF_ATTRIBUTE);
    if (!rel || !href) {
      continue;
    }
    // `rel` is a space-separated token list, e.g. rel="alternate feed".
    if (!rel.toLowerCase().split(/\s+/).includes("alternate")) {
      continue;
    }
    const declaredType = readAttribute(tag, TYPE_ATTRIBUTE) || "";
    // Drop parameters such as "; charset=utf-8" before comparing.
    const mediaType = declaredType.split(";")[0].trim().toLowerCase();
    if (!FEED_MEDIA_TYPES.has(mediaType)) {
      continue;
    }
    let feedUrl;
    try {
      feedUrl = new URL(href.trim(), baseUrl).href;
    } catch {
      // One malformed href must not abort discovery of the other links.
      continue;
    }
    if (seenUrls.has(feedUrl)) {
      continue;
    }
    seenUrls.add(feedUrl);
    feeds.push({
      url: feedUrl,
      type: classifyFeedType(mediaType),
      title: readAttribute(tag, TITLE_ATTRIBUTE),
    });
  }
  return feeds;
}

/** HEAD-probe well-known feed paths in order; return the first hit (or none). */
async function probeCommonPaths(baseUrl, signal) {
  for (const path of COMMON_FEED_PATHS) {
    const feedUrl = new URL(path, baseUrl).href;
    try {
      const response = await fetch(feedUrl, {
        method: "HEAD",
        signal,
        headers: { "User-Agent": DISCOVERY_USER_AGENT },
      });
      if (!response.ok) {
        continue;
      }
      const contentType = (response.headers.get("content-type") || "").toLowerCase();
      if (FEED_CONTENT_TYPE_HINTS.some((hint) => contentType.includes(hint))) {
        return [{ url: feedUrl, type: classifyFeedType(contentType), title: null }];
      }
    } catch (error) {
      // Once the shared timeout fired every further probe would fail too:
      // surface the timeout instead of reporting "no feeds found".
      if (signal.aborted) {
        throw error;
      }
      // Any other failure only concerns this path; keep probing the rest.
    }
  }
  return [];
}

/** Extract a short site name from the page's `<title>`, or null if missing. */
function extractPageTitle(html) {
  const match = /<title[^>]*>([^<]+)<\/title>/i.exec(html);
  if (!match) {
    return null;
  }
  const fullTitle = decodeHtmlEntities(match[1]).trim();
  const shortTitle = fullTitle
    // Cut at "|", en/em dash, or a hyphen surrounded by whitespace; a bare
    // hyphen is left alone so names like "Jean-Luc" are not truncated.
    .replace(/\s*(?:[|–—]|\s-\s)\s*.*$/, "")
    .replace(/\s*:\s*Home.*$/i, "")
    .trim();
  // If the clean-up removed everything, fall back to the full title.
  return shortTitle || fullTitle || null;
}

/**
 * Discover RSS/Atom/JSON feeds from a website URL.
 *
 * The page is fetched and its `<link rel="alternate">` tags are inspected.
 * If none declares a feed, a list of common feed paths is probed with HEAD
 * requests and the first one answering with a feed-like Content-Type is used.
 * Relative URLs are resolved against the final (post-redirect) page URL.
 * A single timeout covers the page fetch and all probes together.
 *
 * Never rejects: failures are reported through the returned object.
 *
 * @param {string} websiteUrl - The website URL to check; "https://" is
 *   prepended when no http(s) scheme is present
 * @param {number} timeout - Overall fetch timeout in ms
 * @returns {Promise<object>} On success
 *   `{ success: true, feeds, pageTitle, siteUrl }` where each feed is
 *   `{ url, type: "rss" | "atom" | "json", title }`; on failure
 *   `{ success: false, error, feeds: [] }`
 */
export async function discoverFeeds(websiteUrl, timeout = 10000) {
  const requested = typeof websiteUrl === "string" ? websiteUrl.trim() : "";
  if (requested === "") {
    return { success: false, error: "No website URL provided", feeds: [] };
  }
  // Scheme detection is case-insensitive ("HTTPS://…" is a valid URL).
  const url = /^https?:\/\//i.test(requested) ? requested : `https://${requested}`;

  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeout);
  try {
    const response = await fetch(url, {
      signal: controller.signal,
      headers: {
        "User-Agent": DISCOVERY_USER_AGENT,
        Accept: "text/html,application/xhtml+xml",
      },
    });
    if (!response.ok) {
      return { success: false, error: `HTTP ${response.status}`, feeds: [] };
    }
    const html = await response.text();
    // Prefer the final URL after redirects so relative links resolve correctly.
    const baseUrl = new URL(response.url || url);

    let feeds = findLinkedFeeds(html, baseUrl);
    if (feeds.length === 0) {
      feeds = await probeCommonPaths(baseUrl, controller.signal);
    }
    return {
      success: true,
      feeds,
      pageTitle: extractPageTitle(html),
      siteUrl: baseUrl.origin,
    };
  } catch (error) {
    if (controller.signal.aborted || error.name === "AbortError") {
      return { success: false, error: "Request timed out", feeds: [] };
    }
    return { success: false, error: error.message, feeds: [] };
  } finally {
    clearTimeout(timeoutId);
  }
}