|
/**
 * Parameters passed to `HandleSitemapRouteOptions.fetchSitemapResponse` to select one sitemap page.
 *
 * Both fields are optional and nullable at the type level; `handleSitemapRoute` itself only invokes the
 * fetcher after verifying that both values are non-empty strings.
 */
export interface FetchSitemapParams {
  /** Value of the `type` query parameter identifying which sitemap is requested. */
  type?: string | null;
  /** Value of the `page` query parameter; accepted as a string (as read from a URL) or a number. */
  page?: string | number | null;
}
| 5 | + |
/**
 * Callbacks that `handleSitemapRoute` uses to obtain sitemap data.
 *
 * The handler performs no fetching of its own; every upstream access goes through these two functions.
 */
export interface HandleSitemapRouteOptions {
  /** Resolves to the sitemap index document as an XML string. */
  getSitemapIndex: () => Promise<string>;
  /**
   * Fetches a single sitemap page. The returned response's status, status text and body are relayed
   * to the client by `handleSitemapRoute`.
   */
  fetchSitemapResponse: (params: FetchSitemapParams) => Promise<Response>;
}
| 10 | + |
/**
 * Handles `GET /sitemap.xml` for the Catalyst sitemap route.
 *
 * Behavior depends on the query string:
 * - Neither `type` nor `page` present: loads the sitemap index via `options.getSitemapIndex()` and returns
 *   it with `<loc>` links rewritten to `<protocol>://<host>/sitemap.xml?type=…&page=…` on the incoming host.
 *   Only links whose path ends in `/xmlsitemap.php` and that carry both `type` and `page` are rewritten;
 *   every other `<loc>` is returned untouched.
 * - Either `type` or `page` present: both must be non-empty, otherwise a plain-text 400 is returned.
 *   When both are present the upstream response from `options.fetchSitemapResponse()` is relayed with its
 *   status, status text and body, with `Content-Type` forced to `application/xml`. Any other query
 *   parameters are ignored.
 *
 * The incoming host is taken from the `host` header and the protocol from `x-forwarded-proto`, each
 * falling back to the corresponding part of `request.url`.
 *
 * @param request Incoming request for `/sitemap.xml`.
 * @param options Callbacks used to fetch the upstream sitemap index and sitemap pages.
 * @returns The response to return from the route handler.
 */
export async function handleSitemapRoute(
  request: Request,
  options: HandleSitemapRouteOptions,
): Promise<Response> {
  const { getSitemapIndex, fetchSitemapResponse } = options;
  const requestUrl = new URL(request.url);
  const incomingHost = request.headers.get('host') ?? requestUrl.host;

  // Chained proxies may send a comma-separated list ("https, http"); the first entry is the
  // client-facing one. An empty header falls through to the request URL's own protocol.
  const forwardedProto = request.headers.get('x-forwarded-proto')?.split(',')[0]?.trim();
  const incomingProtocol = forwardedProto || requestUrl.protocol.replace(':', '');

  const typeParam = requestUrl.searchParams.get('type');
  const pageParam = requestUrl.searchParams.get('page');

  // If any sitemap page is requested, require both well-known params.
  if (typeParam !== null || pageParam !== null) {
    if (!typeParam || !pageParam) {
      return new Response('Both "type" and "page" query params are required', {
        status: 400,
        headers: { 'Content-Type': 'text/plain; charset=utf-8' },
      });
    }

    const upstream = await fetchSitemapResponse({ type: typeParam, page: pageParam });
    const body = await upstream.text();

    // The Response constructor throws when a non-null body (even '') is paired with a
    // null-body status, so those statuses must be relayed without a body.
    const isNullBodyStatus = [204, 205, 304].includes(upstream.status);

    return new Response(isNullBodyStatus ? null : body, {
      status: upstream.status,
      statusText: upstream.statusText,
      headers: { 'Content-Type': 'application/xml' },
    });
  }

  const sitemapIndexXml = await getSitemapIndex();
  const rewrittenXml = rewriteSitemapIndexLocs(
    sitemapIndexXml,
    `${incomingProtocol}://${incomingHost}`,
  );

  return new Response(rewrittenXml, {
    headers: {
      'Content-Type': 'application/xml',
    },
  });
}

/**
 * Rewrites `<loc>` entries of a sitemap index so that `/xmlsitemap.php?type=…&page=…` links point at
 * `<origin>/sitemap.xml?type=…&page=…`.
 *
 * Entries that cannot be parsed as a URL, do not end in `/xmlsitemap.php`, or lack `type` or `page`
 * are left exactly as they were.
 *
 * @param indexXml Sitemap index XML document.
 * @param origin Scheme and host (for example `https://shop.example.com`) to rewrite links onto.
 * @returns The XML with matching `<loc>` entries rewritten.
 */
function rewriteSitemapIndexLocs(indexXml: string, origin: string): string {
  return indexXml.replace(/<loc>([^<]+)<\/loc>/g, (match: string, rawLoc: string) => {
    try {
      // XML text escapes "&" as "&amp;"; decode it first, otherwise the parameter after the
      // separator is parsed as "amp;page" and the link is never rewritten.
      const original = new URL(rawLoc.replace(/&amp;/g, '&'));

      if (!original.pathname.endsWith('/xmlsitemap.php')) {
        return match;
      }

      const type = original.searchParams.get('type');
      const page = original.searchParams.get('page');

      if (!type || !page) {
        return match;
      }

      const normalized = new URL(`${origin}/sitemap.xml`);

      normalized.searchParams.set('type', type);
      normalized.searchParams.set('page', page);

      // Re-escape "&" so the emitted document stays well-formed XML.
      const normalizedXml = normalized.toString().replace(/&/g, '&amp;');

      return `<loc>${normalizedXml}</loc>`;
    } catch {
      // Deliberate best effort: an unparsable link (or origin) leaves the entry unchanged.
      return match;
    }
  });
}
| 98 | + |
| 99 | + |
0 commit comments