Skip to content

Commit d3318d0

Browse files
bookernath authored and chanceaclark committed
Proxy internal sitemaps for prettier sitemap URLs
1 parent d77f69d commit d3318d0

File tree

2 files changed

+106
-2
lines changed

2 files changed

+106
-2
lines changed

core/app/sitemap.xml/route.ts

Lines changed: 69 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,10 +7,77 @@ import { getChannelIdFromLocale } from '~/channels.config';
77
import { client } from '~/client';
88
import { defaultLocale } from '~/i18n/locales';
99

10-
export const GET = async () => {
10+
export const GET = async (request: Request) => {
11+
const url = new URL(request.url);
12+
const incomingHost = request.headers.get('host') ?? url.host;
13+
const incomingProto = request.headers.get('x-forwarded-proto') ?? url.protocol.replace(':', '');
14+
15+
const type = url.searchParams.get('type');
16+
const page = url.searchParams.get('page');
17+
18+
// If a specific sitemap within the index is requested, require both params
19+
if (type !== null || page !== null) {
20+
if (!type || !page) {
21+
return new Response('Both "type" and "page" query params are required', {
22+
status: 400,
23+
headers: { 'Content-Type': 'text/plain; charset=utf-8' },
24+
});
25+
}
26+
27+
const upstream = await client.fetchSitemapResponse(
28+
{ type, page },
29+
getChannelIdFromLocale(defaultLocale),
30+
);
31+
32+
// Pass-through upstream status/body but enforce XML content-type
33+
const body = await upstream.text();
34+
35+
return new Response(body, {
36+
status: upstream.status,
37+
statusText: upstream.statusText,
38+
headers: { 'Content-Type': 'application/xml' },
39+
});
40+
}
41+
42+
// Otherwise, return the sitemap index with normalized internal links
1143
const sitemapIndex = await client.fetchSitemapIndex(getChannelIdFromLocale(defaultLocale));
1244

13-
return new Response(sitemapIndex, {
45+
const rewritten = sitemapIndex.replace(
46+
/<loc>([^<]+)<\/loc>/g,
47+
(match: string, locUrlStr: string) => {
48+
try {
49+
// Decode XML entities for '&' so URL parsing works
50+
const decoded: string = locUrlStr.replace(/&amp;/g, '&');
51+
const original = new URL(decoded);
52+
53+
if (!original.pathname.endsWith('/xmlsitemap.php')) {
54+
return match;
55+
}
56+
57+
const normalized = new URL(`${incomingProto}://${incomingHost}/sitemap.xml`);
58+
59+
const t = original.searchParams.get('type');
60+
const p = original.searchParams.get('page');
61+
62+
// Only rewrite entries that include both type and page; otherwise leave untouched
63+
if (!t || !p) {
64+
return match;
65+
}
66+
67+
normalized.searchParams.set('type', t);
68+
normalized.searchParams.set('page', p);
69+
70+
// Re-encode '&' for XML output
71+
const normalizedXml: string = normalized.toString().replace(/&/g, '&amp;');
72+
73+
return `<loc>${normalizedXml}</loc>`;
74+
} catch {
75+
return match;
76+
}
77+
},
78+
);
79+
80+
return new Response(rewritten, {
1481
headers: {
1582
'Content-Type': 'application/xml',
1683
},

packages/client/src/client.ts

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -210,6 +210,43 @@ class Client<FetcherRequestInit extends RequestInit = RequestInit> {
210210
return response.text();
211211
}
212212

213+
/**
 * Fetches a sitemap document and returns its body as text.
 *
 * Delegates the HTTP request to `fetchSitemapResponse` and adds a status
 * check on top of it, so callers get either the body or an exception.
 *
 * @param params - Optional `type` and `page` selectors, passed through
 *   unchanged to `fetchSitemapResponse`.
 * @param channelId - Optional channel id, passed through unchanged to
 *   `fetchSitemapResponse`.
 * @returns The response body read via `response.text()`.
 * @throws Error with message `Unable to get Sitemap: <statusText>` when
 *   `response.ok` is false.
 */
async fetchSitemap(
214+
params: { type?: string | null; page?: string | number | null },
215+
channelId?: string,
216+
): Promise<string> {
217+
const response = await this.fetchSitemapResponse(params, channelId);
218+
219+
// Unlike fetchSitemapResponse, this method treats a non-ok status as an error.
if (!response.ok) {
220+
throw new Error(`Unable to get Sitemap: ${response.statusText}`);
221+
}
222+
223+
return response.text();
224+
}
225+
226+
/**
 * Requests `/xmlsitemap.php` under the URL returned by
 * `getCanonicalUrl(channelId)` and returns the raw `Response`.
 *
 * The response status is NOT checked here; callers decide how to handle
 * non-ok responses (see `fetchSitemap` for a throwing wrapper).
 *
 * @param params - Optional selectors for the request's query string:
 *   - `type` is added only when it is truthy (so `''`, `null` and
 *     `undefined` are omitted).
 *   - `page` is added whenever it is neither `undefined` nor `null`
 *     (so `0` and `''` ARE forwarded), stringified with `String()`.
 * @param channelId - Optional channel id handed to `getCanonicalUrl`.
 * @returns The `Response` produced by `fetch`, unmodified.
 */
async fetchSitemapResponse(
227+
params: { type?: string | null; page?: string | number | null },
228+
channelId?: string,
229+
): Promise<Response> {
230+
const baseUrl = new URL(`${await this.getCanonicalUrl(channelId)}/xmlsitemap.php`);
231+
232+
// Only forward well-known params
// (`type` and `page` are the only keys ever copied onto the request URL.)
233+
if (params.type) baseUrl.searchParams.set('type', String(params.type));
234+
// Note the looser check than for `type`: any non-nullish page is forwarded.
if (params.page !== undefined && params.page !== null)
235+
baseUrl.searchParams.set('page', String(params.page));
236+
237+
const response = await fetch(baseUrl.toString(), {
238+
method: 'GET',
239+
headers: {
240+
Accept: 'application/xml',
241+
'Content-Type': 'application/xml',
242+
'User-Agent': this.backendUserAgent,
243+
// The proxy-secret header is spread in only when trustedProxySecret is truthy.
...(this.trustedProxySecret && { 'X-BC-Trusted-Proxy-Secret': this.trustedProxySecret }),
244+
},
245+
});
246+
247+
// Returned as-is: no status check and no body consumption at this layer.
return response;
248+
}
249+
213250
private async getCanonicalUrl(channelId?: string) {
214251
const resolvedChannelId = channelId ?? (await this.getChannelId(this.defaultChannelId));
215252

0 commit comments

Comments
 (0)