Skip to content

Commit 7ad6f7b

Browse files
authored
fix(algolia): skip record exceeding size limit (#9310)
We excluded the changelog from the record since it exceeds the maximum size allowed by Algolia. This change makes it generic by checking the size of each record. Fixes INC-296. ``` 13:18:29 [@astrojs/sitemap] `sitemap-index.xml` created at `dist` [Algolia] Record changelog exceeds size limit (200003 bytes). Skipping. [Algolia] collected 821 records from dist 13:18:32 [build] Waiting for integration "algolia-update-index", hook "astro:build:done"... [Algolia] Index updated with optimized records. 13:18:35 [build] 245 page(s) built in 13.85s 13:18:35 [build] Complete! ```
1 parent 2694418 commit 7ad6f7b

File tree

1 file changed

+24
-9
lines changed

1 file changed

+24
-9
lines changed

integrations/algolia-update-index.ts

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,6 @@ async function collectPagesFromDist(distRoot: string): Promise<AlgoliaRecord[]>
5454
const baseUrl =
5555
toObjectIdFromCanonical(canonicalHref) || toObjectIdFromPath(distRoot, filePath);
5656

57-
// Skip indexing the changelog listing page because it contains the full
58-
// changelog (many entries) and produces oversized Algolia records.
59-
if (baseUrl === 'changelog') {
60-
console.info('[Algolia] Skipping changelog index page to avoid oversized record:', filePath);
61-
continue;
62-
}
63-
6457
const pageTitle = $('meta[property="og:title"]').attr('content') || $('title').text() || '';
6558
const pageDescription =
6659
$('meta[name="description"]').attr('content') ||
@@ -79,7 +72,7 @@ async function collectPagesFromDist(distRoot: string): Promise<AlgoliaRecord[]>
7972
if (introHtml) {
8073
const introText = htmlToText(introHtml, $);
8174
const introProperties = extractProperties(introHtml, $);
82-
records.push({
75+
const introRecord = ensureRecordWithinSizeLimit({
8376
objectID: baseUrl,
8477
url: baseUrl,
8578
hierarchy: buildHierarchy(baseUrl, null),
@@ -91,6 +84,9 @@ async function collectPagesFromDist(distRoot: string): Promise<AlgoliaRecord[]>
9184
pageTitle,
9285
pageDescription,
9386
});
87+
if (introRecord) {
88+
records.push(introRecord);
89+
}
9490
}
9591

9692
// Create H1, H2, H3 records (hierarchical, no nesting)
@@ -101,7 +97,7 @@ async function collectPagesFromDist(distRoot: string): Promise<AlgoliaRecord[]>
10197
const headingText = htmlToText(heading.html, $);
10298
const headingProperties = extractProperties(heading.html, $);
10399

104-
records.push({
100+
const headingRecord = ensureRecordWithinSizeLimit({
105101
objectID: headingId,
106102
url: headingId,
107103
hierarchy: buildHierarchy(baseUrl, heading.headingPath),
@@ -113,12 +109,31 @@ async function collectPagesFromDist(distRoot: string): Promise<AlgoliaRecord[]>
113109
pageTitle,
114110
pageDescription,
115111
});
112+
if (headingRecord) {
113+
records.push(headingRecord);
114+
}
116115
}
117116
}
118117

119118
return records;
120119
}
121120

121+
/**
 * Default maximum size, in bytes, of a single serialized record.
 *
 * Algolia has a 100KB limit per record.
 * NOTE(review): the per-record limit depends on the Algolia plan — confirm
 * this value matches the account the index is pushed to.
 */
const ALGOLIA_SIZE_LIMIT = 100000;

/**
 * Checks that a record is small enough to be indexed.
 *
 * The size is measured as the UTF-8 byte length of the record's JSON
 * serialization, so multi-byte characters count for more than one byte.
 *
 * @param record - The record to measure.
 * @param sizeLimit - Maximum allowed size in bytes (inclusive). Defaults to
 *   `ALGOLIA_SIZE_LIMIT`; pass a different value when targeting an index with
 *   another per-record limit.
 * @returns The same `record` reference when it fits within `sizeLimit`;
 *   otherwise `null`, after logging a warning naming the record's `objectID`
 *   and its measured size.
 */
function ensureRecordWithinSizeLimit(
  record: AlgoliaRecord,
  sizeLimit: number = ALGOLIA_SIZE_LIMIT,
): AlgoliaRecord | null {
  // Measure bytes, not string length: the limit applies to the encoded payload.
  const recordSize = Buffer.byteLength(JSON.stringify(record), 'utf8');

  if (recordSize <= sizeLimit) {
    return record;
  }

  // Dropping the record is deliberate: callers only push non-null results.
  console.warn(
    `[Algolia] Record ${record.objectID} exceeds size limit (${recordSize} bytes). Skipping.`
  );
  return null;
}
136+
122137
async function listHtmlFiles(
123138
root: string,
124139
current: string = root,

0 commit comments

Comments
 (0)