Skip to content

Commit 9ef66d2

Browse files
skip versions during indexing (#168)
1 parent 9079e03 commit 9ef66d2

File tree

1 file changed

+39
-21
lines changed

1 file changed

+39
-21
lines changed

extensions/export-content-extension.js

Lines changed: 39 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,26 @@ module.exports.register = function () {
3636
*/
3737
function collectPages(contentCatalog, siteUrl) {
3838
const all = [];
39+
40+
// Define which server-admin versions to exclude from indexing
41+
const excludedServerAdminVersions = [
42+
'server-v4.1',
43+
'server-v4.2',
44+
'server-v4.3',
45+
'server-v4.4',
46+
'server-v4.5',
47+
'server-v4.6',
48+
'server-v4.7'
49+
];
50+
3951
contentCatalog.getComponents().forEach(({ name: comp, versions }) => {
4052
versions.forEach(({ version }) => {
53+
// Skip indexing for excluded server-admin versions
54+
if (comp === 'server-admin' && excludedServerAdminVersions.includes(version)) {
55+
console.log(`Skipping Algolia indexing for server-admin version: ${version}`);
56+
return; // Skip this entire component version
57+
}
58+
4159
const compVer = contentCatalog.getComponentVersion(comp, version);
4260
const navMap = getNavEntriesByUrl(compVer.navigation);
4361
contentCatalog
@@ -107,25 +125,25 @@ function hasAlgoliaCredentials() {
107125
function chunkText(text, maxBytes = 8500) { // Reduced from 10000 to have margin for metadata
108126
const chunks = [];
109127
let current = '';
110-
128+
111129
text.split(/\n\n/).forEach(paragraph => {
112130
const para = paragraph + '\n\n';
113131
const combined = current + para;
114132
const size = Buffer.byteLength(combined, 'utf8');
115-
133+
116134
if (size > maxBytes) {
117135
if (current) chunks.push(current.trim());
118-
136+
119137
if (Buffer.byteLength(para, 'utf8') > maxBytes) {
120138
// Paragraph too big: split by sentences
121139
let sentenceBuf = '';
122140
paragraph.split(/(?<=\.)\s/).forEach(sentence => {
123141
const combinedSentence = sentenceBuf + sentence + ' ';
124142
const sentSize = Buffer.byteLength(combinedSentence, 'utf8');
125-
143+
126144
if (sentSize > maxBytes) {
127145
if (sentenceBuf) chunks.push(sentenceBuf.trim());
128-
146+
129147
if (Buffer.byteLength(sentence, 'utf8') > maxBytes) {
130148
// Sentence too big: split by characters
131149
let charBuf = '';
@@ -144,7 +162,7 @@ function chunkText(text, maxBytes = 8500) { // Reduced from 10000 to have margi
144162
sentenceBuf = combinedSentence;
145163
}
146164
});
147-
165+
148166
if (sentenceBuf) chunks.push(sentenceBuf.trim());
149167
current = '';
150168
} else {
@@ -154,7 +172,7 @@ function chunkText(text, maxBytes = 8500) { // Reduced from 10000 to have margi
154172
current = combined;
155173
}
156174
});
157-
175+
158176
if (current) chunks.push(current.trim());
159177
return chunks;
160178
}
@@ -170,12 +188,12 @@ async function indexToAlgolia(pages) {
170188

171189
const client = algoliasearch(appId, apiKey);
172190
const records = [];
173-
191+
174192
pages.forEach((p) => {
175193
const pathId = p.relUrl.replace(/^\/+/, '').replace(/[\/]/g, '_');
176194
const baseId = `${p.component}:${p.version}:${pathId}`;
177195
const chunks = chunkText(p.text);
178-
196+
179197
chunks.forEach((chunk, i) => {
180198
// Create the record with all fields except content
181199
const record = {
@@ -187,26 +205,26 @@ async function indexToAlgolia(pages) {
187205
version: p.version,
188206
objectID: `${baseId}:${i}`,
189207
};
190-
208+
191209
// Calculate metadata size
192210
const metadataSize = Buffer.byteLength(JSON.stringify(record), 'utf8');
193211
// Maximum allowed content size
194212
const maxContentSize = 9500 - metadataSize;
195-
213+
196214
// Trim content if necessary to ensure total record size is under limit
197215
let content = chunk;
198216
if (Buffer.byteLength(content, 'utf8') > maxContentSize) {
199217
content = content.slice(0, Math.floor(maxContentSize * 0.9));
200218
}
201-
219+
202220
record.content = content;
203221
records.push(record);
204222
});
205223
});
206-
224+
207225
console.log(`Prepared ${records.length} chunked records for indexing`);
208226

209-
227+
210228
// Configure index settings with path faceting
211229
try {
212230
const settingsResponse = await client.setSettings({
@@ -220,9 +238,9 @@ async function indexToAlgolia(pages) {
220238
paginationLimitedTo: 1000,
221239
},
222240
});
223-
241+
224242
console.log(`Applied index settings, task ID: ${settingsResponse.taskID}`);
225-
243+
226244
// Wait for task completion
227245
await waitForTask(client, indexName, settingsResponse.taskID);
228246
console.log('Index settings update completed successfully');
@@ -233,7 +251,7 @@ async function indexToAlgolia(pages) {
233251

234252
const response = await client.saveObjects({ indexName, objects: records });
235253
console.log(`Indexed ${response[0]?.objectIDs.length} records to ${indexName}`);
236-
254+
237255
return response;
238256
}
239257

@@ -247,17 +265,17 @@ async function indexToAlgolia(pages) {
247265
*/
248266
async function waitForTask(client, indexName, taskID, timeout = 60000, pollInterval = 1000) {
  // Polls client.getTask({ indexName, taskID }) until the response reports
  // status === 'published', then resolves with that response.
  // Rejects with an Error once `timeout` milliseconds have elapsed without
  // the task being published.
  //
  // The deadline is fixed up front so that each sleep can be clamped to the
  // time that is actually left.
  const deadline = Date.now() + timeout;

  // Poll first, check the deadline second: the task is always queried at
  // least once (even with timeout <= 0), and one last query happens at the
  // deadline instead of giving up after the final sleep without looking.
  for (;;) {
    const taskResponse = await client.getTask({ indexName, taskID });

    if (taskResponse.status === 'published') {
      return taskResponse;
    }

    const remaining = deadline - Date.now();
    if (remaining <= 0) {
      break;
    }

    // Never sleep past the deadline; a short final wait is followed by one
    // more status check before timing out.
    const delay = Math.min(pollInterval, remaining);
    await new Promise((resolve) => setTimeout(resolve, delay));
  }

  throw new Error(`Task ${taskID} timed out after ${timeout}ms`);
}

0 commit comments

Comments
 (0)