Skip to content

Commit 323f857

Browse files
authored
Merge pull request #15795 from Mridul012/fix-empty-html-content
fix(generateSearch): skip empty HTML blocks when generating search content
2 parents d0086ea + 5087345 commit 323f857

File tree

1 file changed

+24
-27
lines changed

1 file changed

+24
-27
lines changed

scripts/generateSearch.js

Lines changed: 24 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -63,14 +63,17 @@ function generateContents() {
6363
content.validateSync();
6464

6565
const $ = cheerio.load(text);
66-
6766
contents.push(content);
6867

69-
// Break up individual h3's into separate content for more fine grained search
68+
// Break up h3's into additional content entries
7069
$('h3').each((index, el) => {
7170
el = $(el);
7271
const title = el.text();
7372
const html = el.nextUntil('h3').html();
73+
74+
// *** FIXED: Skip empty HTML blocks ***
75+
if (!html || html.trim() === '') return;
76+
7477
const id = el.prop('id');
7578
const baseUrl = filename.replace('.md', '.html').replace(/^docs/, '');
7679
const content = new Content({
@@ -82,6 +85,7 @@ function generateContents() {
8285
content.validateSync();
8386
contents.push(content);
8487
});
88+
8589
} else if (file.guide) {
8690
let text = fs.readFileSync(filename, 'utf8');
8791
text = text.substring(text.indexOf('block content') + 'block content\n'.length);
@@ -96,14 +100,17 @@ function generateContents() {
96100
content.validateSync();
97101

98102
const $ = cheerio.load(text);
99-
100103
contents.push(content);
101104

102-
// Break up individual h3's into separate content for more fine grained search
105+
// Break up h3's into additional content entries
103106
$('h3').each((index, el) => {
104107
el = $(el);
105108
const title = el.text();
106109
const html = el.nextUntil('h3').html();
110+
111+
// *** FIXED: Skip empty HTML blocks ***
112+
if (!html || html.trim() === '') return;
113+
107114
const id = el.prop('id');
108115
const baseUrl = filename.replace('.pug', '.html').replace(/^docs/, '');
109116
const content = new Content({
@@ -137,56 +144,47 @@ async function generateSearch(config) {
137144

138145
const promises = [];
139146
let lastPrint = 0;
140-
141147
let doneCount = 0;
148+
142149
console.log('Search Content to save:', contents.length);
150+
143151
for (const content of contents) {
144-
if (version === '9.x') {
145-
let url = content.url.startsWith('/') ? content.url : `/${content.url}`;
146-
if (!url.startsWith('/docs')) {
147-
url = '/docs' + url;
148-
}
149-
content.url = url;
150-
} else {
151-
let url = content.url.startsWith('/') ? content.url : `/${content.url}`;
152-
if (!url.startsWith('/docs')) {
153-
url = '/docs' + url;
154-
}
155-
content.url = `/docs/${version}${url}`;
152+
let url = content.url.startsWith('/') ? content.url : `/${content.url}`;
153+
if (!url.startsWith('/docs')) {
154+
url = '/docs' + url;
156155
}
156+
content.url = version === '9.x' ? url : `/docs/${version}${url}`;
157+
157158
const promise = content.save().then(() => {
158159
doneCount += 1;
159160
const nowDate = Date.now();
160-
// only print every 2 seconds, or if it is the first or last element
161161
if (nowDate - lastPrint > 2000 || doneCount === contents.length || doneCount === 1) {
162162
lastPrint = nowDate;
163163
console.log(`${doneCount} / ${contents.length}`);
164164
}
165165
});
166+
166167
promises.push(promise);
167168
}
168169

169170
await Promise.allSettled(promises);
170171

171-
const results = await Content.
172-
find({ $text: { $search: 'validate' }, version }, { score: { $meta: 'textScore' } }).
173-
sort({ score: { $meta: 'textScore' } }).
174-
limit(10);
172+
const results = await Content
173+
.find({ $text: { $search: 'validate' }, version }, { score: { $meta: 'textScore' } })
174+
.sort({ score: { $meta: 'textScore' } })
175+
.limit(10);
175176

176177
console.log(results.map(res => res.url));
177178

178179
console.log(`Added ${contents.length} Search Content`);
179-
180-
// this likely should not be done as part of this script, but by the caller,
181-
// but this script is currently the only one that connects in the website generation.
182180
await mongoose.disconnect();
183181
}
184182

185183
function getConfig() {
186184
const config = require('../.config.js');
187185

188186
if (!config || !config.uri) {
189-
throw new Error('No Config or config.URI given, please create a .config.js file with those values in the root of the repository');
187+
throw new Error('No Config or config.uri given, please create a .config.js file with those values in the root of the repository');
190188
}
191189

192190
return config;
@@ -195,7 +193,6 @@ function getConfig() {
195193
module.exports.generateSearch = generateSearch;
196194
module.exports.getConfig = getConfig;
197195

198-
// only run the following code if this file is the main module / entry file
199196
if (isMain) {
200197
(async function main() {
201198
const config = getConfig();

0 commit comments

Comments
 (0)