Skip to content

Commit 0083491

Browse files
authored
Merge pull request #3152 from ClickHouse/fix_title_search
fix search title
2 parents 14b50bd + 1195fbd commit 0083491

File tree

1 file changed

+16
-14
lines changed

1 file changed

+16
-14
lines changed

scripts/search/index_pages.py

Lines changed: 16 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -63,15 +63,14 @@ def parse_metadata_and_content(directory, base_directory, md_file_path, log_snip
6363
# Add file path to metadata
6464
metadata['file_path'] = md_file_path
6565
# Note: we assume the last subfolder of the directory is part of the URL
66-
if metadata['file_path'] == '/opt/clickhouse-docs/docs/en/guides/best-practices/sparse-primary-indexes.md':
67-
pass
6866
slug = metadata.get('slug', '/' + os.path.split(directory)[-1] + metadata['file_path'].replace(directory, ''))
6967
for p in ['.md', '.mdx', '"', "'"]:
7068
slug = slug.removeprefix(p).removesuffix(p)
7169
slug = slug.removesuffix('/')
7270
content = re.sub(r'^import .+?from .+?$', '', content, flags=re.MULTILINE) # remove import
7371
content = re.sub(r'<[A-Za-z0-9_-]+\s*[^>]*\/>', '', content) # remove self-closing components
7472
metadata['slug'] = slug
73+
metadata['title'] = metadata.get('title', '').strip()
7574
return metadata, content
7675

7776

@@ -250,6 +249,7 @@ def parse_markdown_content(metadata, content):
250249
current_subdoc['type'] = 'lvl1'
251250
current_subdoc['object_id'] = custom_slugify(heading_slug)
252251
current_subdoc['hierarchy']['lvl1'] = current_h1
252+
current_subdoc['hierarchy']['lvl0'] = current_h1 if metadata.get('title', '') == '' else metadata.get('title', '')
253253
elif line.startswith('## '):
254254
if current_subdoc:
255255
yield from split_large_document(current_subdoc)
@@ -272,7 +272,7 @@ def parse_markdown_content(metadata, content):
272272
'objectID': get_object_id(f'{heading_slug}-{current_h2}'),
273273
'type': 'lvl2',
274274
'hierarchy': {
275-
'lvl0': metadata.get('title', ''),
275+
'lvl0': current_h1 if metadata.get('title', '') == '' else metadata.get('title', ''),
276276
'lvl1': current_h1,
277277
'lvl2': current_h2,
278278
}
@@ -300,7 +300,7 @@ def parse_markdown_content(metadata, content):
300300
'objectID': get_object_id(f'{heading_slug}-{current_h3}'),
301301
'type': 'lvl3',
302302
'hierarchy': {
303-
'lvl0': metadata.get('title', ''),
303+
'lvl0': current_h1 if metadata.get('title', '') == '' else metadata.get('title', ''),
304304
'lvl1': current_h1,
305305
'lvl2': current_h2,
306306
'lvl3': current_h3,
@@ -325,7 +325,7 @@ def parse_markdown_content(metadata, content):
325325
'objectID': get_object_id(f'{heading_slug}-{current_h4}'),
326326
'type': 'lvl4',
327327
'hierarchy': {
328-
'lvl0': metadata.get('title', ''),
328+
'lvl0': current_h1 if metadata.get('title', '') == '' else metadata.get('title', ''),
329329
'lvl1': current_h1,
330330
'lvl2': current_h2,
331331
'lvl3': current_h3,
@@ -404,7 +404,8 @@ def main(base_directory, sub_directories, algolia_app_id, algolia_api_key, algol
404404
batch_size=1000, dry_run=False):
405405
temp_index_name = f"{algolia_index_name}_temp"
406406
client = SearchClientSync(algolia_app_id, algolia_api_key)
407-
create_new_index(client, temp_index_name)
407+
if not dry_run:
408+
create_new_index(client, temp_index_name)
408409
docs = []
409410
for sub_directory in sub_directories:
410411
directory = os.path.join(base_directory, sub_directory)
@@ -426,14 +427,15 @@ def main(base_directory, sub_directories, algolia_app_id, algolia_api_key, algol
426427
print(f'{'processed' if dry_run else 'indexed'} {len(batch)} records')
427428
t += len(batch)
428429
print(f'total {'processed' if dry_run else 'indexed'} {t} records')
429-
print('switching temporary index...', end='')
430-
client.operation_index(
431-
index_name=temp_index_name,
432-
operation_index_params={
433-
"operation": "move",
434-
"destination": algolia_index_name
435-
},
436-
)
430+
if not dry_run:
431+
print('switching temporary index...', end='')
432+
client.operation_index(
433+
index_name=temp_index_name,
434+
operation_index_params={
435+
"operation": "move",
436+
"destination": algolia_index_name
437+
},
438+
)
437439
print('done')
438440

439441

0 commit comments

Comments
 (0)