@@ -21,6 +21,7 @@ class AlgoliaRecord(TypedDict):
    abs_url: str
    title: str
    objectID: str
+   rank: int


records: list[AlgoliaRecord] = []
@@ -42,6 +43,13 @@ def on_page_content(html: str, page: Page, config: Config, files: Files) -> str:

    soup = BeautifulSoup(html, 'html.parser')

+   # If the page does not start with a heading, prepend an h1 built from the page title.
+   # Some example pages don't have a heading at all, or start with an h2.
+   first_element = soup.find()
+
+   if not first_element or not first_element.name or first_element.name not in ['h1', 'h2', 'h3']:
+       soup.insert(0, BeautifulSoup(f'<h1 id="{title}">{title}</h1>', 'html.parser'))
+
    # Clean up presentational and UI elements
    for element in soup.find_all(['autoref']):
        element.decompose()
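As an aside (not part of the diff), a minimal sketch of when this fallback fires, with an invented page title and HTML: soup.find() with no arguments returns the first tag in the document, so a page that opens with a paragraph instead of a heading gets an h1 built from the page title prepended:

from bs4 import BeautifulSoup

# Hypothetical page that starts with a paragraph instead of a heading.
title = 'Flight Booking Example'
soup = BeautifulSoup('<p>This example books a flight.</p>', 'html.parser')

first_element = soup.find()
print(first_element.name)  # 'p' -> not in ['h1', 'h2', 'h3'], so the fallback fires

if not first_element or not first_element.name or first_element.name not in ['h1', 'h2', 'h3']:
    # Same move as the plugin: prepend an <h1> built from the page title.
    soup.insert(0, BeautifulSoup(f'<h1 id="{title}">{title}</h1>', 'html.parser'))

print(soup.find('h1').get_text())  # Flight Booking Example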
@@ -62,8 +70,10 @@ def on_page_content(html: str, page: Page, config: Config, files: Files) -> str:
    for extra in soup.find_all('table', attrs={'class': 'highlighttable'}):
        extra.replace_with(BeautifulSoup(f'<pre>{extra.find("code").get_text()}</pre>', 'html.parser'))

-   # Find all h1 and h2 headings
-   headings = soup.find_all(['h1', 'h2'])
+   headings = soup.find_all(['h1', 'h2', 'h3'])
+
+   # Use rank so sections near the top of the page score higher in search results
+   rank = 100

    # Process each section
    for current_heading in headings:
@@ -73,26 +83,41 @@ def on_page_content(html: str, page: Page, config: Config, files: Files) -> str:
        # Get content until next heading
        content: list[str] = []
        sibling = current_heading.find_next_sibling()
-       while sibling and sibling.name not in {'h1', 'h2'}:
+       while sibling and sibling.name not in {'h1', 'h2', 'h3'}:
            content.append(str(sibling))
            sibling = sibling.find_next_sibling()

        section_html = ''.join(content)

+       section_soup = BeautifulSoup(section_html, 'html.parser')
+       section_plain_text = section_soup.get_text(' ', strip=True)
+
        # Create anchor URL
        anchor_url: str = f'{page.abs_url}#{heading_id}' if heading_id else page.abs_url or ''

+       record_title = title
+
+       if current_heading.name == 'h2':
+           record_title = f'{title} - {section_title}'
+       elif current_heading.name == 'h3':
+           previous_heading = current_heading.find_previous(['h1', 'h2'])
+           record_title = f'{title} - {previous_heading.get_text()} - {section_title}'
+
+       # print(f'Adding record {record_title}, {rank}, {current_heading.name}')
        # Create record for this section
        records.append(
            AlgoliaRecord(
-               content=section_html,
+               content=section_plain_text,
                pageID=title,
                abs_url=anchor_url,
-               title=f'{title} - {section_title}',
+               title=record_title,
                objectID=anchor_url,
+               rank=rank,
            )
        )

+       rank -= 5
+
    return html

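Not part of the diff, just an illustration of the new record_title and rank logic: for a made-up page titled 'Agents' with an h1 'Agents', an h2 'Running Agents', and an h3 'Streaming' under it, the loop would produce the titles and ranks below, each section ranked 5 lower than the one before it:

# Standalone sketch of the title/rank bookkeeping; the page and headings are invented.
title = 'Agents'
headings = [
    ('h1', 'Agents', None),
    ('h2', 'Running Agents', None),
    ('h3', 'Streaming', 'Running Agents'),  # (tag, section_title, nearest h1/h2 above)
]

rank = 100
for name, section_title, parent_title in headings:
    record_title = title
    if name == 'h2':
        record_title = f'{title} - {section_title}'
    elif name == 'h3':
        record_title = f'{title} - {parent_title} - {section_title}'
    print(record_title, rank)
    rank -= 5

# Agents 100
# Agents - Running Agents 95
# Agents - Running Agents - Streaming 90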
@@ -132,6 +157,16 @@ def algolia_upload() -> None:
    print(f'Uploading {len(filtered_records)} out of {len(all_records)} records to Algolia...')

    client.clear_objects(index_name=ALGOLIA_INDEX_NAME)
+   client.set_settings(
+       index_name=ALGOLIA_INDEX_NAME,
+       index_settings={
+           'searchableAttributes': ['title', 'content'],
+           'attributesToSnippet': ['content:40'],
+           'customRanking': [
+               'desc(rank)',
+           ],
+       },
+   )

    client.batch(
        index_name=ALGOLIA_INDEX_NAME,
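Not part of the diff: a rough local illustration of what 'customRanking': ['desc(rank)'] buys — when hits score the same on Algolia's built-in text relevance criteria, the record with the higher rank (a section nearer the top of its page) comes back first. The sample hits are made up:

# Mimic the desc(rank) tie-break locally: higher rank sorts first.
hits = [
    {'title': 'Agents - Running Agents - Streaming', 'rank': 90},
    {'title': 'Agents', 'rank': 100},
    {'title': 'Agents - Running Agents', 'rank': 95},
]

for hit in sorted(hits, key=lambda h: -h['rank']):
    print(hit['title'])

# Agents
# Agents - Running Agents
# Agents - Running Agents - Streaming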