2
2
from __future__ import annotations as _annotations
3
3
4
4
import os
5
- from typing import TypedDict , cast
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING , TypedDict , cast
6
8
7
- from algoliasearch .search .client import SearchClientSync
8
- from bs4 import BeautifulSoup
9
- from mkdocs .config import Config
10
- from mkdocs .structure .files import Files
11
- from mkdocs .structure .pages import Page
9
+ from pydantic import TypeAdapter
10
+
11
+ if TYPE_CHECKING :
12
+ from mkdocs .config import Config
13
+ from mkdocs .structure .files import Files
14
+ from mkdocs .structure .pages import Page
12
15
13
16
14
17
class AlgoliaRecord (TypedDict ):
@@ -20,19 +23,18 @@ class AlgoliaRecord(TypedDict):
20
23
21
24
22
25
# Module-level list of search records; on_post_build serializes it to JSON.
records: list[AlgoliaRecord] = []
# Shared adapter used to dump the record list to JSON and to validate it when read back.
records_ta = TypeAdapter(list[AlgoliaRecord])

# these values should match docs/javascripts/search-worker.js.
ALGOLIA_APP_ID = 'KPPUDTIAVX'
ALGOLIA_INDEX_NAME = 'pydantic-ai-docs'

# Algolia has a limit of 100kb per record in the paid plan,
# leave some space for the other fields as well.
MAX_CONTENT_LENGTH = 90_000
31
34
32
35
33
36
def on_page_content (html : str , page : Page , config : Config , files : Files ) -> str :
34
- if not ALGOLIA_WRITE_API_KEY :
35
- return html
37
+ from bs4 import BeautifulSoup
36
38
37
39
assert page .title is not None , 'Page title must not be None'
38
40
title = cast (str , page .title )
@@ -93,26 +95,52 @@ def on_page_content(html: str, page: Page, config: Config, files: Files) -> str:
93
95
return html
94
96
95
97
98
# File name (inside the built site directory) that holds the serialized records.
ALGOLIA_RECORDS_FILE = 'algolia_records.json'


def on_post_build(config: Config) -> None:
    """Write the collected records as JSON into the built site directory.

    The file is created at ``config['site_dir'] / ALGOLIA_RECORDS_FILE``.
    Nothing is written when ``records`` is empty.
    """
    if records:
        algolia_records_path = Path(config['site_dir']) / ALGOLIA_RECORDS_FILE
        with algolia_records_path.open('wb') as f:
            # dump_json returns bytes, hence the binary file mode.
            f.write(records_ta.dump_json(records))
106
+
107
+
108
def algolia_upload() -> None:
    """Upload the records stored in ``site/algolia_records.json`` to Algolia.

    The records file is resolved relative to the current working directory.
    Records whose ``content`` is longer than ``MAX_CONTENT_LENGTH`` are printed
    and left out of the upload. The index is cleared before the remaining
    records are added in a single batch request.

    Raises:
        KeyError: if the ``ALGOLIA_WRITE_API_KEY`` environment variable is not set.
    """
    # Imported lazily so the module can be loaded without algoliasearch installed.
    from algoliasearch.search.client import SearchClientSync

    algolia_write_api_key = os.environ['ALGOLIA_WRITE_API_KEY']

    client = SearchClientSync(ALGOLIA_APP_ID, algolia_write_api_key)
    filtered_records: list[AlgoliaRecord] = []

    algolia_records_path = Path.cwd() / 'site' / ALGOLIA_RECORDS_FILE

    with algolia_records_path.open('rb') as f:
        all_records = records_ta.validate_json(f.read())

    for record in all_records:
        content = record['content']
        if len(content) > MAX_CONTENT_LENGTH:
            # Report the oversized record instead of sending it to Algolia.
            print(
                f"Record with title '{record['title']}' has more than {MAX_CONTENT_LENGTH} characters, {len(content)}."
            )
            print(content)
        else:
            filtered_records.append(record)

    print(f'Uploading {len(filtered_records)} out of {len(all_records)} records to Algolia...')

    # Replace the index contents: drop every existing object, then add the fresh records.
    client.clear_objects(index_name=ALGOLIA_INDEX_NAME)

    client.batch(
        index_name=ALGOLIA_INDEX_NAME,
        batch_write_params={'requests': [{'action': 'addObject', 'body': record} for record in filtered_records]},
    )
139
+
140
+
141
# Script entry point: upload only when the last command-line argument is "upload".
if __name__ == '__main__':
    if sys.argv[-1] == 'upload':
        algolia_upload()
    else:
        print('Run with "upload" argument to upload records to Algolia.')
        # sys.exit rather than the builtin exit(), which the site module only
        # injects for interactive use and may be absent (e.g. under `python -S`).
        sys.exit(1)
0 commit comments