1414import tqdm
1515import click
1616
async def ensure_collection_exists() -> None:
    """Create the Typesense collection named by ``TYPESENSE_INDEX_NAME`` if it is missing.

    Lists the existing collections through ``typesense_client`` and, when none
    of them carries the configured name, creates the collection with the post
    schema defined below. Does nothing when the collection is already present.

    Raises:
        RuntimeError: if the ``TYPESENSE_INDEX_NAME`` environment variable is
            unset or empty.
    """
    # Read the name once so the existence check and the create call cannot
    # disagree, and fail early instead of sending ``name=None`` to the server.
    index_name = os.getenv('TYPESENSE_INDEX_NAME')
    if not index_name:
        raise RuntimeError(
            "TYPESENSE_INDEX_NAME environment variable must be set to the "
            "name of the Typesense collection"
        )

    # NOTE(review): these client calls are not awaited, so they presumably
    # block the event loop while they run -- confirm this is acceptable here.
    collections = typesense_client.collections.retrieve()
    if any(collection['name'] == index_name for collection in collections):
        return

    typesense_client.collections.create({
        'name': index_name,
        'fields': [
            # NOTE(review): index_post() converts post['id'] to str before
            # indexing, while the schema declares int32 -- confirm which type
            # Typesense expects for the document id.
            {'name': "id", 'type': "int32"},
            {'name': "url", 'type': "string"},
            {'name': "time_added", 'type': "int64"},
            {'name': "time_added_as_date", 'type': "string", 'optional': True},
            {'name': "source", 'type': "string"},
            {'name': "tags", 'type': "string[]", 'facet': True},
            {'name': "title", 'type': "string"},
            {'name': "abstract", 'type': "string"},
            {'name': "content", 'type': "string"},
            {'name': "html", 'type': "string", 'optional': True},
            # Wildcard entry: presumably lets Typesense auto-detect the type of
            # any field not listed above (e.g. ``links``) -- confirm against
            # the Typesense schema documentation.
            {'name': ".*", 'type': "auto"},
        ],
    })
36+
1737async def index_post (post : Post ) -> None :
38+ # check if the collection exists
1839 collection = typesense_client .collections [os .getenv ('TYPESENSE_INDEX_NAME' )]
1940 post ['id' ] = str (post ['id' ])
2041 post ['links' ] = json .dumps (post ['links' ])
@@ -23,6 +44,7 @@ async def index_post(post: Post) -> None:
2344 print (f"Indexed post { post ['id' ]} " )
2445
2546async def run_indexing (limit , page_size = 100 , concurrency = 10 ):
47+ await ensure_collection_exists ()
2648 semaphore = asyncio .Semaphore (concurrency )
2749 async def index_post_with_semaphore (post : Post ) -> None :
2850 async with semaphore :
0 commit comments