Skip to content

Commit fa73adc

Browse files
committed
backend index create coll
1 parent 4890f59 commit fa73adc

File tree

1 file changed

+22
-0
lines changed

1 file changed

+22
-0
lines changed

backend/scripts/index_posts.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,28 @@
1414
import tqdm
1515
import click
1616

17+
async def ensure_collection_exists() -> None:
    """Create the Typesense collection for posts if it is not there yet.

    The collection name is read once from the ``TYPESENSE_INDEX_NAME``
    environment variable. If a collection with that name is already listed
    by the server, nothing is changed; otherwise the collection is created
    with the schema below.

    Raises:
        RuntimeError: If ``TYPESENSE_INDEX_NAME`` is unset or empty. Without
            a name the existence check can never match and the create call
            would be sent with ``name=None``.
    """
    index_name = os.getenv('TYPESENSE_INDEX_NAME')
    if not index_name:
        raise RuntimeError(
            "TYPESENSE_INDEX_NAME environment variable must be set "
            "to create or look up the Typesense collection"
        )

    # NOTE(review): the client calls below are not awaited, so they
    # presumably block the event loop while they run -- confirm whether
    # that is acceptable for this script.
    existing_collections = typesense_client.collections.retrieve()
    if any(collection['name'] == index_name for collection in existing_collections):
        return

    # NOTE(review): "id" is declared as int32 here, while index_post()
    # converts post['id'] to a string before indexing -- confirm which
    # type the server actually expects for this field.
    typesense_client.collections.create({
        'name': index_name,
        'fields': [
            {'name': "id", 'type': "int32"},
            {'name': "url", 'type': "string"},
            {'name': "time_added", 'type': "int64"},
            {'name': "time_added_as_date", 'type': "string", 'optional': True},
            {'name': "source", 'type': "string"},
            {'name': "tags", 'type': "string[]", 'facet': True},
            {'name': "title", 'type': "string"},
            {'name': "abstract", 'type': "string"},
            {'name': "content", 'type': "string"},
            {'name': "html", 'type': "string", 'optional': True},
            # Catch-all rule: any field not listed above gets an
            # automatically detected type.
            {'name': ".*", 'type': "auto"},
        ],
    })
36+
1737
async def index_post(post: Post) -> None:
38+
# check if the collection exists
1839
collection = typesense_client.collections[os.getenv('TYPESENSE_INDEX_NAME')]
1940
post['id'] = str(post['id'])
2041
post['links'] = json.dumps(post['links'])
@@ -23,6 +44,7 @@ async def index_post(post: Post) -> None:
2344
print(f"Indexed post {post['id']}")
2445

2546
async def run_indexing(limit, page_size=100, concurrency=10):
47+
await ensure_collection_exists()
2648
semaphore = asyncio.Semaphore(concurrency)
2749
async def index_post_with_semaphore(post: Post) -> None:
2850
async with semaphore:

0 commit comments

Comments
 (0)