Skip to content

Commit b340898

Browse files
authored
+search algo to include synonyms bucket (#1626)
1 parent 3d54390 commit b340898

File tree

5 files changed

+102
-7
lines changed

5 files changed

+102
-7
lines changed

.github/workflows/deploy-dev.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ jobs:
4747
run: |
4848
uv pip install typesense python-frontmatter markdown beautifulsoup4
4949
python scripts/typesense_indexer.py --docs-path ./docs --blog-path ./blog --force
50+
python scripts/synonym_indexer.py
5051
5152
- name: Deploy to Reflex
5253
id: deploy

.github/workflows/deploy-prd.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ jobs:
4545
run: |
4646
uv pip install typesense python-frontmatter markdown beautifulsoup4
4747
python scripts/typesense_indexer.py --docs-path ./docs --blog-path ./blog --force
48+
python scripts/synonym_indexer.py
4849
4950
- name: Deploy to Reflex
5051
id: deploy

.github/workflows/deploy-stg.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ jobs:
4747
run: |
4848
uv pip install typesense python-frontmatter markdown beautifulsoup4
4949
python scripts/typesense_indexer.py --docs-path ./docs --blog-path ./blog --force
50+
python scripts/synonym_indexer.py
5051
5152
- name: Deploy to Reflex
5253
id: deploy

scripts/synonym_indexer.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
""" Dynamically generate synonyms for docs to better type match"""
2+
import os
3+
import pathlib
4+
import typesense
5+
6+
from typesense_indexer import TYPESENSE_CONFIG
7+
8+
# Module-level Typesense client, created at import time from the
# TYPESENSE_CONFIG imported from typesense_indexer; used by main() for upserts.
client = typesense.Client(TYPESENSE_CONFIG)
9+
10+
def get_folder_hierarchy(root: "str | os.PathLike[str]") -> dict:
    """Recursively build the folder hierarchy found under ``root``.

    Args:
        root: Directory to scan, given as a string or a path-like object.

    Returns:
        A nested dict mapping each sub-folder name to the hierarchy of its
        own sub-folders. Folders without sub-folders map to an empty dict.
        Files are ignored, as are folders named ``__pycache__``, ``.git``
        and ``.venv``. Keys are inserted in sorted name order.

    Raises:
        OSError: If ``root`` (or a nested folder) cannot be scanned.
    """
    ignored = {"__pycache__", ".git", ".venv"}

    # Use scandir as a context manager so the directory handle is released
    # deterministically, and collect the entries before recursing so that
    # only one handle is open at a time. Sorting makes the traversal order
    # independent of the filesystem's arbitrary listing order.
    with os.scandir(root) as entries:
        subfolders = sorted(
            (entry.name, entry.path)
            for entry in entries
            if entry.is_dir() and entry.name not in ignored
        )

    return {name: get_folder_hierarchy(path) for name, path in subfolders}
17+
18+
def generate_synonyms(name: str) -> list[str]:
    """Generate the spelling variants of a folder/component name.

    The name is split into words on underscores, hyphens and whitespace,
    and the following forms are produced (shown for ``react_flow``):

    - flattened lowercase: ``reactflow``
    - lowercase spaced: ``react flow``
    - title case spaced: ``React Flow``
    - underscore: ``react_flow``
    - hyphen: ``react-flow``
    - PascalCase: ``ReactFlow``

    Args:
        name: Folder or component name to expand.

    Returns:
        The distinct variants in sorted order, or an empty list when the
        name contains no words (empty or made only of separators).
    """
    words = name.replace("_", " ").replace("-", " ").split()
    if not words:
        # Without this guard the joins below would all yield "", producing a
        # meaningless empty-string synonym.
        return []

    lowered = [word.lower() for word in words]
    capitalized = [word.capitalize() for word in words]

    # A set removes duplicates (single-word names collapse several forms).
    variants = {
        "".join(lowered),
        " ".join(lowered),
        " ".join(capitalized),
        "_".join(lowered),
        "-".join(lowered),
        "".join(capitalized),
    }

    # Sort so the result is stable across runs; plain set iteration order
    # varies with string hash randomization.
    return sorted(variants)
47+
48+
def flatten_hierarchy(hierarchy: dict) -> dict:
    """Collapse a nested folder hierarchy into one flat synonym mapping.

    Every folder name at any depth becomes one entry. The entry key is the
    folder name lowercased with underscores and hyphens removed, and the
    value is ``{"synonyms": [...]}`` as produced by ``generate_synonyms``.
    Folders are visited parent-first; when two folders map to the same key,
    the one visited later overwrites the earlier entry.
    """

    def iter_names(tree):
        # Pre-order walk: yield a folder before any of its descendants.
        for folder, children in tree.items():
            yield folder
            if children:
                yield from iter_names(children)

    flattened = {}
    for folder in iter_names(hierarchy):
        canonical = folder.lower().replace("_", "").replace("-", "")
        flattened[canonical] = {"synonyms": generate_synonyms(folder)}
    return flattened
65+
66+
def main() -> bool:
    """Sync synonyms derived from the ``docs`` folder names to Typesense.

    Scans the ``docs`` directory (relative to the current working
    directory), builds the flat synonym mapping and upserts each entry into
    the synonyms of the ``docs`` collection.

    Returns:
        True when every synonym entry was upserted; False if any exception
        was raised along the way (the error is printed).
    """
    try:
        folder_tree = get_folder_hierarchy(pathlib.Path("docs"))
        synonym_map = flatten_hierarchy(folder_tree)

        print("Upserting new synonyms to Typesense ...")
        for canonical, info in synonym_map.items():
            payload = {"synonyms": info["synonyms"]}
            client.collections["docs"].synonyms.upsert(canonical, payload)

        print("Synonyms synced successfully!")
    except Exception as e:
        # Top-level boundary: report the failure and signal it via the
        # return value so the caller can set the process exit code.
        print("Error syncing synonyms:", e)
        return False
    return True
85+
86+
if __name__ == "__main__":
    # Exit with status 0 on success and 1 on failure. SystemExit is raised
    # directly because the bare ``exit`` helper is injected by the ``site``
    # module and is not guaranteed to exist (e.g. when run with ``python -S``).
    raise SystemExit(0 if main() else 1)

scripts/typesense_indexer.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
"""Generate indexed docs + collection for typesense search"""
2+
13
import os
24
import pathlib
35
import datetime
@@ -10,16 +12,15 @@
1012

1113
import typesense
1214
import reflex as rx
13-
from reflex.utils.imports import ImportVar
1415

15-
# Add the project root to the sys.path
16+
1617
project_root = pathlib.Path(__file__).resolve().parent.parent
1718
if str(project_root) not in sys.path:
1819
sys.path.insert(0, str(project_root))
1920

20-
from pcweb.pages.docs.source import Source, generate_docs
21+
from pcweb.pages.docs.source import Source
2122
from pcweb.pages.docs.apiref import modules
22-
from pcweb.pages.docs.env_vars import env_vars_page, EnvVarDocs
23+
from pcweb.pages.docs.env_vars import EnvVarDocs
2324

2425
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
2526
logger = logging.getLogger(__name__)
@@ -73,12 +74,13 @@ def _extract_headings_from_component(c: Any) -> List[str]:
7374
'connection_timeout_seconds': 60
7475
}
7576

77+
7678
COLLECTION_SCHEMA = {
7779
'name': 'docs',
7880
'fields': [
79-
{'name': 'id', 'type': 'string'},
80-
{'name': 'title', 'type': 'string'},
81-
{'name': 'content', 'type': 'string'},
81+
{'name': 'id', 'type': 'string', "infix": True},
82+
{'name': 'title', 'type': 'string', "infix": True},
83+
{'name': 'content', 'type': 'string', "infix": True},
8284
{'name': 'headings', 'type': 'string[]'},
8385
{'name': 'components', 'type': 'string[]', 'optional': True},
8486
{'name': 'path', 'type': 'string'},
@@ -91,6 +93,8 @@ def _extract_headings_from_component(c: Any) -> List[str]:
9193
],
9294
}
9395

96+
97+
9498
class SimpleTypesenseIndexer:
9599
"""Simplified indexer using your existing logic."""
96100

0 commit comments

Comments
 (0)