Skip to content

Commit dec926e

Browse files
committed
more updates to typesense
1 parent b0ed7f7 commit dec926e

File tree

3 files changed

+46
-36
lines changed

3 files changed

+46
-36
lines changed

pcweb/components/docpage/navbar/typesense.py

Lines changed: 7 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import typesense
55
import os
66
import re
7+
from scripts.component_discovery import get_component_names
78

89
# Constants
910
TYPESENSE_CONFIG = {
@@ -18,7 +19,7 @@
1819

1920
# Enhanced search parameters with component-aware boosting
2021
BASE_SEARCH_PARAMS = {
21-
'per_page': 8,
22+
'per_page': 20,
2223
'highlight_full_fields': 'title,content,components',
2324
'snippet_threshold': 30,
2425
'num_typos': 2,
@@ -107,16 +108,6 @@ def _get_filter_sections(self) -> list[str]:
107108
"""Get sections for current filter."""
108109
return FILTER_SECTION_MAPPING.get(self.selected_filter, [])
109110

110-
def _is_component_query(self, query: str) -> bool:
111-
"""Detect if the query is likely searching for a component."""
112-
query_lower = query.lower()
113-
# Check for rx. prefix, common component patterns, or if it's in components section
114-
return (
115-
query_lower.startswith('rx.') or
116-
query_lower.startswith('reflex.') or
117-
any(keyword in query_lower for keyword in ['button', 'input', 'text', 'box', 'image', 'link', 'icon', 'form', 'table', 'chart', 'modal', 'dialog']) or
118-
self.selected_filter == "Components"
119-
)
120111

121112
def _clean_component_query(self, query: str) -> str:
122113
"""Clean and normalize component queries."""
@@ -125,26 +116,25 @@ def _clean_component_query(self, query: str) -> str:
125116
return cleaned.strip()
126117

127118
async def search_docs(self, query: str):
128-
"""Enhanced search with component-aware logic."""
119+
"""Search docs, using component‑boost when the Components tab is active."""
129120
self.search_query = query
130-
131121
if not query.strip():
132122
self._clear_search_results()
133123
return
134124

135125
self.is_searching = True
136126

137127
try:
138-
# Determine search strategy based on query type
139-
is_component_search = self._is_component_query(query)
140-
141-
if is_component_search:
128+
if self.selected_filter == "Components":
129+
# Hard “Components” priority
142130
results = await self._perform_component_search(query)
143131
else:
132+
# All other tabs (Docs, API Reference, Blogs, or All)
144133
results = await self._perform_regular_search(query)
145134

146135
self.search_results = self._format_search_results(results)
147136
self.show_results = True
137+
148138
except Exception as e:
149139
print(f"Search error: {e}")
150140
self._clear_search_results()

scripts/component_discovery.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# component_discovery.py
"""Discover Reflex component names from the markdown files in docs/library."""
import logging
from pathlib import Path

logger = logging.getLogger(__name__)

# Cache for the discovered names; stays None until the first scan has run.
_component_names = None


def _candidate_library_roots() -> list[Path]:
    """Return the docs/library locations to probe, in priority order."""
    script_dir = Path(__file__).resolve().parent
    return [
        # Location the original implementation looked at (next to this file).
        script_dir / 'docs' / 'library',
        # This module lives in scripts/, so the repository root is one level up.
        script_dir.parent / 'docs' / 'library',
    ]


def _slug_variants(stem: str) -> set[str]:
    """Return the spellings under which a doc file stem may be referenced."""
    # Low-level docs carry a "-ll" suffix that is not part of the component name.
    slug = stem.removesuffix('-ll')
    if not slug:
        # A file named exactly "-ll.md" would otherwise yield an empty name.
        return set()
    return {
        slug,
        slug.replace('-', '_'),
        # Kept from the original implementation (only underscores removed).
        slug.replace('_', ''),
        # "Plain" form with every separator removed, e.g. data-table -> datatable.
        slug.replace('-', '').replace('_', ''),
    }


def get_component_names() -> list[str]:
    """Scan docs/library for .md files and return clean slugs (no -ll suffix).

    The directory is searched recursively. Each file stem contributes its
    hyphenated, underscored and separator-free spellings. The result is
    sorted, de-duplicated and cached at module level, so the filesystem is
    only walked on the first call.

    Returns:
        Sorted list of component name variants; empty when no docs/library
        directory can be found (a warning is logged in that case).
    """
    global _component_names
    if _component_names is not None:
        return _component_names

    candidates = _candidate_library_roots()
    library_root = next((root for root in candidates if root.is_dir()), None)

    names: set[str] = set()
    if library_root is None:
        logger.warning(
            "docs/library not found at %s",
            " or ".join(str(root) for root in candidates),
        )
    else:
        for md in library_root.rglob('*.md'):
            names |= _slug_variants(md.stem)

    _component_names = sorted(names)
    logger.info("Discovered %d components", len(_component_names))
    return _component_names

scripts/typesense_indexer.py

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717
import typesense
1818
from markdown import Markdown
1919
from bs4 import BeautifulSoup
20+
from component_discovery import get_component_names
21+
2022

2123
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
2224
logger = logging.getLogger(__name__)
@@ -111,17 +113,13 @@ def _get_component_names_from_docs(self) -> List[str]:
111113
return self._component_names
112114

113115
def extract_components(self, content: str) -> Set[str]:
114-
"""Find any of those components (with rx. prefix) in the markdown."""
116+
"""Extract any component names (with rx. prefix) found in the markdown."""
115117
components = set()
116-
comp_names = self._get_component_names_from_docs()
117-
118-
# look for either plain or rx.<name>
119-
for name in comp_names:
120-
# word boundary so we don’t match “button” inside “mybutton”
118+
for name in get_component_names():
119+
# match whole words “button” or “rx.button”
121120
pattern = rf'\b(?:rx\.)?{re.escape(name)}\b'
122-
for match in re.finditer(pattern, content):
121+
for _ in re.finditer(pattern, content, flags=re.IGNORECASE):
123122
components.add(f"rx.{name}")
124-
125123
return components
126124

127125
def extract_headings(self, content: str) -> List[str]:
@@ -139,17 +137,6 @@ def extract_headings(self, content: str) -> List[str]:
139137

140138
return headings
141139

142-
def _is_likely_component(self, name: str) -> bool:
143-
"""Check if a name is likely a Reflex component."""
144-
if not name.startswith('rx.'):
145-
return False
146-
147-
component_name = name[3:] # Remove 'rx.' prefix
148-
149-
# Component names should be lowercase with underscores
150-
# and not contain special characters
151-
return re.match(r'^[a-z][a-z0-9_]*$', component_name) is not None
152-
153140
def extract_code_examples(self, content: str) -> str:
154141
"""Extract code examples from markdown content."""
155142
code_blocks = []

0 commit comments

Comments
 (0)