Skip to content

Commit ed19762

Browse files
authored
typesense patches (#1511)
* typesense patches * more updates * more updates to typesense * Revert "more updates to typesense" This reverts commit dec926e. Revert * updates * better query and dynamics * more updates to typesense * ui changes * final fix for blog & docs url * remove last hard coded snippet
1 parent c929e2e commit ed19762

File tree

2 files changed

+424
-377
lines changed

2 files changed

+424
-377
lines changed

pcweb/components/docpage/navbar/typesense.py

Lines changed: 106 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
"""Typesense search component for the navbar."""
1+
"""Improved Typesense search component with better component search handling."""
22

33
import reflex as rx
44
import typesense
55
import os
6+
import re
67

78
# Constants
89
TYPESENSE_CONFIG = {
@@ -15,12 +16,16 @@
1516
'connection_timeout_seconds': 10
1617
}
1718

18-
SEARCH_PARAMS = {
19-
'query_by': 'title,content,headings',
20-
'per_page': 8,
21-
'highlight_full_fields': 'title,content',
19+
# Enhanced search parameters with component-aware boosting
20+
BASE_SEARCH_PARAMS = {
21+
'per_page': 20,
22+
'highlight_full_fields': 'title,content,components',
2223
'snippet_threshold': 30,
23-
'num_typos': 2
24+
'num_typos': 2,
25+
'typo_tokens_threshold': 1,
26+
'drop_tokens_threshold': 1,
27+
'prioritize_exact_match': True,
28+
'prioritize_token_position': True,
2429
}
2530

2631
FILTER_CATEGORIES = [
@@ -42,46 +47,26 @@
4247
"Blogs": ["Blog"]
4348
}
4449

45-
SECTION_DISPLAY_NAMES = {
46-
'getting_started': 'Getting Started',
47-
'library': 'Components',
48-
'api-reference': 'API Reference',
49-
'hosting': 'Hosting',
50-
'events': 'Events',
51-
'styling': 'Styling',
52-
'state': 'State',
53-
'vars': 'Variables',
54-
'database': 'Database',
55-
'authentication': 'Authentication',
56-
'custom-components': 'Custom Components',
57-
'wrapping-react': 'Wrapping React',
58-
'ai_builder': 'AI Builder',
59-
'recipes': 'Recipes',
60-
'advanced_onboarding': 'Advanced',
61-
'enterprise': 'Enterprise',
62-
'utility_methods': 'Utilities',
63-
'client_storage': 'Client Storage',
64-
'components': 'Components',
65-
'pages': 'Pages',
66-
'assets': 'Assets',
67-
'api-routes': 'API Routes',
68-
'ui': 'UI',
69-
'state_structure': 'State Structure',
70-
'Blog': 'Blog'
71-
}
72-
7350
DEFAULT_SUGGESTIONS = [
7451
{"title": "Getting Started with Reflex", "url": "/docs/getting-started/introduction"},
7552
{"title": "Components Overview", "url": "/docs/library"},
7653
{"title": "State Management", "url": "/docs/state/overview"},
77-
{"title": "Event Handlers", "url": "/docs/events/event-handlers"},
54+
{"title": "Events Overview", "url": "/docs/events/events-overview"},
7855
{"title": "Styling and Theming", "url": "/docs/styling/overview"},
79-
{"title": "Deployment Guide", "url": "/docs/hosting/deploy"},
56+
{"title": "Deployment Guide", "url": "/docs/hosting/deploy-quick-start"},
8057
]
8158

59+
# Precompiled regex patterns for component highlighting fixes
60+
PATTERN_PARTIAL_WORD = re.compile(r'<mark>([^<]*?)</mark>([a-zA-Z0-9_]*)')
61+
PATTERN_COMPONENT_NAME = re.compile(r'((?:rx|reflex)\.)<mark>([^<]*?)</mark>')
62+
PATTERN_NAMESPACE = re.compile(r'<mark>(rx|reflex)</mark>\.([a-zA-Z0-9_]+)')
63+
PATTERN_CHAINED = re.compile(r'<mark>((?:rx|reflex)\.[a-zA-Z0-9_]+)</mark>\.([a-zA-Z0-9_]+)')
64+
65+
# Styling for highlights
66+
HIGHLIGHT_STYLE = '<span style="background-color: var(--violet-3); color: var(--violet-11); padding: 2px 4px; border-radius: 3px;">'
8267

8368
class TypesenseSearchState(rx.State):
84-
"""State management for the Typesense search component."""
69+
"""Enhanced state management for the Typesense search component."""
8570

8671
# State variables
8772
search_query: str = ""
@@ -93,7 +78,6 @@ class TypesenseSearchState(rx.State):
9378
filter_categories: list[str] = FILTER_CATEGORIES
9479
suggestions: list[dict] = DEFAULT_SUGGESTIONS
9580

96-
# Modal management
9781
def open_modal(self):
9882
"""Open the search modal and reset filter state."""
9983
self.show_modal = True
@@ -112,7 +96,6 @@ def _reset_search_state(self):
11296
self.search_results = []
11397
self.selected_filter = "All"
11498

115-
# Filter management
11699
async def set_filter(self, filter_name: str):
117100
"""Set the selected filter and re-run search if there's an active query."""
118101
self.selected_filter = filter_name
@@ -123,9 +106,18 @@ def _get_filter_sections(self) -> list[str]:
123106
"""Get sections for current filter."""
124107
return FILTER_SECTION_MAPPING.get(self.selected_filter, [])
125108

126-
# Search functionality
109+
def _clean_component_query(self, query: str) -> str:
110+
"""Normalize component query by removing rx./reflex. prefix."""
111+
return re.sub(r'^(rx\.|reflex\.)', '', query.lower()).strip()
112+
113+
def _expand_query_variants(self, query: str) -> str:
114+
"""Return query string with rx./reflex. variants for flexible matching."""
115+
cleaned = self._clean_component_query(query)
116+
variants = {cleaned, f"rx.{cleaned}", f"reflex.{cleaned}"}
117+
return " ".join(sorted(variants)) # Order doesn't matter
118+
127119
async def search_docs(self, query: str):
128-
"""Search the documentation using Typesense."""
120+
"""Enhanced search with component-aware logic."""
129121
self.search_query = query
130122

131123
if not query.strip():
@@ -135,7 +127,7 @@ async def search_docs(self, query: str):
135127
self.is_searching = True
136128

137129
try:
138-
results = await self._perform_search(query)
130+
results = await self._perform_unified_search(query)
139131
self.search_results = self._format_search_results(results)
140132
self.show_results = True
141133
except Exception as e:
@@ -144,99 +136,101 @@ async def search_docs(self, query: str):
144136

145137
self.is_searching = False
146138

147-
def _clear_search_results(self):
148-
"""Clear search results and hide results display."""
149-
self.search_results = []
150-
self.show_results = False
151-
152-
async def _perform_search(self, query: str) -> dict:
153-
"""Perform the actual Typesense search."""
139+
async def _perform_unified_search(self, query: str) -> dict:
    """Run a single Typesense search over the ``docs`` collection.

    The query is expanded with ``_expand_query_variants`` and searched across
    the ``title``, ``content``, ``headings`` and ``components`` fields, with
    matches wrapped in ``<mark>``/``</mark>``. The selected filter narrows the
    search: "Components" filters on ``is_component:=true``, any other filter
    except "All" filters on the sections from ``_get_filter_sections``.

    Args:
        query: Raw text typed by the user.

    Returns:
        The raw response dict from the Typesense client.

    Raises:
        Any exception raised by the Typesense client propagates to the caller.
    """
    # Function-scope import so this block does not depend on the file header.
    import asyncio

    search_parameters = {
        'q': self._expand_query_variants(query),
        **BASE_SEARCH_PARAMS,
        'query_by': 'title,content,headings,components',
        # One weight per ``query_by`` field, in the same order.
        'query_by_weights': '10,3,3,6',
        'highlight_start_tag': '<mark>',
        'highlight_end_tag': '</mark>',
        # NOTE(review): assumes the indexed documents carry ``weight`` and
        # ``is_component`` fields - confirm against the collection schema.
        'sort_by': 'weight:desc, is_component:desc, _text_match:desc',
    }

    # Apply filter if not "All"
    if self.selected_filter == "Components":
        search_parameters['filter_by'] = "is_component:=true"
    elif self.selected_filter != "All":
        sections = self._get_filter_sections()
        if sections:
            search_parameters['filter_by'] = ' || '.join(f"section:={s}" for s in sections)

    client = typesense.Client(TYPESENSE_CONFIG)
    # The client call is synchronous; run it in a worker thread so a slow
    # search does not stall the event loop this coroutine runs on.
    return await asyncio.to_thread(
        client.collections['docs'].documents.search,
        search_parameters,
    )
171165

166+
def _clear_search_results(self):
167+
"""Clear search results and hide results display."""
168+
self.search_results = []
169+
self.show_results = False
170+
172171
def _format_search_results(self, result: dict) -> list[dict]:
173-
"""Format search results for display."""
174-
return [
175-
{
176-
'title': hit['document']['title'],
177-
'content': self._get_highlighted_content(hit), # <-- use highlight-aware content
178-
'url': hit['document']['url'],
179-
'path': hit['document']['path'],
180-
'section': hit['document'].get('section', ''),
181-
'subsection': hit['document'].get('subsection', ''),
182-
'breadcrumb': self._create_breadcrumb(hit['document'])
172+
"""Format search results for display with enhanced component info."""
173+
formatted_results = []
174+
175+
for hit in result['hits']:
176+
doc = hit['document']
177+
components = doc.get('components', [])
178+
component_info = None
179+
if components:
180+
component_info = f"Components: {', '.join(components)}"
181+
formatted_result = {
182+
'title': doc['title'],
183+
'content': self._get_highlighted_content(hit),
184+
'url': doc['url'],
185+
'path': doc['path'],
186+
'section': doc.get('section', ''),
187+
'subsection': doc.get('subsection', ''),
188+
'breadcrumb': doc.get('breadcrumb', ''),
189+
'components': components,
190+
'component_info': component_info,
191+
'score': hit.get('text_match', 0)
183192
}
184-
for hit in result['hits']
185-
]
193+
formatted_results.append(formatted_result)
194+
195+
return formatted_results
186196

187197

188198
def _get_highlighted_content(self, hit: dict) -> str:
    """Build the HTML snippet shown under a search result.

    The hit's highlights are scanned in the order Typesense returned them and
    the first usable one wins:

    * ``components``: the highlighted component names, comma-separated and
      prefixed with a bold "Components:" label.
    * ``content`` or ``title``: the highlighted snippet (or the full value
      when no snippet is present), if it contains a ``<mark>`` tag.

    Before styling, ``<mark>`` spans are widened by the module-level
    ``PATTERN_*`` regexes so that partially matched words and ``rx.``/
    ``reflex.`` component names are highlighted as a whole. ``<mark>`` tags
    are then replaced by ``HIGHLIGHT_STYLE`` spans.

    Args:
        hit: A single entry from the ``hits`` list of a Typesense response.

    Returns:
        An HTML string. When no highlight is usable, the document's plain
        ``content`` truncated by ``_truncate_content`` (an empty string if
        the document has no content).
    """
    # NOTE(review): the returned string is rendered with ``rx.html`` and the
    # indexed text is not HTML-escaped here - confirm the index only contains
    # trusted documentation content.

    def fix_component_highlighting(text: str) -> str:
        """Fix incomplete word and component highlighting patterns."""
        text = PATTERN_PARTIAL_WORD.sub(r'<mark>\1\2</mark>', text)
        text = PATTERN_COMPONENT_NAME.sub(r'<mark>\1\2</mark>', text)
        text = PATTERN_NAMESPACE.sub(r'<mark>\1.\2</mark>', text)
        text = PATTERN_CHAINED.sub(r'<mark>\1.\2</mark>', text)
        return text

    def style_marks(text: str) -> str:
        """Swap the ``<mark>`` tags for the styled highlight span."""
        return text.replace('<mark>', HIGHLIGHT_STYLE).replace('</mark>', '</span>')

    for highlight in hit.get('highlights') or []:
        field = highlight.get('field')
        if field == 'components':
            # Array fields carry per-element lists: ``values`` holds the full
            # elements when the field is in ``highlight_full_fields``;
            # otherwise only ``snippets`` is present.
            values = highlight.get('values') or highlight.get('snippets') or []
            if values:
                highlighted_components = ', '.join(
                    fix_component_highlighting(value) for value in values
                )
                styled = f"<span style='font-weight: 600;'>Components:</span> {highlighted_components}"
                return style_marks(styled)
        elif field in ('content', 'title'):
            content = highlight.get('snippet') or highlight.get('value', '')
            if content and '<mark>' in content:
                return style_marks(fix_component_highlighting(content))

    # Fallback to truncated plain content; tolerate a missing or null field
    # instead of raising for a single incomplete document.
    document = hit.get('document') or {}
    return self._truncate_content(document.get('content') or '')
205227

206-
207228
def _truncate_content(self, content: str, max_length: int = 150) -> str:
208229
"""Truncate content to specified length."""
209230
if len(content) <= max_length:
210231
return content
211232
return content[:max_length] + '...'
212233

213-
def _create_breadcrumb(self, document: dict) -> str:
214-
"""Create a breadcrumb string from document metadata."""
215-
parts = []
216-
217-
# Add section
218-
section = document.get('section', '')
219-
if section:
220-
section_display = SECTION_DISPLAY_NAMES.get(
221-
section,
222-
section.replace('-', ' ').replace('_', ' ').title()
223-
)
224-
parts.append(section_display)
225-
226-
# Add subsection
227-
subsection = document.get('subsection', '')
228-
if subsection:
229-
subsection_display = subsection.replace('-', ' ').replace('_', ' ').title()
230-
parts.append(subsection_display)
231-
232-
# Add title if different from last part
233-
title = document.get('title', '')
234-
if title and (not parts or title.lower() != parts[-1].lower()):
235-
parts.append(title)
236-
237-
return ' › '.join(parts)
238-
239-
# Navigation
240234
def hide_results(self):
241235
"""Hide search results."""
242236
self.show_results = False
@@ -248,7 +242,7 @@ def navigate_to_result(self, url: str):
248242
return rx.redirect(url)
249243

250244

251-
# Component functions
245+
# UI component functions
252246
def filter_pill(filter_name: str) -> rx.Component:
253247
"""Render a single filter pill."""
254248
is_selected = TypesenseSearchState.selected_filter == filter_name
@@ -333,7 +327,7 @@ def suggestions_section() -> rx.Component:
333327

334328

335329
def search_result_item(result: rx.Var) -> rx.Component:
336-
"""Render a single search result item with breadcrumb navigation."""
330+
"""Enhanced search result item with component information."""
337331
return rx.box(
338332
rx.vstack(
339333
rx.text(
@@ -345,14 +339,14 @@ def search_result_item(result: rx.Var) -> rx.Component:
345339
rx.html(
346340
result['title'],
347341
class_name="text-md font-medium !text-slate-12",
342+
word_break= "keep-all",
348343
),
349344
spacing="2",
350345
align_items="center",
351346
margin_bottom="4px"
352347
),
353348
rx.html(
354349
result['content'],
355-
# color="var(--c-slate-11)",
356350
font_size="14px",
357351
line_height="1.5",
358352
class_name="prose prose-sm text-slate-12"
@@ -386,7 +380,7 @@ def search_input() -> rx.Component:
386380
on_click=rx.run_script("document.dispatchEvent(new KeyboardEvent('keydown', { key: 'Escape' }))"),
387381
),
388382
rx.el.input(
389-
placeholder="What are you searching for?",
383+
placeholder="Search components, docs, or features...",
390384
on_change=TypesenseSearchState.search_docs.debounce(500),
391385
id="search-input",
392386
auto_focus=True,

0 commit comments

Comments
 (0)