Skip to content

Commit 9ef2ae8

Browse files
committed
typesense patches
1 parent c929e2e commit 9ef2ae8

File tree

2 files changed

+409
-326
lines changed

2 files changed

+409
-326
lines changed

pcweb/components/docpage/navbar/typesense.py

Lines changed: 157 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
1-
"""Typesense search component for the navbar."""
1+
"""Improved Typesense search component with better component search handling."""
22

33
import reflex as rx
44
import typesense
55
import os
6+
import re
67

78
# Constants
89
TYPESENSE_CONFIG = {
@@ -15,12 +16,16 @@
1516
'connection_timeout_seconds': 10
1617
}
1718

18-
SEARCH_PARAMS = {
19-
'query_by': 'title,content,headings',
19+
# Enhanced search parameters with component-aware boosting
20+
BASE_SEARCH_PARAMS = {
2021
'per_page': 8,
21-
'highlight_full_fields': 'title,content',
22+
'highlight_full_fields': 'title,content,components',
2223
'snippet_threshold': 30,
23-
'num_typos': 2
24+
'num_typos': 2,
25+
'typo_tokens_threshold': 1,
26+
'drop_tokens_threshold': 1,
27+
'prioritize_exact_match': True,
28+
'prioritize_token_position': True,
2429
}
2530

2631
FILTER_CATEGORIES = [
@@ -74,14 +79,14 @@
7479
{"title": "Getting Started with Reflex", "url": "/docs/getting-started/introduction"},
7580
{"title": "Components Overview", "url": "/docs/library"},
7681
{"title": "State Management", "url": "/docs/state/overview"},
77-
{"title": "Event Handlers", "url": "/docs/events/event-handlers"},
82+
{"title": "Events Overview", "url": "/docs/events/events-overview"},
7883
{"title": "Styling and Theming", "url": "/docs/styling/overview"},
79-
{"title": "Deployment Guide", "url": "/docs/hosting/deploy"},
84+
{"title": "Deployment Guide", "url": "/docs/hosting/deploy-quick-start"},
8085
]
8186

8287

8388
class TypesenseSearchState(rx.State):
84-
"""State management for the Typesense search component."""
89+
"""Enhanced state management for the Typesense search component."""
8590

8691
# State variables
8792
search_query: str = ""
@@ -93,7 +98,6 @@ class TypesenseSearchState(rx.State):
9398
filter_categories: list[str] = FILTER_CATEGORIES
9499
suggestions: list[dict] = DEFAULT_SUGGESTIONS
95100

96-
# Modal management
97101
def open_modal(self):
98102
"""Open the search modal and reset filter state."""
99103
self.show_modal = True
@@ -112,7 +116,6 @@ def _reset_search_state(self):
112116
self.search_results = []
113117
self.selected_filter = "All"
114118

115-
# Filter management
116119
async def set_filter(self, filter_name: str):
117120
"""Set the selected filter and re-run search if there's an active query."""
118121
self.selected_filter = filter_name
@@ -123,9 +126,25 @@ def _get_filter_sections(self) -> list[str]:
123126
"""Get sections for current filter."""
124127
return FILTER_SECTION_MAPPING.get(self.selected_filter, [])
125128

126-
# Search functionality
129+
def _is_component_query(self, query: str) -> bool:
130+
"""Detect if the query is likely searching for a component."""
131+
query_lower = query.lower()
132+
# Check for rx. prefix, common component patterns, or if it's in components section
133+
return (
134+
query_lower.startswith('rx.') or
135+
query_lower.startswith('reflex.') or
136+
any(keyword in query_lower for keyword in ['button', 'input', 'text', 'box', 'image', 'link', 'icon', 'form', 'table', 'chart', 'modal', 'dialog']) or
137+
self.selected_filter == "Components"
138+
)
139+
140+
def _clean_component_query(self, query: str) -> str:
141+
"""Clean and normalize component queries."""
142+
# Remove rx. or reflex. prefix for better matching
143+
cleaned = re.sub(r'^(rx\.|reflex\.)', '', query.lower())
144+
return cleaned.strip()
145+
127146
async def search_docs(self, query: str):
128-
"""Search the documentation using Typesense."""
147+
"""Enhanced search with component-aware logic."""
129148
self.search_query = query
130149

131150
if not query.strip():
@@ -135,7 +154,14 @@ async def search_docs(self, query: str):
135154
self.is_searching = True
136155

137156
try:
138-
results = await self._perform_search(query)
157+
# Determine search strategy based on query type
158+
is_component_search = self._is_component_query(query)
159+
160+
if is_component_search:
161+
results = await self._perform_component_search(query)
162+
else:
163+
results = await self._perform_regular_search(query)
164+
139165
self.search_results = self._format_search_results(results)
140166
self.show_results = True
141167
except Exception as e:
@@ -149,15 +175,51 @@ def _clear_search_results(self):
149175
self.search_results = []
150176
self.show_results = False
151177

152-
async def _perform_search(self, query: str) -> dict:
153-
"""Perform the actual Typesense search."""
178+
async def _perform_component_search(self, query: str) -> dict:
    """Run a Typesense search tuned for component look-ups.

    The query is normalized with ``_clean_component_query`` and searched
    across ``components``, ``title``, ``content`` and ``headings`` with the
    weights 4, 3, 1 and 2 respectively, sorted by text-match score.

    When a filter other than "All" is selected and it maps to one or more
    sections, results are restricted to those sections. When the filter is
    "All", a preference for component-heavy sections is requested instead
    of a hard filter.

    Args:
        query: Raw query text.

    Returns:
        The value returned by the Typesense client's ``search`` call on the
        ``docs`` collection.
    """
    # NOTE(review): the client call below is not awaited inside this
    # coroutine -- presumably it is synchronous; confirm blocking is fine.
    client = typesense.Client(TYPESENSE_CONFIG)

    search_parameters = {
        'q': self._clean_component_query(query),
        **BASE_SEARCH_PARAMS,
        # Field order and weights pair up positionally.
        'query_by': 'components,title,content,headings',
        'query_by_weights': '4,3,1,2',
        'highlight_start_tag': '<mark>',
        'highlight_end_tag': '</mark>',
        'sort_by': '_text_match:desc',
    }

    if self.selected_filter != "All":
        # Hard filter: only the sections mapped to the selected category.
        sections = self._get_filter_sections()
        if sections:
            search_parameters['filter_by'] = ' || '.join(
                f'section:={section}' for section in sections
            )
    else:
        # No category chosen: prefer component-heavy sections without
        # excluding the rest.
        component_sections = ['library', 'components', 'custom-components', 'wrapping-react', 'api-reference']
        # NOTE(review): ``boost_by`` does not appear among Typesense's
        # documented search parameters -- confirm the server honours it.
        # The documented way to express a conditional preference is a
        # ``sort_by`` clause using ``_eval(...)``.
        search_parameters['boost_by'] = f"if(section:=[{','.join(component_sections)}], 2, 1)"

    return client.collections['docs'].documents.search(search_parameters)
210+
211+
async def _perform_regular_search(self, query: str) -> dict:
212+
"""Perform regular search for non-component queries."""
154213
client = typesense.Client(TYPESENSE_CONFIG)
155214

156215
search_parameters = {
157216
'q': query,
158-
**SEARCH_PARAMS,
217+
**BASE_SEARCH_PARAMS,
218+
'query_by': 'title,content,headings,components',
219+
'query_by_weights': '3,2,2,1', # Balanced weights for regular search
159220
'highlight_start_tag': '<mark>',
160-
'highlight_end_tag': '</mark>'
221+
'highlight_end_tag': '</mark>',
222+
'sort_by': '_text_match:desc',
161223
}
162224

163225
# Apply filter if not "All"
@@ -170,40 +232,98 @@ async def _perform_search(self, query: str) -> dict:
170232
return client.collections['docs'].documents.search(search_parameters)
171233

172234
def _format_search_results(self, result: dict) -> list[dict]:
173-
"""Format search results for display."""
174-
return [
175-
{
176-
'title': hit['document']['title'],
177-
'content': self._get_highlighted_content(hit), # <-- use highlight-aware content
178-
'url': hit['document']['url'],
179-
'path': hit['document']['path'],
180-
'section': hit['document'].get('section', ''),
181-
'subsection': hit['document'].get('subsection', ''),
182-
'breadcrumb': self._create_breadcrumb(hit['document'])
235+
"""Format search results for display with enhanced component info."""
236+
formatted_results = []
237+
238+
for hit in result['hits']:
239+
doc = hit['document']
240+
241+
# Extract component information
242+
components = doc.get('components', [])
243+
component_info = None
244+
if components:
245+
component_info = f"Components: {', '.join(components)}"
246+
247+
formatted_result = {
248+
'title': doc['title'],
249+
'content': self._get_highlighted_content(hit),
250+
'url': doc['url'],
251+
'path': doc['path'],
252+
'section': doc.get('section', ''),
253+
'subsection': doc.get('subsection', ''),
254+
'breadcrumb': self._create_breadcrumb(doc),
255+
'components': components,
256+
'component_info': component_info,
257+
'score': hit.get('text_match', 0) # Include relevance score
183258
}
184-
for hit in result['hits']
185-
]
186259

260+
formatted_results.append(formatted_result)
261+
262+
return formatted_results
187263

188264
def _get_highlighted_content(self, hit: dict) -> str:
189-
"""Get highlighted content snippet."""
265+
"""Get highlighted content snippet with component-aware highlighting."""
190266
highlights = hit.get('highlights', [])
267+
268+
def fix_component_highlighting(text):
    """Widen ``<mark>`` highlights so they cover whole words and dotted names.

    The search backend may highlight only part of a token. This helper
    rewrites the markup so that:

    1. a highlight that stops mid-word is extended to the end of the word
       (``<mark>Form</mark>s`` -> ``<mark>Forms</mark>``);
    2. an ``rx.``/``reflex.`` dotted path directly in front of a highlight
       is pulled inside it (``rx.el.<mark>input</mark>`` ->
       ``<mark>rx.el.input</mark>``);
    3. a highlighted ``rx``/``reflex`` path followed by further ``.name``
       segments absorbs all of them (``<mark>rx.form</mark>.root.submit``
       -> ``<mark>rx.form.root.submit</mark>``).

    Args:
        text: Snippet containing ``<mark>``/``</mark>`` tags.

    Returns:
        The snippet with adjusted highlight boundaries; text without
        highlights is returned unchanged.
    """
    # Fix 1: complete partially highlighted words.
    text = re.sub(
        r'<mark>([^<]*?)</mark>([a-zA-Z0-9_]+)',
        r'<mark>\1\2</mark>',
        text,
    )

    # Fix 2: namespace path sits just before the highlight. ``\b`` keeps
    # words that merely end in "rx"/"reflex" from being treated as the
    # namespace; the repeated group allows intermediate segments.
    text = re.sub(
        r'\b((?:rx|reflex)\.(?:[a-zA-Z0-9_]+\.)*)<mark>([^<]*?)</mark>',
        r'<mark>\1\2</mark>',
        text,
    )

    # Fix 3: highlighted namespace (optionally with segments) followed by
    # one or more trailing ``.name`` segments -- absorb the whole chain.
    text = re.sub(
        r'<mark>((?:rx|reflex)(?:\.[a-zA-Z0-9_]+)*)</mark>((?:\.[a-zA-Z0-9_]+)+)',
        r'<mark>\1\2</mark>',
        text,
    )

    return text
288+
191289
if highlights:
290+
# Prioritize component field highlights
291+
for highlight in highlights:
292+
if highlight.get('field') == 'components':
293+
values = highlight.get('values', [])
294+
if values:
295+
# Apply highlighting fix to component values too
296+
fixed_values = [fix_component_highlighting(value) for value in values]
297+
highlighted_components = ', '.join(fixed_values)
298+
return f"<span style='font-weight: 600;'>Components:</span> {highlighted_components}"
299+
300+
# Then look for content highlights
192301
for highlight in highlights:
193302
if highlight.get('field') == 'content':
194-
# Prefer the short 'snippet' if available, else fallback to 'value'
195303
content = highlight.get('snippet') or highlight.get('value')
196304
if content and '<mark>' in content:
305+
# Apply comprehensive highlighting fix
306+
content = fix_component_highlighting(content)
197307
content = content.replace(
198308
'<mark>', '<span style="background-color: var(--violet-3); color: var(--violet-11); padding: 2px 4px; border-radius: 3px;">'
199309
).replace('</mark>', '</span>')
310+
return content
200311

312+
# Finally, title highlights
313+
for highlight in highlights:
314+
if highlight.get('field') == 'title':
315+
content = highlight.get('snippet') or highlight.get('value')
316+
if content and '<mark>' in content:
317+
# Apply comprehensive highlighting fix
318+
content = fix_component_highlighting(content)
319+
content = content.replace(
320+
'<mark>', '<span style="background-color: var(--violet-3); color: var(--violet-11); padding: 2px 4px; border-radius: 3px;">'
321+
).replace('</mark>', '</span>')
201322
return content
202323

203324
# Fallback to truncated plain content
204325
return self._truncate_content(hit['document']['content'])
205326

206-
207327
def _truncate_content(self, content: str, max_length: int = 150) -> str:
208328
"""Truncate content to specified length."""
209329
if len(content) <= max_length:
@@ -236,7 +356,6 @@ def _create_breadcrumb(self, document: dict) -> str:
236356

237357
return ' › '.join(parts)
238358

239-
# Navigation
240359
def hide_results(self):
241360
"""Hide search results."""
242361
self.show_results = False
@@ -248,7 +367,7 @@ def navigate_to_result(self, url: str):
248367
return rx.redirect(url)
249368

250369

251-
# Component functions
370+
# UI component functions
252371
def filter_pill(filter_name: str) -> rx.Component:
253372
"""Render a single filter pill."""
254373
is_selected = TypesenseSearchState.selected_filter == filter_name
@@ -333,7 +452,7 @@ def suggestions_section() -> rx.Component:
333452

334453

335454
def search_result_item(result: rx.Var) -> rx.Component:
336-
"""Render a single search result item with breadcrumb navigation."""
455+
"""Enhanced search result item with component information."""
337456
return rx.box(
338457
rx.vstack(
339458
rx.text(
@@ -345,14 +464,14 @@ def search_result_item(result: rx.Var) -> rx.Component:
345464
rx.html(
346465
result['title'],
347466
class_name="text-md font-medium !text-slate-12",
467+
word_break= "keep-all",
348468
),
349469
spacing="2",
350470
align_items="center",
351471
margin_bottom="4px"
352472
),
353473
rx.html(
354474
result['content'],
355-
# color="var(--c-slate-11)",
356475
font_size="14px",
357476
line_height="1.5",
358477
class_name="prose prose-sm text-slate-12"
@@ -386,7 +505,7 @@ def search_input() -> rx.Component:
386505
on_click=rx.run_script("document.dispatchEvent(new KeyboardEvent('keydown', { key: 'Escape' }))"),
387506
),
388507
rx.el.input(
389-
placeholder="What are you searching for?",
508+
placeholder="Search components, docs, or features...",
390509
on_change=TypesenseSearchState.search_docs.debounce(500),
391510
id="search-input",
392511
auto_focus=True,
@@ -441,7 +560,7 @@ def search_trigger() -> rx.Component:
441560
class_name="absolute right-2 top-1/2 transform -translate-y-1/2 text-sm bg-slate-3 rounded-md text-sm !text-slate-9 px-[5px] py-[2px] hidden md:inline",
442561
),
443562
rx.el.input(
444-
placeholder="Search",
563+
placeholder="Search components...",
445564
read_only=True,
446565
class_name="bg-transparent border-none outline-none focus:outline-none pl-4 cursor-pointer hidden md:block",
447566
),

0 commit comments

Comments
 (0)