Skip to content

Commit 13ce4d2

Browse files
committed
final tweak
1 parent f6ce2f4 commit 13ce4d2

File tree

2 files changed

+62
-196
lines changed

2 files changed

+62
-196
lines changed

pcweb/components/docpage/navbar/typesense.py

Lines changed: 56 additions & 173 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22
import reflex as rx
33
import typesense
44

5+
suggestion_items = [
6+
{"name": "Components Overview", "path": "/docs/library", "icon": "blocks", "description": "Discover and explore the full library of available components"},
7+
{"name": "State Management", "path": "/docs/state/overview", "icon": "database", "description": "Master state handling, data flow, and reactive programming"},
8+
{"name": "Event Overview", "path": "/docs/events/events-overview", "icon": "zap", "description": "Learn how to handle user interactions and system events"},
9+
{"name": "Styling and Theming", "path": "/docs/styling/overview", "icon": "palette", "description": "Customize colors, layouts, and create beautiful app designs"},
10+
{"name": "Deployment Guide", "path": "/docs/hosting/deploy-quick-start/", "icon": "cloud", "description": "Deploy and host your application in production environments"},
11+
]
12+
513
CLUSTERS = {
614
"All Content": [],
715
"AI Builder": ["ai_builder"],
@@ -13,22 +21,18 @@
1321
"Blog Posts": []
1422
}
1523

16-
# Typesense configuration
24+
1725
TYPESENSE_CONFIG = {
1826
"nodes": [{"host": os.getenv("TYPESENSE_HOST"), "port": "443", "protocol": "https"}],
1927
"api_key": os.getenv("TYPESENSE_SEARCH_API_KEY"),
2028
"connection_timeout_seconds": 2,
2129
}
2230

23-
# Score cutoff to filter weak results
24-
CUTOFF = 0.6
25-
2631
class SimpleSearch(rx.State):
2732
query: str
2833
selected_filter: str = "All Content"
2934
is_fetching: bool = False
3035

31-
# Results - keeping same structure as your fuzzy search
3236
idxed_docs_results: list[dict] = []
3337
idxed_blogs_results: list[dict] = []
3438

@@ -66,7 +70,6 @@ async def perform_search(self):
6670

6771
client = typesense.Client(TYPESENSE_CONFIG)
6872

69-
# Build search parameters
7073
search_params = {
7174
"q": self.query,
7275
"query_by": "title,content,headings,components",
@@ -84,7 +87,6 @@ async def perform_search(self):
8487
"snippet_threshold": 30,
8588
}
8689

87-
# Apply filter
8890
if self.selected_filter != "All Content":
8991
if self.selected_filter == "Blog Posts":
9092
search_params["filter_by"] = "section:=Blog"
@@ -95,17 +97,14 @@ async def perform_search(self):
9597
section_filter = " || ".join(f"section:={s}" for s in sections)
9698
search_params["filter_by"] = section_filter
9799

98-
# Perform search
99100
result = client.collections["docs"].documents.search(search_params)
100101

101-
# Split results into docs and blogs
102102
docs_results = []
103103
blog_results = []
104104

105105
for hit in result["hits"]:
106106
doc = hit["document"]
107-
# formatted_doc = self._format_result(doc)
108-
formatted_doc = self._format_result(doc, hit.get("highlights", [])) # Pass highlights here
107+
formatted_doc = self._format_result(doc, hit.get("highlights", []))
109108

110109
if doc.get("section") == "Blog":
111110
blog_results.append(formatted_doc)
@@ -124,162 +123,62 @@ async def perform_search(self):
124123
self.is_fetching = False
125124

126125
def _get_highlighted_content(self, doc, highlights, snippet_length=350):
127-
"""
128-
Return a string snippet (HTML-ready) prioritizing:
129-
1) Typesense content/title snippets (expanded + bolded)
130-
2) Typesense components highlights
131-
3) Client-side components match -> "Components: ..." (bold matched parts)
132-
4) Truncated content fallback
133-
"""
134126
import re
127+
135128
BOLD_STYLE = '<span style="font-weight: 900; color: #AA99EC;">'
136129
CLOSE_TAG = '</span>'
137130
content = doc.get("content", "") or ""
138131

139-
# Helper to bold tokens case-insensitively in a snippet
140-
def bold_tokens_in_snippet(snippet: str, tokens: list[str]) -> str:
132+
def bold_tokens(snippet: str, tokens: list[str]) -> str:
141133
for tok in sorted(set(t for t in tokens if t), key=len, reverse=True):
142134
try:
143135
snippet = re.sub(re.escape(tok), f"{BOLD_STYLE}\\g<0>{CLOSE_TAG}", snippet, flags=re.I)
144136
except re.error:
145-
# fallback to simple replace if regex fails (rare)
146137
snippet = snippet.replace(tok, f"{BOLD_STYLE}{tok}{CLOSE_TAG}")
147138
return snippet
148139

149-
# 1) Prefer Typesense content/title highlight snippets (and expand them)
140+
# 1) Typesense content/title highlights
150141
for h in highlights or []:
151-
field = h.get("field", "")
152-
if field in ("content", "title"):
153-
# support different response shapes
154-
snippet = h.get("snippet") or (h.get("snippets") or [None])[0] or h.get("value") or (h.get("values") or [None])[0] or ""
142+
if h.get("field") in ("content", "title"):
143+
snippet = h.get("snippet") or h.get("value") or ""
155144
if snippet:
156-
# If Typesense already included <mark> tags, extract matched tokens
157145
marked = re.findall(r"<mark>(.*?)</mark>", snippet, flags=re.I)
158146
if marked:
159-
# find earliest occurrence of any marked token in full content
160-
match_index = -1
161-
match_token = None
162-
for token in marked:
163-
idx = content.lower().find(token.lower())
164-
if idx != -1 and (match_index == -1 or idx < match_index):
165-
match_index = idx
166-
match_token = token
167-
# if we found the token in full content, expand around it
168-
if match_index != -1 and match_token is not None:
169-
start_full = match_index
170-
end_full = start_full + len(match_token)
171-
before = max(0, start_full - snippet_length // 2)
172-
after = min(len(content), end_full + snippet_length // 2)
173-
candidate = content[before:after].strip()
174-
# bold all marked tokens in the candidate
175-
candidate = bold_tokens_in_snippet(candidate, marked)
176-
if before > 0:
177-
candidate = "..." + candidate
178-
if after < len(content):
179-
candidate = candidate + "..."
180-
return candidate
181-
else:
182-
# If we didn't find the token in content, still replace marks in snippet and return (short)
183-
cleaned = snippet.replace("<mark>", BOLD_STYLE).replace("</mark>", CLOSE_TAG)
184-
return cleaned[:snippet_length] + ("..." if len(cleaned) > snippet_length else cleaned)
185-
else:
186-
# No <mark> in typesense snippet; return snippet up to length
187-
s = snippet[:snippet_length]
188-
return s + "..." if len(snippet) > snippet_length else s
189-
190-
# 2) If Typesense returned component highlights, use them
147+
token = marked[0]
148+
idx = content.lower().find(token.lower())
149+
if idx != -1:
150+
start = max(0, idx - snippet_length // 2)
151+
end = min(len(content), idx + len(token) + snippet_length // 2)
152+
snippet = content[start:end]
153+
snippet = bold_tokens(snippet, marked)
154+
if start > 0:
155+
snippet = "..." + snippet
156+
if end < len(content):
157+
snippet = snippet + "..."
158+
return snippet
159+
160+
snippet = snippet.replace("<mark>", BOLD_STYLE).replace("</mark>", CLOSE_TAG)
161+
return snippet[:snippet_length] + ("..." if len(snippet) > snippet_length else snippet)
162+
163+
# 2) Typesense component highlights (simplified)
191164
for h in highlights or []:
192-
field = h.get("field", "")
193-
# handle "components" or "components[0]" style or similar
194-
if field.startswith("components"):
195-
# typesense may return .get("values") or .get("value")
165+
if h.get("field", "").startswith("components"):
196166
values = h.get("values") or ([h.get("value")] if h.get("value") else [])
197167
if values:
198-
# Replace any <mark> tags with bold style
199-
cleaned_vals = [v.replace("<mark>", BOLD_STYLE).replace("</mark>", CLOSE_TAG) for v in values if v]
200-
comp_str = ", ".join(cleaned_vals[:6]) # cap to first few
201-
return f"Components: {comp_str}"
168+
cleaned = [v.replace("<mark>", BOLD_STYLE).replace("</mark>", CLOSE_TAG) for v in values if v]
169+
return f"Matches found: {', '.join(cleaned[:6])}"
202170

203-
# 3) Client-side components match when Typesense didn't highlight components
204-
q = (getattr(self, "query", "") or "").strip()
171+
# 3) Client-side components match
172+
q = (getattr(self, "query", "") or "").strip().lower()
205173
if q:
206-
q_lower = q.lower()
207174
comps = doc.get("components") or []
208-
matched = []
209-
for comp in comps:
210-
if not isinstance(comp, str):
211-
continue
212-
comp_l = comp.lower()
213-
# match whole query or any token
214-
if q_lower in comp_l or any(tok in comp_l for tok in q_lower.split()):
215-
matched.append(comp)
175+
matched = [c for c in comps if isinstance(c, str) and q in c.lower()]
216176
if matched:
217-
# Bold the matched substrings in each component name
218-
def bold_name(name: str) -> str:
219-
# try to bold full query first
220-
try:
221-
res = re.sub(re.escape(q), f"{BOLD_STYLE}\\g<0>{CLOSE_TAG}", name, flags=re.I)
222-
except re.error:
223-
res = name
224-
# also bold any token parts if still unbolded
225-
for tok in set(t for t in q_lower.split() if t):
226-
try:
227-
res = re.sub(re.escape(tok), f"{BOLD_STYLE}\\g<0>{CLOSE_TAG}", res, flags=re.I)
228-
except re.error:
229-
res = res
230-
return res
231-
comp_preview = ", ".join(bold_name(x) for x in matched[:6])
232-
return f"Components: {comp_preview}"
233-
234-
# 4) final fallback: truncated content
235-
return self._truncate_content(content, max_length=snippet_length)
236-
237-
238-
239-
# def _get_highlighted_content(self, doc, highlights, snippet_length=350):
240-
# BOLD_STYLE = '<span style="font-weight: 900; color: #AA99EC;">'
241-
# CLOSE_TAG = '</span>'
242-
# content = doc.get("content", "")
243-
244-
# for h in highlights:
245-
# field = h.get("field")
246-
# if field in ["content", "title"]:
247-
# snippet = h.get("snippet") or h.get("value", "")
248-
249-
# if "<mark>" in snippet:
250-
# # Extract the matched word
251-
# start_mark = snippet.find("<mark>") + len("<mark>")
252-
# end_mark = snippet.find("</mark>")
253-
# matched_word = snippet[start_mark:end_mark]
177+
bolded = [re.sub(re.escape(q), f"{BOLD_STYLE}\\g<0>{CLOSE_TAG}", c, flags=re.I) for c in matched[:6]]
178+
return f"Matches found: {', '.join(bolded)}"
254179

255-
# # Find that word in the full content
256-
# start_full = content.find(matched_word)
257-
# end_full = start_full + len(matched_word)
258-
259-
# # Expand around the match
260-
# before = max(0, start_full - snippet_length // 2)
261-
# after = min(len(content), end_full + snippet_length // 2)
262-
# snippet = content[before:after]
263-
264-
# # Re-insert bold around the matched word
265-
# snippet = snippet.replace(
266-
# matched_word, f"{BOLD_STYLE}{matched_word}{CLOSE_TAG}"
267-
# )
268-
269-
# # Add ellipses if truncated
270-
# if before > 0:
271-
# snippet = "..." + snippet
272-
# if after < len(content):
273-
# snippet = snippet + "..."
274-
# return snippet
275-
276-
# # fallback if no <mark>
277-
# return snippet[:snippet_length] + "..." if len(snippet) > snippet_length else snippet
278-
279-
280-
# # Fallback if no highlights: plain truncated content
281-
# content = self._truncate_content(doc.get("content", ""), max_length=snippet_length)
282-
# return content
180+
# 4) fallback: truncated content
181+
return self._truncate_content(content, max_length=snippet_length)
283182

284183

285184
def _get_sections_for_cluster(self, cluster_name: str) -> list[str]:
@@ -288,9 +187,9 @@ def _get_sections_for_cluster(self, cluster_name: str) -> list[str]:
288187

289188
def _format_result(self, doc: dict, highlights: list = []) -> dict:
290189
"""Format Typesense result to match your fuzzy search structure"""
291-
# For docs
190+
292191
if doc.get("section") != "Blog":
293-
# Reconstruct parts from path for breadcrumb
192+
294193
path_parts = doc.get("path", "").replace(".md", "").split("/")
295194
parts = [part.replace("-", " ").replace("_", " ").title() for part in path_parts if part]
296195

@@ -300,20 +199,18 @@ def _format_result(self, doc: dict, highlights: list = []) -> dict:
300199
"url": doc.get("url", ""),
301200
"image": doc.get('path', ""),
302201
"cluster": self._get_cluster_from_section(doc.get("section", "")),
303-
# "description": self._truncate_content(doc.get("content", "")),
304202
"description":self._get_highlighted_content(doc, highlights),
305203
}
306204

307-
# For blogs
308205
else:
309206

310207
return {
311208
"title": doc.get("title", ""),
312209
"url": doc.get("url", ""),
313-
"author": doc.get("subsection", ""), # Author stored in subsection for blogs
314-
"date": "2024", # You might want to add proper date handling
210+
"author": doc.get("author", ""),
211+
"date": doc.get("date", ""),
315212
"description": self._truncate_content(doc.get("content", "")),
316-
"image": "/placeholder-image.jpg", # You'll need to handle images properly
213+
"image": doc.get("image", ""),
317214
}
318215

319216
def _get_cluster_from_section(self, section: str) -> str:
@@ -330,14 +227,6 @@ def _truncate_content(self, content: str, max_length: int = 200) -> str:
330227
return content[:max_length].rstrip() + "..."
331228

332229

333-
# Keep all your existing UI components exactly the same
334-
suggestion_items = [
335-
{"name": "Components Overview", "path": "/docs/library", "icon": "blocks", "description": "Discover and explore the full library of available components"},
336-
{"name": "State Management", "path": "/docs/state/overview", "icon": "database", "description": "Master state handling, data flow, and reactive programming"},
337-
{"name": "Event Overview", "path": "/docs/events/events-overview", "icon": "zap", "description": "Learn how to handle user interactions and system events"},
338-
{"name": "Styling and Theming", "path": "/docs/styling/overview", "icon": "palette", "description": "Customize colors, layouts, and create beautiful app designs"},
339-
{"name": "Deployment Guide", "path": "/docs/hosting/deploy-quick-start/", "icon": "cloud", "description": "Deploy and host your application in production environments"},
340-
]
341230

342231
def keyboard_shortcut_script() -> rx.Component:
343232
"""Add keyboard shortcut support for opening search."""
@@ -572,14 +461,14 @@ def search_result_blog(value: dict):
572461
),
573462
style={"display": "-webkit-box", "-webkit-line-clamp": "2", "-webkit-box-orient": "vertical"},
574463
),
575-
# rx.box(
576-
# rx.image(
577-
# src=value["image"].to(str),
578-
# class_name="rounded-md",
579-
# border_radius="10px 10px",
580-
# ),
581-
# class_name="w-full rounded-md pt-3",
582-
# ),
464+
rx.box(
465+
rx.image(
466+
src=value["image"].to(str),
467+
class_name="rounded-md",
468+
border_radius="10px 10px",
469+
),
470+
class_name="w-full rounded-md pt-3",
471+
),
583472
class_name="p-2 w-full flex flex-col gap-y-1 justify-start items-start align-start",
584473
),
585474
href=f"{value['url'].to(str)}",
@@ -636,26 +525,22 @@ def search_content():
636525
return rx.scroll_area(
637526
rx.cond(
638527
SimpleSearch.query.length() < 3,
639-
# Show suggestions when query is too short
640528
rx.box(
641529
rx.foreach(suggestion_items, lambda value: search_result_start(value)),
642530
class_name="flex flex-col gap-y-2",
643531
),
644-
# Query is 3+ characters
645532
rx.cond(
646533
SimpleSearch.is_fetching,
647534
rx.cond(
648535
(SimpleSearch.idxed_docs_results.length() >= 1) | (SimpleSearch.idxed_blogs_results.length() >= 1),
649536
rx.box(
650-
# Docs results
651537
rx.box(
652538
rx.foreach(
653539
SimpleSearch.idxed_docs_results,
654540
lambda value: search_result(value["parts"].to(list), value)
655541
),
656542
class_name="flex flex-col gap-y-2",
657543
),
658-
# Blog results
659544
rx.box(
660545
rx.foreach(
661546
SimpleSearch.idxed_blogs_results,
@@ -670,15 +555,13 @@ def search_content():
670555
rx.cond(
671556
(SimpleSearch.idxed_docs_results.length() >= 1) | (SimpleSearch.idxed_blogs_results.length() >= 1),
672557
rx.box(
673-
# Docs results
674558
rx.box(
675559
rx.foreach(
676560
SimpleSearch.idxed_docs_results,
677561
lambda value: search_result(value["parts"].to(list), value)
678562
),
679563
class_name="flex flex-col gap-y-2",
680564
),
681-
# Blog results
682565
rx.box(
683566
rx.foreach(
684567
SimpleSearch.idxed_blogs_results,
@@ -697,7 +580,7 @@ def search_content():
697580

698581

699582
def typesense_search() -> rx.Component:
700-
"""Create the main search component."""
583+
"""Create the main search component for Reflex Web"""
701584
return rx.fragment(
702585
rx.dialog.root(
703586
rx.dialog.trigger(search_trigger(), id="search-trigger"),

0 commit comments

Comments
 (0)