more updates to typesense

LineIndent · LineIndent · commit dec926ed2886 · 2025-07-14T14:10:13.000+03:00
diff --git a/pcweb/components/docpage/navbar/typesense.py b/pcweb/components/docpage/navbar/typesense.py
@@ -4,6 +4,7 @@
 import typesense
 import os
 import re
+from scripts.component_discovery import get_component_names
 
 # Constants
 TYPESENSE_CONFIG = {
@@ -18,7 +19,7 @@
 
 # Enhanced search parameters with component-aware boosting
 BASE_SEARCH_PARAMS = {
-    'per_page': 8,
+    'per_page': 20,
     'highlight_full_fields': 'title,content,components',
     'snippet_threshold': 30,
     'num_typos': 2,
@@ -107,16 +108,6 @@ def _get_filter_sections(self) -> list[str]:
         """Get sections for current filter."""
         return FILTER_SECTION_MAPPING.get(self.selected_filter, [])
 
-    def _is_component_query(self, query: str) -> bool:
-        """Detect if the query is likely searching for a component."""
-        query_lower = query.lower()
-        # Check for rx. prefix, common component patterns, or if it's in components section
-        return (
-            query_lower.startswith('rx.') or
-            query_lower.startswith('reflex.') or
-            any(keyword in query_lower for keyword in ['button', 'input', 'text', 'box', 'image', 'link', 'icon', 'form', 'table', 'chart', 'modal', 'dialog']) or
-            self.selected_filter == "Components"
-        )
 
     def _clean_component_query(self, query: str) -> str:
         """Clean and normalize component queries."""
@@ -125,26 +116,25 @@ def _clean_component_query(self, query: str) -> str:
         return cleaned.strip()
 
     async def search_docs(self, query: str):
-        """Enhanced search with component-aware logic."""
+        """Search docs, using component‑boost when the Components tab is active."""
         self.search_query = query
-
         if not query.strip():
             self._clear_search_results()
             return
 
         self.is_searching = True
 
         try:
-            # Determine search strategy based on query type
-            is_component_search = self._is_component_query(query)
-
-            if is_component_search:
+            if self.selected_filter == "Components":
+                # Hard “Components” priority
                 results = await self._perform_component_search(query)
             else:
+                # All other tabs (Docs, API Reference, Blogs, or All)
                 results = await self._perform_regular_search(query)
 
             self.search_results = self._format_search_results(results)
             self.show_results = True
+
         except Exception as e:
             print(f"Search error: {e}")
             self._clear_search_results()
diff --git a/scripts/component_discovery.py b/scripts/component_discovery.py
@@ -0,0 +1,33 @@
+# component_discovery.py
+import logging
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+_component_names = None
+
+def get_component_names() -> list[str]:
+    """Return the sorted, cached list of component slugs found in docs/library.
+
+    Each ``*.md`` stem loses a trailing ``-ll`` and is recorded as-is, with
+    hyphens replaced by underscores, and with underscores removed.
+    """
+    global _component_names
+    if _component_names is not None:
+        return _component_names
+
+    # This module lives in scripts/, so the repo-level docs/library is one
+    # directory up; the script-local path is kept only as a fallback.
+    here = Path(__file__).resolve().parent
+    candidates = (here.parent / 'docs' / 'library', here / 'docs' / 'library')
+    library_root = next((c for c in candidates if c.exists()), candidates[0])
+    names = set()
+    if library_root.exists():
+        for md in library_root.rglob('*.md'):
+            slug = md.stem.removesuffix('-ll')
+            names.update((slug, slug.replace('-', '_'), slug.replace('_', '')))
+    else:
+        logger.warning(f"docs/library not found at {library_root}")
+
+    _component_names = sorted(names)
+    logger.info(f"Discovered {len(_component_names)} components")
+    return _component_names
diff --git a/scripts/typesense_indexer.py b/scripts/typesense_indexer.py
@@ -17,6 +17,8 @@
 import typesense
 from markdown import Markdown
 from bs4 import BeautifulSoup
+from component_discovery import get_component_names
+
 
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
@@ -111,17 +113,13 @@ def _get_component_names_from_docs(self) -> List[str]:
             return self._component_names
 
     def extract_components(self, content: str) -> Set[str]:
-        """Find any of those components (with rx. prefix) in the markdown."""
+        """Return ``rx.<name>`` for each known component name found in the markdown."""
         components = set()
-        comp_names = self._get_component_names_from_docs()
-
-        # look for either plain or rx.<name>
-        for name in comp_names:
-            # word boundary so we don’t match “button” inside “mybutton”
+        for name in get_component_names():
+            # whole-word, case-insensitive match of "button" or "rx.button"
             pattern = rf'\b(?:rx\.)?{re.escape(name)}\b'
-            for match in re.finditer(pattern, content):
+            if re.search(pattern, content, flags=re.IGNORECASE):
                 components.add(f"rx.{name}")
-
         return components
 
     def extract_headings(self, content: str) -> List[str]:
@@ -139,17 +137,6 @@ def extract_headings(self, content: str) -> List[str]:
 
         return headings
 
-    def _is_likely_component(self, name: str) -> bool:
-        """Check if a name is likely a Reflex component."""
-        if not name.startswith('rx.'):
-            return False
-
-        component_name = name[3:]  # Remove 'rx.' prefix
-
-        # Component names should be lowercase with underscores
-        # and not contain special characters
-        return re.match(r'^[a-z][a-z0-9_]*$', component_name) is not None
-
     def extract_code_examples(self, content: str) -> str:
         """Extract code examples from markdown content."""
         code_blocks = []