Updated sources and segments to support multiple names. Updated documentation generators.

travis-bauer · travis-bauer · commit 80ea2c747006 · 2025-09-20T21:02:34.000-06:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## 0.9.2 (in development)
+### Improvements
+- Added the ability for sources and segments to have multiple names in chatterlang.
+
+## 0.9.1
+Fixed a missing import of the lancedb module in talkpipe/__init__.py; without it, the lancedb segments were not being registered.
+
 ## 0.9.0
 ### New and Updated Segments and Sources
 - Added **set**, which simply assigns some constant to a key.
diff --git a/src/talkpipe/__init__.py b/src/talkpipe/__init__.py
@@ -1,3 +1,7 @@
+import warnings
+warnings.filterwarnings("ignore", message=".*ColPaliEmbeddings.*has conflict with protected namespace.*")
+warnings.filterwarnings("ignore", message=".*SigLipEmbeddings.*has conflict with protected namespace.*")
+
 from talkpipe.pipe.basic import *
 from talkpipe.pipe.math import *
 from talkpipe.pipe.io import *
diff --git a/src/talkpipe/app/chatterlang_reference_browser.py b/src/talkpipe/app/chatterlang_reference_browser.py
@@ -18,24 +18,36 @@
 
 class TalkPipeDoc:
     """Represents a single TalkPipe component (class or function)."""
-    
-    def __init__(self, name: str, chatterlang_name: str, doc_type: str, 
-                 module: str, base_classes: List[str], docstring: str, 
+
+    def __init__(self, name: str, chatterlang_names: List[str], doc_type: str,
+                 module: str, base_classes: List[str], docstring: str,
                  parameters: Dict[str, str]):
         self.name = name
-        self.chatterlang_name = chatterlang_name
+        self.chatterlang_names = chatterlang_names  # List of all names for this component
+        self.primary_name = chatterlang_names[0]  # Primary name for display
         self.doc_type = doc_type  # 'Source', 'Segment', 'Field Segment'
         self.module = module
         self.base_classes = base_classes
         self.docstring = docstring
         self.parameters = parameters
 
+    @property
+    def chatterlang_name(self):
+        """Backward compatibility property."""
+        return self.primary_name
+
+    @property
+    def all_names_display(self):
+        """Display string showing all names."""
+        return ", ".join(self.chatterlang_names)
+
 
 class TalkPipeBrowser:
     """Interactive terminal browser for TalkPipe documentation."""
-    
+
     def __init__(self):
-        self.components: Dict[str, TalkPipeDoc] = {}
+        self.components: Dict[str, TalkPipeDoc] = {}  # Maps primary name to component
+        self.name_to_primary: Dict[str, str] = {}  # Maps any name to primary name
         self.modules: Dict[str, List[str]] = {}
         self.load_components()
     
@@ -44,23 +56,38 @@ def _extract_parameters(self, cls: type) -> Dict[str, str]:
         return extract_parameters_dict(cls)
     
     def load_components(self):
-        """Load all components from the plugin system."""
+        """Load all components from the plugin system, grouping multiple names for the same class."""
         load_plugins()  # Ensure plugins are loaded
-        
+
+        # Group components by class to consolidate multiple names
+        class_to_names = {}
+        class_to_type = {}
+
         # Load sources
         for chatterlang_name, cls in input_registry.all.items():
-            component_info = extract_component_info(chatterlang_name, cls, "Source")
-            if component_info:
-                self._load_component_from_info(component_info)
-        
+            if cls not in class_to_names:
+                class_to_names[cls] = []
+                class_to_type[cls] = "Source"
+            class_to_names[cls].append(chatterlang_name)
+
         # Load segments
         for chatterlang_name, cls in segment_registry.all.items():
-            component_type = detect_component_type(cls, "Segment")
-            component_info = extract_component_info(chatterlang_name, cls, component_type)
+            if cls not in class_to_names:
+                class_to_names[cls] = []
+                class_to_type[cls] = detect_component_type(cls, "Segment")
+            class_to_names[cls].append(chatterlang_name)
+
+        # Create consolidated components
+        for cls, names in class_to_names.items():
+            # Sort names to ensure consistent primary name selection
+            names.sort()
+            primary_name = names[0]
+
+            component_info = extract_component_info(primary_name, cls, class_to_type[cls])
             if component_info:
-                self._load_component_from_info(component_info)
+                self._load_component_from_info(component_info, names)
     
-    def _load_component_from_info(self, component_info):
+    def _load_component_from_info(self, component_info, all_names: List[str]):
         """Load a single component from ComponentInfo into the browser."""
         try:
             # Convert parameters from ParamSpec list to dict for browser compatibility
@@ -100,20 +127,26 @@ def _load_component_from_info(self, component_info):
             # Create component
             component = TalkPipeDoc(
                 name=component_info.name,
-                chatterlang_name=component_info.chatterlang_name,
+                chatterlang_names=all_names,
                 doc_type=component_info.component_type,
                 module=component_info.module,
                 base_classes=component_info.base_classes,
                 docstring=component_info.docstring,
                 parameters=parameters
             )
-            
-            self.components[component_info.chatterlang_name] = component
+
+            # Store component under primary name
+            primary_name = all_names[0]
+            self.components[primary_name] = component
+
+            # Map all names to the primary name for lookup
+            for name in all_names:
+                self.name_to_primary[name] = primary_name
             
             # Group by module
             if component_info.module not in self.modules:
                 self.modules[component_info.module] = []
-            self.modules[component_info.module].append(component_info.chatterlang_name)
+            self.modules[component_info.module].append(primary_name)
             
         except Exception as e:
             print(f"Warning: Failed to load component {component_info.chatterlang_name}: {e}")
@@ -223,24 +256,29 @@ def _list_module_components(self, module_name: str):
                 type_icon = "🔧"
             else:
                 type_icon = "⚙️"
-            print(f"{type_icon} {comp.chatterlang_name:<20} ({comp.name})")
+            print(f"{type_icon} {comp.all_names_display:<30} ({comp.name})")
         print()
     
     def _show_component(self, component_name: str):
         """Show detailed information about a component."""
-        # Try exact match first
-        component = self.components.get(component_name)
-        
-        # If not found, try case-insensitive search
+        # Try exact match using name lookup
+        primary_name = self.name_to_primary.get(component_name)
+        component = None
+
+        if primary_name:
+            component = self.components.get(primary_name)
+
+        # If not found, try case-insensitive search in all names
         if not component:
-            matches = [name for name in self.components.keys() 
+            matches = [name for name in self.name_to_primary.keys()
                       if name.lower() == component_name.lower()]
             if matches:
-                component = self.components[matches[0]]
+                primary_name = self.name_to_primary[matches[0]]
+                component = self.components[primary_name]
         
         # If still not found, suggest similar names
         if not component:
-            similar = [name for name in self.components.keys() 
+            similar = [name for name in self.name_to_primary.keys()
                       if component_name.lower() in name.lower()]
             if similar:
                 print(f"Component '{component_name}' not found. Did you mean:")
@@ -252,7 +290,7 @@ def _show_component(self, component_name: str):
         
         # Display component details
         print(f"\n{'='*60}")
-        print(f"📋 {component.chatterlang_name}")
+        print(f"📋 {component.all_names_display}")
         print(f"{'='*60}")
         print(f"Class/Function: {component.name}")
         print(f"Type:           {component.doc_type}")
@@ -281,11 +319,12 @@ def _search_components(self, search_term: str):
         """Search for components by name or description."""
         search_lower = search_term.lower()
         matches = []
-        
+
         for comp_name, component in self.components.items():
-            # Search in chatterlang name, class name, and docstring
-            if (search_lower in comp_name.lower() or 
-                search_lower in component.name.lower() or 
+            # Search in all chatterlang names, class name, and docstring
+            name_match = any(search_lower in name.lower() for name in component.chatterlang_names)
+            if (name_match or
+                search_lower in component.name.lower() or
                 search_lower in component.docstring.lower()):
                 matches.append(component)
         
@@ -296,14 +335,14 @@ def _search_components(self, search_term: str):
         print(f"\nSearch Results for '{search_term}' ({len(matches)} found):")
         print("-" * 60)
         
-        for component in sorted(matches, key=lambda x: x.chatterlang_name):
+        for component in sorted(matches, key=lambda x: x.primary_name):
             if component.doc_type == "Source":
                 type_icon = "🔌"
             elif component.doc_type == "Field Segment":
                 type_icon = "🔧"
             else:
                 type_icon = "⚙️"
-            print(f"{type_icon} {component.chatterlang_name:<20} ({component.module})")
+            print(f"{type_icon} {component.all_names_display:<30} ({component.module})")
             
             # Show brief description
             if component.docstring:
diff --git a/src/talkpipe/app/chatterlang_reference_generator.py b/src/talkpipe/app/chatterlang_reference_generator.py
@@ -38,25 +38,42 @@ def sanitize_id(text: str) -> str:
 def analyze_registered_items() -> List[AnalyzedItem]:
     """
     Analyze all registered sources and segments from the plugin system.
+    Groups items with multiple chatterlang names together.
     """
     load_plugins()  # Ensure plugins are loaded
-    analyzed_items = []
-    
+
+    # Group by class to handle multiple names for the same class
+    class_to_names = {}
+
     # Process sources
     for chatterlang_name, cls in input_registry.all.items():
-        component_info = extract_component_info(chatterlang_name, cls, 'Source')
-        if component_info:
-            item = convert_component_info_to_analyzed_item(component_info)
-            analyzed_items.append(item)
-    
+        if cls not in class_to_names:
+            class_to_names[cls] = {'names': [], 'type': 'Source'}
+        class_to_names[cls]['names'].append(chatterlang_name)
+
     # Process segments
     for chatterlang_name, cls in segment_registry.all.items():
-        component_type = detect_component_type(cls, 'Segment')
-        component_info = extract_component_info(chatterlang_name, cls, component_type)
+        if cls not in class_to_names:
+            component_type = detect_component_type(cls, 'Segment')
+            class_to_names[cls] = {'names': [], 'type': component_type}
+        class_to_names[cls]['names'].append(chatterlang_name)
+
+    analyzed_items = []
+
+    # Create AnalyzedItem objects with combined names
+    for cls, info in class_to_names.items():
+        # Sort names for consistent output
+        sorted_names = sorted(info['names'])
+        primary_name = sorted_names[0]  # Use first alphabetically as primary
+
+        component_info = extract_component_info(primary_name, cls, info['type'])
         if component_info:
             item = convert_component_info_to_analyzed_item(component_info)
+            # If multiple names, combine them
+            if len(sorted_names) > 1:
+                item.chatterlang_name = ', '.join(sorted_names)
             analyzed_items.append(item)
-    
+
     return analyzed_items
 
 def convert_component_info_to_analyzed_item(component_info: ComponentInfo) -> AnalyzedItem:
diff --git a/src/talkpipe/chatterlang/registry.py b/src/talkpipe/chatterlang/registry.py
@@ -31,17 +31,37 @@ def all(self) -> Dict[str, Type[T]]:
 input_registry = Registry()
 segment_registry = Registry()
 
-def register_source(name: str):
-    """Decorator to register a source module in the registry. """
+def register_source(*names: str, name: str = None):
+    """Decorator to register a source module with one or more names in the registry. """
+    # Handle backward compatibility with name= keyword argument
+    if name is not None:
+        if names:
+            raise ValueError("Cannot specify both positional names and 'name' keyword argument")
+        names = (name,)
+
+    if not names:
+        raise ValueError("At least one name must be provided")
+
     def wrap(cls):
-        input_registry.register(cls, name=name)
+        for source_name in names:
+            input_registry.register(cls, name=source_name)
         return cls
     return wrap
 
-def register_segment(name: str):
-    """Decorator to register a setment module in the registry. """
+def register_segment(*names: str, name: str = None):
+    """Decorator to register a segment module with one or more names in the registry. """
+    # Handle backward compatibility with name= keyword argument
+    if name is not None:
+        if names:
+            raise ValueError("Cannot specify both positional names and 'name' keyword argument")
+        names = (name,)
+
+    if not names:
+        raise ValueError("At least one name must be provided")
+
     def wrap(cls):
-        segment_registry.register(cls, name=name)
+        for segment_name in names:
+            segment_registry.register(cls, name=segment_name)
         return cls
     return wrap
 
diff --git a/src/talkpipe/search/lancedb.py b/src/talkpipe/search/lancedb.py
@@ -10,7 +10,7 @@
 
 logger = logging.getLogger(__name__)
 
-@register_segment("searchLancDB")
+@register_segment("searchLanceDB", "searchLancDB")
 @segment()
 def search_lancedb(items: Annotated[object, "Items with the query vectors"],
                    path: Annotated[str, "Path to the LanceDB database"],
@@ -54,7 +54,7 @@ def search_lancedb(items: Annotated[object, "Items with the query vectors"],
             for result in search_results:
                 yield result
 
-@register_segment("addToLancDB")
+@register_segment("addToLanceDB", "addToLancDB")
 @segment()
 def add_to_lancedb(items: Annotated[object, "Items with the vectors and documents"],
                    path: Annotated[str, "Path to the LanceDB database"],
diff --git a/tests/talkpipe/app/test_chatterlang_reference_browser.py b/tests/talkpipe/app/test_chatterlang_reference_browser.py