Skip to content

Commit 80ea2c7

Browse files
committed
Updated sources and segments to support multiple names. Updated documentation generators.
1 parent bb017de commit 80ea2c7

File tree

7 files changed

+209
-82
lines changed

7 files changed

+209
-82
lines changed

CHANGELOG.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
# Changelog
22

3+
## 0.9.2 (in development)
4+
### Improvements
5+
- Added the ability for sources and segments to be registered under multiple names in chatterlang.
6+
7+
## 0.9.1
8+
Fixed a missing import of the lancedb module in talkpipe/__init__.py, which prevented its segments from being registered.
9+
310
## 0.9.0
411
### New and Updated Segments and Sources
512
- Added **set**, which simply assigns some constant to a key.

src/talkpipe/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
import warnings
2+
warnings.filterwarnings("ignore", message=".*ColPaliEmbeddings.*has conflict with protected namespace.*")
3+
warnings.filterwarnings("ignore", message=".*SigLipEmbeddings.*has conflict with protected namespace.*")
4+
15
from talkpipe.pipe.basic import *
26
from talkpipe.pipe.math import *
37
from talkpipe.pipe.io import *

src/talkpipe/app/chatterlang_reference_browser.py

Lines changed: 74 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -18,24 +18,36 @@
1818

1919
class TalkPipeDoc:
2020
"""Represents a single TalkPipe component (class or function)."""
21-
22-
def __init__(self, name: str, chatterlang_name: str, doc_type: str,
23-
module: str, base_classes: List[str], docstring: str,
21+
22+
def __init__(self, name: str, chatterlang_names: List[str], doc_type: str,
23+
module: str, base_classes: List[str], docstring: str,
2424
parameters: Dict[str, str]):
2525
self.name = name
26-
self.chatterlang_name = chatterlang_name
26+
self.chatterlang_names = chatterlang_names # List of all names for this component
27+
self.primary_name = chatterlang_names[0] # Primary name for display
2728
self.doc_type = doc_type # 'Source', 'Segment', 'Field Segment'
2829
self.module = module
2930
self.base_classes = base_classes
3031
self.docstring = docstring
3132
self.parameters = parameters
3233

34+
@property
35+
def chatterlang_name(self):
36+
"""Backward compatibility property."""
37+
return self.primary_name
38+
39+
@property
40+
def all_names_display(self):
41+
"""Display string showing all names."""
42+
return ", ".join(self.chatterlang_names)
43+
3344

3445
class TalkPipeBrowser:
3546
"""Interactive terminal browser for TalkPipe documentation."""
36-
47+
3748
def __init__(self):
38-
self.components: Dict[str, TalkPipeDoc] = {}
49+
self.components: Dict[str, TalkPipeDoc] = {} # Maps primary name to component
50+
self.name_to_primary: Dict[str, str] = {} # Maps any name to primary name
3951
self.modules: Dict[str, List[str]] = {}
4052
self.load_components()
4153

@@ -44,23 +56,38 @@ def _extract_parameters(self, cls: type) -> Dict[str, str]:
4456
return extract_parameters_dict(cls)
4557

4658
def load_components(self):
47-
"""Load all components from the plugin system."""
59+
"""Load all components from the plugin system, grouping multiple names for the same class."""
4860
load_plugins() # Ensure plugins are loaded
49-
61+
62+
# Group components by class to consolidate multiple names
63+
class_to_names = {}
64+
class_to_type = {}
65+
5066
# Load sources
5167
for chatterlang_name, cls in input_registry.all.items():
52-
component_info = extract_component_info(chatterlang_name, cls, "Source")
53-
if component_info:
54-
self._load_component_from_info(component_info)
55-
68+
if cls not in class_to_names:
69+
class_to_names[cls] = []
70+
class_to_type[cls] = "Source"
71+
class_to_names[cls].append(chatterlang_name)
72+
5673
# Load segments
5774
for chatterlang_name, cls in segment_registry.all.items():
58-
component_type = detect_component_type(cls, "Segment")
59-
component_info = extract_component_info(chatterlang_name, cls, component_type)
75+
if cls not in class_to_names:
76+
class_to_names[cls] = []
77+
class_to_type[cls] = detect_component_type(cls, "Segment")
78+
class_to_names[cls].append(chatterlang_name)
79+
80+
# Create consolidated components
81+
for cls, names in class_to_names.items():
82+
# Sort names to ensure consistent primary name selection
83+
names.sort()
84+
primary_name = names[0]
85+
86+
component_info = extract_component_info(primary_name, cls, class_to_type[cls])
6087
if component_info:
61-
self._load_component_from_info(component_info)
88+
self._load_component_from_info(component_info, names)
6289

63-
def _load_component_from_info(self, component_info):
90+
def _load_component_from_info(self, component_info, all_names: List[str]):
6491
"""Load a single component from ComponentInfo into the browser."""
6592
try:
6693
# Convert parameters from ParamSpec list to dict for browser compatibility
@@ -100,20 +127,26 @@ def _load_component_from_info(self, component_info):
100127
# Create component
101128
component = TalkPipeDoc(
102129
name=component_info.name,
103-
chatterlang_name=component_info.chatterlang_name,
130+
chatterlang_names=all_names,
104131
doc_type=component_info.component_type,
105132
module=component_info.module,
106133
base_classes=component_info.base_classes,
107134
docstring=component_info.docstring,
108135
parameters=parameters
109136
)
110-
111-
self.components[component_info.chatterlang_name] = component
137+
138+
# Store component under primary name
139+
primary_name = all_names[0]
140+
self.components[primary_name] = component
141+
142+
# Map all names to the primary name for lookup
143+
for name in all_names:
144+
self.name_to_primary[name] = primary_name
112145

113146
# Group by module
114147
if component_info.module not in self.modules:
115148
self.modules[component_info.module] = []
116-
self.modules[component_info.module].append(component_info.chatterlang_name)
149+
self.modules[component_info.module].append(primary_name)
117150

118151
except Exception as e:
119152
print(f"Warning: Failed to load component {component_info.chatterlang_name}: {e}")
@@ -223,24 +256,29 @@ def _list_module_components(self, module_name: str):
223256
type_icon = "🔧"
224257
else:
225258
type_icon = "⚙️"
226-
print(f"{type_icon} {comp.chatterlang_name:<20} ({comp.name})")
259+
print(f"{type_icon} {comp.all_names_display:<30} ({comp.name})")
227260
print()
228261

229262
def _show_component(self, component_name: str):
230263
"""Show detailed information about a component."""
231-
# Try exact match first
232-
component = self.components.get(component_name)
233-
234-
# If not found, try case-insensitive search
264+
# Try exact match using name lookup
265+
primary_name = self.name_to_primary.get(component_name)
266+
component = None
267+
268+
if primary_name:
269+
component = self.components.get(primary_name)
270+
271+
# If not found, try case-insensitive search in all names
235272
if not component:
236-
matches = [name for name in self.components.keys()
273+
matches = [name for name in self.name_to_primary.keys()
237274
if name.lower() == component_name.lower()]
238275
if matches:
239-
component = self.components[matches[0]]
276+
primary_name = self.name_to_primary[matches[0]]
277+
component = self.components[primary_name]
240278

241279
# If still not found, suggest similar names
242280
if not component:
243-
similar = [name for name in self.components.keys()
281+
similar = [name for name in self.name_to_primary.keys()
244282
if component_name.lower() in name.lower()]
245283
if similar:
246284
print(f"Component '{component_name}' not found. Did you mean:")
@@ -252,7 +290,7 @@ def _show_component(self, component_name: str):
252290

253291
# Display component details
254292
print(f"\n{'='*60}")
255-
print(f"📋 {component.chatterlang_name}")
293+
print(f"📋 {component.all_names_display}")
256294
print(f"{'='*60}")
257295
print(f"Class/Function: {component.name}")
258296
print(f"Type: {component.doc_type}")
@@ -281,11 +319,12 @@ def _search_components(self, search_term: str):
281319
"""Search for components by name or description."""
282320
search_lower = search_term.lower()
283321
matches = []
284-
322+
285323
for comp_name, component in self.components.items():
286-
# Search in chatterlang name, class name, and docstring
287-
if (search_lower in comp_name.lower() or
288-
search_lower in component.name.lower() or
324+
# Search in all chatterlang names, class name, and docstring
325+
name_match = any(search_lower in name.lower() for name in component.chatterlang_names)
326+
if (name_match or
327+
search_lower in component.name.lower() or
289328
search_lower in component.docstring.lower()):
290329
matches.append(component)
291330

@@ -296,14 +335,14 @@ def _search_components(self, search_term: str):
296335
print(f"\nSearch Results for '{search_term}' ({len(matches)} found):")
297336
print("-" * 60)
298337

299-
for component in sorted(matches, key=lambda x: x.chatterlang_name):
338+
for component in sorted(matches, key=lambda x: x.primary_name):
300339
if component.doc_type == "Source":
301340
type_icon = "🔌"
302341
elif component.doc_type == "Field Segment":
303342
type_icon = "🔧"
304343
else:
305344
type_icon = "⚙️"
306-
print(f"{type_icon} {component.chatterlang_name:<20} ({component.module})")
345+
print(f"{type_icon} {component.all_names_display:<30} ({component.module})")
307346

308347
# Show brief description
309348
if component.docstring:

src/talkpipe/app/chatterlang_reference_generator.py

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -38,25 +38,42 @@ def sanitize_id(text: str) -> str:
3838
def analyze_registered_items() -> List[AnalyzedItem]:
3939
"""
4040
Analyze all registered sources and segments from the plugin system.
41+
Groups items with multiple chatterlang names together.
4142
"""
4243
load_plugins() # Ensure plugins are loaded
43-
analyzed_items = []
44-
44+
45+
# Group by class to handle multiple names for the same class
46+
class_to_names = {}
47+
4548
# Process sources
4649
for chatterlang_name, cls in input_registry.all.items():
47-
component_info = extract_component_info(chatterlang_name, cls, 'Source')
48-
if component_info:
49-
item = convert_component_info_to_analyzed_item(component_info)
50-
analyzed_items.append(item)
51-
50+
if cls not in class_to_names:
51+
class_to_names[cls] = {'names': [], 'type': 'Source'}
52+
class_to_names[cls]['names'].append(chatterlang_name)
53+
5254
# Process segments
5355
for chatterlang_name, cls in segment_registry.all.items():
54-
component_type = detect_component_type(cls, 'Segment')
55-
component_info = extract_component_info(chatterlang_name, cls, component_type)
56+
if cls not in class_to_names:
57+
component_type = detect_component_type(cls, 'Segment')
58+
class_to_names[cls] = {'names': [], 'type': component_type}
59+
class_to_names[cls]['names'].append(chatterlang_name)
60+
61+
analyzed_items = []
62+
63+
# Create AnalyzedItem objects with combined names
64+
for cls, info in class_to_names.items():
65+
# Sort names for consistent output
66+
sorted_names = sorted(info['names'])
67+
primary_name = sorted_names[0] # Use first alphabetically as primary
68+
69+
component_info = extract_component_info(primary_name, cls, info['type'])
5670
if component_info:
5771
item = convert_component_info_to_analyzed_item(component_info)
72+
# If multiple names, combine them
73+
if len(sorted_names) > 1:
74+
item.chatterlang_name = ', '.join(sorted_names)
5875
analyzed_items.append(item)
59-
76+
6077
return analyzed_items
6178

6279
def convert_component_info_to_analyzed_item(component_info: ComponentInfo) -> AnalyzedItem:

src/talkpipe/chatterlang/registry.py

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,37 @@ def all(self) -> Dict[str, Type[T]]:
3131
input_registry = Registry()
3232
segment_registry = Registry()
3333

34-
def register_source(name: str):
35-
"""Decorator to register a source module in the registry. """
34+
def register_source(*names: str, name: str = None):
35+
"""Decorator to register a source module with one or more names in the registry. """
36+
# Handle backward compatibility with name= keyword argument
37+
if name is not None:
38+
if names:
39+
raise ValueError("Cannot specify both positional names and 'name' keyword argument")
40+
names = (name,)
41+
42+
if not names:
43+
raise ValueError("At least one name must be provided")
44+
3645
def wrap(cls):
37-
input_registry.register(cls, name=name)
46+
for source_name in names:
47+
input_registry.register(cls, name=source_name)
3848
return cls
3949
return wrap
4050

41-
def register_segment(name: str):
42-
"""Decorator to register a setment module in the registry. """
51+
def register_segment(*names: str, name: str = None):
52+
"""Decorator to register a segment module with one or more names in the registry. """
53+
# Handle backward compatibility with name= keyword argument
54+
if name is not None:
55+
if names:
56+
raise ValueError("Cannot specify both positional names and 'name' keyword argument")
57+
names = (name,)
58+
59+
if not names:
60+
raise ValueError("At least one name must be provided")
61+
4362
def wrap(cls):
44-
segment_registry.register(cls, name=name)
63+
for segment_name in names:
64+
segment_registry.register(cls, name=segment_name)
4565
return cls
4666
return wrap
4767

src/talkpipe/search/lancedb.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
logger = logging.getLogger(__name__)
1212

13-
@register_segment("searchLancDB")
13+
@register_segment("searchLanceDB", "searchLancDB")
1414
@segment()
1515
def search_lancedb(items: Annotated[object, "Items with the query vectors"],
1616
path: Annotated[str, "Path to the LanceDB database"],
@@ -54,7 +54,7 @@ def search_lancedb(items: Annotated[object, "Items with the query vectors"],
5454
for result in search_results:
5555
yield result
5656

57-
@register_segment("addToLancDB")
57+
@register_segment("addToLanceDB", "addToLancDB")
5858
@segment()
5959
def add_to_lancedb(items: Annotated[object, "Items with the vectors and documents"],
6060
path: Annotated[str, "Path to the LanceDB database"],

0 commit comments

Comments
 (0)