Skip to content

Commit 003fe8e

Browse files
committed
Improved documentation; renamed eq, neq, etc. to EQ, NEQ, etc. (Python API only — Chatterlang names are unchanged)
1 parent 0f9dc24 commit 003fe8e

File tree

7 files changed

+240
-189
lines changed

7 files changed

+240
-189
lines changed

src/talkpipe/pipe/fork.py

Lines changed: 43 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,9 @@
1+
"""Fork segments: split a stream into parallel branches.
2+
3+
ForkSegment distributes items across multiple downstream pipelines using
4+
threads and queues. Supports round-robin (one item per branch) or broadcast
5+
(all items to all branches).
6+
"""
17
from typing import List, Iterator, Iterable, Any
28
import logging
39
from queue import Queue
@@ -7,42 +13,50 @@
713

814
logger = logging.getLogger(__name__)
915

16+
# Sentinel to signal end of stream to branch consumers
17+
_poison_pill = object()
18+
19+
1020
class ForkMode(Enum):
1121
"""Distribution modes for fork segments."""
22+
1223
ROUND_ROBIN = "round_robin" # Distribute items across branches
1324
BROADCAST = "broadcast" # Send all items to all branches
1425

15-
_poison_pill = object()
1626

1727
def _poison_filter(queue: Queue) -> Iterator[Any]:
18-
"""Filter that adds a poison pill to the end of the input."""
28+
"""Iterator over queue items until _poison_pill is seen."""
1929
while True:
2030
item = queue.get()
2131
if item is _poison_pill:
2232
break
2333
yield item
2434

35+
2536
class ForkSegment(AbstractSegment):
26-
"""A segment that forks the input stream into multiple downstream pipelines,
27-
processing them in parallel using threads.
28-
"""
29-
30-
def __init__(self,
31-
branches: List[AbstractSegment],
32-
mode: ForkMode = ForkMode.BROADCAST,
33-
max_queue_size: int = 100,
34-
num_threads: int = None):
35-
"""Initialize the fork segment."""
36-
# Set process_metadata=True so metadata flows into branches
37-
super().__init__(process_metadata=True)
37+
"""Forks the input stream into multiple downstream pipelines in parallel."""
38+
39+
def __init__(
40+
self,
41+
branches: List[AbstractSegment],
42+
mode: ForkMode = ForkMode.BROADCAST,
43+
max_queue_size: int = 100,
44+
num_threads: int = None,
45+
):
46+
super().__init__(process_metadata=True) # Metadata flows into branches
3847
self.branches = branches
3948
self.mode = mode
4049
self.max_queue_size = max_queue_size
4150
self.num_threads = num_threads or len(branches)
4251

43-
def process_branch(self, branch_id: int, branch: AbstractSegment,
44-
input_queue: Queue, output_queue: Queue):
45-
"""Process a single branch of the fork."""
52+
def process_branch(
53+
self,
54+
branch_id: int,
55+
branch: AbstractSegment,
56+
input_queue: Queue,
57+
output_queue: Queue,
58+
):
59+
"""Run one branch: consume from input_queue, emit (branch_id, item) to output_queue."""
4660
try:
4761
if isinstance(branch, AbstractSegment):
4862
iter = branch(_poison_filter(input_queue))
@@ -51,19 +65,19 @@ def process_branch(self, branch_id: int, branch: AbstractSegment,
5165

5266
for item in iter:
5367
output_queue.put((branch_id, item))
54-
68+
5569
except Exception as e:
5670
logger.error(f"Error in fork branch {branch_id}: {e}")
5771
raise
5872
finally:
59-
output_queue.put((branch_id, None)) # Signal branch completion
73+
output_queue.put((branch_id, None)) # Sentinel: branch finished
6074
input_queue.task_done()
6175

6276
def transform(self, input_iter: Iterable[Any]) -> Iterator[Any]:
63-
"""Transform input by distributing it across multiple branches."""
77+
"""Distribute input to branches, collect results as they complete."""
6478
input_queues = [Queue(maxsize=self.max_queue_size) for _ in self.branches]
6579
output_queue = Queue()
66-
80+
6781
with ThreadPoolExecutor(max_workers=self.num_threads) as executor:
6882
# Submit branch processing tasks
6983
futures = [
@@ -89,7 +103,7 @@ def transform(self, input_iter: Iterable[Any]) -> Iterator[Any]:
89103
for queue in input_queues:
90104
queue.put(_poison_pill)
91105

92-
# Yield results as they become available
106+
# Drain output_queue; result=None is branch completion sentinel
93107
active_branches = len(self.branches)
94108
while active_branches > 0:
95109
branch_id, result = output_queue.get()
@@ -105,10 +119,11 @@ def transform(self, input_iter: Iterable[Any]) -> Iterator[Any]:
105119
for future in futures:
106120
future.cancel()
107121

108-
# Helper function to create a fork
109-
def fork(*branches: AbstractSegment,
110-
mode: ForkMode = ForkMode.ROUND_ROBIN,
111-
max_queue_size: int = 100,
112-
num_threads: int = None) -> ForkSegment:
113-
"""Create a fork segment with the given branches."""
122+
def fork(
123+
*branches: AbstractSegment,
124+
mode: ForkMode = ForkMode.ROUND_ROBIN,
125+
max_queue_size: int = 100,
126+
num_threads: int = None,
127+
) -> ForkSegment:
128+
"""Create a ForkSegment with the given branches."""
114129
return ForkSegment(list(branches), mode, max_queue_size, num_threads)

src/talkpipe/pipe/math.py

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
"""Math operations for pipe."""
1+
"""Math operations for pipe: random numbers, ranges, scaling, and comparison filters.
22
3+
Provides sources (randomInts, range) and segments (scale, eq, neq, gt, gte, lt, lte)
4+
for numeric pipelines.
5+
"""
36
from typing import Iterable, Union, Callable, Any, Annotated
47
from numpy import random
58
from talkpipe.pipe import core
@@ -27,7 +30,7 @@ def scale(
2730
yield x * multiplier
2831

2932
@registry.register_source(name="range")
30-
@core.source(lower=0, upper=10)
33+
@core.source(lower=0, upper=10)
3134
def arange(
3235
lower: Annotated[int, "Lower bound of the range (inclusive)"],
3336
upper: Annotated[int, "Upper bound of the range (exclusive)"]
@@ -42,27 +45,29 @@ def arange(
4245

4346

4447
class AbstractComparisonFilter(core.AbstractSegment):
45-
"""Abstract base class for comparison segments."""
48+
"""Base for comparison segments: filter items where field value op threshold."""
4649

47-
def __init__(self,
48-
field: Annotated[str, "Field/property to compare"],
49-
n: Annotated[Any, "Value to compare against"],
50-
comparator: Callable[[Any, Any], bool]):
50+
def __init__(
51+
self,
52+
field: Annotated[str, "Field/property to compare"],
53+
n: Annotated[Any, "Value to compare against"],
54+
comparator: Callable[[Any, Any], bool],
55+
):
5156
super().__init__()
5257
self.field = field
5358
self.n = n
5459
self.comparator = comparator
5560

5661
def transform(self, items: Iterable) -> Iterable:
57-
"""Filter items based on the comparison."""
62+
"""Yield items whose field value satisfies the comparator."""
5863
for item in items:
5964
value = extract_property(item, self.field, fail_on_missing=True)
6065
if self.comparator(value, self.n):
6166
yield item
6267

6368

6469
def _make_comparison_segment(name: str, op: Callable[[Any, Any], bool], docstring: str):
65-
"""Factory for comparison segments (eq, neq, gt, gte, lt, lte)."""
70+
"""Factory: create a registered comparison segment with given op and docstring."""
6671
@registry.register_segment(name=name)
6772
class ComparisonSegment(AbstractComparisonFilter):
6873
__doc__ = docstring
@@ -75,21 +80,16 @@ def __init__(self,
7580
return ComparisonSegment
7681

7782

78-
# TODO: rename to EQ in 0.5.0
79-
eq = _make_comparison_segment("eq", lambda x, y: x == y,
83+
# Comparison segments: filter by field value vs threshold
84+
EQ = _make_comparison_segment("eq", lambda x, y: x == y,
8085
"Filter items where a specified field's value equals a number.")
81-
# TODO: rename to NEQ in 0.5.0
82-
neq = _make_comparison_segment("neq", lambda x, y: x != y,
86+
NEQ = _make_comparison_segment("neq", lambda x, y: x != y,
8387
"Filter items where a specified field's value does not equal a number.")
84-
# TODO: rename to GT in 0.5.0
85-
gt = _make_comparison_segment("gt", lambda x, y: x > y,
88+
GT = _make_comparison_segment("gt", lambda x, y: x > y,
8689
"Filter items where a specified field's value is greater than a number.")
87-
# TODO: rename to GTE in 0.5.0
88-
gte = _make_comparison_segment("gte", lambda x, y: x >= y,
90+
GTE = _make_comparison_segment("gte", lambda x, y: x >= y,
8991
"Filter items where a specified field's value is greater than or equal to a number.")
90-
# TODO: rename to LT in 0.5.0
91-
lt = _make_comparison_segment("lt", lambda x, y: x < y,
92+
LT = _make_comparison_segment("lt", lambda x, y: x < y,
9293
"Filters items based on a field value being less than a specified number.")
93-
# TODO: rename to LTE in 0.5.0
94-
lte = _make_comparison_segment("lte", lambda x, y: x <= y,
94+
LTE = _make_comparison_segment("lte", lambda x, y: x <= y,
9595
"Filter items where a specified field's value is less than or equal to a number.")

src/talkpipe/util/collections.py

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,8 @@
1+
"""Collection utilities: adaptive buffers and expiring key-value stores.
2+
3+
Provides AdaptiveBuffer for rate-aware batching and ExpiringDict for
4+
in-memory caches with optional TTL and persistence.
5+
"""
16
import json
27
import logging
38
import os
@@ -6,8 +11,15 @@
611

712
logger = logging.getLogger(__name__)
813

14+
915
class AdaptiveBuffer:
10-
"""Buffer that adapts its flush size based on item arrival rate."""
16+
"""Buffer that adapts its flush size based on item arrival rate.
17+
18+
When items arrive quickly (intervals <= fast_interval), flushes at max_size
19+
for efficiency. When items arrive slowly (intervals >= slow_interval),
20+
flushes at min_size for responsiveness. Uses EMA of inter-arrival intervals
21+
to interpolate target size between these extremes.
22+
"""
1123

1224
def __init__(
1325
self,
@@ -18,6 +30,7 @@ def __init__(
1830
smoothing=0.2,
1931
time_func=time.time,
2032
):
33+
"""Initialize the buffer with size and timing parameters."""
2134
if min_size < 1:
2235
raise ValueError("min_size must be >= 1")
2336
if max_size < min_size:
@@ -42,6 +55,7 @@ def __init__(
4255
self._target_size = min_size
4356

4457
def append(self, item):
58+
"""Add an item; returns flushed batch if target size reached, else None."""
4559
now = self.time_func()
4660
if self._last_append_time is not None:
4761
interval = max(0.0, now - self._last_append_time)
@@ -55,6 +69,7 @@ def append(self, item):
5569
return None
5670

5771
def extend(self, items):
72+
"""Add multiple items; returns list of any flushed batches."""
5873
flushed = []
5974
for item in items:
6075
batch = self.append(item)
@@ -63,6 +78,7 @@ def extend(self, items):
6378
return flushed
6479

6580
def flush(self):
81+
"""Force flush and return all buffered items, or None if empty."""
6682
if not self._buffer:
6783
return None
6884
items = self._buffer
@@ -73,20 +89,23 @@ def __len__(self):
7389
return len(self._buffer)
7490

7591
def _update_interval(self, interval):
92+
"""Update EMA of inter-arrival interval using smoothing factor."""
7693
if self._ema_interval is None:
7794
self._ema_interval = interval
7895
else:
7996
alpha = self.smoothing
8097
self._ema_interval = (alpha * interval) + ((1 - alpha) * self._ema_interval)
8198

8299
def _compute_target_size(self):
100+
"""Compute target flush size from EMA interval (min_size to max_size)."""
83101
if self._ema_interval is None:
84102
return self.min_size
85103
if self._ema_interval <= self.fast_interval:
86104
return self.max_size
87105
if self._ema_interval >= self.slow_interval:
88106
return self.min_size
89107

108+
# Linear interpolation: faster arrivals -> larger target
90109
ratio = (self.slow_interval - self._ema_interval) / (
91110
self.slow_interval - self.fast_interval
92111
)
@@ -95,7 +114,14 @@ def _compute_target_size(self):
95114

96115

97116
class ExpiringDict(UserDict):
117+
"""Dict-like store with per-key TTL and optional JSON persistence.
118+
119+
Keys expire after their TTL (seconds). If filename is set, the dict is
120+
saved to disk on every mutation and loaded on init.
121+
"""
122+
98123
def __init__(self, filename=None, default_ttl=None):
124+
"""Initialize with optional persistence path and default TTL in seconds."""
99125
super().__init__()
100126
self.default_ttl = default_ttl
101127
self.filename = filename
@@ -105,6 +131,7 @@ def __init__(self, filename=None, default_ttl=None):
105131
self._load()
106132

107133
def __setitem__(self, key, value, ttl=None):
134+
"""Set key to value; ttl overrides default_ttl if provided."""
108135
self.data[key] = value
109136

110137
if ttl is None:
@@ -117,6 +144,7 @@ def __setitem__(self, key, value, ttl=None):
117144
self._save() # Save when a key is set
118145

119146
def __delitem__(self, key):
147+
"""Delete key and its expiry entry."""
120148
super().__delitem__(key)
121149
if key in self.expiry:
122150
del self.expiry[key]
@@ -125,6 +153,7 @@ def __delitem__(self, key):
125153
self._save() # Save when a key is deleted
126154

127155
def __getitem__(self, key):
156+
"""Get value; raises KeyError if missing or expired."""
128157
self._clean_expired()
129158
if key in self.expiry and time.time() > self.expiry[key]:
130159
del self.data[key]
@@ -135,6 +164,7 @@ def __getitem__(self, key):
135164
return self.data[key]
136165

137166
def set_with_ttl(self, key, value, ttl):
167+
"""Convenience method to set a key with explicit TTL."""
138168
self.__setitem__(key, value, ttl)
139169

140170
def clear(self):
@@ -165,7 +195,7 @@ def popitem(self):
165195
return key, value
166196

167197
def _clean_expired(self):
168-
"""Remove all expired keys"""
198+
"""Remove all expired keys from data and expiry maps."""
169199
now = time.time()
170200
expired = [k for k, exp in self.expiry.items() if now > exp]
171201
if expired: # Only save if something was expired
@@ -197,6 +227,7 @@ def __contains__(self, key):
197227
return key in self.data
198228

199229
def _save(self):
230+
"""Persist data and expiry to JSON file via atomic write."""
200231
if self.filename:
201232
# Use .tmp file and atomic rename for safety
202233
tmp_filename = str(self.filename) + '.tmp'
@@ -214,6 +245,7 @@ def _save(self):
214245
raise
215246

216247
def _load(self):
248+
"""Load data and expiry from JSON file; on failure, start empty."""
217249
try:
218250
if os.path.exists(self.filename):
219251
with open(self.filename, 'r') as f:

0 commit comments

Comments
 (0)