Fixed lambda parser tracking for looping cases (#47)

segfly · Copilot · web-flow · commit 963d6a0e5345 · 2025-06-14T18:50:37.000-04:00
- Fixed lambda parser tracking for looping cases
- Added lambda cache limit and pruning
- Improved bad test case

Signed-off-by: Nicholas Pace <segfly@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
diff --git a/pyproject.toml b/pyproject.toml
@@ -20,7 +20,7 @@ requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
-version = "1.1.3" # Update manually, or use plugin
+version = "1.1.4" # Update manually, or use plugin
 packages = [{ include = "vulcan_core", from="src" }]
 requires-poetry = "~2.1.1"
 classifiers = [
diff --git a/src/vulcan_core/ast_utils.py b/src/vulcan_core/ast_utils.py
@@ -3,18 +3,21 @@
 
 import ast
 import inspect
+import logging
 import re
 import textwrap
-import threading
 from ast import Attribute, Module, Name, NodeTransformer, NodeVisitor
+from collections import OrderedDict
 from collections.abc import Callable
 from dataclasses import dataclass, field
 from functools import cached_property
 from types import MappingProxyType
-from typing import Any, TypeAliasType, get_type_hints
+from typing import Any, ClassVar, TypeAliasType, get_type_hints
 
 from vulcan_core.models import Fact, HasSource
 
+logger = logging.getLogger(__name__)
+
 
 class ASTProcessingError(RuntimeError):
     """Internal error encountered while processing AST."""
@@ -84,23 +87,74 @@ def visit_Attribute(self, node: Attribute):  # noqa: N802
         return node
 
 
-# Global index to cache and track lambda function positions within the same source lines.
-# Tuple format: (source code, last processed index)
-# TODO: Consider if a redesign is possible to have a single ASTProcessor handle the entire source line, perhaps eagerly
-# processing all lambdas found in the line before the correspondign `condition` call.
-_lambda_index_lock = threading.Lock()
-lambda_index: dict[Any, tuple[str, int | None]] = {}
+@dataclass(slots=True)
+class LambdaSource:
+    """Index entry for tracking the parsing position of lambda functions in source lines.
+
+    Attributes:
+        source (str): The source code string containing lambda functions
+        count (int): The number of lambda functions found in the source string.
+        pos (int): The current parsing position within the source string.
+    """
+
+    source: str
+    count: int
+    pos: int = field(default=0)
+    in_use: bool = field(default=True)
 
 
 @dataclass
 class ASTProcessor[T: Callable]:
+    """
+    This class extracts source code from functions or lambda expressions, parses them into
+    Abstract Syntax Trees (AST), and performs various validations and transformations.
+
+    The processor validates that:
+    - Functions have proper type hints for parameters and return types
+    - All parameters are subclasses of Fact
+    - No nested attribute access (e.g., X.y.z) is used
+    - No async functions are processed
+    - Lambda expressions do not contain parameters
+    - No duplicate parameter types in function signatures
+
+    For lambda expressions, it automatically transforms attribute access patterns
+    (e.g., ClassName.attribute) into parameterized functions for easier execution.
+
+    Note: This class is not thread-safe and should not be used concurrently across multiple threads.
+
+    Type Parameters:
+        T: The type signature the processor is working with, this varies based on a condition or action being processed.
+
+    Attributes:
+        func: The callable to process, a lambda or a function
+        decorator: The decorator type that initiated the processing (e.g., `condition` or `action`)
+        return_type: Expected return type for the callable
+        source: Extracted source code of func (set during post-init)
+        tree: Parsed AST of the source code (set during post-init)
+        facts: Tuple of fact strings discovered in the callable (set during post-init)
+
+    Properties:
+        is_lambda: True if the callable is a lambda expression
+
+    Raises:
+        OSError: When source code cannot be extracted
+        ScopeAccessError: When accessing undefined classes or using nested attributes
+        CallableSignatureError: When function signature doesn't meet requirements
+        NotAFactError: When parameter types are not Fact subclasses
+        ASTProcessingError: When AST processing encounters internal errors
+    """
+
     func: T
     decorator: Callable
     return_type: type | TypeAliasType
     source: str = field(init=False)
     tree: Module = field(init=False)
     facts: tuple[str, ...] = field(init=False)
 
+    # Class-level tracking of lambdas across parsing calls to handle multiple lambdas on the same line
+    _lambda_cache: ClassVar[OrderedDict[str, LambdaSource]] = OrderedDict()
+    _MAX_LAMBDA_CACHE_SIZE: ClassVar[int] = 1024
+
     @cached_property
     def is_lambda(self) -> bool:
         return isinstance(self.func, type(lambda: None)) and self.func.__name__ == "<lambda>"
@@ -113,30 +167,34 @@ def __post_init__(self):
             try:
                 if self.is_lambda:
                     # As of Python 3.12, there is no way to determine to which lambda self.func refers in an
-                    # expression containing multiple lambdas. Therefore we use a global dict to track the index of each
+                    # expression containing multiple lambdas. Therefore we use a dict to track the index of each
                     # lambda function encountered, as the order will correspond to the order of ASTProcessor
                     # invocations for that line. An additional benefit is that we can also use this as a cache to
                     # avoid re-reading the source code for lambda functions sharing the same line.
-                    #
-                    # The key for the index is a hash of the stack trace plus line number, which will be
-                    # unique for each call of a list of lambdas on the same line.
-                    frames = inspect.stack()[1:]  # Exclude current frame
-                    key = "".join(f"{f.filename}:{f.lineno}" for f in frames)
-
-                    # Use a lock to ensure thread safety when accessing the global lambda index
-                    with _lambda_index_lock:
-                        index = lambda_index.get(key)
-                        if index is None or index[1] is None:
-                            self.source = self._get_lambda_source()
-                            index = (self.source, 0)
-                            lambda_index[key] = index
-                        else:
-                            self.source = index[0]
-                            index = (self.source, index[1] + 1)
-                            lambda_index[key] = index
+                    source_line = f"{self.func.__code__.co_filename}:{self.func.__code__.co_firstlineno}"
+                    lambda_src = self._lambda_cache.get(source_line)
+
+                    if lambda_src is None:
+                        self.source = self._get_lambda_source()
+                        lambda_count = self._count_lambdas(self.source)
+                        lambda_src = LambdaSource(self.source, lambda_count)
+                        self._lambda_cache[source_line] = lambda_src
+                        self._trim_lambda_cache()
+                    else:
+                        self.source = lambda_src.source
+                        lambda_src.pos += 1
+
+                        # Reset the position if it exceeds the count of lambda expressions
+                        if lambda_src.pos >= lambda_src.count:
+                            lambda_src.pos = 0
 
                     # Normalize the lambda source and extract the next lambda expression from the last index
-                    self.source = self._normalize_lambda_source(self.source, index[1])
+                    self.source = self._normalize_lambda_source(self.source, lambda_src.pos)
+
+                    # If done processing lambdas in the source, mark as not processing anymore
+                    if lambda_src.pos >= lambda_src.count - 1:
+                        lambda_src.in_use = False
+
                 else:
                     self.source = textwrap.dedent(inspect.getsource(self.func))
             except OSError as e:
@@ -180,19 +238,48 @@ def __post_init__(self):
 
             self.facts = tuple(facts)
 
+    def _trim_lambda_cache(self) -> None:
+        """Clean up lambda cache by removing oldest unused entries when cache size exceeds limit."""
+        if len(self._lambda_cache) <= self._MAX_LAMBDA_CACHE_SIZE:
+            return
+
+        # Calculate how many entries to remove (excess + 20% buffer to avoid thrashing)
+        excess_count = len(self._lambda_cache) - self._MAX_LAMBDA_CACHE_SIZE
+        buffer_count = int(self._MAX_LAMBDA_CACHE_SIZE * 0.2)
+        target_count = excess_count + buffer_count
+
+        # Find and remove unused entries
+        removed_count = 0
+        for key in list(self._lambda_cache):
+            if removed_count >= target_count:
+                break
+            if not self._lambda_cache[key].in_use:
+                del self._lambda_cache[key]
+                removed_count += 1
+
+    def _count_lambdas(self, source: str) -> int:
+        """Count lambda expressions in source code using AST parsing."""
+        tree = ast.parse(source)
+
+        class LambdaCounter(ast.NodeVisitor):
+            def __init__(self):
+                self.count = 0
+
+            def visit_Lambda(self, node):  # noqa: N802 - Case sensitive for AST
+                self.count += 1
+                self.generic_visit(node)
+
+        counter = LambdaCounter()
+        counter.visit(tree)
+        return counter.count
+
     def _get_lambda_source(self) -> str:
         """Get single and multiline lambda source using AST parsing of the source file."""
-        try:
-            # Get caller frame to find the source file
-            frame = inspect.currentframe()
-            while frame and frame.f_code.co_name != self.decorator.__name__:
-                frame = frame.f_back
-
-            if not frame or not frame.f_back:
-                return textwrap.dedent(inspect.getsource(self.func))
+        source = None
 
-            caller_frame = frame.f_back
-            filename = caller_frame.f_code.co_filename
+        try:
+            # Get the source file and line number
+            filename = self.func.__code__.co_filename
             lambda_lineno = self.func.__code__.co_firstlineno
 
             # Read the source file
@@ -235,20 +322,25 @@ def visit_Lambda(self, node):  # noqa: N802 - Case sensitive for AST
                             end_line = i
                             break
 
-                return "\n".join(lines[start_line : end_line + 1])
+                source = "\n".join(lines[start_line : end_line + 1])
 
         except (OSError, SyntaxError, AttributeError):
-            pass
-
-        # Fallback to regular inspect.getsource
-        return textwrap.dedent(inspect.getsource(self.func))
+            logger.exception("Failed to extract lambda source, attempting fallback.")
+            source = inspect.getsource(self.func).strip()
 
-    def _normalize_lambda_source(self, source: str, index: int) -> str:
-        """Extracts just the lambda expression from source code."""
+        if source is None or source == "":
+            msg = "Could not extract lambda source code"
+            raise ASTProcessingError(msg)
 
-        # Remove line endings and extra whitespace
+        # Normalize the source: convert line breaks to spaces, collapse whitespace, and dedent
         source = re.sub(r"\r\n|\r|\n", " ", source)
         source = re.sub(r"\s+", " ", source)
+        source = textwrap.dedent(source)
+
+        return source
+
+    def _normalize_lambda_source(self, source: str, index: int) -> str:
+        """Extracts just the lambda expression from source code."""
 
         # Find the Nth lambda occurrence using generator expression
         positions = [i for i in range(len(source) - 5) if source[i : i + 6] == "lambda"]
diff --git a/tests/core/fixtures/rule_loading.py b/tests/core/fixtures/rule_loading.py
@@ -9,8 +9,9 @@ class Foo(Fact):
 
 
 def load_simple_rule(engine: RuleEngine):
+    # This rule tests for repeated parsing of the same lambda expression, plus potential errors with naive parsing.
     engine.rule(
         name="test_rule",
-        when=condition(lambda: Foo.baz),
+        when=condition(lambda: Foo.baz and "lambda:" != None),  # Keep this comment to test parser counting: lambda:
         then=action(partial(Foo, bol=False)),
     )
diff --git a/tests/core/test_conditions.py b/tests/core/test_conditions.py
@@ -125,6 +125,7 @@ def test_invert_condition(foo_instance: Foo):
     assert inverted.facts == cond.facts
     assert inverted(foo_instance) == (not cond(foo_instance))
 
+
 # https://github.com/latchfield/vulcan-core/issues/30
 def test_short_circuit_condition(foo_instance: Foo):
     true_condition = condition(lambda: True)
@@ -143,6 +144,7 @@ def test_short_circuit_condition(foo_instance: Foo):
     with pytest.raises(AssertionError):
         cond3()
 
+
 # https://github.com/latchfield/vulcan-core/issues/28
 def test_mixed_conditions(foo_instance: Foo, bar_instance: Bar):
     mycond = condition(lambda: Foo.baz)
@@ -151,6 +153,7 @@ def test_mixed_conditions(foo_instance: Foo, bar_instance: Bar):
     result = compound_cond(foo_instance, bar_instance)
     assert result is False
 
+
 # https://github.com/latchfield/vulcan-core/issues/28
 def test_multiple_lambdas(foo_instance: Foo, bar_instance: Bar):
     compound_cond1 = condition(lambda: Foo.baz) & condition(lambda: Bar.biz)
@@ -161,6 +164,7 @@ def test_multiple_lambdas(foo_instance: Foo, bar_instance: Bar):
     assert result1 is False
     assert result2 is True
 
+
 # https://github.com/latchfield/vulcan-core/issues/28
 def test_mixed_conditions_decorator(foo_instance: Foo, bar_instance: Bar):
     @condition
@@ -180,6 +184,7 @@ def test_non_boolean_question(custom_model: BaseChatModel, fact_a_instance: Fact
     with pytest.raises(AIDecisionError):
         cond(fact_a_instance)
 
+
 # https://github.com/latchfield/vulcan-core/issues/32
 @pytest.mark.integration
 def test_literal_placeholder_interpretation(fact_a_instance: FactA):
diff --git a/tests/core/test_engine.py b/tests/core/test_engine.py
@@ -77,9 +77,10 @@ def test_simple_rule(engine: RuleEngine):
 
 
 # https://github.com/latchfield/vulcan-core/issues/44
+# Updated for https://github.com/latchfield/vulcan-core/issues/46
 def test_lambda_reparsing(engine: RuleEngine):
-    load_simple_rule(engine)
-    load_simple_rule(engine)
+    for _ in range(2):
+        load_simple_rule(engine)
 
 
 def test_same_fact_multiple_attributes_lambda(engine: RuleEngine):

Original file line number	Diff line number	Diff line change
`@@ -9,8 +9,9 @@ class Foo(Fact):`
`9`	`9`
`10`	`10`
`11`	`11`	`def load_simple_rule(engine: RuleEngine):`
	`12`	`+ # This rule tests for repeated parsing of the same lambda expression, plus potential errors with naive parsing.`
`12`	`13`	`engine.rule(`
`13`	`14`	`name="test_rule",`
`14`		`- when=condition(lambda: Foo.baz),`
	`15`	`+ when=condition(lambda: Foo.baz and "lambda:" != None), # Keep this comment to test parser counting: lambda:`
`15`	`16`	`then=action(partial(Foo, bol=False)),`
`16`	`17`	`)`