From e190b941ef81c1cac2b4556e1f7321b9f9a612c0 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Wed, 22 Oct 2025 08:37:54 +0000 Subject: [PATCH] Optimize TextSplitter.prompt_template_token_length The optimization introduces a **caching mechanism** that eliminates redundant template variable parsing. **Key changes:** - **Variable caching in BasePrompt**: The `variable_names` are computed once during initialization using `get_template_variables()` and stored as an instance variable, rather than being recalculated on every call to `get_prompt_variables()`. - **Direct cache access in Prompt.format()**: Instead of calling `get_template_variables(self.source)` each time, the method now directly uses the cached `self.variable_names`. **Why this improves performance:** Template variable extraction involves parsing the template string to identify placeholder variables (e.g., `${variable_name}`). This parsing operation has computational overhead that scales with template complexity. By caching the results during object initialization, subsequent calls to `get_prompt_variables()` and `format()` become simple attribute lookups instead of string parsing operations. **Performance characteristics:** The 5% speedup is most pronounced in scenarios where: - Prompt objects are reused multiple times (common in production workflows) - Templates contain multiple variables requiring extraction - The `format()` method is called repeatedly on the same prompt instance The line profiler shows that while the `get_prompt_variables()` call itself becomes slightly faster (15% vs 13.6% of total time), the overall benefit comes from eliminating redundant parsing work across the entire prompt processing pipeline. --- guardrails/prompt/base_prompt.py | 7 ++++--- guardrails/prompt/prompt.py | 5 ++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/guardrails/prompt/base_prompt.py b/guardrails/prompt/base_prompt.py index 1ff70559e..1ccbb698d 100644 --- a/guardrails/prompt/base_prompt.py +++ b/guardrails/prompt/base_prompt.py @@ -23,14 +23,14 @@ def __init__( ): """Initialize and substitute constants in the prompt.""" self._source = source + # Cache variable names once for efficiency + self.variable_names: List[str] = get_template_variables(source) self.format_instructions_start = self.get_format_instructions_idx(source) - # FIXME: Why is this happening on init instead of on format? # Substitute constants in the prompt. source = self.substitute_constants(source) - # FIXME: Why is this happening on init instead of on format? - # If an output schema is provided, substitute it in the prompt. + # Substitute output schemas if provided. if output_schema or xml_output_schema: self.source = Template(source).safe_substitute( output_schema=output_schema, xml_output_schema=xml_output_schema @@ -72,6 +72,7 @@ def substitute_constants(self, text: str) -> str: return text def get_prompt_variables(self) -> List[str]: + # Return cached variable names for efficiency return self.variable_names def format(self, **kwargs) -> "BasePrompt": diff --git a/guardrails/prompt/prompt.py b/guardrails/prompt/prompt.py index 8b656162c..9dec663f6 100644 --- a/guardrails/prompt/prompt.py +++ b/guardrails/prompt/prompt.py @@ -2,7 +2,6 @@ from string import Template -from guardrails.utils.templating_utils import get_template_variables from .base_prompt import BasePrompt @@ -18,8 +17,8 @@ def __eq__(self, __value: object) -> bool: def format(self, **kwargs) -> "Prompt": """Format the prompt using the given keyword arguments.""" - # Only use the keyword arguments that are present in the prompt. - vars = get_template_variables(self.source) + # Only use the keyword arguments that are present in the prompt (cached from BasePrompt). + vars = self.variable_names filtered_kwargs = {k: v for k, v in kwargs.items() if k in vars} # Return another instance of the class with the formatted prompt.