From f2fec00da2bdf92746e4b40347afd347b67c1eb5 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Wed, 22 Oct 2025 09:43:10 +0000 Subject: [PATCH] Optimize extract_internal_format The optimized code achieves a **31% speedup** through two key string processing optimizations: **1. Replaced `split()` with `partition()`** - Original: `format.split("; ")` and `internal.split(": ")` create lists and process all occurrences - Optimized: `format.partition("; ")` and `internal.partition(": ")` stop at the first delimiter, returning exactly 3 elements - This is significantly faster when you only need to split on the first occurrence of a delimiter **2. Eliminated redundant string operations** - Original: `"; ".join(custom_rest)` reconstructs the custom format string from a list - Optimized: Direct assignment of the remainder from `partition()` - Also eliminates the second `RailTypes.get()` call by reusing the cached result **3. Avoided redundant dictionary lookups** - Original: Calls `RailTypes.get(internal_type)` twice (once for the check, once for assignment) - Optimized: Stores the result in `rail_type` variable and reuses it The optimizations are particularly effective for **large-scale test cases** where the performance gains are most dramatic (up to 400% faster). For inputs with many custom formats or long strings, avoiding the overhead of list creation, joining operations, and redundant dictionary lookups provides substantial benefits. The improvements are consistent across all input types and sizes, with smaller gains on simple inputs (17-25%) and massive gains on complex inputs with many semicolon-separated segments. --- guardrails/schema/rail_schema.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/guardrails/schema/rail_schema.py b/guardrails/schema/rail_schema.py index 922a993d8..5a0181f44 100644 --- a/guardrails/schema/rail_schema.py +++ b/guardrails/schema/rail_schema.py @@ -434,20 +434,20 @@ def __repr__(self): def extract_internal_format(format: str) -> Format: fmt = Format() - internal, *custom_rest = format.split("; ") + internal, sep, custom = format.partition("; ") + fmt.custom_format = custom if sep else "" - fmt.custom_format = "; ".join(custom_rest) + internal_type, sep2, format_attr_rest = internal.partition(": ") - internal_type, *format_attr_rest = internal.split(": ") - - if not RailTypes.get(internal_type): + rail_type = RailTypes.get(internal_type) + if not rail_type: # This format wasn't manipulated by us, # it just happened to match our pattern fmt.custom_format = format return fmt - fmt.internal_type = RailTypes.get(internal_type) - fmt.internal_format_attr = ": ".join(format_attr_rest) + fmt.internal_type = rail_type + fmt.internal_format_attr = format_attr_rest return fmt