From 44f0ed2218fcf2570c52557e928c40e264cc10c5 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Wed, 30 Jul 2025 02:24:42 +0000 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Speed=20up=20function=20`r?= =?UTF-8?q?egex=5Fmatch`=20by=20150%?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimized code achieves a 149% speedup through two key optimizations: **1. Pre-compilation of regex pattern** The original code calls `re.match(pattern, s)` inside the loop. The `re` module caches compiled patterns, so the pattern is not literally recompiled for every string, but each call still pays for a pattern-cache lookup and an extra layer of function dispatch. The optimized version calls `re.compile(pattern)` once before the loop and reuses the resulting compiled pattern object, so each iteration only performs the match itself. This eliminates the redundant per-call lookup overhead. **2. List comprehension instead of explicit loop** The optimized code replaces the explicit `for` loop and its `matched.append(s)` calls with a list comprehension. List comprehensions are typically faster in Python because the append is performed by a dedicated bytecode instruction, avoiding the repeated `matched.append` attribute lookup and method call on every iteration. 
**Performance analysis from line profiler:** - Original: 88.4% of time spent in `re.match(pattern, s)` calls (40.3ms total) - Optimized: 91.7% of time spent in one-time `re.compile(pattern)` call (17.3ms total) - The actual matching loop becomes much faster, taking only 8.3% of total time **Test case performance patterns:** - **Small lists (basic tests)**: 18-64% speedup, reflecting the reduced per-call overhead - **Large lists (1000+ items)**: 145-217% speedup, where the per-string savings add up across the whole list - **Empty lists**: Actually slower (72.7%), because the up-front `re.compile` call is made even though there are no strings to match - **Complex patterns**: Consistent speedup regardless of pattern complexity (Unicode, lookaheads, alternation) The optimization is most effective for scenarios with multiple strings to match against the same pattern, where the one-time cost of the `re.compile` call is amortized across many match operations. For single-string matching or very small lists, the up-front `re.compile` call might not be worth it, but for typical use cases with multiple strings, this provides substantial performance gains. --- src/dsa/various.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/dsa/various.py b/src/dsa/various.py index 4356039..23c9c8d 100644 --- a/src/dsa/various.py +++ b/src/dsa/various.py @@ -62,11 +62,8 @@ def dfs(n: int) -> None: def regex_match(strings: list[str], pattern: str) -> list[str]: - matched = [] - for s in strings: - if re.match(pattern, s): - matched.append(s) - return matched + compiled_pattern = re.compile(pattern) + return [s for s in strings if compiled_pattern.match(s)] def is_palindrome(text: str) -> bool: