From e784545c83c1d8d4287b9a5f20e0c82b4e122b4b Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 23 Sep 2025 06:31:24 -0700 Subject: [PATCH 1/4] oops --- codeflash/discovery/discover_unit_tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py index 7b9681854..8fc7401c4 100644 --- a/codeflash/discovery/discover_unit_tests.py +++ b/codeflash/discovery/discover_unit_tests.py @@ -109,7 +109,7 @@ def insert_test( def get_tests_for_file(self, file_path: str, file_hash: str) -> list[FunctionCalledInTest] | None: cache_key = (file_path, file_hash) - if cache_key in self._memory_cache: + if cache_key in self.memory_cache: return self.memory_cache[cache_key] self.cur.execute("SELECT * FROM discovered_tests WHERE file_path = ? AND file_hash = ?", (file_path, file_hash)) From b0da3690d13f1a1dfaa0066b7419db3aa9bc7157 Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 23 Sep 2025 06:44:24 -0700 Subject: [PATCH 2/4] respond to code review --- codeflash/discovery/discover_unit_tests.py | 50 ++++++++++------------ 1 file changed, 23 insertions(+), 27 deletions(-) diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py index 8fc7401c4..148e04934 100644 --- a/codeflash/discovery/discover_unit_tests.py +++ b/codeflash/discovery/discover_unit_tests.py @@ -107,7 +107,9 @@ def insert_test( ) self.connection.commit() - def get_tests_for_file(self, file_path: str, file_hash: str) -> list[FunctionCalledInTest] | None: + def get_function_to_test_map_for_file( + self, file_path: str, file_hash: str + ) -> dict[str, set[FunctionCalledInTest]] | None: cache_key = (file_path, file_hash) if cache_key in self.memory_cache: return self.memory_cache[cache_key] @@ -117,15 +119,20 @@ def get_tests_for_file(self, file_path: str, file_hash: str) -> list[FunctionCal if not rows: return None - result = [ - FunctionCalledInTest( + function_to_test_map = defaultdict(set) + + for row in rows: + qualified_name_with_modules_from_root = row[2] + function_called_in_test = FunctionCalledInTest( tests_in_file=TestsInFile( test_file=Path(row[0]), test_class=row[4], test_function=row[5], test_type=TestType(int(row[6])) ), position=CodePosition(line_no=row[7], col_no=row[8]), + qualified_name_with_modules_from_root=qualified_name_with_modules_from_root, ) - for row in rows - ] + function_to_test_map[qualified_name_with_modules_from_root].add(function_called_in_test) + + result = dict(function_to_test_map) self.memory_cache[cache_key] = result return result @@ -552,28 +559,16 @@ def process_test_files( for test_file, functions in file_to_test_map.items(): file_hash = TestsCache.compute_file_hash(test_file) - cached_tests = tests_cache.get_tests_for_file(str(test_file), file_hash) - - if cached_tests: - # Rebuild function_to_test_map from cached data - tests_cache.cur.execute( - "SELECT * FROM discovered_tests WHERE file_path = ? AND file_hash = ?", (str(test_file), file_hash) - ) - for row in tests_cache.cur.fetchall(): - qualified_name_with_modules_from_root = row[2] - test_type = TestType(int(row[6])) - - function_called_in_test = FunctionCalledInTest( - tests_in_file=TestsInFile( - test_file=test_file, test_class=row[4], test_function=row[5], test_type=test_type - ), - position=CodePosition(line_no=row[7], col_no=row[8]), - ) - - function_to_test_map[qualified_name_with_modules_from_root].add(function_called_in_test) - if test_type == TestType.REPLAY_TEST: - num_discovered_replay_tests += 1 - num_discovered_tests += 1 + cached_function_to_test_map = tests_cache.get_function_to_test_map_for_file(str(test_file), file_hash) + + if cached_function_to_test_map: + for qualified_name, test_set in cached_function_to_test_map.items(): + function_to_test_map[qualified_name].update(test_set) + + for function_called_in_test in test_set: + if function_called_in_test.tests_in_file.test_type == TestType.REPLAY_TEST: + num_discovered_replay_tests += 1 + num_discovered_tests += 1 progress.advance(task_id) continue @@ -707,6 +702,7 @@ def process_test_files( test_type=test_func.test_type, ), position=CodePosition(line_no=name.line, col_no=name.column), + qualified_name_with_modules_from_root=qualified_name_with_modules_from_root, ) ) tests_cache.insert_test( From ce72cfd118a08e120f5cb43368a14e4f8b3cdcab Mon Sep 17 00:00:00 2001 From: Kevin Turcios Date: Tue, 23 Sep 2025 06:46:23 -0700 Subject: [PATCH 3/4] cleanup --- codeflash/discovery/discover_unit_tests.py | 2 -- codeflash/models/models.py | 1 - 2 files changed, 3 deletions(-) diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py index 148e04934..5b5c3c1e5 100644 --- a/codeflash/discovery/discover_unit_tests.py +++ b/codeflash/discovery/discover_unit_tests.py @@ -128,7 +128,6 @@ def get_function_to_test_map_for_file( test_file=Path(row[0]), test_class=row[4], test_function=row[5], test_type=TestType(int(row[6])) ), position=CodePosition(line_no=row[7], col_no=row[8]), - qualified_name_with_modules_from_root=qualified_name_with_modules_from_root, ) function_to_test_map[qualified_name_with_modules_from_root].add(function_called_in_test) @@ -702,7 +701,6 @@ def process_test_files( test_type=test_func.test_type, ), position=CodePosition(line_no=name.line, col_no=name.column), - qualified_name_with_modules_from_root=qualified_name_with_modules_from_root, ) ) tests_cache.insert_test( diff --git a/codeflash/models/models.py b/codeflash/models/models.py index c1a563672..687dc002f 100644 --- a/codeflash/models/models.py +++ b/codeflash/models/models.py @@ -363,7 +363,6 @@ class FunctionCalledInTest: tests_in_file: TestsInFile position: CodePosition - @dataclass(frozen=True) class CodePosition: line_no: int From fda19eae092e8c5a33fb69082333317304d509b7 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Tue, 23 Sep 2025 14:02:32 +0000 Subject: [PATCH 4/4] Optimize discover_parameters_unittest The optimized code achieves a 252% speedup through two key optimizations: **1. Early Exit for No Underscores**: Added a check `if '_' not in function_name:` that immediately returns `False` for strings without underscores. This avoids unnecessary splitting operations for simple function names, providing significant speedups (51-69% faster) for cases like single words or empty strings. **2. Right-Split Optimization**: Replaced `split("_")` with `rsplit("_", 1)` which only splits from the right once, creating exactly 2 parts instead of potentially hundreds. This dramatically reduces memory allocation and processing time, especially for long function names with many underscores. **Performance Impact by Test Type**: - **Simple cases** (no underscores): 51-69% faster due to early exit - **Valid numeric suffixes**: 21-50% faster from efficient rsplit - **Large-scale tests**: 660-3584% faster - the rsplit optimization shines here, avoiding expensive operations on strings with hundreds of parts - **Edge cases with non-numeric suffixes**: Slight 2-13% slowdown due to the additional underscore check, but this is minimal compared to the gains The optimizations preserve exact functionality while being most effective for complex function names with many parts, which are common in parameterized test scenarios. --- codeflash/discovery/discover_unit_tests.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py index 5b5c3c1e5..0acfccd0e 100644 --- a/codeflash/discovery/discover_unit_tests.py +++ b/codeflash/discovery/discover_unit_tests.py @@ -523,9 +523,12 @@ def get_test_details(_test: unittest.TestCase) -> TestsInFile | None: def discover_parameters_unittest(function_name: str) -> tuple[bool, str, str | None]: - function_parts = function_name.split("_") - if len(function_parts) > 1 and function_parts[-1].isdigit(): - return True, "_".join(function_parts[:-1]), function_parts[-1] + if "_" not in function_name: + return False, function_name, None + + function_parts = function_name.rsplit("_", 1) + if len(function_parts) == 2 and function_parts[1].isdigit(): + return True, function_parts[0], function_parts[1] return False, function_name, None