codeflash/code_utils/code_utils.py (16 additions, 0 deletions)
@@ -355,3 +355,19 @@ def exit_with_message(message: str, *, error_on_exit: bool = False) -> None:
     paneled_text(message, panel_args={"style": "red"})

     sys.exit(1 if error_on_exit else 0)
+
+
+def extract_unique_errors(pytest_output: str) -> set[str]:
+    unique_errors = set()
+
+    # Regex pattern to match pytest error lines:
+    # - start with 'E' followed by whitespace
+    # - capture the actual error message
+    pattern = r"^E\s+(.*)$"
+
+    for error_message in re.findall(pattern, pytest_output, re.MULTILINE):
+        error_message = error_message.strip()  # noqa: PLW2901
+        if error_message:
+            unique_errors.add(error_message)
+
+    return unique_errors
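For context, here is a minimal usage sketch of the new helper. The sample pytest output below is invented for illustration; only the "E "-prefixed lines are matched, and duplicate messages collapse into one entry:

from codeflash.code_utils.code_utils import extract_unique_errors

# Invented pytest output: two identical assertion failures plus one import error.
sample_output = (
    "test_foo.py:12: in test_foo\n"
    "E       AssertionError: assert 3 == 4\n"
    "test_bar.py:7: in test_bar\n"
    "E       AssertionError: assert 3 == 4\n"
    "E       ModuleNotFoundError: No module named 'foo'\n"
)

print(extract_unique_errors(sample_output))
# {'AssertionError: assert 3 == 4', "ModuleNotFoundError: No module named 'foo'"}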
codeflash/optimization/function_optimizer.py (16 additions, 6 deletions)
@@ -30,10 +30,10 @@
     replace_function_definitions_in_module,
 )
 from codeflash.code_utils.code_utils import (
-    ImportErrorPattern,
     cleanup_paths,
     create_rank_dictionary_compact,
     diff_length,
+    extract_unique_errors,
     file_name_from_test_module_name,
     get_run_tmp_file,
     module_name_from_file_path,
@@ -1576,11 +1576,14 @@ def establish_original_code_baseline(
         )
         if not behavioral_results:
             logger.warning(
-                f"force_lsp|Couldn't run any tests for original function {self.function_to_optimize.function_name}. SKIPPING OPTIMIZING THIS FUNCTION."
+                f"force_lsp|Couldn't run any tests for original function {self.function_to_optimize.function_name}. Skipping optimization."
             )
             console.rule()
             return Failure("Failed to establish a baseline for the original code - behavioral tests failed.")
         if not coverage_critic(coverage_results, self.args.test_framework):
+            did_pass_all_tests = all(result.did_pass for result in behavioral_results)
+            if not did_pass_all_tests:
+                return Failure("Tests failed to pass for the original code.")
Comment on lines +1584 to +1586

Contributor:

Our tests aren't yet reliable enough for this to work smoothly; currently we have tests that fail frequently.

@mohammedahmed18 (Contributor Author), Oct 22, 2025:

So you're saying that if the coverage wasn't enough and some tests failed, the optimization didn't necessarily fail because of the failed tests? What do you suggest instead, @KRRT7?
             return Failure(
                 f"Test coverage is {coverage_results.coverage}%, which is below the required threshold of {COVERAGE_THRESHOLD}%."
             )
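To make the new branch easier to follow (the diff view flattens indentation, so the nesting shown is an assumption based on the review thread), here is a self-contained sketch of the intended decision logic; COVERAGE_THRESHOLD's value and the function name are hypothetical stand-ins:

COVERAGE_THRESHOLD = 60  # hypothetical value for illustration

def baseline_failure_reason(coverage: float, tests_passed: list[bool]) -> str | None:
    """Return the reason the baseline fails, or None if it is acceptable."""
    if coverage < COVERAGE_THRESHOLD:
        # Failing tests take precedence over low coverage as the reported cause.
        if not all(tests_passed):
            return "Tests failed to pass for the original code."
        return f"Test coverage is {coverage}%, below the required threshold of {COVERAGE_THRESHOLD}%."
    return None

assert baseline_failure_reason(40.0, [True, False]) == "Tests failed to pass for the original code."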
@@ -1944,12 +1947,19 @@ def run_and_parse_tests(
                 f"stdout: {run_result.stdout}\n"
                 f"stderr: {run_result.stderr}\n"
             )
-            if "ModuleNotFoundError" in run_result.stdout:
+
+            unique_errors = extract_unique_errors(run_result.stdout)
+
+            if unique_errors:
                 from rich.text import Text

-                match = ImportErrorPattern.search(run_result.stdout).group()
-                panel = Panel(Text.from_markup(f"⚠️ {match} ", style="bold red"), expand=False)
-                console.print(panel)
+                for error in unique_errors:
+                    if is_LSP_enabled():
+                        lsp_log(LspCodeMessage(code=error, file_name="errors"))
+                    else:
+                        panel = Panel(Text.from_markup(f"⚠️ {error} ", style="bold red"), expand=False)
+                        console.print(panel)
+
         if testing_type in {TestingMode.BEHAVIOR, TestingMode.PERFORMANCE}:
             results, coverage_results = parse_test_results(
                 test_xml_path=result_file_path,
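Illustrative only: how the non-LSP branch above renders each unique error with rich. The LSP helpers (is_LSP_enabled, lsp_log, LspCodeMessage) are codeflash internals and are not exercised here, and the error set is a hypothetical example of what extract_unique_errors might return:

from rich.console import Console
from rich.panel import Panel
from rich.text import Text

console = Console()
# Hypothetical deduplicated error set.
for error in {"AssertionError: assert 3 == 4", "ModuleNotFoundError: No module named 'foo'"}:
    # Each unique error gets its own compact red panel, matching the PR's styling.
    console.print(Panel(Text.from_markup(f"⚠️ {error} ", style="bold red"), expand=False))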