diff --git a/codeflash/cli_cmds/cmd_init.py b/codeflash/cli_cmds/cmd_init.py index 61a949d0..db10c38f 100644 --- a/codeflash/cli_cmds/cmd_init.py +++ b/codeflash/cli_cmds/cmd_init.py @@ -1099,7 +1099,7 @@ def enter_api_key_and_save_to_rc() -> None: if is_successful(result): click.echo(result.unwrap()) else: - click.echo(result.failure()) + click.echo(result.failure().message) click.pause() os.environ["CODEFLASH_API_KEY"] = api_key diff --git a/codeflash/code_utils/shell_utils.py b/codeflash/code_utils/shell_utils.py index 30a5aada..849571dd 100644 --- a/codeflash/code_utils/shell_utils.py +++ b/codeflash/code_utils/shell_utils.py @@ -7,6 +7,7 @@ from codeflash.code_utils.compat import LF from codeflash.either import Failure, Success +from codeflash.errors.errors import shell_rc_not_found_error, shell_rc_permission_error if TYPE_CHECKING: from codeflash.either import Result @@ -69,13 +70,6 @@ def save_api_key_to_rc(api_key: str) -> Result[str, str]: shell_file.truncate() return Success(f"✅ {action} {shell_rc_path}") except PermissionError: - return Failure( - f"💡 I tried adding your Codeflash API key to {shell_rc_path} - but seems like I don't have permissions to do so.{LF}" - f"You'll need to open it yourself and add the following line:{LF}{LF}{api_key_line}{LF}" - ) + return Failure(shell_rc_permission_error(shell_rc_path, api_key_line)) except FileNotFoundError: - return Failure( - f"💡 I went to save your Codeflash API key to {shell_rc_path}, but noticed that it doesn't exist.{LF}" - f"To ensure your Codeflash API key is automatically loaded into your environment at startup, you can create {shell_rc_path} and add the following line:{LF}" - f"{LF}{api_key_line}{LF}" - ) + return Failure(shell_rc_not_found_error(shell_rc_path, api_key_line)) diff --git a/codeflash/either.py b/codeflash/either.py index 46fcb913..c2cd8bce 100644 --- a/codeflash/either.py +++ b/codeflash/either.py @@ -2,10 +2,35 @@ from typing import Generic, TypeVar +from codeflash.cli_cmds.console 
import logger + L = TypeVar("L") R = TypeVar("R") +class CodeflashError: + def __init__(self, code: str, message_template: str, **formatting_args: str) -> None: + self.code = code + self.message_template = message_template + self.formatting_args = formatting_args + + @property + def message(self) -> str: + try: + formatted = "" + if not isinstance(self.message_template, str): + formatted = str(self.message_template) + else: + formatted = self.message_template.format(**self.formatting_args) + return f"[{self.code}] {formatted}" # noqa: TRY300 + except KeyError: + logger.debug(f"Invalid template: missing {self.formatting_args}") + return self.message_template + + def __str__(self) -> str: + return self.message + + class Result(Generic[L, R]): def __init__(self, value: L | R) -> None: self.value = value @@ -22,15 +47,19 @@ def unwrap(self) -> L | R: raise ValueError(msg) return self.value - def failure(self) -> L | R: + def failure(self) -> CodeflashError: if self.is_successful(): msg = "Cannot get failure value from a success" raise ValueError(msg) - return self.value + if isinstance(self, Failure): + return self.error_code + raise ValueError("Result is not a failure") class Failure(Result[L, R]): - pass + def __init__(self, error_code: CodeflashError) -> None: + super().__init__(error_code.message) + self.error_code = error_code class Success(Result[L, R]): diff --git a/codeflash/errors/__init__.py b/codeflash/errors/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeflash/errors/errors.py b/codeflash/errors/errors.py new file mode 100644 index 00000000..c7730934 --- /dev/null +++ b/codeflash/errors/errors.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +from codeflash.code_utils.compat import LF +from codeflash.either import CodeflashError + +_TEST_CONFIDENCE_ERROR = CodeflashError( + "TEST_CONFIDENCE_THRESHOLD_NOT_MET_ERROR", "The threshold for test confidence was not met." 
+) + +_BEHAVIORAL_TEST_FAILURE_ERROR = CodeflashError( + "BEHAVIORAL_TEST_FAILURE_ERROR", "Failed to establish a baseline for the original code - behavioral tests failed." +) + +_COVERAGE_THRESHOLD_NOT_MET_ERROR = CodeflashError( + "COVERAGE_THRESHOLD_NOT_MET_ERROR", "The threshold for test coverage was not met." +) + +_FUNCTION_OPTIMIZATION_ATTEMPTED_ERROR = CodeflashError( + "FUNCTION_OPTIMIZATION_ATTEMPTED_ERROR", "Function optimization previously attempted, skipping." +) + +_TEST_RESULT_DIDNT_MATCH_ERROR = CodeflashError( + "TEST_RESULT_DIDNT_MATCH_ERROR", "Test results did not match the test results of the original code." +) + + +def test_result_didnt_match_error() -> CodeflashError: + return _TEST_RESULT_DIDNT_MATCH_ERROR + + +def function_optimization_attempted_error() -> CodeflashError: + return _FUNCTION_OPTIMIZATION_ATTEMPTED_ERROR + + +def coverage_threshold_not_met_error() -> CodeflashError: + return _COVERAGE_THRESHOLD_NOT_MET_ERROR + + +def test_confidence_threshold_not_met_error() -> CodeflashError: + return _TEST_CONFIDENCE_ERROR + + +def behavioral_test_failure_error() -> CodeflashError: + return _BEHAVIORAL_TEST_FAILURE_ERROR + + +def shell_rc_permission_error(shell_rc_path: str, api_key_line: str) -> CodeflashError: + return CodeflashError( + "SHELL_RC_PERMISSION_ERROR", + f"I tried adding your Codeflash API key to {{shell_rc_path}} - but seems like I don't have permissions to do so.{LF}" + f"You'll need to open it yourself and add the following line:{LF}{LF}{{api_key_line}}{LF}", + **locals(), + ) + + +def shell_rc_not_found_error(shell_rc_path: str, api_key_line: str) -> CodeflashError: + return CodeflashError( + "SHELL_RC_NOT_FOUND_ERROR", + f"💡 I went to save your Codeflash API key to {{shell_rc_path}}, but noticed that it doesn't exist.{LF}" + f"To ensure your Codeflash API key is automatically loaded into your environment at startup, you can create {{shell_rc_path}} and add the following line:{LF}" + f"{LF}{{api_key_line}}{LF}", 
**locals(), + ) + + +def baseline_establishment_failed_error(failure_msg: str) -> CodeflashError: + return CodeflashError( + "BASELINE_ESTABLISHMENT_FAILED_ERROR", + "Failed to establish a baseline for the original code. {failure_msg}", + **locals(), + ) + + +def no_tests_generated_error(function_name: str) -> CodeflashError: + return CodeflashError("NO_TESTS_GENERATED_ERROR", "NO TESTS GENERATED for {function_name}", **locals()) + + +def no_optimizations_generated_error(function_name: str) -> CodeflashError: + return CodeflashError( + "NO_OPTIMIZATIONS_GENERATED_ERROR", "NO OPTIMIZATIONS GENERATED for {function_name}", **locals() + ) + + +def no_best_optimization_found_error(function_name: str) -> CodeflashError: + return CodeflashError( + "NO_BEST_OPTIMIZATION_FOUND_ERROR", "No best optimizations found for function {function_name}", **locals() + ) + + +def code_context_extraction_failed_error(error: str) -> CodeflashError: + return CodeflashError( + "CODE_CONTEXT_EXTRACTION_FAILED_ERROR", "Failed to extract code context. 
Error: {error}.", **locals() + ) diff --git a/codeflash/lsp/beta.py b/codeflash/lsp/beta.py index 77d87e8a..a6e6e437 100644 --- a/codeflash/lsp/beta.py +++ b/codeflash/lsp/beta.py @@ -201,7 +201,7 @@ def provide_api_key(server: CodeflashLanguageServer, params: ProvideApiKeyParams result = save_api_key_to_rc(api_key) if not is_successful(result): - return {"status": "error", "message": result.failure()} + return {"status": "error", "message": result.failure().message} # clear cache to ensure the new api key is used get_codeflash_api_key.cache_clear() @@ -251,7 +251,11 @@ def perform_function_optimization( # noqa: PLR0911 initialization_result = function_optimizer.can_be_optimized() if not is_successful(initialization_result): - return {"functionName": params.functionName, "status": "error", "message": initialization_result.failure()} + return { + "functionName": params.functionName, + "status": "error", + "message": initialization_result.failure().message, + } should_run_experiment, code_context, original_helper_code = initialization_result.unwrap() @@ -259,7 +263,11 @@ def perform_function_optimization( # noqa: PLR0911 code_context, should_run_experiment=should_run_experiment ) if not is_successful(test_setup_result): - return {"functionName": params.functionName, "status": "error", "message": test_setup_result.failure()} + return { + "functionName": params.functionName, + "status": "error", + "message": test_setup_result.failure().message, + } ( generated_tests, function_to_concolic_tests, @@ -282,7 +290,11 @@ def perform_function_optimization( # noqa: PLR0911 ) if not is_successful(baseline_setup_result): - return {"functionName": params.functionName, "status": "error", "message": baseline_setup_result.failure()} + return { + "functionName": params.functionName, + "status": "error", + "message": baseline_setup_result.failure().message, + } ( function_to_optimize_qualified_name, diff --git a/codeflash/optimization/function_optimizer.py 
b/codeflash/optimization/function_optimizer.py index c523dcbc..cd39693e 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -60,7 +60,19 @@ from codeflash.context import code_context_extractor from codeflash.context.unused_definition_remover import detect_unused_helper_functions, revert_unused_helper_functions from codeflash.discovery.functions_to_optimize import was_function_previously_optimized -from codeflash.either import Failure, Success, is_successful +from codeflash.either import CodeflashError, Failure, Success, is_successful +from codeflash.errors.errors import ( + baseline_establishment_failed_error, + behavioral_test_failure_error, + code_context_extraction_failed_error, + coverage_threshold_not_met_error, + function_optimization_attempted_error, + no_best_optimization_found_error, + no_optimizations_generated_error, + no_tests_generated_error, + test_confidence_threshold_not_met_error, + test_result_didnt_match_error, +) from codeflash.models.ExperimentMetadata import ExperimentMetadata from codeflash.models.models import ( BestOptimization, @@ -252,14 +264,18 @@ def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[P has_any_async_functions(code_string.code) for code_string in code_context.read_writable_code.code_strings ) if async_code: - return Failure("Codeflash does not support async functions in the code to optimize.") + return Failure( + CodeflashError( + "ASYNC_CODE_ERROR", "Codeflash does not support async functions in the code to optimize." + ) + ) # Random here means that we still attempt optimization with a fractional chance to see if # last time we could not find an optimization, maybe this time we do. 
# Random is before as a performance optimization, swapping the two 'and' statements has the same effect if random.random() > REPEAT_OPTIMIZATION_PROBABILITY and was_function_previously_optimized( # noqa: S311 self.function_to_optimize, code_context, self.args ): - return Failure("Function optimization previously attempted, skipping.") + return Failure(function_optimization_attempted_error()) return Success((should_run_experiment, code_context, original_helper_code)) @@ -430,7 +446,7 @@ def optimize_function(self) -> Result[BestOptimization, str]: if self.args.override_fixtures: restore_conftest(original_conftest_content) if not best_optimization: - return Failure(f"No best optimizations found for function {self.function_to_optimize.qualified_name}") + return Failure(no_best_optimization_found_error(self.function_to_optimize.qualified_name)) return Success(best_optimization) def determine_best_candidate( @@ -852,7 +868,7 @@ def get_code_optimization_context(self) -> Result[CodeOptimizationContext, str]: self.function_to_optimize, self.project_root ) except ValueError as e: - return Failure(str(e)) + return Failure(code_context_extraction_failed_error(str(e))) return Success( CodeOptimizationContext( @@ -1012,7 +1028,7 @@ def generate_tests_and_optimizations( # Retrieve results candidates: list[OptimizedCandidate] = future_optimization_candidates.result() if not candidates: - return Failure(f"/!\\ NO OPTIMIZATIONS GENERATED for {self.function_to_optimize.function_name}") + return Failure(no_optimizations_generated_error(self.function_to_optimize.function_name)) candidates_experiment = future_candidates_exp.result() if future_candidates_exp else None @@ -1040,7 +1056,7 @@ def generate_tests_and_optimizations( ) if not tests: logger.warning(f"Failed to generate and instrument tests for {self.function_to_optimize.function_name}") - return Failure(f"/!\\ NO TESTS GENERATED for {self.function_to_optimize.function_name}") + return 
Failure(no_tests_generated_error(self.function_to_optimize.function_name)) function_to_concolic_tests, concolic_test_str = future_concolic_tests.result() logger.info(f"Generated {len(tests)} tests for {self.function_to_optimize.function_name}") console.rule() @@ -1100,14 +1116,21 @@ def setup_and_establish_baseline( return Failure(baseline_result.failure()) original_code_baseline, test_functions_to_remove = baseline_result.unwrap() - if isinstance(original_code_baseline, OriginalCodeBaseline) and ( - not coverage_critic(original_code_baseline.coverage_results, self.args.test_framework) - or not quantity_of_tests_critic(original_code_baseline) - ): - if self.args.override_fixtures: - restore_conftest(original_conftest_content) - cleanup_paths(paths_to_cleanup) - return Failure("The threshold for test confidence was not met.") + if isinstance(original_code_baseline, OriginalCodeBaseline): + error = None + sufficent_coverage = coverage_critic(original_code_baseline.coverage_results, self.args.test_framework) + sufficent_tests = quantity_of_tests_critic(original_code_baseline) + + if not sufficent_coverage: + error = coverage_threshold_not_met_error() + elif not sufficent_tests: + error = test_confidence_threshold_not_met_error() + + if error: + if self.args.override_fixtures: + restore_conftest(original_conftest_content) + cleanup_paths(paths_to_cleanup) + return Failure(error) return Success( ( @@ -1394,9 +1417,9 @@ def establish_original_code_baseline( f"Couldn't run any tests for original function {self.function_to_optimize.function_name}. SKIPPING OPTIMIZING THIS FUNCTION." 
) console.rule() - return Failure("Failed to establish a baseline for the original code - bevhavioral tests failed.") + return Failure(behavioral_test_failure_error()) if not coverage_critic(coverage_results, self.args.test_framework): - return Failure("The threshold for test coverage was not met.") + return Failure(coverage_threshold_not_met_error()) if test_framework == "pytest": line_profile_results = self.line_profiler_step( code_context=code_context, original_helper_code=original_helper_code, candidate_index=0 @@ -1438,6 +1461,7 @@ def establish_original_code_baseline( ) ) console.rule() + failure_msg = "" total_timing = benchmarking_results.total_passed_runtime() # caution: doesn't handle the loop index functions_to_remove = [ @@ -1446,17 +1470,17 @@ def establish_original_code_baseline( if (result.test_type == TestType.GENERATED_REGRESSION and not result.did_pass) ] if total_timing == 0: - logger.warning( - "The overall summed benchmark runtime of the original function is 0, couldn't run tests." - ) + failure_msg = "The overall summed benchmark runtime of the original function is 0, couldn't run tests." 
+ logger.warning(failure_msg) console.rule() success = False if not total_timing: - logger.warning("Failed to run the tests for the original function, skipping optimization") + failure_msg = "Failed to run the tests for the original function, skipping optimization" + logger.warning(failure_msg) console.rule() success = False if not success: - return Failure("Failed to establish a baseline for the original code.") + return Failure(baseline_establishment_failed_error(failure_msg)) loop_count = max([int(result.loop_index) for result in benchmarking_results.test_results]) logger.info( @@ -1540,7 +1564,7 @@ def run_optimized_candidate( else: logger.info("Test results did not match the test results of the original code.") console.rule() - return Failure("Test results did not match the test results of the original code.") + return Failure(test_result_didnt_match_error()) if test_framework == "pytest": candidate_benchmarking_results, _ = self.run_and_parse_tests( diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py index 941705cf..849c3e22 100644 --- a/codeflash/optimization/optimizer.py +++ b/codeflash/optimization/optimizer.py @@ -359,7 +359,7 @@ def run(self) -> None: f"Optimizing {functions_to_optimize[i + 1].qualified_name}", ) else: - logger.warning(best_optimization.failure()) + logger.warning(best_optimization.failure().message) console.rule() continue finally: diff --git a/tests/test_comparator.py b/tests/test_comparator.py index 06e692b3..7ae4f718 100644 --- a/tests/test_comparator.py +++ b/tests/test_comparator.py @@ -13,7 +13,7 @@ import pydantic import pytest -from codeflash.either import Failure, Success +from codeflash.either import CodeflashError, Failure, Success from codeflash.models.models import FunctionTestInvocation, InvocationId, TestResults, TestType from codeflash.verification.comparator import comparator from codeflash.verification.equivalence import compare_test_results @@ -789,7 +789,7 @@ def test_returns(): a = 
Success(5) b = Success(5) c = Success(6) - d = Failure(5) + d = Failure(CodeflashError("TEST", 5)) e = Success((5, 5)) f = Success((5, 6)) assert comparator(a, b) diff --git a/tests/test_unused_helper_revert.py b/tests/test_unused_helper_revert.py index 30f291e6..19d86513 100644 --- a/tests/test_unused_helper_revert.py +++ b/tests/test_unused_helper_revert.py @@ -87,7 +87,7 @@ def helper_function_2(x): # Get original code context to find helper functions ctx_result = optimizer.get_code_optimization_context() - assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure()}" + assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure().message}" code_context = ctx_result.unwrap() @@ -194,7 +194,7 @@ def helper_function_2(x): # Get original code context ctx_result = optimizer.get_code_optimization_context() - assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure()}" + assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure().message}" code_context = ctx_result.unwrap() @@ -259,7 +259,7 @@ def helper_function_2(x): # Get original code context ctx_result = optimizer.get_code_optimization_context() - assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure()}" + assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure().message}" code_context = ctx_result.unwrap() @@ -345,7 +345,7 @@ def entrypoint_function(n): # Get original code context ctx_result = optimizer.get_code_optimization_context() - assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure()}" + assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure().message}" code_context = ctx_result.unwrap() @@ -533,7 +533,7 @@ def helper_method_2(self, x): # Get original code context ctx_result = optimizer.get_code_optimization_context() - assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure()}" + assert 
ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure().message}" code_context = ctx_result.unwrap() @@ -678,7 +678,7 @@ def process_data(self, n): # Get original code context ctx_result = optimizer.get_code_optimization_context() - assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure()}" + assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure().message}" code_context = ctx_result.unwrap() @@ -1013,7 +1013,7 @@ def entrypoint_function(n): # Get original code context ctx_result = optimizer.get_code_optimization_context() - assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure()}" + assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure().message}" code_context = ctx_result.unwrap() @@ -1173,7 +1173,7 @@ def entrypoint_function(n): # Get original code context ctx_result = optimizer.get_code_optimization_context() - assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure()}" + assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure().message}" code_context = ctx_result.unwrap() @@ -1394,7 +1394,7 @@ def calculate_class(cls, n): # Get original code context ctx_result = optimizer.get_code_optimization_context() - assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure()}" + assert ctx_result.is_successful(), f"Failed to get context: {ctx_result.failure().message}" code_context = ctx_result.unwrap()