codeflash-ai · mohammedahmed18 · Aug 29, 2025 · Aug 29, 2025 · Aug 29, 2025 · Aug 29, 2025
diff --git a/codeflash/cli_cmds/cmd_init.py b/codeflash/cli_cmds/cmd_init.py
@@ -1099,7 +1099,7 @@ def enter_api_key_and_save_to_rc() -> None:
     if is_successful(result):
         click.echo(result.unwrap())
     else:
-        click.echo(result.failure())
+        click.echo(result.failure().message)
         click.pause()
 
     os.environ["CODEFLASH_API_KEY"] = api_key

diff --git a/codeflash/code_utils/shell_utils.py b/codeflash/code_utils/shell_utils.py
@@ -7,6 +7,7 @@
 
 from codeflash.code_utils.compat import LF
 from codeflash.either import Failure, Success
+from codeflash.errors.errors import shell_rc_not_found_error, shell_rc_permission_error
 
 if TYPE_CHECKING:
     from codeflash.either import Result
@@ -69,13 +70,6 @@ def save_api_key_to_rc(api_key: str) -> Result[str, str]:
             shell_file.truncate()
         return Success(f"✅ {action} {shell_rc_path}")
     except PermissionError:
-        return Failure(
-            f"💡 I tried adding your Codeflash API key to {shell_rc_path} - but seems like I don't have permissions to do so.{LF}"
-            f"You'll need to open it yourself and add the following line:{LF}{LF}{api_key_line}{LF}"
-        )
+        return Failure(shell_rc_permission_error(shell_rc_path, api_key_line))
     except FileNotFoundError:
-        return Failure(
-            f"💡 I went to save your Codeflash API key to {shell_rc_path}, but noticed that it doesn't exist.{LF}"
-            f"To ensure your Codeflash API key is automatically loaded into your environment at startup, you can create {shell_rc_path} and add the following line:{LF}"
-            f"{LF}{api_key_line}{LF}"
-        )
+        return Failure(shell_rc_not_found_error(shell_rc_path, api_key_line))
diff --git a/codeflash/either.py b/codeflash/either.py
@@ -2,10 +2,35 @@
 
 from typing import Generic, TypeVar
 
+from codeflash.cli_cmds.console import logger
+
 L = TypeVar("L")
 R = TypeVar("R")
 
 
+class CodeflashError:
+    """Error with a stable ``code`` and a message template formatted lazily from keyword arguments."""
+
+    def __init__(self, code: str, message_template: str, **formatting_args: str) -> None:
+        self.code = code
+        self.message_template = message_template
+        self.formatting_args = formatting_args
+
+    @property
+    def message(self) -> str:
+        """Return ``[code] text``; if the template cannot be formatted, log it and use the raw template."""
+        formatted = str(self.message_template)
+        if isinstance(self.message_template, str):  # non-str templates are shown verbatim
+            try:
+                formatted = self.message_template.format(**self.formatting_args)
+            except (KeyError, IndexError, ValueError) as exc:
+                logger.debug(f"Invalid template for {self.code}: {exc!r} (args: {self.formatting_args})")
+        return f"[{self.code}] {formatted}"
+
+    def __str__(self) -> str:
+        return self.message
+
+
 class Result(Generic[L, R]):
     def __init__(self, value: L | R) -> None:
         self.value = value
@@ -22,15 +47,19 @@ def unwrap(self) -> L | R:
             raise ValueError(msg)
         return self.value
 
-    def failure(self) -> L | R:
+    def failure(self) -> CodeflashError:  # returns the structured error rather than the stored value
         if self.is_successful():
             msg = "Cannot get failure value from a success"
             raise ValueError(msg)
-        return self.value
+        if isinstance(self, Failure):  # error_code is assigned in Failure.__init__
+            return self.error_code
+        raise ValueError("Result is not a failure")  # neither successful nor a Failure instance
 
 
 class Failure(Result[L, R]):
-    pass
+    def __init__(self, error_code: CodeflashError) -> None:
+        super().__init__(error_code.message)  # Result.value holds the rendered message text
+        self.error_code = error_code  # structured error handed back by Result.failure()
 
 
 class Success(Result[L, R]):

diff --git a/codeflash/errors/__init__.py b/codeflash/errors/__init__.py
diff --git a/codeflash/errors/errors.py b/codeflash/errors/errors.py
@@ -0,0 +1,116 @@
+"""Factories for the :class:`CodeflashError` values carried by ``Failure`` results."""
+
+from __future__ import annotations
+
+from codeflash.code_utils.compat import LF
+from codeflash.either import CodeflashError
+
+# These errors take no formatting arguments, so each is built once and shared by its accessor below.
+_TEST_CONFIDENCE_ERROR = CodeflashError(
+    "TEST_CONFIDENCE_THRESHOLD_NOT_MET_ERROR", "The threshold for test confidence was not met."
+)
+
+_BEHAVIORAL_TEST_FAILURE_ERROR = CodeflashError(
+    "BEHAVIORAL_TEST_FAILURE_ERROR", "Failed to establish a baseline for the original code - behavioral tests failed."
+)
+
+_COVERAGE_THRESHOLD_NOT_MET_ERROR = CodeflashError(
+    "COVERAGE_THRESHOLD_NOT_MET_ERROR", "The threshold for test coverage was not met."
+)
+
+_FUNCTION_OPTIMIZATION_ATTEMPTED_ERROR = CodeflashError(
+    "FUNCTION_OPTIMIZATION_ATTEMPTED_ERROR", "Function optimization previously attempted, skipping."
+)
+
+_TEST_RESULT_DIDNT_MATCH_ERROR = CodeflashError(
+    "TEST_RESULT_DIDNT_MATCH_ERROR", "Test results did not match the test results of the original code."
+)
+
+
+def test_result_didnt_match_error() -> CodeflashError:
+    """Return the shared error for a candidate whose test results differ from the original code's."""
+    return _TEST_RESULT_DIDNT_MATCH_ERROR
+
+
+def function_optimization_attempted_error() -> CodeflashError:
+    """Return the shared error for a function whose optimization was already attempted."""
+    return _FUNCTION_OPTIMIZATION_ATTEMPTED_ERROR
+
+
+def coverage_threshold_not_met_error() -> CodeflashError:
+    """Return the shared error for test coverage below the required threshold."""
+    return _COVERAGE_THRESHOLD_NOT_MET_ERROR
+
+
+def test_confidence_threshold_not_met_error() -> CodeflashError:
+    """Return the shared error for test confidence below the required threshold."""
+    return _TEST_CONFIDENCE_ERROR
+
+
+def behavioral_test_failure_error() -> CodeflashError:
+    """Return the shared error for behavioral tests failing on the original code."""
+    return _BEHAVIORAL_TEST_FAILURE_ERROR
+
+
+def shell_rc_permission_error(shell_rc_path: str, api_key_line: str) -> CodeflashError:
+    """Build the error for a shell rc file that cannot be written; it tells the user which line to add."""
+    return CodeflashError(
+        "SHELL_RC_PERMISSION_ERROR",
+        f"💡 I tried adding your Codeflash API key to {{shell_rc_path}} - but seems like I don't have permissions to do so.{LF}"
+        f"You'll need to open it yourself and add the following line:{LF}{LF}{{api_key_line}}{LF}",
+        shell_rc_path=shell_rc_path,
+        api_key_line=api_key_line,
+    )
+
+
+def shell_rc_not_found_error(shell_rc_path: str, api_key_line: str) -> CodeflashError:
+    """Build the error for a missing shell rc file; it tells the user which file to create and line to add."""
+    return CodeflashError(
+        "SHELL_RC_NOT_FOUND_ERROR",
+        f"💡 I went to save your Codeflash API key to {{shell_rc_path}}, but noticed that it doesn't exist.{LF}"
+        f"To ensure your Codeflash API key is automatically loaded into your environment at startup, you can create {{shell_rc_path}} and add the following line:{LF}"
+        f"{LF}{{api_key_line}}{LF}",
+        shell_rc_path=shell_rc_path,
+        api_key_line=api_key_line,
+    )
+
+
+def baseline_establishment_failed_error(failure_msg: str) -> CodeflashError:
+    """Build the error for a failed baseline run, appending ``failure_msg`` as the reason."""
+    return CodeflashError(
+        "BASELINE_ESTABLISHMENT_FAILED_ERROR",
+        "Failed to establish a baseline for the original code. {failure_msg}",
+        failure_msg=failure_msg,
+    )
+
+
+def no_tests_generated_error(function_name: str) -> CodeflashError:
+    """Build the error for ``function_name`` when no tests were generated."""
+    return CodeflashError(
+        "NO_TESTS_GENERATED_ERROR", "NO TESTS GENERATED for {function_name}", function_name=function_name
+    )
+
+
+def no_optimizations_generated_error(function_name: str) -> CodeflashError:
+    """Build the error for ``function_name`` when no optimization candidates were generated."""
+    return CodeflashError(
+        "NO_OPTIMIZATIONS_GENERATED_ERROR",
+        "NO OPTIMIZATIONS GENERATED for {function_name}",
+        function_name=function_name,
+    )
+
+
+def no_best_optimization_found_error(function_name: str) -> CodeflashError:
+    """Build the error for ``function_name`` when no best optimization was found."""
+    return CodeflashError(
+        "NO_BEST_OPTIMIZATION_FOUND_ERROR",
+        "No best optimizations found for function {function_name}",
+        function_name=function_name,
+    )
+
+
+def code_context_extraction_failed_error(error: str) -> CodeflashError:
+    """Build the error for a failed code-context extraction, embedding the underlying ``error`` text."""
+    return CodeflashError(
+        "CODE_CONTEXT_EXTRACTION_FAILED_ERROR", "Failed to extract code context. Error: {error}.", error=error
+    )
diff --git a/codeflash/lsp/beta.py b/codeflash/lsp/beta.py
@@ -201,7 +201,7 @@ def provide_api_key(server: CodeflashLanguageServer, params: ProvideApiKeyParams
 
         result = save_api_key_to_rc(api_key)
         if not is_successful(result):
-            return {"status": "error", "message": result.failure()}
+            return {"status": "error", "message": result.failure().message}
 
         # clear cache to ensure the new api key is used
         get_codeflash_api_key.cache_clear()
@@ -251,15 +251,23 @@ def perform_function_optimization(  # noqa: PLR0911
 
         initialization_result = function_optimizer.can_be_optimized()
         if not is_successful(initialization_result):
-            return {"functionName": params.functionName, "status": "error", "message": initialization_result.failure()}
+            return {
+                "functionName": params.functionName,
+                "status": "error",
+                "message": initialization_result.failure().message,
+            }
 
         should_run_experiment, code_context, original_helper_code = initialization_result.unwrap()
 
         test_setup_result = function_optimizer.generate_and_instrument_tests(
             code_context, should_run_experiment=should_run_experiment
         )
         if not is_successful(test_setup_result):
-            return {"functionName": params.functionName, "status": "error", "message": test_setup_result.failure()}
+            return {
+                "functionName": params.functionName,
+                "status": "error",
+                "message": test_setup_result.failure().message,
+            }
         (
             generated_tests,
             function_to_concolic_tests,
@@ -282,7 +290,11 @@ def perform_function_optimization(  # noqa: PLR0911
         )
 
         if not is_successful(baseline_setup_result):
-            return {"functionName": params.functionName, "status": "error", "message": baseline_setup_result.failure()}
+            return {
+                "functionName": params.functionName,
+                "status": "error",
+                "message": baseline_setup_result.failure().message,
+            }
 
         (
             function_to_optimize_qualified_name,

diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py
@@ -60,7 +60,19 @@
 from codeflash.context import code_context_extractor
 from codeflash.context.unused_definition_remover import detect_unused_helper_functions, revert_unused_helper_functions
 from codeflash.discovery.functions_to_optimize import was_function_previously_optimized
-from codeflash.either import Failure, Success, is_successful
+from codeflash.either import CodeflashError, Failure, Success, is_successful
+from codeflash.errors.errors import (
+    baseline_establishment_failed_error,
+    behavioral_test_failure_error,
+    code_context_extraction_failed_error,
+    coverage_threshold_not_met_error,
+    function_optimization_attempted_error,
+    no_best_optimization_found_error,
+    no_optimizations_generated_error,
+    no_tests_generated_error,
+    test_confidence_threshold_not_met_error,
+    test_result_didnt_match_error,
+)
 from codeflash.models.ExperimentMetadata import ExperimentMetadata
 from codeflash.models.models import (
     BestOptimization,
@@ -252,14 +264,18 @@ def can_be_optimized(self) -> Result[tuple[bool, CodeOptimizationContext, dict[P
             has_any_async_functions(code_string.code) for code_string in code_context.read_writable_code.code_strings
         )
         if async_code:
-            return Failure("Codeflash does not support async functions in the code to optimize.")
+            return Failure(
+                CodeflashError(
+                    "ASYNC_CODE_ERROR", "Codeflash does not support async functions in the code to optimize."
+                )
+            )
         # Random here means that we still attempt optimization with a fractional chance to see if
         # last time we could not find an optimization, maybe this time we do.
         # Random is before as a performance optimization, swapping the two 'and' statements has the same effect
         if random.random() > REPEAT_OPTIMIZATION_PROBABILITY and was_function_previously_optimized(  # noqa: S311
             self.function_to_optimize, code_context, self.args
         ):
-            return Failure("Function optimization previously attempted, skipping.")
+            return Failure(function_optimization_attempted_error())
 
         return Success((should_run_experiment, code_context, original_helper_code))
 
@@ -430,7 +446,7 @@ def optimize_function(self) -> Result[BestOptimization, str]:
         if self.args.override_fixtures:
             restore_conftest(original_conftest_content)
         if not best_optimization:
-            return Failure(f"No best optimizations found for function {self.function_to_optimize.qualified_name}")
+            return Failure(no_best_optimization_found_error(self.function_to_optimize.qualified_name))
         return Success(best_optimization)
 
     def determine_best_candidate(
@@ -852,7 +868,7 @@ def get_code_optimization_context(self) -> Result[CodeOptimizationContext, str]:
                 self.function_to_optimize, self.project_root
             )
         except ValueError as e:
-            return Failure(str(e))
+            return Failure(code_context_extraction_failed_error(str(e)))
 
         return Success(
             CodeOptimizationContext(
@@ -1012,7 +1028,7 @@ def generate_tests_and_optimizations(
         # Retrieve results
         candidates: list[OptimizedCandidate] = future_optimization_candidates.result()
         if not candidates:
-            return Failure(f"/!\\ NO OPTIMIZATIONS GENERATED for {self.function_to_optimize.function_name}")
+            return Failure(no_optimizations_generated_error(self.function_to_optimize.function_name))
 
         candidates_experiment = future_candidates_exp.result() if future_candidates_exp else None
 
@@ -1040,7 +1056,7 @@ def generate_tests_and_optimizations(
                 )
         if not tests:
             logger.warning(f"Failed to generate and instrument tests for {self.function_to_optimize.function_name}")
-            return Failure(f"/!\\ NO TESTS GENERATED for {self.function_to_optimize.function_name}")
+            return Failure(no_tests_generated_error(self.function_to_optimize.function_name))
         function_to_concolic_tests, concolic_test_str = future_concolic_tests.result()
         logger.info(f"Generated {len(tests)} tests for {self.function_to_optimize.function_name}")
         console.rule()
@@ -1100,14 +1116,21 @@ def setup_and_establish_baseline(
             return Failure(baseline_result.failure())
 
         original_code_baseline, test_functions_to_remove = baseline_result.unwrap()
-        if isinstance(original_code_baseline, OriginalCodeBaseline) and (
-            not coverage_critic(original_code_baseline.coverage_results, self.args.test_framework)
-            or not quantity_of_tests_critic(original_code_baseline)
-        ):
-            if self.args.override_fixtures:
-                restore_conftest(original_conftest_content)
-            cleanup_paths(paths_to_cleanup)
-            return Failure("The threshold for test confidence was not met.")
+        if isinstance(original_code_baseline, OriginalCodeBaseline):
+            error = None
+            sufficent_coverage = coverage_critic(original_code_baseline.coverage_results, self.args.test_framework)
+            sufficent_tests = quantity_of_tests_critic(original_code_baseline)
+
+            if not sufficent_coverage:
+                error = coverage_threshold_not_met_error()
+            elif not sufficent_tests:
+                error = test_confidence_threshold_not_met_error()
+
+            if error:
+                if self.args.override_fixtures:
+                    restore_conftest(original_conftest_content)
+                cleanup_paths(paths_to_cleanup)
+                return Failure(error)
 
         return Success(
             (
@@ -1394,9 +1417,9 @@ def establish_original_code_baseline(
                     f"Couldn't run any tests for original function {self.function_to_optimize.function_name}. SKIPPING OPTIMIZING THIS FUNCTION."
                 )
                 console.rule()
-                return Failure("Failed to establish a baseline for the original code - bevhavioral tests failed.")
+                return Failure(behavioral_test_failure_error())
             if not coverage_critic(coverage_results, self.args.test_framework):
-                return Failure("The threshold for test coverage was not met.")
+                return Failure(coverage_threshold_not_met_error())
             if test_framework == "pytest":
                 line_profile_results = self.line_profiler_step(
                     code_context=code_context, original_helper_code=original_helper_code, candidate_index=0
@@ -1438,6 +1461,7 @@ def establish_original_code_baseline(
                 )
             )
             console.rule()
+            failure_msg = ""
 
             total_timing = benchmarking_results.total_passed_runtime()  # caution: doesn't handle the loop index
             functions_to_remove = [
@@ -1446,17 +1470,17 @@ def establish_original_code_baseline(
                 if (result.test_type == TestType.GENERATED_REGRESSION and not result.did_pass)
             ]
             if total_timing == 0:
-                logger.warning(
-                    "The overall summed benchmark runtime of the original function is 0, couldn't run tests."
-                )
+                failure_msg = "The overall summed benchmark runtime of the original function is 0, couldn't run tests."
+                logger.warning(failure_msg)
                 console.rule()
                 success = False
             if not total_timing:
-                logger.warning("Failed to run the tests for the original function, skipping optimization")
+                failure_msg = "Failed to run the tests for the original function, skipping optimization"
+                logger.warning(failure_msg)
                 console.rule()
                 success = False
             if not success:
-                return Failure("Failed to establish a baseline for the original code.")
+                return Failure(baseline_establishment_failed_error(failure_msg))
 
             loop_count = max([int(result.loop_index) for result in benchmarking_results.test_results])
             logger.info(
@@ -1540,7 +1564,7 @@ def run_optimized_candidate(
             else:
                 logger.info("Test results did not match the test results of the original code.")
                 console.rule()
-                return Failure("Test results did not match the test results of the original code.")
+                return Failure(test_result_didnt_match_error())
 
             if test_framework == "pytest":
                 candidate_benchmarking_results, _ = self.run_and_parse_tests(