diff --git a/codeflash/code_utils/code_utils.py b/codeflash/code_utils/code_utils.py index 507b5a169..5d4e7ea04 100644 --- a/codeflash/code_utils/code_utils.py +++ b/codeflash/code_utils/code_utils.py @@ -254,6 +254,11 @@ def module_name_from_file_path(file_path: Path, project_root_path: Path, *, trav raise ValueError(msg) # noqa: B904 +def get_qualified_function_path(file_path: Path, project_root_path: Path, qualified_name: str) -> str: + module_path = file_path.relative_to(project_root_path).with_suffix("").as_posix().replace("/", ".") + return f"{module_path}.{qualified_name}" + + def file_path_from_module_name(module_name: str, project_root_path: Path) -> Path: """Get file path from module path.""" return project_root_path / (module_name.replace(".", os.sep) + ".py") diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py index 398efe461..55cc9f33c 100644 --- a/codeflash/discovery/discover_unit_tests.py +++ b/codeflash/discovery/discover_unit_tests.py @@ -66,6 +66,76 @@ class TestFunction: FUNCTION_NAME_REGEX = re.compile(r"([^.]+)\.([a-zA-Z0-9_]+)$") +def _extract_dotted_call_name(node: ast.expr) -> str | None: + """Extract full dotted name from function call (e.g., 'src.math.computation.gcd_recursive').""" + parts = [] + current = node + while isinstance(current, ast.Attribute): + parts.append(current.attr) + current = current.value + if isinstance(current, ast.Name): + parts.append(current.id) + parts.reverse() + return ".".join(parts) if parts else None + return None + + +def _discover_calls_via_ast( + test_file: Path, test_functions: set[TestFunction], target_qualified_names: set[str] +) -> dict[str, list[tuple[TestFunction, CodePosition]]]: + try: + with test_file.open("r", encoding="utf-8") as f: + source = f.read() + tree = ast.parse(source, filename=str(test_file)) + except (SyntaxError, FileNotFoundError) as e: + logger.debug(f"AST parsing failed for {test_file}: {e}") + return {} + + import_map = {} # alias -> full_qualified_path + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + name = alias.asname or alias.name + import_map[name] = alias.name + elif isinstance(node, ast.ImportFrom) and node.module: + for alias in node.names: + if alias.name != "*": + full_name = f"{node.module}.{alias.name}" + name = alias.asname or alias.name + import_map[name] = full_name + + test_funcs_by_name = {tf.function_name: tf for tf in test_functions} + + result = defaultdict(list) + + for node in ast.walk(tree): + if not isinstance(node, ast.FunctionDef) or node.name not in test_funcs_by_name: + continue + + test_func = test_funcs_by_name[node.name] + + for child in ast.walk(node): + if not isinstance(child, ast.Call): + continue + + call_name = _extract_dotted_call_name(child.func) + if not call_name: + continue + + if call_name in target_qualified_names: + result[call_name].append((test_func, CodePosition(line_no=child.lineno, col_no=child.col_offset))) + continue + + parts = call_name.split(".", 1) + if parts[0] in import_map: + resolved = f"{import_map[parts[0]]}.{parts[1]}" if len(parts) == 2 else import_map[parts[0]] + + if resolved in target_qualified_names: + result[resolved].append((test_func, CodePosition(line_no=child.lineno, col_no=child.col_offset))) + + return dict(result) + + class TestsCache: SCHEMA_VERSION = 1 # Increment this when schema changes @@ -489,6 +559,7 @@ def discover_tests_pytest( console.rule() else: logger.debug(f"Pytest collection exit code: {exitcode}") + if pytest_rootdir is 
not None: cfg.tests_project_rootdir = Path(pytest_rootdir) file_to_test_map: dict[Path, list[FunctionCalledInTest]] = defaultdict(list) @@ -497,6 +568,8 @@ def discover_tests_pytest( test_type = TestType.REPLAY_TEST elif "test_concolic_coverage" in test["test_file"]: test_type = TestType.CONCOLIC_COVERAGE_TEST + elif "test_hypothesis" in test["test_file"]: + test_type = TestType.HYPOTHESIS_TEST else: test_type = TestType.EXISTING_UNIT_TEST @@ -509,6 +582,7 @@ def discover_tests_pytest( if discover_only_these_tests and test_obj.test_file not in discover_only_these_tests: continue file_to_test_map[test_obj.test_file].append(test_obj) + # Within these test files, find the project functions they are referring to and return their names/locations return process_test_files(file_to_test_map, cfg, functions_to_optimize) @@ -540,6 +614,8 @@ def get_test_details(_test: unittest.TestCase) -> TestsInFile | None: test_type = TestType.REPLAY_TEST elif "test_concolic_coverage" in str(_test_module_path): test_type = TestType.CONCOLIC_COVERAGE_TEST + elif "test_hypothesis" in str(_test_module_path): + test_type = TestType.HYPOTHESIS_TEST else: test_type = TestType.EXISTING_UNIT_TEST return TestsInFile( @@ -588,7 +664,9 @@ def process_test_files( test_framework = cfg.test_framework if functions_to_optimize: - target_function_names = {func.qualified_name for func in functions_to_optimize} + target_function_names = { + func.qualified_name_with_modules_from_root(project_root_path) for func in functions_to_optimize + } file_to_test_map = filter_test_files_by_imports(file_to_test_map, target_function_names) function_to_test_map = defaultdict(set) @@ -598,6 +676,7 @@ def process_test_files( tests_cache = TestsCache(project_root_path) logger.info("!lsp|Discovering tests and processing unit tests") + with test_files_progress_bar(total=len(file_to_test_map), description="Processing test files") as ( progress, task_id, @@ -698,6 +777,79 @@ def process_test_files( test_functions_by_name[func.function_name].append(func) test_function_names_set = set(test_functions_by_name.keys()) + + is_generated_test_file = ( + any( + tf.test_type in (TestType.HYPOTHESIS_TEST, TestType.CONCOLIC_COVERAGE_TEST) for tf in test_functions + ) + if test_functions + else any( + func.test_type in (TestType.HYPOTHESIS_TEST, TestType.CONCOLIC_COVERAGE_TEST) for func in functions + ) + ) + + # For generated tests, use AST-based discovery since Jedi often fails + if is_generated_test_file and functions_to_optimize: + logger.debug(f"Using AST-based discovery for generated test file: {test_file.name}") + target_qualified_names = { + func.qualified_name_with_modules_from_root(project_root_path) for func in functions_to_optimize + } + + if not test_functions: + logger.debug("Jedi found no functions, building test_functions from collected functions") + test_functions = { + TestFunction( + function_name=func.test_function, + test_class=func.test_class, + parameters=None, + test_type=func.test_type, + ) + for func in functions + } + + ast_results = _discover_calls_via_ast(test_file, test_functions, target_qualified_names) + + for qualified_name, matches in ast_results.items(): + for test_func, position in matches: + if test_func.parameters is not None: + if test_framework == "pytest": + scope_test_function = f"{test_func.function_name}[{test_func.parameters}]" + else: # unittest + scope_test_function = f"{test_func.function_name}_{test_func.parameters}" + else: + scope_test_function = test_func.function_name + + function_to_test_map[qualified_name].add( 
+ FunctionCalledInTest( + tests_in_file=TestsInFile( + test_file=test_file, + test_class=test_func.test_class, + test_function=scope_test_function, + test_type=test_func.test_type, + ), + position=position, + ) + ) + tests_cache.insert_test( + file_path=str(test_file), + file_hash=file_hash, + qualified_name_with_modules_from_root=qualified_name, + function_name=test_func.function_name, + test_class=test_func.test_class or "", + test_function=scope_test_function, + test_type=test_func.test_type, + line_number=position.line_no, + col_number=position.col_no, + ) + + if test_func.test_type == TestType.REPLAY_TEST: + num_discovered_replay_tests += 1 + + num_discovered_tests += 1 + + progress.advance(task_id) + continue + relevant_names = [] names_with_full_name = [name for name in all_names if name.full_name is not None] diff --git a/codeflash/models/test_type.py b/codeflash/models/test_type.py index 103a3bc4d..f30089967 100644 --- a/codeflash/models/test_type.py +++ b/codeflash/models/test_type.py @@ -8,6 +8,7 @@ class TestType(Enum): REPLAY_TEST = 4 CONCOLIC_COVERAGE_TEST = 5 INIT_STATE_TEST = 6 + HYPOTHESIS_TEST = 7 def to_name(self) -> str: if self is TestType.INIT_STATE_TEST: @@ -18,5 +19,6 @@ def to_name(self) -> str: TestType.GENERATED_REGRESSION: "🌀 Generated Regression Tests", TestType.REPLAY_TEST: "⏪ Replay Tests", TestType.CONCOLIC_COVERAGE_TEST: "🔎 Concolic Coverage Tests", + TestType.HYPOTHESIS_TEST: "🔮 Hypothesis Tests", } return names[self] diff --git a/codeflash/optimization/function_optimizer.py b/codeflash/optimization/function_optimizer.py index 093ff0966..33f53e31e 100644 --- a/codeflash/optimization/function_optimizer.py +++ b/codeflash/optimization/function_optimizer.py @@ -95,6 +95,7 @@ from codeflash.telemetry.posthog_cf import ph from codeflash.verification.concolic_testing import generate_concolic_tests from codeflash.verification.equivalence import compare_test_results +from codeflash.verification.hypothesis_testing import generate_hypothesis_tests from codeflash.verification.instrument_codeflash_capture import instrument_codeflash_capture from codeflash.verification.parse_line_profile_test_output import parse_line_profile_results from codeflash.verification.parse_test_output import calculate_function_throughput_from_test_results, parse_test_results @@ -239,6 +240,7 @@ def __init__( self.function_benchmark_timings = function_benchmark_timings if function_benchmark_timings else {} self.total_benchmark_timings = total_benchmark_timings if total_benchmark_timings else {} self.replay_tests_dir = replay_tests_dir if replay_tests_dir else None + self.hypothesis_tests_dir: Path | None = None self.generate_and_instrument_tests_results: ( tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet] | None ) = None @@ -281,6 +283,8 @@ def generate_and_instrument_tests( GeneratedTestsList, dict[str, set[FunctionCalledInTest]], str, + dict[str, set[FunctionCalledInTest]], + str, OptimizationSet, list[Path], list[Path], @@ -323,9 +327,15 @@ def generate_and_instrument_tests( generated_tests: GeneratedTestsList optimizations_set: OptimizationSet - count_tests, generated_tests, function_to_concolic_tests, concolic_test_str, optimizations_set = ( - generated_results.unwrap() - ) + ( + count_tests, + generated_tests, + function_to_concolic_tests, + concolic_test_str, + function_to_hypothesis_tests, + hypothesis_test_str, + optimizations_set, + ) = generated_results.unwrap() for i, generated_test in enumerate(generated_tests.generated_tests): with 
generated_test.behavior_file_path.open("w", encoding="utf8") as f: @@ -345,12 +355,19 @@ def generate_and_instrument_tests( logger.info(f"Generated test {i + 1}/{count_tests}:") code_print(generated_test.generated_original_test_source, file_name=f"test_{i + 1}.py") if concolic_test_str: - logger.info(f"Generated test {count_tests}/{count_tests}:") + logger.info(f"Generated test {count_tests - (1 if hypothesis_test_str else 0)}/{count_tests}:") code_print(concolic_test_str) + if hypothesis_test_str: + logger.info(f"Generated test {count_tests}/{count_tests}:") + code_print(hypothesis_test_str) function_to_all_tests = { - key: self.function_to_tests.get(key, set()) | function_to_concolic_tests.get(key, set()) - for key in set(self.function_to_tests) | set(function_to_concolic_tests) + key: ( + self.function_to_tests.get(key, set()) + | function_to_concolic_tests.get(key, set()) + | function_to_hypothesis_tests.get(key, set()) + ) + for key in set(self.function_to_tests) | set(function_to_concolic_tests) | set(function_to_hypothesis_tests) } instrumented_unittests_created_for_function = self.instrument_existing_tests(function_to_all_tests) @@ -366,6 +383,8 @@ def generate_and_instrument_tests( generated_tests, function_to_concolic_tests, concolic_test_str, + function_to_hypothesis_tests, + hypothesis_test_str, optimizations_set, generated_test_paths, generated_perf_test_paths, @@ -398,6 +417,8 @@ def optimize_function(self) -> Result[BestOptimization, str]: generated_tests, function_to_concolic_tests, concolic_test_str, + function_to_hypothesis_tests, + _hypothesis_test_str, optimizations_set, generated_test_paths, generated_perf_test_paths, @@ -409,6 +430,7 @@ def optimize_function(self) -> Result[BestOptimization, str]: code_context=code_context, original_helper_code=original_helper_code, function_to_concolic_tests=function_to_concolic_tests, + function_to_hypothesis_tests=function_to_hypothesis_tests, generated_test_paths=generated_test_paths, generated_perf_test_paths=generated_perf_test_paths, instrumented_unittests_created_for_function=instrumented_unittests_created_for_function, @@ -995,6 +1017,7 @@ def instrument_existing_tests(self, function_to_all_tests: dict[str, set[Functio existing_test_files_count = 0 replay_test_files_count = 0 concolic_coverage_test_files_count = 0 + hypothesis_test_files_count = 0 unique_instrumented_test_files = set() func_qualname = self.function_to_optimize.qualified_name_with_modules_from_root(self.project_root) @@ -1015,6 +1038,8 @@ def instrument_existing_tests(self, function_to_all_tests: dict[str, set[Functio replay_test_files_count += 1 elif test_type == TestType.CONCOLIC_COVERAGE_TEST: concolic_coverage_test_files_count += 1 + elif test_type == TestType.HYPOTHESIS_TEST: + hypothesis_test_files_count += 1 else: msg = f"Unexpected test type: {test_type}" raise ValueError(msg) @@ -1073,9 +1098,11 @@ def instrument_existing_tests(self, function_to_all_tests: dict[str, set[Functio logger.info( f"Discovered {existing_test_files_count} existing unit test file" f"{'s' if existing_test_files_count != 1 else ''}, {replay_test_files_count} replay test file" - f"{'s' if replay_test_files_count != 1 else ''}, and " + f"{'s' if replay_test_files_count != 1 else ''}, " f"{concolic_coverage_test_files_count} concolic coverage test file" - f"{'s' if concolic_coverage_test_files_count != 1 else ''} for {func_qualname}" + f"{'s' if concolic_coverage_test_files_count != 1 else ''}, and " + f"{hypothesis_test_files_count} hypothesis test file" + f"{'s' if 
hypothesis_test_files_count != 1 else ''} for {func_qualname}" ) console.rule() return unique_instrumented_test_files @@ -1089,7 +1116,15 @@ def generate_tests_and_optimizations( generated_test_paths: list[Path], generated_perf_test_paths: list[Path], run_experiment: bool = False, # noqa: FBT001, FBT002 - ) -> Result[tuple[GeneratedTestsList, dict[str, set[FunctionCalledInTest]], OptimizationSet], str]: + ) -> Result[ + tuple[ + GeneratedTestsList, + dict[str, set[FunctionCalledInTest]], + dict[str, set[FunctionCalledInTest]], + OptimizationSet, + ], + str, + ]: n_tests = N_TESTS_TO_GENERATE_EFFECTIVE assert len(generated_test_paths) == n_tests console.rule() @@ -1116,7 +1151,14 @@ def generate_tests_and_optimizations( future_concolic_tests = self.executor.submit( generate_concolic_tests, self.test_cfg, self.args, self.function_to_optimize, self.function_to_optimize_ast ) - futures = [*future_tests, future_optimization_candidates, future_concolic_tests] + future_hypothesis_tests = self.executor.submit( + generate_hypothesis_tests, + self.test_cfg, + self.args, + self.function_to_optimize, + self.function_to_optimize_ast, + ) + futures = [*future_tests, future_optimization_candidates, future_concolic_tests, future_hypothesis_tests] if run_experiment: future_candidates_exp = self.executor.submit( self.local_aiservice_client.optimize_python_code, @@ -1168,29 +1210,36 @@ def generate_tests_and_optimizations( logger.warning(f"Failed to generate and instrument tests for {self.function_to_optimize.function_name}") return Failure(f"/!\\ NO TESTS GENERATED for {self.function_to_optimize.function_name}") function_to_concolic_tests, concolic_test_str = future_concolic_tests.result() + function_to_hypothesis_tests, hypothesis_test_str, hypothesis_test_suite_dir = future_hypothesis_tests.result() + self.hypothesis_tests_dir = hypothesis_test_suite_dir count_tests = len(tests) if concolic_test_str: count_tests += 1 + if hypothesis_test_str: + count_tests += 1 logger.info(f"Generated '{count_tests}' tests for {self.function_to_optimize.function_name}") console.rule() generated_tests = GeneratedTestsList(generated_tests=tests) - result = ( + + self.generate_and_instrument_tests_results = ( count_tests, generated_tests, function_to_concolic_tests, concolic_test_str, + function_to_hypothesis_tests, + hypothesis_test_str, OptimizationSet(control=candidates, experiment=candidates_experiment), ) - self.generate_and_instrument_tests_results = result - return Success(result) + return Success(self.generate_and_instrument_tests_results) def setup_and_establish_baseline( self, code_context: CodeOptimizationContext, original_helper_code: dict[Path, str], function_to_concolic_tests: dict[str, set[FunctionCalledInTest]], + function_to_hypothesis_tests: dict[str, set[FunctionCalledInTest]], generated_test_paths: list[Path], generated_perf_test_paths: list[Path], instrumented_unittests_created_for_function: set[Path], @@ -1201,8 +1250,12 @@ def setup_and_establish_baseline( """Set up baseline context and establish original code baseline.""" function_to_optimize_qualified_name = self.function_to_optimize.qualified_name function_to_all_tests = { - key: self.function_to_tests.get(key, set()) | function_to_concolic_tests.get(key, set()) - for key in set(self.function_to_tests) | set(function_to_concolic_tests) + key: ( + self.function_to_tests.get(key, set()) + | function_to_concolic_tests.get(key, set()) + | function_to_hypothesis_tests.get(key, set()) + ) + for key in set(self.function_to_tests) | 
set(function_to_concolic_tests) | set(function_to_hypothesis_tests) } # Get a dict of file_path_to_classes of fto and helpers_of_fto @@ -2019,7 +2072,11 @@ def cleanup_generated_files(self) -> None: paths_to_cleanup.append(test_file.instrumented_behavior_file_path) paths_to_cleanup.append(test_file.benchmarking_file_path) + if self.hypothesis_tests_dir and self.hypothesis_tests_dir.exists(): + paths_to_cleanup.append(self.hypothesis_tests_dir) + cleanup_paths(paths_to_cleanup) + self.hypothesis_tests_dir = None def get_test_env( self, codeflash_loop_index: int, codeflash_test_iteration: int, codeflash_tracer_disable: int = 1 diff --git a/codeflash/optimization/optimizer.py b/codeflash/optimization/optimizer.py index 38b7f0d37..325714c2e 100644 --- a/codeflash/optimization/optimizer.py +++ b/codeflash/optimization/optimizer.py @@ -53,6 +53,7 @@ def __init__(self, args: Namespace) -> None: self.experiment_id = os.getenv("CODEFLASH_EXPERIMENT_ID", None) self.local_aiservice_client = LocalAiServiceClient() if self.experiment_id else None self.replay_tests_dir = None + self.hypothesis_tests_dirs: list[Path] = [] # Track all hypothesis test directories self.functions_checkpoint: CodeflashRunCheckpoint | None = None self.current_function_being_optimized: FunctionToOptimize | None = None # current only for the LSP self.current_function_optimizer: FunctionOptimizer | None = None @@ -335,6 +336,8 @@ def run(self) -> None: function_optimizer # needed to clean up from the outside of this function ) best_optimization = function_optimizer.optimize_function() + if function_optimizer.hypothesis_tests_dir: + self.hypothesis_tests_dirs.append(function_optimizer.hypothesis_tests_dir) if self.functions_checkpoint: self.functions_checkpoint.add_function_to_checkpoint( function_to_optimize.qualified_name_with_modules_from_root(self.args.project_root) @@ -428,7 +431,11 @@ def cleanup_temporary_paths(self) -> None: if self.current_function_optimizer: self.current_function_optimizer.cleanup_generated_files() - cleanup_paths([self.test_cfg.concolic_test_root_dir, self.replay_tests_dir]) + + paths_to_cleanup = [self.test_cfg.concolic_test_root_dir, self.replay_tests_dir] + paths_to_cleanup.extend(self.hypothesis_tests_dirs) + cleanup_paths(paths_to_cleanup) + self.hypothesis_tests_dirs.clear() def worktree_mode(self) -> None: if self.current_worktree: diff --git a/codeflash/verification/concolic_testing.py b/codeflash/verification/concolic_testing.py index 8f30a1562..2190ba6f9 100644 --- a/codeflash/verification/concolic_testing.py +++ b/codeflash/verification/concolic_testing.py @@ -8,6 +8,7 @@ from typing import TYPE_CHECKING from codeflash.cli_cmds.console import console, logger +from codeflash.code_utils.code_utils import get_qualified_function_path from codeflash.code_utils.compat import SAFE_SYS_EXECUTABLE from codeflash.code_utils.concolic_utils import clean_concolic_tests from codeflash.code_utils.static_analysis import has_typed_parameters @@ -42,6 +43,9 @@ def generate_concolic_tests( logger.info("Generating concolic opcode coverage tests for the original code…") console.rule() try: + qualified_function_path = get_qualified_function_path( + function_to_optimize.file_path, args.project_root, function_to_optimize.qualified_name + ) cover_result = subprocess.run( [ SAFE_SYS_EXECUTABLE, @@ -50,15 +54,7 @@ def generate_concolic_tests( "cover", "--example_output_format=pytest", "--per_condition_timeout=20", - ".".join( - [ - function_to_optimize.file_path.relative_to(args.project_root) - .with_suffix("") - 
.as_posix() - .replace("/", "."), - function_to_optimize.qualified_name, - ] - ), + qualified_function_path, ], capture_output=True, text=True, @@ -84,7 +80,10 @@ def generate_concolic_tests( test_framework=args.test_framework, pytest_cmd=args.pytest_cmd, ) - function_to_concolic_tests, num_discovered_concolic_tests, _ = discover_unit_tests(concolic_test_cfg) + file_to_funcs = {function_to_optimize.file_path: [function_to_optimize]} + function_to_concolic_tests, num_discovered_concolic_tests, _ = discover_unit_tests( + concolic_test_cfg, file_to_funcs_to_optimize=file_to_funcs + ) logger.info( f"Created {num_discovered_concolic_tests} " f"concolic unit test case{'s' if num_discovered_concolic_tests != 1 else ''} " diff --git a/codeflash/verification/equivalence.py b/codeflash/verification/equivalence.py index 9d7f5ba2c..89b0d9b6a 100644 --- a/codeflash/verification/equivalence.py +++ b/codeflash/verification/equivalence.py @@ -1,7 +1,7 @@ import sys from codeflash.cli_cmds.console import logger -from codeflash.models.models import TestResults, TestType, VerificationType +from codeflash.models.models import FunctionTestInvocation, TestResults, TestType, VerificationType from codeflash.verification.comparator import comparator INCREASED_RECURSION_LIMIT = 5000 @@ -14,14 +14,47 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR original_recursion_limit = sys.getrecursionlimit() if original_recursion_limit < INCREASED_RECURSION_LIMIT: sys.setrecursionlimit(INCREASED_RECURSION_LIMIT) # Increase recursion limit to avoid RecursionError + + # Separate Hypothesis tests from other test types for semantic comparison + # Hypothesis tests are always compared semantically (by test function, not example count) + original_hypothesis = [ + r for r in original_results.test_results if r.test_type == TestType.HYPOTHESIS_TEST and r.loop_index == 1 + ] + candidate_hypothesis = [ + r for r in candidate_results.test_results if r.test_type == TestType.HYPOTHESIS_TEST and r.loop_index == 1 + ] + + # Compare Hypothesis tests semantically if any are present + if original_hypothesis or candidate_hypothesis: + logger.debug( + f"Comparing Hypothesis tests: original={len(original_hypothesis)} examples, " + f"candidate={len(candidate_hypothesis)} examples" + ) + hypothesis_equal = _compare_hypothesis_tests_semantic(original_hypothesis, candidate_hypothesis) + if not hypothesis_equal: + logger.info("Hypothesis comparison failed") + sys.setrecursionlimit(original_recursion_limit) + return False + logger.debug("Hypothesis comparison passed") + test_ids_superset = original_results.get_all_unique_invocation_loop_ids().union( set(candidate_results.get_all_unique_invocation_loop_ids()) ) + logger.debug(f"Total test IDs in superset: {len(test_ids_superset)}") are_equal: bool = True did_all_timeout: bool = True for test_id in test_ids_superset: original_test_result = original_results.get_by_unique_invocation_loop_id(test_id) cdd_test_result = candidate_results.get_by_unique_invocation_loop_id(test_id) + + # Skip Hypothesis tests - already compared semantically above + if original_test_result and original_test_result.test_type == TestType.HYPOTHESIS_TEST: + did_all_timeout = False # Hypothesis tests are checked separately, not timed out + continue + if cdd_test_result and cdd_test_result.test_type == TestType.HYPOTHESIS_TEST: + did_all_timeout = False + continue + if cdd_test_result is not None and original_test_result is None: continue # If helper function instance_state verification is not present, 
that's ok. continue @@ -33,6 +66,11 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR continue if original_test_result is None or cdd_test_result is None: are_equal = False + logger.debug( + f"Test result mismatch: test_id={test_id}, " + f"original_present={original_test_result is not None}, " + f"candidate_present={cdd_test_result is not None}" + ) break did_all_timeout = did_all_timeout and original_test_result.timed_out if original_test_result.timed_out: @@ -80,5 +118,71 @@ def compare_test_results(original_results: TestResults, candidate_results: TestR break sys.setrecursionlimit(original_recursion_limit) if did_all_timeout: + logger.debug("Comparison failed: all tests timed out") return False + logger.debug(f"Final comparison result: are_equal={are_equal}") return are_equal + + +def _compare_hypothesis_tests_semantic(original_hypothesis: list, candidate_hypothesis: list) -> bool: + """Compare Hypothesis tests by test function, not by example count. + + Hypothesis can generate different numbers of examples between runs due to: + - Timing differences + - Early stopping + - Shrinking behavior + - Performance differences + + What matters is whether the test functions themselves pass or fail, + not how many examples Hypothesis generated. + """ + + def get_test_key(test_result: FunctionTestInvocation) -> tuple[str, str, str, str]: + """Get unique key for a Hypothesis test function.""" + return ( + test_result.id.test_module_path, + test_result.id.test_class_name, + test_result.id.test_function_name, + test_result.id.function_getting_tested, + ) + + # Group by test function and simultaneously collect failure flag and example count + orig_by_func = {} + for result in original_hypothesis: + test_key = get_test_key(result) + group = orig_by_func.setdefault(test_key, [0, False]) # [count, had_failure] + group[0] += 1 + if not result.did_pass: + group[1] = True + + cand_by_func = {} + for result in candidate_hypothesis: + test_key = get_test_key(result) + group = cand_by_func.setdefault(test_key, [0, False]) # [count, had_failure] + group[0] += 1 + if not result.did_pass: + group[1] = True + + orig_total_examples = sum(group[0] for group in orig_by_func.values()) + cand_total_examples = sum(group[0] for group in cand_by_func.values()) + + logger.debug( + f"Hypothesis comparison: Original={len(orig_by_func)} test functions ({orig_total_examples} examples), " + f"Candidate={len(cand_by_func)} test functions ({cand_total_examples} examples)" + ) + + # Compare only for test_keys present in original + for test_key, (_orig_count, orig_had_failure) in orig_by_func.items(): + cand_group = cand_by_func.get(test_key) + if cand_group is None: + continue # Already handled above + + cand_had_failure = cand_group[1] + + if orig_had_failure != cand_had_failure: + logger.debug( + f"Hypothesis test function behavior mismatch: {test_key} " + f"(original_failed={orig_had_failure}, candidate_failed={cand_had_failure})" + ) + return False + return True diff --git a/codeflash/verification/hypothesis_testing.py b/codeflash/verification/hypothesis_testing.py new file mode 100644 index 000000000..9d213c4b4 --- /dev/null +++ b/codeflash/verification/hypothesis_testing.py @@ -0,0 +1,280 @@ +from __future__ import annotations + +import ast +import subprocess +import tempfile +import time +from pathlib import Path +from typing import TYPE_CHECKING + +from codeflash.cli_cmds.console import console, logger +from codeflash.code_utils.code_utils import get_qualified_function_path +from 
codeflash.code_utils.formatter import format_code +from codeflash.code_utils.static_analysis import has_typed_parameters +from codeflash.discovery.discover_unit_tests import discover_unit_tests +from codeflash.verification.verification_utils import TestConfig + +if TYPE_CHECKING: + from argparse import Namespace + + from codeflash.discovery.functions_to_optimize import FunctionToOptimize + from codeflash.models.models import FunctionCalledInTest + + +def remove_functions_with_only_any_type(code_string: str) -> str: + """Remove functions that have only Any type annotations. + + This filters out functions where all parameters are annotated with typing.Any, + as these don't provide useful type information for property-based testing. + """ + tree = ast.parse(code_string) + new_body = [] + + for node in tree.body: + if isinstance(node, (ast.Import, ast.ImportFrom)): + new_body.append(node) + elif isinstance(node, ast.FunctionDef): + all_any = True + has_args = False + + for arg in node.args.args: + has_args = True + if arg.annotation: + if isinstance(arg.annotation, ast.Name): + if arg.annotation.id != "Any": + all_any = False + elif isinstance(arg.annotation, ast.Attribute): + if arg.annotation.attr != "Any": + all_any = False + elif isinstance(arg.annotation, ast.Subscript): + all_any = False + else: + all_any = False + else: + all_any = False + + if (has_args and not all_any) or not has_args: + new_body.append(node) + + else: + new_body.append(node) + + new_tree = ast.Module(body=new_body, type_ignores=[]) + return ast.unparse(new_tree) + + +def filter_hypothesis_tests_by_function_name(code: str, function_name: str) -> str: + """Filter hypothesis tests to only include tests matching the function name. + + Preserves all imports, module-level assignments, and only test functions + that contain the target function name. + + Args: + code: The hypothesis test code to filter + function_name: The name of the function being tested + + Returns: + Filtered code with only matching tests + + """ + tree = ast.parse(code) + + class TestFunctionRemover(ast.NodeTransformer): + def visit_Module(self, node): # noqa: ANN001, ANN202 + # Filter body to keep imports, module-level assignments, and matching test functions + new_body = [] + for item in node.body: + if isinstance(item, (ast.Import, ast.ImportFrom, ast.Assign)): + # Keep all imports and module-level assignments + new_body.append(item) + elif isinstance(item, ast.FunctionDef) and item.name.startswith("test_") and function_name in item.name: + # Only keep test functions that match the function name + new_body.append(item) + node.body = new_body + return node + + modified_tree = TestFunctionRemover().visit(tree) + ast.fix_missing_locations(modified_tree) + return ast.unparse(modified_tree) + + +def make_hypothesis_tests_deterministic(code: str) -> str: + """Add @settings(derandomize=True) decorator and constrain strategies to make Hypothesis tests deterministic.""" + try: + tree = ast.parse(code) + except SyntaxError: + return code + + settings_imported = any( + isinstance(node, ast.ImportFrom) + and node.module == "hypothesis" + and any(alias.name == "settings" for alias in node.names) + for node in tree.body + ) + + if not settings_imported: + tree.body.insert(0, ast.parse("from hypothesis import settings").body[0]) + + class StrategyConstrainer(ast.NodeTransformer): + def visit_Call(self, node: ast.Call) -> ast.Call: + self.generic_visit(node) + + # Check if this is a strategy call (st.floats(), st.integers(), etc.) 
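+            # Assumption (matches the ghostwriter's typical output): strategies are
+            # referenced via the ``st`` alias, e.g. ``st.floats()`` becomes
+            # ``st.floats(min_value=-1000000.0, max_value=1000000.0, allow_nan=False, allow_infinity=False)``
+            # and ``st.integers()`` becomes ``st.integers(min_value=-10000, max_value=10000)``;
+            # calls spelled differently (e.g. ``strategies.floats()``) are left untouched.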
+ if ( + isinstance(node.func, ast.Attribute) + and isinstance(node.func.value, ast.Name) + and node.func.value.id == "st" + ): + if node.func.attr == "floats" and not any( + k.arg in ["min_value", "max_value", "allow_nan", "allow_infinity"] for k in node.keywords + ): + # Constrain floats to reasonable bounds + node.keywords.extend( + [ + ast.keyword( + arg="min_value", value=ast.UnaryOp(op=ast.USub(), operand=ast.Constant(value=1e6)) + ), + ast.keyword(arg="max_value", value=ast.Constant(value=1e6)), + ast.keyword(arg="allow_nan", value=ast.Constant(value=False)), + ast.keyword(arg="allow_infinity", value=ast.Constant(value=False)), + ] + ) + elif node.func.attr == "integers" and not any( + k.arg in ["min_value", "max_value"] for k in node.keywords + ): + # Constrain integers to reasonable bounds (including negatives) + node.keywords.extend( + [ + ast.keyword(arg="min_value", value=ast.Constant(value=-10000)), + ast.keyword(arg="max_value", value=ast.Constant(value=10000)), + ] + ) + return node + + tree = StrategyConstrainer().visit(tree) + ast.fix_missing_locations(tree) + + for node in ast.walk(tree): + if isinstance(node, ast.FunctionDef): + settings_decorator = next( + ( + d + for d in node.decorator_list + if isinstance(d, ast.Call) and isinstance(d.func, ast.Name) and d.func.id == "settings" + ), + None, + ) + + if settings_decorator: + if not any(k.arg == "derandomize" for k in settings_decorator.keywords): + settings_decorator.keywords.append(ast.keyword(arg="derandomize", value=ast.Constant(value=True))) + else: + node.decorator_list.append( + ast.Call( + func=ast.Name(id="settings", ctx=ast.Load()), + args=[], + keywords=[ast.keyword(arg="derandomize", value=ast.Constant(value=True))], + ) + ) + + return ast.unparse(tree) + + +def generate_hypothesis_tests( + test_cfg: TestConfig, args: Namespace, function_to_optimize: FunctionToOptimize, function_to_optimize_ast: ast.AST +) -> tuple[dict[str, list[FunctionCalledInTest]], str, Path | None]: + """Generate property-based tests using Hypothesis ghostwriter. + + This function: + 1. Uses Hypothesis CLI to generate property-based tests for the target function + 2. Filters generated tests to only include the target function + 3. Removes functions with only Any type annotations + 4. Makes tests deterministic by adding @settings(derandomize=True) + 5. Formats the tests with the project formatter + + Returns: + Tuple of (function_to_tests_map, test_suite_code, hypothesis_test_suite_dir) + The hypothesis_test_suite_dir is None if no tests were generated. 
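+
+    A rough sketch of the underlying ghostwriter command this wraps (the dotted
+    module path below is illustrative):
+
+        hypothesis write my_package.my_module.my_function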
+ + """ + start_time = time.perf_counter() + function_to_hypothesis_tests: dict[str, list[FunctionCalledInTest]] = {} + hypothesis_test_suite_code: str = "" + hypothesis_test_suite_dir: Path | None = None + + if ( + test_cfg.project_root_path + and isinstance(function_to_optimize_ast, (ast.FunctionDef, ast.AsyncFunctionDef)) + and has_typed_parameters(function_to_optimize_ast, function_to_optimize.parents) + ): + logger.info("Generating Hypothesis tests for the original code…") + console.rule() + + try: + qualified_function_path = get_qualified_function_path( + function_to_optimize.file_path, args.project_root, function_to_optimize.qualified_name + ) + hypothesis_result = subprocess.run( + ["hypothesis", "write", qualified_function_path], + capture_output=True, + text=True, + cwd=args.project_root, + check=False, + timeout=60, + ) + except subprocess.TimeoutExpired: + logger.debug("Hypothesis test generation timed out") + end_time = time.perf_counter() + logger.debug(f"Hypothesis test generation completed in {end_time - start_time:.2f} seconds") + return function_to_hypothesis_tests, hypothesis_test_suite_code, hypothesis_test_suite_dir + + if hypothesis_result.returncode == 0: + hypothesis_test_suite_code = hypothesis_result.stdout + hypothesis_test_suite_dir = Path(tempfile.mkdtemp(prefix="codeflash_hypothesis_", dir=test_cfg.tests_root)) + hypothesis_path = hypothesis_test_suite_dir / "test_hypothesis.py" + hypothesis_path.write_text(hypothesis_test_suite_code, encoding="utf8") + + hypothesis_config = TestConfig( + tests_root=hypothesis_test_suite_dir, + tests_project_rootdir=test_cfg.tests_project_rootdir, + project_root_path=args.project_root, + test_framework=args.test_framework, + pytest_cmd=args.pytest_cmd, + ) + file_to_funcs = {function_to_optimize.file_path: [function_to_optimize]} + function_to_hypothesis_tests, num_discovered_hypothesis_tests, _ = discover_unit_tests( + hypothesis_config, file_to_funcs_to_optimize=file_to_funcs + ) + with hypothesis_path.open("r", encoding="utf-8") as f: + original_code = f.read() + + unparsed = filter_hypothesis_tests_by_function_name(original_code, function_to_optimize.function_name) + + hypothesis_test_suite_code = format_code( + args.formatter_cmds, + hypothesis_path, + optimized_code=make_hypothesis_tests_deterministic(remove_functions_with_only_any_type(unparsed)), + print_status=False, + ) + with hypothesis_path.open("w", encoding="utf-8") as f: + f.write(hypothesis_test_suite_code) + function_to_hypothesis_tests, num_discovered_hypothesis_tests, _ = discover_unit_tests( + hypothesis_config, file_to_funcs_to_optimize=file_to_funcs + ) + logger.info( + f"Created {num_discovered_hypothesis_tests} " + f"hypothesis unit test case{'s' if num_discovered_hypothesis_tests != 1 else ''} " + ) + console.rule() + end_time = time.perf_counter() + logger.debug(f"Generated hypothesis tests in {end_time - start_time:.2f} seconds") + return function_to_hypothesis_tests, hypothesis_test_suite_code, hypothesis_test_suite_dir + + logger.debug( + f"Error running hypothesis write {': ' + hypothesis_result.stderr if hypothesis_result.stderr else '.'}" + ) + console.rule() + + end_time = time.perf_counter() + logger.debug(f"Hypothesis test generation completed in {end_time - start_time:.2f} seconds") + return function_to_hypothesis_tests, hypothesis_test_suite_code, hypothesis_test_suite_dir diff --git a/pyproject.toml b/pyproject.toml index 1186574c0..911b2728f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,7 @@ dependencies = [ 
"pygls>=2.0.0,<3.0.0", "codeflash-benchmark", "filelock", + "hypothesis>=6.141.1", ] [project.urls] diff --git a/tests/test_hypothesis_testing.py b/tests/test_hypothesis_testing.py new file mode 100644 index 000000000..d44b2413a --- /dev/null +++ b/tests/test_hypothesis_testing.py @@ -0,0 +1,140 @@ +"""Tests for hypothesis_testing.py functions.""" + +from codeflash.verification.hypothesis_testing import make_hypothesis_tests_deterministic + + +def test_adds_derandomize_decorator(): + """Test that @settings(derandomize=True) is added when missing.""" + src = """ +from hypothesis import given, strategies as st + +@given(x=st.integers()) +def test_x(x): + assert isinstance(x, int) +""" + expected = """from hypothesis import settings\nfrom hypothesis import given, strategies as st\n\n@given(x=st.integers(min_value=-10000, max_value=10000))\n@settings(derandomize=True)\ndef test_x(x):\n assert isinstance(x, int)""" + out = make_hypothesis_tests_deterministic(src) + assert out == expected + + +def test_integers_constrained_with_negatives(): + """Test that st.integers() gets bounded to [-10000, 10000].""" + src = """from hypothesis import given, strategies as st +@given(x=st.integers()) +def t(x): + pass +""" + expected = """from hypothesis import settings\nfrom hypothesis import given, strategies as st\n\n@given(x=st.integers(min_value=-10000, max_value=10000))\n@settings(derandomize=True)\ndef t(x):\n pass""" + out = make_hypothesis_tests_deterministic(src) + assert out == expected + + +def test_floats_constrained_to_finite(): + """Test that st.floats() is constrained to finite values with bounds.""" + src = """from hypothesis import given, strategies as st +@given(x=st.floats()) +def t(x): + pass +""" + expected = """from hypothesis import settings\nfrom hypothesis import given, strategies as st\n\n@given(x=st.floats(min_value=-1000000.0, max_value=1000000.0, allow_nan=False, allow_infinity=False))\n@settings(derandomize=True)\ndef t(x):\n pass""" + out = make_hypothesis_tests_deterministic(src) + assert out == expected + + +def test_existing_constraints_not_overridden(): + """Test that existing constraints on strategies are preserved.""" + src = """from hypothesis import given, strategies as st, settings + +@settings(derandomize=True, max_examples=5) +@given(x=st.integers(min_value=-5, max_value=5)) +def t(x): + pass +""" + expected = """from hypothesis import given, strategies as st, settings\n\n@settings(derandomize=True, max_examples=5)\n@given(x=st.integers(min_value=-5, max_value=5))\ndef t(x):\n pass""" + out = make_hypothesis_tests_deterministic(src) + assert out == expected + + +def test_existing_float_constraints_preserved(): + """Test that existing float constraints are not overridden.""" + src = """from hypothesis import given, strategies as st + +@given(y=st.floats(min_value=-1.0, max_value=1.0, allow_nan=False, allow_infinity=False)) +def t(y): + pass +""" + expected = """from hypothesis import settings\nfrom hypothesis import given, strategies as st\n\n@given(y=st.floats(min_value=-1.0, max_value=1.0, allow_nan=False, allow_infinity=False))\n@settings(derandomize=True)\ndef t(y):\n pass""" + out = make_hypothesis_tests_deterministic(src) + assert out == expected + + +def test_idempotency(): + """Test that running the function twice produces the same result.""" + src = """from hypothesis import given, strategies as st + +@given(x=st.integers(), y=st.floats()) +def test_func(x, y): + pass +""" + out1 = make_hypothesis_tests_deterministic(src) + out2 = 
make_hypothesis_tests_deterministic(out1) + assert out1 == out2 + + +def test_multiple_strategies_handled(): + """Test that multiple strategies in one test are all constrained.""" + src = """from hypothesis import given, strategies as st + +@given(a=st.integers(), b=st.integers(), c=st.floats()) +def test_multi(a, b, c): + pass +""" + expected = """from hypothesis import settings\nfrom hypothesis import given, strategies as st\n\n@given(a=st.integers(min_value=-10000, max_value=10000), b=st.integers(min_value=-10000, max_value=10000), c=st.floats(min_value=-1000000.0, max_value=1000000.0, allow_nan=False, allow_infinity=False))\n@settings(derandomize=True)\ndef test_multi(a, b, c):\n pass""" + out = make_hypothesis_tests_deterministic(src) + assert out == expected + + +def test_settings_import_added_if_missing(): + """Test that 'from hypothesis import settings' is added when needed.""" + src = """from hypothesis import given, strategies as st + +@given(x=st.integers()) +def test_x(x): + pass +""" + expected = """from hypothesis import settings\nfrom hypothesis import given, strategies as st\n\n@given(x=st.integers(min_value=-10000, max_value=10000))\n@settings(derandomize=True)\ndef test_x(x):\n pass""" + out = make_hypothesis_tests_deterministic(src) + assert out == expected + + +def test_partial_constraints_completed(): + """Test that partial constraints are completed.""" + src = """from hypothesis import given, strategies as st + +@given(x=st.integers(min_value=100)) +def test_x(x): + pass +""" + expected = """from hypothesis import settings\nfrom hypothesis import given, strategies as st\n\n@given(x=st.integers(min_value=100))\n@settings(derandomize=True)\ndef test_x(x):\n pass""" + out = make_hypothesis_tests_deterministic(src) + assert out == expected + + +def test_syntax_error_returns_original(): + """Test that invalid Python syntax returns original code unchanged.""" + invalid_src = "this is not valid python @#$%" + out = make_hypothesis_tests_deterministic(invalid_src) + assert out == invalid_src + + +def test_no_hypothesis_code_unchanged(): + """Test that code without hypothesis is returned mostly unchanged.""" + src = """def regular_function(x): + return x * 2 + +def test_regular(): + assert regular_function(2) == 4 +""" + expected = """from hypothesis import settings\n\n@settings(derandomize=True)\ndef regular_function(x):\n return x * 2\n\n@settings(derandomize=True)\ndef test_regular():\n assert regular_function(2) == 4""" + out = make_hypothesis_tests_deterministic(src) + assert out == expected diff --git a/uv.lock b/uv.lock index 0d99bdf15..6d3800cbd 100644 --- a/uv.lock +++ b/uv.lock @@ -309,6 +309,8 @@ dependencies = [ { name = "filelock", version = "3.20.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "gitpython" }, { name = "humanize" }, + { name = "hypothesis", version = "6.141.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "hypothesis", version = "6.142.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, { name = "inquirer", version = "3.4.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.9.2'" }, { name = "inquirer", version = "3.4.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.9.2'" }, { name = "isort", version = "6.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, @@ 
-399,6 +401,7 @@ requires-dist = [ { name = "filelock" }, { name = "gitpython", specifier = ">=3.1.31" }, { name = "humanize", specifier = ">=4.0.0" }, + { name = "hypothesis", specifier = ">=6.141.1" }, { name = "inquirer", specifier = ">=3.0.0" }, { name = "isort", specifier = ">=5.11.0" }, { name = "jedi", specifier = ">=0.19.1" }, @@ -791,6 +794,44 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1e/c7/316e7ca04d26695ef0635dc81683d628350810eb8e9b2299fc08ba49f366/humanize-4.13.0-py3-none-any.whl", hash = "sha256:b810820b31891813b1673e8fec7f1ed3312061eab2f26e3fa192c393d11ed25f", size = 128869, upload-time = "2025-08-25T09:39:18.54Z" }, ] +[[package]] +name = "hypothesis" +version = "6.141.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.9.2' and python_full_version < '3.10'", + "python_full_version < '3.9.2'", +] +dependencies = [ + { name = "attrs", marker = "python_full_version < '3.10'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.10'" }, + { name = "sortedcontainers", marker = "python_full_version < '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/85/20/8aa62b3e69fea68bb30d35d50be5395c98979013acd8152d64dc927e4cdb/hypothesis-6.141.1.tar.gz", hash = "sha256:8ef356e1e18fbeaa8015aab3c805303b7fe4b868e5b506e87ad83c0bf951f46f", size = 467389, upload-time = "2025-10-15T19:12:25.262Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/9a/f901858f139694dd669776983781b08a7c1717911025da6720e526bd8ce3/hypothesis-6.141.1-py3-none-any.whl", hash = "sha256:a5b3c39c16d98b7b4c3c5c8d4262e511e3b2255e6814ced8023af49087ad60b3", size = 535000, upload-time = "2025-10-15T19:12:21.659Z" }, +] + +[[package]] +name = "hypothesis" +version = "6.142.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.13'", + "python_full_version == '3.12.*'", + "python_full_version == '3.11.*'", + "python_full_version == '3.10.*'", +] +dependencies = [ + { name = "attrs", marker = "python_full_version >= '3.10'" }, + { name = "exceptiongroup", marker = "python_full_version == '3.10.*'" }, + { name = "sortedcontainers", marker = "python_full_version >= '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/47/83/8f76d7c965beb4d3a65d188232c32db97b0799b0e893227d520d5d2a0144/hypothesis-6.142.2.tar.gz", hash = "sha256:c4204a2ce327e45fbaf83a2b58142a285135698dc1d08e368ae9901f06b49e64", size = 465987, upload-time = "2025-10-20T16:08:20.225Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9b/8f/194d63f715c7b0ace35b4f2a83b756d5bc703299b706c401b7ec593054fc/hypothesis-6.142.2-py3-none-any.whl", hash = "sha256:cc6c6e66c06aff695dd255501a767b528e00d84ce3572160425a9ba5e4a47845", size = 533375, upload-time = "2025-10-20T16:08:16.903Z" }, +] + [[package]] name = "identify" version = "2.6.15" @@ -3275,6 +3316,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/be/d09147ad1ec7934636ad912901c5fd7667e1c858e19d355237db0d0cd5e4/smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e", size = 24303, upload-time = "2025-01-02T07:14:38.724Z" }, ] +[[package]] +name = "sortedcontainers" +version = "2.4.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e8/c4/ba2f8066cceb6f23394729afe52f3bf7adec04bf9ed2c820b39e19299111/sortedcontainers-2.4.0.tar.gz", hash = 
"sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88", size = 30594, upload-time = "2021-05-16T22:03:42.897Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/46/9cb0e58b2deb7f82b84065f37f3bffeb12413f947f9388e4cac22c4621ce/sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0", size = 29575, upload-time = "2021-05-16T22:03:41.177Z" }, +] + [[package]] name = "stack-data" version = "0.6.3"