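Fix tests: unpack the (match, diffs) tuple returned by compare_test_results; make TestDiff.test_src_code optional and return None from get_src_code when the test file is missing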

mohammedahmed18 · mohammedahmed18 · commit a7f8816f5ed8 · 2025-11-27T19:51:55.000+02:00
diff --git a/codeflash/models/models.py b/codeflash/models/models.py
@@ -513,6 +513,8 @@ def find_func_in_class(self, class_node: cst.ClassDef, func_name: str) -> Option
         return None
 
     def get_src_code(self, test_path: Path) -> Optional[str]:
+        if not test_path.exists():
+            return None
         test_src = test_path.read_text(encoding="utf-8")
         module_node = cst.parse_module(test_src)
 
diff --git a/codeflash/verification/equivalence.py b/codeflash/verification/equivalence.py
@@ -1,11 +1,17 @@
+from __future__ import annotations
+
 import sys
 from dataclasses import dataclass
 from enum import Enum
+from typing import TYPE_CHECKING, Optional
 
 from codeflash.cli_cmds.console import logger
 from codeflash.models.models import TestResults, TestType, VerificationType
 from codeflash.verification.comparator import comparator
 
+if TYPE_CHECKING:
+    from codeflash.models.models import TestResults
+
 INCREASED_RECURSION_LIMIT = 5000
 
 
@@ -19,10 +25,10 @@ class TestDiffScope(Enum):
 @dataclass
 class TestDiff:
     scope: TestDiffScope
-    test_src_code: str
     pytest_error: str
     original_value: any
     candidate_value: any
+    test_src_code: Optional[str] = None
 
 
 def compare_test_results(original_results: TestResults, candidate_results: TestResults) -> tuple[bool, list[TestDiff]]:
diff --git a/tests/test_codeflash_capture.py b/tests/test_codeflash_capture.py
@@ -502,7 +502,8 @@ def __init__(self, x=2):
             pytest_max_loops=1,
             testing_time=0.1,
         )
-        assert compare_test_results(test_results, test_results2)
+        match, _ = compare_test_results(test_results, test_results2)
+        assert match
 
     finally:
         test_path.unlink(missing_ok=True)
@@ -626,7 +627,8 @@ def __init__(self, *args, **kwargs):
             testing_time=0.1,
         )
 
-        assert compare_test_results(test_results, results2)
+        match, _ = compare_test_results(test_results, results2)
+        assert match
 
     finally:
         test_path.unlink(missing_ok=True)
@@ -754,7 +756,8 @@ def __init__(self, x=2):
             testing_time=0.1,
         )
 
-        assert compare_test_results(test_results, test_results2)
+        match, _ = compare_test_results(test_results, test_results2)
+        assert match
     finally:
         test_path.unlink(missing_ok=True)
         sample_code_path.unlink(missing_ok=True)
@@ -902,7 +905,8 @@ def another_helper(self):
             testing_time=0.1,
         )
 
-        assert compare_test_results(test_results, results2)
+        match, _ = compare_test_results(test_results, results2)
+        assert match
 
     finally:
         test_path.unlink(missing_ok=True)
@@ -1132,7 +1136,8 @@ def target_function(self):
         )
         # Remove instrumentation
         FunctionOptimizer.write_code_and_helpers(candidate_fto_code, candidate_helper_code, fto.file_path)
-        assert not compare_test_results(test_results, mutated_test_results)
+        match, _ = compare_test_results(test_results, mutated_test_results)
+        assert not match
 
         # This fto code stopped using a helper class. it should still pass
         no_helper1_fto_code = """
@@ -1170,7 +1175,8 @@ def target_function(self):
         )
         # Remove instrumentation
         FunctionOptimizer.write_code_and_helpers(candidate_fto_code, candidate_helper_code, fto.file_path)
-        assert compare_test_results(test_results, no_helper1_test_results)
+        match, _ = compare_test_results(test_results, no_helper1_test_results)
+        assert match
 
     finally:
         test_path.unlink(missing_ok=True)
diff --git a/tests/test_comparator.py b/tests/test_comparator.py
@@ -1176,7 +1176,8 @@ def test_compare_results_fn():
         )
     )
 
-    assert compare_test_results(original_results, new_results_1)
+    match, _ = compare_test_results(original_results, new_results_1)
+    assert match
 
     new_results_2 = TestResults()
     new_results_2.add(
@@ -1199,7 +1200,8 @@ def test_compare_results_fn():
         )
     )
 
-    assert not compare_test_results(original_results, new_results_2)
+    match, _ = compare_test_results(original_results, new_results_2)
+    assert not match
 
     new_results_3 = TestResults()
     new_results_3.add(
@@ -1241,7 +1243,8 @@ def test_compare_results_fn():
         )
     )
 
-    assert compare_test_results(original_results, new_results_3)
+    match, _ = compare_test_results(original_results, new_results_3)
+    assert match
 
     new_results_4 = TestResults()
     new_results_4.add(
@@ -1264,7 +1267,8 @@ def test_compare_results_fn():
         )
     )
 
-    assert not compare_test_results(original_results, new_results_4)
+    match, _ = compare_test_results(original_results, new_results_4)
+    assert not match
 
     new_results_5_baseline = TestResults()
     new_results_5_baseline.add(
@@ -1308,7 +1312,8 @@ def test_compare_results_fn():
         )
     )
 
-    assert  not compare_test_results(new_results_5_baseline, new_results_5_opt)
+    match, _ = compare_test_results(new_results_5_baseline, new_results_5_opt)
+    assert not match
 
     new_results_6_baseline = TestResults()
     new_results_6_baseline.add(
@@ -1352,9 +1357,11 @@ def test_compare_results_fn():
         )
     )
 
-    assert  not compare_test_results(new_results_6_baseline, new_results_6_opt)
+    match, _ = compare_test_results(new_results_6_baseline, new_results_6_opt)
+    assert not match
 
-    assert not compare_test_results(TestResults(), TestResults())
+    match, _ = compare_test_results(TestResults(), TestResults())
+    assert not match
 
 
 def test_exceptions():
diff --git a/tests/test_instrument_all_and_run.py b/tests/test_instrument_all_and_run.py
@@ -223,7 +223,8 @@ def test_sort():
 result: [0, 1, 2, 3, 4, 5]
 """
         assert out_str == results2[0].stdout
-        assert compare_test_results(test_results, results2)
+        match, _ = compare_test_results(test_results, results2)
+        assert match
     finally:
         fto_path.write_text(original_code, "utf-8")
         test_path.unlink(missing_ok=True)
@@ -368,7 +369,8 @@ def test_sort():
         assert test_results[1].return_value == ([0, 1, 2, 3, 4, 5],)
         out_str = """codeflash stdout : BubbleSorter.sorter() called\n"""
         assert test_results[1].stdout == out_str
-        assert compare_test_results(test_results, test_results)
+        match, _ = compare_test_results(test_results, test_results)
+        assert match
         assert test_results[2].id.function_getting_tested == "BubbleSorter.__init__"
         assert test_results[2].id.test_function_name == "test_sort"
         assert test_results[2].did_pass
@@ -396,7 +398,8 @@ def test_sort():
             testing_time=0.1,
         )
 
-        assert compare_test_results(test_results, results2)
+        match, _ = compare_test_results(test_results, results2)
+        assert match
 
         # Replace with optimized code that mutated instance attribute
         optimized_code = """
@@ -491,7 +494,8 @@ def sorter(self, arr):
         )
         assert new_test_results[3].runtime > 0
         assert new_test_results[3].did_pass
-        assert not compare_test_results(test_results, new_test_results)
+        match, _ = compare_test_results(test_results, new_test_results)
+        assert not match
 
     finally:
         fto_path.write_text(original_code, "utf-8")
@@ -630,7 +634,8 @@ def test_sort():
         out_str = """codeflash stdout : BubbleSorter.sorter_classmethod() called
 """
         assert test_results[0].stdout == out_str
-        assert compare_test_results(test_results, test_results)
+        match, _ = compare_test_results(test_results, test_results)
+        assert match
 
         assert test_results[1].id.function_getting_tested == "BubbleSorter.sorter_classmethod"
         assert test_results[1].id.iteration_id == "4_0"
@@ -655,7 +660,8 @@ def test_sort():
             testing_time=0.1,
         )
 
-        assert compare_test_results(test_results, results2)
+        match, _ = compare_test_results(test_results, results2)
+        assert match
 
     finally:
         fto_path.write_text(original_code, "utf-8")
@@ -794,7 +800,8 @@ def test_sort():
         out_str = """codeflash stdout : BubbleSorter.sorter_staticmethod() called
 """
         assert test_results[0].stdout == out_str
-        assert compare_test_results(test_results, test_results)
+        match, _ = compare_test_results(test_results, test_results)
+        assert match
 
         assert test_results[1].id.function_getting_tested == "BubbleSorter.sorter_staticmethod"
         assert test_results[1].id.iteration_id == "4_0"
@@ -819,7 +826,8 @@ def test_sort():
             testing_time=0.1,
         )
 
-        assert compare_test_results(test_results, results2)
+        match, _ = compare_test_results(test_results, results2)
+        assert match
 
     finally:
         fto_path.write_text(original_code, "utf-8")
diff --git a/tests/test_instrumentation_run_results_aiservice.py b/tests/test_instrumentation_run_results_aiservice.py
@@ -221,10 +221,10 @@ def sorter(self, arr):
             testing_time=0.1,
         )
         # assert test_results_mutated_attr[0].return_value[1]["self"].x == 1 TODO: add self as input to function
-        assert compare_test_results(
+        match, _ = compare_test_results(
             test_results, test_results_mutated_attr
         )  # Without codeflash capture, the init state was not verified, and the results are verified as correct even with the attribute mutated
-
+        assert match
         assert test_results_mutated_attr[0].stdout == "codeflash stdout : BubbleSorter.sorter() called\n"
     finally:
         fto_path.write_text(original_code, "utf-8")
@@ -403,9 +403,10 @@ def sorter(self, arr):
         assert test_results_mutated_attr[0].return_value[0] == {"x": 1}
         assert test_results_mutated_attr[0].verification_type == VerificationType.INIT_STATE_FTO
         assert test_results_mutated_attr[0].stdout == ""
-        assert not compare_test_results(
+        match,_ = compare_test_results(
             test_results, test_results_mutated_attr
         )  # The test should fail because the instance attribute was mutated
+        assert not match
         # Replace with optimized code that did not mutate existing instance attribute, but added a new one
         optimized_code_new_attr = """
 import sys
@@ -457,9 +458,10 @@ def sorter(self, arr):
         assert test_results_new_attr[0].stdout == ""
         # assert test_results_new_attr[1].return_value[1]["self"].x == 0 TODO: add self as input
         # assert test_results_new_attr[1].return_value[1]["self"].y == 2 TODO: add self as input
-        assert compare_test_results(
+        match,_ = compare_test_results(
             test_results, test_results_new_attr
         )  # The test should pass because the instance attribute was not mutated, only a new one was added
+        assert match
     finally:
         fto_path.write_text(original_code, "utf-8")
         test_path.unlink(missing_ok=True)
diff --git a/tests/test_pickle_patcher.py b/tests/test_pickle_patcher.py
@@ -427,8 +427,8 @@ def bubble_sort_with_unused_socket(data_container):
             testing_time=1.0,
         )
         assert len(optimized_test_results_unused_socket) == 1
-        verification_result = compare_test_results(test_results_unused_socket, optimized_test_results_unused_socket)
-        assert verification_result is True
+        match, _ = compare_test_results(test_results_unused_socket, optimized_test_results_unused_socket)
+        assert match
 
         # Remove the previous instrumentation
         replay_test_path.write_text(original_replay_test_code)
@@ -517,8 +517,8 @@ def bubble_sort_with_used_socket(data_container):
         assert test_results_used_socket.test_results[0].did_pass is False
 
         # Even though tests threw the same error, we reject this as the behavior of the unpickleable object could not be determined.
-        assert compare_test_results(test_results_used_socket, optimized_test_results_used_socket) is False
-
+        match, _ = compare_test_results(test_results_used_socket, optimized_test_results_used_socket)
+        assert not match
     finally:
         # cleanup
         output_file.unlink(missing_ok=True)

Original file line number	Diff line number	Diff line change
`@@ -1176,7 +1176,8 @@ def test_compare_results_fn():`
`1176`	`1176`	`)`
`1177`	`1177`	`)`
`1178`	`1178`
`1179`		`- assert compare_test_results(original_results, new_results_1)`
	`1179`	`+ match, _ = compare_test_results(original_results, new_results_1)`
	`1180`	`+ assert match`
`1180`	`1181`
`1181`	`1182`	`new_results_2 = TestResults()`
`1182`	`1183`	`new_results_2.add(`
`@@ -1199,7 +1200,8 @@ def test_compare_results_fn():`
`1199`	`1200`	`)`
`1200`	`1201`	`)`
`1201`	`1202`
`1202`		`- assert not compare_test_results(original_results, new_results_2)`
	`1203`	`+ match, _ = compare_test_results(original_results, new_results_2)`
	`1204`	`+ assert not match`
`1203`	`1205`
`1204`	`1206`	`new_results_3 = TestResults()`
`1205`	`1207`	`new_results_3.add(`
`@@ -1241,7 +1243,8 @@ def test_compare_results_fn():`
`1241`	`1243`	`)`
`1242`	`1244`	`)`
`1243`	`1245`
`1244`		`- assert compare_test_results(original_results, new_results_3)`
	`1246`	`+ match, _ = compare_test_results(original_results, new_results_3)`
	`1247`	`+ assert match`
`1245`	`1248`
`1246`	`1249`	`new_results_4 = TestResults()`
`1247`	`1250`	`new_results_4.add(`
`@@ -1264,7 +1267,8 @@ def test_compare_results_fn():`
`1264`	`1267`	`)`
`1265`	`1268`	`)`
`1266`	`1269`
`1267`		`- assert not compare_test_results(original_results, new_results_4)`
	`1270`	`+ match, _ = compare_test_results(original_results, new_results_4)`
	`1271`	`+ assert not match`
`1268`	`1272`
`1269`	`1273`	`new_results_5_baseline = TestResults()`
`1270`	`1274`	`new_results_5_baseline.add(`
`@@ -1308,7 +1312,8 @@ def test_compare_results_fn():`
`1308`	`1312`	`)`
`1309`	`1313`	`)`
`1310`	`1314`
`1311`		`- assert not compare_test_results(new_results_5_baseline, new_results_5_opt)`
	`1315`	`+ match, _ = compare_test_results(new_results_5_baseline, new_results_5_opt)`
	`1316`	`+ assert not match`
`1312`	`1317`
`1313`	`1318`	`new_results_6_baseline = TestResults()`
`1314`	`1319`	`new_results_6_baseline.add(`
`@@ -1352,9 +1357,11 @@ def test_compare_results_fn():`
`1352`	`1357`	`)`
`1353`	`1358`	`)`
`1354`	`1359`
`1355`		`- assert not compare_test_results(new_results_6_baseline, new_results_6_opt)`
	`1360`	`+ match, _ = compare_test_results(new_results_6_baseline, new_results_6_opt)`
	`1361`	`+ assert not match`
`1356`	`1362`
`1357`		`- assert not compare_test_results(TestResults(), TestResults())`
	`1363`	`+ match, _ = compare_test_results(TestResults(), TestResults())`
	`1364`	`+ assert not match`
`1358`	`1365`
`1359`	`1366`
`1360`	`1367`	`def test_exceptions():`