changes

KRRT7 · KRRT7 · commit faa88a9d786e · 2025-07-29T22:50:13.000-07:00
diff --git a/.github/workflows/unit-tests.yaml b/.github/workflows/unit-tests.yaml
@@ -11,10 +11,9 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
-        os: [ubuntu-latest]
+        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
     continue-on-error: true
-    runs-on: ${{ matrix.os }}
+    runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
         with:
@@ -25,29 +24,10 @@ jobs:
         uses: astral-sh/setup-uv@v5
         with:
           python-version: ${{ matrix.python-version }}
+          version: "0.5.30"
 
       - name: install dependencies
         run: uv sync
 
       - name: Unit tests
-        run: uv run pytest tests/ --benchmark-skip -m "not ci_skip"
-
-  # unit-tests-windows:
-  #   runs-on: windows-latest
-  #   continue-on-error: true
-  #   steps:
-  #     - uses: actions/checkout@v4
-  #       with:
-  #         fetch-depth: 0
-  #         token: ${{ secrets.GITHUB_TOKEN }}
-
-  #     - name: Install uv
-  #       uses: astral-sh/setup-uv@v5
-  #       with:
-  #         python-version: "3.11"
-
-  #     - name: install dependencies
-  #       run: uv sync
-
-  #     - name: Unit tests
-  #       run: uv run pytest tests/ --benchmark-skip -m "not ci_skip"
+        run: uv run pytest tests/
diff --git a/tests/test_pickle_patcher.py b/tests/test_pickle_patcher.py
@@ -1,6 +1,5 @@
 import os
 import pickle
-import platform
 import shutil
 import socket
 import sqlite3
@@ -281,7 +280,7 @@ def test_run_and_parse_picklepatch() -> None:
         cursor.execute(
             "SELECT function_name, class_name, module_name, file_path, benchmark_function_name, benchmark_module_path, benchmark_line_number FROM benchmark_function_timings ORDER BY benchmark_module_path, benchmark_function_name, function_name")
         function_calls = cursor.fetchall()
-        
+
         # Assert the length of function calls
         assert len(function_calls) == 2, f"Expected 2 function calls, but got {len(function_calls)}"
         function_benchmark_timings = codeflash_benchmark_plugin.get_function_benchmark_timings(output_file)
@@ -327,6 +326,7 @@ def test_run_and_parse_picklepatch() -> None:
             assert actual[4] == expected[4], f"Mismatch at index {idx} for benchmark_function_name"
             assert actual[5] == expected[5], f"Mismatch at index {idx} for benchmark_module_path"
             assert actual[6] == expected[6], f"Mismatch at index {idx} for benchmark_line_number"
+        conn.close()
 
         # Generate replay test
         generate_replay_test(output_file, replay_tests_dir)
@@ -372,7 +372,7 @@ def test_run_and_parse_picklepatch() -> None:
         test_env["CODEFLASH_TEST_ITERATION"] = "0"
         test_env["CODEFLASH_LOOP_INDEX"] = "1"
         test_type = TestType.REPLAY_TEST
-        replay_test_function = "test_code_to_optimize_bubble_sort_picklepatch_test_unused_socket_bubble_sort_with_unused_socket"
+        replay_test_function = "test_code_to_optimize_bubble_sort_picklepatch_test_unused_socket_bubble_sort_with_unused_socket_test_socket_picklepatch"
         func_optimizer = opt.create_function_optimizer(func)
         func_optimizer.test_files = TestFiles(
             test_files=[
@@ -396,7 +396,7 @@ def test_run_and_parse_picklepatch() -> None:
         )
         assert len(test_results_unused_socket) == 1
         assert test_results_unused_socket.test_results[0].id.test_module_path == "code_to_optimize.tests.pytest.benchmarks_socket_test.codeflash_replay_tests.test_code_to_optimize_tests_pytest_benchmarks_socket_test_test_socket__replay_test_0"
-        assert test_results_unused_socket.test_results[0].id.test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_unused_socket_bubble_sort_with_unused_socket"
+        assert test_results_unused_socket.test_results[0].id.test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_unused_socket_bubble_sort_with_unused_socket_test_socket_picklepatch"
         assert test_results_unused_socket.test_results[0].did_pass == True
 
         # Replace with optimized candidate
@@ -440,16 +440,14 @@ def bubble_sort_with_unused_socket(data_container):
         assert new_test is not None
         replay_test_path.write_text(new_test)
 
-        # Run test for original function code that uses the socket. This test should pass because 
-        # the PicklePlaceholderAccessError is thrown as expected behavior, which the test framework 
-        # treats as a successful test execution (the exception is the expected outcome).
+        # Run test for original function code that uses the socket. This test is expected to fail because the PicklePlaceholder is accessed.
         test_env = os.environ.copy()
         test_env["CODEFLASH_TEST_ITERATION"] = "0"
         test_env["CODEFLASH_LOOP_INDEX"] = "1"
         test_type = TestType.REPLAY_TEST
         func = FunctionToOptimize(function_name="bubble_sort_with_used_socket", parents=[],
                                   file_path=Path(fto_used_socket_path))
-        replay_test_function = "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket"
+        replay_test_function = "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket_test_used_socket_picklepatch"
         func_optimizer = opt.create_function_optimizer(func)
         func_optimizer.test_files = TestFiles(
             test_files=[
@@ -477,8 +475,8 @@ def bubble_sort_with_unused_socket(data_container):
         assert test_results_used_socket.test_results[
                    0].id.test_module_path == "code_to_optimize.tests.pytest.benchmarks_socket_test.codeflash_replay_tests.test_code_to_optimize_tests_pytest_benchmarks_socket_test_test_socket__replay_test_0"
         assert test_results_used_socket.test_results[
-                   0].id.test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket"
-        assert test_results_used_socket.test_results[0].did_pass is True
+                   0].id.test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket_test_used_socket_picklepatch"
+        assert test_results_used_socket.test_results[0].did_pass is False
         print("test results used socket")
         print(test_results_used_socket)
         # Replace with optimized candidate
@@ -508,27 +506,15 @@ def bubble_sort_with_used_socket(data_container):
         assert test_results_used_socket.test_results[
                    0].id.test_module_path == "code_to_optimize.tests.pytest.benchmarks_socket_test.codeflash_replay_tests.test_code_to_optimize_tests_pytest_benchmarks_socket_test_test_socket__replay_test_0"
         assert test_results_used_socket.test_results[
-                   0].id.test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket"
-        
-        # TODO: Fix socket behavior differences on Windows
-        # On Windows, socket behavior differs from Unix platforms, causing tests to pass instead of fail
-        if platform.system() == "Windows":
-            # On Windows, the test passes when it should fail due to socket behavior differences
-            assert test_results_used_socket.test_results[0].did_pass is True
-            assert optimized_test_results_used_socket.test_results[0].did_pass is True
-            # Since both pass on Windows, comparison should return True
-            assert compare_test_results(test_results_used_socket, optimized_test_results_used_socket) is True
-        else:
-            # On Unix platforms, the expected behavior is that the test fails
-            assert test_results_used_socket.test_results[0].did_pass is False
-            assert optimized_test_results_used_socket.test_results[0].did_pass is True
-            # Even though tests threw the same error, we reject this as the behavior of the unpickleable object could not be determined.
-            assert compare_test_results(test_results_used_socket, optimized_test_results_used_socket) is False
+                   0].id.test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket_test_used_socket_picklepatch"
+        assert test_results_used_socket.test_results[0].did_pass is False
+
+        # Even though both tests threw the same error, the comparison is rejected because the behavior of the unpickleable object could not be determined.
+        assert compare_test_results(test_results_used_socket, optimized_test_results_used_socket) is False
 
     finally:
         # cleanup
         output_file.unlink(missing_ok=True)
         shutil.rmtree(replay_tests_dir, ignore_errors=True)
         fto_unused_socket_path.write_text(original_fto_unused_socket_code)
         fto_used_socket_path.write_text(original_fto_used_socket_code)
-
diff --git a/uv.lock b/uv.lock