11import os
22import pickle
3- import platform
43import shutil
54import socket
65import sqlite3
@@ -281,7 +280,7 @@ def test_run_and_parse_picklepatch() -> None:
281280 cursor .execute (
282281 "SELECT function_name, class_name, module_name, file_path, benchmark_function_name, benchmark_module_path, benchmark_line_number FROM benchmark_function_timings ORDER BY benchmark_module_path, benchmark_function_name, function_name" )
283282 function_calls = cursor .fetchall ()
284-
283+
285284 # Assert the length of function calls
286285 assert len (function_calls ) == 2 , f"Expected 2 function calls, but got { len (function_calls )} "
287286 function_benchmark_timings = codeflash_benchmark_plugin .get_function_benchmark_timings (output_file )
@@ -327,6 +326,7 @@ def test_run_and_parse_picklepatch() -> None:
327326 assert actual [4 ] == expected [4 ], f"Mismatch at index { idx } for benchmark_function_name"
328327 assert actual [5 ] == expected [5 ], f"Mismatch at index { idx } for benchmark_module_path"
329328 assert actual [6 ] == expected [6 ], f"Mismatch at index { idx } for benchmark_line_number"
329+ conn .close ()
330330
331331 # Generate replay test
332332 generate_replay_test (output_file , replay_tests_dir )
@@ -372,7 +372,7 @@ def test_run_and_parse_picklepatch() -> None:
372372 test_env ["CODEFLASH_TEST_ITERATION" ] = "0"
373373 test_env ["CODEFLASH_LOOP_INDEX" ] = "1"
374374 test_type = TestType .REPLAY_TEST
375- replay_test_function = "test_code_to_optimize_bubble_sort_picklepatch_test_unused_socket_bubble_sort_with_unused_socket "
375+ replay_test_function = "test_code_to_optimize_bubble_sort_picklepatch_test_unused_socket_bubble_sort_with_unused_socket_test_socket_picklepatch "
376376 func_optimizer = opt .create_function_optimizer (func )
377377 func_optimizer .test_files = TestFiles (
378378 test_files = [
@@ -396,7 +396,7 @@ def test_run_and_parse_picklepatch() -> None:
396396 )
397397 assert len (test_results_unused_socket ) == 1
398398 assert test_results_unused_socket .test_results [0 ].id .test_module_path == "code_to_optimize.tests.pytest.benchmarks_socket_test.codeflash_replay_tests.test_code_to_optimize_tests_pytest_benchmarks_socket_test_test_socket__replay_test_0"
399- assert test_results_unused_socket .test_results [0 ].id .test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_unused_socket_bubble_sort_with_unused_socket "
399+ assert test_results_unused_socket .test_results [0 ].id .test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_unused_socket_bubble_sort_with_unused_socket_test_socket_picklepatch "
400400 assert test_results_unused_socket .test_results [0 ].did_pass == True
401401
402402 # Replace with optimized candidate
@@ -440,16 +440,14 @@ def bubble_sort_with_unused_socket(data_container):
440440 assert new_test is not None
441441 replay_test_path .write_text (new_test )
442442
443- # Run test for original function code that uses the socket. This test should pass because
444- # the PicklePlaceholderAccessError is thrown as expected behavior, which the test framework
445- # treats as a successful test execution (the exception is the expected outcome).
443+ # Run test for original function code that uses the socket. This should fail, as the PicklePlaceholder is accessed.
446444 test_env = os .environ .copy ()
447445 test_env ["CODEFLASH_TEST_ITERATION" ] = "0"
448446 test_env ["CODEFLASH_LOOP_INDEX" ] = "1"
449447 test_type = TestType .REPLAY_TEST
450448 func = FunctionToOptimize (function_name = "bubble_sort_with_used_socket" , parents = [],
451449 file_path = Path (fto_used_socket_path ))
452- replay_test_function = "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket "
450+ replay_test_function = "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket_test_used_socket_picklepatch "
453451 func_optimizer = opt .create_function_optimizer (func )
454452 func_optimizer .test_files = TestFiles (
455453 test_files = [
@@ -477,8 +475,8 @@ def bubble_sort_with_unused_socket(data_container):
477475 assert test_results_used_socket .test_results [
478476 0 ].id .test_module_path == "code_to_optimize.tests.pytest.benchmarks_socket_test.codeflash_replay_tests.test_code_to_optimize_tests_pytest_benchmarks_socket_test_test_socket__replay_test_0"
479477 assert test_results_used_socket .test_results [
480- 0 ].id .test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket "
481- assert test_results_used_socket .test_results [0 ].did_pass is True
478+ 0 ].id .test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket_test_used_socket_picklepatch "
479+ assert test_results_used_socket .test_results [0 ].did_pass is False
482480 print ("test results used socket" )
483481 print (test_results_used_socket )
484482 # Replace with optimized candidate
@@ -508,27 +506,15 @@ def bubble_sort_with_used_socket(data_container):
508506 assert test_results_used_socket .test_results [
509507 0 ].id .test_module_path == "code_to_optimize.tests.pytest.benchmarks_socket_test.codeflash_replay_tests.test_code_to_optimize_tests_pytest_benchmarks_socket_test_test_socket__replay_test_0"
510508 assert test_results_used_socket .test_results [
511- 0 ].id .test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket"
512-
513- # TODO: Fix socket behavior differences on Windows
514- # On Windows, socket behavior differs from Unix platforms, causing tests to pass instead of fail
515- if platform .system () == "Windows" :
516- # On Windows, the test passes when it should fail due to socket behavior differences
517- assert test_results_used_socket .test_results [0 ].did_pass is True
518- assert optimized_test_results_used_socket .test_results [0 ].did_pass is True
519- # Since both pass on Windows, comparison should return True
520- assert compare_test_results (test_results_used_socket , optimized_test_results_used_socket ) is True
521- else :
522- # On Unix platforms, the expected behavior is that the test fails
523- assert test_results_used_socket .test_results [0 ].did_pass is False
524- assert optimized_test_results_used_socket .test_results [0 ].did_pass is True
525- # Even though tests threw the same error, we reject this as the behavior of the unpickleable object could not be determined.
526- assert compare_test_results (test_results_used_socket , optimized_test_results_used_socket ) is False
509+ 0 ].id .test_function_name == "test_code_to_optimize_bubble_sort_picklepatch_test_used_socket_bubble_sort_with_used_socket_test_used_socket_picklepatch"
510+ assert test_results_used_socket .test_results [0 ].did_pass is False
511+
512+ # Even though tests threw the same error, we reject this as the behavior of the unpickleable object could not be determined.
513+ assert compare_test_results (test_results_used_socket , optimized_test_results_used_socket ) is False
527514
528515 finally :
529516 # cleanup
530517 output_file .unlink (missing_ok = True )
531518 shutil .rmtree (replay_tests_dir , ignore_errors = True )
532519 fto_unused_socket_path .write_text (original_fto_unused_socket_code )
533520 fto_used_socket_path .write_text (original_fto_used_socket_code )
534-
0 commit comments